from apify_client import ApifyClient
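# Initialize the ApifyClient with your API token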
client = ApifyClient("<YOUR_API_TOKEN>")
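# Prepare the Actor input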
run_input = {
    "startUrls": [{
        "key": "START",
        "value": "https://apify.com/",
    }],
    "crawlPurls": [{
        "key": "MY_LABEL",
        "value": "https://www.example.com/[.*]",
    }],
    "clickableElementsSelector": "a:not([rel=nofollow])",
    "proxyConfiguration": { "useApifyProxy": True },
    "pageFunction": """function pageFunction(context) {
    // called on every page the crawler visits, use it to extract data from it
    var $ = context.jQuery;
    var result = {
        title: $('title').text(),
        myValue: $('TODO').text()
    };
    return result;
}
""",
    "interceptRequest": """function interceptRequest(context, newRequest) {
    // called whenever the crawler finds a link to a new page,
    // use it to override default behavior
    return newRequest;
}
""",
}
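# Run the Actor and wait for it to finish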
run = client.actor("barry8schneider/legacy-phantomjs-crawler").call(run_input=run_input)
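# Fetch and print Actor results from the run's dataset (if there are any)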
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)