from apify_client import ApifyClient

# Initialize the ApifyClient with your Apify API token.
client = ApifyClient("<YOUR_API_TOKEN>")

# Prepare the Actor input: start URLs, pseudo-URLs to match further links,
# a CSS selector for clickable elements, and the page/intercept functions
# that the legacy PhantomJS crawler executes in the browser context.
run_input = {
    "startUrls": [{
        "key": "START",
        "value": "https://www.example.com/",
    }],
    "crawlPurls": [{
        "key": "MY_LABEL",
        "value": "https://www.example.com/[.*]",
    }],
    "clickableElementsSelector": "a:not([rel=nofollow])",
    "pageFunction": """function pageFunction(context) {
    // called on every page the crawler visits, use it to extract data from it
    var $ = context.jQuery;
    var result = {
        title: $('title').text(),
        myValue: $('TODO').text()
    };
    return result;
}
""",
    "interceptRequest": """function interceptRequest(context, newRequest) {
    // called whenever the crawler finds a link to a new page,
    // use it to override default behavior
    return newRequest;
}
""",
}

# Run the Actor and wait for it to finish (call() blocks until the run ends).
run = client.actor("apify/legacy-phantomjs-crawler").call(run_input=run_input)

# Fetch and print the Actor's results from the run's default dataset.
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)