1from apify_client import ApifyClient
2
3
4
# API token used to authenticate against the Apify platform.
# Replace the placeholder with your own token before running the script.
APIFY_TOKEN = "<YOUR_API_TOKEN>"

# Client object through which the Actor and dataset calls below are made.
client = ApifyClient(APIFY_TOKEN)
6
7
# Substrings listed under "linkIncludes" in the article-definition settings.
article_link_markers = [
    "article",
    "storyid",
    "?p=",
    "id=",
    "/fpss/track",
    ".html",
    "/content/",
]

# JavaScript source passed to the Actor as a plain string. It is written one
# line per literal so the exact whitespace sent to the Actor is visible.
extend_output_function = (
    "($) => {\n"
    " const result = {};\n"
    " // Uncomment to add a title to the output\n"
    " // result.pageTitle = $('title').text().trim();\n"
    "\n"
    " return result;\n"
    "}"
)

# Input payload for the Actor run, assembled from the pieces above.
# Key order matches the original literal.
run_input = {
    "startUrls": [{"url": "https://www.theguardian.com"}],
    "isUrlArticleDefinition": {
        "minDashes": 4,
        "hasDate": True,
        "linkIncludes": article_link_markers,
    },
    "proxyConfiguration": {"useApifyProxy": True},
    "extendOutputFunction": extend_output_function,
}
32
33
# Start the Actor with the prepared input and keep the run object it returns.
run = client.actor("lukaskrivka/article-extractor-smart").call(run_input=run_input)

# The client's `call()` has an optional return (run dict or None). Fail with a
# clear message instead of an opaque TypeError when subscripting None below.
if run is None:
    raise RuntimeError(
        "Actor call returned no run object; cannot locate its default dataset."
    )

# Look the dataset id up once; it is used for both the console link and the fetch.
dataset_id = run["defaultDatasetId"]

print(f"💾 Check your data here: https://console.apify.com/storage/datasets/{dataset_id}")

# Print every item stored in the run's default dataset, one per line.
for item in client.dataset(dataset_id).iterate_items():
    print(item)
40
41