import os

from apify_client import ApifyClient
2
3
4
# Initialize the ApifyClient with your API token.
# The token is taken from the APIFY_TOKEN environment variable when it is set,
# so the secret does not have to be committed to source control; the
# placeholder below is only a fallback and must be replaced if the variable
# is not exported.
client = ApifyClient(os.environ.get("APIFY_TOKEN", "<YOUR_API_TOKEN>"))
6
7
# Prepare the Actor input: every page to process becomes one {"url": ...}
# entry under "startUrls", in the order listed here.
run_input = {
    "startUrls": [
        {"url": address}
        for address in (
            "https://example.com",
            "https://www.iana.org/domains/reserved",
            "https://www.iana.org/help/example-domains",
            "https://docs.apify.com/platform",
            "https://docs.apify.com/platform/storage",
            "https://docs.apify.com/platform/proxy",
            "https://docs.apify.com/sdk/js",
            "https://en.wikipedia.org/wiki/Web_scraping",
            "https://en.wikipedia.org/wiki/HTML",
            "https://en.wikipedia.org/wiki/Markdown",
            "https://en.wikipedia.org/wiki/HTTP",
            "https://en.wikipedia.org/wiki/URL",
            "https://en.wikipedia.org/wiki/Web_page",
            "https://en.wikipedia.org/wiki/Website",
            "https://en.wikipedia.org/wiki/Search_engine",
            "https://en.wikipedia.org/wiki/Natural_language_processing",
            "https://en.wikipedia.org/wiki/Information_retrieval",
            "https://en.wikipedia.org/wiki/Data_extraction",
            "https://en.wikipedia.org/wiki/Document_Object_Model",
            "https://en.wikipedia.org/wiki/JavaScript",
            "https://en.wikipedia.org/wiki/TypeScript",
            "https://en.wikipedia.org/wiki/Node.js",
            "https://en.wikipedia.org/wiki/Representational_state_transfer",
            "https://en.wikipedia.org/wiki/Web_API",
            "https://en.wikipedia.org/wiki/Application_programming_interface",
        )
    ]
}
35
36
# Run the Actor and wait for it to finish.
run = client.actor("maximedupre/webpage-text-extractor").call(run_input=run_input)

# NOTE(review): the client library's type hints allow call() to return None.
# Fail with an explicit message in that case rather than with an opaque
# "'NoneType' object is not subscriptable" on the lookup below.
if run is None:
    raise RuntimeError("Actor run failed: call() returned no run object.")

# Fetch and print the Actor results from the run's default dataset (if any).
# The dataset id is looked up once and reused for both the console link and
# the item iteration.
dataset_id = run["defaultDatasetId"]
print(f"💾 Check your data here: https://console.apify.com/storage/datasets/{dataset_id}")
for item in client.dataset(dataset_id).iterate_items():
    print(item)
43
44