1from apify_client import ApifyClient
2
3
4
# API token used to authenticate the client; replace the placeholder with a
# real token before running the script.
API_TOKEN = "<YOUR_API_TOKEN>"
client = ApifyClient(API_TOKEN)
6
7
# Input payload handed to the Actor run. The meaning of each key is defined by
# the Actor's own input schema, which is not visible in this file; the
# grouping comments below are inferred from the key names only.
run_input = {
    # Which pages to visit (presumably: entry points plus include/exclude patterns).
    "startUrls": [{"url": "https://crawlee.dev/js"}],
    "globs": [{"glob": "https://crawlee.dev/js/*/*"}],
    "pseudoUrls": [],
    "excludes": [{"glob": "/**/*.{png,jpg,jpeg,pdf}"}],
    "linkSelector": "a",
    "respectRobotsTxtFile": True,
    # Request-level settings (presumably: cookies, headers, proxy).
    "initialCookies": [],
    "customHttpHeaders": {},
    "proxyConfiguration": {"useApifyProxy": True},
    # Browser settings (presumably: which browser to launch and when a page counts as loaded).
    "launcher": "CHROMIUM",
    "waitUntil": "DOM_CONTENT_LOADED",
    "customData": {},
}
22
23
# Start the Actor with the prepared input; the returned run object is what the
# rest of the script reads the default dataset id from.
run = client.actor("shortc/htmlwasher-beta").call(run_input=run_input)

# The client library types call() as returning an optional run object. Fail
# here with a clear message instead of an opaque TypeError when the result is
# subscripted later.
if run is None:
    raise RuntimeError("Actor 'shortc/htmlwasher-beta' did not return a run object")
25
26
# Read the run's default dataset id once and reuse it for both the console
# link and the item listing.
dataset_id = run["defaultDatasetId"]
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + dataset_id)

# Print each dataset item as the client yields it.
for item in client.dataset(dataset_id).iterate_items():
    print(item)
30
31