1from apify_client import ApifyClient
2
3
4
# Initialize the ApifyClient with your Apify API token.
# Replace "<YOUR_API_TOKEN>" with a real token (Apify Console -> Settings -> Integrations) before running.
client = ApifyClient("<YOUR_API_TOKEN>")
6
7
# Input configuration for the apify/website-content-crawler Actor.
# Selectors below strip page boilerplate (navigation, footers, overlays, ...)
# before content extraction; see the Actor's input schema for each field.
run_input = {
    # Crawl entry point(s).
    "startUrls": [
        {"url": "https://docs.apify.com/academy/web-scraping-for-beginners"},
    ],
    "useSitemaps": False,
    "useLlmsTxt": False,
    "respectRobotsTxtFile": True,
    # Adaptive mode: browser rendering only where plain HTTP is insufficient.
    "crawlerType": "playwright:adaptive",
    "includeUrlGlobs": [],
    "excludeUrlGlobs": [],
    "initialCookies": [],
    "customHttpHeaders": {},
    "signHttpRequests": False,
    "proxyConfiguration": {"useApifyProxy": True},
    "keepElementsCssSelector": "",
    # Elements removed from the DOM prior to text extraction.
    "removeElementsCssSelector": (
        "nav, footer, script, style, noscript, svg, img[src^='data:'],\n"
        '[role="alert"],\n'
        '[role="banner"],\n'
        '[role="dialog"],\n'
        '[role="alertdialog"],\n'
        '[role="region"][aria-label*="skip" i],\n'
        '[aria-modal="true"]'
    ),
    "blockMedia": True,
    # Expand collapsed sections (accordions, menus) before extraction.
    "clickElementsCssSelector": '[aria-expanded="false"]',
    "storeSkippedUrls": False,
}
32
33
# Start the Actor run and block until it finishes; `run` holds the run object,
# including the ID of the dataset where results were stored.
run = client.actor("apify/website-content-crawler").call(run_input=run_input)

# Fetch and print every item from the run's default dataset.
# FIX: the original message contained mojibake ("๐พ") where the 💾 emoji
# had been mis-decoded; restored the intended character.
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)
40
41