1from apify_client import ApifyClient
2
3
4
# Initialize the ApifyClient with your Apify API token.
# NOTE(review): consider reading the token from an environment variable
# instead of hard-coding it before committing this file.
client = ApifyClient("<YOUR_API_TOKEN>")

# Input for the junipr/sitemap-generator Actor: crawl the start URL up to
# 500 pages / depth 5 with the cheerio crawler and emit an XML sitemap,
# merging into any existing sitemap (split files at 50k URLs).
run_input = {
    "startUrl": "https://crawlee.dev",
    "maxPages": 500,
    "maxDepth": 5,
    "defaultChangefreq": "weekly",
    "sitemapFormat": "xml",
    "splitAtCount": 50000,
    "existingSitemapAction": "merge",
    "crawlerType": "cheerio",
    "maxConcurrency": 20,
    "requestTimeout": 30000,  # per-request timeout in milliseconds
    "maxRetries": 3,
    "userAgent": "JuniprSitemapBot/1.0",
    "proxyConfiguration": { "useApifyProxy": True },
}

# Start the Actor run and wait for it to finish.
run = client.actor("junipr/sitemap-generator").call(run_input=run_input)

# .call() returns None when the run could not be started or a wait limit
# was exceeded; fail with a clear message instead of crashing with a
# TypeError on the subscript access below.
if run is None:
    raise RuntimeError("Actor run did not complete; no run object was returned.")

# Fetch and print the Actor's results from the run's default dataset.
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)
31
32