"""Run the `junipr/rag-web-extractor` Apify actor and print its dataset items.

Template script: replace `<YOUR_API_TOKEN>` with a real Apify API token
before running. The actor crawls the configured start URLs and stores
RAG-ready chunks in the run's default dataset, which this script then
iterates and prints.
"""

from apify_client import ApifyClient


def main() -> None:
    """Start the actor run, wait for it to finish, and print every dataset item."""
    # NOTE: never commit a real token — prefer reading it from an env var.
    client = ApifyClient("<YOUR_API_TOKEN>")

    # Actor input: crawl settings, chunking parameters, and retry/timeout knobs.
    # See the actor's input schema on Apify for the meaning of each field.
    run_input = {
        "startUrls": [{"url": "https://crawlee.dev/docs/introduction"}],
        "maxPages": 100,
        "maxDepth": 0,
        "outputFormats": ["markdown"],
        "chunkSize": 1000,
        "chunkOverlap": 200,
        "chunkStrategy": "semantic",
        "waitForTimeout": 5000,
        "maxScrolls": 20,
        "paginationMaxPages": 10,
        "minContentLength": 50,
        "proxyConfiguration": {"useApifyProxy": True},
        "maxRetries": 3,
        "requestTimeout": 30000,
    }

    # `.call()` blocks until the actor run finishes and returns the run object.
    run = client.actor("junipr/rag-web-extractor").call(run_input=run_input)

    print(f"💾 Check your data here: https://console.apify.com/storage/datasets/{run['defaultDatasetId']}")
    # Stream results from the run's default dataset one item at a time.
    for item in client.dataset(run["defaultDatasetId"]).iterate_items():
        print(item)


if __name__ == "__main__":
    main()