"""Run the ``mea/ai-training-data-curator`` Apify Actor and print its dataset items."""

from apify_client import ApifyClient

# Replace the placeholder with a real Apify API token before running.
client = ApifyClient("<YOUR_API_TOKEN>")

# Input handed to the Actor unchanged. Which keys are accepted is defined by
# the Actor's own input schema, which is not visible from this script.
run_input = {
    "start_urls": [{"url": "https://docs.python.org/3/"}],
    "documents": [
        {
            "text": "Your document text here...",
            "source_id": "doc_001",
        },
    ],
    "content_selectors": [
        "article",
        "main",
        ".content",
    ],
    "exclude_selectors": [
        "nav",
        "footer",
        "aside",
        ".sidebar",
    ],
    "language_filter": ["en"],
}

# Invoke the Actor with the input above and keep the returned run record.
run = client.actor("mea/ai-training-data-curator").call(run_input=run_input)

# NOTE(review): the apify-client docs type `call` as `dict | None`; fail with
# a clear message instead of a TypeError on the subscript below.
if run is None:
    raise RuntimeError("Actor run did not return a run record; nothing to fetch.")

# Look the dataset id up once; it is needed for both the URL and the fetch.
dataset_id = run["defaultDatasetId"]

print(f"💾 Check your data here: https://console.apify.com/storage/datasets/{dataset_id}")
for item in client.dataset(dataset_id).iterate_items():
    print(item)