from apify_client import ApifyClient

# Initialize the ApifyClient with your Apify API token
client = ApifyClient("<YOUR_API_TOKEN>")

# Prepare the Actor input
run_input = {
9 "startUrls": [{ "url": "https://crawlee.dev" }],
10 "maxCrawlingDepth": 1,
11 "requestTimeout": 10,
12 "linkSelector": "a[href]",
13 "linkPatterns": [".*crawlee\\.dev.*"],
14 "pageFunction": """from typing import Any
15from crawlee.crawlers import BeautifulSoupCrawlingContext
16
17# See the context section in readme to find out what fields you can access
18# https://apify.com/apify/beautifulsoup-scraper#context
19def page_function(context: BeautifulSoupCrawlingContext) -> Any:
20 url = context.request.url
21 title = context.soup.title.string if context.soup.title else None
22 return {'url': url, 'title': title}
23""",
24 "soupFeatures": "html.parser",
25 "proxyConfiguration": { "useApifyProxy": True },
}

# Run the Actor and wait for it to finish
run = client.actor("apify/beautifulsoup-scraper").call(run_input=run_input)

# Fetch and print Actor results from the run's dataset (if there are any)
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)
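
# Optional follow-up: a minimal sketch for persisting the results to a local
# JSON file instead of printing them. Assumes the same run as above; the
# "results.json" filename is an arbitrary choice, not part of the Actor.
import json

items = client.dataset(run["defaultDatasetId"]).list_items().items
with open("results.json", "w", encoding="utf-8") as f:
    json.dump(items, f, ensure_ascii=False, indent=2)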