1from apify_client import ApifyClient
2
3
4
# API client used for every call below. "<YOUR_API_TOKEN>" is a placeholder
# and must be replaced with a real Apify API token before running.
client = ApifyClient(token="<YOUR_API_TOKEN>")
6
7
# JavaScript page function for the "cheerio-scraper" entry. It reads the page
# through the `$` handle and returns the URL, the <title> text and the full
# HTML, falling back to empty strings if anything throws.
_CHEERIO_PAGE_FUNCTION = """async function pageFunction(context) {
 const { request, log, $ } = context;
 try {
 const title = $('title').text() || '';
 const html = $('html').html() || '';
 return {
 url: request.url,
 title: title,
 html: html
 };
 } catch (error) {
 log.error('Error in pageFunction:', error);
 return {
 url: request.url,
 title: '',
 html: ''
 };
 }
}"""

# JavaScript page function shared by the "web-scraper" and "playwright-scraper"
# entries. It reads the title and HTML through the `page` handle, with the same
# empty-string fallback on error.
_BROWSER_PAGE_FUNCTION = """async function pageFunction(context) {
 const { request, log, page } = context;
 try {
 const title = await page.title();
 const html = await page.content();
 return {
 url: request.url,
 title: title,
 html: html
 };
 } catch (error) {
 log.error('Error in pageFunction:', error);
 return {
 url: request.url,
 title: '',
 html: ''
 };
 }
}"""


def _page_function_actor(
    name: str,
    enabled: bool,
    page_function: str,
    *,
    max_retries: int = 3,
    timeout_secs: int = 30,
) -> dict:
    """Build one actor entry whose input carries a JavaScript page function.

    Args:
        name: Value stored under the entry's "name" key.
        enabled: Value stored under the entry's "enabled" key.
        page_function: JavaScript source stored under "pageFunction".
        max_retries: Value for "maxRequestRetries".
        timeout_secs: Value for "requestTimeoutSecs".

    Returns:
        A new dict with "name", "enabled" and "input" keys; the page limit is
        fixed at one page and the Apify proxy is switched on.
    """
    return {
        "name": name,
        "enabled": enabled,
        "input": {
            "maxRequestRetries": max_retries,
            "requestTimeoutSecs": timeout_secs,
            "maxPagesPerCrawl": 1,
            "pageFunction": page_function,
            # A fresh dict per entry so entries never share mutable state.
            "proxyConfiguration": {"useApifyProxy": True},
        },
    }


# Input payload for the actor call: the page to scrape, a plain-language
# description of the wanted data, and one configuration entry per actor.
run_input = {
    "targetUrl": "https://books.toscrape.com/",
    "userGoal": (
        "Get me a list of all the books on the first page. "
        "For each book, I want its title, price, star rating, "
        "and whether it is in stock."
    ),
    "actors": [
        _page_function_actor("cheerio-scraper", True, _CHEERIO_PAGE_FUNCTION),
        _page_function_actor("web-scraper", False, _BROWSER_PAGE_FUNCTION),
        # This entry takes no page function, so it is written out directly.
        {
            "name": "website-content-crawler",
            "enabled": True,
            "input": {
                "maxCrawlPages": 1,
                "crawler": "playwright",
                "proxyConfiguration": {"useApifyProxy": True},
            },
        },
        _page_function_actor(
            "playwright-scraper",
            False,
            _BROWSER_PAGE_FUNCTION,
            max_retries=2,
            timeout_secs=45,
        ),
    ],
}
110
111
# Run the "ohlava/scrapercodegenerator" actor with the prepared input.
# NOTE(review): `call()` is expected to block until the run finishes; confirm
# against the apify-client version in use.
run = client.actor("ohlava/scrapercodegenerator").call(run_input=run_input)

# `call()` is typed as returning an optional run record. Fail with a clear
# message here rather than a TypeError on the subscript below.
if run is None:
    raise RuntimeError(
        "Actor call returned no run object; there is no dataset to read."
    )

# Look the dataset id up once and reuse it for the link and the fetch.
dataset_id = run["defaultDatasetId"]
print(f"💾 Check your data here: https://console.apify.com/storage/datasets/{dataset_id}")

# Print every item stored in the run's default dataset.
for item in client.dataset(dataset_id).iterate_items():
    print(item)
118
119