import { ApifyClient } from 'apify-client';
2
3
4
// Apify API client used for every request made by this script.
// The token is taken from the APIFY_TOKEN environment variable when it is set
// to a non-empty value, so the secret does not have to be pasted into the
// source; otherwise the original placeholder is used and must be replaced by
// hand. `||` (not `??`) is deliberate: an empty variable should also fall back.
// `globalThis.process?.` keeps the lookup from throwing where no `process`
// global exists.
const client = new ApifyClient({
  token: globalThis.process?.env?.APIFY_TOKEN || '<YOUR_API_TOKEN>',
});
8
9
// Page-function source (kept as a string) for the "cheerio-scraper" entry.
// It reads the page <title> text and the <html> markup through `$` and returns
// them with the request URL; on error it logs and returns empty fields.
const cheerioPageFunction = "async function pageFunction(context) {\n const { request, log, $ } = context;\n try {\n const title = $('title').text() || '';\n const html = $('html').html() || '';\n return {\n url: request.url,\n title: title,\n html: html\n };\n } catch (error) {\n log.error('Error in pageFunction:', error);\n return {\n url: request.url,\n title: '',\n html: ''\n };\n }\n}";

// Page-function source (kept as a string) shared by the "web-scraper" and
// "playwright-scraper" entries, which previously carried two identical copies.
// It reads the title and content through `page` and mirrors the error handling
// of the cheerio variant.
const browserPageFunction = "async function pageFunction(context) {\n const { request, log, page } = context;\n try {\n const title = await page.title();\n const html = await page.content();\n return {\n url: request.url,\n title: title,\n html: html\n };\n } catch (error) {\n log.error('Error in pageFunction:', error);\n return {\n url: request.url,\n title: '',\n html: ''\n };\n }\n}";

// Input object passed to the actor call: the target URL, a natural-language
// goal, and one configuration entry per scraper (each with its own `enabled`
// flag and nested `input`). Every entry is limited to a single page and sets
// `useApifyProxy: true`.
const input = {
  targetUrl: 'https://books.toscrape.com/',
  userGoal: 'Get me a list of all the books on the first page. For each book, I want its title, price, star rating, and whether it is in stock.',
  actors: [
    {
      name: 'cheerio-scraper',
      enabled: true,
      input: {
        maxRequestRetries: 3,
        requestTimeoutSecs: 30,
        maxPagesPerCrawl: 1,
        pageFunction: cheerioPageFunction,
        proxyConfiguration: { useApifyProxy: true },
      },
    },
    {
      name: 'web-scraper',
      enabled: false,
      input: {
        maxRequestRetries: 3,
        requestTimeoutSecs: 30,
        maxPagesPerCrawl: 1,
        pageFunction: browserPageFunction,
        proxyConfiguration: { useApifyProxy: true },
      },
    },
    {
      name: 'website-content-crawler',
      enabled: true,
      input: {
        maxCrawlPages: 1,
        crawler: 'playwright',
        proxyConfiguration: { useApifyProxy: true },
      },
    },
    {
      name: 'playwright-scraper',
      enabled: false,
      input: {
        maxRequestRetries: 2,
        requestTimeoutSecs: 45,
        maxPagesPerCrawl: 1,
        pageFunction: browserPageFunction,
        proxyConfiguration: { useApifyProxy: true },
      },
    },
  ],
};
66
67
// Call the "ohlava/scrapercodegenerator" actor with `input`. The awaited
// result is the run object; only its `defaultDatasetId` is used below.
const run = await client.actor('ohlava/scrapercodegenerator').call(input);

// Print a header and a console link to the dataset identified by the run.
console.log('Results from dataset');
console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);

// List the items of that dataset and dump each one to stdout.
const { items } = await client.dataset(run.defaultDatasetId).listItems();
for (const item of items) {
  console.dir(item);
}
77
78