const Apify = require('apify');

/**
 * Actor entry point.
 *
 * Reads the actor input, opens every URL from `input.requestListSources` in a
 * Puppeteer-driven browser and pushes one dataset record per request: the page
 * HTML on success, or an error record once the crawler gives up on a request.
 *
 * Input fields read by this script:
 *  - requestListSources    sources passed to Apify.openRequestList()
 *  - proxyConfiguration    options passed to Apify.createProxyConfiguration()
 *  - maxRequestRetries     number; falls back to 1 when not a number
 *  - handlePageTimeoutSecs falls back to 60 when missing or falsy
 *  - useChrome             falls back to false when missing or falsy
 *
 * A request may carry `userData.waitForSelector`; when present, the page is
 * only captured after that selector has appeared.
 */
Apify.main(async () => {
    const input = await Apify.getInput();

    // Fail with an actionable message instead of an opaque
    // "Cannot read property 'requestListSources' of null" TypeError.
    if (!input) {
        throw new Error('Actor input is missing; expected an object with a "requestListSources" field.');
    }

    const requestList = await Apify.openRequestList('my-list', input.requestListSources);
    const proxyConfiguration = await Apify.createProxyConfiguration(input.proxyConfiguration);

    /**
     * Stores the rendered page for a successfully loaded request.
     * Both the serialized document (`fullHtml`) and the `<body>` markup
     * (`html`) are saved alongside request debug info.
     */
    const handlePageFunction = async ({ request, response, page }) => {
        const { waitForSelector } = request.userData;

        // Optional per-request gate: wait until the given selector is present.
        if (waitForSelector) {
            await page.waitForSelector(waitForSelector);
        }

        await Apify.pushData({
            url: request.url,
            finishedAt: new Date(),
            fullHtml: await page.content(),
            html: await page.evaluate(() => document.body.outerHTML),
            '#debug': Apify.utils.createRequestDebugInfo(request, response),
            '#error': false,
        });
    };

    /**
     * Stores an error record for a request the crawler reported as failed.
     * No response is available here, so debug info is built from the request alone.
     */
    const handleFailedRequestFunction = async ({ request }) => {
        await Apify.pushData({
            url: request.url,
            finishedAt: new Date(),
            '#debug': Apify.utils.createRequestDebugInfo(request),
            '#error': true,
        });
    };

    const puppeteerCrawler = new Apify.PuppeteerCrawler({
        requestList,
        handlePageFunction,
        handleFailedRequestFunction,
        proxyConfiguration,
        useSessionPool: true,
        sessionPoolOptions: {
            sessionOptions: {
                maxErrorScore: 0.5,
            },
        },
        // Configured for one page per browser, retiring the browser after that page.
        browserPoolOptions: {
            useFingerprints: true,
            retireBrowserAfterPageCount: 1,
            maxOpenPagesPerBrowser: 1,
        },
        persistCookiesPerSession: false,
        // Explicit type check so that a configured value of 0 (no retries) is honoured.
        maxRequestRetries: typeof input.maxRequestRetries === 'number' ? input.maxRequestRetries : 1,
        handlePageTimeoutSecs: input.handlePageTimeoutSecs || 60,
        launchContext: {
            useChrome: input.useChrome || false,
            launchOptions: {
                headless: false,
                // Certificate problems are ignored both via the Puppeteer option and the Chromium flag.
                ignoreHTTPSErrors: true,
                args: ['--ignore-certificate-errors'],
            },
        },
    });

    await puppeteerCrawler.run();
});