1const _ = require('underscore');
2const Apify = require('apify');
3
// Actor entry point.
//
// Reads the actor input, then for every URL in `input.urls` launches a
// Puppeteer browser through a proxy URL, loads the page, takes a full-page
// screenshot, stores the image in the default key-value store and pushes one
// result record to the default dataset. Requests that still fail after
// `input.pageMaxRetryCount` retries get an error record instead.
//
// Input fields read here: urls, pageMaxRetryCount, viewportWidth,
// viewportHeight, pageLoadTimeoutSecs, delaySecs, imageType.
Apify.main(async () => {
    const input = await Apify.getInput();

    const requestList = await Apify.openRequestList('my-urls', input.urls);
    const keyValueStore = await Apify.openKeyValueStore();

    const proxyConfiguration = await Apify.createProxyConfiguration();

    const crawler = new Apify.BasicCrawler({
        requestList,
        handleRequestTimeoutSecs: 120,
        maxRequestRetries: input.pageMaxRetryCount,

        // Processes a single request: one browser per request, each with a
        // proxy URL obtained from proxyConfiguration.newUrl().
        handleRequestFunction: async ({ request }) => {
            const browser = await Apify.launchPuppeteer({
                proxyUrl: proxyConfiguration.newUrl(),
                useChrome: true,
                stealth: true,
            });

            // Everything that uses the browser runs inside try/finally so the
            // browser is closed even when navigation, the screenshot or the
            // storage calls throw. Otherwise every failed attempt (and each of
            // its retries) would leave a browser process running.
            try {
                const page = await browser.newPage();

                if (input.viewportWidth || input.viewportHeight) {
                    log(request, `Setting page viewport to ${input.viewportWidth}x${input.viewportHeight}`);
                    await page.setViewport({
                        width: input.viewportWidth,
                        height: input.viewportHeight,
                    });
                }

                log(request, 'Loading page');
                const response = await page.goto(request.url, {
                    timeout: input.pageLoadTimeoutSecs * 1000,
                });

                // Stored on the request so handleFailedRequestFunction can
                // report the response even if a later step fails.
                request.response = {
                    status: response.status(),
                    headers: response.headers(),
                };

                // Optional fixed wait before the screenshot is taken.
                if (input.delaySecs > 0) {
                    await new Promise((resolve) => setTimeout(resolve, input.delaySecs * 1000));
                }

                log(request, `Taking screenshot`);
                const buffer = await page.screenshot({
                    type: input.imageType,
                    fullPage: true,
                });

                // Random 32-bit hex suffix used as the key-value store key.
                const key = `screenshot-${Math.floor(Math.random() * 0xFFFFFFFF).toString(16)}.${input.imageType}`;
                await keyValueStore.setValue(key, buffer, {
                    contentType: `image/${input.imageType}`,
                });

                await Apify.pushData({
                    request: _.pick(request, 'url', 'method', 'payload', 'userData'),
                    response: request.response,
                    finishedAt: new Date(),
                    screenshot: {
                        url: keyValueStore.getPublicUrl(key),
                        size: buffer.length,
                    },
                });

                log(request, 'Handling of page succeeded');
            } finally {
                // Best effort: a failure to close must neither fail a request
                // that succeeded nor mask the error that got us here.
                try {
                    await browser.close();
                } catch (e) {
                    console.error(e);
                }
            }
        },

        // Invoked once a request has exhausted its retries; records the failure
        // together with whatever response was captured before the error.
        handleFailedRequestFunction: async ({ request }) => {
            log(request, 'Handling of page failed');
            await Apify.pushData({
                request: _.pick(request, 'url', 'method', 'payload', 'userData'),
                response: request.response || null,
                finishedAt: new Date(),
                errorMessages: request.errorMessages,
            });
        },
    });

    await crawler.run();

    console.log('Crawler finished.');
});
103
/**
 * Writes a message to the console, prefixed with the URL of the request it
 * relates to, in the form `[<url>] <message>`.
 *
 * @param {{ url: string }} request - Request whose `url` is used as the prefix.
 * @param {string} message - Text to print after the prefix.
 */
function log(request, message) {
    const { url } = request;
    const line = `[${url}] ${message}`;
    console.log(line);
}