Screenshot Taker avatar
Screenshot Taker
Try for free

No credit card required

View all Actors
Screenshot Taker

Screenshot Taker

jancurn/screenshot-taker
Try for free

No credit card required

Takes a screenshot of one or more web pages using the Chrome browser. The actor lets you set a custom viewport size, page load timeout, delay before the screenshot, proxy configuration, and output image format.

Dockerfile

# Build on the Apify base image that ships Node.js 16 together with
# Puppeteer and the Chrome browser.
FROM apify/actor-node-puppeteer-chrome:16

# First, copy just package.json since it should be the only file
# that affects NPM install in the next step
COPY package.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

INPUT_SCHEMA.json

1{
2    "title": "Schema for the actor",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "urls": {
7            "title": "Page URLs",
8            "type": "array",
9            "description": "List of URLs of web pages to take the screenshot of.",
10            "prefill": [
11                { "url": "https://www.example.com" },
12                { "url": "https://sdk.apify.com" }
13            ],
14            "editor": "requestListSources"
15        },
16        "pageLoadTimeoutSecs": {
17            "title": "Page load timeout",
18            "type": "integer",
19            "description": "Timeout for the web page load, in seconds. If the web page does not load in this time frame, it is considered to have failed and will be retried, similarly as with other page load errors.",
20            "minimum": 1,
21            "maximum": 180,
22            "default": 60,
23            "unit": "seconds"
24        },
25        "pageMaxRetryCount": {
26            "title": "Page retry count",
27            "type": "integer",
28            "description": "How many times to retry to load the page on error or timeout.",
29            "minimum": 0,
30            "maximum": 10,
31            "default": 2
32        },
33        "waitUntil": {
34            "title": "Wait until",
35            "type": "string",
36            "description": "Indicates when to consider the navigation to the page as succeeded. For more details, see the <code>waitUntil</code> parameter of the <a href='https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md#pagegotourl-options' target='_blank' rel='noopener'>Page.goto()</a> function in the Puppeteer documentation.",
37            "default": "load",
38            "enum": [
39                "load",
40                "domcontentloaded",
41                "networkidle0",
42                "networkidle2"
43            ],
44            "enumTitles": [
45                "The load event is fired (load)",
46                "The DOMContentLoaded event is fired (domcontentloaded)",
47                "There are no more than 0 network connections for at least 500 ms (networkidle0)",
48                "There are no more than 2 network connections for at least 500 ms (networkidle2)"
49            ],
50            "editor": "select"
51        },
52        "viewportWidth": {
53            "title": "Viewport width",
54            "type": "integer",
55            "description": "Width of the browser window.",
56            "default": 1200,
57            "minimum": 1,
58            "maximum": 10000,
59            "unit": "pixels"
60        },
61        "viewportHeight": {
62            "title": "Viewport height",
63            "type": "integer",
64            "description": "Height of the browser window.",
65            "default": 900,
66            "minimum": 1,
67            "maximum": 10000,
68            "unit": "pixels"
69        },
70        "delaySecs": {
71            "title": "Delay before screenshot",
72            "type": "integer",
73            "description": "How long to wait after the page loads before taking the screenshot.",
74            "default": 0,
75            "minimum": 0,
76            "maximum": 120,
77            "unit": "seconds"
78        },
79        "imageType": {
80            "title": "Image type",
81            "type": "string",
82            "description": "Format of the image.",
83            "default": "jpeg",
84            "enum": [
85                "jpeg",
86                "png"
87            ],
88            "enumTitles": [
89                "JPEG",
90                "PNG"
91            ],
92            "editor": "select"
93        },
94        "proxyConfiguration": {
95            "title": "Proxy configuration",
96            "type": "object",
97            "description": "Specifies the type of proxy servers that will be used by the crawler in order to hide its origin.",
98            "editor": "proxy"
99        }
100    },
101    "required": [
102        "urls"
103    ]
104}

main.js

1const _ = require('underscore');
2const Apify = require('apify');
3
/**
 * Actor entry point.
 *
 * Reads the actor input, then for every URL in `input.urls` launches Chrome,
 * loads the page, takes a full-page screenshot, stores the image in the
 * default key-value store and pushes a record describing the result to the
 * default dataset. Pages that keep failing after all retries get a dataset
 * record with their error messages instead.
 */
Apify.main(async () => {
    const input = await Apify.getInput();

    // Fall back to the defaults declared in INPUT_SCHEMA.json so the actor
    // behaves sensibly even when a field is missing from the input.
    const {
        pageLoadTimeoutSecs = 60,
        pageMaxRetryCount = 2,
        waitUntil = 'load',
        viewportWidth = 1200,
        viewportHeight = 900,
        delaySecs = 0,
        imageType = 'jpeg',
    } = input;

    const requestList = await Apify.openRequestList('my-urls', input.urls);
    const keyValueStore = await Apify.openKeyValueStore();

    // Honour the proxy settings from the input; `null` means "no proxy".
    const proxyOptions = buildProxyOptions(input.proxyConfiguration);
    const proxyConfiguration = proxyOptions
        ? await Apify.createProxyConfiguration(proxyOptions)
        : null;

    const crawler = new Apify.BasicCrawler({
        requestList,
        // The handler must be allowed to run at least as long as the page load
        // timeout plus the requested delay, with a margin for launching the
        // browser and saving the results. Never goes below the previous
        // fixed limit of 120 seconds.
        handleRequestTimeoutSecs: Math.max(120, pageLoadTimeoutSecs + delaySecs + 60),
        maxRequestRetries: pageMaxRetryCount,

        handleRequestFunction: async ({ request }) => {
            // A fresh browser is launched for every request, optionally behind a proxy.
            const launchContext = {
                useChrome: true,
                stealth: true,
            };
            if (proxyConfiguration) {
                launchContext.proxyUrl = proxyConfiguration.newUrl();
            }
            const browser = await Apify.launchPuppeteer(launchContext);

            // The `finally` below guarantees the browser is closed even when
            // navigation or the screenshot throws and the request is retried.
            try {
                const page = await browser.newPage();

                log(request, `Setting page viewport to ${viewportWidth}x${viewportHeight}`);
                await page.setViewport({
                    width: viewportWidth,
                    height: viewportHeight,
                });

                log(request, 'Loading page');
                const response = await page.goto(request.url, {
                    timeout: pageLoadTimeoutSecs * 1000,
                    waitUntil,
                });

                // page.goto() may resolve with null, so guard before reading it.
                const responseInfo = response
                    ? { status: response.status(), headers: response.headers() }
                    : null;

                // Stash the response on the request so that handleFailedRequestFunction
                // can report it if a later step fails.
                request.response = responseInfo;

                // Wait (if requested)
                if (delaySecs > 0) {
                    await new Promise((resolve) => setTimeout(resolve, delaySecs * 1000));
                }

                log(request, `Taking screenshot`);
                const buffer = await page.screenshot({
                    type: imageType,
                    fullPage: true,
                });

                // Save screenshot to key-value store under a random key
                const key = `screenshot-${Math.floor(Math.random() * 0xFFFFFFFF).toString(16)}.${imageType}`;
                await keyValueStore.setValue(key, buffer, {
                    contentType: `image/${imageType}`,
                });

                // Save record to dataset
                await Apify.pushData({
                    request: _.pick(request, 'url', 'method', 'payload', 'userData'),
                    response: responseInfo,
                    finishedAt: new Date(),
                    screenshot: {
                        url: keyValueStore.getPublicUrl(key),
                        size: buffer.length,
                    },
                });

                log(request, 'Handling of page succeeded');
            } finally {
                try {
                    await browser.close();
                } catch (e) {
                    // A failure to close the browser must not fail the request; just report it.
                    console.error(e);
                }
            }
        },

        // This function is called once the page processing has failed maxRequestRetries+1 times.
        handleFailedRequestFunction: async ({ request }) => {
            log(request, 'Handling of page failed');
            await Apify.pushData({
                request: _.pick(request, 'url', 'method', 'payload', 'userData'),
                response: request.response || null,
                finishedAt: new Date(),
                errorMessages: request.errorMessages,
            });
        },
    });

    // Run the crawler and wait for it to finish.
    await crawler.run();

    console.log('Crawler finished.');
});

/**
 * Translates the `proxyConfiguration` input object (as produced by the "proxy"
 * input editor) into options for `Apify.createProxyConfiguration()`.
 *
 * @param {object} [proxyInput] Value of `input.proxyConfiguration`, if any.
 * @returns {object|null} Options object, or `null` when no proxy should be used.
 */
function buildProxyOptions(proxyInput) {
    // No proxy section in the input: keep the previous behaviour (default Apify Proxy).
    if (!proxyInput) return {};

    const { useApifyProxy, apifyProxyGroups, apifyProxyCountry, proxyUrls } = proxyInput;

    // Custom proxy servers take precedence over Apify Proxy.
    if (Array.isArray(proxyUrls) && proxyUrls.length > 0) return { proxyUrls };

    // Proxy explicitly switched off.
    if (useApifyProxy === false) return null;

    const options = {};
    if (Array.isArray(apifyProxyGroups) && apifyProxyGroups.length > 0) {
        options.groups = apifyProxyGroups;
    }
    if (apifyProxyCountry) {
        options.countryCode = apifyProxyCountry;
    }
    return options;
}
103
/**
 * Prints a message to the console, prefixed with the URL of the request it relates to.
 *
 * @param {{ url: string }} request Request whose `url` is used as the prefix.
 * @param {string} message Text to print after the prefix.
 */
function log(request, message) {
    console.log(`[${request.url}] ${message}`);
}

package.json

{
    "name": "my-actor",
    "version": "0.0.1",
    "dependencies": {
        "apify": "^2.0.0",
        "puppeteer": "*",
        "underscore": "^1.13.1"
    },
    "scripts": {
        "start": "node main.js"
    },
    "author": "Me!"
}
Developer
Maintained by Community
Actor metrics
  • 34 monthly users
  • 100.0% runs succeeded
  • Created in Jul 2019
  • Modified over 2 years ago
Categories