Screenshot Taker
Try for free
No credit card required
View all Actors
Screenshot Taker
jancurn/screenshot-taker
Try for free
No credit card required
Takes a screenshot of one or more web pages using the Chrome browser. The actor enables the setting of custom viewport size, page load timeout, delay, proxies, and output image format.
Dockerfile
FROM apify/actor-node-puppeteer-chrome:16

# First, copy just package.json since it should be the only file
# that affects NPM install in the next step.
COPY package.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
INPUT_SCHEMA.json
{
  "title": "Schema for the actor",
  "type": "object",
  "schemaVersion": 1,
  "properties": {
    "urls": {
      "title": "Page URLs",
      "type": "array",
      "description": "List of URLs of web pages to take the screenshot of.",
      "prefill": [
        { "url": "https://www.example.com" },
        { "url": "https://sdk.apify.com" }
      ],
      "editor": "requestListSources"
    },
    "pageLoadTimeoutSecs": {
      "title": "Page load timeout",
      "type": "integer",
      "description": "Timeout for the web page load, in seconds. If the web page does not load in this time frame, it is considered to have failed and will be retried, similarly as with other page load errors.",
      "minimum": 1,
      "maximum": 180,
      "default": 60,
      "unit": "seconds"
    },
    "pageMaxRetryCount": {
      "title": "Page retry count",
      "type": "integer",
      "description": "How many times to retry to load the page on error or timeout.",
      "minimum": 0,
      "maximum": 10,
      "default": 2
    },
    "waitUntil": {
      "title": "Wait until",
      "type": "string",
      "description": "Indicates when to consider the navigation to the page as succeeded. For more details, see <code>waitUntil</code> parameter of <a href='https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md#pagegotourl-options' target='_blank' rel='noopener'>Page.goto()</a> function in the Puppeteer documentation.",
      "default": "load",
      "enum": [
        "load",
        "domcontentloaded",
        "networkidle0",
        "networkidle2"
      ],
      "enumTitles": [
        "The load event is fired (load)",
        "The DOMContentLoaded event is fired (domcontentloaded)",
        "There are no more than 0 network connections for at least 500 ms (networkidle0)",
        "There are no more than 2 network connections for at least 500 ms (networkidle2)"
      ],
      "editor": "select"
    },
    "viewportWidth": {
      "title": "Viewport width",
      "type": "integer",
      "description": "Width of the browser window.",
      "default": 1200,
      "minimum": 1,
      "maximum": 10000,
      "unit": "pixels"
    },
    "viewportHeight": {
      "title": "Viewport height",
      "type": "integer",
      "description": "Height of the browser window.",
      "default": 900,
      "minimum": 1,
      "maximum": 10000,
      "unit": "pixels"
    },
    "delaySecs": {
      "title": "Delay before screenshot",
      "type": "integer",
      "description": "How long to wait after loading the page before taking the screenshot.",
      "default": 0,
      "minimum": 0,
      "maximum": 120,
      "unit": "seconds"
    },
    "imageType": {
      "title": "Image type",
      "type": "string",
      "description": "Format of the image.",
      "default": "jpeg",
      "enum": [
        "jpeg",
        "png"
      ],
      "enumTitles": [
        "JPEG",
        "PNG"
      ],
      "editor": "select"
    },
    "proxyConfiguration": {
      "title": "Proxy configuration",
      "type": "object",
      "description": "Specifies the type of proxy servers that will be used by the crawler in order to hide its origin.",
      "editor": "proxy"
    }
  },
  "required": [
    "urls"
  ]
}
main.js
const _ = require('underscore');
const Apify = require('apify');

// Screenshot Taker actor: loads each input URL in headless Chrome, takes a
// full-page screenshot, stores it in the key-value store, and pushes a record
// (request, response status/headers, screenshot URL and size) to the dataset.
Apify.main(async () => {
    const input = await Apify.getInput();

    const requestList = await Apify.openRequestList('my-urls', input.urls);
    const keyValueStore = await Apify.openKeyValueStore();

    // Honor the "proxyConfiguration" input field (previously ignored). Note that
    // createProxyConfiguration() returns undefined when no proxy is requested.
    const proxyConfiguration = await Apify.createProxyConfiguration(input.proxyConfiguration);

    const crawler = new Apify.BasicCrawler({
        requestList,
        // Leave headroom above the configured page load timeout (schema max is
        // 180s) plus the optional delay; a fixed 120s could abort valid runs.
        handleRequestTimeoutSecs: input.pageLoadTimeoutSecs + (input.delaySecs || 0) + 60,
        maxRequestRetries: input.pageMaxRetryCount,

        handleRequestFunction: async ({ request }) => {
            // Launch a fresh browser per request so proxy and cookies
            // don't leak between pages.
            const browser = await Apify.launchPuppeteer({
                proxyUrl: proxyConfiguration ? proxyConfiguration.newUrl() : undefined,
                useChrome: true,
                stealth: true,
            });

            try {
                const page = await browser.newPage();

                if (input.viewportWidth || input.viewportHeight) {
                    log(request, `Setting page viewport to ${input.viewportWidth}x${input.viewportHeight}`);
                    await page.setViewport({
                        width: input.viewportWidth,
                        height: input.viewportHeight,
                    });
                }

                log(request, 'Loading page');
                const response = await page.goto(request.url, {
                    timeout: input.pageLoadTimeoutSecs * 1000,
                    // Honor the "waitUntil" input field (previously ignored).
                    waitUntil: input.waitUntil,
                });
                // Stash response info on the request so handleFailedRequestFunction
                // can report it if a later step (e.g. the screenshot) fails.
                request.response = {
                    status: response.status(),
                    headers: response.headers(),
                };

                // Wait (if requested)
                if (input.delaySecs > 0) {
                    await new Promise((resolve) => setTimeout(resolve, input.delaySecs * 1000));
                }

                log(request, `Taking screenshot`);
                const buffer = await page.screenshot({
                    type: input.imageType,
                    fullPage: true,
                });

                // Save screenshot to key-value store under a random hex key.
                const key = `screenshot-${Math.floor(Math.random()*0xFFFFFFFF).toString(16)}.${input.imageType}`;
                await keyValueStore.setValue(key, buffer, {
                    contentType: `image/${input.imageType}`,
                });

                // Save record to dataset
                await Apify.pushData({
                    request: _.pick(request, 'url', 'method', 'payload', 'userData'),
                    response: {
                        status: response.status(),
                        headers: response.headers(),
                    },
                    finishedAt: new Date(),
                    screenshot: {
                        url: keyValueStore.getPublicUrl(key),
                        size: buffer.length,
                    },
                });

                log(request, 'Handling of page succeeded');
            } finally {
                // Always close the browser, even when navigation or the screenshot
                // threw — otherwise each retry leaks a Chrome process.
                try {
                    await browser.close();
                } catch (e) {
                    // This one can be ignored
                    console.error(e);
                }
            }
        },

        // This function is called if the page processing failed more than maxRequestRetries+1 times.
        handleFailedRequestFunction: async ({ request }) => {
            log(request, 'Handling of page failed');
            await Apify.pushData({
                request: _.pick(request, 'url', 'method', 'payload', 'userData'),
                response: request.response || null,
                finishedAt: new Date(),
                errorMessages: request.errorMessages,
            });
        },
    });

    // Run the crawler and wait for it to finish.
    await crawler.run();

    console.log('Crawler finished.');
});
103
// Log a message to the console, prefixed with the URL of the request it
// concerns, so interleaved output from concurrent requests stays readable.
function log(request, message) {
    const prefix = `[${request.url}]`;
    console.log(`${prefix} ${message}`);
}
package.json
{
    "name": "my-actor",
    "version": "0.0.1",
    "dependencies": {
        "apify": "^2.0.0",
        "puppeteer": "*",
        "underscore": "^1.13.0"
    },
    "scripts": {
        "start": "node main.js"
    },
    "author": "Me!"
}
Developer
Maintained by Community
Actor Metrics
32 monthly users
-
12 stars
>99% runs succeeded
Created in Jul 2019
Modified 3 years ago
Categories