
Example Puppeteer Promise Pool
Pricing
Pay per usage
Go to Store

Example Puppeteer Promise Pool
An example of how to run Puppeteer in parallel using the 'es6-promise-pool' npm package.
0.0 (0)
Pricing
Pay per usage
3
Total users
18
Monthly users
1
Runs succeeded
>99%
Last modified
2 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-puppeteer:beta

# Second, copy just package.json and package-lock.json since it should be
# the only file that affects "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "apify": "latest", "es6-promise-pool": "latest" }, "scripts": { "start": "node main.js" }}
main.js
const Apify = require('apify');
const PromisePool = require('es6-promise-pool');

// How many URLs we want to process in parallel.
const CONCURRENCY = 5;

// URLs to process. The producer below pops entries off this array, so it is
// consumed (from the end) as the run progresses.
const URLS = [
    'http://example.com',
    'http://news.ycombinator.com',
    'https://news.ycombinator.com/news?p=2',
    'https://news.ycombinator.com/news?p=3',
    'https://news.ycombinator.com/news?p=4',
    'https://news.ycombinator.com/news?p=5',
    'https://www.reddit.com/',
];

// Shared Puppeteer browser instance; assigned once the browser is launched.
let browser;

// Accumulates one { title, url } record per crawled page. Only ever pushed to,
// never reassigned, hence `const`.
const results = [];

/**
 * Opens `url` in a new page of the shared `browser`, reads the document title
 * and final location, appends them to `results`, and closes the page.
 *
 * The page is closed in a `finally` block so that a failed navigation or
 * evaluation does not leave an orphaned page open in the browser. The
 * original error still propagates to the caller.
 *
 * @param {string} url - Address to open.
 * @returns {Promise<void>} Resolves once the page has been closed.
 */
const crawlUrl = async (url) => {
    const page = await browser.newPage();

    try {
        console.log(`Opening ${url}`);
        await page.goto(url);

        console.log(`Evaluating ${url}`);
        // The callback runs inside the page, so `document` and `window`
        // refer to the page's own globals.
        const result = await page.evaluate(() => {
            return {
                title: document.title,
                url: window.location.href,
            };
        });

        results.push(result);
    } finally {
        console.log(`Closing ${url}`);
        await page.close();
    }
};

/**
 * Promise producer for the pool. Each call removes the last entry from the
 * URLS array and returns the crawlUrl(url) promise for it, so URLs are
 * processed from the end of the list towards the start.
 *
 * @returns {Promise<void>|null} The crawl promise, or null once URLS is empty
 *     (null is how a producer tells es6-promise-pool there is no more work).
 */
const promiseProducer = () => {
    const url = URLS.pop();

    return url ? crawlUrl(url) : null;
};

// Entry point: launches the browser, crawls all URLs through the promise pool,
// prints the collected results and stores them under the 'OUTPUT' key.
Apify.main(async () => {
    // Starts browser.
    browser = await Apify.launchPuppeteer();

    try {
        // Runs through all the URLs in a pool of given concurrency.
        const pool = new PromisePool(promiseProducer, CONCURRENCY);
        await pool.start();

        // Print results.
        console.log('Results:');
        console.log(JSON.stringify(results, null, 2));

        await Apify.setValue('OUTPUT', results);
    } finally {
        // Close the browser even when the pool or the store write rejects,
        // so a failed run does not leave the browser process running.
        await browser.close();
    }
});