Actor picture

Example Puppeteer Promise Pool

mtrunkat/puppeteer-promise-pool-example

An example of how to use Puppeteer in parallel using the 'es6-promise-pool' npm package.

No credit card required

Author's avatar: Marek Trunkát
  • Modified
  • Users: 14
  • Runs: 35
Actor picture

Example Puppeteer Promise Pool

Based on the apify/actor-node-puppeteer:beta Docker image (see docs).

const Apify = require('apify');
const PromisePool = require('es6-promise-pool');

// How many URLs we want to process in parallel.
const CONCURRENCY = 5;

// URLs to process. Note that promiseProducer() consumes this list by
// pop()-ing entries off its end, so it is emptied as the crawl progresses.
const URLS = [
    'http://example.com',
    'http://news.ycombinator.com',
    'https://news.ycombinator.com/news?p=2',
    'https://news.ycombinator.com/news?p=3',
    'https://news.ycombinator.com/news?p=4',
    'https://news.ycombinator.com/news?p=5',
    'https://www.reddit.com/',
];

// Shared browser instance; assigned once the browser has been launched.
let browser;

// Collected { title, url } records, one per crawled page. The binding is
// never reassigned (entries are only pushed), hence `const`.
const results = [];

// Opens `url` in a new page of the shared `browser`, reads the document title
// and the current location, and appends them to `results` as { title, url }.
// The returned promise resolves once the page has been closed. It rejects if
// opening the page, navigating, or evaluating fails.
const crawlUrl = async (url) => {
    const page = await browser.newPage();

    // Close the page even when navigation or evaluation throws, so that a
    // failing url does not leave an open page behind in the shared browser.
    try {
        console.log(`Opening ${url}`);
        await page.goto(url);

        console.log(`Evaluating ${url}`);
        const result = await page.evaluate(() => ({
            title: document.title,
            url: window.location.href,
        }));

        results.push(result);
    } finally {
        console.log(`Closing ${url}`);
        await page.close();
    }
};

// Producer handed to the promise pool. Each call removes the last remaining
// entry from URLS and returns the crawlUrl() promise for it. Once URLS has
// no (truthy) entry left, it returns null instead.
const promiseProducer = () => {
    const nextUrl = URLS.pop();

    if (!nextUrl) {
        return null;
    }

    return crawlUrl(nextUrl);
};

// Entry point: launches the browser, crawls all URLS through a promise pool
// limited to CONCURRENCY parallel crawls, prints the collected results and
// stores them under the 'OUTPUT' key.
Apify.main(async () => {
    // Starts browser.
    browser = await Apify.launchPuppeteer();

    // Make sure the browser is closed even if a crawl rejects or storing the
    // output fails; otherwise the browser would be left running.
    try {
        // Runs through all the urls in a pool of given concurrency.
        const pool = new PromisePool(promiseProducer, CONCURRENCY);
        await pool.start();

        // Print results.
        console.log('Results:');
        console.log(JSON.stringify(results, null, 2));

        await Apify.setValue('OUTPUT', results);
    } finally {
        await browser.close();
    }
});