1const Apify = require('apify');
2const PromisePool = require('es6-promise-pool');
3
4
// Maximum number of pages crawled at the same time by the promise pool.
const CONCURRENCY = 5;

// Work queue of URLs to crawl. promiseProducer() consumes it destructively
// with pop(), so URLs are taken from the end of this list and the array is
// empty once the crawl has finished.
const URLS = [
    'http://example.com',
    'http://news.ycombinator.com',
    'https://news.ycombinator.com/news?p=2',
    'https://news.ycombinator.com/news?p=3',
    'https://news.ycombinator.com/news?p=4',
    'https://news.ycombinator.com/news?p=5',
    'https://www.reddit.com/',
];

// Shared browser instance; assigned inside Apify.main() before any page is
// opened, hence `let`.
let browser;

// Accumulates one { title, url } record per crawled page. The binding is
// never reassigned (entries are only pushed), so it is declared `const`.
const results = [];
20
21
22
/**
 * Crawls a single URL in a new page of the shared `browser` and appends the
 * page's title and final location to the module-level `results` array.
 *
 * The page is closed even when navigation or evaluation fails, so a failing
 * URL does not leave an open tab behind. The failure itself is not swallowed:
 * the returned promise still rejects with the original error.
 *
 * @param {string} url - Address to open.
 * @returns {Promise<void>} Resolves once the page has been closed.
 */
const crawlUrl = async (url) => {
    const page = await browser.newPage();

    try {
        console.log(`Opening ${url}`);
        await page.goto(url);

        console.log(`Evaluating ${url}`);
        // This callback runs inside the browser page, not in Node.js.
        const result = await page.evaluate(() => {
            return {
                title: document.title,
                url: window.location.href,
            };
        });

        results.push(result);
    } finally {
        // Always release the page, whether or not the crawl succeeded.
        console.log(`Closing ${url}`);
        await page.close();
    }
};
42
43
44
/**
 * Producer callback handed to the promise pool: takes the next URL off the
 * end of `URLS` and starts crawling it.
 *
 * @returns {Promise<void>|null} The crawl promise for the next URL, or `null`
 *     once no (truthy) URL is left in `URLS`.
 */
const promiseProducer = () => {
    const nextUrl = URLS.pop();

    if (!nextUrl) {
        return null;
    }

    return crawlUrl(nextUrl);
};
50
// Entry point: launches one shared browser, crawls every URL with at most
// CONCURRENCY pages open at once, then prints the collected results and
// stores them under the 'OUTPUT' key.
Apify.main(async () => {
    browser = await Apify.launchPuppeteer();

    try {
        // The pool keeps calling promiseProducer() until it returns null.
        const pool = new PromisePool(promiseProducer, CONCURRENCY);
        await pool.start();

        console.log('Results:');
        console.log(JSON.stringify(results, null, 2));

        await Apify.setValue('OUTPUT', results);
    } finally {
        // Close the browser even if crawling or saving the output failed, so
        // the browser is not left running; the error still propagates.
        await browser.close();
    }
});