1
2
3
4const Apify = require('apify');
5
/**
 * Actor entry point.
 *
 * Reads the actor input, builds an input object for the `apify/web-scraper`
 * actor that targets the paginated Economist listing of `input.category`,
 * and then hands the run over to that actor via `Apify.metamorph`.
 *
 * Expected input: an object with a non-empty `category` field. The value is
 * interpolated as the first path segment of the listing URL.
 *
 * @throws {Error} When the input is missing or has no usable `category`.
 */
Apify.main(async () => {
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Fail early with a clear message instead of crashing on a null input or
    // silently scraping "https://www.economist.com/undefined/".
    if (input == null || input.category == null || String(input.category).trim() === '') {
        throw new Error('Actor input must contain a non-empty "category" field.');
    }

    // Shared prefix of the start URL and of the pagination pseudo-URL.
    const listingUrl = `https://www.economist.com/${input.category}/`;

    const metamorphInput = {
        startUrls: [
            {
                url: `${listingUrl}?page=1`,
                method: 'GET',
            },
        ],
        useRequestQueue: true,
        // The bracketed part is a regular expression matching the page number.
        pseudoUrls: [
            {
                purl: `${listingUrl}?page=[\\d+]`,
                method: 'GET',
            },
        ],
        linkSelector: 'a',
        /**
         * Extracts one record per <article> element of a listing page.
         *
         * NOTE(review): this is passed as a real function rather than a string;
         * presumably it is serialized when the input is handed to the target
         * actor, so it must stay self-contained (no references to variables of
         * this file) and keep the `async function name(...)` form - confirm
         * against the Apify SDK / Web Scraper documentation.
         *
         * @param {object} context - Provides `request`, `jQuery` and `log`.
         * @returns {Promise<object[]>} Articles found on the page, each with
         *     `pageNum`, `topic`, `title`, `url` and `teaser`.
         */
        pageFunction: async function pageFunction(context) {
            const { request, jQuery: $ } = context;
            // The page number is whatever follows "?page=" in the request URL.
            const pageNum = Number.parseInt(request.url.split('?page=').pop(), 10);

            context.log.info(`Scraping ${request.url}`);

            const articles = [];
            $('article').each((index, articleEl) => {
                const $articleEl = $(articleEl);
                const $h3El = $articleEl.find('h3');
                // An article without any link must not abort the whole page.
                const linkEl = $articleEl.find('a')[0];

                articles.push({
                    pageNum,
                    // First child of the heading is used as topic, last as title.
                    topic: $h3El.children().first().text(),
                    title: $h3El.children().last().text(),
                    url: linkEl ? linkEl.href : null,
                    teaser: $articleEl.find('.teaser__text').text(),
                });
            });

            return articles;
        },
        proxyConfiguration: {
            useApifyProxy: true,
        },
        debugLog: false,
        browserLog: false,
        injectJQuery: true,
        injectUnderscore: false,
        downloadMedia: false,
        downloadCss: false,
        ignoreSslErrors: false,
    };

    // Replace this run with apify/web-scraper, using the input built above.
    await Apify.metamorph('apify/web-scraper', metamorphInput);
});