
# Economist Category Scraper


Example implementation of a scraper built using the apify/web-scraper actor. Crawls the latest updates from a given Economist category.

Author: Marek Trunkát

# Dockerfile contains instructions for building a Docker image that
# will contain all the code and configuration needed to run your actor.
# For a full Dockerfile reference, see the official Docker documentation.

# First, specify the base Docker image. Apify provides the following
# base images for your convenience:
#  apify/actor-node-basic (Node.js 10 on Alpine Linux, small and fast)
#  apify/actor-node-chrome (Node.js 10 + Chrome on Debian)
#  apify/actor-node-chrome-xvfb (Node.js 10 + Chrome + Xvfb on Debian)
# For more information, see the Apify documentation on base Docker images.
# Note that you can use any other image from Docker Hub.
FROM apify/actor-node-basic

# Second, copy just package.json since it should be the only file
# that affects NPM install in the next step
COPY package.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much, and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && npm list \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, a quick build will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:  
# CMD npm start


{
    "title": "My input schema",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "category": {
            "title": "Category",
            "type": "string",
            "description": "Economist category to be scraped",
            "editor": "textfield",
            "prefill": "briefing"
        }
    }
}
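The schema above prefills `category` with `"briefing"`, but an actor started via the API can still receive empty input. A minimal sketch in plain Node.js of reading that field defensively — `normalizeInput` is a hypothetical helper for illustration, not part of the actor:

```javascript
// Hypothetical helper mirroring how input produced by the schema above
// might be read defensively. "category" is prefilled with "briefing" in
// the Apify UI, but a caller invoking the actor programmatically can omit it.
function normalizeInput(input) {
    const category = (input && input.category) || 'briefing';
    return { category: category.trim() };
}

console.log(normalizeInput({ category: 'europe' })); // { category: 'europe' }
console.log(normalizeInput(null));                   // { category: 'briefing' }
```

In the actor itself, the value would come from `Apify.getInput()` instead of a function argument.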

# Economist category scraper

Example implementation of a scraper built using the apify/web-scraper
actor. Crawls the latest updates from a given Economist category.


// This is the main Node.js source code file of your actor.
// It is referenced from the "scripts" section of the package.json file.

const Apify = require('apify');

Apify.main(async () => {
    // Get the input of the actor. Input fields can be modified in the INPUT_SCHEMA.json file.
    // For more information, see the Apify documentation on input schemas.
    const input = await Apify.getInput();

    // Here you can prepare the input for the apify/web-scraper actor. This input is based
    // on the actor task you used as the starting point.
    const metamorphInput = {
        "startUrls": [
            {
                "url": `${input.category}/?page=1`,
                "method": "GET"
            }
        ],
        "useRequestQueue": true,
        "pseudoUrls": [
            {
                "purl": `${input.category}/?page=[\\d+]`,
                "method": "GET"
            }
        ],
        "linkSelector": "a",
        "pageFunction": async function pageFunction(context) {
            // request is an instance of Apify.Request
            // $ is an instance of jQuery
            const request = context.request;
            const $ = context.jQuery;
            const pageNum = parseInt(request.url.split('?page=').pop());
            context.log.info(`Scraping ${context.request.url}`);

            // Extract all articles.
            const articles = [];
            $('article').each((index, articleEl) => {
                const $articleEl = $(articleEl);
                // H3 contains 2 child elements, where the first one is the topic
                // and the second is the article title.
                const $h3El = $articleEl.find('h3');
                // Extract additional info and push it to the articles array.
                articles.push({
                    topic: $h3El.children().first().text(),
                    title: $h3El.children().last().text(),
                    url: $articleEl.find('a')[0].href,
                    teaser: $articleEl.find('.teaser__text').text(),
                });
            });

            // Return results.
            return articles;
        },
        "proxyConfiguration": {
            "useApifyProxy": false
        },
        "debugLog": false,
        "browserLog": false,
        "injectJQuery": true,
        "injectUnderscore": false,
        "downloadMedia": false,
        "downloadCss": false,
        "ignoreSslErrors": false
    };

    // Now let's metamorph into actor apify/web-scraper using the created input.
    await Apify.metamorph('apify/web-scraper', metamorphInput);
});
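A sketch of how the `purl` pattern above matches pagination links discovered via `linkSelector`. In Apify's pseudo-URL syntax, `[\\d+]` marks a regular-expression fragment inside an otherwise literal URL; here that matching is emulated with a plain `RegExp`. The `https://www.economist.com` base is an assumption for illustration only — the actual URLs are built from `input.category`:

```javascript
// Emulate pseudo-URL matching with a plain RegExp (hypothetical base URL).
// "[\\d+]" in an Apify purl corresponds to the regex part "\d+" here.
const category = 'briefing';
const purl = new RegExp(`^https://www\\.economist\\.com/${category}/\\?page=\\d+$`);

console.log(purl.test('https://www.economist.com/briefing/?page=2')); // true
console.log(purl.test('https://www.economist.com/world/?page=2'));    // false
```

Because every page links to its neighbouring pages, this pattern is what lets web-scraper walk the whole pagination sequence starting from `?page=1`.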


{
    "name": "my-actor",
    "version": "0.0.1",
    "dependencies": {
        "apify": "^0.14.5"
    },
    "scripts": {
        "start": "node main.js"
    },
    "author": "Me!"
}