
Metascraper
Deprecated
Pricing
Pay per usage
Go to Store

Metascraper
Deprecated
A simple actor that loads a webpage and scrapes its metadata using the Metascraper library. Metascraper is a library to easily scrape metadata from an article on the web using Open Graph, JSON+LD, regular HTML metadata, and a series of fallbacks. https://metascraper.js.org
0.0 (0)
Pricing
Pay per usage
2
Total users
149
Monthly users
1
Runs succeeded
>99%
Last modified
3 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10

# Second, copy just package.json and package-lock.json since it should be
# the only file that affects "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "apify": "0.21.10", "request-promise": "latest", "metascraper": "latest", "metascraper-author": "latest", "metascraper-date": "latest", "metascraper-description": "latest", "metascraper-image": "latest", "metascraper-video": "latest", "metascraper-youtube": "latest", "metascraper-logo": "latest", "metascraper-clearbit": "latest", "metascraper-publisher": "latest", "metascraper-title": "latest", "metascraper-url": "latest" }, "scripts": { "start": "node main.js" }}
main.js
const Apify = require('apify');
const request = require('request-promise');

// Metascraper instance built from the individual rule bundles listed below;
// each bundle contributes extraction rules for the field(s) named in its
// package (author, date, description, ...).
const metascraper = require('metascraper')([
    require('metascraper-author')(),
    require('metascraper-date')(),
    require('metascraper-description')(),
    require('metascraper-image')(),
    require('metascraper-video')(),
    require('metascraper-youtube')(),
    require('metascraper-logo')(),
    require('metascraper-clearbit')(),
    require('metascraper-publisher')(),
    require('metascraper-title')(),
    require('metascraper-url')(),
]);

/**
 * Actor entry point.
 *
 * Reads the actor input, downloads the page at `input.url`, runs the
 * configured metascraper instance over the HTML, prints the result and
 * stores it under the `OUTPUT` key of the default key-value store.
 *
 * @throws {Error} When the input is missing or has no `url` field.
 */
Apify.main(async () => {
    // Get input of your actor
    const input = await Apify.getInput();

    if (!input || !input.url) {
        throw new Error('Invalid input, must be a JSON object with the "url" field!');
    }

    const html = await request(input.url);
    const metadata = await metascraper({ html, url: input.url });

    console.dir(metadata);

    // Pass the object itself: setValue() serializes its value to JSON when no
    // contentType is given, so pre-stringifying here would store a
    // double-encoded JSON string instead of a JSON object.
    await Apify.setValue('OUTPUT', metadata);
});