
CSS Selector
Deprecated
Pricing
Pay per usage
Go to Store

CSS Selector
Deprecated
Scrape a Webpage using CSS Selector
0.0 (0)
Pricing
Pay per usage
1
Total users
35
Monthly users
4
Last modified
2 years ago
.actor/actor.json
{ "actorSpecification": 1, "name": "CSS scraper", "title": "CSS scraper", "description": "Scrape webpage using CSS selector.", "version": "0.0.1", "storages": { "dataset": { "actorSpecification": 1, "title": "CSS scraper", "views": { "overview": { "title": "JSON", "transformation": { "fields": [ "JSON" ] }, "display": { "component": "table", "columns": [ { "label": "JSON", "format": "text", "field": "JSON" } ] } } } } }}
src/main.js
// Apify actor entry point: loads a web page, selects elements with a CSS
// selector and stores each matched element's attributes in the default dataset.
import { Actor } from 'apify'
import { JSDOM } from 'jsdom'

await Actor.init()

console.log('Loading input')

// Structure of input is defined in INPUT_SCHEMA.json.
// getInput() may resolve to null when the run has no input, so fall back to an
// empty object and report missing fields explicitly instead of crashing on
// destructuring with an opaque TypeError.
const { url, css } = (await Actor.getInput()) ?? {}
if (typeof url !== 'string' || url === '' || typeof css !== 'string' || css === '') {
    throw new Error('Input must contain non-empty string fields "url" and "css".')
}

console.log('URL:', url)
console.log('CSS:', css)

// load dom, root = parent of window
const root = await JSDOM.fromURL(url)

// select from body
const items = root.window.document.body.querySelectorAll(css)

// get attributes: one plain { name: value } object per matched element.
// NodeList and NamedNodeMap are array-likes, so Array.from converts them directly.
const res = Array.from(items, (item) => Object.fromEntries(
    Array.from(item.attributes, (attr) => [attr.name, attr.value]),
))
console.log(res.length, 'results')

// Structure of output is defined in .actor/actor.json
// Each dataset row carries the element's attributes serialized into the "JSON" field.
await Actor.pushData(res.map((attrs) => ({ JSON: JSON.stringify(attrs) })))

await Actor.exit()
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules
Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image.
CMD npm start --silent
INPUT_SCHEMA.json
{ "title": "CSS scraper input", "type": "object", "schemaVersion": 1, "properties": { "url": { "title": "Url", "type": "string", "description": "URL of the webpage to scrape", "editor": "textfield", "prefill":"https://www.amazon.com/s?k=cat" }, "css": { "title": "CSS", "type": "string", "description": "CSS Selector", "editor": "textfield", "prefill":"div[data-asin][data-index]" } }, "required": ["url","css"]}
package.json
{ "name": "getting-started-node", "version": "0.0.1", "type": "module", "description": "This is an example of an Apify actor.", "dependencies": { "apify": "^3.0.0", "jsdom": "^20.0.0" }, "devDependencies": {}, "scripts": { "start": "node src/main.js", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" }, "author": "It's not you it's me", "license": "ISC"}