My Actor

No credit card required

This Actor is under maintenance.

This Actor is under maintenance and may be unreliable.

My Actor

My Actor

flamboyant_marimba/my-actor

No credit card required

Testing

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
# Node.js 16 is end-of-life, so build on a maintained LTS release instead;
# package.json only requires node >=16.0.0, so this stays compatible.
FROM apify/actor-node:20

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. The npm cache is removed with -f so that a missing
# cache directory does not fail the whole build step.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -rf ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image.
CMD npm start --silent

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "project-cheerio-crawler-javascript",
    "title": "Project Cheerio Crawler Javascript",
    "description": "Crawlee and Cheerio project in javascript.",
    "version": "0.0",
    "meta": {
        "templateId": "js-crawlee-cheerio"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
    "title": "CheerioCrawler Template",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "URLs to start with.",
            "editor": "requestListSources",
            "prefill": [
                {
                    "url": "https://apify.com"
                }
            ]
        }
    }
}

src/main.js

/**
 * Entry point of the Actor.
 *
 * Initializes the Apify SDK, builds the single start URL (the run list of the
 * `flamboyant_marimba~my-actor` Actor on the Apify API), crawls it with a
 * `CheerioCrawler` whose requests are handled by the router from `./routes.js`,
 * and then exits.
 *
 * The Apify API token is taken from the `APIFY_TOKEN` environment variable.
 * Never hard-code the token in source: anyone who can read the code could use
 * it to act on the account it belongs to.
 */

// For more information, see https://docs.apify.com/sdk/js
import { Actor } from 'apify';
// For more information, see https://crawlee.dev
import { CheerioCrawler } from 'crawlee';
import { router } from './routes.js';

// Initialize the Apify SDK
await Actor.init();

// Read the secret from the environment instead of embedding it in the code.
const { APIFY_TOKEN: apiToken } = process.env;
if (!apiToken) {
    // Fail fast with an actionable message rather than sending an
    // unauthenticated request to the API.
    throw new Error('The APIFY_TOKEN environment variable must be set to an Apify API token.');
}

// Build the URL with the URL API so the token is always encoded correctly.
const runsUrl = new URL('https://api.apify.com/v2/acts/flamboyant_marimba~my-actor/runs');
runsUrl.searchParams.set('token', apiToken);

const startUrls = [runsUrl.href];

const proxyConfiguration = await Actor.createProxyConfiguration();

const crawler = new CheerioCrawler({
    proxyConfiguration,
    requestHandler: router,
});

await crawler.run(startUrls);

// Exit successfully
await Actor.exit();

src/routes.js

import { Dataset, createCheerioRouter } from 'crawlee';

/** Router whose handlers are registered below; it is exported for use as a request handler. */
export const router = createCheerioRouter();

/**
 * Default route: logs a message and enqueues the links that match
 * `https://apify.com/*`, tagging them with the `detail` label.
 *
 * @param {object} context - Crawling context; only `enqueueLinks` and `log` are used.
 * @returns {Promise<void>}
 */
async function handleDefault({ enqueueLinks, log }) {
    log.info(`enqueueing new URLs`);

    const enqueueOptions = {
        globs: ['https://apify.com/*'],
        label: 'detail',
    };
    await enqueueLinks(enqueueOptions);
}

/**
 * `detail` route: reads the text of the page's `<title>` element, logs it
 * together with the loaded URL and pushes both values to the dataset.
 *
 * @param {object} context - Crawling context; only `request`, `$` and `log` are used.
 * @returns {Promise<void>}
 */
async function handleDetail({ request, $, log }) {
    const { loadedUrl } = request;
    const title = $('title').text();

    log.info(`${title}`, { url: loadedUrl });

    const record = { url: loadedUrl, title };
    await Dataset.pushData(record);
}

router.addDefaultHandler(handleDefault);
router.addHandler('detail', handleDetail);

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
    "extends": "@apify",
    "root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control

.DS_Store
.idea
dist
node_modules
apify_storage
storage

package.json

{
    "name": "crawlee-cheerio-javascript",
    "version": "0.0.1",
    "type": "module",
    "description": "This is a boilerplate of an Apify actor.",
    "engines": {
        "node": ">=16.0.0"
    },
    "dependencies": {
        "apify": "^3.0.0",
        "crawlee": "^3.0.0"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.3.1",
        "eslint": "^8.36.0"
    },
    "scripts": {
        "start": "node src/main.js",
        "lint": "eslint ./src --ext .js,.jsx",
        "lint:fix": "eslint ./src --ext .js,.jsx --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}
Developer
Maintained by Community
Actor stats
  • 2 users
  • 223 runs
  • Modified 7 months ago
Categories

You might also like these Actors