
Vdab-be jobs
Deprecated
Pick any job category from the Belgian national job board https://www.vdab.be/jobs, optionally apply sorting or filtering, and get list results for around 1 cent per 100 jobs!
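The sorting and filtering mentioned above appear to be plain query parameters on the category URL. A minimal sketch (the sort=1 value comes from the input schema's prefill further down; any other parameter names would be assumptions about vdab.be):

const url = new URL('https://www.vdab.be/vindeenjob/jobs/financieel');
url.searchParams.set('sort', '1'); // sorting option from the prefilled example
console.log(url.toString()); // https://www.vdab.be/vindeenjob/jobs/financieel?sort=1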
Rating: 0.0 (0)
Pricing: Pay per usage
Total users: 2
Monthly users: 6
Last modified: 3 years ago
.actor/actor.json
{ "actorSpecification": 1, "name": "default-output", "title": "Live output", "description": "Auto format for dataset items", "version": "0.0.1", "storages": { "dataset": { "actorSpecification": 1, "title": "", "description": "", "views": {} } }}
src/main.js
import { Actor } from 'apify';
import { KeyValueStore, CheerioCrawler } from 'crawlee';
import { handleSearch } from './routes.js';

await Actor.init();

// Read the actor input from the default key-value store.
const input = await KeyValueStore.getInput();
const {
    startUrls = [],
    proxyConfiguration = {
        useApifyProxy: true,
    },
} = input;

const proxyConfig = await Actor.createProxyConfiguration(proxyConfiguration);

// Every request goes through the same handler: each start URL is a
// search-results page, and pagination is enqueued from routes.js.
const crawler = new CheerioCrawler({
    proxyConfiguration: proxyConfig,
    async requestHandler(context) {
        return handleSearch(context, input);
    },
});

await crawler.run(startUrls);

// Exit successfully
await Actor.exit();
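Since the input comes from the default key-value store, a local run would read it from storage/key_value_stores/default/INPUT.json (Crawlee's default local storage layout). A minimal sketch of that file, using only values already prefilled in INPUT_SCHEMA.json below:

{
    "startUrls": ["https://www.vdab.be/vindeenjob/jobs/financieel?sort=1"],
    "resultsLimit": 100,
    "proxyConfiguration": { "useApifyProxy": true }
}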
src/routes.js
import { Dataset, log } from 'crawlee';

export const handleSearch = async (context, input) => {
    const { request, $, crawler } = context;
    const { url, userData } = request;
    const { page = 1 } = userData;
    const { resultsLimit = 0 } = input;

    // Extract one record per job card on the search-results page.
    const items = Array.from($('.result-item')).map((x) => {
        const source_url = $('a', x).attr('href');
        const job_title = $('.result-title', x).text().trim();
        const loc = Array.from($('.location-span strong', x)).map((a) => $(a).text().trim());
        const advertiser_name = loc?.[0];
        const advertiser_location = loc?.[1];
        const full_text = $('p.job-description', x).text().trim();
        return {
            source_url,
            job_title,
            advertiser_name,
            advertiser_location,
            full_text,
            searchUrl: url,
        };
    });

    if (!items?.length) {
        log.info(`[NO-DATA]: no jobs at ${url}`);
        return;
    }

    // The site serves 10 results per page, so this many items were already
    // pushed by the previous pages.
    const itemsCounter = (page - 1) * 10;
    const resultsCounter = itemsCounter + items.length;

    // Trim the current page so the total never exceeds resultsLimit;
    // an undefined end bound keeps the whole page.
    await Dataset.pushData(items.slice(0, resultsLimit && resultsCounter > resultsLimit ? resultsLimit - itemsCounter : undefined));

    // Total number of jobs reported by the site ('.' is a thousands separator).
    const counter = parseInt($('span.amount-of-jobs.desktop-block > strong').text().replace('.', ''), 10) || 0;

    // Enqueue the next page while more results exist and the limit has not
    // been reached. Note: with resultsLimit = 0 only the first page is scraped.
    if (!(resultsCounter >= counter) && resultsLimit && !(resultsCounter >= resultsLimit)) {
        const pagedUrl = new URL(url);
        pagedUrl.searchParams.set('pageNumber', page + 1);
        await crawler.requestQueue.addRequest({
            url: pagedUrl.toString(),
            userData: {
                page: page + 1,
            },
        });
    } else {
        log.info(`[DONE]: ${resultsCounter} job(s) out of ${counter} at ${url}`);
    }
};
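The limit arithmetic in handleSearch is easiest to follow with concrete numbers. A standalone sketch of the same slice-bound expression (sliceBound is a hypothetical helper, not part of the actor; it assumes the hard-coded page size of 10):

// Returns the end bound passed to items.slice(0, ...) above.
const sliceBound = (page, pageItems, resultsLimit) => {
    const itemsCounter = (page - 1) * 10; // items pushed by earlier pages
    const resultsCounter = itemsCounter + pageItems;
    // undefined means "keep the whole page"
    return resultsLimit && resultsCounter > resultsLimit
        ? resultsLimit - itemsCounter
        : undefined;
};

console.log(sliceBound(1, 10, 25)); // undefined -> push all 10 items
console.log(sliceBound(3, 10, 25)); // 5 -> push only 5 items, reaching the limit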
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.eslintrc
{ "extends": "@apify", "root": true}
.gitignore
# This file tells Git which files shouldn't be added to source control
.idea
dist
node_modules
apify_storage
storage
Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image.
CMD npm start --silent
INPUT_SCHEMA.json
{ "title": "Input schema", "type": "object", "schemaVersion": 1, "properties": { "startUrls": { "title": "Add Vdab.be job categories URLs you want to scrape ", "type": "array", "description": "Add one or more job categories with optional sorting and filtering", "editor": "stringList", "placeholderValue": "URL", "prefill": ["https://www.vdab.be/vindeenjob/jobs/financieel?sort=1"], "patternValue": "https:\\/\\/(www\\.)?vdab\\.be\\/.+", "uniqueItems": true }, "resultsLimit": { "title": "Max results", "type": "integer", "description": "How many jobs you want to scrape from each category URL", "editor": "number", "unit": "per page", "default": 100 }, "proxyConfiguration": { "title": "Proxy configuration", "type": "object", "description": "A proxy server is required to run this actor!", "prefill": { "useApifyProxy": true }, "editor": "proxy", "sectionCaption": "Proxy configuration", "sectionDescription": "Select your proxy here." } }}
package.json
{ "name": "crawlee-cheerio-javascript", "version": "0.0.1", "type": "module", "description": "This is a boilerplate of an Apify actor.", "engines": { "node": ">=16.0.0" }, "dependencies": { "apify": "^3.0.0", "crawlee": "^3.0.0" }, "devDependencies": { "@apify/eslint-config": "^0.3.1", "eslint": "^8.20.0" }, "scripts": { "start": "node src/main.js", "lint": "eslint ./src --ext .js,.jsx", "lint:fix": "eslint ./src --ext .js,.jsx --fix", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" }, "author": "It's not you it's me", "license": "ISC"}