Prazdne Domy Scraper
Deprecated
Pricing
Pay per usage
Go to Store
Prazdne Domy Scraper
Deprecated
Simple scraper for https://prazdnedomy.cz, which gathers old, valuable, but uninhabited houses.
0.0 (0)
Pricing
Pay per usage
1
Total users
10
Monthly users
1
Last modified
4 years ago
Dockerfile
# Dockerfile contains instructions on how to build a Docker image that
# will contain all the code and configuration needed to run your actor.
# For a full Dockerfile reference,
# see https://docs.docker.com/engine/reference/builder/

# First, specify the base Docker image. Apify provides the following
# base images for your convenience:
#  apify/actor-node-basic (Node.js 10 on Alpine Linux, small and fast)
#  apify/actor-node-chrome (Node.js 10 + Chrome on Debian)
#  apify/actor-node-chrome-xvfb (Node.js 10 + Chrome + Xvfb on Debian)
# For more information, see https://apify.com/docs/actor#base-images
# Note that you can use any other image from Docker Hub.
FROM apify/actor-node-basic

# Copy all files and directories with the source code
COPY . ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && npm list \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Specify how to run the source code
CMD npm start
package.json
{ "name": "my-actor", "version": "0.0.1", "dependencies": { "apify": "^0.13.7" }, "scripts": { "start": "node main.js" }, "author": "Me!"}
INPUT_SCHEMA.json
{ "title": "My input schema", "type": "object", "schemaVersion": 1, "properties": { "myField": { "title": "My input field", "type": "string", "nullable": false, "description": "This is a customizable description providing help to the users of your actor.", "editor": "textarea" } }}
main.js
// Entry point of the actor: the Node.js file launched by the "start" script
// declared in package.json.

const Apify = require('apify');

// Listing page the crawl starts from.
const FIRST_PAGE_URL = 'https://prazdnedomy.cz/domy/objekty/?paginator-page=1';

// Pseudo-URL handed to the scraper to match the paginated listing pages.
const PAGINATION_PURL = 'https://prazdnedomy.cz/domy/objekty/?paginator-page=[\\d+]';

// Source text of the page function passed to apify/cheerio-scraper.
// It is shipped as a string, so it is kept here line by line and joined with
// newlines. For every `.estates-list .estate` element it reads the `title`
// attribute of each `.icons .icon`, pulls "Typ", "Stav", the GPS string and
// the remaining address text out of it, and pushes one record containing
// title, url, typ, stav, gps and adresa.
const PAGE_FUNCTION_SOURCE = [
    "async function pageFunction(context) {",
    " const { request, $ } = context;",
    " let result = [];",
    " $('.estates-list .estate').each(function(i) {",
    " let typ = null;",
    " let stav = null;",
    " let gps = null;",
    " let adresa = null;",
    "",
    " $(this).find('.icons .icon').each(function() {",
    " const maybeHtml = $(this).attr('title')",
    " if (!maybeHtml) return",
    " const maybeTyp = maybeHtml.match(/<td>Typ: <\\/td><td>(.+?)<\\/td>/)",
    " if (maybeTyp) {",
    " typ = maybeTyp[1]",
    " }",
    " const maybeStav = maybeHtml.match(/<td>Stav: <\\/td><td>(.+)<\\/td>/)",
    " if (maybeStav) {",
    " stav = maybeStav[1]",
    " return",
    " }",
    " ",
    " const maybeGps = maybeHtml.match(/\\d+°.+''/)",
    " if (maybeGps) {",
    " gps = maybeGps[0]",
    " adresa = maybeHtml.replace(gps, '')",
    " }",
    " ",
    " })",
    "",
    " result.push({",
    " title: $(this).find('.content .title').text().trim(),",
    " url: 'https://prazdnedomy.cz' + $(this).find('a').attr('href'),",
    " typ,",
    " stav,",
    " gps,",
    " adresa,",
    " })",
    " })",
    " return result;",
    "}",
].join('\n');

Apify.main(async () => {
    // Fetch the actor input (its fields are declared in INPUT_SCHEMA.json,
    // see https://apify.com/docs/actor/input-schema). It is only printed
    // here; none of it is forwarded to the scraper configuration below.
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Fixed configuration for apify/cheerio-scraper. Key order is kept
    // stable so the object serializes the same way on every run.
    const scraperInput = {
        startUrls: [
            { url: FIRST_PAGE_URL, method: 'GET' },
        ],
        useRequestQueue: true,
        pseudoUrls: [
            { purl: PAGINATION_PURL, method: 'GET' },
        ],
        linkSelector: 'a',
        pageFunction: PAGE_FUNCTION_SOURCE,
        proxyConfiguration: {
            useApifyProxy: false,
        },
        debugLog: false,
        ignoreSslErrors: false,
        useCookieJar: false,
    };

    // Hand the run over to apify/cheerio-scraper with the input built above.
    await Apify.metamorph('apify/cheerio-scraper', scraperInput);
});