Prazdne Domy Scraper
Go to Store
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
Prazdne Domy Scraper
lukaskrivka/prazdne-domy
Simple scraper for https://prazdnedomy.cz which gathers old, valuable but uninhabited houses.
Dockerfile
1# Dockerfile contains instructions how to build a Docker image that
2# will contain all the code and configuration needed to run your actor.
3# For a full Dockerfile reference,
4# see https://docs.docker.com/engine/reference/builder/
5
6# First, specify the base Docker image. Apify provides the following
7# base images for your convenience:
8# apify/actor-node-basic (Node.js 10 on Alpine Linux, small and fast)
9# apify/actor-node-chrome (Node.js 10 + Chrome on Debian)
10# apify/actor-node-chrome-xvfb (Node.js 10 + Chrome + Xvfb on Debian)
11# For more information, see https://apify.com/docs/actor#base-images
12# Note that you can use any other image from Docker Hub.
13FROM apify/actor-node-basic
14
15# Copy all files and directories with the source code
16COPY . ./
17
18# Install NPM packages, skip optional and development dependencies to
19# keep the image small. Avoid logging too much and print the dependency
20# tree for debugging
21RUN npm --quiet set progress=false \
22 && npm install --only=prod --no-optional \
23 && echo "Installed NPM packages:" \
24 && npm list \
25 && echo "Node.js version:" \
26 && node --version \
27 && echo "NPM version:" \
28 && npm --version
29
30# Specify how to run the source code
31CMD npm start
package.json
1{
2 "name": "my-actor",
3 "version": "0.0.1",
4 "dependencies": {
5 "apify": "^0.13.7"
6 },
7 "scripts": {
8 "start": "node main.js"
9 },
10 "author": "Me!"
11}
INPUT_SCHEMA.json
1{
2 "title": "My input schema",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "myField": {
7 "title": "My input field",
8 "type": "string",
9 "nullable": false,
10 "description": "This is a customizable description providing help to the users of your actor.",
11 "editor": "textarea"
12 }
13 }
14}
main.js
// This is the main Node.js source code file of your actor.
// It is referenced from the "scripts" section of the package.json file.

const Apify = require('apify');

/**
 * Page function handed to apify/cheerio-scraper; it extracts one record per
 * estate listed on a loaded page.
 *
 * NOTE: this function is serialised with `Function.prototype.toString()` and
 * passed to the scraper as a string, so it must stay fully self-contained —
 * it must not reference anything from this module's scope.
 *
 * @param {object} context - Context object supplied by the scraper.
 * @param {Function} context.$ - jQuery-like handle for the loaded page
 *     (presumably Cheerio, given the target actor).
 * @returns {Promise<object[]>} Records with the keys `title`, `url`, `typ`,
 *     `stav`, `gps` and `adresa`; fields that could not be found are `null`.
 */
async function pageFunction(context) {
    const { $ } = context;
    const result = [];

    $('.estates-list .estate').each(function () {
        let typ = null;
        let stav = null;
        let gps = null;
        let adresa = null;

        // The details are carried in the `title` attribute of each icon,
        // either as an HTML table fragment (Typ / Stav) or as GPS + address text.
        $(this).find('.icons .icon').each(function () {
            const maybeHtml = $(this).attr('title');
            if (!maybeHtml) return;

            const maybeTyp = maybeHtml.match(/<td>Typ: <\/td><td>(.+?)<\/td>/);
            if (maybeTyp) {
                typ = maybeTyp[1];
            }

            // Lazy quantifier (same as for "Typ") so that only this cell is
            // captured even when further cells follow on the same line.
            const maybeStav = maybeHtml.match(/<td>Stav: <\/td><td>(.+?)<\/td>/);
            if (maybeStav) {
                stav = maybeStav[1];
                // An icon carrying the state is not inspected for GPS data.
                return;
            }

            const maybeGps = maybeHtml.match(/\d+°.+''/);
            if (maybeGps) {
                gps = maybeGps[0];
                // Whatever remains after cutting out the coordinates is the address.
                adresa = maybeHtml.replace(gps, '').trim();
            }
        });

        // Guard against a missing link instead of emitting "...czundefined".
        const href = $(this).find('a').attr('href');

        result.push({
            title: $(this).find('.content .title').text().trim(),
            url: href ? `https://prazdnedomy.cz${href}` : null,
            typ,
            stav,
            gps,
            adresa,
        });
    });

    return result;
}

Apify.main(async () => {
    // Get input of the actor. Input fields can be modified in INPUT_SCHEMA.json file.
    // For more information, see https://apify.com/docs/actor/input-schema
    // The input is only logged here; it is not used to build the scraper input below.
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Here you can prepare your input for actor apify/cheerio-scraper. This input is
    // based on an actor task you used as the starting point.
    const metamorphInput = {
        startUrls: [
            {
                url: 'https://prazdnedomy.cz/domy/objekty/?paginator-page=1',
                method: 'GET',
            },
        ],
        useRequestQueue: true,
        // Follow only the paginated listing pages.
        pseudoUrls: [
            {
                purl: 'https://prazdnedomy.cz/domy/objekty/?paginator-page=[\\d+]',
                method: 'GET',
            },
        ],
        linkSelector: 'a',
        // The scraper expects the page function as source text.
        pageFunction: pageFunction.toString(),
        proxyConfiguration: {
            useApifyProxy: false,
        },
        debugLog: false,
        ignoreSslErrors: false,
        useCookieJar: false,
    };

    // Now let's metamorph into actor apify/cheerio-scraper using the created input.
    await Apify.metamorph('apify/cheerio-scraper', metamorphInput);
});
Developer
Maintained by Community
Categories