Prazdne Domy Scraper avatar

Prazdne Domy Scraper

Deprecated
Go to Store
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
Prazdne Domy Scraper

Prazdne Domy Scraper

lukaskrivka/prazdne-domy

A simple scraper for https://prazdnedomy.cz, which gathers old, valuable, but uninhabited houses.

Dockerfile

1# Dockerfile contains instructions how to build a Docker image that
2# will contain all the code and configuration needed to run your actor.
3# For a full Dockerfile reference,
4# see https://docs.docker.com/engine/reference/builder/
5
6# First, specify the base Docker image. Apify provides the following
7# base images for your convenience:
8#  apify/actor-node-basic (Node.js 10 on Alpine Linux, small and fast)
9#  apify/actor-node-chrome (Node.js 10 + Chrome on Debian)
10#  apify/actor-node-chrome-xvfb (Node.js 10 + Chrome + Xvfb on Debian)
11# For more information, see https://apify.com/docs/actor#base-images
12# Note that you can use any other image from Docker Hub.
13FROM apify/actor-node-basic
14
15# Copy all files and directories with the source code
16COPY . ./
17
18# Install NPM packages, skip optional and development dependencies to
19# keep the image small. Avoid logging too much and print the dependency
20# tree for debugging
21RUN npm --quiet set progress=false \
22 && npm install --only=prod --no-optional \
23 && echo "Installed NPM packages:" \
24 && npm list \
25 && echo "Node.js version:" \
26 && node --version \
27 && echo "NPM version:" \
28 && npm --version
29
30# Specify how to run the source code
31CMD npm start

package.json

1{
2    "name": "my-actor",
3    "version": "0.0.1",
4    "dependencies": {
5        "apify": "^0.13.7"
6    },
7    "scripts": {
8        "start": "node main.js"
9    },
10    "author": "Me!"
11}

INPUT_SCHEMA.json

1{
2    "title": "My input schema",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "myField": {
7            "title": "My input field",
8            "type": "string",
9            "nullable": false,
10            "description": "This is a customizable description providing help to the users of your actor.",
11            "editor": "textarea"
12        }
13    }
14}

main.js

1// This is the main Node.js source code file of your actor.
2// It is referenced from the "scripts" section of the package.json file.
3
4const Apify = require('apify');
5
6Apify.main(async () => {
7    // Read the actor input (its fields are defined in INPUT_SCHEMA.json) and log it; it is not used to build the scraper input below.
8    // For more information, see https://apify.com/docs/actor/input-schema
9    const input = await Apify.getInput();
10    console.log('Input:');
11    console.dir(input);
12
13    // Prepare the input for the apify/cheerio-scraper actor (based on the actor task that was used as the starting point):
14    // it starts at page 1 of the listing, and pageFunction returns title, url, typ, stav, gps and adresa for each '.estates-list .estate' element.
15    const metamorphInput = {
16        "startUrls": [
17          {
18            "url": "https://prazdnedomy.cz/domy/objekty/?paginator-page=1",
19            "method": "GET"
20          }
21        ],
22        "useRequestQueue": true,
23        "pseudoUrls": [
24          {
25            "purl": "https://prazdnedomy.cz/domy/objekty/?paginator-page=[\\d+]",
26            "method": "GET"
27          }
28        ],
29        "linkSelector": "a",
30        "pageFunction": "async function pageFunction(context) {\n    const { request, $ } = context;\n    let result = [];\n    $('.estates-list .estate').each(function(i) {\n        let typ = null;\n        let stav = null;\n        let gps = null;\n        let adresa = null;\n\n        $(this).find('.icons .icon').each(function() {\n            const maybeHtml = $(this).attr('title')\n            if (!maybeHtml) return\n            const maybeTyp = maybeHtml.match(/<td>Typ: <\\/td><td>(.+?)<\\/td>/)\n            if (maybeTyp) {\n                typ = maybeTyp[1]\n            }\n            const maybeStav = maybeHtml.match(/<td>Stav: <\\/td><td>(.+)<\\/td>/)\n            if (maybeStav) {\n                stav = maybeStav[1]\n                return\n            }\n            \n            const maybeGps = maybeHtml.match(/\\d+°.+''/)\n            if (maybeGps) {\n                gps = maybeGps[0]\n                adresa = maybeHtml.replace(gps, '')\n            }\n            \n        })\n\n        result.push({\n            title: $(this).find('.content .title').text().trim(),\n            url: 'https://prazdnedomy.cz' + $(this).find('a').attr('href'),\n            typ,\n            stav,\n            gps,\n            adresa,\n        })\n    })\n    return result;\n}",
31        "proxyConfiguration": {
32          "useApifyProxy": false
33        },
34        "debugLog": false,
35        "ignoreSslErrors": false,
36        "useCookieJar": false
37      };
38
39    // Metamorph into the apify/cheerio-scraper actor, passing it the input built above.
40    await Apify.metamorph('apify/cheerio-scraper', metamorphInput);
41});
Developer
Maintained by Community
Categories