BezRealitky Scraper avatar

BezRealitky Scraper

Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
BezRealitky Scraper

BezRealitky Scraper

fcoudy/bezrealitkyscraper

An easy way to access the real-estate market!

Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Second, copy just package.json and package-lock.json since those are the only
# files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
# "--omit=dev --omit=optional" replaces the deprecated
# "--only=prod --no-optional" flags.
RUN npm --quiet set progress=false \
 && npm install --omit=dev --omit=optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start

INPUT_SCHEMA.json

1{
2  "title": "Actor input schema",
3  "description": "This is actor input schema. Yes it is.",
4  "type": "object",
5  "schemaVersion": 1,
6  "properties": {
7    "Typ poměru": {
8      "title": "advertType",
9      "type": "string",
10      "description": "Zadej, zda se zajímáš o nákup, pronájem či spolubydlení",
11      "editor": "javascript",
12      "enum": [
13        "us",
14        "de",
15        "fr"
16      ],
17      "enumTitles": [
18        "USA",
19        "Germany",
20        "France"
21      ],
22      "sectionCaption": "Vybírejte mezi: \"prodej\", \"pronajem\", \"spolubydleni\"",
23      "prefill": "prodej",
24      "nullable": false
25    },
26    "Typ nemovitosti": {
27      "title": "Typ nemovitosti",
28      "type": "string",
29      "description": "Vyberte, zda máte zájem o byt, dům či jiné",
30     
31      "sectionCaption": "Zde si vybíráte, jaké nemovitosti se vám zobrazí",
32      "sectionDescription": "Můžete si vybrat mezi následujícími:  \"byt\", \"dum\", \"pozemek\", \"garaz\", \"kancelar\", \"nebytovy-prostor\" a  \"chata-chalupa\"",
33      "editor": "javascript",
34      "prefill": "byt",
35      "nullable": false
36    }
37  },
38  "required": [
39    "Typ poměru"
40  ]
41}

main.js

1const Apify = require('apify');
2
/**
 * Removes every newline character from a string and trims the leading and
 * trailing whitespace of the result.
 *
 * @param {string} string - Text to clean.
 * @returns {string|undefined} The cleaned text, or `undefined` when the
 *   argument is not a string primitive.
 */
function removeChars(string) {
    // An explicit guard replaces the former catch-all try/catch: non-string
    // input still yields `undefined`, but unrelated errors are no longer
    // swallowed silently.
    if (typeof string !== 'string') {
        return undefined;
    }
    return string.replace(/\n/g, '').trim();
}
15
/**
 * Actor entry point.
 *
 * For every (advert type, property type) combination taken from the actor
 * input, walks the paginated listing at
 * `https://www.bezrealitky.cz/vypis/nabidka-<advertType>/<propType>?page=<n>`,
 * enqueues the detail links found on each listing page and pushes one dataset
 * record per detail page.
 */
Apify.main(async () => {
    const input = await Apify.getInput();
    if (!input) {
        throw new Error('Actor input is missing.');
    }

    // Accept either a single value or an array of values for each input field.
    const toList = (value) => (Array.isArray(value) ? value : [value]);

    // The script reads "advertTypeList" / "propTypeList", while
    // INPUT_SCHEMA.json declares the fields as "Typ poměru" / "Typ nemovitosti";
    // fall back to the schema keys when the former are absent.
    const advertTypeList = toList(input.advertTypeList ?? input['Typ poměru']);
    const propTypeList = toList(input.propTypeList ?? input['Typ nemovitosti']);
    console.log("multifile");

    // Neither object depends on the loop variables, so create them once.
    const requestQueue = await Apify.openRequestQueue();
    const proxyConfiguration = await Apify.createProxyConfiguration();

    const PARAM_VALUE_SELECTOR = '#detail-parameters .row.pl-md-4  .col.col-6.param-value';

    for (const advertType of advertTypeList) {
        for (const propType of propTypeList) {
            let pageNumber = 1;
            let hasMorePages = true;

            while (hasMorePages) {
                // Assume this is the last page unless the listing handler
                // confirms otherwise. This keeps the loop from running forever
                // when the listing request fails or has no pagination element.
                hasMorePages = false;

                // Captured per iteration so the handler closure sees a fixed value.
                const currentPage = pageNumber;

                await requestQueue.addRequest({
                    url: `https://www.bezrealitky.cz/vypis/nabidka-${advertType}/${propType}?page=${currentPage}`,
                    userData: { label: "START" },
                });

                const crawler = new Apify.PuppeteerCrawler({
                    requestQueue,
                    proxyConfiguration,
                    maxRequestsPerCrawl: 50,
                    handlePageFunction: async ({ request, page }) => {
                        console.log(`Processing ${request.url}...`);

                        if (request.userData.label === "START") {
                            // Listing page: enqueue every detail link.
                            await Apify.utils.enqueueLinks({
                                page,
                                requestQueue,
                                selector: '.product__link.js-product-link',
                            });

                            // The second-to-last pagination item is read as the
                            // total page count; null when it does not exist.
                            const lastPageText = await page.$$eval('ul.pagination li', (els) => {
                                const targetEl = els[els.length - 2];
                                return targetEl ? targetEl.textContent : null;
                            });
                            const pageCount = lastPageText === null ? NaN : Number(lastPageText);

                            console.log(`Page count: ${pageCount}`);
                            // "<" instead of "===": also stops when the current
                            // page number has already passed the reported count.
                            hasMorePages = Number.isFinite(pageCount) && currentPage < pageCount;

                            await Apify.utils.puppeteer.saveSnapshot(page, { key: `Start${currentPage}` });
                            return;
                        }

                        // Detail page: extract the advert fields.
                        const rawPrice = await page.$eval('.detail-price', (el) => el.textContent);
                        // Drop newlines and plain spaces from the price text.
                        const price = rawPrice.replace(/\n/g, '').replace(/ /g, '');

                        const name = await page.$eval('.col h1', (el) => el.textContent);

                        const address = await page.$eval('.col.col-12 h2', (el) => el.textContent);
                        const [street, town, region] = address.split(", ");

                        // Read all parameter values in a single round trip.
                        // Fields are picked by position; a missing position
                        // yields `undefined` for that field instead of failing
                        // the whole request.
                        // NOTE(review): the index-to-field mapping assumes a
                        // fixed parameter order on the detail page - confirm.
                        const paramValues = await page.$$eval(
                            PARAM_VALUE_SELECTOR,
                            (els) => els.map((el) => el.textContent),
                        );

                        await Apify.pushData({
                            url: request.url,
                            name: name,
                            price: price,
                            // Previously indexed the arrays with themselves,
                            // which always produced `undefined`.
                            advertType: advertType,
                            propTypeList: propType,
                            street: removeChars(street),
                            town: town,
                            region: removeChars(region),
                            rooms: removeChars(paramValues[2]),
                            state: removeChars(paramValues[3]),
                            ownerType: removeChars(paramValues[7]),
                            surface: removeChars(paramValues[4]),
                            buildingType: removeChars(paramValues[8]),
                        });
                    },
                });

                await crawler.run();
                pageNumber += 1;
            }
        }
    }
    console.log('Crawler finished.');
});

package.json

1{
2    "name": "my-actor",
3    "version": "0.0.1",
4    "dependencies": {
5        "apify": "^2.0.7"
6    },
7    "scripts": {
8        "start": "node main.js"
9    },
10    "author": "Me!"
11}
Developer
Maintained by Community