BezRealitky Scraper avatar
BezRealitky Scraper

Deprecated

Pricing

Pay per usage

Go to Store
BezRealitky Scraper

BezRealitky Scraper

Deprecated

Developed by

fcoudy

fcoudy

Maintained by Community

An easy way to access the real-estate market!

0.0 (0)

Pricing

Pay per usage

1

Total users

14

Monthly users

3

Last modified

4 years ago

Dockerfile

# Base image: Apify's Node.js 16 actor image. The available images are
# described at https://sdk.apify.com/docs/guides/docker-images
# Any other image from Docker Hub can be used instead.
FROM apify/actor-node:16
# Copy only package.json and package-lock.json first. They are the only
# files that affect "npm install" in the next step, so Docker can reuse the
# cached install layer when just the source code changes.
COPY package*.json ./
# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Progress output is disabled to reduce log noise; the
# dependency tree and the Node.js / NPM versions are printed for debugging.
# "npm list || true" keeps the build going even if "npm list" exits non-zero.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Copy the remaining files and directories with the source code.
# Because this happens after the NPM install step, rebuilds after a plain
# source change skip the install and finish quickly.
COPY . ./
# No CMD is specified here. Apify's base Docker images define a default CMD
# that runs the command from the "scripts.start" section of package.json.
# In short, the instruction looks something like this:
#
# CMD npm start

INPUT_SCHEMA.json

{
    "title": "Actor input schema",
    "description": "This is actor input schema. Yes it is.",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "Typ poměru": {
            "title": "advertType",
            "type": "string",
            "description": "Zadej, zda se zajímáš o nákup, pronájem či spolubydlení",
            "editor": "select",
            "enum": [
                "prodej",
                "pronajem",
                "spolubydleni"
            ],
            "enumTitles": [
                "Prodej",
                "Pronájem",
                "Spolubydlení"
            ],
            "sectionCaption": "Vybírejte mezi: \"prodej\", \"pronajem\", \"spolubydleni\"",
            "prefill": "prodej",
            "nullable": false
        },
        "Typ nemovitosti": {
            "title": "Typ nemovitosti",
            "type": "string",
            "description": "Vyberte, zda máte zájem o byt, dům či jiné",
            "sectionCaption": "Zde si vybíráte, jaké nemovitosti se vám zobrazí",
            "sectionDescription": "Můžete si vybrat mezi následujícími: \"byt\", \"dum\", \"pozemek\", \"garaz\", \"kancelar\", \"nebytovy-prostor\" a \"chata-chalupa\"",
            "editor": "textfield",
            "prefill": "byt",
            "nullable": false
        }
    },
    "required": [
        "Typ poměru"
    ]
}

main.js

1const Apify = require('apify');
2
/**
 * Strips every newline character from a string and trims surrounding whitespace.
 *
 * @param {string} string - Text to clean, e.g. the textContent of a scraped element.
 * @returns {string|undefined} The cleaned text, or `undefined` when the argument
 *   is not a string (for example a missing address part or parameter cell).
 */
function removeChars(string) {
    // Explicit guard instead of a catch-all try/catch: non-string input is an
    // expected case here, not an exceptional one, and nothing else can throw.
    if (typeof string !== 'string') {
        return undefined;
    }
    return string.replace(/\n/g, '').trim();
}
15
/**
 * Wraps a single value in an array; an array is returned unchanged.
 *
 * @param {*} value - A single input value or an array of values.
 * @returns {Array} Always an array.
 */
function toList(value) {
    return Array.isArray(value) ? value : [value];
}

/**
 * Actor entry point.
 *
 * For every combination of advert type and property type it walks the listing
 * pages one by one. Each listing page ("START" request) enqueues the links of
 * the adverts it shows and stores a snapshot; every advert detail page is then
 * scraped and pushed to the default dataset.
 *
 * @throws {Error} When the advert type or property type input is missing or empty.
 */
Apify.main(async () => {
    const input = (await Apify.getInput()) || {};

    // INPUT_SCHEMA.json names the fields "Typ poměru" / "Typ nemovitosti", while this
    // script historically read "advertTypeList" / "propTypeList". Accept both spellings.
    const advertTypeInput = input.advertTypeList !== undefined ? input.advertTypeList : input['Typ poměru'];
    const propTypeInput = input.propTypeList !== undefined ? input.propTypeList : input['Typ nemovitosti'];
    const advertTypeList = toList(advertTypeInput);
    const propTypeList = toList(propTypeInput);

    // Fail early with a clear message instead of crawling ".../nabidka-undefined/undefined".
    const isUsable = (value) => typeof value === 'string' && value.trim() !== '';
    if (!advertTypeList.every(isUsable)) {
        throw new Error('Missing advert type: provide "Typ poměru" (or "advertTypeList") in the actor input.');
    }
    if (!propTypeList.every(isUsable)) {
        throw new Error('Missing property type: provide "Typ nemovitosti" (or "propTypeList") in the actor input.');
    }
    console.log("multifile");

    // Both objects are independent of the page being crawled, so create them once.
    const requestQueue = await Apify.openRequestQueue();
    const proxyConfiguration = await Apify.createProxyConfiguration();

    const paramSelector = '#detail-parameters .row.pl-md-4 .col.col-6.param-value';

    for (const advertType of advertTypeList) {
        for (const propType of propTypeList) {
            let pageNumber = 1;
            let hasMorePages = true;

            while (hasMorePages) {
                // Stop by default; only a successfully parsed pagination that reports
                // further pages keeps the loop going. This prevents an endless loop
                // when the listing page fails or has no pagination element.
                hasMorePages = false;
                const currentPage = pageNumber;

                const urlString = `https://www.bezrealitky.cz/vypis/nabidka-${encodeURIComponent(advertType)}/${encodeURIComponent(propType)}?page=${currentPage}`;
                await requestQueue.addRequest({
                    url: urlString,
                    userData: { label: "START" },
                });

                const crawler = new Apify.PuppeteerCrawler({
                    requestQueue,
                    proxyConfiguration,
                    maxRequestsPerCrawl: 50,
                    handlePageFunction: async ({ request, page }) => {
                        console.log(`Processing ${request.url}...`);

                        if (request.userData.label === "START") {
                            // Listing page: enqueue every advert detail link.
                            await Apify.utils.enqueueLinks({
                                page,
                                requestQueue,
                                selector: '.product__link.js-product-link',
                            });

                            // The second-to-last pagination item is read as the page count.
                            // null is returned when the pagination is absent.
                            const pageCount = await page.$$eval('ul.pagination li', (els) => {
                                const targetEl = els[els.length - 2];
                                return targetEl ? Number(targetEl.textContent) : null;
                            });

                            console.log(`Page count: ${pageCount}`);
                            hasMorePages = Number.isFinite(pageCount) && pageCount > currentPage;

                            const keyImage = `Start${currentPage}`;
                            await Apify.utils.puppeteer.saveSnapshot(page, { key: keyImage });
                            return;
                        }

                        // Detail page: the price has all newlines and spaces removed.
                        const rawPrice = await page.$eval('.detail-price', (el) => el.textContent);
                        const price = rawPrice.replace(/[\n ]/g, '');

                        const name = await page.$eval('.col h1', (el) => el.textContent);

                        // The heading is split on ", " into street, town and region.
                        const address = await page.$eval('.col.col-12 h2', (el) => el.textContent);
                        const [street, town, region] = address.split(', ').map((part) => removeChars(part));

                        // Read all parameter cells in one round trip; a missing cell yields
                        // undefined for that field instead of failing the whole request.
                        const paramValues = await page.$$eval(paramSelector, (els) => els.map((el) => el.textContent));
                        const rooms = removeChars(paramValues[2]);
                        const state = removeChars(paramValues[3]);
                        const surface = removeChars(paramValues[4]);
                        const ownerType = removeChars(paramValues[7]);
                        const buildingType = removeChars(paramValues[8]);

                        await Apify.pushData({
                            url: request.url,
                            name,
                            price,
                            // Previously indexed the lists by themselves and was always undefined.
                            advertType,
                            propType,
                            street,
                            town,
                            region,
                            rooms,
                            state,
                            ownerType,
                            surface,
                            buildingType,
                        });
                    },
                });

                await crawler.run();
                pageNumber += 1;
            }
        }
    }
    console.log('Crawler finished.');
});

package.json

{
"name": "my-actor",
"version": "0.0.1",
"dependencies": {
"apify": "^2.0.7"
},
"scripts": {
"start": "node main.js"
},
"author": "Me!"
}