BezRealitky Scraper
View all Actors
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
BezRealitky Scraper
fcoudy/bezrealitkyscraper
An easy way to access the real-estate market!
Dockerfile
1# First, specify the base Docker image. You can read more about
2# the available images at https://sdk.apify.com/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:16
5
6# Second, copy just package.json and package-lock.json since those are the only
7# files that affect "npm install" in the next step, to speed up the build.
8COPY package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14 && npm install --only=prod --no-optional \
15 && echo "Installed NPM packages:" \
16 && (npm list || true) \
17 && echo "Node.js version:" \
18 && node --version \
19 && echo "NPM version:" \
20 && npm --version
21
22# Next, copy the remaining files and directories with the source code.
23# Since we do this after NPM install, quick build will be really fast
24# for most source file changes.
25COPY . ./
26
27# Optionally, specify how to launch the source code of your actor.
28# By default, Apify's base Docker images define the CMD instruction
29# that runs the Node.js source code using the command specified
30# in the "scripts.start" section of the package.json file.
31# In short, the instruction looks something like this:
32#
33# CMD npm start
INPUT_SCHEMA.json
{
    "title": "Actor input schema",
    "description": "This is actor input schema. Yes it is.",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "Typ poměru": {
            "title": "advertType",
            "type": "string",
            "description": "Zadej, zda se zajímáš o nákup, pronájem či spolubydlení",
            "editor": "select",
            "enum": [
                "prodej",
                "pronajem",
                "spolubydleni"
            ],
            "enumTitles": [
                "Prodej",
                "Pronájem",
                "Spolubydlení"
            ],
            "sectionCaption": "Vybírejte mezi: \"prodej\", \"pronajem\", \"spolubydleni\"",
            "prefill": "prodej",
            "nullable": false
        },
        "Typ nemovitosti": {
            "title": "Typ nemovitosti",
            "type": "string",
            "description": "Vyberte, zda máte zájem o byt, dům či jiné",

            "sectionCaption": "Zde si vybíráte, jaké nemovitosti se vám zobrazí",
            "sectionDescription": "Můžete si vybrat mezi následujícími: \"byt\", \"dum\", \"pozemek\", \"garaz\", \"kancelar\", \"nebytovy-prostor\" a \"chata-chalupa\"",
            "editor": "javascript",
            "prefill": "byt",
            "nullable": false
        }
    },
    "required": [
        "Typ poměru"
    ]
}
main.js
1const Apify = require('apify');
2
/**
 * Removes every newline character from a string and trims the surrounding
 * whitespace.
 *
 * @param {*} string - Value to clean; expected to be a string.
 * @returns {string|undefined} The cleaned text, or `undefined` when the value
 *   is not a string (e.g. a field that was missing on the scraped page).
 */
function removeChars(string) {
    // Explicit guard instead of a catch-all try/catch: non-string input still
    // yields `undefined`, but unrelated errors are no longer swallowed.
    if (typeof string !== 'string') {
        return undefined;
    }
    return string.replace(/\n/g, '').trim();
}
15
/**
 * Actor entry point: crawls bezrealitky.cz listing pages for the requested
 * advert type / property type and stores one dataset record per advert.
 *
 * Listing pages are fetched one at a time (requests labelled "START"); each
 * one enqueues the advert detail links it contains, and every detail page is
 * parsed into a record passed to Apify.pushData().
 */
Apify.main(async () => {
    const input = await Apify.getInput();
    // NOTE(review): the accompanying input schema names its properties
    // "Typ poměru" / "Typ nemovitosti", not these keys — confirm which keys
    // the platform actually delivers.
    const advertTypeList = [input["advertTypeList"]];
    const propTypeList = [input["propTypeList"]];
    console.log("multifile");

    // Loop-invariant setup: the queue and proxy configuration are reused by
    // every crawler created below.
    const requestQueue = await Apify.openRequestQueue();
    const proxyConfiguration = await Apify.createProxyConfiguration();

    // CSS selector of the value cells in the advert's parameter table.
    const paramValueSelector = '#detail-parameters .row.pl-md-4 .col.col-6.param-value';

    for (const advertType of advertTypeList) {
        for (const propType of propTypeList) {
            let pageNumber = 1;
            let hasMorePages = true;

            while (hasMorePages) {
                // Assume this is the last page until the listing itself says
                // otherwise. Previously the flag was only cleared from inside
                // a successfully handled START page, so a failed or
                // pagination-less listing page made this loop run forever.
                hasMorePages = false;

                await requestQueue.addRequest({
                    url: `https://www.bezrealitky.cz/vypis/nabidka-${advertType}/${propType}?page=${pageNumber}`,
                    userData: { label: "START" },
                });

                const crawler = new Apify.PuppeteerCrawler({
                    requestQueue,
                    proxyConfiguration,
                    maxRequestsPerCrawl: 50,
                    handlePageFunction: async ({ request, page }) => {
                        console.log(`Processing ${request.url}...`);

                        if (request.userData.label === "START") {
                            // Listing page: queue every advert detail link.
                            await Apify.utils.enqueueLinks({
                                page,
                                requestQueue,
                                selector: '.product__link.js-product-link',
                            });

                            // The second-to-last pagination item is read as
                            // the total page count; null when there are fewer
                            // than two pagination items.
                            const pageCountText = await page.$$eval('ul.pagination li', (els) => {
                                const targetEl = els[els.length - 2];
                                return targetEl ? targetEl.textContent : null;
                            });
                            const pageCount = pageCountText === null ? NaN : Number(pageCountText);

                            console.log(`Page count: ${pageCount}`);
                            // Continue only while a later page is known to
                            // exist; NaN compares false and ends the loop.
                            hasMorePages = pageNumber < pageCount;

                            await Apify.utils.puppeteer.saveSnapshot(page, { key: `Start${pageNumber}` });
                            return;
                        }

                        // Detail page: price with newlines and spaces removed.
                        const rawPrice = await page.$eval('.detail-price', (el) => el.textContent);
                        const price = rawPrice.replace(/\n/g, '').replace(/ /g, '');

                        const name = await page.$eval('.col h1', (el) => el.textContent);

                        // Heading is split on ", " into street, town, region.
                        const address = await page.$eval('.col.col-12 h2', (el) => el.textContent);
                        const [rawStreet, town, rawRegion] = address.split(", ");
                        const street = removeChars(rawStreet);
                        const region = removeChars(rawRegion);

                        // Read all parameter values in one round trip; fields
                        // are picked by position. A position that does not
                        // exist yields undefined for that field instead of
                        // failing the whole record.
                        const paramValues = await page.$$eval(
                            paramValueSelector,
                            (els) => els.map((el) => el.textContent),
                        );
                        const rooms = removeChars(paramValues[2]);
                        const state = removeChars(paramValues[3]);
                        const surface = removeChars(paramValues[4]);
                        const ownerType = removeChars(paramValues[7]);
                        const buildingType = removeChars(paramValues[8]);

                        await Apify.pushData({
                            url: request.url,
                            name,
                            price,
                            // Previously indexed the arrays with themselves,
                            // which always produced undefined.
                            advertType,
                            propTypeList: propType,
                            street,
                            town,
                            region,
                            rooms,
                            state,
                            ownerType,
                            surface,
                            buildingType,
                        });
                        // COMING SOON
                    },
                });

                await crawler.run();
                pageNumber++;
            }
        }
    }
    console.log('Crawler finished.');
});
package.json
1{
2 "name": "my-actor",
3 "version": "0.0.1",
4 "dependencies": {
5 "apify": "^2.0.7"
6 },
7 "scripts": {
8 "start": "node main.js"
9 },
10 "author": "Me!"
11}
Developer
Maintained by Community
Categories