
BezRealitky Scraper
Deprecated
Pricing
Pay per usage
Go to Store

BezRealitky Scraper
Deprecated
An easy way to access the real-estate market!
0.0 (0)
Pricing
Pay per usage
1
Total users
14
Monthly users
3
Last modified
4 years ago
Dockerfile
# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Second, copy just package.json and package-lock.json since those are the only
# files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start
INPUT_SCHEMA.json
{
    "title": "Actor input schema",
    "description": "This is actor input schema. Yes it is.",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "Typ poměru": {
            "title": "advertType",
            "type": "string",
            "description": "Zadej, zda se zajímáš o nákup, pronájem či spolubydlení",
            "editor": "select",
            "enum": [
                "prodej",
                "pronajem",
                "spolubydleni"
            ],
            "enumTitles": [
                "Prodej",
                "Pronájem",
                "Spolubydlení"
            ],
            "sectionCaption": "Vybírejte mezi: \"prodej\", \"pronajem\", \"spolubydleni\"",
            "prefill": "prodej",
            "nullable": false
        },
        "Typ nemovitosti": {
            "title": "Typ nemovitosti",
            "type": "string",
            "description": "Vyberte, zda máte zájem o byt, dům či jiné",
            "sectionCaption": "Zde si vybíráte, jaké nemovitosti se vám zobrazí",
            "sectionDescription": "Můžete si vybrat mezi následujícími: \"byt\", \"dum\", \"pozemek\", \"garaz\", \"kancelar\", \"nebytovy-prostor\" a \"chata-chalupa\"",
            "editor": "select",
            "enum": [
                "byt",
                "dum",
                "pozemek",
                "garaz",
                "kancelar",
                "nebytovy-prostor",
                "chata-chalupa"
            ],
            "prefill": "byt",
            "nullable": false
        }
    },
    "required": [
        "Typ poměru"
    ]
}
main.js
1const Apify = require('apify');2
/**
 * Removes every newline character from a string and trims the
 * whitespace left at both ends.
 *
 * @param {string} string - Raw text, e.g. an element's `textContent`.
 * @returns {string|undefined} The cleaned text, or `undefined` when the
 *   argument is not a string (callers pass values that may be missing).
 */
function removeChars(string) {
    // Explicit guard instead of relying on a swallowed TypeError.
    if (typeof string !== 'string') {
        return undefined;
    }
    return string.replace(/\n/g, '').trim();
}
/**
 * Actor entry point.
 *
 * For each advert type / property type combination it walks the listing
 * pages of bezrealitky.cz one by one. Every listing page ("START" request)
 * enqueues the detail links it contains; every detail page is parsed and
 * pushed to the default dataset.
 */
Apify.main(async () => {
    // All parameter values on a detail page live in cells matching this selector.
    const PARAM_VALUE_SELECTOR = '#detail-parameters .row.pl-md-4 .col.col-6.param-value';

    // Returns the first argument that is neither undefined nor null.
    const firstDefined = (...values) => values.find((value) => value !== undefined && value !== null);

    const input = (await Apify.getInput()) || {};

    // The code originally read `advertTypeList` / `propTypeList`, while
    // INPUT_SCHEMA.json declares the fields as "Typ poměru" / "Typ nemovitosti".
    // Accept both so that input produced from the schema actually works.
    const advertTypeInput = firstDefined(input.advertTypeList, input['Typ poměru']);
    const propTypeInput = firstDefined(input.propTypeList, input['Typ nemovitosti']);
    if (advertTypeInput === undefined || propTypeInput === undefined) {
        throw new Error(
            'Missing input: expected "advertTypeList" (or "Typ poměru") and "propTypeList" (or "Typ nemovitosti").',
        );
    }

    const advertTypeList = [advertTypeInput];
    const propTypeList = [propTypeInput];
    console.log("multifile");

    // Both are loop-invariant: the default queue and the proxy configuration
    // are the same for every listing page, so create them once.
    const requestQueue = await Apify.openRequestQueue();
    const proxyConfiguration = await Apify.createProxyConfiguration();

    for (const advertType of advertTypeList) {
        for (const propType of propTypeList) {
            let pageNumber = 1;
            let hasNextPage = true;

            while (hasNextPage) {
                // Assume this is the last page unless the listing page proves
                // otherwise. This guarantees termination when the START request
                // fails or the pagination element is missing.
                hasNextPage = false;

                const urlString = `https://www.bezrealitky.cz/vypis/nabidka-${advertType}/${propType}?page=${pageNumber}`;
                await requestQueue.addRequest({
                    url: urlString,
                    userData: { label: 'START' },
                });

                const crawler = new Apify.PuppeteerCrawler({
                    requestQueue,
                    proxyConfiguration,
                    maxRequestsPerCrawl: 50,
                    handlePageFunction: async ({ request, page }) => {
                        console.log(`Processing ${request.url}...`);

                        if (request.userData.label === 'START') {
                            // Listing page: queue every advert detail link.
                            await Apify.utils.enqueueLinks({
                                page,
                                requestQueue,
                                selector: '.product__link.js-product-link',
                            });

                            // The second-to-last pagination item is read as the
                            // total page count; null when there is no such item.
                            const pageCount = await page.$$eval('ul.pagination li', (els) => {
                                const targetEl = els[els.length - 2];
                                return targetEl ? Number(targetEl.textContent) : null;
                            });

                            console.log(`Page count: ${pageCount}`);
                            hasNextPage = Number.isFinite(pageCount) && pageNumber < pageCount;

                            const keyImage = `Start${pageNumber}`;
                            await Apify.utils.puppeteer.saveSnapshot(page, { key: keyImage });
                            return;
                        }

                        // Detail page: price with all newlines and spaces removed.
                        const rawPrice = await page.$eval('.detail-price', (el) => el.textContent);
                        const price = rawPrice.replace(/[\n ]/g, '');

                        const name = await page.$eval('.col h1', (el) => el.textContent);

                        // Heading is split on ", " into street, town and region.
                        const address = await page.$eval('.col.col-12 h2', (el) => el.textContent);
                        const [rawStreet, town, rawRegion] = address.split(', ');
                        const street = removeChars(rawStreet);
                        const region = removeChars(rawRegion);

                        // Read all parameter cells in one round trip. Cells are
                        // addressed by position; a missing cell yields undefined
                        // for that field instead of failing the whole request.
                        const paramValues = await page.$$eval(
                            PARAM_VALUE_SELECTOR,
                            (els) => els.map((el) => el.textContent),
                        );
                        const rooms = removeChars(paramValues[2]);
                        const state = removeChars(paramValues[3]);
                        const surface = removeChars(paramValues[4]);
                        const ownerType = removeChars(paramValues[7]);
                        const buildingType = removeChars(paramValues[8]);

                        await Apify.pushData({
                            url: request.url,
                            name,
                            price,
                            // Previously indexed the list by itself, which
                            // always produced undefined.
                            advertType,
                            propTypeList: propType,
                            street,
                            town,
                            region,
                            rooms,
                            state,
                            ownerType,
                            surface,
                            buildingType,
                        });
                    },
                });

                await crawler.run();
                pageNumber += 1;
            }
        }
    }
    console.log('Crawler finished.');
});
package.json
{ "name": "my-actor", "version": "0.0.1", "dependencies": { "apify": "^2.0.7" }, "scripts": { "start": "node main.js" }, "author": "Me!"}