Zoopla
Deprecated
Pricing
Pay per usage
Go to Store
Zoopla
Deprecated
Get zoopla listings
0.0 (0)
Pricing
Pay per usage
1
Total users
33
Monthly users
1
Last modified
3 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10

# Copy just package.json and package-lock.json first, since they should be
# the only files that affect "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "apify": "0.21.10" }, "scripts": { "start": "node main.js" }}
main.js
1const Apify = require("apify");2
/**
 * Envelope shape of one dataset record: an `error` slot plus the scraped
 * `data` payload. The crawler handler in this file builds an object literal of
 * the same shape rather than cloning this constant.
 */
const templateOutput = {
    error: null,
    data: {},
};
/**
 * Default values for one scraped listing. The crawler deep-clones this object
 * for every page and overwrites the fields it manages to extract; fields it
 * does not touch keep the defaults below.
 *
 * Key names and key order are part of the dataset output and must stay stable.
 */
const templateResult = {
    // Listing content
    title: '',
    description: '',
    surfaceArea: null,
    surfaceAreaUnit: null,
    price: -1,
    currency: '',
    numberOfRooms: null,
    numberOfBedrooms: -1,
    publishingDate: null,
    monthlyRent: null,
    weeklyRent: null,
    marketedBy: {
        name: '',
        address: '',
        phoneNumber: '',
    },
    energyClass: null,
    postCode: null,
    latitude: null,
    longitude: null,
    greenhouseGazClass: null,
    image: [],
    siteURL: '',

    // Raw page / failure details
    siteHtml: null,
    error: null,

    // Response diagnostics
    statusCode: null,
    htmlLength: -1,
    captchaFound: false,
    isHtmlPage: true,

    host: '',
};
46
/**
 * Converts a displayed price such as "£1,250,000" to an integer.
 *
 * All thousands separators are removed (a plain-string `replace(',', '')`
 * would only strip the first one and turn "£1,250,000" into 1250).
 *
 * @param {string} priceText - Trimmed text of the price element; may be empty.
 * @returns {number} The parsed amount, or -1 when the text holds no leading number.
 */
function parsePrice(priceText) {
    const amount = Number.parseInt(priceText.replace(/[£,]/g, ''), 10);
    // parseInt never throws; an unparsable price shows up as NaN instead.
    return Number.isNaN(amount) ? -1 : amount;
}

/**
 * Determines the number of bedrooms of a listing.
 *
 * @param {string} featureText - Text of the "bedrooms" key feature; may be empty.
 * @param {string} html - Raw page HTML, searched for a `num_beds:` field as a fallback.
 * @returns {number|null} The bedroom count, or null when neither source has one.
 */
function parseBedrooms(featureText, html) {
    // \d+ so that two-digit counts ("10 bedrooms") are not cut down to one digit.
    const fromFeature = featureText.match(/\d+/);
    if (fromFeature) {
        return Number.parseInt(fromFeature[0], 10);
    }
    // Only accept digits that directly follow `num_beds:` so that a number
    // belonging to a later field on the same line is never picked up.
    const fromMarkup = html.match(/num_beds:\s*"?(\d+)/);
    return fromMarkup ? Number.parseInt(fromMarkup[1], 10) : null;
}

/**
 * Extracts the post code from the `outcode:` / `incode:` fields in the page HTML.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string|null} Outcode and incode joined without a separator (the
 *     format this actor has always emitted), or null unless both fields are present.
 */
function extractPostCode(html) {
    const outcode = html.match(/outcode:\s*([^,]*)/);
    const incode = html.match(/incode:\s*([^,]*)/);
    if (!outcode || !incode) {
        return null;
    }
    const unquote = (raw) => raw.replace(/"/g, '').trim();
    return unquote(outcode[1]) + unquote(incode[1]);
}

/**
 * Reads one coordinate out of the matched "geo" block.
 *
 * @param {string} geoBlock - Text of the matched "geo" object.
 * @param {RegExp} pattern - Pattern whose first capture group is the raw value.
 * @returns {string|null} The value as a string with quotes, braces and
 *     surrounding whitespace removed, or null when the key is absent.
 */
function readCoordinate(geoBlock, pattern) {
    const found = geoBlock.match(pattern);
    // The capture may run past the value into closing braces; strip all of them.
    return found ? found[1].replace(/["}]/g, '').trim() : null;
}

/**
 * Builds one listing record from a crawled page.
 *
 * Starts from a deep copy of `templateResult` and fills in every field that can
 * be extracted; everything else keeps its template default.
 *
 * @param {object} page - Context passed by the crawler to the page handler.
 * @param {object} page.request - Crawled request; only `url` is read.
 * @param {string} page.html - Raw page HTML.
 * @param {Function} page.$ - Cheerio handle for the page.
 * @returns {object} The populated listing record.
 */
function buildListingResult({ request, html, $ }) {
    // JSON round-trip: the template only holds JSON-safe values, and this
    // keeps nested objects/arrays from being shared between records.
    const result = JSON.parse(JSON.stringify(templateResult));

    const headline = $('h1.ui-title-subgroup').first().text().trim();
    const address = $('h2.ui-property-summary__address').first().text().trim();
    result.title = `${headline} in ${address}`;
    result.description = $('#dp-description-expand + div').first().text().trim();

    // Prefer the price directly after the heading, else any main price on the page.
    const priceText =
        $('h2 + .ui-pricing .ui-pricing__main-price').first().text().trim() ||
        $('.ui-pricing .ui-pricing__main-price').first().text().trim();
    result.price = parsePrice(priceText);
    result.currency = priceText.startsWith('£') ? 'GBP' : '';

    const bedroomsFeature = $('.icon-bed + .dp-features-list__text')
        .filter((index, el) => $(el).text().includes('edrooms'))
        .first()
        .text();
    result.numberOfBedrooms = parseBedrooms(bedroomsFeature, html);

    result.marketedBy.name = $('.ui-agent .ui-agent__name').first().text().trim();
    result.marketedBy.address = $('.ui-agent address').first().text().trim();
    result.marketedBy.phoneNumber = $('.ui-agent__tel a').first().text().replace('Call', '').trim();

    $('img.dp-gallery__image').each((index, el) => {
        const src = $(el).attr('src');
        // Skip images without a src instead of storing `undefined`.
        if (src) {
            result.image.push(src);
        }
    });

    result.siteURL = request.url;
    result.htmlLength = html.length;

    const hostMatch = request.url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
    result.host = hostMatch && hostMatch[1];

    const postCode = extractPostCode(html);
    if (postCode !== null) {
        result.postCode = postCode;
    }

    // The pattern depends on the exact line layout of the "geo" object in the page.
    const geoBlock = html.match(/"geo": {\n.*"@type": "GeoCoordinates",\n.*\n.*\n[^,]*/);
    if (geoBlock) {
        result.latitude = readCoordinate(geoBlock[0], /"latitude":([^,]*)/);
        result.longitude = readCoordinate(geoBlock[0], /"longitude":([^,]*)/);
    }

    return result;
}

/**
 * Actor entry point: enqueues every URL from `input.links` into the 'zoopla'
 * request queue, crawls them with a CheerioCrawler and pushes one
 * `{ error, data }` record per page to the 'zoopla2' dataset.
 */
Apify.main(async () => {
    const requestQueue = await Apify.openRequestQueue('zoopla');
    const input = await Apify.getInput();
    const dataset = await Apify.openDataset('zoopla2');

    // Fail with a clear message instead of a TypeError from the loop below.
    if (!input || !Array.isArray(input.links)) {
        throw new Error('Actor input must contain a "links" array of listing URLs.');
    }

    for (const link of input.links) {
        console.log(link);
        await requestQueue.addRequest({ url: link });
    }

    const crawler = new Apify.CheerioCrawler({
        requestQueue,
        handlePageFunction: async ({ request, html, $ }) => {
            const pageTitle = $('title').text();
            console.log(`Request URL: ${request.url}`);
            console.log(`Title of ${request.url}: ${pageTitle}`);

            await dataset.pushData({
                error: null,
                data: buildListingResult({ request, html, $ }),
            });
        },
    });

    await crawler.run();
});