Zoopla
Go to Store
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
Zoopla
zyberg/zoopla
Get zoopla listings
Dockerfile
# This is a template for a Dockerfile used to run acts in the Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10

# Copy just package.json and package-lock.json first, since they should be
# the only files that affect "npm install" in the next step; this speeds up the build.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much, and print the dependency
# tree plus the Node.js and NPM versions for debugging.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to the container.
# Do this in the last step, to have a fast build if only the source code changed.
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{
    "name": "apify-project",
    "version": "0.0.1",
    "description": "",
    "author": "It's not you it's me",
    "license": "ISC",
    "dependencies": {
        "apify": "0.21.10"
    },
    "scripts": {
        "start": "node main.js"
    }
}
main.js
1const Apify = require("apify");
2
/**
 * Envelope shape of a dataset record: an `error` slot (null when nothing
 * went wrong) and a `data` object holding the scraped fields.
 */
const templateOutput = {
    error: null,
    data: {},
};
9
/**
 * Default values for one scraped listing. The page handler deep-copies this
 * object for every page and then overwrites the fields it manages to extract,
 * so anything left untouched keeps the placeholder defined here.
 *
 * Key names (including the spelling of `greenhouseGazClass`) are part of the
 * emitted record and are kept exactly as they are.
 */
const templateResult = {
    // Listing details.
    title: '',
    description: '',
    surfaceArea: null,
    surfaceAreaUnit: null,
    price: -1,
    currency: '',
    numberOfRooms: null,
    numberOfBedrooms: -1,
    publishingDate: null,
    monthlyRent: null,
    weeklyRent: null,

    // Agent marketing the listing.
    marketedBy: {
        name: '',
        address: '',
        phoneNumber: '',
    },

    energyClass: null,
    postCode: null,
    latitude: null,
    longitude: null,
    greenhouseGazClass: null,
    image: [],
    siteURL: '',

    siteHtml: null,
    error: null,

    // Fetch diagnostics.
    statusCode: null,
    htmlLength: -1,
    captchaFound: false,
    isHtmlPage: true,

    host: '',
};
45
46
/**
 * Converts the text of a price element (e.g. "£1,250,000") into an integer.
 *
 * @param {string} priceString - Trimmed text of the price element.
 * @returns {number} The parsed price, or -1 when the text has no leading number.
 */
function parsePrice(priceString) {
    // Remove the pound sign and *every* thousands separator: a plain string
    // pattern in replace() only drops the first comma, which would turn
    // "£1,250,000" into 1250.
    const price = parseInt(priceString.replace(/[£,]/g, ''), 10);

    // parseInt signals failure with NaN instead of throwing, so the fallback
    // has to be an explicit check.
    return Number.isNaN(price) ? -1 : price;
}

/**
 * Determines the number of bedrooms, preferring the key-feature text and
 * falling back to the `num_beds:` field found in the raw page source.
 *
 * @param {string} featureText - Text of the bedrooms key feature ('' if absent).
 * @param {string} html - Raw HTML of the page.
 * @returns {?number} Bedroom count, or null when it cannot be determined.
 */
function parseBedrooms(featureText, html) {
    if (featureText) {
        // Match the whole digit run so that "10 bedrooms" is not read as 1.
        const count = featureText.match(/\d+/);
        return count ? parseInt(count[0], 10) : null;
    }

    const field = html.match(/num_beds:(.*),/);
    const digits = field && field[0].match(/\d+/);
    return digits ? parseInt(digits[0], 10) : null;
}

/**
 * Builds the post code from the `outcode:` and `incode:` fields in the page source.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {?string} Concatenated outcode + incode, or null unless both are present.
 */
function extractPostCode(html) {
    const outcode = html.match(/outcode:[^,]*/);
    const incode = html.match(/incode:[^,]*/);
    if (!outcode || !incode) {
        return null;
    }

    const outcodeValue = outcode[0].replace('outcode: ', '').replace(/"/g, '');
    const incodeValue = incode[0].replace('incode: ', '').replace(/"/g, '');
    return (outcodeValue + incodeValue).trim();
}

/**
 * Reads latitude and longitude from the "geo" / "GeoCoordinates" snippet
 * embedded in the page source.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {{latitude: ?string, longitude: ?string}} Coordinates as the text
 *     found in the page; a value is null when it is not present.
 */
function extractGeo(html) {
    const coords = { latitude: null, longitude: null };

    const geo = html.match(/"geo": {\n.*"@type": "GeoCoordinates",\n.*\n.*\n[^,]*/);
    if (!geo) {
        return coords;
    }

    // Strips the key name, the colon, a closing brace and all quotes, leaving the bare value.
    const valueOf = (pair, key) => pair
        .replace(key, '')
        .replace(':', '')
        .replace('}', '')
        .replace(/"/g, '')
        .trim();

    const latitude = geo[0].match(/"latitude":[^,]*/);
    if (latitude) {
        coords.latitude = valueOf(latitude[0], 'latitude');
    }

    const longitude = geo[0].match(/"longitude":[^,]*/);
    if (longitude) {
        coords.longitude = valueOf(longitude[0], 'longitude');
    }

    return coords;
}

/**
 * Extracts one listing record from a fetched page.
 *
 * @param {Object} page
 * @param {Object} page.request - Crawler request; only `request.url` is read.
 * @param {string} page.html - Raw HTML of the page.
 * @param {Function} page.$ - Cheerio handle loaded with the page.
 * @returns {Object} A copy of `templateResult` with the extracted fields filled in.
 */
function buildResult({ request, html, $ }) {
    // Deep-copy so the nested `marketedBy` object and `image` array are not
    // shared between pages.
    const result = JSON.parse(JSON.stringify(templateResult));

    const heading = $('h1.ui-title-subgroup').first().text().trim();
    const address = $('h2.ui-property-summary__address').first().text().trim();
    result.title = `${heading} in ${address}`;

    result.description = $('#dp-description-expand + div').first().text().trim();

    // Try the price that directly follows a heading first, then any main price.
    const priceString = $('h2 + .ui-pricing .ui-pricing__main-price').first().text().trim()
        || $('.ui-pricing .ui-pricing__main-price').first().text().trim();
    result.price = parsePrice(priceString);
    result.currency = priceString.startsWith('£') ? 'GBP' : '';

    const bedroomFeature = $('.icon-bed + .dp-features-list__text')
        .filter((index, el) => $(el).text().includes('edrooms'))
        .first()
        .text();
    result.numberOfBedrooms = parseBedrooms(bedroomFeature, html);

    result.marketedBy.name = $('.ui-agent .ui-agent__name').first().text().trim();
    result.marketedBy.address = $('.ui-agent address').first().text().trim();
    result.marketedBy.phoneNumber = $('.ui-agent__tel a').first().text().replace('Call', '').trim();

    $('img.dp-gallery__image').each((index, el) => {
        const src = $(el).attr('src');
        // Skip gallery images without a src instead of storing undefined.
        if (src) {
            result.image.push(src);
        }
    });

    result.siteURL = request.url;
    result.htmlLength = html.length;

    const urlMatches = request.url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
    result.host = urlMatches && urlMatches[1];

    result.postCode = extractPostCode(html);

    const { latitude, longitude } = extractGeo(html);
    result.latitude = latitude;
    result.longitude = longitude;

    return result;
}

// Entry point: enqueue every URL from the actor input, crawl the pages with
// Cheerio and push one `{ error, data }` record per page to the dataset.
Apify.main(async () => {
    const requestQueue = await Apify.openRequestQueue('zoopla');
    const input = await Apify.getInput();
    const dataset = await Apify.openDataset('zoopla2');

    // Fail with a readable message rather than an opaque TypeError when the
    // input is missing or has no list of links.
    if (!input || !Array.isArray(input.links)) {
        throw new Error('Actor input must be an object with a "links" array of listing URLs.');
    }

    for (const link of input.links) {
        console.log(link);
        await requestQueue.addRequest({ url: link });
    }

    const crawler = new Apify.CheerioCrawler({
        requestQueue,
        handlePageFunction: async ({ request, html, $ }) => {
            const pageTitle = $('title').text();
            console.log(`Request URL: ${request.url}`);
            console.log(`Title of ${request.url}: ${pageTitle}`);

            await dataset.pushData({
                error: null,
                data: buildResult({ request, html, $ }),
            });
        },
    });

    await crawler.run();
});
Developer
Maintained by Community
Categories