Zoopla avatar
Zoopla

Deprecated

Pricing

Pay per usage

Go to Store
Zoopla

Zoopla

Deprecated

Developed by

Nikolajus Elmutis

Nikolajus Elmutis

Maintained by Community

Get Zoopla listings

0.0 (0)

Pricing

Pay per usage

1

Total users

33

Monthly users

1

Last modified

3 years ago

Dockerfile

# Template Dockerfile used to build and run acts in the Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10
# Copy just package.json and package-lock.json first: they are the only
# files that affect "npm install" in the next step, so copying them on their
# own speeds up the build when only the source code has changed.
COPY package*.json ./
# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Progress output is disabled to avoid logging too much;
# the dependency tree and the Node.js/NPM versions are printed for debugging.
# "npm list" is wrapped in "|| true" so a non-zero exit from it does not break
# the "&&" chain.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Copy the source code into the container.
# This is done in the last step so that builds stay fast when only the source code changed.
COPY . ./
# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

{
"name": "apify-project",
"version": "0.0.1",
"description": "",
"author": "It's not you it's me",
"license": "ISC",
"dependencies": {
"apify": "0.21.10"
},
"scripts": {
"start": "node main.js"
}
}

main.js

1const Apify = require("apify");
2
/**
 * Shape of a single dataset record: an error slot plus the scraped payload.
 * The payload starts out as an empty object.
 */
const templateOutput = {
    error: null,
    data: {},
};
9
/**
 * Default values for one scraped listing.
 *
 * The object is used as a prototype record: callers deep-copy it and then
 * overwrite whichever fields they manage to extract. Key order is significant
 * because it determines the order of fields in the stored record.
 */
const templateResult = {
    // Listing details
    title: '',
    description: '',
    surfaceArea: null,
    surfaceAreaUnit: null,
    price: -1,
    currency: '',
    numberOfRooms: null,
    numberOfBedrooms: -1,
    publishingDate: null,
    monthlyRent: null,
    weeklyRent: null,

    // Agent marketing the property
    marketedBy: {
        name: '',
        address: '',
        phoneNumber: '',
    },

    // Ratings and location
    energyClass: null,
    postCode: null,
    latitude: null,
    longitude: null,
    greenhouseGazClass: null,

    // Media and source page
    image: [],
    siteURL: '',

    siteHtml: null,
    error: null,

    // Diagnostics about the fetched page
    statusCode: null,
    htmlLength: -1,
    captchaFound: false,
    isHtmlPage: true,

    host: '',
};
45
46
/**
 * Converts a displayed price such as "£1,250,000" or "£1,200 pcm" to an integer.
 *
 * @param {string} priceText - Raw text of the price element.
 * @returns {number} The leading integer amount, or -1 when the text does not
 *     start with a number (for example an empty string or "POA").
 */
function parsePrice(priceText) {
    // Remove the currency sign, EVERY thousands separator and any whitespace.
    // parseInt then reads the leading integer and ignores trailing text.
    const amount = parseInt(priceText.replace(/[£,\s]/g, ''), 10);
    return Number.isNaN(amount) ? -1 : amount;
}

/**
 * Determines the number of bedrooms of a listing.
 *
 * @param {string} featureText - Text of the "N bedrooms" key feature, may be empty.
 * @param {string} html - Raw page HTML, searched for a `num_beds:` entry as a fallback.
 * @returns {?number} The bedroom count, or null when neither source contains a number.
 */
function parseBedrooms(featureText, html) {
    // \d+ rather than \d so that counts of ten or more are not truncated.
    const fromFeature = featureText.match(/\d+/);
    if (fromFeature) {
        return parseInt(fromFeature[0], 10);
    }

    // Only accept digits that appear before the next comma or line break, so
    // an unrelated number later on the same line is never picked up.
    const fromHtml = html.match(/num_beds:[^,\n\d]*(\d+)/);
    return fromHtml ? parseInt(fromHtml[1], 10) : null;
}

/**
 * Builds the post code from the `outcode:` and `incode:` entries found in the page HTML.
 *
 * @param {string} html - Raw page HTML.
 * @returns {?string} Outcode immediately followed by incode (no separator),
 *     or null when either entry is missing.
 */
function parsePostCode(html) {
    const outcode = html.match(/outcode:[^,]*/);
    const incode = html.match(/incode:[^,]*/);
    if (!outcode || !incode) {
        return null;
    }

    const stripped = outcode[0].replace('outcode: ', '').replace(/"/g, '')
        + incode[0].replace('incode: ', '').replace(/"/g, '');
    return stripped.trim();
}

/**
 * Reads the coordinates from the "geo" / "GeoCoordinates" block embedded in the page HTML.
 *
 * @param {string} html - Raw page HTML.
 * @returns {{latitude: ?string, longitude: ?string}} Coordinates as the strings
 *     found in the page; a value is null when it could not be located.
 */
function parseGeo(html) {
    const coordinates = { latitude: null, longitude: null };

    const geoBlock = html.match(/"geo": {\n.*"@type": "GeoCoordinates",\n.*\n.*\n[^,]*/);
    if (!geoBlock) {
        return coordinates;
    }

    // The value may or may not be quoted; capture up to the next quote,
    // comma, closing brace or whitespace.
    const latitude = geoBlock[0].match(/"latitude":\s*"?([^",}\s]+)/);
    if (latitude) {
        coordinates.latitude = latitude[1];
    }

    const longitude = geoBlock[0].match(/"longitude":\s*"?([^",}\s]+)/);
    if (longitude) {
        coordinates.longitude = longitude[1];
    }

    return coordinates;
}

/**
 * Extracts one listing record from a fetched page.
 *
 * @param {Function} $ - Cheerio handle for the page.
 * @param {string} html - Raw page HTML.
 * @param {string} url - URL the page was requested from.
 * @returns {Object} A deep copy of `templateResult` with every field that
 *     could be extracted filled in.
 */
function buildResult($, html, url) {
    // JSON round trip gives an independent deep copy of the plain-data template.
    const result = JSON.parse(JSON.stringify(templateResult));

    const heading = $('h1.ui-title-subgroup').first().text().trim();
    const address = $('h2.ui-property-summary__address').first().text().trim();
    result.title = heading + ' in ' + address;

    result.description = $('#dp-description-expand + div').first().text().trim();

    // Prefer the price directly following the heading, then any main price.
    let priceText = $('h2 + .ui-pricing .ui-pricing__main-price').first().text().trim();
    if (!priceText) {
        priceText = $('.ui-pricing .ui-pricing__main-price').first().text().trim();
    }
    result.price = parsePrice(priceText);
    result.currency = priceText.charAt(0) === '£' ? 'GBP' : '';

    const bedroomsText = $('.icon-bed + .dp-features-list__text')
        .filter((index, el) => $(el).text().includes('edrooms'))
        .first()
        .text();
    result.numberOfBedrooms = parseBedrooms(bedroomsText, html);

    result.marketedBy.name = $('.ui-agent .ui-agent__name').first().text().trim();
    result.marketedBy.address = $('.ui-agent address').first().text().trim();
    result.marketedBy.phoneNumber = $('.ui-agent__tel a').first().text().replace('Call', '').trim();

    $('img.dp-gallery__image').each((index, el) => {
        const src = $(el).attr('src');
        // Skip images without a src so the list never contains undefined.
        if (src) {
            result.image.push(src);
        }
    });

    result.siteURL = url;
    result.htmlLength = html.length;

    const hostMatch = url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
    result.host = hostMatch && hostMatch[1];

    result.postCode = parsePostCode(html);

    const geo = parseGeo(html);
    result.latitude = geo.latitude;
    result.longitude = geo.longitude;

    return result;
}

/**
 * Actor entry point: enqueues every URL from `input.links`, scrapes each page
 * with a CheerioCrawler and pushes one `{ error, data }` record per page to
 * the 'zoopla2' dataset.
 *
 * @throws {Error} When the actor input has no `links` array.
 */
Apify.main(async () => {
    // NOTE(review): 'zoopla' and 'zoopla2' are named storages; presumably they
    // persist between runs, so URLs handled by an earlier run may be skipped.
    // Confirm this is intended.
    const requestQueue = await Apify.openRequestQueue('zoopla');

    const input = await Apify.getInput();
    if (!input || !Array.isArray(input.links)) {
        throw new Error('Actor input must contain a "links" array of listing URLs.');
    }

    const dataset = await Apify.openDataset('zoopla2');

    for (const link of input.links) {
        console.log(link);
        await requestQueue.addRequest({ url: link });
    }

    const crawler = new Apify.CheerioCrawler({
        requestQueue,
        handlePageFunction: async ({ request, html, $ }) => {
            const pageTitle = $('title').text();
            console.log(`Request URL: ${request.url}`);
            console.log(`Title of ${request.url}: ${pageTitle}`);

            await dataset.pushData({
                error: null,
                data: buildResult($, html, request.url),
            });
        },
    });

    await crawler.run();
});