Zoopla avatar

Zoopla

Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
Zoopla

Zoopla

zyberg/zoopla

Get Zoopla listings

Dockerfile

# This is a template for a Dockerfile used to run Actors on the Apify platform.
# The base image name below is set during the Actor build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10

# First, copy just package.json and package-lock.json, since they are the only
# files that affect "npm install" in the next step. This keeps that layer
# cached and speeds up the build when only the source code changes.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

{
    "name": "apify-project",
    "version": "0.0.1",
    "description": "",
    "author": "It's not you it's me",
    "license": "ISC",
    "dependencies": {
        "apify": "0.21.10"
    },
    "scripts": {
        "start": "node main.js"
    }
}

main.js

1const Apify = require("apify");
2
/**
 * Shape of one record pushed to the dataset: an `error` slot plus the scraped
 * `data` payload. The page handler in this file builds an equivalent literal
 * inline, so this constant serves as a reference for the envelope shape.
 */
const templateOutput = {
  "error": null,
  "data": {},
};
9
/**
 * Default values for a single scraped listing.
 *
 * The page handler deep-copies this object (via a JSON round-trip) and then
 * overwrites the fields it manages to extract, so every field it does not
 * touch keeps the placeholder defined here.
 */
const templateResult = {
  // Listing details.
  "title": "",
  "description": "",
  "surfaceArea": null,
  "surfaceAreaUnit": null,
  "price": -1,
  "currency": "",
  "numberOfRooms": null,
  "numberOfBedrooms": -1,
  "publishingDate": null,
  "monthlyRent": null,
  "weeklyRent": null,
  // Agent advertising the listing.
  "marketedBy": {
    "name": "",
    "address": "",
    "phoneNumber": ""
  },
  "energyClass": null,
  "postCode": null,
  "latitude": null,
  "longitude": null,
  "greenhouseGazClass": null,
  "image": [],
  "siteURL": "",

  "siteHtml": null,
  "error": null,

  // Metadata about the fetched page.
  "statusCode": null,
  "htmlLength": -1,
  "captchaFound": false,
  "isHtmlPage": true,

  "host": ""
};
45
46
/**
 * Extracts the numeric price from a price label such as "£1,250,000".
 *
 * Takes the first run of digits (thousands separators allowed) and drops
 * every comma before parsing, so multi-comma prices are not truncated.
 *
 * @param {string} priceText - Trimmed text of the price element.
 * @returns {number} The parsed integer price, or -1 when the label has no digits.
 */
function parsePrice(priceText) {
  const digits = priceText.match(/\d[\d,]*/);
  if (!digits) {
    return -1;
  }
  return parseInt(digits[0].replace(/,/g, ''), 10);
}

/**
 * Determines the number of bedrooms for a listing.
 *
 * Prefers the number found in the key-feature text; otherwise falls back to
 * the digits that directly follow `num_beds:` in the raw page source.
 *
 * @param {string} featureText - Text of the bedrooms key feature ('' if absent).
 * @param {string} html - Raw HTML of the page.
 * @returns {number|null} The bedroom count, or null when none can be found.
 */
function parseBedrooms(featureText, html) {
  const fromFeature = featureText.match(/\d+/);
  if (fromFeature) {
    return parseInt(fromFeature[0], 10);
  }
  // Only accept digits before the next comma/newline so that a later field on
  // the same line cannot be mistaken for the bedroom count.
  const fromHtml = html.match(/num_beds:[^\d\n,]*(\d+)/);
  return fromHtml ? parseInt(fromHtml[1], 10) : null;
}

/**
 * Builds the postcode from the `outcode:` and `incode:` fields in the page source.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {string|null} Outcode and incode concatenated (quotes removed,
 *   trimmed), or null unless both fields are present.
 */
function extractPostCode(html) {
  const outcode = html.match(/outcode:[^,]*/);
  const incode = html.match(/incode:[^,]*/);
  if (!outcode || !incode) {
    return null;
  }
  const outcodeValue = outcode[0].replace('outcode: ', '').replace(/"/g, '');
  const incodeValue = incode[0].replace('incode: ', '').replace(/"/g, '');
  return (outcodeValue + incodeValue).trim();
}

/**
 * Reads one coordinate out of the matched "geo" block.
 *
 * @param {string} geoBlock - Text matched by the "geo" block pattern.
 * @param {string} key - Either 'latitude' or 'longitude'.
 * @returns {string|null} The coordinate as a trimmed string, or null if absent.
 */
function extractCoordinate(geoBlock, key) {
  const field = geoBlock.match(new RegExp(`"${key}":[^,]*`));
  if (!field) {
    return null;
  }
  return field[0]
    .replace(key, '')
    .replace(':', '')
    .replace('}', '')
    .replace(/"/g, '')
    .trim();
}

/**
 * Extracts latitude and longitude from the "geo" / "GeoCoordinates" block in
 * the page source.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {{latitude: (string|null), longitude: (string|null)}} Coordinates
 *   as strings; both null when the block is not found.
 */
function extractGeo(html) {
  const geoBlock = html.match(/"geo": {\n.*"@type": "GeoCoordinates",\n.*\n.*\n[^,]*/);
  if (!geoBlock) {
    return { latitude: null, longitude: null };
  }
  return {
    latitude: extractCoordinate(geoBlock[0], 'latitude'),
    longitude: extractCoordinate(geoBlock[0], 'longitude'),
  };
}

/**
 * Actor entry point.
 *
 * Enqueues every URL from `input.links` into the 'zoopla' request queue,
 * crawls them with a CheerioCrawler and pushes one `{ error, data }` record
 * per page into the 'zoopla2' dataset.
 *
 * @throws {Error} When the input does not contain a `links` array.
 */
Apify.main(async () => {
  const requestQueue = await Apify.openRequestQueue('zoopla');
  const input = await Apify.getInput();
  const dataset = await Apify.openDataset('zoopla2');

  // Fail with a readable message instead of a TypeError from the loop below.
  if (!input || !Array.isArray(input.links)) {
    throw new Error('Input must be an object with a "links" array of listing URLs.');
  }

  for (const link of input.links) {
    console.log(link);
    await requestQueue.addRequest({ url: link });
  }

  const crawler = new Apify.CheerioCrawler({
    requestQueue,
    handlePageFunction: async ({ request, html, $ }) => {
      console.log(`Request URL: ${request.url}`);
      console.log(`Title of ${request.url}: ${$('title').text()}`);

      // Deep copy so nested defaults (marketedBy, image) are not shared
      // between pages.
      const result = JSON.parse(JSON.stringify(templateResult));

      const heading = $('h1.ui-title-subgroup').first().text().trim();
      const address = $('h2.ui-property-summary__address').first().text().trim();
      result.title = `${heading} in ${address}`;

      result.description = $('#dp-description-expand + div').first().text().trim();

      // Prefer the price that directly follows a heading; otherwise take the
      // first main price on the page.
      const priceText =
        $('h2 + .ui-pricing .ui-pricing__main-price').first().text().trim() ||
        $('.ui-pricing .ui-pricing__main-price').first().text().trim();
      result.price = parsePrice(priceText);
      result.currency = priceText.includes('£') ? 'GBP' : '';

      const bedroomsFeature = $('.icon-bed + .dp-features-list__text')
        .filter((_, el) => $(el).text().includes('edrooms'))
        .first()
        .text();
      result.numberOfBedrooms = parseBedrooms(bedroomsFeature, html);

      result.marketedBy.name = $('.ui-agent .ui-agent__name').first().text().trim();
      result.marketedBy.address = $('.ui-agent address').first().text().trim();
      result.marketedBy.phoneNumber = $('.ui-agent__tel a')
        .first()
        .text()
        .replace('Call', '')
        .trim();

      $('img.dp-gallery__image').each((_, el) => {
        const src = $(el).attr('src');
        // Skip gallery images without a src so the list holds only URLs.
        if (src) {
          result.image.push(src);
        }
      });

      result.siteURL = request.url;
      result.htmlLength = html.length;

      const urlMatches = request.url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
      result.host = urlMatches && urlMatches[1];

      result.postCode = extractPostCode(html);

      const { latitude, longitude } = extractGeo(html);
      result.latitude = latitude;
      result.longitude = longitude;

      await dataset.pushData({
        "error": null,
        "data": result,
      });
    },
  });

  await crawler.run();
});
Developer
Maintained by Community
Categories