Zoopla avatar
Zoopla

Deprecated

Pricing

Pay per usage

Go to Store
Zoopla

Zoopla

Deprecated

Developed by

Nikolajus Elmutis

Nikolajus Elmutis

Maintained by Community

Get Zoopla listings

0.0 (0)

Pricing

Pay per usage

1

Total users

33

Monthly users

1

Last modified

3 years ago

Dockerfile

1# This is a template for a Dockerfile used to run acts in the Actor system.
2# The base image name below is set during the act build, based on user settings.
3# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
4FROM apify/actor-node-basic:v0.21.10
5
6# Second, copy just package.json and package-lock.json since they should be
7# the only files that affect "npm install" in the next step, to speed up the build
8COPY package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14 && npm install --only=prod --no-optional \
15 && echo "Installed NPM packages:" \
16 && (npm list --all || true) \
17 && echo "Node.js version:" \
18 && node --version \
19 && echo "NPM version:" \
20 && npm --version
21
22# Copy source code to container
23# Do this in the last step, to have fast build if only the source code changed
24COPY  . ./
25
26# NOTE: The CMD is already defined by the base image.
27# Uncomment this for local node inspector debugging:
28# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

1{
2    "name": "apify-project",
3    "version": "0.0.1",
4    "description": "",
5    "author": "It's not you it's me",
6    "license": "ISC",
7    "dependencies": {
8        "apify": "0.21.10"
9    },
10    "scripts": {
11        "start": "node main.js"
12    }
13}

main.js

1const Apify = require("apify");
2
// Envelope shape for a dataset record: an error slot plus a data payload.
const templateOutput = {
  error: null,
  data: {},
};
9
// Blank listing record. It is deep-copied for every scraped page and then
// filled in; fields the scraper never sets keep the defaults given here.
const templateResult = {
  // Listing details
  title: "",
  description: "",
  surfaceArea: null,
  surfaceAreaUnit: null,
  price: -1,
  currency: "",
  numberOfRooms: null,
  numberOfBedrooms: -1,
  publishingDate: null,
  monthlyRent: null,
  weeklyRent: null,
  // Estate agent block
  marketedBy: {
    name: "",
    address: "",
    phoneNumber: "",
  },
  // Energy rating and location
  energyClass: null,
  postCode: null,
  latitude: null,
  longitude: null,
  greenhouseGazClass: null,
  image: [],
  siteURL: "",

  // Raw page / failure information
  siteHtml: null,
  error: null,

  // Fetch diagnostics
  statusCode: null,
  htmlLength: -1,
  captchaFound: false,
  isHtmlPage: true,

  host: "",
};
45
46
/**
 * Extracts the integer amount from a price label such as "£1,250,000".
 *
 * The first run of digits (with optional thousands separators) is used, so
 * trailing text like " pcm" or leading text like "Offers over " is ignored.
 *
 * @param {string} priceString - Trimmed text of the price element (may be empty).
 * @returns {number} The parsed amount, or -1 when no number is present.
 */
function parsePrice(priceString) {
  const digits = priceString.match(/\d[\d,]*/);
  if (!digits) return -1;
  // Strip EVERY comma: String#replace with a string pattern removes only the first one.
  const amount = Number.parseInt(digits[0].replace(/,/g, ''), 10);
  return Number.isNaN(amount) ? -1 : amount;
}

/**
 * Determines the number of bedrooms for a listing.
 *
 * @param {string} featureText - Text of the "bedrooms" key-feature element, or '' if absent.
 * @param {string} html - Raw page source, searched for a `num_beds:` entry as a fallback.
 * @returns {?number} The bedroom count, or null when it cannot be determined.
 */
function parseBedroomCount(featureText, html) {
  let digits;
  if (featureText) {
    // \d+ rather than \d so that counts of 10 or more are not truncated.
    digits = featureText.match(/\d+/);
  } else {
    const embedded = html.match(/num_beds:(.*),/);
    digits = embedded && embedded[0].match(/\d+/);
  }
  if (!digits) return null;
  const count = Number.parseInt(digits[0], 10);
  return Number.isNaN(count) ? null : count;
}

/**
 * Builds the post code from the `outcode:` and `incode:` entries in the page source.
 *
 * @param {string} html - Raw page source.
 * @returns {?string} Outcode immediately followed by incode (no separator), or
 *   null unless both entries are present.
 */
function extractPostCode(html) {
  const outcode = html.match(/outcode:[^,]*/);
  const incode = html.match(/incode:[^,]*/);
  if (!outcode || !incode) return null;

  const outPart = outcode[0].replace(/^outcode:\s*/, '').replace(/"/g, '');
  const inPart = incode[0].replace(/^incode:\s*/, '').replace(/"/g, '');
  return (outPart + inPart).trim();
}

/**
 * Reads latitude/longitude from the "geo" / "GeoCoordinates" block in the page source.
 *
 * @param {string} html - Raw page source.
 * @returns {{latitude: ?string, longitude: ?string}} Coordinates as the strings
 *   found in the page; each is null when not found.
 */
function extractGeo(html) {
  const coords = { latitude: null, longitude: null };
  const geoBlock = html.match(/"geo": {\n.*"@type": "GeoCoordinates",\n.*\n.*\n[^,]*/);
  if (!geoBlock) return coords;

  // Reduce e.g. `"latitude": 51.5 }` to `51.5` by dropping the label, colon, brace and quotes.
  const clean = (raw, label) => raw
    .replace(label, '')
    .replace(':', '')
    .replace('}', '')
    .replace(/"/g, '')
    .trim();

  const latitude = geoBlock[0].match(/"latitude":[^,]*/);
  if (latitude) coords.latitude = clean(latitude[0], 'latitude');

  const longitude = geoBlock[0].match(/"longitude":[^,]*/);
  if (longitude) coords.longitude = clean(longitude[0], 'longitude');

  return coords;
}

/**
 * Builds one listing record from a fetched page.
 *
 * @param {Object} page
 * @param {Object} page.request - Crawler request; only `request.url` is read.
 * @param {string} page.html - Raw page source.
 * @param {Function} page.$ - Cheerio handle for the page.
 * @returns {Object} A deep copy of `templateResult` with the scraped fields filled in.
 */
function buildResult({ request, html, $ }) {
  // Deep copy so nested objects/arrays of the template are never shared between records.
  const result = JSON.parse(JSON.stringify(templateResult));

  const heading = $('h1.ui-title-subgroup').first().text().trim();
  const address = $('h2.ui-property-summary__address').first().text().trim();
  result.title = heading + ' in ' + address;

  result.description = $('#dp-description-expand + div').first().text().trim();

  // Prefer the price that directly follows a heading; otherwise take any main price.
  let priceString = $('h2 + .ui-pricing .ui-pricing__main-price').first().text().trim();
  if (!priceString) {
    priceString = $('.ui-pricing .ui-pricing__main-price').first().text().trim();
  }
  result.price = parsePrice(priceString);
  result.currency = priceString.includes('£') ? 'GBP' : '';

  const bedroomsFeature = $('.icon-bed + .dp-features-list__text')
    .filter((index, el) => $(el).text().includes('edrooms'))
    .first()
    .text();
  result.numberOfBedrooms = parseBedroomCount(bedroomsFeature, html);

  result.marketedBy.name = $('.ui-agent .ui-agent__name').first().text().trim();
  result.marketedBy.address = $('.ui-agent address').first().text().trim();
  result.marketedBy.phoneNumber = $('.ui-agent__tel a').first().text().replace('Call', '').trim();

  $('img.dp-gallery__image').each((index, el) => {
    result.image.push($(el).attr("src"));
  });

  result.siteURL = request.url;
  result.htmlLength = html.length;

  const urlMatches = request.url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
  result.host = urlMatches && urlMatches[1];

  result.postCode = extractPostCode(html);

  const geo = extractGeo(html);
  result.latitude = geo.latitude;
  result.longitude = geo.longitude;

  return result;
}

/**
 * Actor entry point.
 *
 * Reads `links` from the actor input, enqueues each one in the 'zoopla'
 * request queue, crawls them with a CheerioCrawler and pushes one
 * `{ error, data }` record per page into the 'zoopla2' dataset.
 *
 * Throws an Error when the input has no `links` array.
 */
Apify.main(async () => {
  const requestQueue = await Apify.openRequestQueue('zoopla');
  const input = await Apify.getInput();
  const dataset = await Apify.openDataset('zoopla2');

  // Fail with a clear message instead of an opaque TypeError from the loop below.
  if (!input || !Array.isArray(input.links)) {
    throw new Error('Actor input must be an object with a "links" array of listing URLs.');
  }

  for (const link of input.links) {
    console.log(link);
    await requestQueue.addRequest({ url: link });
  }

  const crawler = new Apify.CheerioCrawler({
    requestQueue,
    handlePageFunction: async ({ request, html, $ }) => {
      const pageTitle = $("title").text();
      console.log(`Request URL: ${request.url}`);
      console.log(`Title of ${request.url}: ${pageTitle}`);

      const output = {
        "error": null,
        "data": buildResult({ request, html, $ }),
      };

      await dataset.pushData(output);
    },
  });

  await crawler.run();
});