RightMove avatar

RightMove

Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
RightMove

RightMove

zyberg/rightmove

Dockerfile

# Dockerfile template used to build and run this Actor.
# The base image name below is set during the Actor build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10

# First, copy only package.json and package-lock.json: they should be the
# only files that affect "npm install" in the next step, so this layer can be
# reused (and the build stays fast) when just the source code changes.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Keep npm quiet and disable the progress bar, then print
# the dependency tree plus the Node.js and NPM versions for debugging.
# "npm list" is wrapped in "|| true" so that a non-zero exit status from it
# does not abort the "&&" chain and fail the build.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy the source code into the container.
# This is the last step so that a source-only change rebuilds just this layer.
COPY  . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

1{
2    "name": "apify-project",
3    "version": "0.0.1",
4    "description": "",
5    "author": "It's not you it's me",
6    "license": "ISC",
7    "dependencies": {
8        "apify": "0.21.10",
9        "moment": "latest"
10    },
11    "scripts": {
12        "start": "node main.js"
13    }
14}

main.js

1const Apify = require("apify");
2const moment = require("moment");
3
/**
 * Blank output record. Every scraped page starts from a deep copy of this
 * object, so each field holds the value reported when nothing was extracted.
 * Key order is kept stable because it is the order of the serialized output.
 */
const templateResult = {
  // Listing details
  title: '',
  description: '',
  surfaceArea: null,
  surfaceAreaUnit: null,
  price: -1,
  currency: '',
  numberOfRooms: null,
  numberOfBedrooms: -1,
  publishingDate: null,
  monthlyRent: null,
  weeklyRent: null,

  // Estate agent block
  marketedBy: { name: '', address: '', phoneNumber: '' },

  // Location and classification
  postcode: null,
  propertyType: null,
  latitude: null,
  longitude: null,
  energyClass: null,
  greenhouseGazClass: null,
  image: [],
  siteURL: '',

  // Raw page / failure information
  siteHtml: null,
  error: null,

  // Response diagnostics
  statusCode: null,
  htmlLength: -1,
  captchaFound: false,
  isHtmlPage: true,

  host: '',
};
40
41
/**
 * Extracts the first amount from a price label.
 *
 * @param {string} priceText - Raw text of the price element, e.g. "£1,250,000".
 * @returns {number} The amount as an integer, or -1 (the template default)
 *   when the text contains no digits.
 */
function parsePrice(priceText) {
  // Take the first run of digits and thousands separators, so surrounding
  // qualifiers and any later figures in the same label are ignored.
  const amount = /\d[\d,]*/.exec(priceText);
  if (!amount) {
    return -1;
  }
  // Remove every comma: String#replace with a string pattern only removes the
  // first one, which would turn "1,250,000" into 1250.
  return parseInt(amount[0].replace(/,/g, ''), 10);
}

/**
 * Works out the number of bedrooms for a listing.
 *
 * @param {string} keyFeatureText - Text of the key-feature item mentioning
 *   bedrooms; a leading "N/M" pattern is read as N bedrooms.
 * @param {string} heading - Listing heading; its leading digits are the fallback.
 * @returns {number|null} The bedroom count, or null when neither source has one.
 */
function parseBedroomCount(keyFeatureText, heading) {
  const fromFeature = /(\d+)\/\d*/.exec(keyFeatureText);
  if (fromFeature) {
    const count = parseInt(fromFeature[1], 10);
    // A zero here is treated as "not found" and falls through to the heading.
    if (count > 0) {
      return count;
    }
  }

  const fromHeading = /^\d+/.exec(heading);
  return fromHeading ? parseInt(fromHeading[0], 10) : null;
}

/**
 * Reads the value of a `"key":value` pair embedded in the page source.
 *
 * @param {string} html - Raw page source.
 * @param {string} key - Property name to look for. It is inserted into a
 *   regular expression unescaped, so it must be a plain identifier.
 * @returns {string|null} The value as text with quotes removed, or null when
 *   the key is absent, empty, or the literal `null`.
 */
function readEmbeddedValue(html, key) {
  // A quoted value is taken whole (it may contain commas); an unquoted one
  // ends at the next "," or "}" so a closing brace is never captured.
  const found = new RegExp(`"${key}"\\s*:\\s*("[^"]*"|[^,}]*)`).exec(html);
  if (!found) {
    return null;
  }
  const value = found[1].replace(/"/g, '').trim();
  return value === '' || value === 'null' ? null : value;
}

/**
 * Builds one output record for a fetched listing page.
 *
 * @param {Function} $ - Cheerio handle for the page.
 * @param {string} html - Raw page source.
 * @param {string} url - URL the page was requested from.
 * @returns {Object} A deep copy of `templateResult` with the extracted fields set.
 */
function buildResult($, html, url) {
  // JSON round-trip gives every page its own nested objects and arrays.
  const result = JSON.parse(JSON.stringify(templateResult));

  const heading = $('#primaryContent h1').first().text().trim();
  const address = $('#primaryContent  h1 + address').first().text().trim();
  result.title = `${heading} in ${address}`;

  const tenure = $('div#description h3 + .sect').first().text().trim();
  const description = $('div#description p[itemprop="description"]').first().text().trim();
  result.description = `${tenure}\n${description}`;

  const priceText = $('#propertyHeaderPrice').first().text().trim();
  result.price = parsePrice(priceText);
  // The pound sign is not always the first character of the label.
  result.currency = priceText.includes('£') ? 'GBP' : '';

  const keyFeatureBedrooms = $('.key-features ul li')
    .filter((index, el) => $(el).text().includes('edrooms'))
    .first()
    .text();
  result.numberOfBedrooms = parseBedroomCount(keyFeatureBedrooms, heading);

  result.marketedBy.name = $('#secondaryAgentDetails a#aboutBranchLink').first().text().trim();
  result.marketedBy.address = $('#secondaryAgentDetails address').first().text().trim();
  result.marketedBy.phoneNumber = $('.agent-details-display .branch-telephone-number').first().text().trim();

  $('.gallery-thumbs-carousel meta[itemprop="contentUrl"]').each((index, el) => {
    result.image.push($(el).attr("content"));
  });

  result.siteURL = url;
  result.htmlLength = html.length;

  const urlMatches = url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
  result.host = urlMatches && urlMatches[1];

  result.postcode = readEmbeddedValue(html, 'postcode');
  result.latitude = readEmbeddedValue(html, 'latitude');
  result.longitude = readEmbeddedValue(html, 'longitude');
  result.propertyType = readEmbeddedValue(html, 'propertyType');

  return result;
}

/**
 * Actor entry point: enqueues every URL from `input.links`, crawls them with
 * CheerioCrawler and pushes one `{ error, data }` record per page to the
 * "rightmove" dataset.
 */
Apify.main(async () => {
  const requestQueue = await Apify.openRequestQueue('rightmove');

  const input = await Apify.getInput();
  if (!input || !Array.isArray(input.links)) {
    throw new Error('Actor input must be an object with a "links" array of URLs.');
  }
  const dataset = await Apify.openDataset('rightmove');

  // The unique key carries a per-run timestamp so the key differs between
  // runs, and the index plus URL so links within one run never share a key.
  // Date#toString() alone has one-second resolution, so it gave every link
  // added in the same second the same key.
  const runStamp = new Date().toISOString();
  for (const [index, link] of input.links.entries()) {
    console.log(link);
    await requestQueue.addRequest({
      url: link,
      uniqueKey: `${runStamp}#${index}#${link}`,
    });
  }

  const crawler = new Apify.CheerioCrawler({
    requestQueue,
    handlePageFunction: async ({ request, html, $ }) => {
      const pageTitle = $("title").text();
      console.log(`Request URL: ${request.url}`);
      console.log(`Title of ${request.url}: ${pageTitle}`);

      await dataset.pushData({
        error: null,
        data: buildResult($, html, request.url),
      });
    },
  });

  await crawler.run();
});
Developer
Maintained by Community
Categories