RightMove
Go to Store
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
RightMove
zyberg/rightmove
Dockerfile
# This is a template for a Dockerfile used to run Actors in the Actor system.
# The base image name below is set during the Actor build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10

# First, copy just package.json and package-lock.json, since they are the only
# files that affect "npm install" in the next step. This keeps that layer
# cached and speeds up the build when only the source code changes.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. "npm list" may exit non-zero, so its failure is
# deliberately ignored with "|| true".
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container.
# Do this in the last step, to have a fast build if only the source code changed.
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{
    "name": "apify-project",
    "version": "0.0.1",
    "description": "",
    "author": "It's not you it's me",
    "license": "ISC",
    "dependencies": {
        "apify": "0.21.10",
        "moment": "latest"
    },
    "scripts": {
        "start": "node main.js"
    }
}
main.js
const Apify = require("apify");
const moment = require("moment");

/**
 * Blank record that every scraped property result starts from.
 *
 * The object (including its nested object and array) is frozen so that the
 * shared template can never be modified by accident; consumers are expected
 * to work on a deep copy of it. Fields that the scraper cannot fill keep the
 * placeholder values defined here (null, -1, "" or an empty array).
 */
const templateResult = Object.freeze({
    "title": "",
    "description": "",
    "surfaceArea": null,
    "surfaceAreaUnit": null,
    "price": -1,
    "currency": "",
    "numberOfRooms": null,
    "numberOfBedrooms": -1,
    "publishingDate": null,
    "monthlyRent": null,
    "weeklyRent": null,
    "marketedBy": Object.freeze({
        "name": "",
        "address": "",
        "phoneNumber": ""
    }),
    "postcode": null,
    "propertyType": null,
    "latitude": null,
    "longitude": null,
    "energyClass": null,
    "greenhouseGazClass": null,
    "image": Object.freeze([]),
    "siteURL": "",

    "siteHtml": null,
    "error": null,

    "statusCode": null,
    "htmlLength": -1,
    "captchaFound": false,
    "isHtmlPage": true,

    "host": ""
});

/**
 * Returns the first whole-number amount found in a price label.
 *
 * @param {string} priceString - Trimmed text of the price element.
 * @returns {number} The parsed amount, or -1 when the label has no digits.
 */
function parsePrice(priceString) {
    // Only the first run of digits/commas is used, so any further figure in
    // the same label is not merged into the amount.
    const amount = priceString.match(/\d[\d,]*/);
    if (!amount) {
        return -1;
    }
    // A regex with the g flag strips every thousands separator; a string
    // pattern would only remove the first one ("1,250,000" -> "1250,000").
    return Number.parseInt(amount[0].replace(/,/g, ''), 10);
}

/**
 * Determines the number of bedrooms.
 *
 * The key-feature text is checked first for an "N/M" pattern (N is used);
 * if that yields nothing, a number at the very start of the title is used.
 *
 * @param {string} keyFeatureText - Text of the key-feature item mentioning bedrooms ("" if none).
 * @param {string} title - Trimmed property heading.
 * @returns {number|null} Bedroom count, or null when it cannot be determined.
 */
function parseBedroomCount(keyFeatureText, title) {
    const fromFeature = keyFeatureText.match(/(\d+)\/\d*/);
    if (fromFeature) {
        const count = Number.parseInt(fromFeature[1], 10);
        // A zero count is treated as "not found" and falls through to the title.
        if (count > 0) {
            return count;
        }
    }
    const fromTitle = title.match(/^\d+/);
    return fromTitle ? Number.parseInt(fromTitle[0], 10) : null;
}

/**
 * Pulls the raw value that follows `"key":` in the page source.
 *
 * @param {string} html - Full page source.
 * @param {string} key - Property name to look for (a plain word, not escaped for regex use).
 * @param {string} valuePattern - Regex source describing the value to capture.
 * @returns {string|null} The value with all double quotes removed, or null if absent.
 */
function extractEmbeddedValue(html, key, valuePattern) {
    const found = html.match(new RegExp(`"${key}":(${valuePattern})`));
    return found ? found[1].replace(/"/g, '') : null;
}

/**
 * Builds one result record for a crawled page, starting from a deep copy of
 * `templateResult` and filling in whatever the page provides.
 *
 * @param {object} request - Crawler request; only `request.url` is read.
 * @param {string} html - Page source as passed to the page handler.
 * @param {Function} $ - Cheerio handle for the parsed page.
 * @returns {object} The populated result record.
 */
function buildResult(request, html, $) {
    // JSON round-trip gives an independent deep copy of the shared template.
    const result = JSON.parse(JSON.stringify(templateResult));

    const heading = $('#primaryContent h1').first().text().trim();
    const address = $('#primaryContent h1 + address').first().text().trim();
    result.title = heading + ' in ' + address;

    const tenure = $('div#description h3 + .sect').first().text().trim();
    const description = $('div#description p[itemprop="description"]').first().text().trim();
    result.description = tenure + '\n' + description;

    const priceString = $('#propertyHeaderPrice').first().text().trim();
    result.price = parsePrice(priceString);
    // The pound sign is not always the first character (the label may start
    // with wording), so look for it anywhere in the label.
    result.currency = priceString.includes('£') ? 'GBP' : '';

    const keyFeatureBedrooms = $('.key-features ul li')
        .filter((index, el) => $(el).text().includes('edrooms'))
        .first()
        .text();
    result.numberOfBedrooms = parseBedroomCount(keyFeatureBedrooms, heading);

    result.marketedBy.name = $('#secondaryAgentDetails a#aboutBranchLink').first().text().trim();
    result.marketedBy.address = $('#secondaryAgentDetails address').first().text().trim();
    result.marketedBy.phoneNumber = $('.agent-details-display .branch-telephone-number').first().text().trim();

    $('.gallery-thumbs-carousel meta[itemprop="contentUrl"]').each((index, el) => {
        result.image.push($(el).attr("content"));
    });

    result.siteURL = request.url;
    result.htmlLength = html.length;

    const urlMatches = request.url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
    result.host = urlMatches && urlMatches[1];

    // These values are read from key/value pairs embedded in the page source.
    // Unquoted values stop at "," or "}" so a value that is last in its
    // object does not swallow the closing brace.
    result.postcode = extractEmbeddedValue(html, 'postcode', '"[^"]*"');
    result.latitude = extractEmbeddedValue(html, 'latitude', '[^,}]*');
    result.longitude = extractEmbeddedValue(html, 'longitude', '[^,}]*');
    result.propertyType = extractEmbeddedValue(html, 'propertyType', '[^,}]*');

    return result;
}

/**
 * Actor entry point: enqueues every URL from `input.links`, crawls them with
 * a CheerioCrawler and pushes one `{ error, data }` item per page to the
 * "rightmove" dataset.
 */
Apify.main(async () => {
    const requestQueue = await Apify.openRequestQueue(
        `rightmove`
    );

    const input = await Apify.getInput();
    const dataset = await Apify.openDataset('rightmove');

    if (!input || !Array.isArray(input.links)) {
        throw new Error('Actor input must be an object with a "links" array of URLs.');
    }

    // The unique key combines a per-run timestamp with the URL: the timestamp
    // keeps the key different from earlier runs that used the same named
    // queue, and the URL keeps links added within the same instant from
    // colliding with each other and being dropped as duplicates.
    const runStamp = new Date().toISOString();
    for (const link of input.links) {
        console.log(link);
        await requestQueue.addRequest({
            url: link,
            uniqueKey: `${runStamp}|${link}`,
        });
    }

    const crawler = new Apify.CheerioCrawler({
        requestQueue,
        handlePageFunction: async ({ request, html, $ }) => {
            const title = $("title").text();
            console.log(`Request URL: ${request.url}`);
            console.log(`Title of ${request.url}: ${title}`);

            const output = {
                "error": null,
                "data": buildResult(request, html, $),
            };

            await dataset.pushData(output);
        },
    });

    await crawler.run();
});
Developer
Maintained by Community
Categories