RightMove avatar
RightMove

Deprecated

Pricing

Pay per usage

Go to Store
RightMove

RightMove

Deprecated

Developed by

Nikolajus Elmutis

Nikolajus Elmutis

Maintained by Community

0.0 (0)

Pricing

Pay per usage

1

Total users

43

Monthly users

1

Runs succeeded

0%

Last modified

3 years ago

Dockerfile

# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
# NOTE(review): the image tag below matches the "apify" version pinned in
# package.json (0.21.10) - presumably the two should be bumped together; confirm.
FROM apify/actor-node-basic:v0.21.10
# Copy just package.json and package-lock.json first, since they should be
# the only files that affect "npm install" in the next step; this speeds up
# the build when only the source code changed.
COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. "npm list" is wrapped in "|| true" so that a non-zero
# exit status from it does not abort the rest of the command chain.
# The Node.js and NPM versions are echoed into the build log as well.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./
# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

{
"name": "apify-project",
"version": "0.0.1",
"description": "",
"author": "It's not you it's me",
"license": "ISC",
"dependencies": {
"apify": "0.21.10",
"moment": "latest"
},
"scripts": {
"start": "node main.js"
}
}

main.js

1const Apify = require("apify");
2const moment = require("moment");
3
/**
 * Blank output record for one scraped listing page.
 *
 * The page handler deep-copies this object for every request and then fills
 * in the fields it can extract, so the values below are the defaults that end
 * up in the dataset whenever a field is not (or cannot be) extracted.
 * Several fields (e.g. surfaceArea, numberOfRooms, publishingDate, the rent
 * and energy-class fields, siteHtml, statusCode, captchaFound, isHtmlPage)
 * are not written by the handler visible in this file and therefore always
 * keep their default.
 *
 * Key names are part of the output schema and must not be renamed.
 */
const templateResult = {
    // Listing heading and free-text description.
    "title": "",
    "description": "",
    "surfaceArea": null,
    "surfaceAreaUnit": null,
    // -1 is the "not parsed" sentinel for numeric fields.
    "price": -1,
    "currency": "",
    "numberOfRooms": null,
    "numberOfBedrooms": -1,
    "publishingDate": null,
    "monthlyRent": null,
    "weeklyRent": null,
    // Details of the agent block shown on the listing page.
    "marketedBy": {
        "name": "",
        "address": "",
        "phoneNumber": ""
    },
    "postcode": null,
    "propertyType": null,
    "latitude": null,
    "longitude": null,
    "energyClass": null,
    "greenhouseGazClass": null,
    // Image URLs collected from the gallery carousel.
    "image": [],
    "siteURL": "",

    "siteHtml": null,
    "error": null,

    "statusCode": null,
    // Length of the downloaded HTML; -1 until set by the handler.
    "htmlLength": -1,
    "captchaFound": false,
    "isHtmlPage": true,

    "host": ""
};
40
41
/** Captures the host part of an http(s) URL. */
const HOST_PATTERN = /^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i;

/**
 * Patterns for values embedded as JSON fragments in the page HTML.
 * Each has exactly one capture group holding the raw value. The unquoted
 * patterns stop at "," or "}" so that a value at the end of an object does
 * not drag the closing brace along.
 */
const POSTCODE_PATTERN = /"postcode":"([^"]*)"/;
const LATITUDE_PATTERN = /"latitude":([^,}]*)/;
const LONGITUDE_PATTERN = /"longitude":([^,}]*)/;
const PROPERTY_TYPE_PATTERN = /"propertyType":([^,}]*)/;

/**
 * Extracts the integer price from the header price text.
 *
 * @param {string} priceString - Raw text such as "£1,250,000" or "£950 pcm".
 * @returns {number} The parsed price, or -1 when the text holds no number.
 */
function parsePrice(priceString) {
    const numericText = priceString
        .replace(/[a-z]/gi, '')
        // Every thousands separator must go; a string pattern would only
        // remove the first one and truncate "1,250,000" to 1250.
        .replace(/,/g, '')
        .replace('£', '')
        .trim();
    const price = Number.parseInt(numericText, 10);
    // parseInt signals failure with NaN instead of throwing.
    return Number.isNaN(price) ? -1 : price;
}

/**
 * Determines the number of bedrooms.
 *
 * Prefers an "N/M" figure in the bedrooms key-feature text (taking N) and
 * falls back to a number at the very start of the listing heading.
 *
 * @param {string} keyFeatureText - Text of the first key feature mentioning bedrooms ("" if none).
 * @param {string} heading - Listing heading text.
 * @returns {?number} Bedroom count, or null when neither source yields one.
 */
function parseBedrooms(keyFeatureText, heading) {
    const fromKeyFeature = keyFeatureText.match(/(\d+)\/\d*/);
    if (fromKeyFeature) {
        return Number.parseInt(fromKeyFeature[1], 10);
    }

    const fromHeading = heading.match(/^\d+/);
    return fromHeading ? Number.parseInt(fromHeading[0], 10) : null;
}

/**
 * Returns the first capture group of `pattern` in `text` with all double
 * quotes removed, or null when the pattern does not match.
 *
 * @param {string} text
 * @param {RegExp} pattern - Must contain one capture group.
 * @returns {?string}
 */
function extractEmbeddedValue(text, pattern) {
    const match = text.match(pattern);
    return match ? match[1].replace(/"/g, '') : null;
}

/**
 * Builds one output record from a downloaded listing page.
 *
 * @param {Function} $ - Cheerio handle for the page.
 * @param {Object} request - Crawler request; only `url` is read.
 * @param {string} html - Raw page HTML.
 * @returns {Object} A filled-in deep copy of `templateResult`.
 */
function buildResult($, request, html) {
    // Deep copy so the nested marketedBy object and image array of the
    // shared template are never mutated.
    const result = JSON.parse(JSON.stringify(templateResult));

    const heading = $('#primaryContent h1').first().text().trim();
    const address = $('#primaryContent h1 + address').first().text().trim();
    result.title = `${heading} in ${address}`;

    const tenure = $('div#description h3 + .sect').first().text().trim();
    const description = $('div#description p[itemprop="description"]').first().text().trim();
    result.description = `${tenure}\n${description}`;

    const priceString = $('#propertyHeaderPrice').first().text().trim();
    result.price = parsePrice(priceString);
    result.currency = priceString[0] === '£' ? 'GBP' : '';

    // 'edrooms' matches both "Bedrooms" and "bedrooms".
    const keyFeatureBedrooms = $('.key-features ul li')
        .filter((index, el) => $(el).text().includes('edrooms'))
        .first()
        .text();
    result.numberOfBedrooms = parseBedrooms(keyFeatureBedrooms, heading);

    result.marketedBy.name = $('#secondaryAgentDetails a#aboutBranchLink').first().text().trim();
    result.marketedBy.address = $('#secondaryAgentDetails address').first().text().trim();
    result.marketedBy.phoneNumber = $('.agent-details-display .branch-telephone-number').first().text().trim();

    $('.gallery-thumbs-carousel meta[itemprop="contentUrl"]').each((index, el) => {
        result.image.push($(el).attr("content"));
    });

    result.siteURL = request.url;
    result.htmlLength = html.length;

    const urlMatches = request.url.match(HOST_PATTERN);
    result.host = urlMatches && urlMatches[1];

    result.postcode = extractEmbeddedValue(html, POSTCODE_PATTERN);
    result.latitude = extractEmbeddedValue(html, LATITUDE_PATTERN);
    result.longitude = extractEmbeddedValue(html, LONGITUDE_PATTERN);
    result.propertyType = extractEmbeddedValue(html, PROPERTY_TYPE_PATTERN);

    return result;
}

/**
 * Actor entry point.
 *
 * Reads `links` (an array of listing URLs) from the actor input, enqueues
 * them in the named request queue "rightmove", crawls each page with a
 * CheerioCrawler and pushes `{ error: null, data: <record> }` for every page
 * into the named dataset "rightmove".
 */
Apify.main(async () => {
    const requestQueue = await Apify.openRequestQueue(
        `rightmove`
    );

    const input = await Apify.getInput();
    const dataset = await Apify.openDataset('rightmove');

    if (!input || !Array.isArray(input.links)) {
        throw new Error('Actor input must be an object with a "links" array of listing URLs.');
    }

    // A per-run stamp in the unique key keeps the "every run re-adds its
    // links" behavior of the original timestamp key (presumably meant to get
    // past de-duplication in the named queue - confirm). The index and URL
    // make the key distinct per link: a bare timestamp string only has
    // one-second resolution, so links added within the same second collided
    // and all but the first were silently dropped.
    const runStamp = Date.now();
    for (const [index, link] of input.links.entries()) {
        console.log(link);
        await requestQueue.addRequest({
            url: link,
            uniqueKey: `${runStamp}-${index}-${link}`
        });
    }

    const crawler = new Apify.CheerioCrawler({
        //...settings,
        requestQueue,
        handlePageFunction: async ({ request, html, $ }) => {
            const pageTitle = $("title").text();
            console.log(`Request URL: ${request.url}`);
            console.log(`Title of ${request.url}: ${pageTitle}`);

            const output = {
                "error": null,
                "data": buildResult($, request, html),
            };

            await dataset.pushData(output);
        }
    });

    await crawler.run();
});