RightMove
Deprecated
Pricing
Pay per usage
Go to Store
RightMove
Deprecated
0.0 (0)
Pricing
Pay per usage
1
Total users
43
Monthly users
1
Runs succeeded
0%
Last modified
3 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10

# Copy just package.json and package-lock.json first, since they should be
# the only files that affect "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "apify": "0.21.10", "moment": "latest" }, "scripts": { "start": "node main.js" }}
main.js
1const Apify = require("apify");2const moment = require("moment");3
/**
 * Blank record describing one scraped property listing.
 *
 * This object is a template only: callers deep-copy it and fill in the copy,
 * so the values here are the defaults a field keeps when nothing is scraped
 * for it. Numeric fields use -1 or null as "unknown"; text fields use ''.
 */
const templateResult = {
    // Listing details
    title: '',
    description: '',
    surfaceArea: null,
    surfaceAreaUnit: null,
    price: -1,
    currency: '',
    numberOfRooms: null,
    numberOfBedrooms: -1,
    publishingDate: null,
    monthlyRent: null,
    weeklyRent: null,
    marketedBy: {
        name: '',
        address: '',
        phoneNumber: '',
    },
    postcode: null,
    propertyType: null,
    latitude: null,
    longitude: null,
    energyClass: null,
    // NOTE: the spelling "Gaz" is part of the output schema; do not rename.
    greenhouseGazClass: null,
    image: [],
    siteURL: '',

    // Raw page / failure details
    siteHtml: null,
    error: null,

    // Fetch diagnostics
    statusCode: null,
    htmlLength: -1,
    captchaFound: false,
    isHtmlPage: true,

    host: '',
};
/**
 * Reads the leading number out of a price label such as "£1,250,000" or "£950 pcm".
 *
 * @param {string} priceString - Trimmed text of the price element.
 * @returns {number} The integer price, or -1 when the label holds no number.
 */
function parsePrice(priceString) {
    // Strip letters, pound signs and EVERY thousands separator; a plain-string
    // replace(',', '') would only remove the first comma ("1,250,000" -> 1250).
    const digits = priceString.replace(/[a-z]/gi, '').replace(/[£,]/g, '').trim();
    const price = parseInt(digits, 10);
    // parseInt reports failure by returning NaN, it never throws.
    return Number.isNaN(price) ? -1 : price;
}

/**
 * Works out the bedroom count from the "…edrooms" key feature, falling back to
 * a number at the very start of the listing heading.
 *
 * @param {string} keyFeatureText - Text of the first key feature mentioning bedrooms ('' if none).
 * @param {string} heading - Listing heading text.
 * @returns {number|null} The bedroom count, or null when neither source yields a number.
 */
function parseBedroomCount(keyFeatureText, heading) {
    // Key features are matched as "<digits>/<digits>"; the first number is used.
    const ratio = keyFeatureText.match(/(\d*)\/\d*/);
    let count = ratio ? parseInt(ratio[1], 10) : NaN;

    // Falsy covers both "not found" (NaN) and 0; in either case try the heading.
    if (!count) {
        const leading = heading.match(/^\d+/);
        count = leading ? parseInt(leading[0], 10) : NaN;
    }

    return Number.isNaN(count) ? null : count;
}

/**
 * Pulls the raw value that follows `"<key>":` in JSON embedded in the page HTML.
 * The value runs up to the next ',' or '}' and has its double quotes removed.
 *
 * @param {string} html - Full page HTML.
 * @param {string} key - Property name to look for (a fixed identifier, not user input).
 * @returns {string|null} The value as text, or null when the key is absent.
 */
function extractEmbeddedValue(html, key) {
    const found = html.match(new RegExp(`"${key}":([^,}]*)`));
    return found ? found[1].replace(/"/g, '') : null;
}

/**
 * Builds one result record for a fetched listing page.
 *
 * @param {object} context - Page context handed to the crawler's page handler.
 * @param {object} context.request - Request being handled; only `url` is read.
 * @param {string} context.html - Raw HTML of the page.
 * @param {Function} context.$ - Cheerio handle for the parsed page.
 * @returns {object} A filled-in deep copy of `templateResult`.
 */
function scrapeProperty({ request, html, $ }) {
    // Deep copy so the nested marketedBy object and image array are never shared.
    const result = JSON.parse(JSON.stringify(templateResult));

    const heading = $('#primaryContent h1').first().text().trim();
    const address = $('#primaryContent h1 + address').first().text().trim();
    result.title = `${heading} in ${address}`;

    const tenure = $('div#description h3 + .sect').first().text().trim();
    const description = $('div#description p[itemprop="description"]').first().text().trim();
    result.description = `${tenure}\n${description}`;

    const priceString = $('#propertyHeaderPrice').first().text().trim();
    result.price = parsePrice(priceString);
    result.currency = priceString.startsWith('£') ? 'GBP' : '';

    // 'edrooms' matches both "Bedrooms" and "bedrooms".
    const keyFeatureBedrooms = $('.key-features ul li').filter(function () {
        return $(this).text().includes('edrooms');
    }).first().text();
    result.numberOfBedrooms = parseBedroomCount(keyFeatureBedrooms, heading);

    result.marketedBy.name = $('#secondaryAgentDetails a#aboutBranchLink').first().text().trim();
    result.marketedBy.address = $('#secondaryAgentDetails address').first().text().trim();
    result.marketedBy.phoneNumber = $('.agent-details-display .branch-telephone-number').first().text().trim();

    $('.gallery-thumbs-carousel meta[itemprop="contentUrl"]').each((index, el) => {
        result.image.push($(el).attr('content'));
    });

    result.siteURL = request.url;
    result.htmlLength = html.length;

    const urlMatches = request.url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
    result.host = urlMatches && urlMatches[1];

    // Location and type are read from JSON embedded in the page markup.
    // Each field keeps the template's null when its key is not present.
    const postcode = html.match(/"postcode":"([^"]*)"/);
    if (postcode) {
        result.postcode = postcode[1];
    }
    for (const key of ['latitude', 'longitude', 'propertyType']) {
        const value = extractEmbeddedValue(html, key);
        if (value !== null) {
            result[key] = value;
        }
    }

    return result;
}

/**
 * Actor entry point: enqueues every URL from `input.links`, crawls them with
 * CheerioCrawler and pushes one `{ error, data }` item per page to the
 * 'rightmove' dataset.
 */
Apify.main(async () => {
    const requestQueue = await Apify.openRequestQueue(
        `rightmove`
    );

    const input = await Apify.getInput();
    if (!input || !Array.isArray(input.links)) {
        throw new Error('Input must be an object with a "links" array of property page URLs.');
    }
    const dataset = await Apify.openDataset('rightmove');

    // The key must differ per link: a bare timestamp string has one-second
    // resolution, so links added within the same second collided and were
    // dropped as duplicates. The run timestamp is kept in the key, presumably
    // so a URL can be crawled again by a later run — TODO confirm intent.
    const runStamp = Date.now();
    for (const [index, link] of input.links.entries()) {
        console.log(link);
        await requestQueue.addRequest({
            url: link,
            uniqueKey: `${link}#${runStamp}-${index}`,
        });
    }

    const crawler = new Apify.CheerioCrawler({
        requestQueue,
        handlePageFunction: async ({ request, html, $ }) => {
            const pageTitle = $('title').text();
            console.log(`Request URL: ${request.url}`);
            console.log(`Title of ${request.url}: ${pageTitle}`);

            const output = {
                error: null,
                data: scrapeProperty({ request, html, $ }),
            };

            await dataset.pushData(output);
        },
    });

    await crawler.run();
});