1import { Actor } from 'apify';
2import { PuppeteerCrawler, Dataset } from 'crawlee';
3import log from '@apify/log';
4
5
6
7
8
9
10
11
12
// Start the Actor run before touching input or storages.
await Actor.init();

// Actor.getInput() resolves to null when the run was started without input.
// Fail the run with a readable message rather than crashing on the
// destructuring below.
const input = await Actor.getInput();
if (input === null || typeof input !== 'object') {
    await Actor.fail('Actor input is missing; expected an object with URL, OutputLimiterEnabled and OutputLimit.');
}

// URL                  - auction page to start from (NOTE: this binding shadows the global URL class).
// OutputLimiterEnabled - when truthy, scraping stops once OutputLimit lot pages were visited.
// OutputLimit          - number of lot pages to visit when the limiter is enabled.
const { URL, OutputLimiterEnabled, OutputLimit } = input ?? {};

// Scraped lots are collected here by the request handler and pushed to the
// dataset in one call after the crawl.
const output = [];

// Only start URLs on this site are accepted.
const BaseURL = "https://auctions.savills.co.uk/";
21
// Lot-page URLs that already have an entry in `output`. Crawlee re-runs the
// whole request handler when a request is retried, so without this guard a
// retry would append every lot scraped by the earlier attempt a second time.
const recordedLotUrls = new Set();

// Number of lot pages to visit before stopping. Infinity when the limiter is
// disabled or the configured limit is not a positive number (the original
// equality check never matched in those cases either).
const lotLimit = OutputLimiterEnabled && Number(OutputLimit) > 0 ? Number(OutputLimit) : Infinity;

// Container holding the "next lot" control on a lot detail page.
const LOT_CONTROLS_SELECTOR = 'div.lot-details-controls--top-right';

const crawler = new PuppeteerCrawler({
    // Both timeouts are 10 minutes: a single request walks through every lot
    // of the auction inside one handler invocation.
    navigationTimeoutSecs: 600,
    requestHandlerTimeoutSecs: 600,

    /**
     * Scrapes an auction listing: opens a lot from the list, then follows the
     * "next lot" control page by page, appending one entry per lot to `output`.
     *
     * @param {object} context - Crawling context supplied by PuppeteerCrawler.
     * @param {object} context.page - Puppeteer page loaded with the request URL.
     * @param {object} context.request - The request being handled.
     */
    requestHandler: async ({ page, request }) => {
        await page.waitForSelector('ul.lots-list', { visible: true });
        log.info('SAVILLS.CO.UK AUCTION DATA SCRAPER');

        const auctionDate = await page.$eval(
            '.auction-information__wrapper--left > h1',
            (heading) => heading.innerText,
        );
        log.info(`TARGET URL : ${request.url}`);
        log.info(`AUCTION DATE : ${auctionDate}`);

        // Register the navigation wait BEFORE clicking; otherwise the page can
        // finish navigating before the wait starts and the wait then times out.
        await Promise.all([
            page.waitForNavigation(),
            page.$eval('ul.lots-list', (list) => {
                // NOTE(review): children[1] skips the first child of the list —
                // presumably it is not a lot entry; confirm against the page markup.
                list.children[1]
                    .querySelector('div.lot-right > div.lot-content > a.lot-name')
                    .click();
            }),
        ]);

        let count = 0;
        while (true) {
            await page.waitForSelector('.lot-details-inner', { visible: true });
            count++;
            const lotUrl = page.url();
            log.info(`SCRAPING PAGE ${count}. PROPERTY URL : ${lotUrl}`);

            // Runs in the browser context, so it may only use what it defines itself.
            const propertyData = await page.$eval('.lot-details-inner', (property) => {
                // innerText of the first match, or null when the element is absent.
                const textOf = (selector) => {
                    const node = property.querySelector(selector);
                    return node ? node.innerText : null;
                };

                // innerHTML of the first match with all tags removed, or null when
                // absent. With keepParagraphBreaks, each closing </p> becomes a
                // newline before the tags are stripped.
                const strippedHtmlOf = (selector, keepParagraphBreaks) => {
                    const node = property.querySelector(selector);
                    if (!node) {
                        return null;
                    }
                    const html = keepParagraphBreaks
                        ? node.innerHTML.replaceAll('</p>', '\n')
                        : node.innerHTML;
                    return html.replaceAll(/<[^>]*>/g, '');
                };

                // All second-line address fragments concatenated without a separator.
                const addressLine2 = Array.from(
                    property.querySelectorAll('span.sv-property-intro__address-line-2'),
                    (line) => line.innerText,
                ).join('');

                const features = Array.from(
                    property.querySelectorAll('div.sv-key-features > ul > li'),
                    (item) => item.innerText,
                );

                // Returned as a one-element array to keep the PropertyData shape.
                return [{
                    LotInfo: textOf('.sv-property-intro__address-block > h4'),
                    AddressLine1: textOf('h1.sv-property-intro__address-line-1'),
                    AddressLine2: addressLine2 || null,
                    GuidePrice: textOf('span.sv-property-price__value > span:not([class])'),
                    SellValue: textOf('span.sv-property-price__value > span.value'),
                    Status: strippedHtmlOf('div.lot-status-container', false),
                    KeyFeatures: features.length > 0 ? features : null,
                    Description: strippedHtmlOf('div.lot-details-description', true),
                    ExtraData: strippedHtmlOf('div.additional-container', true),
                }];
            });

            // Skip lots already recorded by an earlier attempt of this request.
            if (!recordedLotUrls.has(lotUrl)) {
                recordedLotUrls.add(lotUrl);
                output.push({
                    Page: count,
                    URL: lotUrl,
                    AuctionDate: auctionDate,
                    PropertyData: propertyData,
                });
            }

            if (count >= lotLimit) {
                break;
            }

            // Throws when the controls container is missing, so an unexpected
            // page layout still fails the request instead of ending silently.
            const hasNext = await page.$eval(
                LOT_CONTROLS_SELECTOR,
                (controls) => controls.querySelector('.nextLot') !== null,
            );
            if (!hasNext) {
                break;
            }

            // Same ordering as above: wait registered together with the click.
            await Promise.all([
                page.waitForNavigation(),
                page.$eval(`${LOT_CONTROLS_SELECTOR} .nextLot`, (button) => button.click()),
            ]);
        }
    },

    /**
     * Records a request that failed for good as a dataset item holding its
     * URL and the error messages collected for it.
     */
    async failedRequestHandler({ request }) {
        await Dataset.pushData({
            url: request.url,
            succeeded: false,
            errors: request.errorMessages,
        });
    },

    launchContext: {
        launchOptions: {
            args: [
                '--disable-gpu',
                '--no-sandbox',
            ],
        },
    },
});
116
// Validate the start URL before crawling. A missing or non-string URL is
// treated as invalid instead of throwing on .toLowerCase(), and the URL must
// START with the base address — a URL that merely contains it somewhere
// (for example in a query string) is rejected.
const targetUrl = typeof URL === 'string' ? URL.trim() : '';

if (targetUrl.toLowerCase().startsWith(BaseURL)) {
    await crawler.run([targetUrl]);
    console.log('Crawler finished.');
    // Store everything collected by the request handler in one call.
    await Actor.pushData(output);
} else {
    // The rejected value goes in the data object, which is what the logger
    // expects as its second argument.
    log.error('Invalid URL: ', { URL });
}

await Actor.exit();