// Apify SDK entry point; `log` is the SDK logger shared by all route handlers below.
const Apify = require("apify");

const {
  utils: { log },
} = Apify;
6
7exports.handleStart = async ({ request, page, crawler }) => {
8 log.info("[START]: Start.");
9
10 const lastPageNumber = await page.$$eval("li.paginator__list-item", $list => {
11 const classContainer = Array.from($list)
12 const item = classContainer[classContainer.length - 2];
13
14 return parseInt(item.querySelector('a').textContent);
15 });
16
17 const pageUrl = page.url();
18
19 for (let index = 1; index < lastPageNumber; index++) {
20 const link = pageUrl.concat(`&stranka=${index}`);
21 const request = {
22 userData: {
23 label: "LIST",
24 },
25 url: link,
26 };
27 log.info(`Adding listing page url: ${request.url}`);
28 await crawler.requestQueue.addRequest(request);
29 }
30
31 log.info("[START]: Listing pages pushed.");
32};
33
34exports.handleList = async ({ request, page, crawler }) => {
35 log.info("[LIST]: Start.");
36
37 const listings = await page.$$eval(".advert-list-items__content", ($listing) => {
38 const items = [];
39 $listing.forEach(($item) => {
40 const link = $item.querySelector("h2 > a").getAttribute("href");
41 items.push({
42 userData: {
43 label: "DETAIL",
44 },
45 url: link,
46 });
47 });
48 return items;
49 });
50
51 log.info("[LIST]: Scraped.");
52
53 for (let index = 0; index < listings.length; index++) {
54 const request = listings[index];
55 await crawler.requestQueue.addRequest(request);
56 }
57
58 log.info("[LIST]: Listings pushed.");
59};
60
61exports.handleDetail = async ({ request, page }) => {
62 log.info("[DETAIL]: Getting detail info.");
63
64 let {
65 buildingType,
66 location,
67 price
68 } = await page.$$eval(".advert-detail-fixed-top__content-info", $list => {
69 const item = Array.from($list)[0];
70
71 const headerText = item.querySelector("h4").textContent.trim();
72 const buildingType = headerText.split(",")[0].trim();
73 const location = item.querySelector("p").textContent.trim();
74
75 const highlight = item.querySelector("strong");
76 const price = highlight.querySelector("span").textContent.trim();
77
78 flag = true;
79
80 return {
81 "buildingType" : buildingType,
82 "location" : location,
83 "price" : price
84 }
85 });
86
87 let mainImage = await page.$$eval(".gallery__main-img-inner", $list => {
88 const item = Array.from($list)[0];
89
90 const imageUrl = item.querySelector("img").src;
91
92 return { "imageUrl" : imageUrl }
93 });
94
95 let smallImages = await page.$$eval(".gallery__item--image", $list => {
96 const items = [];
97
98 $list.forEach($item => {
99 const imageContainer = $item.querySelector("a");
100 const imageUrl = imageContainer.querySelector("img").src;
101
102 items.push({ "imageUrl" : imageUrl });
103 });
104
105 return items;
106 });
107
108 let images = [ mainImage ].concat(smallImages);
109
110 let propertyElList = await page.$$eval(".detail-information__data-item", $list => {
111 const items = [];
112
113 $list.forEach($item => {
114 const key = $item.querySelector("span:nth-child(1)").textContent.trim();
115 const value = $item.querySelector("span:nth-child(2)").textContent.trim();
116
117 items.push({ "key": key, "value" : value });
118 });
119
120 return items;
121 });
122
123 let {
124 description
125 } = await page.$$eval(".advert-description__text-inner-inner", $list => {
126 const item = Array.from($list)[0];
127
128 return { "description" : item.textContent.trim() };
129 });
130
131 await Apify.pushData({buildingType, location, price, "images" : images, "properties" : propertyElList, description });
132
133 log.info("[DETAIL]: Detail info done.");
134};