1const Apify = require('apify');
2const { load } = require('cheerio');
3const { utils: { log } } = Apify;
// Module-level running count of review cards visited by `handleList`.
// It is incremented once per card and compared against the `maxItems`
// input to decide whether a card is still pushed to the dataset.
let itemsCounter = 0;
5exports.handleStart = async ({ request, page }, requestQueue) => {
6 const { startReviews, maxItems } = await Apify.getInput();
7 log.info('DEFAULT HANDLER REQUEST')
8 const url = new URL(page.url());
9 url.searchParams.set('languages', 'all')
10 const body = await page.content();
11
12 const $ = load(body)
13 if (maxItems) {
14 let pages = Math.ceil(maxItems / 20);
15
16 log.info('Number of pages')
17 log.info(pages)
18 for (let index = 1; index <= pages; index++) {
19 if (index == 1) {
20 url.searchParams.delete('page');
21 } else {
22 url.searchParams.set('page', String(index))
23 }
24 let urlStarts = url.toString();
25 startReviews?.forEach((starRewview) => {
26 urlStarts += `&stars=${starRewview}`
27 })
28 await requestQueue.addRequest({
29 url: urlStarts,
30 userData: {
31 label: 'LIST'
32 }
33 })
34 }
35 } else {
36 const totalPages = Number($('a[data-pagination-button-last-link]').text());
37 log.info('Number of pages to scrape')
38 log.info(totalPages)
39 for (let index = 1; index <= totalPages; index++) {
40 if (index == 1) {
41 url.searchParams.delete('page');
42 }
43 url.searchParams.set('page', String(index))
44 let urlStarts = url.toString();
45 startReviews?.forEach((starRewview) => {
46 urlStarts += `&stars=${starRewview}`
47 })
48 await requestQueue.addRequest({
49 url: urlStarts,
50 userData: {
51 label: 'DETAIL'
52 }
53 })
54 }
55 }
56
57 log.info(`Handle Start URLs`);
58};
59
60exports.handleList = async ({ request, page }) => {
61 const { maxItems } = await Apify.getInput();
62 log.info(`Handle pagination`);
63 await page.waitForSelector('section[data-business-unit-reviews-section="true"]')
64 const html = await page.content();
65
66 const $ = load(html);
67 $('article[data-service-review-card-paper="true"]').each(async (_i, article) => {
68 itemsCounter++;
69 let item = {};
70 item.userName = $(article).find(`div[data-consumer-name-typography="true"]`).text()
71 item.userCountry = $(article).find(`span[data-consumer-country-typography]`).text()
72 item.reviewRating = Number($(article).find('div[data-service-review-rating]').attr('data-service-review-rating'))
73 item.reviewDate = $(article).find(`time[data-service-review-date-time-ago]`).attr('datetime')
74 item.reviewTitle = $(article).find(`a[data-review-title-typography]`).text()
75 item.reviewText = $(article).find(`p[data-service-review-text-typography]`).text()
76 item.verified = $(article).find(`button[data-review-label-tooltip-trigger="true"]`).text()
77 ? true
78 : false
79 item.businessReply = $(article).find('[data-service-review-business-reply-text-typography="true"]').length == 0
80 ? null
81 : $(article).find('[data-service-review-business-reply-text-typography="true"]').text()
82 if (itemsCounter <= maxItems) {
83 await Apify.pushData(item)
84 }
85 })
86
87 log.info('Items push to dataset default')
88};
89
90exports.handleDetail = async ({ request, page }) => {
91 log.info(`Handle pagination`);
92 await page.waitForSelector('section[data-business-unit-reviews-section="true"]')
93 const html = await page.content();
94 const $ = load(html);
95 $('article[data-service-review-card-paper="true"]').each(async (_i, article) => {
96 let item = {};
97 item.userName = $(article).find(`div[data-consumer-name-typography="true"]`).text()
98 item.userCountry = $(article).find(`span[data-consumer-country-typography]`).text()
99 item.reviewRating = $(article).find('div[data-service-review-rating]').attr('data-service-review-rating')
100 item.reviewDate = $(article).find(`time[data-service-review-date-time-ago]`).attr('datetime')
101 item.reviewTitle = $(article).find(`a[data-review-title-typography]`).text()
102 item.reviewText = $(article).find(`p[data-service-review-text-typography]`).text()
103 item.verified = $(article).find(`button[data-review-label-tooltip-trigger="true"]`).text()
104 ? true
105 : false
106 item.businessReply = $(article).find('[data-service-review-business-reply-text-typography="true"]')
107 ? $(article).find('[data-service-review-business-reply-text-typography="true"]').text()
108 : null
109 await Apify.pushData(item)
110 })
111 log.info('Items push to dataset default')
112};