1import { chromium } from 'playwright';
2import { Actor } from 'apify';
3
/**
 * Actor entry point.
 *
 * Reads `googleMapsURL` from the actor input, opens it in headless Chromium,
 * scrolls the results panel until the end-of-list marker shows up, collects
 * every place link on the page and scrapes each of them with `processUrl`,
 * five at a time. Each scraped record is pushed with `Actor.pushData`.
 *
 * A failure on a single place URL is logged and skipped; any other failure
 * marks the run as failed through `Actor.fail`.
 */
(async () => {
  // init() is asynchronous; wait for it before touching input or storage.
  await Actor.init();

  const input = await Actor.getInput();
  const googleMapsURL = input ? input.googleMapsURL : undefined;
  if (typeof googleMapsURL !== 'string' || googleMapsURL.trim() === '') {
    await Actor.fail('Input field "googleMapsURL" is required and must be a non-empty string.');
    return;
  }

  console.time("Execution Time");
  const browser = await chromium.launch({ headless: true });

  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    let urls;
    try {
      await page.goto(googleMapsURL, { waitUntil: 'domcontentloaded' });
      await page.waitForSelector('[jstcache="3"]');
      await scrollResultsToEnd(page);
      urls = await collectPlaceUrls(page);
    } finally {
      // The listing tab is not needed once the links are collected (or on error).
      await page.close();
    }

    console.log(`Number of URLs extracted: ${urls.length}`);

    const concurrency = 5;
    for (let start = 0; start < urls.length; start += concurrency) {
      const batch = urls.slice(start, start + concurrency);

      // The handler never rejects, so one bad place cannot abort its batch.
      await Promise.all(batch.map(async (url) => {
        try {
          const details = await processUrl(url, context);
          // Awaited so a failed write is reported here instead of floating.
          await Actor.pushData(details);
          console.log(`Data pushed for URL: ${details.url}`);
        } catch (error) {
          console.error(`Error processing URL ${url}: ${error}`);
        }
      }));
    }
  } finally {
    // Close the browser before the actor exits, on success and on failure.
    await browser.close();
  }

  console.timeEnd("Execution Time");
  await Actor.exit();
})().catch(async (error) => {
  console.error(`Actor run failed: ${error}`);
  await Actor.fail(`Actor run failed: ${error}`);
});

/**
 * Scrolls the results panel in 500px steps until the page contains the
 * end-of-list message.
 *
 * Gives up, without throwing, when the panel cannot be located or when the
 * time budget is used up, so the caller can still collect the links that are
 * already loaded.
 *
 * @param {object} page - Page showing the search results; must provide
 *   `content()` and `evaluate()`.
 * @returns {Promise<void>}
 */
async function scrollResultsToEnd(page) {
  const scrollTimeoutMs = 5 * 60 * 1000;
  const scrollPauseMs = 250;
  const deadline = Date.now() + scrollTimeoutMs;

  while (Date.now() < deadline) {
    const pageContent = await page.content();
    if (pageContent.includes("You've reached the end of the list.")) {
      console.log("Reached the end of the list.");
      return;
    }

    const didScroll = await page.evaluate(() => {
      const scrollElement = document.evaluate('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
      if (!scrollElement) {
        return false;
      }
      scrollElement.scrollTop += 500;
      return true;
    });

    if (!didScroll) {
      console.warn('Results panel not found; continuing with the links already loaded.');
      return;
    }

    // Short pause between steps instead of polling the page in a tight loop.
    await new Promise((resolve) => setTimeout(resolve, scrollPauseMs));
  }

  console.warn(`Stopped scrolling after ${scrollTimeoutMs} ms without reaching the end of the list.`);
}

/**
 * Collects the href of every place link currently present on the page.
 *
 * @param {object} page - Page showing the search results; must provide
 *   `evaluate()`.
 * @returns {Promise<string[]>} Place URLs in document order, duplicates removed.
 */
async function collectPlaceUrls(page) {
  const hrefs = await page.evaluate(() => {
    const anchors = document.querySelectorAll('a[href*="https://www.google.com/maps/place"]');
    return Array.from(anchors, (anchor) => anchor.href);
  });

  // Identical hrefs would only produce duplicate records.
  return [...new Set(hrefs)];
}
73
74
/**
 * Opens one place page in a new tab and reads its details.
 *
 * The tab is closed in every case, including when navigation, the selector
 * wait or the in-page extraction fails, so failed URLs do not leave tabs open
 * in the shared context.
 *
 * @param {string} url - Place URL to open.
 * @param {object} context - Browser context used to open the tab; must
 *   provide `newPage()`.
 * @returns {Promise<object>} Object with `company`, `rating`, `reviews`,
 *   `category`, `address`, `website`, `phone` (each an empty string when the
 *   element is not found) plus the requested `url`.
 * @throws Rejects with whatever error `goto`, `waitForSelector` or `evaluate`
 *   raised.
 */
async function processUrl(url, context) {
  const page = await context.newPage();

  try {
    await page.goto(url, { waitUntil: 'domcontentloaded' });
    await page.waitForSelector('[jstcache="3"]');

    // Runs inside the browser page, so it can only use what is defined here.
    const details = await page.evaluate(() => {
      // innerText of the first element matching a CSS selector, or ''.
      const getText = (selector) => {
        const element = document.querySelector(selector);
        return element ? element.innerText : '';
      };

      // href of the first selector that matches an element, or ''.
      const getHref = (primarySelector, fallbackSelector) => {
        let element = document.querySelector(primarySelector);
        if (!element) {
          element = document.querySelector(fallbackSelector);
        }
        return element && element.href ? element.href : '';
      };

      // innerText of the first node matching an XPath, or ''.
      const getTextFromXPath = (xpath) => {
        const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
        return result.singleNodeValue ? result.singleNodeValue.innerText : '';
      };

      const companyName = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[1]/h1');
      const rating = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[1]/span[1]');
      // The review count is stripped of its surrounding parentheses.
      const numberReviews = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[2]/span/span').replace(/\(|\)/g, '');
      const category = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[2]/span/span/button');

      return {
        company: companyName,
        rating: rating,
        reviews: numberReviews,
        category: category,
        address: getText('button[data-tooltip="Copy address"]'),
        website: getHref('a[data-tooltip="Open website"]', 'a[data-tooltip="Open menu link"]'),
        phone: getText('button[data-tooltip="Copy phone number"]')
      };
    });

    return { ...details, url };
  } finally {
    await page.close();
  }
}