1import type { Page } from 'playwright';
2import { SELECTORS, DAYS_OF_WEEK } from '../constants.js';
3import type {
4 BusinessLead,
5 OpeningHours,
6 ReviewData,
7 ExtractionOptions,
8} from '../types.js';
9import { extractCoordinatesFromUrl, extractPlaceIdFromUrl } from './url-parser.js';
10import { randomDelay } from './anti-detection.js';
11import { scrollDetailPanel } from './scroll.js';
12import { extractEmailsFromWebsite } from './email-extractor.js';
13
14
15
16
/**
 * Builds a `BusinessLead` for the place currently shown on `page`.
 *
 * Coordinates and place id are parsed from `options.url`; everything else is read from
 * the page. The steps are awaited one after another because several helpers interact
 * with the page (they click the opening-hours button, the photos tab and the reviews
 * button), so they are intentionally not parallelised.
 *
 * @param page - Playwright page that already displays the place detail panel.
 * @param options - Source URL, originating search query and feature toggles
 *   (`extractPhotos`, `extractReviews`, `extractEmails`, `maxReviews`).
 * @returns The assembled lead. Fields that could not be read are `null` (or an empty
 *   array); `reviews` stays `undefined` when review extraction is disabled, and `name`
 *   falls back to `'Unknown Business'`.
 */
export async function extractBusinessDetails(
  page: Page,
  options: ExtractionOptions
): Promise<BusinessLead> {
  const { url, searchQuery, extractPhotos, extractReviews, extractEmails, maxReviews } = options;

  // Keeps legitimate zeros (e.g. latitude 0) while mapping NaN / missing values to null.
  // `value || null` would wrongly discard 0.
  const finiteOrNull = (value: unknown): number | null =>
    typeof value === 'number' && Number.isFinite(value) ? value : null;

  // Data that can be derived from the URL alone.
  const coords = extractCoordinatesFromUrl(url);
  const placeId = extractPlaceIdFromUrl(url);

  // Scroll the detail panel before querying it.
  await scrollDetailPanel(page);

  const name = await safeTextContent(page, SELECTORS.PLACE_NAME);

  const category = await extractCategory(page);
  const categories = category ? [category] : [];

  const address = await extractDataItem(page, 'address');
  const phone = await extractDataItem(page, 'phone');
  const website = await extractWebsite(page);

  // Ratings may use a decimal comma ("4,5"); text that is not a number yields null
  // instead of NaN.
  const ratingText = await safeTextContent(page, SELECTORS.PLACE_RATING);
  const rating = ratingText ? finiteOrNull(parseFloat(ratingText.replace(',', '.'))) : null;

  const reviewsCount = await extractReviewsCount(page);

  const priceLevel = await extractPriceLevel(page);

  const plusCode = await extractDataItem(page, 'oloc');

  const openingHours = await extractOpeningHours(page);
  const isOpen = await checkIfOpen(page);

  let photoUrls: string[] = [];
  let mainPhotoUrl: string | null = null;
  if (extractPhotos) {
    photoUrls = await extractPhotoUrls(page);
    mainPhotoUrl = photoUrls[0] || null;
  }

  let reviews: ReviewData[] | undefined;
  if (extractReviews) {
    reviews = await extractReviewsData(page, maxReviews);
  }

  let emails: string[] = [];
  let email: string | null = null;
  if (extractEmails && website) {
    try {
      emails = await extractEmailsFromWebsite(page, website);
      email = emails[0] || null;
    } catch {
      // Email discovery is best-effort: a failure here must not discard the rest of
      // the lead, so the lead is returned without emails.
    }
  }

  return {
    name: name || 'Unknown Business',
    placeId,
    address,
    phone,
    website,
    email,
    emails,
    latitude: finiteOrNull(coords?.latitude),
    longitude: finiteOrNull(coords?.longitude),
    plusCode,
    category,
    categories,
    priceLevel,
    rating,
    reviewsCount,
    reviews,
    openingHours,
    isOpen,
    photoUrls,
    mainPhotoUrl,
    googleMapsUrl: url,
    scrapedAt: new Date().toISOString(),
    searchQuery,
  };
}
110
111
112
113
/**
 * Reads the trimmed text content of the first element matching `selector`.
 *
 * @param page - Page to query.
 * @param selector - Selector passed to `page.$`.
 * @returns The trimmed text, or `null` when no element matches, the text is empty
 *   after trimming, or the lookup throws.
 */
async function safeTextContent(page: Page, selector: string): Promise<string | null> {
  try {
    const handle = await page.$(selector);
    const raw = handle ? await handle.textContent() : null;
    const trimmed = raw?.trim();
    return trimmed ? trimmed : null;
  } catch {
    // Any lookup failure is reported as "no value".
    return null;
  }
}
124
125
126
127
/**
 * Reads the value of an info button identified by its `data-item-id`.
 *
 * Lookup order: a button whose id equals `itemId`, then a button whose id starts with
 * `itemId`, and — only for `'authority'` — the anchor carrying that id. The value is
 * taken from the element's `aria-label` (the part after the first colon when there is
 * one); if the label is missing or empty, the element's text content is used.
 *
 * @param page - Page to query.
 * @param itemId - Value (or prefix) of the `data-item-id` attribute.
 * @returns The trimmed value, or `null` when nothing matches or the lookup throws.
 */
async function extractDataItem(page: Page, itemId: string): Promise<string | null> {
  try {
    // `||` short-circuits, so later selectors are only queried when earlier ones miss.
    const target =
      (await page.$(`button[data-item-id="${itemId}"]`)) ||
      (await page.$(`button[data-item-id^="${itemId}"]`)) ||
      (itemId === 'authority' ? await page.$('a[data-item-id="authority"]') : null);

    if (!target) {
      return null;
    }

    const label = await target.getAttribute('aria-label');
    if (!label) {
      // No usable label: fall back to the visible text.
      const visible = await target.textContent();
      return visible?.trim() || null;
    }

    // Labels look like "<caption>: <value>"; keep only the value when a colon exists.
    const separatorAt = label.indexOf(':');
    const value = separatorAt > -1 ? label.substring(separatorAt + 1) : label;
    return value.trim();
  } catch {
    return null;
  }
}
163
164
165
166
/**
 * Reads the `href` of the anchor marked `data-item-id="authority"`.
 *
 * @param page - Page to query.
 * @returns The href as found on the element, or `null` when the anchor is missing,
 *   has no (or an empty) href, or the lookup throws.
 */
async function extractWebsite(page: Page): Promise<string | null> {
  try {
    const link = await page.$('a[data-item-id="authority"]');
    return link ? (await link.getAttribute('href')) || null : null;
  } catch {
    return null;
  }
}
178
179
180
181
/**
 * Reads the business category label.
 *
 * Two selectors are tried in order: a button whose `jsaction` mentions "category",
 * then the `.DkEaL` element. The first selector that matches an element decides the
 * result — if that element has no text, `null` is returned without trying the next
 * selector.
 *
 * @param page - Page to query.
 * @returns The trimmed category text, or `null` when unavailable or on error.
 */
async function extractCategory(page: Page): Promise<string | null> {
  const candidateSelectors = ['button[jsaction*="category"]', '.DkEaL'];

  try {
    for (const selector of candidateSelectors) {
      const node = await page.$(selector);
      if (!node) {
        continue;
      }
      const label = await node.textContent();
      return label?.trim() || null;
    }
    return null;
  } catch {
    return null;
  }
}
203
204
205
206
/**
 * Reads the total number of reviews shown for the place.
 *
 * First scans `span` elements whose `aria-label` mentions "review" for a pattern such
 * as "1,234 reviews". If none matches, falls back to the first number found in the
 * last `span` inside `.F7nice` (e.g. "(1,234)"). Commas are treated as thousands
 * separators.
 *
 * @param page - Page to query.
 * @returns The review count, or `null` when no number can be found or the lookup
 *   throws. Never returns `NaN`.
 */
async function extractReviewsCount(page: Page): Promise<number | null> {
  try {
    const labelledSpans = await page.$$('span[aria-label*="review"]');

    for (const span of labelledSpans) {
      const ariaLabel = await span.getAttribute('aria-label');
      const match = ariaLabel?.match(/(\d[\d,]*)\s*review/i);
      if (match) {
        return parseInt(match[1].replace(/,/g, ''), 10);
      }
    }

    const countElement = await page.$('.F7nice span:last-child');
    const text = countElement ? await countElement.textContent() : null;
    // The match must start with a digit: a pattern that accepts a bare comma would
    // match "," in e.g. ", (1,234)" and parse to NaN.
    const fallback = text?.match(/(\d[\d,]*)/);
    if (fallback) {
      return parseInt(fallback[1].replace(/,/g, ''), 10);
    }

    return null;
  } catch {
    return null;
  }
}
239
240
241
242
/**
 * Reads the price level indicator of the place.
 *
 * Prefers a `span` whose `aria-label` contains "Price" and returns its trimmed text
 * (or `null` when that text is empty — the fallback is not consulted in that case).
 * Only when no such span exists is the `.mgr77e` element searched for the first run
 * of `$` characters.
 *
 * @param page - Page to query.
 * @returns The price text (e.g. `"$$"`), or `null` when unavailable or on error.
 */
async function extractPriceLevel(page: Page): Promise<string | null> {
  try {
    const labelledPrice = await page.$('span[aria-label*="Price"]');

    if (!labelledPrice) {
      const infoArea = await page.$('.mgr77e');
      const infoText = infoArea ? await infoArea.textContent() : null;
      const dollarRun = infoText ? infoText.match(/(\$+)/) : null;
      return dollarRun ? dollarRun[1] : null;
    }

    const shown = await labelledPrice.textContent();
    return shown?.trim() || null;
  } catch {
    return null;
  }
}
269
270
271
272
/**
 * Reads the weekly opening hours.
 *
 * Clicks the `data-item-id="oh"` button, pauses for a randomized delay, waits up to
 * 3 s for the hours table (or the `div.t39EBf` container) to appear, and then maps
 * each table row to a weekday: the first cell is matched against `DAYS_OF_WEEK`
 * (case-insensitive substring match) and the second cell supplies the hours text.
 *
 * @param page - Page to query and interact with.
 * @returns An `OpeningHours` object with `null` for days that were not found, or
 *   `null` when the button is missing, no day could be read, or any step throws
 *   (including the wait timing out).
 */
async function extractOpeningHours(page: Page): Promise<OpeningHours | null> {
  try {
    const toggle = await page.$('button[data-item-id="oh"]');
    if (!toggle) {
      return null;
    }

    // Order matters: click, pause, then wait for the table to be present.
    await toggle.click();
    await randomDelay(800, 1200);
    await page.waitForSelector('table.eK4R0e, div.t39EBf', { timeout: 3000 });

    const schedule: OpeningHours = {
      monday: null,
      tuesday: null,
      wednesday: null,
      thursday: null,
      friday: null,
      saturday: null,
      sunday: null,
    };

    for (const row of await page.$$('table.eK4R0e tr')) {
      const [dayCell, timeCell] = await row.$$('td');
      if (!dayCell || !timeCell) {
        // Rows with fewer than two cells carry no day/hours pair.
        continue;
      }

      const dayText = await dayCell.textContent();
      const timeText = await timeCell.textContent();
      if (!dayText || !timeText) {
        continue;
      }

      const normalizedDay = dayText.toLowerCase().trim();
      const matchedDay = DAYS_OF_WEEK.find((name) => normalizedDay.includes(name));
      if (matchedDay) {
        schedule[matchedDay] = timeText.trim();
      }
    }

    // An all-null schedule is reported as "no hours available".
    const nothingFound = Object.values(schedule).every((entry) => entry === null);
    return nothingFound ? null : schedule;
  } catch {
    return null;
  }
}
321
322
323
324
/**
 * Determines whether the place is currently open, based on its status text.
 *
 * Two elements are consulted in order: the first `[data-hide-tooltip-on-mouse-move]`
 * element, then the first `span.ZDu9vd` / `span.U26fgb`. For each, the first whole
 * status word in the text decides:
 *   - "open" or "closes"  → open   (e.g. "Open ⋅ Closes 10 PM", "Closes soon")
 *   - "closed" or "opens" → closed (e.g. "Closed ⋅ Opens 9 AM", "Opens soon")
 * Using the first word avoids reading "Closed ⋅ Opens 9 AM" as open merely because
 * it contains the substring "open". If an element carries no status word, the next
 * element is tried.
 *
 * @param page - Page to query.
 * @returns `true` if open, `false` if closed, `null` when no status can be determined
 *   or the lookup throws.
 */
async function checkIfOpen(page: Page): Promise<boolean | null> {
  const statusSelectors = ['[data-hide-tooltip-on-mouse-move]', 'span.ZDu9vd, span.U26fgb'];

  // Interprets a status text; null means the text contains no recognisable status word.
  const statusFromText = (text: string | null | undefined): boolean | null => {
    const firstStatusWord = text?.toLowerCase().match(/\b(open|closes|closed|opens)\b/);
    if (!firstStatusWord) {
      return null;
    }
    return firstStatusWord[1] === 'open' || firstStatusWord[1] === 'closes';
  };

  try {
    for (const selector of statusSelectors) {
      const element = await page.$(selector);
      if (!element) {
        continue;
      }
      const status = statusFromText(await element.textContent());
      if (status !== null) {
        return status;
      }
    }
    return null;
  } catch {
    return null;
  }
}
353
354
355
356
/**
 * Collects photo URLs for the place.
 *
 * The main image (`img.FQ2IWe` or an image inside a photo button) is taken first when
 * its `src` contains "googleusercontent". Then, if a button whose `aria-label`
 * contains "Photo" exists, it is clicked and — after a randomized delay — the `src`
 * of up to `maxPhotos` matching images is added, skipping duplicates.
 *
 * Failures are non-fatal: whatever was collected before the error is returned, so a
 * problem opening the photos tab does not discard the main photo.
 *
 * @param page - Page to query and interact with.
 * @param maxPhotos - Upper bound on the number of URLs returned (default 10).
 * @returns Up to `maxPhotos` unique URLs; empty when nothing could be read.
 */
async function extractPhotoUrls(page: Page, maxPhotos: number = 10): Promise<string[]> {
  // Declared outside the try block so partial results survive an error.
  const urls: string[] = [];

  try {
    const mainImage = await page.$('img.FQ2IWe, button[jsaction*="photo"] img');
    if (mainImage) {
      const src = await mainImage.getAttribute('src');
      if (src && src.includes('googleusercontent')) {
        urls.push(src);
      }
    }

    const photosTab = await page.$('button[aria-label*="Photo"]');
    if (photosTab) {
      await photosTab.click();
      await randomDelay(1000, 1500);

      const images = await page.$$('img[src*="googleusercontent"]');
      for (const img of images.slice(0, maxPhotos)) {
        const src = await img.getAttribute('src');
        if (src && !urls.includes(src)) {
          urls.push(src);
        }
      }
    }
  } catch {
    // Best-effort: fall through and return what has been gathered so far.
  }

  return urls.slice(0, maxPhotos);
}
391
392
393
394
/**
 * Collects up to `maxReviews` reviews from the page.
 *
 * If a button whose `aria-label` contains "review" exists it is clicked (followed by
 * a randomized delay); the function then waits up to 5 s for review cards
 * (`[data-review-id]` or `.jftiEf`) and reads author, star rating, text and date from
 * each card. A card that throws while being read is skipped.
 *
 * @param page - Page to query and interact with.
 * @param maxReviews - Maximum number of review cards to process.
 * @returns The parsed reviews; an empty array when no cards appear in time or an
 *   error occurs outside the per-card handling.
 */
async function extractReviewsData(page: Page, maxReviews: number): Promise<ReviewData[]> {
  try {
    const opener = await page.$('button[aria-label*="review"]');
    if (opener) {
      await opener.click();
      await randomDelay(1500, 2000);
    }

    await page.waitForSelector('[data-review-id], .jftiEf', { timeout: 5000 });

    const cards = (await page.$$('[data-review-id], .jftiEf')).slice(0, maxReviews);
    const collected: ReviewData[] = [];

    for (const card of cards) {
      try {
        // Author: missing element or empty text both end up as "Anonymous".
        const authorNode = await card.$('.d4r55, .WNxzHc');
        const rawAuthor = authorNode ? await authorNode.textContent() : 'Anonymous';

        // Rating: first digit found in the star element's aria-label.
        const starsNode = await card.$('span[role="img"][aria-label*="star"]');
        const starsLabel = starsNode ? await starsNode.getAttribute('aria-label') : null;
        const digit = starsLabel ? starsLabel.match(/(\d)/) : null;
        const rating: number | null = digit ? parseInt(digit[1], 10) : null;

        const bodyNode = await card.$('.wiI7pd, .MyEned');
        const body = bodyNode ? (await bodyNode.textContent()) || '' : '';

        const dateNode = await card.$('.rsqaWe, .DU9Pgb');
        const when = (dateNode ? (await dateNode.textContent()) || '' : '').trim();

        collected.push({
          author: rawAuthor?.trim() || 'Anonymous',
          rating,
          text: body.trim(),
          date: when,
          relativeDate: when || null,
        });
      } catch {
        // A card that cannot be read is skipped; the remaining cards are still processed.
      }
    }

    return collected;
  } catch {
    return [];
  }
}
461
462
463
464
/**
 * Extracts the first integer from `text`, treating commas as thousands separators.
 *
 * The match has to start with a digit, so punctuation that precedes the number
 * (as in `"Hello, world 42"`) is skipped rather than being parsed into `NaN`.
 *
 * @param text - Text that may contain a number such as `"1,234 reviews"`.
 * @returns The parsed integer, or `null` when the text contains no digits.
 *
 * @example
 * extractNumberFromText('(1,234)'); // 1234
 */
export function extractNumberFromText(text: string): number | null {
  const match = text.match(/\d[\d,]*/);
  if (!match) {
    return null;
  }
  return parseInt(match[0].replace(/,/g, ''), 10);
}