1import 'dotenv/config';
2import { Actor } from 'apify';
3import { PlaywrightCrawler, log } from 'crawlee';
4
// Bootstrap the Apify actor runtime (must run before any other Actor call).
await Actor.init();

log.info("🚀 Google Maps Lead Generator FIXED");


// Actor input with defaults; the `{}` fallback lets the script run with no input at all.
const input = await Actor.getInput() || {};

// Search text typed into Google Maps, e.g. "plumber Paris".
const query = input.query;
// Hard cap at 100 results; defaults to 20 when unset (note: `||` also treats 0 as unset).
const maxResults = Math.min(input.maxResults || 20, 100);
// Website email mining is on by default; `??` preserves an explicit `false`.
const shouldExtractEmails = input.extractEmails ?? true;

// A search query is mandatory — fail fast before opening any browser.
if (!query) throw new Error("Query is required");
17
18
19
20
21const clean = (v) => v?.replace(/\n/g, " ").trim() || null;
22
23
/**
 * Normalize a raw phone string to a "+<dialcode><digits>" form.
 * Strips all formatting, converts the "00" international prefix to "+",
 * and rewrites a national "0…" number using `countryCode`.
 *
 * @param {?string} raw - Phone number as scraped (may be null/empty).
 * @param {string} [countryCode="+33"] - Dial code assumed for national
 *   numbers. Defaults to "+33" to keep the original hard-coded French
 *   behavior backward compatible.
 * @returns {?string} Normalized number, or null when the input is empty.
 */
const normalizePhone = (raw, countryCode = "+33") => {
  if (!raw) return null;
  let p = raw.replace(/[^\d+]/g, "");
  if (p.startsWith("00")) p = "+" + p.slice(2);
  if (!p.startsWith("+") && p.startsWith("0")) p = countryCode + p.slice(1);
  return p;
};
31
32
// De-obfuscate common anti-scraping email spellings ("[at]", "(dot)", …).
const normalizeEmailText = (text = "") =>
  text
    .replace(/\s*\[at\]\s*/gi, "@")
    .replace(/\s*\(at\)\s*/gi, "@")
    .replace(/\s*\[dot\]\s*/gi, ".")
    .replace(/\s*\(dot\)\s*/gi, ".");

/**
 * Extract every email-looking string from free text or raw HTML.
 * Obfuscated spellings are normalized first via normalizeEmailText.
 *
 * @param {string} [text=""] - Raw page text or HTML.
 * @returns {string[]} Matched addresses (empty array when none found).
 */
const extractEmailsFromText = (text = "") => {
  const normalized = normalizeEmailText(text);
  // BUG FIX: the TLD class was lowercase-only ([a-z]{2,}), so uppercase
  // addresses like INFO@EXAMPLE.COM were silently missed; [a-zA-Z] matches both.
  return normalized.match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g) || [];
};
45
46
// Build common role-based guesses (info@, contact@, …) for a website's domain.
// Returns an empty list when the URL cannot be parsed.
const guessEmails = (website) => {
  let domain;
  try {
    domain = new URL(website).hostname.replace(/^www\./, "");
  } catch {
    return [];
  }
  return ["info", "contact", "hello", "support"].map((prefix) => `${prefix}@${domain}`);
};
56
57
// Running tally of DETAIL pages successfully scraped (bounded by maxResults).
let count = 0;


// Queue seeded with a single SEARCH request; DETAIL requests are added
// dynamically once place links have been discovered.
const requestQueue = await Actor.openRequestQueue();

await requestQueue.addRequest({
  url: `https://www.google.com/maps/search/${encodeURIComponent(query)}`,
  label: "SEARCH",
});
67
68
// Two-phase crawler:
//  - "SEARCH": open the Google Maps results page, scroll to lazy-load
//    listings, collect /maps/place/ links and enqueue them as DETAIL requests.
//  - "DETAIL": scrape one place's card, optionally mine its website for
//    emails, score the lead, and push it to the dataset.
const crawler = new PlaywrightCrawler({
  requestQueue,
  maxConcurrency: 2, // low parallelism to limit blocking/captchas

  async requestHandler({ page, request }) {

    // Skip heavy static assets to speed up page loads.
    // NOTE(review): PlaywrightCrawler typically navigates to request.url
    // BEFORE requestHandler runs, so this route is installed after the first
    // navigation and the explicit page.goto() calls below re-navigate.
    // Consider moving this to preNavigationHooks — confirm against the
    // crawlee version in use.
    await page.route('**/*', (route) => {
      const type = route.request().resourceType();
      if (['image', 'font', 'stylesheet'].includes(type)) {
        route.abort();
      } else {
        route.continue();
      }
    });

    if (request.label === "SEARCH") {

      await page.goto(request.url, { waitUntil: "domcontentloaded" });
      await page.waitForTimeout(4000); // let the results panel render

      // Scroll the results list to trigger lazy-loading of more places.
      for (let i = 0; i < 8; i++) {
        await page.mouse.wheel(0, 3500);
        await page.waitForTimeout(1200);
      }

      // Unique place URLs currently present in the DOM.
      const links = await page.$$eval(
        'a[href*="/maps/place/"]',
        els => [...new Set(els.map(e => e.href))]
      );

      for (const link of links.slice(0, maxResults)) {
        await requestQueue.addRequest({
          url: link,
          label: "DETAIL",
          uniqueKey: link, // dedupe: the same place link is enqueued only once
        });
      }
    }

    if (request.label === "DETAIL") {

      // Global cap. NOTE(review): with maxConcurrency 2 this check races
      // against the increment below, so a result or two beyond maxResults
      // may slip through — acceptable here, but worth confirming.
      if (count >= maxResults) return;

      await page.goto(request.url, { waitUntil: "domcontentloaded" });
      await page.waitForTimeout(2500); // give the place card time to render

      // Scrape the visible place card inside the browser context.
      // Selectors are fragile by nature — Google Maps markup changes often.
      const data = await page.evaluate(() => {
        const get = (sel) => document.querySelector(sel)?.innerText || null;

        return {
          name: get("h1"),
          category: document.querySelector('button[jsaction]')?.innerText || null,
          rating: document.querySelector('div[role="img"]')?.ariaLabel || null,
          address: document.querySelector('button[data-item-id="address"]')?.innerText || null,
          phone: document.querySelector('button[data-item-id*="phone"]')?.innerText || null,
          website: document.querySelector('a[data-item-id="authority"]')?.href || null,
        };
      });

      // A place without a name means the card did not load — skip it.
      if (!data?.name) return;

      count++;

      let emails = [];

      // Visit the business website (plus common contact pages) in a second
      // tab and harvest addresses from raw HTML, mailto: links, and visible text.
      if (shouldExtractEmails && data.website) {
        let page2;

        try {
          page2 = await page.context().newPage();

          const base = data.website.replace(/\/+$/, "");

          const pagesToVisit = [
            base,
            base + "/contact",
            base + "/about",
            base + "/mentions-legales", // French legal-notice page
          ];

          for (const url of pagesToVisit) {
            try {
              await page2.goto(url, {
                waitUntil: "domcontentloaded",
                timeout: 12000
              });

              const html = await page2.content();

              emails.push(...extractEmailsFromText(html));

              // mailto: links often carry addresses hidden from plain text.
              // NOTE(review): the match is case-insensitive (/gi) but the
              // strip below replaces the literal lowercase "mailto:", so an
              // uppercase "MAILTO:" prefix would be left in place — verify.
              const mailtos = html.match(/mailto:([^\s"'>]+)/gi);
              if (mailtos) {
                mailtos.forEach(m => {
                  emails.push(m.replace("mailto:", ""));
                });
              }

              const text = await page2.innerText("body");
              emails.push(...extractEmailsFromText(text));

            } catch (e) {} // best-effort: a missing /contact page is expected
          }

          // Fall back to role-based guesses (info@, contact@, …).
          emails.push(...guessEmails(data.website));

        } catch (e) {
          log.warning("Email extraction failed:", e.message);
        } finally {
          // Always close the secondary tab to avoid leaking pages.
          if (page2 && !page2.isClosed()) {
            await page2.close();
          }
        }
      }

      // Dedupe and drop obvious placeholders / oversized garbage matches.
      emails = [...new Set(emails)].filter(e =>
        e &&
        !e.includes("example") &&
        !e.includes("test") &&
        e.length < 60
      );

      // Simple lead-quality score out of 100 (25+25+40+10).
      let score = 0;
      if (data.phone) score += 25;
      if (data.website) score += 25;
      if (emails.length > 0) score += 40;
      if (data.rating) score += 10;

      await Actor.pushData({
        lead: {
          name: clean(data.name),
          category: clean(data.category),
          rating: clean(data.rating),
          address: clean(data.address),
          phone: normalizePhone(data.phone),
          website: data.website || null,
          emails,
          primaryEmail: emails[0] || null,
          score
        },
        meta: {
          source: "google_maps_fixed",
          scrapedAt: new Date().toISOString(),
        }
      });

      log.info(`📍 ${count}/${maxResults} - ${data.name}`);
    }
  }
});
226
227
// Process the queue until empty (the SEARCH page plus all queued DETAIL pages).
await crawler.run();

log.info("🎉 DONE SUCCESSFULLY");

// Flush the dataset and shut the actor down cleanly.
await Actor.exit();