import { Actor } from 'apify';
import { connect as connectRealBrowser } from 'puppeteer-real-browser';
3
// Landing page that hosts the keyword search form the scraper drives.
const SITE_BASE = 'https://www.keyword.io/';
5
// Platform identifiers accepted as the `platform` input. Frozen because the
// list is shared, read-only reference data.
const PLATFORMS = Object.freeze([
  'google',
  'amazon',
  'youtube',
  'bing',
  'duckduckgo',
  'ebay',
  'wikipedia',
  'alibaba',
  'walmart',
  'fotolia',
  'fiverr',
  'tiktok',
]);
20
// Locale codes (`<language>-<region>`, lower case) accepted as the `country`
// input. Frozen because the list is shared, read-only reference data.
const COUNTRIES = Object.freeze([
  'en-us', 'en-uk', 'en-ca', 'en-au', 'en-nz', 'en-ag', 'en-ai', 'en-as', 'en-bs', 'en-bw',
  'en-bz', 'en-ck', 'en-cm', 'en-dm', 'en-fj', 'en-fm', 'en-gg', 'en-gh', 'en-gi', 'en-gm',
  'en-gy', 'en-hk', 'en-ie', 'en-il', 'en-im', 'en-in', 'en-je', 'en-jm', 'en-ke', 'en-ki',
  'en-kr', 'en-ls', 'en-ms', 'en-mt', 'en-mu', 'en-mw', 'en-na', 'en-nf', 'en-ng', 'en-nr',
  'en-nu', 'en-pg', 'en-ph', 'en-pk', 'en-pn', 'en-pr', 'en-rw', 'en-sb', 'en-sc', 'en-sg',
  'en-sh', 'en-sl', 'en-tk', 'en-to', 'en-tt', 'en-tz', 'en-ug', 'en-vc', 'en-vg', 'en-vi',
  'en-vu', 'en-ws', 'en-za', 'en-zm', 'en-zw', 'de-de', 'de-at', 'de-ch', 'de-be', 'de-it',
  'de-li', 'de-lu', 'de-na', 'ru-ru', 'ru-by', 'ru-kg', 'ru-kz', 'ru-md', 'ru-tj', 'fr-fr',
  'fr-ca', 'fr-be', 'fr-bf', 'fr-bi', 'fr-bj', 'fr-cd', 'fr-cf', 'fr-cg', 'fr-ch', 'fr-ci',
  'fr-cm', 'fr-dj', 'fr-ga', 'fr-gp', 'fr-ht', 'fr-it', 'fr-lb', 'fr-lu', 'fr-mg', 'fr-ml',
  'fr-mu', 'fr-ne', 'fr-rw', 'fr-sc', 'fr-sn', 'fr-td', 'fr-tg', 'fr-vu', 'es-es', 'es-ar',
  'es-bo', 'es-cl', 'es-co', 'es-cr', 'es-cu', 'es-do', 'es-ec', 'es-gt', 'es-hn', 'es-mx',
  'es-ni', 'es-pa', 'es-pe', 'es-pr', 'es-py', 'es-sv', 'es-uy', 'es-ve', 'ar-ae', 'ar-bh',
  'ar-dj', 'ar-dz', 'ar-eg', 'ar-iq', 'ar-jo', 'ar-kw', 'ar-lb', 'ar-ly', 'ar-ma', 'ar-om',
  'ar-ps', 'ar-qa', 'ar-sa', 'ar-so', 'ar-td', 'ar-tn', 'it-ch', 'it-it', 'it-sm', 'nl-nl',
  'nl-be', 'ca-ad', 'ca-es', 'pt-pt', 'pt-br', 'fa-af', 'sq-al', 'sq-me', 'sm-as', 'sm-tk',
  'sm-ws', 'az-az', 'bs-ba', 'bs-me', 'hr-ba', 'hr-hr', 'hr-me', 'sr-ba', 'sr-me', 'sr-rs',
  'bn-bd', 'bg-bg', 'ms-bn', 'ms-my', 'ms-sg', 'be-by', 'el-cy', 'el-gr', 'tr-cy', 'tr-tr',
  'cs-cz', 'da-dk', 'da-gl', 'et-ee', 'eu-es', 'gl-es', 'fi-fi', 'sv-fi', 'sv-se', 'ka-ge',
  'hu-hu', 'id-id', 'iw-il', 'ga-ie', 'hi-in', 'is-is', 'ja-jp', 'sw-ke', 'sw-tz', 'sw-ug',
  'ko-kr', 'si-lk', 'ta-lk', 'ta-sg', 'lt-lt', 'lv-lv', 'uk-md', 'uk-ua', 'ro-md', 'ro-ro',
  'mk-mk', 'mt-mt', 'nn-no', 'no-no', 'ne-np', 'tl-ph', 'ur-pk', 'pl-pl', 'sl-si', 'sk-sk',
  'th-th', 'uz-uz', 'vi-vn', 'zu-za', 'xh-za', 'af-za', 'zh-cn',
]);
46
// Launch flags forwarded to puppeteer-real-browser's connect() call.
const SCRAPER_CONFIG = Object.freeze({
  turnstile: false,
  headless: false,
});
51
// CSS selectors for the keyword.io search form.
// `*Scope` entries identify a dropdown's container, `*Trigger` the element
// that opens it, and `*Search` the filter input inside the opened dropdown.
// NOTE(review): `platformSearch` and `countrySearch` are not referenced by the
// code visible in this file; the dropdown search inputs are located by
// placeholder text instead. Confirm whether they are still needed.
const SELECTORS = Object.freeze({
  platformScope: '[data-searchable-dropdown-placeholder-value="Select platform"]',
  platformTrigger: 'input[aria-label="Platform"], #platform',
  platformSearch: 'input[aria-label="Search platforms"]',
  countryScope: '#audience-dropdown',
  countryTrigger: 'input[placeholder="Country"], #country',
  countrySearch: 'input[placeholder="Search countries..."]',
  seed: '#seed',
  searchButton: 'button.btn-search',
});
62
/**
 * Scrapes keyword suggestions from the keyword.io search form.
 *
 * A run validates the Actor input, opens a browser through
 * puppeteer-real-browser (optionally behind an Apify proxy), fills in the
 * platform / country / seed keyword, submits the form and pushes one dataset
 * record per suggestion (or a single error record) via `Actor.pushData`.
 */
class KeywordSuggestionsScraper {
  /**
   * Converts a proxy URL into the `{ host, port, username, password }` shape
   * passed to puppeteer-real-browser.
   *
   * @param {string|undefined} proxyUrl - Full proxy URL, e.g. `http://user:pass@host:8000`.
   * @returns {{host: string, port: number, username: (string|undefined), password: (string|undefined)}|undefined}
   *   Parsed options, or `undefined` when no usable proxy can be derived
   *   (best-effort: the caller then runs without a proxy).
   */
  buildProxyOptions(proxyUrl) {
    if (!proxyUrl) return undefined;

    // The URL parser drops a port equal to the scheme default, so an explicit
    // ":80" / ":443" reads back as ''. Fall back to the scheme default instead
    // of discarding the proxy.
    const defaultPorts = { 'http:': 80, 'https:': 443 };

    // URL exposes credentials percent-encoded; the proxy needs the raw values.
    // A malformed escape sequence is passed through unchanged rather than thrown.
    const decodeCredential = (value) => {
      if (!value) return undefined;
      try {
        return decodeURIComponent(value);
      } catch {
        return value;
      }
    };

    let parsed;
    try {
      parsed = new URL(proxyUrl);
    } catch (error) {
      // The raw URL is deliberately not logged: it may contain credentials.
      console.warn(`Invalid proxy URL detected; continuing without a proxy: ${error?.message || error}`);
      return undefined;
    }

    const port = parsed.port !== '' ? Number(parsed.port) : defaultPorts[parsed.protocol];
    if (!parsed.hostname || port === undefined) return undefined;

    return {
      host: parsed.hostname,
      port,
      username: decodeCredential(parsed.username),
      password: decodeCredential(parsed.password),
    };
  }

  /**
   * Lower-cases and validates the requested platform, defaulting to `google`.
   *
   * @param {*} value - Raw `platform` input.
   * @returns {string} A member of `PLATFORMS`.
   * @throws {Error} When the value is not in `PLATFORMS`.
   */
  normalizePlatform(value) {
    const platform = String(value || 'google').trim().toLowerCase();
    if (!PLATFORMS.includes(platform)) {
      throw new Error(`Unsupported platform "${value}". Supported: ${PLATFORMS.join(', ')}`);
    }
    return platform;
  }

  /**
   * Lower-cases and validates the requested locale code, defaulting to `en-us`.
   *
   * @param {*} value - Raw `country` input.
   * @returns {string} A member of `COUNTRIES`.
   * @throws {Error} When the value is not in `COUNTRIES`.
   */
  normalizeCountry(value) {
    const country = String(value || 'en-us').trim().toLowerCase();
    if (!COUNTRIES.includes(country)) {
      throw new Error(`Unsupported country "${value}". Use a supported locale code such as en-us`);
    }
    return country;
  }

  /**
   * Validates the Actor input and returns the normalized search request.
   *
   * @param {object|null|undefined} input - Raw Actor input.
   * @returns {{platform: string, keyword: string, country: string}}
   * @throws {Error} When the keyword is missing or platform/country is unsupported.
   */
  normalizeInput(input) {
    const keyword = String(input?.keyword || '').trim();
    if (!keyword) {
      throw new Error('Input must include a keyword');
    }

    return {
      platform: this.normalizePlatform(input?.platform),
      keyword,
      country: this.normalizeCountry(input?.country),
    };
  }

  /**
   * Entry point: validates input, launches the browser and runs one search.
   * The browser is always closed, even when the search throws.
   *
   * @param {object|null|undefined} input - Raw Actor input (`keyword`, `platform`,
   *   `country`, optional `proxyConfiguration`).
   * @returns {Promise<void>}
   * @throws {Error} When the input is invalid or the browser cannot be started.
   */
  async run(input) {
    // A missing input must surface as the validation error from
    // normalizeInput(), not as a TypeError from destructuring null.
    const { proxyConfiguration } = input ?? {};
    const search = this.normalizeInput(input);

    const proxyConfig = proxyConfiguration
      ? await Actor.createProxyConfiguration(proxyConfiguration)
      : undefined;

    const proxyUrl = proxyConfig ? await proxyConfig.newUrl() : undefined;
    const proxyOptions = this.buildProxyOptions(proxyUrl);

    const realBrowserOption = {
      args: ['--start-maximized', '--no-sandbox'],
      turnstile: SCRAPER_CONFIG.turnstile,
      headless: SCRAPER_CONFIG.headless,
      customConfig: {},
      connectOption: {
        defaultViewport: { width: 1400, height: 900 },
      },
      ...(proxyOptions ? { proxy: proxyOptions } : {}),
      plugins: [],
    };

    let browser;
    try {
      const { page, browser: connectedBrowser } = await connectRealBrowser(realBrowserOption);
      browser = connectedBrowser;

      page.setDefaultNavigationTimeout(180000);
      page.setDefaultTimeout(180000);

      await this.scrapeSearch(page, search);
    } finally {
      if (browser) {
        await browser.close();
      }
    }
  }

  /**
   * Performs one search on the site and pushes the outcome to the dataset.
   *
   * Failures while navigating or scraping are not rethrown: they are logged
   * and recorded as a single dataset item with an `error` field.
   *
   * @param {object} page - Puppeteer page.
   * @param {{platform: string, keyword: string, country: string}} search - Normalized request.
   * @returns {Promise<void>}
   */
  async scrapeSearch(page, search) {
    const { platform, keyword, country } = search;
    console.log(`Searching "${keyword}" on ${platform} (${country})...`);

    try {
      await page.goto(SITE_BASE, { waitUntil: 'networkidle2', timeout: 180000 });
      await this.randomDelay(1500, 2500);
      await this.waitForSearchForm(page);

      const selection = await this.fillSearchForm(page, search);
      await this.waitForSearchButtonReady(page);
      await this.clickInPage(page, SELECTORS.searchButton);

      const keywordCount = await this.waitForKeywordResults(page);
      if (keywordCount === 0) {
        console.log(`No keyword suggestions found for "${keyword}"`);
        await Actor.pushData([
          {
            platform,
            platformLabel: selection.platformLabel,
            seedKeyword: keyword,
            country: selection.countryValue,
            countryLabel: selection.countryLabel,
            error: 'No keyword suggestions found',
            scrapedAt: new Date().toISOString(),
          },
        ]);
        return;
      }

      const results = await this.extractKeywords(page, {
        ...search,
        ...selection,
      });

      console.log(`Saved ${results.length} keyword suggestions for "${keyword}"`);
      await Actor.pushData(results);
    } catch (error) {
      // `?.` guards against a thrown null/undefined, which has no `message`.
      const message = error?.message || String(error);
      console.log(`Search failed for "${keyword}": ${message}`);
      await Actor.pushData([
        {
          platform,
          seedKeyword: keyword,
          country,
          error: message,
          scrapedAt: new Date().toISOString(),
        },
      ]);
    }
  }

  /**
   * Re-evaluates `predicate` inside the page until it returns a truthy value,
   * pausing 300-500 ms between attempts.
   *
   * @param {object} page - Puppeteer page.
   * @param {Function} predicate - Self-contained function run in the page context.
   * @param {Array} args - Serializable arguments forwarded to `predicate`.
   * @param {number} timeoutMs - Maximum time to keep polling.
   * @param {string} failureMessage - Message of the error thrown on timeout.
   * @returns {Promise<void>}
   * @throws {Error} When the predicate is still falsy after `timeoutMs`.
   */
  async #pollPage(page, predicate, args, timeoutMs, failureMessage) {
    const start = Date.now();

    while (Date.now() - start < timeoutMs) {
      const ready = await page.evaluate(predicate, ...args);
      if (ready) return;
      await this.randomDelay(300, 500);
    }

    throw new Error(failureMessage);
  }

  /**
   * Waits until the platform input, the seed input and the country dropdown
   * container are all present in the DOM.
   *
   * @param {object} page - Puppeteer page.
   * @param {number} [timeoutMs=30000]
   * @returns {Promise<void>}
   * @throws {Error} `Search form did not load` on timeout.
   */
  async waitForSearchForm(page, timeoutMs = 30000) {
    await this.#pollPage(
      page,
      () => {
        const platform = document.querySelector(
          '[data-searchable-dropdown-placeholder-value="Select platform"] input[data-searchable-dropdown-target="input"], #platform',
        );
        const seed = document.querySelector('#seed');
        const countryFrame = document.querySelector('#audience-dropdown');
        return Boolean(platform && seed && countryFrame);
      },
      [],
      timeoutMs,
      'Search form did not load',
    );
  }

  /**
   * Selects the platform and country in their dropdowns (only when they differ
   * from what the form already shows) and types the seed keyword.
   *
   * @param {object} page - Puppeteer page.
   * @param {{platform: string, keyword: string, country: string}} search
   * @returns {Promise<{platformLabel: (string|null), countryLabel: (string|null), countryValue: (string|null|undefined)}>}
   *   Labels and value as displayed by the form after selection.
   * @throws {Error} When the platform or country option cannot be found, or
   *   the country dropdown does not finish loading.
   */
  async fillSearchForm(page, search) {
    const currentPlatform = await this.getSelectedPlatform(page);
    if (currentPlatform !== search.platform) {
      const platformSelection = await this.selectSearchable(page, {
        scope: SELECTORS.platformScope,
        triggerSelector: SELECTORS.platformTrigger,
        searchPlaceholder: 'Search platforms...',
        value: search.platform,
      });

      if (!platformSelection) {
        throw new Error(`Platform "${search.platform}" not found in dropdown`);
      }
    }

    // Needed on both paths: the country dropdown must be populated for the
    // requested platform before it is read or changed.
    await this.waitForAudienceDropdown(page, search.platform);

    await this.setSeedKeyword(page, search.keyword);

    const currentCountry = await this.getSelectedCountry(page);
    let countrySelection = null;

    if (!this.isCountryMatch(currentCountry, search.country)) {
      countrySelection = await this.selectSearchable(page, {
        scope: SELECTORS.countryScope,
        triggerSelector: SELECTORS.countryTrigger,
        searchPlaceholder: 'Search countries...',
        value: search.country,
      });

      if (!countrySelection) {
        throw new Error(`Country "${search.country}" not found in dropdown`);
      }
    }

    const platformLabel = await this.getSelectedPlatformLabel(page);
    const countryLabel = countrySelection?.text || (await this.getSelectedCountryLabel(page));
    const countryValue = countrySelection?.value || currentCountry?.value;

    return {
      platformLabel,
      countryLabel,
      countryValue,
    };
  }

  /**
   * Reads the platform dropdown's hidden-field value.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<string|null>} The value, or `null` when absent/empty.
   */
  async getSelectedPlatform(page) {
    return page.evaluate(() =>
      document.querySelector(
        '[data-searchable-dropdown-placeholder-value="Select platform"] [data-searchable-dropdown-target="hiddenField"]',
      )?.value || null,
    );
  }

  /**
   * Reads the text shown in the platform dropdown's visible input.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<string|null>} The label, or `null` when absent/empty.
   */
  async getSelectedPlatformLabel(page) {
    return page.evaluate(() =>
      document.querySelector(
        '[data-searchable-dropdown-placeholder-value="Select platform"] input[data-searchable-dropdown-target="input"]',
      )?.value || null,
    );
  }

  /**
   * Reads the currently selected country from the `#audience` field and the
   * visible country input.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<{value: string, text: (string|null)}|null>} `null` when
   *   the `#audience` field has no value.
   */
  async getSelectedCountry(page) {
    return page.evaluate(() => {
      const value = document.querySelector('#audience-dropdown #audience, #audience')?.value || null;
      const text = document.querySelector(
        '#audience-dropdown input[placeholder="Country"], #country',
      )?.value || null;
      return value ? { value, text } : null;
    });
  }

  /**
   * Returns only the visible label of the selected country.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<string|null>}
   */
  async getSelectedCountryLabel(page) {
    const country = await this.getSelectedCountry(page);
    return country?.text || null;
  }

  /**
   * Case-insensitively compares the form's current country value with the
   * requested one.
   *
   * @param {{value: (string|null)}|null} currentCountry - Result of `getSelectedCountry`.
   * @param {string} requestedCountry
   * @returns {boolean} `false` when nothing is currently selected.
   */
  isCountryMatch(currentCountry, requestedCountry) {
    if (!currentCountry) return false;
    return (currentCountry.value || '').toLowerCase() === String(requestedCountry || '').trim().toLowerCase();
  }

  /**
   * Waits until the platform hidden field holds `platform`, the `#audience`
   * field has a value and the country input exists.
   *
   * @param {object} page - Puppeteer page.
   * @param {string} platform - Expected platform value.
   * @param {number} [timeoutMs=30000]
   * @returns {Promise<void>}
   * @throws {Error} On timeout.
   */
  async waitForAudienceDropdown(page, platform, timeoutMs = 30000) {
    await this.#pollPage(
      page,
      (expectedPlatform) => {
        const platformHidden = document.querySelector(
          '[data-searchable-dropdown-placeholder-value="Select platform"] [data-searchable-dropdown-target="hiddenField"]',
        );
        const audience = document.querySelector('#audience-dropdown #audience, #audience');
        const countryInput = document.querySelector(
          '#audience-dropdown input[placeholder="Country"], #country',
        );

        return Boolean(
          platformHidden?.value === expectedPlatform &&
          audience?.value &&
          countryInput,
        );
      },
      [platform],
      timeoutMs,
      'Country dropdown did not finish loading after platform selection',
    );
  }

  /**
   * Waits until the search button exists and is not disabled.
   *
   * @param {object} page - Puppeteer page.
   * @param {number} [timeoutMs=30000]
   * @returns {Promise<void>}
   * @throws {Error} `Search button did not become enabled` on timeout.
   */
  async waitForSearchButtonReady(page, timeoutMs = 30000) {
    await this.#pollPage(
      page,
      (selector) => {
        const button = document.querySelector(selector);
        return Boolean(button && !button.disabled);
      },
      [SELECTORS.searchButton],
      timeoutMs,
      'Search button did not become enabled',
    );
  }

  /**
   * Scrolls an element into view and clicks it from inside the page context.
   *
   * @param {object} page - Puppeteer page.
   * @param {string} selector - Selector of the element to click.
   * @param {string|null} [scope=null] - Optional container selector to search within.
   * @returns {Promise<void>}
   * @throws {Error} When the scope or the element cannot be found.
   */
  async clickInPage(page, selector, scope = null) {
    const clicked = await page.evaluate(({ sel, rootSelector }) => {
      const root = rootSelector ? document.querySelector(rootSelector) : document;
      if (!root) return false;

      const el = root.querySelector(sel);
      if (!el) return false;

      el.scrollIntoView({ block: 'center' });
      el.click();
      return true;
    }, { sel: selector, rootSelector: scope });

    if (!clicked) {
      throw new Error(`Element not found for selector "${selector}"`);
    }
  }

  /**
   * Sets an input's value in the page and fires bubbling `input` and `change`
   * events so page scripts listening for them see the new value.
   *
   * @param {object} page - Puppeteer page.
   * @param {string} selector - Selector of the input.
   * @param {string} value - Value to assign.
   * @returns {Promise<void>}
   * @throws {Error} When the input cannot be found (mirrors `clickInPage`), so
   *   a search is never submitted with a value that was silently not applied.
   */
  async setInputValue(page, selector, value) {
    const applied = await page.evaluate(({ sel, inputValue }) => {
      const input = document.querySelector(sel);
      if (!input) return false;

      input.focus();
      input.value = inputValue;
      input.dispatchEvent(new Event('input', { bubbles: true }));
      input.dispatchEvent(new Event('change', { bubbles: true }));
      return true;
    }, { sel: selector, inputValue: value });

    if (!applied) {
      throw new Error(`Element not found for selector "${selector}"`);
    }
  }

  /**
   * Types the seed keyword into the form.
   *
   * @param {object} page - Puppeteer page.
   * @param {string} keyword
   * @returns {Promise<void>}
   */
  async setSeedKeyword(page, keyword) {
    await this.setInputValue(page, SELECTORS.seed, keyword);
  }

  /**
   * Opens a searchable dropdown and clicks the option matching `value`.
   *
   * Values that do not look like a locale code are first typed into the
   * dropdown's search input (located by placeholder) to filter the options;
   * locale codes skip that step. Options are matched from strictest to
   * loosest rule, so an exact match always wins over a substring match
   * regardless of option order.
   *
   * @param {object} page - Puppeteer page.
   * @param {object} options
   * @param {string} options.scope - Selector of the dropdown container.
   * @param {string} options.triggerSelector - Selector of the element that opens it.
   * @param {string} options.searchPlaceholder - Placeholder of the dropdown's search input.
   * @param {string} options.value - Option value or text to select.
   * @returns {Promise<{text: (string|null), value: (string|null)}|null>} The
   *   clicked option, or `null` when nothing matched.
   * @throws {Error} When the trigger element cannot be found.
   */
  async selectSearchable(page, { scope, triggerSelector, searchPlaceholder, value }) {
    await this.clickInPage(page, triggerSelector, scope);
    await this.randomDelay(400, 600);

    const isLocaleCode = /^[a-z]{2}-[a-z]{2,3}$/i.test(value);

    if (!isLocaleCode) {
      await page.evaluate(({ rootSelector, placeholder, searchValue }) => {
        const root = rootSelector ? document.querySelector(rootSelector) : document;
        const search = root?.querySelector(`input[placeholder="${placeholder}"]`);
        // Best-effort: without a search box the full option list is matched.
        if (!search) return;
        search.focus();
        search.value = searchValue;
        search.dispatchEvent(new Event('input', { bubbles: true }));
      }, {
        rootSelector: scope,
        placeholder: searchPlaceholder,
        searchValue: value,
      });

      await this.randomDelay(300, 500);
    }

    const selected = await page.evaluate(({ rootSelector, rawValue }) => {
      const root = rootSelector ? document.querySelector(rootSelector) : document;
      if (!root) return null;

      const normalized = String(rawValue || '').trim().toLowerCase();
      const candidates = [...root.querySelectorAll('.searchable-select-option')].map((element) => ({
        element,
        text: element.textContent?.trim().toLowerCase() || '',
        dataValue: (element.dataset.value || '').toLowerCase(),
      }));

      // Strictest rule first: a loose substring hit on an early option must
      // not shadow an exact hit on a later one.
      const rules = [
        ({ dataValue }) => dataValue === normalized,
        ({ text }) => text === normalized,
        ({ text }) => text.includes(normalized),
        ({ dataValue }) => Boolean(dataValue) && normalized.includes(dataValue),
      ];

      let match;
      for (const rule of rules) {
        match = candidates.find(rule);
        if (match) break;
      }

      if (!match) return null;

      match.element.click();
      return {
        text: match.element.textContent?.trim() || null,
        value: match.element.dataset.value || null,
      };
    }, { rootSelector: scope, rawValue: value });

    await this.randomDelay(300, 500);
    return selected;
  }

  /**
   * Polls the number of `.keyword-item` elements until it is non-zero and
   * unchanged for two consecutive polls after first being observed.
   *
   * @param {object} page - Puppeteer page.
   * @param {number} [timeoutMs=120000]
   * @returns {Promise<number>} The stable count, or whatever count is present
   *   when the timeout elapses (possibly 0).
   */
  async waitForKeywordResults(page, timeoutMs = 120000) {
    const start = Date.now();
    let lastCount = 0;
    let stableRounds = 0;

    while (Date.now() - start < timeoutMs) {
      const count = await page.evaluate(
        () => document.querySelectorAll('.keyword-item').length,
      );

      if (count > 0) {
        if (count === lastCount) {
          stableRounds++;
          if (stableRounds >= 2) {
            return count;
          }
        } else {
          // The list is still growing or changing: restart the stability window.
          stableRounds = 0;
          lastCount = count;
        }
      }

      await this.randomDelay(500, 800);
    }

    return page.evaluate(() => document.querySelectorAll('.keyword-item').length);
  }

  /**
   * Reads every `.keyword-item` and builds one dataset record per non-empty
   * suggestion, ranked in document order starting at 1.
   *
   * @param {object} page - Puppeteer page.
   * @param {object} context - Search request merged with the form selection
   *   (`platform`, `keyword`, `platformLabel`, `countryLabel`, `countryValue`).
   * @returns {Promise<Array<object>>}
   */
  async extractKeywords(page, context) {
    const { platform, keyword, platformLabel, countryLabel, countryValue } = context;

    const suggestions = await page.evaluate(() =>
      [...document.querySelectorAll('.keyword-item')]
        .map((element) => element.textContent?.replace(/\s+/g, ' ').trim())
        .filter(Boolean),
    );

    // One timestamp for the whole batch so all rows of a run agree.
    const scrapedAt = new Date().toISOString();

    return suggestions.map((suggestion, index) => ({
      platform,
      platformLabel,
      seedKeyword: keyword,
      country: countryValue,
      countryLabel,
      suggestion,
      rank: index + 1,
      scrapedAt,
    }));
  }

  /**
   * Sleeps for a random whole number of milliseconds in `[min, max]`.
   *
   * @param {number} [min=500]
   * @param {number} [max=1500]
   * @returns {Promise<void>}
   */
  async randomDelay(min = 500, max = 1500) {
    const delay = Math.floor(Math.random() * (max - min + 1) + min);
    await new Promise((resolve) => setTimeout(resolve, delay));
  }
}
493
// Actor.main() initialises the SDK itself, runs the callback, logs any error
// it throws and exits the process, so a separate Actor.init() is redundant.
// Awaiting it keeps the module from leaving a floating promise behind.
await Actor.main(async () => {
  const input = await Actor.getInput();
  const scraper = new KeywordSuggestionsScraper();
  await scraper.run(input);
});