1import { Actor } from 'apify';
2import { connect as connectRealBrowser } from 'puppeteer-real-browser';
3
/** Origin of the site whose search index is scraped; search URLs are built on top of it. */
const SITE_BASE = 'https://telegramchannels.me';

/**
 * Browser launch switches forwarded to the puppeteer-real-browser `connect()` call.
 * Frozen so that no code path can accidentally flip a flag on the shared object.
 */
const SCRAPER_CONFIG = Object.freeze({
  turnstile: true,
  headless: false,
});
10
/**
 * Searches the telegramchannels.me index for a keyword with a real browser
 * session and pushes every channel card it finds to the Apify dataset.
 */
class TelegramChannelsSearchScraper {
  /**
   * Takes a snapshot of the current document state.
   * Never throws: a failed evaluation is reported through the `error` field.
   *
   * @param {object} page - Page object exposing `evaluate()`.
   * @returns {Promise<{title: string, bodyText: string, cardCount: number,
   *   hasPagination: boolean, ready: boolean, error: (string|null)}>}
   */
  async getPageStats(page) {
    try {
      return await page.evaluate(() => {
        const body = document.body;
        const bodyText = body?.innerText || '';

        return {
          title: document.title || '',
          bodyText,
          cardCount: document.querySelectorAll('a.index-card').length,
          hasPagination: Boolean(document.querySelector('.index-pagination-current')),
          ready: Boolean(body),
          error: null,
        };
      });
    } catch (error) {
      return {
        title: '',
        bodyText: '',
        cardCount: 0,
        hasPagination: false,
        ready: false,
        // A rejection is not guaranteed to be an Error instance; keep the
        // field a non-empty string so callers can rely on its truthiness.
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /**
   * Tells whether the given title/body text looks like a Cloudflare interstitial.
   *
   * @param {string} [title=''] - Document title.
   * @param {string} [bodyText=''] - Visible body text.
   * @returns {boolean} True when any of the known challenge markers is present.
   */
  isCloudflareChallenge(title = '', bodyText = '') {
    return (
      title.includes('Just a moment') ||
      title.includes('Attention Required') ||
      title.includes('Cloudflare') ||
      /security verification/i.test(bodyText)
    );
  }

  /**
   * Polls the page until a `<body>` exists and the snapshot reports no error.
   *
   * @param {object} page - Page object exposing `evaluate()`.
   * @param {number} [timeoutMs=30000] - Polling budget in milliseconds.
   * @returns {Promise<object>} The first ready snapshot, or one last snapshot
   *   (possibly not ready) taken after the budget ran out.
   */
  async waitForPageReady(page, timeoutMs = 30000) {
    const start = Date.now();

    while (Date.now() - start < timeoutMs) {
      const stats = await this.getPageStats(page);
      if (stats.ready && !stats.error) {
        return stats;
      }

      await this.randomDelay(500, 1000);
    }

    return this.getPageStats(page);
  }

  /**
   * Converts a proxy URL into the `{ host, port, username, password }` shape
   * used for the browser `proxy` option.
   *
   * @param {string} [proxyUrl] - Proxy URL, e.g. `http://user:pass@host:8000`.
   * @returns {{host: string, port: number, username: (string|undefined),
   *   password: (string|undefined)}|undefined} Parsed options, or `undefined`
   *   when no usable proxy could be derived.
   */
  buildProxyOptions(proxyUrl) {
    if (!proxyUrl) return undefined;

    // The URL parser reports an empty port when it equals the scheme default,
    // so those have to be restored by hand.
    const defaultPorts = { 'http:': 80, 'https:': 443 };

    // URL keeps credentials percent-encoded; the proxy needs the raw values.
    const decodeCredential = (value) => {
      if (!value) return undefined;
      try {
        return decodeURIComponent(value);
      } catch {
        // Malformed escape sequence: pass the value through untouched.
        return value;
      }
    };

    let parsed;
    try {
      parsed = new URL(proxyUrl);
    } catch (error) {
      console.warn('Invalid proxy configuration');
      return undefined;
    }

    const port = parsed.port ? Number(parsed.port) : defaultPorts[parsed.protocol];
    if (!parsed.hostname || !port) {
      console.warn('Invalid proxy configuration');
      return undefined;
    }

    return {
      host: parsed.hostname,
      port,
      username: decodeCredential(parsed.username),
      password: decodeCredential(parsed.password),
    };
  }

  /**
   * Builds the search URL for a keyword and result page.
   *
   * @param {string} keyword - Search term, sent as the `q` parameter.
   * @param {number} [page=1] - 1-based page; `page` is only added when > 1.
   * @returns {string} Absolute search URL.
   */
  buildSearchUrl(keyword, page = 1) {
    const params = new URLSearchParams({ q: keyword });
    if (page > 1) {
      params.set('page', String(page));
    }
    return `${SITE_BASE}/index?${params.toString()}`;
  }

  /**
   * Validates the actor input, launches the browser (through a proxy when one
   * is configured) and scrapes the keyword. The browser is always closed.
   *
   * @param {{keyword: string, maxItems?: number, proxyConfiguration?: object}} input
   *   Actor input; a missing (null/undefined) input is treated as empty.
   * @returns {Promise<void>}
   * @throws {Error} When `keyword` is blank or `maxItems` is not a positive number.
   */
  async run(input) {
    // With no input at all, fail with the validation message below rather
    // than a destructuring TypeError.
    const { keyword, maxItems = 100, proxyConfiguration } = input ?? {};
    const searchKeyword = String(keyword || '').trim();
    const itemLimit = Number(maxItems);

    if (!searchKeyword) {
      throw new Error('Input must include a keyword');
    }

    if (!Number.isFinite(itemLimit) || itemLimit < 1) {
      throw new Error('maxItems must be a positive number');
    }

    const proxyConfig = proxyConfiguration
      ? await Actor.createProxyConfiguration(proxyConfiguration)
      : undefined;

    const proxyUrl = proxyConfig ? await proxyConfig.newUrl() : undefined;
    const proxyOptions = this.buildProxyOptions(proxyUrl);

    const realBrowserOption = {
      args: ['--start-maximized', '--no-sandbox'],
      turnstile: SCRAPER_CONFIG.turnstile,
      headless: SCRAPER_CONFIG.headless,
      customConfig: {},
      connectOption: {
        defaultViewport: { width: 1400, height: 900 },
      },
      ...(proxyOptions ? { proxy: proxyOptions } : {}),
      plugins: [],
    };

    let browser;
    try {
      const { page, browser: connectedBrowser } = await connectRealBrowser(realBrowserOption);
      browser = connectedBrowser;

      await page.setDefaultNavigationTimeout(180000);
      await page.setDefaultTimeout(180000);

      await this.scrapeKeyword(page, searchKeyword, itemLimit);
    } finally {
      if (browser) {
        await browser.close();
      }
    }
  }

  /**
   * Walks the paginated search results for `keyword` and pushes up to
   * `maxItems` unique channels to the dataset. When nothing was saved, a
   * single "No channels found" record is pushed instead.
   *
   * @param {object} page - Browser page used for navigation and evaluation.
   * @param {string} keyword - Search term.
   * @param {number} maxItems - Maximum number of channels to save.
   * @returns {Promise<void>}
   * @throws {Error} When verification or result loading times out.
   */
  async scrapeKeyword(page, keyword, maxItems) {
    console.log(`Searching channels for "${keyword}"...`);

    let pageNumber = 1;
    let totalSaved = 0;
    const seen = new Set();

    while (totalSaved < maxItems) {
      const searchUrl = this.buildSearchUrl(keyword, pageNumber);

      await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: 180000 });
      await this.waitForCloudflare(page);

      try {
        await this.waitForResults(page);
      } catch (error) {
        // A first page that loaded normally but never rendered a card means
        // the search has no hits. Without this check the "No channels found"
        // record below could never be produced, because the wait above
        // always throws on a card-less page.
        if (pageNumber === 1 && (await this.isLoadedWithoutResults(page))) {
          break;
        }
        throw error;
      }

      await this.scrollResults(page);

      const pageMeta = await this.getPageMeta(page);
      const items = await this.extractChannels(page, keyword);

      if (items.length === 0) {
        break;
      }

      const results = [];
      for (const item of items) {
        if (totalSaved >= maxItems) break;

        // Title plus target URL identifies a channel across result pages.
        const key = `${item.title}::${item.openUrl || ''}`;
        if (seen.has(key)) continue;
        seen.add(key);

        results.push({
          ...item,
          scrapedAt: new Date().toISOString(),
        });
        totalSaved++;
      }

      if (results.length > 0) {
        await Actor.pushData(results);
        console.log(`Saved ${totalSaved} channels for "${keyword}"`);
      }

      if (totalSaved >= maxItems) {
        break;
      }

      const hasNextPage = pageMeta.currentPage < pageMeta.totalPages;
      if (!hasNextPage) {
        break;
      }

      pageNumber++;
      await this.randomDelay(1500, 2500);
    }

    if (totalSaved === 0) {
      console.log(`No channels found for "${keyword}"`);
      await Actor.pushData([
        {
          searchKeyword: keyword,
          error: 'No channels found',
          scrapedAt: new Date().toISOString(),
        },
      ]);
    }
  }

  /**
   * Tells whether the page is loaded, is not a Cloudflare challenge and
   * contains no result cards.
   *
   * @param {object} page - Page object exposing `evaluate()`.
   * @returns {Promise<boolean>}
   */
  async isLoadedWithoutResults(page) {
    const stats = await this.getPageStats(page);
    return (
      stats.ready &&
      !stats.error &&
      stats.cardCount === 0 &&
      !this.isCloudflareChallenge(stats.title, stats.bodyText)
    );
  }

  /**
   * Waits until the page no longer looks like a Cloudflare challenge, then
   * waits for the document to be ready.
   *
   * @param {object} page - Page object exposing `evaluate()`.
   * @param {number} [timeoutMs=120000] - Polling budget in milliseconds.
   * @returns {Promise<void>}
   * @throws {Error} 'Verification timed out' when the challenge never clears.
   */
  async waitForCloudflare(page, timeoutMs = 120000) {
    const start = Date.now();

    while (Date.now() - start < timeoutMs) {
      const stats = await this.getPageStats(page);

      if (stats.error) {
        // Evaluation failed; retry after a short pause.
        await this.randomDelay(1000, 2000);
        continue;
      }

      if (!this.isCloudflareChallenge(stats.title, stats.bodyText)) {
        await this.waitForPageReady(page);
        return;
      }

      await this.randomDelay(2000, 3000);
    }

    throw new Error('Verification timed out');
  }

  /**
   * Waits until result cards are present and their number has stopped
   * changing: a non-zero count must be observed and then seen again,
   * unchanged, on two further polls.
   *
   * @param {object} page - Page object exposing `evaluate()`.
   * @param {number} [timeoutMs=90000] - Polling budget in milliseconds.
   * @returns {Promise<void>}
   * @throws {Error} 'Search results did not load' when the budget runs out.
   */
  async waitForResults(page, timeoutMs = 90000) {
    const start = Date.now();
    let previousCount = -1;
    let stableRounds = 0;

    while (Date.now() - start < timeoutMs) {
      const stats = await this.getPageStats(page);

      if (stats.error) {
        await this.randomDelay(500, 800);
        continue;
      }

      if (stats.cardCount > 0) {
        if (stats.cardCount === previousCount) {
          stableRounds++;
          if (stableRounds >= 2) {
            await this.randomDelay(800, 1200);
            return;
          }
        } else {
          stableRounds = 0;
          previousCount = stats.cardCount;
        }
      }

      await this.randomDelay(500, 800);
    }

    throw new Error('Search results did not load');
  }

  /**
   * Scrolls to the bottom of the page repeatedly (at most 10 times) and stops
   * early once the card count stayed the same for two consecutive rounds.
   *
   * @param {object} page - Page object exposing `evaluate()`.
   * @returns {Promise<void>}
   */
  async scrollResults(page) {
    let previousCount = 0;
    let stableRounds = 0;

    for (let i = 0; i < 10; i++) {
      const count = await page.evaluate(() => {
        window.scrollTo(0, document.body.scrollHeight);
        return document.querySelectorAll('a.index-card').length;
      });

      if (count === previousCount) {
        stableRounds++;
        if (stableRounds >= 2) {
          break;
        }
      } else {
        stableRounds = 0;
        previousCount = count;
      }

      await this.randomDelay(800, 1200);
    }
  }

  /**
   * Reads the "Page X / Y" pagination label.
   *
   * @param {object} page - Page object exposing `evaluate()`.
   * @returns {Promise<{currentPage: number, totalPages: number}>} Both values
   *   default to 1 when the label is missing or does not match.
   */
  async getPageMeta(page) {
    return page.evaluate(() => {
      const pageText = document.querySelector('.index-pagination-current')?.textContent || '';
      const match = pageText.match(/Page\s+(\d+)\s*\/\s*(\d+)/i);

      return {
        currentPage: match ? Number(match[1]) : 1,
        totalPages: match ? Number(match[2]) : 1,
      };
    });
  }

  /**
   * Extracts one record per `a.index-card` element on the current page.
   *
   * @param {object} page - Page object exposing `evaluate()`.
   * @param {string} keyword - Search term, copied into every record.
   * @returns {Promise<Array<object>>} Records with `searchKeyword`, `title`,
   *   `description`, `type`, `subscribers`, `language`, `verified`,
   *   `sponsored`, `avatarUrl` and `openUrl`.
   */
  async extractChannels(page, keyword) {
    // The callback runs inside the browser, so every helper it needs has to
    // be declared within it.
    return page.evaluate((searchKeyword) => {
      // Pulls the target URL out of the card's inline onclick handler.
      const parseOpenUrl = (onclick) => {
        if (!onclick) return null;

        const indexOpenMatch = onclick.match(/indexOpen\(event,'([^']+)'\)/);
        if (indexOpenMatch) return indexOpenMatch[1];

        const gowMatch = onclick.match(/gow\(this,'([^']+)'\)/);
        if (gowMatch) return gowMatch[1];

        return null;
      };

      // Classifies the card's tag chips into type / language / subscribers.
      const parseTags = (card) => {
        const tags = [...card.querySelectorAll('.index-card-tag')]
          .map((tag) => (tag.textContent ?? '').replace(/\s+/g, ' ').trim())
          // Drop blank chips up front so the string calls below are safe.
          .filter(Boolean);

        let type = null;
        let subscribers = null;
        let language = null;

        for (const tag of tags) {
          const lower = tag.toLowerCase();
          if (['channel', 'group', 'bot'].includes(lower)) {
            type = lower;
          } else if (/^[a-z]{2}$/i.test(tag)) {
            // Exactly two letters is taken to be a language code.
            language = lower;
          } else {
            // Any other non-empty chip is kept as the subscribers label.
            subscribers = tag;
          }
        }

        return { type, subscribers, language };
      };

      return [...document.querySelectorAll('a.index-card')].map((card) => {
        const { type, subscribers, language } = parseTags(card);
        const avatarImg = card.querySelector('.index-card-avatar img');

        return {
          searchKeyword,
          title: card.querySelector('.index-card-title')?.textContent?.trim() || null,
          description: card.querySelector('.index-card-desc')?.textContent?.trim() || null,
          type,
          subscribers,
          language,
          verified: Boolean(card.querySelector('.index-card-verified')),
          sponsored: card.classList.contains('index-card-sponsored'),
          avatarUrl: avatarImg?.src || null,
          openUrl: parseOpenUrl(card.getAttribute('onclick')),
        };
      });
    }, keyword);
  }

  /**
   * Sleeps for a random whole number of milliseconds in `[min, max]`.
   *
   * @param {number} [min=500] - Lower bound in milliseconds (inclusive).
   * @param {number} [max=1500] - Upper bound in milliseconds (inclusive).
   * @returns {Promise<void>}
   */
  async randomDelay(min = 500, max = 1500) {
    const delay = Math.floor(Math.random() * (max - min + 1) + min);
    await new Promise((resolve) => setTimeout(resolve, delay));
  }
}
358
// Actor.main() initializes the actor itself, runs the callback and then exits,
// so no separate Actor.init() call is needed. Awaiting it keeps the returned
// promise from floating.
await Actor.main(async () => {
  const input = await Actor.getInput();
  const scraper = new TelegramChannelsSearchScraper();
  await scraper.run(input);
});