1import { Actor } from 'apify';
2import { connect as connectRealBrowser } from 'puppeteer-real-browser';
3
/** Origin used to build ranking page URLs. */
const SITE_BASE = 'https://telegramchannels.me';

/**
 * Values accepted for the `language` filter.
 * Frozen so the shared validation list cannot be mutated by accident.
 */
const LANGUAGES = Object.freeze([
    'all', 'id', 'ms', 'de', 'en', 'en-in', 'es', 'fr', 'it', 'nl', 'pt', 'tr', 'ru', 'ar', 'fa', 'hi', 'zh', 'ko',
]);

/**
 * Values accepted for the `category` filter: 'all' or a numeric id as a string.
 * The ids are not contiguous ('2', '19' and '22' are absent from the list).
 */
const CATEGORIES = Object.freeze([
    'all', '1', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21',
    '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42',
]);

/** Values accepted for the `type` filter. */
const TYPES = Object.freeze(['all', 'channel', 'group']);

/** Browser launch flags forwarded to puppeteer-real-browser's `connect()`. */
const SCRAPER_CONFIG = Object.freeze({
    turnstile: true,
    headless: false,
});
21
/**
 * Scrapes the ranking table served under `${SITE_BASE}/ranking` and pushes the
 * extracted rows to the default Apify dataset.
 *
 * The browser is driven through puppeteer-real-browser; the class polls the
 * page until an anti-bot interstitial is gone and the table row count has
 * settled before extracting.
 */
class TelegramChannelsRankingScraper {
    /**
     * Takes a snapshot of the current page state. Never throws: if the
     * in-page evaluation fails, the failure text is returned in `error`.
     *
     * @param {object} page - Puppeteer page (anything exposing `evaluate`).
     * @returns {Promise<{title: string, bodyText: string, rowCount: number, ready: boolean, error: (string|null)}>}
     */
    async getPageStats(page) {
        try {
            // NOTE: this callback runs inside the browser and must stay self-contained.
            return await page.evaluate(() => {
                const { body } = document;
                const bodyText = body?.innerText || '';

                return {
                    title: document.title || '',
                    bodyText,
                    rowCount: document.querySelectorAll('table tbody tr').length,
                    ready: Boolean(body),
                    error: null,
                };
            });
        } catch (error) {
            return {
                title: '',
                bodyText: '',
                rowCount: 0,
                ready: false,
                // Always produce a truthy string so callers that test `stats.error`
                // also notice thrown non-Error values.
                error: error?.message || String(error),
            };
        }
    }

    /**
     * Heuristically detects a Cloudflare interstitial from the page title or body text.
     *
     * @param {string} [title=''] - Document title.
     * @param {string} [bodyText=''] - Visible body text.
     * @returns {boolean} True when the page looks like a challenge page.
     */
    isCloudflareChallenge(title = '', bodyText = '') {
        return (
            title.includes('Just a moment') ||
            title.includes('Attention Required') ||
            title.includes('Cloudflare') ||
            /security verification/i.test(bodyText)
        );
    }

    /**
     * Polls until the page has a `<body>` and can be evaluated without error,
     * or until the timeout elapses. Does not throw on timeout; the last
     * snapshot is returned either way.
     *
     * @param {object} page - Puppeteer page.
     * @param {number} [timeoutMs=30000] - Maximum time to keep polling.
     * @returns {Promise<object>} The latest result of `getPageStats`.
     */
    async waitForPageReady(page, timeoutMs = 30000) {
        const start = Date.now();

        while (Date.now() - start < timeoutMs) {
            const stats = await this.getPageStats(page);
            if (stats.ready && !stats.error) {
                return stats;
            }

            await this.randomDelay(500, 1000);
        }

        return this.getPageStats(page);
    }

    /**
     * Converts a proxy URL into the `{ host, port, username, password }` shape
     * passed as the `proxy` connect option.
     *
     * Credentials are percent-decoded, because `URL#username` / `URL#password`
     * return the encoded form. When the URL has no explicit port (or uses the
     * scheme's default port, which `URL#port` reports as ''), the default for
     * http/https is used instead of discarding the proxy.
     *
     * @param {string} [proxyUrl] - Proxy URL, e.g. `http://user:pass@host:8000`.
     * @returns {({host: string, port: number, username: (string|undefined), password: (string|undefined)}|undefined)}
     *   Parsed options, or `undefined` when no URL was given or it is unusable.
     */
    buildProxyOptions(proxyUrl) {
        if (!proxyUrl) return undefined;

        let parsed;
        try {
            parsed = new URL(proxyUrl);
        } catch {
            console.warn('Invalid proxy configuration');
            return undefined;
        }

        const defaultPorts = { 'http:': 80, 'https:': 443 };
        const port = parsed.port ? Number(parsed.port) : defaultPorts[parsed.protocol];
        if (!parsed.hostname || !port) {
            console.warn('Invalid proxy configuration');
            return undefined;
        }

        const decodeCredential = (value) => {
            if (!value) return undefined;
            try {
                return decodeURIComponent(value);
            } catch {
                // Malformed escape sequence: fall back to the raw value.
                return value;
            }
        };

        return {
            host: parsed.hostname,
            port,
            username: decodeCredential(parsed.username),
            password: decodeCredential(parsed.password),
        };
    }

    /**
     * Validates the language filter against `LANGUAGES`.
     *
     * @param {*} value - Raw input; falsy values default to 'all'.
     * @returns {string} Trimmed, lower-cased language code.
     * @throws {Error} If the value is not in `LANGUAGES`.
     */
    normalizeLanguage(value) {
        const language = String(value || 'all').trim().toLowerCase();
        if (!LANGUAGES.includes(language)) {
            throw new Error(`Unsupported language "${value}"`);
        }
        return language;
    }

    /**
     * Validates the category filter against `CATEGORIES`.
     *
     * @param {*} value - Raw input; falsy values default to 'all'.
     * @returns {string} Trimmed category id.
     * @throws {Error} If the value is not in `CATEGORIES`.
     */
    normalizeCategory(value) {
        const category = String(value || 'all').trim();
        if (!CATEGORIES.includes(category)) {
            throw new Error(`Unsupported category "${value}"`);
        }
        return category;
    }

    /**
     * Validates the type filter against `TYPES`.
     *
     * @param {*} value - Raw input; falsy values default to 'channel'.
     * @returns {string} Trimmed, lower-cased type.
     * @throws {Error} If the value is not in `TYPES`.
     */
    normalizeType(value) {
        const type = String(value || 'channel').trim().toLowerCase();
        if (!TYPES.includes(type)) {
            throw new Error(`Unsupported type "${value}"`);
        }
        return type;
    }

    /**
     * Builds the ranking page URL for the given (already normalized) filters.
     *
     * @param {{language: string, category: string, type: string}} filters
     * @returns {string} Absolute URL with the filters as query parameters.
     */
    buildRankingUrl(filters) {
        const params = new URLSearchParams({
            language: filters.language,
            category: filters.category,
            type: filters.type,
        });
        return `${SITE_BASE}/ranking?${params.toString()}`;
    }

    /**
     * Entry point: validates the input, launches the browser (optionally
     * behind a proxy), scrapes the ranking and always closes the browser.
     *
     * @param {object|null|undefined} input - Actor input. Recognised keys:
     *   `language`, `category`, `type`, `maxItems`, `proxyConfiguration`.
     *   A missing input is treated as `{}`.
     * @returns {Promise<void>}
     * @throws {Error} On invalid filters or `maxItems`, or when scraping fails.
     */
    async run(input) {
        const {
            language = 'all',
            category = 'all',
            type = 'channel',
            maxItems = 100,
            proxyConfiguration,
        } = input ?? {}; // input may be null when the run was started without one

        const filters = {
            language: this.normalizeLanguage(language),
            category: this.normalizeCategory(category),
            type: this.normalizeType(type),
        };

        // Truncate to a whole number so a fractional limit cannot yield an extra item.
        const itemLimit = Math.floor(Number(maxItems));

        if (!Number.isFinite(itemLimit) || itemLimit < 1) {
            throw new Error('maxItems must be a positive number');
        }

        const proxyConfig = proxyConfiguration
            ? await Actor.createProxyConfiguration(proxyConfiguration)
            : undefined;

        const proxyUrl = proxyConfig ? await proxyConfig.newUrl() : undefined;
        const proxyOptions = this.buildProxyOptions(proxyUrl);

        const realBrowserOption = {
            args: ['--start-maximized', '--no-sandbox'],
            turnstile: SCRAPER_CONFIG.turnstile,
            headless: SCRAPER_CONFIG.headless,
            customConfig: {},
            connectOption: {
                defaultViewport: { width: 1400, height: 900 },
            },
            ...(proxyOptions ? { proxy: proxyOptions } : {}),
            plugins: [],
        };

        let browser;
        try {
            const { page, browser: connectedBrowser } = await connectRealBrowser(realBrowserOption);
            browser = connectedBrowser;

            await page.setDefaultNavigationTimeout(180000);
            await page.setDefaultTimeout(180000);

            await this.scrapeRanking(page, filters, itemLimit);
        } finally {
            // Release the browser even when scraping throws.
            if (browser) {
                await browser.close();
            }
        }
    }

    /**
     * Navigates to the ranking page, waits for it to settle, extracts the rows
     * and pushes at most `maxItems` of them to the dataset. When nothing was
     * extracted, a single record carrying an `error` field is pushed instead.
     *
     * @param {object} page - Puppeteer page.
     * @param {{language: string, category: string, type: string}} filters
     * @param {number} maxItems - Upper bound on the number of saved rows.
     * @returns {Promise<void>}
     */
    async scrapeRanking(page, filters, maxItems) {
        console.log('Fetching ranking results...');

        const rankingUrl = this.buildRankingUrl(filters);
        await page.goto(rankingUrl, { waitUntil: 'domcontentloaded', timeout: 180000 });
        await this.waitForCloudflare(page);
        await this.waitForResults(page);

        const items = await this.extractRanking(page, filters);
        const results = items.slice(0, maxItems).map((item) => ({
            ...item,
            scrapedAt: new Date().toISOString(),
        }));

        if (results.length === 0) {
            console.log('No ranking results found');
            await Actor.pushData([
                {
                    language: filters.language,
                    category: filters.category,
                    type: filters.type,
                    error: 'No ranking results found',
                    scrapedAt: new Date().toISOString(),
                },
            ]);
            return;
        }

        await Actor.pushData(results);
        console.log(`Saved ${results.length} results`);
    }

    /**
     * Polls until the page no longer looks like a Cloudflare challenge, then
     * waits for the page to become ready.
     *
     * @param {object} page - Puppeteer page.
     * @param {number} [timeoutMs=120000] - Maximum time to wait for the challenge to clear.
     * @returns {Promise<void>}
     * @throws {Error} 'Verification timed out' when the challenge does not clear in time.
     */
    async waitForCloudflare(page, timeoutMs = 120000) {
        const start = Date.now();

        while (Date.now() - start < timeoutMs) {
            const stats = await this.getPageStats(page);

            if (stats.error) {
                // Evaluation can fail while the page is navigating; retry shortly.
                await this.randomDelay(1000, 2000);
                continue;
            }

            if (!this.isCloudflareChallenge(stats.title, stats.bodyText)) {
                await this.waitForPageReady(page);
                return;
            }

            await this.randomDelay(2000, 3000);
        }

        throw new Error('Verification timed out');
    }

    /**
     * Polls the table row count until it is non-zero and has stopped changing:
     * the same count must be observed on two polls after the one that first
     * recorded it.
     *
     * @param {object} page - Puppeteer page.
     * @param {number} [timeoutMs=90000] - Maximum time to wait.
     * @returns {Promise<void>}
     * @throws {Error} 'Ranking results did not load' when the count never settles in time.
     */
    async waitForResults(page, timeoutMs = 90000) {
        const start = Date.now();
        let previousCount = -1;
        let stableRounds = 0;

        while (Date.now() - start < timeoutMs) {
            const stats = await this.getPageStats(page);

            if (stats.error) {
                await this.randomDelay(500, 800);
                continue;
            }

            if (stats.rowCount > 0) {
                if (stats.rowCount === previousCount) {
                    stableRounds++;
                    if (stableRounds >= 2) {
                        await this.randomDelay(800, 1200);
                        return;
                    }
                } else {
                    // Count changed: restart the stability window.
                    stableRounds = 0;
                    previousCount = stats.rowCount;
                }
            }

            await this.randomDelay(500, 800);
        }

        throw new Error('Ranking results did not load');
    }

    /**
     * Extracts one record per `table tbody tr` row of the current page.
     * Missing cells or links produce `null` fields rather than an exception,
     * so a single malformed row cannot abort the whole extraction.
     *
     * @param {object} page - Puppeteer page.
     * @param {{language: string, category: string, type: string}} filters -
     *   Query filters, echoed into every record.
     * @returns {Promise<Array<object>>} Records with keys `language`, `category`,
     *   `type`, `rank`, `title`, `username`, `itemLanguage`, `itemCategory`,
     *   `subscribers`, `subscriberChange`, `changeDirection`, `telegramUrl`.
     */
    async extractRanking(page, filters) {
        // Everything inside this callback runs in the browser context, so all
        // helpers must be defined within it.
        return page.evaluate((queryFilters) => {
            // Collapses whitespace runs; yields null for missing or empty text.
            const clean = (text) => text?.replace(/\s+/g, ' ').trim() || null;

            const parseRank = (text) => {
                const match = String(text || '').match(/#(\d+)/);
                return match ? Number(match[1]) : null;
            };

            const parseLanguageCategory = (cell) => {
                // `cell` is null for rows with fewer columns than expected.
                const links = [...(cell?.querySelectorAll('a') ?? [])];

                return {
                    language: clean(links[0]?.textContent),
                    category: clean(links[1]?.textContent),
                };
            };

            const parseSubscribers = (cell) => {
                const text = clean(cell?.textContent) ?? '';
                // The first whitespace-separated token is taken as the count.
                const subscribers = text.split(' ')[0] || null;
                const changeEl = cell?.querySelector('sub');
                const subscriberChange = clean(changeEl?.textContent);

                let changeDirection = null;
                if (changeEl?.classList.contains('has-text-danger')) {
                    changeDirection = 'down';
                } else if (changeEl?.classList.contains('has-text-success')) {
                    changeDirection = 'up';
                }

                return { subscribers, subscriberChange, changeDirection };
            };

            const parseTelegramUrl = (usernameLink, usernameText) => {
                const href = usernameLink?.href || '';
                const domainMatch = href.match(/domain=([^&]+)/);
                // Prefer the handle from the link's `domain` parameter; fall back to the visible @username.
                const handle = domainMatch?.[1] || usernameText?.replace(/^@/, '').trim();
                return handle ? `https://t.me/${handle}` : null;
            };

            return [...document.querySelectorAll('table tbody tr')].map((row) => {
                const rankCell = row.querySelector('td:first-child');
                const nameCell = row.querySelector('td:nth-child(4)');
                const metaCell = row.querySelector('td:nth-child(5)');
                const subscribersCell = row.querySelector('td:nth-child(6)');
                const titleLink = nameCell?.querySelector('a:not([href^="tg://"])');
                const usernameLink = nameCell?.querySelector('a[href^="tg://resolve"]');
                const username = clean(usernameLink?.textContent);
                const { language, category } = parseLanguageCategory(metaCell);
                const { subscribers, subscriberChange, changeDirection } = parseSubscribers(subscribersCell);

                return {
                    language: queryFilters.language,
                    category: queryFilters.category,
                    type: queryFilters.type,
                    rank: parseRank(rankCell?.textContent),
                    title: clean(titleLink?.textContent),
                    username,
                    itemLanguage: language,
                    itemCategory: category,
                    subscribers,
                    subscriberChange,
                    changeDirection,
                    telegramUrl: parseTelegramUrl(usernameLink, username),
                };
            });
        }, filters);
    }

    /**
     * Sleeps for a random whole number of milliseconds in `[min, max]`.
     *
     * @param {number} [min=500] - Lower bound in milliseconds (inclusive).
     * @param {number} [max=1500] - Upper bound in milliseconds (inclusive).
     * @returns {Promise<void>}
     */
    async randomDelay(min = 500, max = 1500) {
        const delay = Math.floor(Math.random() * (max - min + 1) + min);
        await new Promise((resolve) => setTimeout(resolve, delay));
    }
}
343
// Actor.main() initialises the Actor before the callback and exits after it,
// so no separate Actor.init() call is needed. Awaiting it keeps the promise
// from floating at module top level.
await Actor.main(async () => {
    // getInput() resolves to null when the run has no input; fall back to the
    // scraper's defaults instead of failing on destructuring.
    const input = (await Actor.getInput()) ?? {};
    const scraper = new TelegramChannelsRankingScraper();
    await scraper.run(input);
});