1import { Actor } from 'apify';
2import axios from 'axios';
3
// Origin of the site; used as the base for both the API endpoint and Referer URLs.
const SITE_BASE = 'https://combot.org';

// Chart endpoint that returns the group listing.
const API_URL = `${SITE_BASE}/api/chart/all`;

// Browser-like headers (the values describe Chrome 148 on macOS) sent with each API request.
const DEFAULT_HEADERS = {
    'accept': '*/*',
    'accept-language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'sec-ch-ua': '"Chromium";v="148", "Google Chrome";v="148", "Not/A)Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36',
};
21
/**
 * Queries the chart API for Telegram groups, page by page, and pushes the
 * mapped results with `Actor.pushData`.
 *
 * `run()` is the entry point: it validates the input and sets the `lng`,
 * `pageLimit` and `maxItems` fields that `scrapeKeyword()` reads.
 */
class TelegramGroupsSearchScraper {
    /**
     * Converts an optional value to a trimmed string.
     *
     * @param {*} value - Any value; `undefined` and `null` become `''`.
     * @returns {string} The trimmed string form of `value`.
     */
    normalizeOptionalString(value) {
        if (value === undefined || value === null) return '';
        return String(value).trim();
    }

    /**
     * Builds the Referer URL of the listing page that corresponds to an offset.
     *
     * @param {string} lng - Language filter; falsy values fall back to `'all'`.
     * @param {number} offset - Zero-based item offset of the request.
     * @param {number} pageLimit - Number of items per page.
     * @returns {string} URL under `SITE_BASE` with `lng` and a 1-based `page` query.
     */
    buildReferer(lng, offset, pageLimit) {
        const page = Math.floor(offset / pageLimit) + 1;
        const params = new URLSearchParams({
            lng: lng || 'all',
            page: String(page),
        });
        return `${SITE_BASE}/top/telegram/groups?${params.toString()}`;
    }

    /**
     * Translates a proxy URL into the `proxy` section of an axios request config.
     *
     * @param {string} [proxyUrl] - Proxy URL, optionally with credentials.
     * @returns {object} `{}` when no proxy URL is given, otherwise `{ proxy: {...} }`.
     * @throws {TypeError} If `proxyUrl` is not a parseable URL.
     */
    getAxiosConfig(proxyUrl) {
        if (!proxyUrl) return {};

        const parsed = new URL(proxyUrl);
        const protocol = parsed.protocol.replace(':', '');

        // The URL parser reports an empty port when the URL uses the scheme's
        // default (e.g. `http://host:80`), so fall back explicitly instead of
        // letting Number('') turn it into port 0.
        const defaultPort = protocol === 'https' ? 443 : 80;
        const port = parsed.port ? Number(parsed.port) : defaultPort;

        const proxy = {
            protocol,
            host: parsed.hostname,
            port,
        };

        if (parsed.username) {
            // Credentials are percent-encoded inside a URL; axios expects them raw.
            proxy.auth = {
                username: decodeURIComponent(parsed.username),
                password: decodeURIComponent(parsed.password),
            };
        }

        return { proxy };
    }

    /**
     * Maps one raw API item (short keys) to the output record (descriptive keys).
     *
     * @param {string} keyword - Search keyword the item was found for; empty becomes `null`.
     * @param {object} item - Raw item; reads `p`, `t`, `u`, `s`, `pc`, `l`, `c`, `b`, `a`, `i`.
     * @returns {object} Output record with a `scrapedAt` ISO timestamp.
     */
    mapGroupItem(keyword, item) {
        const username = item.u || null;
        const avatarBase64 = item.i || null;

        return {
            searchKeyword: keyword || null,
            rank: item.p ?? null,
            title: item.t || null,
            username,
            members: item.s ?? null,
            // `??` keeps a position change of 0, which `||` would turn into null.
            // NOTE(review): assumes `pc` is numeric like the other `??` fields — confirm.
            positionChange: item.pc ?? null,
            language: item.l || null,
            chatId: item.c ?? null,
            boostCount: item.b ?? null,
            additionalInfo: item.a || null,
            avatarBase64,
            avatarUrl: avatarBase64 ? `data:image/jpeg;base64,${avatarBase64}` : null,
            telegramUrl: username ? `https://t.me/${username}` : null,
            scrapedAt: new Date().toISOString(),
        };
    }

    /**
     * Normalizes the requested page size to an integer in the range 1..100.
     *
     * @param {*} limit - Requested page size; non-numeric or zero values become 100.
     * @returns {number} Integer page size between 1 and 100.
     */
    normalizePageLimit(limit) {
        const requested = Number(limit) || 100;
        // Floor so that offsets stay whole numbers.
        return Math.min(Math.max(Math.floor(requested), 1), 100);
    }

    /**
     * Normalizes the per-keyword item cap.
     *
     * @param {*} maxItems - Requested cap; `undefined`, `null`, `''` and
     *   non-numeric values mean "no cap".
     * @returns {number} Numeric cap, or `Infinity` when no usable cap was given.
     */
    normalizeMaxItems(maxItems) {
        if (maxItems === undefined || maxItems === null || maxItems === '') {
            return Infinity;
        }
        const parsed = Number(maxItems);
        return Number.isNaN(parsed) ? Infinity : parsed;
    }

    /**
     * Validates the input and scrapes every keyword in order.
     *
     * @param {object} input - Actor input.
     * @param {Array} input.keywords - Non-empty list of keywords; an empty
     *   keyword requests the listing without a `q` filter.
     * @param {string} [input.lng='all'] - Language filter.
     * @param {number} [input.limit=100] - Page size, clamped to 1..100.
     * @param {number} [input.maxItems=Infinity] - Maximum items saved per keyword.
     * @param {object} [input.proxyConfiguration] - Passed to `Actor.createProxyConfiguration`.
     * @returns {Promise<void>}
     * @throws {Error} If `keywords` is missing, not an array, or empty.
     */
    async run(input) {
        // A missing input should surface as the validation error below,
        // not as a destructuring TypeError.
        const {
            keywords,
            lng = 'all',
            limit = 100,
            maxItems = Infinity,
            proxyConfiguration,
        } = input ?? {};

        if (!Array.isArray(keywords) || keywords.length === 0) {
            throw new Error('Input must include a non-empty keywords array');
        }

        this.lng = this.normalizeOptionalString(lng) || 'all';
        this.pageLimit = this.normalizePageLimit(limit);
        this.maxItems = this.normalizeMaxItems(maxItems);

        const proxyConfig = proxyConfiguration
            ? await Actor.createProxyConfiguration(proxyConfiguration)
            : undefined;

        for (const [index, rawKeyword] of keywords.entries()) {
            const keyword = String(rawKeyword ?? '').trim();
            // A fresh proxy URL is requested for every keyword.
            const proxyUrl = proxyConfig ? await proxyConfig.newUrl() : undefined;
            const axiosConfig = this.getAxiosConfig(proxyUrl);

            await this.scrapeKeyword(keyword, axiosConfig);

            // Pause between keywords only; there is nothing to wait for after the last one.
            if (index < keywords.length - 1) {
                await this.randomDelay(500, 1500);
            }
        }
    }

    /**
     * Requests one page of results from the API.
     *
     * @param {string} keyword - Search keyword; sent as `q` only when non-empty.
     * @param {number} offset - Zero-based item offset.
     * @param {object} axiosConfig - Extra axios config (proxy settings).
     * @returns {Promise<Array>} The response body if it is an array, otherwise `[]`.
     */
    async fetchPage(keyword, offset, axiosConfig) {
        const params = {
            limit: this.pageLimit,
            offset,
            lng: this.lng,
        };

        if (keyword) {
            params.q = keyword;
        }

        const response = await axios.get(API_URL, {
            ...axiosConfig,
            params,
            headers: {
                ...DEFAULT_HEADERS,
                Referer: this.buildReferer(this.lng, offset, this.pageLimit),
            },
            timeout: 60000,
        });

        return Array.isArray(response.data) ? response.data : [];
    }

    /**
     * Pages through the results for one keyword and pushes the mapped items.
     *
     * Stops when `maxItems` is reached, a request fails, a page is empty or
     * shorter than the page size, or a page yields no new items. If a request
     * fails before anything was saved, a single error record is pushed instead.
     *
     * @param {string} keyword - Search keyword; empty means no `q` filter.
     * @param {object} axiosConfig - Extra axios config (proxy settings).
     * @returns {Promise<void>}
     */
    async scrapeKeyword(keyword, axiosConfig) {
        const label = keyword ? `"${keyword}"` : 'top groups';
        console.log(`Searching Telegram groups for ${label}...`);

        const seenKeys = new Set();
        let offset = 0;
        let totalSaved = 0;

        while (totalSaved < this.maxItems) {
            const page = Math.floor(offset / this.pageLimit) + 1;
            console.log(`Fetching page ${page} (offset ${offset}) for ${label}...`);

            let items;
            try {
                items = await this.fetchPage(keyword, offset, axiosConfig);
            } catch (error) {
                const message = error.response?.data?.message || error.message;
                console.error(`API request failed for ${label}:`, message);
                // Record the failure only when the keyword produced no data at all.
                if (totalSaved === 0) {
                    await Actor.pushData([
                        {
                            searchKeyword: keyword || null,
                            error: message,
                            scrapedAt: new Date().toISOString(),
                        },
                    ]);
                }
                break;
            }

            if (items.length === 0) {
                console.log(`No more results for ${label}`);
                break;
            }

            const currentData = [];
            for (const item of items) {
                if (totalSaved >= this.maxItems) break;

                // Skip entries that are not objects; reading their fields would throw.
                if (item === null || typeof item !== 'object') continue;

                // Deduplicate on chat id, falling back to username; items with
                // neither are always kept.
                const dedupeKey = item.c ?? item.u;
                if (dedupeKey !== undefined && dedupeKey !== null) {
                    if (seenKeys.has(dedupeKey)) continue;
                    seenKeys.add(dedupeKey);
                }

                currentData.push(this.mapGroupItem(keyword, item));
                totalSaved++;
            }

            if (currentData.length === 0) {
                // A page with nothing new means paging further cannot make
                // progress; without this stop an uncapped run could loop forever.
                console.log(`No new groups on page ${page} for ${label}, stopping`);
                break;
            }

            console.log(`Saved ${currentData.length} groups from page ${page}`);
            await Actor.pushData(currentData);

            if (totalSaved >= this.maxItems) break;
            if (items.length < this.pageLimit) {
                console.log(`Last page received for ${label}`);
                break;
            }

            offset += this.pageLimit;
            await this.randomDelay(800, 1500);
        }

        console.log(`Finished ${label} with ${totalSaved} groups`);
    }

    /**
     * Waits for a random whole number of milliseconds in `[min, max]`.
     *
     * @param {number} [min=500] - Minimum delay in milliseconds.
     * @param {number} [max=1500] - Maximum delay in milliseconds.
     * @returns {Promise<void>} Resolves after the delay.
     */
    async randomDelay(min = 500, max = 1500) {
        const delay = Math.floor(Math.random() * (max - min + 1) + min);
        await new Promise((resolve) => setTimeout(resolve, delay));
    }
}
205
// Actor.main() initializes the Actor before running the callback and exits
// once it settles, so a separate Actor.init() call is not needed. The
// returned promise is awaited so it is not left floating.
await Actor.main(async () => {
    const input = await Actor.getInput();

    const scraper = new TelegramGroupsSearchScraper();
    await scraper.run(input);
});