1import { Actor } from 'apify';
2
/** InnerTube endpoint that comment requests are POSTed to. */
const INNERTUBE_API_URL = 'https://www.youtube.com/youtubei/v1/next';

/**
 * Client context embedded in the body of every InnerTube request.
 *
 * Deep-frozen because the same object is shared by all requests; a stray
 * mutation would otherwise silently affect every later call.
 * NOTE(review): `hl` / `gl` presumably select interface language and region — confirm.
 */
const INNERTUBE_CONTEXT = Object.freeze({
  client: Object.freeze({
    clientName: 'WEB',
    clientVersion: '2.20240101.00.00',
    hl: 'en',
    gl: 'US',
  }),
});
12
/**
 * Extracts a YouTube video ID from a bare ID or a video URL.
 *
 * Accepted forms:
 *  - a bare 11-character ID;
 *  - `youtube.com` (or any subdomain) watch URLs carrying a `v` parameter;
 *  - `youtu.be/<id>` short links;
 *  - `/shorts/<id>`, `/embed/<id>`, `/live/<id>` and `/v/<id>` paths on youtube.com hosts.
 * URLs may omit the scheme (`youtube.com/watch?v=...`).
 *
 * @param {string} urlOrId - Video URL or ID; surrounding whitespace is ignored.
 * @returns {string|null} The 11-character ID, or `null` when none can be extracted.
 */
function extractVideoId(urlOrId) {
  // Non-string input (numbers, objects, null) can never hold an ID.
  if (typeof urlOrId !== 'string') return null;
  const input = urlOrId.trim();
  if (input === '') return null;

  const idPattern = /^[a-zA-Z0-9_-]{11}$/;
  if (idPattern.test(input)) return input;

  // Parse as given first; if that fails, retry with a scheme so that
  // "youtube.com/watch?v=..." style input is understood as well.
  let url = null;
  for (const candidate of [input, `https://${input}`]) {
    try {
      url = new URL(candidate);
      break;
    } catch {
      // Not a parseable URL in this form; try the next candidate.
    }
  }
  if (!url) return null;

  // Compare whole host labels: a substring test would also accept
  // look-alike hosts such as "notyoutube.com" or "youtube.com.evil.example".
  const host = url.hostname.replace(/^www\./, '');
  let candidateId = null;

  if (host === 'youtu.be') {
    candidateId = url.pathname.split('/')[1];
  } else if (host === 'youtube.com' || host.endsWith('.youtube.com')) {
    if (url.searchParams.has('v')) {
      candidateId = url.searchParams.get('v');
    } else {
      const [, kind, id] = url.pathname.split('/');
      if (['shorts', 'embed', 'live', 'v'].includes(kind)) candidateId = id;
    }
  }

  // Only hand back something that actually looks like a video ID.
  return candidateId && idPattern.test(candidateId) ? candidateId : null;
}
25
/**
 * Pauses for a randomly chosen number of milliseconds.
 *
 * For integer bounds the pause is a whole number between `minMs` and
 * `maxMs`, both inclusive.
 *
 * @param {number} [minMs=800] - Lower bound of the pause.
 * @param {number} [maxMs=2000] - Upper bound of the pause.
 * @returns {Promise<void>} Settles once the pause has elapsed.
 */
async function delay(minMs = 800, maxMs = 2000) {
  const span = maxMs - minMs + 1;
  const waitMs = Math.floor(Math.random() * span) + minMs;
  await new Promise((resolve) => {
    setTimeout(resolve, waitMs);
  });
}
29
/**
 * Downloads a video's watch page and pulls out the embedded `ytInitialData`
 * object together with the InnerTube API key.
 *
 * @param {string} videoId - YouTube video ID; it is URL-encoded before use.
 * @returns {Promise<{ytInitialData: object, apiKey: string}>} Parsed page data
 *   and the API key found in the page (or a built-in fallback key).
 * @throws {Error} When the page request is not OK, the page reports an ERROR
 *   playability status, or `ytInitialData` is missing or not valid JSON
 *   (the JSON error is attached as `cause`).
 */
async function fetchInitialData(videoId) {
  // Build the query through URLSearchParams so the ID can never inject
  // additional parameters into the request.
  const watchUrl = new URL('https://www.youtube.com/watch');
  watchUrl.searchParams.set('v', videoId);

  const response = await fetch(watchUrl, {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      'Accept-Language': 'en-US,en;q=0.9',
    },
  });
  if (!response.ok) throw new Error(`Failed to fetch video page: HTTP ${response.status}`);

  const html = await response.text();
  if (html.includes('"playabilityStatus":{"status":"ERROR"')) {
    throw new Error(`Video ${videoId} not available`);
  }

  // The page assigns the data inside a <script>; capture the object literal
  // up to the `;` that directly precedes the closing tag.
  const match = html.match(/var ytInitialData\s*=\s*({.+?});\s*<\/script>/s);
  if (!match) throw new Error('Could not extract ytInitialData');

  let ytInitialData;
  try {
    ytInitialData = JSON.parse(match[1]);
  } catch (cause) {
    // Surface a message that names what failed instead of a bare SyntaxError.
    throw new Error('Could not parse ytInitialData', { cause });
  }

  // Prefer the key published by the page; otherwise use the built-in fallback.
  const apiKey = html.match(/"INNERTUBE_API_KEY":"([^"]+)"/)?.[1]
    ?? 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8';
  return { ytInitialData, apiKey };
}
48
/**
 * Locates the continuation token that loads the comment section of a watch page.
 *
 * Walks `contents.twoColumnWatchNextResults.results.results.contents`, looks
 * into every `itemSectionRenderer`, and returns the token of the first
 * `continuationItemRenderer` whose endpoint API URL contains "next" and that
 * actually carries a token.
 *
 * @param {object} ytInitialData - Parsed `ytInitialData` of the watch page.
 * @returns {string|null} The continuation token, or `null` when none is found.
 */
function findCommentsContinuation(ytInitialData) {
  const sections = ytInitialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents;
  if (!Array.isArray(sections)) return null;

  for (const section of sections) {
    const entries = section?.itemSectionRenderer?.contents;
    if (!Array.isArray(entries)) continue;

    for (const entry of entries) {
      const endpoint = entry?.continuationItemRenderer?.continuationEndpoint;
      const apiUrl = endpoint?.commandMetadata?.webCommandMetadata?.apiUrl;
      if (typeof apiUrl !== 'string' || !apiUrl.includes('next')) continue;

      // Keep scanning when a matching endpoint has no token, rather than
      // handing `undefined` back to the caller.
      const token = endpoint.continuationCommand?.token;
      if (token) return token;
    }
  }
  return null;
}
66
/**
 * Requests one page of comment data from the InnerTube endpoint.
 *
 * @param {string} continuationToken - Continuation token identifying the page to load.
 * @param {string} apiKey - InnerTube API key, sent as the `key` query parameter.
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the endpoint answers with a non-OK HTTP status.
 */
async function fetchComments(continuationToken, apiKey) {
  // Assemble the query through URLSearchParams so the key is always encoded.
  const endpoint = new URL(INNERTUBE_API_URL);
  endpoint.searchParams.set('key', apiKey);
  endpoint.searchParams.set('prettyPrint', 'false');

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      // NOTE(review): '1' is presumably the numeric ID of the WEB client — confirm.
      'X-YouTube-Client-Name': '1',
      // Taken from the shared context so header and request body cannot drift apart.
      'X-YouTube-Client-Version': INNERTUBE_CONTEXT.client.clientVersion,
    },
    body: JSON.stringify({ context: INNERTUBE_CONTEXT, continuation: continuationToken }),
  });
  if (!response.ok) throw new Error(`API error: HTTP ${response.status}`);
  return response.json();
}
81
/**
 * Converts a displayed count such as "1.2K", "1,234" or "57" into a number.
 *
 * @param {unknown} value - Count as delivered in the response (string or number).
 * @returns {number} The count, or 0 when nothing numeric can be read.
 */
function parseAbbreviatedCount(value) {
  if (typeof value === 'number') return Number.isFinite(value) ? Math.trunc(value) : 0;
  if (typeof value !== 'string') return 0;

  const match = value.match(/^\s*(\d[\d,]*(?:\.\d+)?)\s*([KMB])?\b/i);
  if (!match) {
    // Unrecognized shape: fall back to plain integer parsing.
    return Number.parseInt(value, 10) || 0;
  }

  const multipliers = { K: 1e3, M: 1e6, B: 1e9 };
  const factor = match[2] ? multipliers[match[2].toUpperCase()] : 1;
  return Math.round(Number(match[1].replaceAll(',', '')) * factor);
}

/**
 * Builds flat comment records from the `commentEntityPayload` mutations of an
 * InnerTube response.
 *
 * @param {object} responseData - Parsed response; mutations are read from
 *   `frameworkUpdates.entityBatchUpdate.mutations`.
 * @param {string} videoId - ID stored on every record.
 * @returns {Array<object>} One record per comment payload; empty when the
 *   response carries no mutations.
 */
function extractFromMutations(responseData, videoId) {
  const mutations = responseData?.frameworkUpdates?.entityBatchUpdate?.mutations;
  if (!Array.isArray(mutations)) return [];

  const comments = [];
  for (const mutation of mutations) {
    const payload = mutation?.payload?.commentEntityPayload;
    if (!payload) continue; // mutation of some other entity type

    const props = payload.properties || {};
    const toolbar = payload.toolbar || {};

    comments.push({
      videoId,
      commentId: props.commentId || mutation.entityKey || '',
      author: props.authorButtonA11y || '',
      authorChannelId: payload.author?.channelId || '',
      text: props.content?.content || '',
      // NOTE(review): `likeCountNotliked` is presumably the count shown to a
      // viewer who has not liked the comment, which is what a scraper sees;
      // `likeCountLiked` would then be one higher — confirm against a live payload.
      likes: parseAbbreviatedCount(toolbar.likeCountNotliked || toolbar.likeCountLiked),
      publishedAt: props.publishedTime || '',
      isReply: Number(props.replyLevel) > 0,
      replyCount: parseAbbreviatedCount(toolbar.replyCount),
      isHearted: !!toolbar.heartState && toolbar.heartState !== 'HEART_STATE_DEFAULT',
      isVerified: !!payload.author?.isVerified,
      scrapedAt: new Date().toISOString(),
    });
  }
  return comments;
}
111
/**
 * Finds the token for the next page of comments in an InnerTube response.
 *
 * Inspects the `continuationItems` of every received endpoint (reload command
 * first, append action otherwise) and returns the first token found, either
 * directly on a continuation item or on its button command.
 *
 * @param {object} responseData - Parsed InnerTube response.
 * @returns {string|null} The next continuation token, or `null` if there is none.
 */
function findNextContinuation(responseData) {
  const received = responseData?.onResponseReceivedEndpoints;
  if (!received) return null;

  // Reads the token from whichever of the two known places holds it.
  const tokenOf = (entry) => {
    const renderer = entry?.continuationItemRenderer;
    return renderer?.continuationEndpoint?.continuationCommand?.token
      || renderer?.button?.buttonRenderer?.command?.continuationCommand?.token;
  };

  for (const endpoint of received) {
    let batch = endpoint?.reloadContinuationItemsCommand?.continuationItems;
    if (!batch) batch = endpoint?.appendContinuationItemsAction?.continuationItems;
    if (batch) {
      for (const entry of batch) {
        const token = tokenOf(entry);
        if (token) return token;
      }
    }
  }
  return null;
}
128
/**
 * Looks up the continuation token of a sort-menu entry in the comments header.
 *
 * Only reload commands whose slot name contains "HEADER" are examined. A menu
 * entry matches when its lower-cased title contains one of the fragments for
 * the requested order ("new"/"newest" for `newest`, "top"/"popular" for `top`).
 *
 * @param {object} responseData - Parsed InnerTube response.
 * @param {string} sortBy - Requested order: 'newest' or 'top'.
 * @returns {string|null|undefined} Token of the first matching entry
 *   (`undefined` if that entry carries none), or `null` when nothing matches.
 */
function findSortToken(responseData, sortBy) {
  const received = responseData?.onResponseReceivedEndpoints;
  if (!received) return null;

  // Title fragments that identify the menu entry for each supported order.
  let fragments;
  if (sortBy === 'newest') {
    fragments = ['new', 'newest'];
  } else if (sortBy === 'top') {
    fragments = ['top', 'popular'];
  } else {
    fragments = [];
  }

  for (const endpoint of received) {
    const reload = endpoint?.reloadContinuationItemsCommand;
    if (!reload?.slot?.includes('HEADER')) continue;

    const headerItems = reload.continuationItems;
    if (!headerItems) continue;

    for (const headerItem of headerItems) {
      const options = headerItem?.commentsHeaderRenderer?.sortMenu?.sortFilterSubMenuRenderer?.subMenuItems;
      if (!options) continue;

      for (const option of options) {
        const label = (option.title || '').toLowerCase();
        const token = option?.serviceEndpoint?.continuationCommand?.token;
        if (fragments.some((fragment) => label.includes(fragment))) return token;
      }
    }
  }
  return null;
}
151
/**
 * Collects up to `maxComments` comments for one video.
 *
 * Loads the first comment page from the continuation found in `ytInitialData`,
 * optionally re-requests it in "newest" order, then keeps following
 * continuation tokens until the limit is reached, a page yields no comments,
 * no further token exists, or a page request fails.
 *
 * @param {string} videoId - ID of the video being processed.
 * @param {number} maxComments - Upper bound on the number of returned comments.
 * @param {string} sortBy - 'newest' switches the order; any other value keeps the default.
 * @param {string} apiKey - InnerTube API key passed to every page request.
 * @param {object} ytInitialData - Parsed `ytInitialData` of the watch page.
 * @returns {Promise<Array<object>>} Collected comments, truncated to `maxComments`;
 *   empty when the page exposes no comment continuation.
 */
async function processVideo(videoId, maxComments, sortBy, apiKey, ytInitialData) {
  console.log(`\nProcessing video: ${videoId} (sort: ${sortBy}, max: ${maxComments})`);

  const firstToken = findCommentsContinuation(ytInitialData);
  if (!firstToken) {
    console.log(` No comments section found. Comments may be disabled.`);
    return [];
  }

  await delay(500, 1200);
  let page = await fetchComments(firstToken, apiKey);

  // The default order needs no extra request; "newest" reloads the first page.
  const newestToken = sortBy === 'newest' ? findSortToken(page, 'newest') : null;
  if (newestToken) {
    console.log(' Switching to "Newest first" sort...');
    await delay(500, 1200);
    page = await fetchComments(newestToken, apiKey);
  }

  const collected = extractFromMutations(page, videoId);
  let cursor = findNextContinuation(page);
  let pageNumber = 1;

  console.log(` Page ${pageNumber}: ${collected.length} comments`);

  while (cursor && collected.length < maxComments) {
    await delay(800, 2000);
    pageNumber += 1;

    try {
      page = await fetchComments(cursor, apiKey);
      const batch = extractFromMutations(page, videoId);
      cursor = findNextContinuation(page);

      if (batch.length === 0) {
        console.log(` Page ${pageNumber}: no more comments.`);
        break;
      }

      for (const comment of batch) collected.push(comment);
      console.log(` Page ${pageNumber}: +${batch.length} comments (total: ${collected.length})`);
    } catch (error) {
      // A failed page ends paging; everything gathered so far is still returned.
      console.error(` Error on page ${pageNumber}: ${error.message}`);
      break;
    }
  }

  return collected.slice(0, maxComments);
}
206
207
// Entry point: read the Actor input, scrape every requested video in turn,
// and push the collected comments to the default dataset.
await Actor.init();

// Set when the run aborts, so the Actor finishes as failed rather than
// reporting success after a fatal error.
let fatalError = null;

try {
  const input = await Actor.getInput();
  const videoUrls = input?.videoUrls;
  // Require a real, non-empty array: a bare string also has a `.length` and
  // would otherwise be iterated character by character.
  if (!Array.isArray(videoUrls) || videoUrls.length === 0) {
    throw new Error('Input "videoUrls" is required — provide YouTube video URLs or IDs.');
  }

  const maxComments = input.maxCommentsPerVideo ?? 100;
  const sortBy = input.sortBy || 'top';

  console.log(`=== YouTube Comments Scraper ===`);
  console.log(`Videos: ${videoUrls.length}, Max comments: ${maxComments}, Sort: ${sortBy}`);

  let totalComments = 0;

  for (const [index, rawUrl] of videoUrls.entries()) {
    const videoId = extractVideoId(rawUrl);
    if (!videoId) {
      console.error(`Skipping invalid: "${rawUrl}"`);
      continue;
    }

    console.log(`\n[${index + 1}/${videoUrls.length}] Video: ${videoId}`);

    // A failure on one video is logged and must not stop the remaining ones.
    try {
      const { ytInitialData, apiKey } = await fetchInitialData(videoId);
      const comments = await processVideo(videoId, maxComments, sortBy, apiKey, ytInitialData);

      if (comments.length > 0) {
        await Actor.pushData(comments);
        totalComments += comments.length;
        console.log(` Saved ${comments.length} comments`);
      } else {
        console.log(` No comments found`);
      }
    } catch (error) {
      console.error(` Error: ${error.message}`);
    }

    // Pause between videos, but not after the last one.
    if (index < videoUrls.length - 1) await delay(1500, 3000);
  }

  console.log(`\n=== DONE: ${totalComments} comments from ${videoUrls.length} videos ===`);
} catch (error) {
  fatalError = error;
  console.error(`Fatal: ${error.message}`);
}

if (fatalError) {
  // Finish with a failure status so the platform does not show the run as succeeded.
  await Actor.fail(`Fatal: ${fatalError.message}`);
} else {
  await Actor.exit();
}