1
2
3
4
5
6
7
8
import { Actor } from 'apify';
import { execFileSync, execSync } from 'node:child_process';
import {
  createWriteStream,
  mkdtempSync,
  readFileSync,
  renameSync,
  statSync,
  writeFileSync,
  rmSync,
} from 'node:fs';
import { randomBytes } from 'node:crypto';
import { tmpdir } from 'node:os';
import { join, extname } from 'node:path';
import { setTimeout as sleep } from 'node:timers/promises';
import { Writable } from 'node:stream';
import { safeFilename, mimeType, formatDuration, formatBytes } from './utils.js';
25
26
27
28
29
30
31
32
/**
 * Awaits a task while periodically logging a "still working" heartbeat line.
 *
 * The heartbeat runs on a timer, so it can only print while the event loop is
 * free; a task that blocks synchronously produces no heartbeat output.
 *
 * @template T
 * @param {Promise<T>|(() => T|Promise<T>)} promiseOrFn - A promise, or a function
 *   that is invoked exactly once and whose (possibly async) result is awaited.
 * @param {string} msg - Label printed at the start of every heartbeat line.
 * @param {object} [options]
 * @param {number} [options.intervalMs=15000] - Delay between heartbeat lines.
 * @param {AbortSignal} [options.abortSignal] - When aborted, heartbeat logging
 *   stops early. The task itself is NOT cancelled and is still awaited.
 * @returns {Promise<T>} Resolves with the task's value, or rejects with the
 *   task's error (including an error thrown synchronously by `promiseOrFn`).
 */
async function withHeartbeat(promiseOrFn, msg, { intervalMs = 15_000, abortSignal } = {}) {
  // A synchronous throw from promiseOrFn() rejects the returned promise,
  // because this is an async function.
  const task = Promise.resolve(
    typeof promiseOrFn === 'function' ? promiseOrFn() : promiseOrFn,
  );

  // Internal controller: aborting it cancels the pending sleep immediately, so
  // no timer (and no floating promise) outlives this call.
  const stop = new AbortController();
  const onExternalAbort = () => stop.abort();
  if (abortSignal?.aborted) {
    stop.abort();
  } else {
    abortSignal?.addEventListener('abort', onExternalAbort, { once: true });
  }

  const heartbeat = (async () => {
    let ticks = 0;
    while (!stop.signal.aborted) {
      try {
        await sleep(intervalMs, undefined, { signal: stop.signal });
      } catch {
        // The sleep was cancelled: the task finished or the caller aborted.
        return;
      }
      // The task may have settled in the same tick in which the timer fired.
      if (stop.signal.aborted) return;
      ticks += 1;
      const elapsed = ticks * (intervalMs / 1000);
      console.log(` ${msg} (still working... ${elapsed}s elapsed)`);
    }
  })();

  try {
    return await task;
  } finally {
    stop.abort();
    abortSignal?.removeEventListener('abort', onExternalAbort);
    await heartbeat;
  }
}
68
69
70
/** Pool of browser User-Agent header values that requests rotate through. */
const USER_AGENTS = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15',
];

/**
 * Picks one entry of USER_AGENTS at random.
 *
 * @returns {string} A User-Agent string from the pool.
 */
function randomUserAgent() {
  const slot = Math.floor(USER_AGENTS.length * Math.random());
  return USER_AGENTS[slot];
}
81
/**
 * Writes cookie text to `cookies.txt` inside a freshly created temp directory.
 *
 * Despite the name, the text is not parsed; it is written out unchanged.
 *
 * @param {string} text - Cookie file contents.
 * @returns {string|null} Path of the written file, or null when `text` is
 *   empty, missing, or whitespace-only (nothing is written in that case).
 */
function parseCookiesTxt(text) {
  const hasContent = Boolean(text && text.trim());
  if (!hasContent) {
    return null;
  }
  const cookieFile = join(mkdtempSync(join(tmpdir(), 'cookies-')), 'cookies.txt');
  writeFileSync(cookieFile, text);
  return cookieFile;
}
89
90
/** Path of the downloaded yt-dlp executable; stays null until ensureYtDlp() has succeeded. */
let ytDlpPath = null;

/**
 * Downloads the latest yt-dlp release binary into a fresh temp directory,
 * verifies that it runs, and records its location in `ytDlpPath`.
 *
 * Subsequent calls are no-ops once a binary has been installed. `ytDlpPath`
 * is only assigned after the download and the `--version` check both succeed,
 * so a failed attempt leaves the state untouched and a later call retries.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the download returns a non-2xx status, the network
 *   request fails, or the downloaded binary cannot be executed.
 */
async function ensureYtDlp() {
  if (ytDlpPath) return;

  const dir = mkdtempSync(join(tmpdir(), 'ytdlp-bin-'));
  const candidate = join(dir, 'yt-dlp');
  try {
    console.log(' Downloading latest yt-dlp...');
    const resp = await fetch('https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp');
    if (!resp.ok) throw new Error(`Failed to download yt-dlp: HTTP ${resp.status}`);
    const buffer = Buffer.from(await resp.arrayBuffer());
    writeFileSync(candidate, buffer, { mode: 0o755 });

    // No shell involved, so a temp path containing spaces or metacharacters is safe.
    const ver = execFileSync(candidate, ['--version'], { encoding: 'utf-8' }).trim();

    ytDlpPath = candidate;
    console.log(` yt-dlp ${ver} ready (${formatBytes(buffer.length)})`);
  } catch (err) {
    // Best-effort cleanup of the half-installed binary; the original error is what matters.
    try { rmSync(dir, { recursive: true, force: true }); } catch {}
    throw err;
  }
}
105
106
107
108
/**
 * Runs the downloaded yt-dlp binary synchronously and returns its stdout.
 *
 * Arguments are passed to the process as an argv array without going through
 * a shell, so characters such as `$`, backticks, quotes and backslashes in
 * URLs or paths reach yt-dlp literally and cannot be interpreted as shell
 * syntax.
 *
 * @param {string[]} args - Command-line arguments for yt-dlp.
 * @param {object} [options]
 * @param {number} [options.maxBuffer=10485760] - Maximum bytes of captured output.
 * @param {number} [options.timeout=120000] - Milliseconds before the process is killed.
 * @returns {string} Trimmed stdout of the process.
 * @throws {Error} When the process exits non-zero, times out, or exceeds `maxBuffer`.
 */
function ytdlp(args, { maxBuffer = 10 * 1024 * 1024, timeout = 120_000 } = {}) {
  return execFileSync(ytDlpPath, args, {
    maxBuffer,
    timeout,
    encoding: 'utf-8',
  }).trim();
}
118
119
120
121
122
123
/**
 * Asks yt-dlp for a video's metadata without downloading the media.
 *
 * Runs synchronously (it blocks until yt-dlp exits). The last non-empty line
 * of yt-dlp's stdout is parsed as the JSON info document; any earlier lines
 * are ignored.
 *
 * @param {string} url - Video page URL.
 * @param {object} [options]
 * @param {string} [options.proxyUrl] - Proxy URL forwarded via `--proxy`.
 * @param {string} [options.cookiesPath] - Cookie file forwarded via `--cookies`.
 * @returns {object} The parsed yt-dlp info document.
 * @throws {Error} When yt-dlp fails, prints nothing, or prints something that
 *   is not a JSON object.
 */
function extractInfo(url, { proxyUrl, cookiesPath } = {}) {
  // Scratch directory referenced by the output template; always removed below.
  const tmpDir = mkdtempSync(join(tmpdir(), 'ytdlp-'));
  try {
    const args = [
      url,
      '--no-playlist',
      '--quiet',
      '--no-warnings',
      '--print-json',
      '--skip-download',
      '--user-agent', randomUserAgent(),
      '--socket-timeout', '30',
      '--retries', '0',
      '--fragment-retries', '0',
      '--extractor-retries', '0',
      '--sleep-requests', '1',
      '--xff', 'default',
      '--extractor-args', 'youtube:player_client=web,mweb,android;skip=hls,dash',
      '--output', join(tmpDir, '%(title)s.%(ext)s'),
    ];
    if (proxyUrl) args.push('--proxy', proxyUrl);
    if (cookiesPath) args.push('--cookies', cookiesPath);

    const stdout = ytdlp(args, { maxBuffer: 10 * 1024 * 1024, timeout: 120_000 });

    const lines = stdout
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter(Boolean);
    const lastLine = lines[lines.length - 1];
    if (!lastLine) {
      throw new Error(`yt-dlp returned no metadata for ${url}`);
    }

    let info;
    try {
      info = JSON.parse(lastLine);
    } catch (err) {
      throw new Error(`yt-dlp returned unparseable metadata for ${url}: ${err.message}`, { cause: err });
    }
    if (info === null || typeof info !== 'object') {
      throw new Error(`yt-dlp returned unparseable metadata for ${url}: not a JSON object`);
    }

    const title = info.title || 'Unknown';
    const dur = info.duration || 0;
    console.log(` Found: "${title}" — ${formatDuration(dur)}`);
    return info;
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the real result or error.
    try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
  }
}
159
160
161
162
163
164
/**
 * Pipes a web ReadableStream into a file on disk.
 *
 * @param {ReadableStream} readableStream - Source stream (for example a fetch response body).
 * @param {string} filePath - Destination file path.
 * @returns {Promise<void>} Settles when the pipe operation has finished or failed.
 */
async function streamToFile(readableStream, filePath) {
  const sink = Writable.toWeb(createWriteStream(filePath));
  await readableStream.pipeTo(sink);
}
170
171
172
// ---------------------------------------------------------------------------
// Actor start-up: initialise the SDK, install yt-dlp, and read/normalise input.
// ---------------------------------------------------------------------------
await Actor.init();

// On an abort request, wait one second and then exit the actor.
Actor.on('aborting', async () => {
  await sleep(1000);
  await Actor.exit();
});

await ensureYtDlp();

const input = (await Actor.getInput()) || {};

// Accept a single URL string as a one-element list; a bare string would
// otherwise be iterated character by character further down.
const requestedUrls = input.videoUrls;
const videoUrls = Array.isArray(requestedUrls)
  ? requestedUrls
  : (typeof requestedUrls === 'string' && requestedUrls.trim() ? [requestedUrls.trim()] : []);

const audioFormat = input.audioFormat || 'mp3';

// The bitrate ends up on the ffmpeg command line, so only a plain number
// (optionally suffixed with "k") is accepted; anything else uses the default.
const bitrateMatch = /^(\d{1,4})k?$/i.exec(String(input.audioBitrate ?? '').trim());
const bitrateIsValid = bitrateMatch !== null && Number(bitrateMatch[1]) > 0;
if (input.audioBitrate && !bitrateIsValid) {
  console.log(`Ignoring invalid audioBitrate ${JSON.stringify(input.audioBitrate)}; using 192`);
}
const audioBitrate = bitrateIsValid ? bitrateMatch[1] : '192';

const embedMetadata = input.embedMetadata !== false;
const sleepMax = input.sleepBetweenUrls || 12;

// Cookie text must be a string; other shapes cannot be written as a cookies file.
if (input.cookies && typeof input.cookies !== 'string') {
  console.log('Ignoring cookies input: expected the text of a cookies file');
}
const cookiesText = typeof input.cookies === 'string' ? input.cookies : '';

const proxyInput = input.proxyConfiguration || {};
const useApifyProxy = proxyInput.useApifyProxy !== false;
const proxyCountry = proxyInput.apifyProxyCountry || 'US';

if (!videoUrls.length) {
  console.log('No video URLs provided.');
  await Actor.exit();
  process.exit(0);
}

console.log(`Processing ${videoUrls.length} video(s), format=${audioFormat}, bitrate=${audioBitrate}k, embed=${embedMetadata}`);

// parseCookiesTxt returns null for whitespace-only text, so only report
// cookies as loaded when a file was actually written.
let cookiesPath = null;
if (cookiesText) {
  cookiesPath = parseCookiesTxt(cookiesText);
  if (cookiesPath) console.log('Cookies loaded from input');
}

// Formats in this list are produced by re-encoding with ffmpeg; any other
// requested format keeps the downloaded audio as-is.
const needsConversion = ['mp3', 'm4a', 'flac', 'wav'].includes(audioFormat);
const kvStoreId = process.env.APIFY_DEFAULT_KEY_VALUE_STORE_ID;
const kvBaseUrl = kvStoreId
  ? `https://api.apify.com/v2/key-value-stores/${kvStoreId}/records`
  : null;
213
/**
 * Builds the ffmpeg audio-encoder arguments for a target output format.
 *
 * @param {string} format - Target format (`mp3`, `m4a`, `flac` or `wav`).
 * @param {string|number} bitrate - Bitrate in kbit/s; only applied to lossy formats.
 * @returns {string[]} Arguments to splice into the ffmpeg command line.
 */
function ffmpegAudioArgs(format, bitrate) {
  switch (format) {
    case 'mp3':
      return ['-c:a', 'libmp3lame', '-b:a', `${bitrate}k`];
    case 'm4a':
      return ['-c:a', 'aac', '-b:a', `${bitrate}k`];
    case 'flac':
      return ['-c:a', 'flac'];
    case 'wav':
      return ['-c:a', 'pcm_s16le'];
    default:
      // Unknown format: let ffmpeg choose the encoder from the file extension.
      return ['-b:a', `${bitrate}k`];
  }
}

// ---------------------------------------------------------------------------
// Main loop: for each URL, fetch metadata, download the audio, optionally
// convert and tag it, upload it to the key-value store and push a dataset row.
// A failure for one URL is recorded as an error row and does not stop the run.
// ---------------------------------------------------------------------------
for (let i = 0; i < videoUrls.length; i++) {
  const url = videoUrls[i];
  console.log(`[${i + 1}/${videoUrls.length}] Processing: ${url}`);

  // Per-URL scratch directory; removed in the finally block below.
  const tmpDir = mkdtempSync(join(tmpdir(), 'ytdlp-'));

  try {
    let proxyUrl = null;
    let proxyTier = 'none';

    if (useApifyProxy) {
      console.log(' Setting up residential proxy...');
      const proxyConfig = await Actor.createProxyConfiguration({
        groups: ['RESIDENTIAL'],
        countryCode: proxyCountry,
      });
      if (proxyConfig) {
        // A fresh random session id per URL.
        proxyUrl = await proxyConfig.newUrl(`session_${randomBytes(4).toString('hex')}`);
        proxyTier = 'residential';
      }
    }

    console.log(' Fetching video metadata via proxy...');
    // NOTE: extractInfo runs yt-dlp synchronously, which blocks the event loop,
    // so no heartbeat lines can be printed while it is running.
    const info = await withHeartbeat(
      () => extractInfo(url, { proxyUrl, cookiesPath }),
      'Fetching video metadata',
    );
    const directUrl = info.url;
    const title = info.title || 'Unknown';
    const videoId = info.id || 'unknown';
    const duration = info.duration || 0;
    const channel = info.channel || info.uploader || '';
    const sourceExt = info.ext || 'm4a';

    // Step 1: download the raw audio, first directly, then via yt-dlp as a fallback.
    const rawExt = needsConversion ? 'm4a' : sourceExt;
    const rawPath = join(tmpDir, `${safeFilename(title)}.raw.${rawExt}`);
    console.log(' Downloading audio from CDN (free, no proxy cost)...');
    try {
      await withHeartbeat(
        async () => {
          if (!directUrl) throw new Error('no direct media URL in metadata');
          const r = await fetch(directUrl, {
            headers: { 'User-Agent': randomUserAgent() },
            signal: AbortSignal.timeout(300_000),
          });
          if (!r.ok) throw new Error(`CDN returned ${r.status}`);
          await streamToFile(r.body, rawPath);
        },
        'Downloading from CDN',
      );
      console.log(` Downloaded ${formatBytes(statSync(rawPath).size)} from CDN`);
    } catch (cdErr) {
      console.log(` CDN download failed (${cdErr.message}), falling back to full yt-dlp via proxy...`);
      // Remove any partial file from the failed attempt so yt-dlp starts clean
      // instead of finding an existing (truncated) file at the output path.
      rmSync(rawPath, { force: true });
      const fetchArgs = [
        url, '--no-playlist', '--quiet', '--no-warnings',
        '--format', 'bestaudio/best',
        '--user-agent', randomUserAgent(),
        '--socket-timeout', '30',
        '--output', rawPath,
      ];
      if (proxyUrl) fetchArgs.push('--proxy', proxyUrl);
      if (cookiesPath) fetchArgs.push('--cookies', cookiesPath);
      ytdlp(fetchArgs, { maxBuffer: 50 * 1024 * 1024, timeout: 600_000 });
    }

    // Step 2: convert (and optionally tag) when a specific output format was requested.
    let audioPath;
    if (needsConversion) {
      audioPath = join(tmpDir, `${safeFilename(title)}.${audioFormat}`);
      console.log(` Converting to ${audioFormat} (${audioBitrate}k) with ffmpeg...`);
      // Arguments are passed as an array (no shell), and the encoder matches
      // the requested format rather than always being the MP3 encoder.
      execFileSync(
        'ffmpeg',
        ['-y', '-i', rawPath, '-vn', ...ffmpegAudioArgs(audioFormat, audioBitrate), audioPath],
        { stdio: ['ignore', 'pipe', 'pipe'], timeout: 300_000 },
      );
      console.log(` Converted: ${formatBytes(statSync(audioPath).size)}`);

      if (embedMetadata && audioFormat !== 'wav') {
        console.log(' Embedding metadata...');
        try {
          // Keep the real extension on the temp file so ffmpeg writes the same
          // container it read.
          const tmpAudio = `${audioPath}.tmp.${audioFormat}`;
          const tagArgs = [
            '-y', '-i', audioPath, '-c', 'copy',
            '-metadata', `title=${info.title || ''}`,
            '-metadata', `artist=${channel || ''}`,
          ];
          // The ID3 version flag only applies to MP3 output.
          if (audioFormat === 'mp3') tagArgs.push('-id3v2_version', '3');
          tagArgs.push(tmpAudio);
          execFileSync('ffmpeg', tagArgs, { stdio: ['ignore', 'pipe', 'pipe'], timeout: 60_000 });
          renameSync(tmpAudio, audioPath);
        } catch (metaErr) {
          // Tagging is best-effort: the untagged file is still uploaded.
          console.log(` Metadata embedding skipped (${metaErr.message})`);
        }
      }
    } else {
      audioPath = rawPath;
    }

    // Step 3: upload the file and record the result.
    const audioFile = readFileSync(audioPath);
    const fileExt = extname(audioPath).replace(/^\./, '');
    const kvKey = `audio-${videoId}`;
    const contentType = mimeType(fileExt);

    console.log(` Uploading ${formatBytes(audioFile.length)} to key-value store...`);
    await Actor.setValue(kvKey, audioFile, { contentType });
    console.log(` ✓ Done: "${title}" (${videoId}), ${formatDuration(duration)}, ${formatBytes(audioFile.length)}`);

    const item = {
      video_id: videoId,
      video_url: url,
      video_title: title,
      channel,
      duration,
      audio_format: fileExt,
      file_size_bytes: audioFile.length,
      kv_store_key: kvKey,
      proxy_tier_used: proxyTier,
      status: 'downloaded',
    };
    if (kvBaseUrl) item.audio_url = `${kvBaseUrl}/${kvKey}`;
    await Actor.pushData(item);
  } catch (err) {
    // Anything can be thrown, so do not assume an Error instance.
    const message = err instanceof Error ? err.message : String(err);
    console.error(` ✗ Failed: ${url} — ${message}`);
    await Actor.pushData({
      video_url: url,
      status: 'error',
      error: message.slice(0, 500),
    });
  } finally {
    // Best-effort cleanup of the scratch directory.
    try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
  }

  // Random pause between URLs (skipped after the last one).
  if (i < videoUrls.length - 1) {
    const delaySec = 5 + Math.random() * (sleepMax - 5);
    console.log(` Sleeping ${delaySec.toFixed(1)}s before next URL...`);
    await sleep(Math.round(delaySec * 1000));
  }
}

console.log('Finished processing all URLs');
await Actor.exit();