1
2
3
4
5
6import { Actor } from 'apify';
7import { createHash } from 'node:crypto';
8
9
10
// Optional dependency: use undici's fetch and ProxyAgent when the package can
// be imported. If the import fails, both bindings stay undefined.
let undiciFetch;
let ProxyAgent;
try {
  const undici = await import('undici');
  undiciFetch = undici.fetch;
  ProxyAgent = undici.ProxyAgent;
} catch {
  // Best-effort import: fall through to the global fetch selected below.
}

// fetch implementation used by this module: undici's when loaded, otherwise the global one.
const doFetch = undiciFetch || fetch;
14
// Memoized result of getProxyConfig(): `undefined` means "not resolved yet",
// `null` means "no proxy configuration available".
let proxyConfiguration;

/**
 * Returns the proxy configuration for this run, creating it on first use.
 *
 * A configuration is only requested when `Actor.isAtHome()` is truthy and
 * `ProxyAgent` was loaded; otherwise the result is `null`. A rejected
 * `Actor.createProxyConfiguration()` also yields `null`.
 *
 * @returns {Promise<object|null>} The value resolved by
 *   `Actor.createProxyConfiguration()`, or `null` when none is available.
 */
async function getProxyConfig() {
  if (proxyConfiguration !== undefined) return proxyConfiguration;

  if (!Actor.isAtHome() || !ProxyAgent) {
    proxyConfiguration = null;
    return proxyConfiguration;
  }

  const created = await Actor.createProxyConfiguration().catch(() => null);
  // createProxyConfiguration() may resolve to undefined. Storing that would
  // look like "not resolved yet" and trigger a fresh creation on every call,
  // so normalize it to null.
  proxyConfiguration = created ?? null;
  return proxyConfiguration;
}
23
// Monotonic counter; each dispatcher request consumes one value so that every
// call passes a distinct `s<N>` identifier to `newUrl`.
let proxySeq = 0;

/**
 * Builds a ProxyAgent from a freshly requested proxy URL.
 *
 * The URL's percent-encoded username and password are decoded and sent as a
 * Basic token; the agent itself is pointed at `<protocol>//<host>`.
 *
 * @returns {Promise<object|null>} A new ProxyAgent, or `null` when no proxy
 *   configuration is available or anything fails while building the agent.
 */
async function nextProxyDispatcher() {
  const config = await getProxyConfig();
  if (!config) {
    return null;
  }

  try {
    // presumably the argument is a proxy session id — confirm against the proxy configuration API
    const sessionId = `s${proxySeq++}`;
    const proxyUrl = new URL(await config.newUrl(sessionId));

    const user = decodeURIComponent(proxyUrl.username);
    const pass = decodeURIComponent(proxyUrl.password);
    const credentials = Buffer.from(`${user}:${pass}`).toString('base64');

    const { protocol, host } = proxyUrl;
    return new ProxyAgent({
      uri: `${protocol}//${host}`,
      token: `Basic ${credentials}`,
    });
  } catch {
    // Best-effort: callers treat null as "no proxy dispatcher available".
    return null;
  }
}
36
/**
 * Fetches `url` and parses the response body as JSON, retrying transient failures.
 *
 * The first attempt uses `opts` as given. Every later attempt asks
 * `nextProxyDispatcher()` for a dispatcher and, when one is returned, sends the
 * request through it (overriding any `opts.dispatcher`).
 *
 * Retried: network/parse errors, HTTP 429 and HTTP 5xx, with exponential
 * backoff (800ms doubling, capped at 15s) between attempts.
 * Not retried: any other non-OK status (thrown with `fatal: true`), and any
 * failure that happens once the caller's `opts.signal` is already aborted.
 *
 * @param {string|URL} url - Request target, passed straight to fetch.
 * @param {object} [opts] - fetch options; copied, never mutated.
 * @param {object} [config]
 * @param {number} [config.retries=5] - Total number of attempts. Values below 1
 *   (or non-numeric) are treated as 1 so a request is always made.
 * @returns {Promise<any>} The parsed JSON body of the first OK response.
 * @throws {Error} The fatal HTTP error, or the error of the last failed attempt.
 */
export async function fetchJson(url, opts = {}, { retries = 5 } = {}) {
  // Always make at least one attempt; otherwise the loop is skipped and the
  // function would end up doing `throw undefined`.
  const maxAttempts = retries >= 1 ? retries : 1;
  let lastErr;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const fetchOpts = { ...opts };
    if (attempt > 0) {
      const dispatcher = await nextProxyDispatcher();
      if (dispatcher) fetchOpts.dispatcher = dispatcher;
    }

    try {
      const res = await doFetch(url, fetchOpts);
      if (res.ok) return await res.json();

      if (res.status === 429 || res.status >= 500) {
        // The body of a retryable response is never read; cancel it so the
        // underlying connection is released instead of lingering.
        await res.body?.cancel().catch(() => {});
        throw new Error(`HTTP ${res.status}`);
      }
      const body = await res.text().catch(() => '');
      throw Object.assign(new Error(`HTTP ${res.status} ${body.slice(0, 150)}`), { fatal: true });
    } catch (e) {
      // Retrying with an already-aborted signal can only fail again.
      if (e?.fatal || fetchOpts.signal?.aborted) throw e;
      lastErr = e;

      // Nothing follows the final attempt, so do not log a retry or sleep.
      if (attempt + 1 >= maxAttempts) break;

      const backoff = Math.min(800 * 2 ** attempt, 15000);
      console.log(`request failed (attempt ${attempt + 1}/${maxAttempts}): ${e.message}; retrying in ${backoff}ms`);
      await new Promise((r) => setTimeout(r, backoff));
    }
  }
  throw lastErr;
}
62
/**
 * Creates a stateful "have I seen this record before?" predicate.
 *
 * Each record is identified by the first key in `preferredKeys` whose value is
 * neither null/undefined nor the empty string (`"<key>:<value>"`). Records with
 * no usable preferred key are identified by a SHA-1 of their JSON form, with
 * object keys sorted so that property order does not affect the identity.
 *
 * @param {string[]} [preferredKeys=[]] - Property names to try, in priority order.
 * @returns {(rec: any) => boolean} `isNew(rec)`: true the first time a record's
 *   identity is seen (and remembers it), false on every later occurrence.
 */
export function makeDeduper(preferredKeys = []) {
  const seen = new Set();

  // JSON.stringify follows insertion order, so {a, b} and {b, a} would hash
  // differently; rebuild every plain object with sorted keys before serializing.
  const canonicalJson = (value) => JSON.stringify(value, (_name, val) => {
    if (val === null || typeof val !== 'object' || Array.isArray(val)) return val;
    return Object.fromEntries(Object.keys(val).sort().map((name) => [name, val[name]]));
  });

  return function isNew(rec) {
    let key = null;
    for (const k of preferredKeys) {
      // Optional access so a null/undefined record falls through to the hash
      // fallback instead of throwing.
      const value = rec?.[k];
      if (value != null && value !== '') {
        key = `${k}:${value}`;
        break;
      }
    }
    if (key === null) {
      // String() guards against JSON.stringify returning undefined (e.g. for
      // an undefined record), which hash.update() would reject.
      const digest = createHash('sha1').update(String(canonicalJson(rec))).digest('hex');
      key = `h:${digest}`;
    }

    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  };
}