1import { Actor } from 'apify';
2import { fetchJson, makeDeduper } from './robust.js';
// Start the Apify Actor runtime before reading input or writing results.
await Actor.init();

// Run input. A missing input (null/undefined) is treated as an empty object so
// the validation below reports the problem instead of a destructuring error.
const input = (await Actor.getInput()) ?? {};
const { ckanDomain, resourceId = '', query = '' } = input;

// `ckanDomain` is mandatory. Rejecting non-strings here keeps the failure a
// readable message rather than a TypeError from `.replace` below.
if (typeof ckanDomain !== 'string' || !ckanDomain.trim()) {
  throw new Error('Provide "ckanDomain", e.g. catalog.data.gov or data.gov.uk');
}

// Upper bound on the number of items pushed in this run (default 1000).
// `??` also covers an explicit null, which a destructuring default would not.
// The value is coerced and floored because it is later interpolated into the
// `limit` / `rows` query parameters of the CKAN requests.
const maxItems = Math.floor(Number(input.maxItems ?? 1000));
if (!Number.isFinite(maxItems) || maxItems < 0) {
  throw new Error('"maxItems" must be a non-negative number.');
}

// Normalised site root: surrounding whitespace, any leading http(s):// scheme
// (matched case-insensitively) and trailing slashes are removed, then https is
// forced. A path prefix inside `ckanDomain`, if any, is preserved.
const base = `https://${ckanDomain.trim().replace(/^https?:\/\//i, '').replace(/\/+$/, '')}`;

// Options object handed to every `fetchJson` call below; it carries the request
// headers (how `fetchJson` applies them is defined in ./robust.js).
const H = { headers: { 'User-Agent': 'apify-ckan-exporter/1.0', Accept: 'application/json' } };

// Running count of items pushed so far; updated by the export code that follows.
let pushed = 0;
/**
 * Renders the `error` field of a CKAN response for logging, capped at 200 chars.
 *
 * `JSON.stringify` returns `undefined` when the field is absent, so a fixed
 * label is substituted instead of calling `.slice` on `undefined`.
 *
 * @param {object|null|undefined} payload - Parsed response body.
 * @returns {string} Truncated, log-safe description of the error.
 */
function describeCkanError(payload) {
  return (JSON.stringify(payload?.error) ?? 'no error details in response').slice(0, 200);
}

/**
 * Maps one `package_search` result entry to the flat record that is exported.
 *
 * @param {object} d - Dataset entry from `result.results`.
 * @returns {object} Record with title, name, organization, notes (first 500
 *   chars), tag names, resource name/format/url triples, modification time and
 *   the dataset page URL under `base`.
 */
function toDatasetRecord(d) {
  return {
    title: d.title,
    name: d.name,
    organization: d.organization?.title || null,
    notes: (d.notes || '').slice(0, 500),
    tags: (d.tags || []).map((t) => t.name),
    resources: (d.resources || []).map((r) => ({ name: r.name, format: r.format, url: r.url })),
    metadata_modified: d.metadata_modified,
    datasetUrl: `${base}/dataset/${d.name}`,
  };
}

/**
 * Pages through one CKAN action and pushes previously unseen items to the
 * dataset until `maxItems` is reached, the source is exhausted, or a request
 * fails. Failures are logged and end the paging; items already pushed are kept.
 *
 * Reads the module-level `maxItems` and `H`, and updates `pushed`.
 *
 * @param {object} opts
 * @param {number} opts.pageSize - Largest page size to request per call.
 * @param {string} opts.label - Prefix of the progress log line.
 * @param {(size: number, offset: number) => string} opts.buildUrl - Returns the
 *   request URL for a page of `size` items starting at `offset`.
 * @param {(result: object) => {items: unknown, total: unknown}} opts.readPage -
 *   Picks the item list and the reported overall count out of `result`.
 * @param {(items: object[]) => object[]} opts.selectFresh - Converts raw items
 *   to output records and drops the ones already seen.
 * @returns {Promise<void>}
 */
async function exportPages({ pageSize, label, buildUrl, readPage, selectFresh }) {
  let offset = 0;
  while (pushed < maxItems) {
    const remaining = maxItems - pushed;
    const size = Math.min(pageSize, remaining);

    let payload;
    try {
      payload = await fetchJson(buildUrl(size, offset), H);
    } catch (err) {
      console.log(`giving up after retries: ${err.message}`);
      break;
    }
    if (!payload?.success) {
      console.log(`CKAN error: ${describeCkanError(payload)}`);
      break;
    }

    const page = readPage(payload.result ?? {});
    const items = Array.isArray(page.items) ? page.items : [];
    if (items.length === 0) break;

    // Cap the batch so a page larger than requested cannot overshoot maxItems.
    const fresh = selectFresh(items).slice(0, remaining);
    if (fresh.length > 0) {
      await Actor.pushData(fresh);
      pushed += fresh.length;
    }

    // Advance by what the server actually returned, not by what was requested.
    offset += items.length;
    console.log(`${label} ${pushed}`);

    if (Number.isFinite(page.total)) {
      // Prefer the reported total: a short page alone does not prove the end
      // when the server clamps the page size below the requested value.
      if (offset >= page.total) break;
    } else if (items.length < size) {
      // No total available: fall back to "short page means last page".
      break;
    }
  }
}

if (resourceId) {
  // Export rows of a single datastore resource.
  const isNew = makeDeduper(['_id', 'id']);
  await exportPages({
    pageSize: 1000,
    label: 'rows',
    buildUrl: (limit, offset) =>
      `${base}/api/3/action/datastore_search?resource_id=${encodeURIComponent(resourceId)}&limit=${limit}&offset=${offset}`,
    readPage: (result) => ({ items: result.records, total: result.total }),
    selectFresh: (records) => records.filter(isNew),
  });
} else if (query) {
  // List datasets matching a free-text query.
  const isNew = makeDeduper(['name']);
  await exportPages({
    pageSize: 100,
    label: 'datasets',
    buildUrl: (rows, start) =>
      `${base}/api/3/action/package_search?q=${encodeURIComponent(query)}&rows=${rows}&start=${start}`,
    readPage: (result) => ({ items: result.results, total: result.count }),
    selectFresh: (datasets) => datasets.map((d) => toDatasetRecord(d)).filter((rec) => isNew(rec)),
  });
} else {
  throw new Error('Provide either "resourceId" (export rows) or "query" (list datasets).');
}
// Final summary line: the value of the `pushed` counter at the end of the run.
console.log(`DONE ${pushed}`);

// Hand control back to the Apify SDK to end the run.
await Actor.exit();