const Apify = require('apify');
2
/**
 * Extracts every JSON-LD block (`<script type="application/ld+json">`) from
 * the current page.
 *
 * NOTE: this function is handed to the scraper as source text (it is
 * serialized with `toString()`), so it must stay fully self-contained: it may
 * only use its `context` argument and language built-ins, never names from
 * the enclosing module.
 *
 * @param {object} context - Page context supplied by the scraper.
 * @param {{ url: string }} context.request - The request being processed.
 * @param {Function} context.$ - Cheerio handle for the loaded page.
 * @param {object} context.log - Logger exposing `warning` and `exception`.
 * @param {*} context.customData - User data echoed back on every record.
 * @returns {Promise<object|object[]>} A single `{ data: {}, url, customData }`
 *   placeholder when the page has no JSON-LD script at all; otherwise an
 *   array with one `{ data, url, customData }` record per script whose
 *   content parsed to a truthy JSON value (possibly an empty array).
 */
const pageFunction = async (context) => {
    const { request, $, log, customData } = context;
    const { url } = request;

    const scripts = $('script[type="application/ld+json"]');

    if (!scripts.length) {
        log.warning('No LD+JSON found on page', { url });
        return {
            data: {},
            url,
            customData,
        };
    }

    const records = [];

    for (const el of scripts.toArray()) {
        // Guard against a non-string result so one odd node cannot abort the
        // whole page; an empty string simply fails JSON parsing below.
        const html = $(el).html();
        const raw = typeof html === 'string' ? html.trim() : '';

        let data = null;
        try {
            data = JSON.parse(raw);
        } catch (e) {
            // A malformed block is reported and skipped; the remaining blocks
            // on the page are still collected.
            log.exception(e, 'Invalid JSON', { url });
        }

        // Falsy payloads (failed parse, `null`, `false`, `0`, `""`) carry no
        // structured data and are dropped.
        if (data) {
            records.push({ data, url, customData });
        }
    }

    return records;
};
38
/**
 * Actor entry point: validates the input, then metamorphs the run into
 * `apify/cheerio-scraper` with `pageFunction` passed along as source text.
 *
 * Expected input fields: `proxyConfiguration` (required), `startUrls`
 * (required, non-empty) and `customData` (optional, forwarded untouched).
 *
 * Throws an `Error` when the proxy configuration is missing or rejected, or
 * when `startUrls` is missing or empty.
 */
Apify.main(async () => {
    // `getInput()` may yield nothing when the run has no input; fall back to
    // an empty object so the validation below reports a meaningful error
    // instead of failing on destructuring.
    const input = (await Apify.getInput()) ?? {};
    const { proxyConfiguration, startUrls, customData } = input;

    if (!proxyConfiguration) {
        throw new Error('You require a proxy to run');
    }

    // Cheap input validation comes before the proxy is set up.
    if (!startUrls?.length) {
        throw new Error('Provide a RequestList sources array on "startUrls" input');
    }

    // Only used to validate the configuration; the raw input object is what
    // gets forwarded to the target actor below.
    const proxy = await Apify.createProxyConfiguration(proxyConfiguration);

    if (!proxy) {
        throw new Error('Invalid proxy configuration');
    }

    await Apify.metamorph('apify/cheerio-scraper', {
        startUrls,
        // The target actor receives the page function as a string.
        pageFunction: pageFunction.toString(),
        proxyConfiguration,
        customData,
        ignoreSslErrors: true,
    });
});