1
2
3
4const Apify = require('apify');
5
/**
 * Actor entry point.
 *
 * Reads the actor input, builds an input object for the `apify/web-scraper`
 * actor that crawls the petition page given by `input.petitionUrl` (plus its
 * paginated signature pages), and then metamorphs this run into that actor.
 *
 * Expected input: `{ petitionUrl: string }`.
 *
 * @throws {Error} When the input is missing or has no usable `petitionUrl`.
 */
Apify.main(async () => {
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Fail early with a clear message; otherwise a missing URL would surface
    // as an opaque TypeError or as the bogus pseudo-URL "undefined?tx_...".
    if (!input || typeof input.petitionUrl !== 'string' || input.petitionUrl.trim() === '') {
        throw new Error('Input must contain a non-empty "petitionUrl" string.');
    }
    const { petitionUrl } = input;

    const metamorphInput = {
        runMode: 'DEVELOPMENT',
        startUrls: [
            {
                url: petitionUrl,
                method: 'GET',
            },
        ],
        keepUrlFragments: false,
        linkSelector: 'a[href]',
        // Follow only links to further signature pages of the same petition:
        // the bracketed parts are regular expressions for the page number and
        // the cHash query parameter.
        pseudoUrls: [
            {
                purl: `${petitionUrl}?tx_petition_singlepetitionsignatures%5Bpagenumber%5D=[(\\d)+]&cHash=[(\\w)+]`,
                method: 'GET',
            },
        ],
        /**
         * Extracts the signatures listed on one crawled page.
         *
         * NOTE(review): this function is handed over to the web-scraper actor
         * as part of its input and is presumably executed there in isolation,
         * so it deliberately uses nothing from the enclosing scope.
         *
         * Each signature paragraph is expected to read "<name>, <city>".
         * The text after the last comma is the city; everything before it is
         * the name (extra commas inside the name are replaced by spaces).
         * A paragraph without any comma is treated as a name with no city.
         *
         * @param {object} context - Context object supplied by the scraper;
         *   `jQuery`, `log` and `request.url` are used.
         * @returns {Promise<Array<{name: string, city: string}>>} One record
         *   per signature paragraph found on the page.
         */
        pageFunction: async function pageFunction(context) {
            const $ = context.jQuery;
            const signatures = [];

            $('.petition-signatures div p').each((index, element) => {
                const parts = $(element)
                    .text()
                    .split(',')
                    .map((part) => part.trim());
                // Only take a city off the end when there is a comma at all.
                const city = parts.length > 1 ? parts.pop() : '';
                signatures.push({ name: parts.join(' '), city });
            });

            // Serialize explicitly: interpolating the array directly would
            // print "[object Object]" for every entry.
            context.log.info(`URL: ${context.request.url}, SIGNATURES: ${JSON.stringify(signatures)}`);

            return signatures;
        },
        preNavigationHooks: `// We need to return array of (possibly async) functions here.
 // The functions accept two arguments: the "crawlingContext" object
 // and "gotoOptions".
 [
 async (crawlingContext, gotoOptions) => {
 // ...
 },
 ]`,
        postNavigationHooks: `// We need to return array of (possibly async) functions here.
 // The functions accept a single argument: the "crawlingContext" object.
 [
 async (crawlingContext) => {
 // ...
 },
 ]`,
        injectJQuery: true,
        injectUnderscore: false,
        proxyConfiguration: {
            useApifyProxy: false,
        },
        proxyRotation: 'RECOMMENDED',
        useChrome: false,
        useStealth: false,
        ignoreSslErrors: false,
        ignoreCorsAndCsp: false,
        downloadMedia: false,
        downloadCss: false,
        waitUntil: [
            'networkidle2',
        ],
        breakpointLocation: 'NONE',
        debugLog: false,
        browserLog: false,
    };

    // Hand the rest of this run over to the generic web scraper.
    await Apify.metamorph('apify/web-scraper', metamorphInput);
});