Petition.lu signature extractor. avatar

Petition.lu signature extractor.

Try for free

No credit card required

Go to Store
Petition.lu signature extractor.

Petition.lu signature extractor.

thibault/extract-signatures-chambre-depute-luxembourg
Try for free

No credit card required

This Actor lets you extract signatures from petitions.lu, the official Luxembourgish petition website. For each signature you'll get the first name(s), last name(s), and city.

Developer
Maintained by Community

Actor Metrics

  • 1 monthly user

  • No reviews yet

  • 1 bookmark

  • Created in Nov 2021

  • Modified 2 years ago

Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Second, copy just package.json and package-lock.json, since those are the
# only files that affect "npm install" in the next step. Keeping this layer
# separate lets Docker reuse the cached install when only source files change.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. "--omit=dev --omit=optional" is the npm 7+ spelling
# of the deprecated "--only=prod --no-optional" flags. Avoid logging too much
# and print the dependency tree and tool versions for debugging.
RUN npm --quiet set progress=false \
 && npm install --omit=dev --omit=optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after the NPM install, rebuilds will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start

INPUT_SCHEMA.json

{
    "title": "Petition Crawling Input Schema",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "petitionUrl": {
            "title": "Petition URL (it should look like this https://www.petitions.lu/voir-les-signatures/xxxx-xxxx)",
            "type": "string",
            "nullable": false,
            "description": "Please paste here the URL of the petition you want to crawl signature from.",
            "editor": "textfield"
        }
    },
    "required": ["petitionUrl"]
}

main.js

// This is the main Node.js source code file of your actor.
// It is referenced from the "scripts" section of the package.json file.
//
// The actor reads a petition URL from its input, builds an input object for
// the apify/web-scraper actor (including the page function that extracts the
// signatures), and then metamorphs into that actor.

const Apify = require('apify');

Apify.main(async () => {
    // Get input of the actor. Input fields can be modified in INPUT_SCHEMA.json file.
    // For more information, see https://docs.apify.com/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Fail fast with a readable message: without this check a missing URL would
    // silently produce the start URL `undefined` and the pseudo-URL "undefined?...".
    const petitionUrl = input && typeof input.petitionUrl === 'string'
        ? input.petitionUrl.trim()
        : '';
    if (petitionUrl === '') {
        throw new Error('Input field "petitionUrl" is required and must be a non-empty string.');
    }

    // Input for actor apify/web-scraper; it is based on the actor task that was
    // used as the starting point.
    const metamorphInput = {
        // NOTE(review): "DEVELOPMENT" is kept from the original task; confirm
        // whether "PRODUCTION" is intended for regular runs.
        "runMode": "DEVELOPMENT",
        "startUrls": [
            {
                "url": petitionUrl,
                "method": "GET"
            }
        ],
        "keepUrlFragments": false,
        "linkSelector": "a[href]",
        // Only follow links to further pages of the same signature list.
        "pseudoUrls": [
            {
                "purl": `${petitionUrl}?tx_petition_singlepetitionsignatures%5Bpagenumber%5D=[(\\d)+]&cHash=[(\\w)+]`,
                "method": "GET"
            }
        ],
        // The function accepts a single argument: the "context" object.
        // For a complete list of its properties and functions,
        // see https://apify.com/apify/web-scraper#page-function
        // It must stay self-contained (no references to variables of this file),
        // because it is handed over to the web-scraper actor as part of the input.
        "pageFunction": async function pageFunction(context) {
            // jQuery is available because the "injectJQuery" option is enabled below.
            const $ = context.jQuery;
            const signatures = [];

            $('.petition-signatures div p').each((index, element) => {
                const text = $(element).text().trim();
                if (text === '') {
                    // Nothing to extract from an empty paragraph.
                    return;
                }

                // The text after the last comma is taken as the city; everything
                // before it is the name (name parts are joined with a space).
                const parts = text.split(',').map((part) => part.trim());
                if (parts.length === 1) {
                    // No comma at all: keep the text as the name, city unknown.
                    signatures.push({ name: parts[0], city: '' });
                    return;
                }

                signatures.push({
                    // Array#slice, not subarray: subarray exists only on typed arrays.
                    name: parts.slice(0, -1).join(' '),
                    city: parts[parts.length - 1],
                });
            });

            // Print some information to actor log. Log the count, because
            // interpolating the array itself would print "[object Object]".
            context.log.info(`URL: ${context.request.url}, SIGNATURES: ${signatures.length}`);

            // Return the extracted data; it will be stored to the resulting dataset.
            return signatures;
        },
        "preNavigationHooks": `// We need to return array of (possibly async) functions here.
            // The functions accept two arguments: the "crawlingContext" object
            // and "gotoOptions".
            [
                async (crawlingContext, gotoOptions) => {
                    // ...
                },
            ]`,
        "postNavigationHooks": `// We need to return array of (possibly async) functions here.
            // The functions accept a single argument: the "crawlingContext" object.
            [
                async (crawlingContext) => {
                    // ...
                },
            ]`,
        "injectJQuery": true,
        "injectUnderscore": false,
        "proxyConfiguration": {
            "useApifyProxy": false
        },
        "proxyRotation": "RECOMMENDED",
        "useChrome": false,
        "useStealth": false,
        "ignoreSslErrors": false,
        "ignoreCorsAndCsp": false,
        "downloadMedia": false,
        "downloadCss": false,
        "waitUntil": [
            "networkidle2"
        ],
        "breakpointLocation": "NONE",
        "debugLog": false,
        "browserLog": false
    };

    // Now let's metamorph into actor apify/web-scraper using the created input.
    await Apify.metamorph('apify/web-scraper', metamorphInput);
});

package.json

1{
2    "name": "extract-signatures-chambre-depute-luxembourg",
3    "version": "0.0.1",
4    "dependencies": {
5        "apify": "^2.1.0"
6    },
7    "scripts": {
8        "start": "node main.js"
9    },
10    "author": "Thibault Milan"
11}