
Petition.lu signature extractor.
Pricing
Pay per usage
Go to Store

Petition.lu signature extractor.
This Actor lets you extract signatures from the official Luxembourgish petition website petitions.lu. You'll get first name(s), last name(s), and city.
0.0 (0)
Pricing
Pay per usage
1
Total users
4
Monthly users
1
Last modified
3 years ago
Dockerfile
# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Second, copy just package.json and package-lock.json since those are the only
# files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start
INPUT_SCHEMA.json
{
    "title": "Petition Crawling Input Schema",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "petitionUrl": {
            "title": "Petition URL (it should look like this https://www.petitions.lu/voir-les-signatures/xxxx-xxxx)",
            "type": "string",
            "nullable": false,
            "description": "Please paste here the URL of the petition you want to crawl signatures from.",
            "editor": "textfield"
        }
    }
}
main.js
// This is the main Node.js source code file of the actor.
// It is referenced from the "scripts" section of the package.json file.
//
// It reads the petition URL from the actor input, builds an input object for
// the generic apify/web-scraper actor, and then metamorphs into that actor so
// the actual crawling is delegated to web-scraper.

const Apify = require('apify');

Apify.main(async () => {
    // Get input of the actor. Input fields can be modified in INPUT_SCHEMA.json file.
    // For more information, see https://docs.apify.com/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Input prepared for actor apify/web-scraper; this input is based on an
    // actor task used as the starting point.
    const metamorphInput = {
        "runMode": "DEVELOPMENT",
        "startUrls": [
            {
                "url": input.petitionUrl,
                "method": "GET"
            }
        ],
        "keepUrlFragments": false,
        "linkSelector": "a[href]",
        // Only follow the pagination links of the signature listing
        // (tx_petition_singlepetitionsignatures[pagenumber]=<n>&cHash=<hash>).
        "pseudoUrls": [
            {
                "purl": input.petitionUrl + "?tx_petition_singlepetitionsignatures%5Bpagenumber%5D=[(\\d)+]&cHash=[(\\w)+]",
                "method": "GET"
            }
        ],
        // The function accepts a single argument: the "context" object.
        // For a complete list of its properties and functions,
        // see https://apify.com/apify/web-scraper#page-function
        "pageFunction": async function pageFunction(context) {
            // This statement works as a breakpoint when you're trying to debug your code. Works only with Run mode: DEVELOPMENT!
            // debugger;

            // jQuery is handy for finding DOM elements and extracting data from them.
            // To use it, make sure to enable the "Inject jQuery" option.
            const $ = context.jQuery;
            const signatures = [];
            $('.petition-signatures div p').each(function () {
                // Each entry looks like "Firstname Lastname, City"; the city is
                // the LAST comma-separated part, the name is everything before
                // it (a name may itself contain commas).
                const parts = $(this).text().split(',');
                if (parts.length < 2) {
                    // No comma at all: keep the whole text as the name.
                    signatures.push({ name: parts[0].trim(), city: '' });
                } else {
                    // BUGFIX: the original used split.subarray(0, length - 2),
                    // which throws (plain arrays have no subarray), dropped one
                    // part too many, and re-joined with a space instead of the
                    // comma it split on.
                    signatures.push({
                        name: parts.slice(0, parts.length - 1).join(',').trim(),
                        city: parts[parts.length - 1].trim(),
                    });
                }
            });

            // Print some information to actor log (stringified so the array of
            // objects is readable instead of "[object Object]").
            context.log.info(`URL: ${context.request.url}, SIGNATURES: ${JSON.stringify(signatures)}`);

            // Manually add a new page to the queue for scraping.
            // context.enqueueRequest({ url: 'http://www.example.com' });

            // Return an object with the data extracted from the page.
            // It will be stored to the resulting dataset.
            return signatures;
        },
        "preNavigationHooks": `// We need to return array of (possibly async) functions here.
        // The functions accept two arguments: the "crawlingContext" object
        // and "gotoOptions".
        [
            async (crawlingContext, gotoOptions) => {
                // ...
            },
        ]`,
        "postNavigationHooks": `// We need to return array of (possibly async) functions here.
        // The functions accept a single argument: the "crawlingContext" object.
        [
            async (crawlingContext) => {
                // ...
            },
        ]`,
        "injectJQuery": true,
        "injectUnderscore": false,
        "proxyConfiguration": {
            "useApifyProxy": false
        },
        "proxyRotation": "RECOMMENDED",
        "useChrome": false,
        "useStealth": false,
        "ignoreSslErrors": false,
        "ignoreCorsAndCsp": false,
        "downloadMedia": false,
        "downloadCss": false,
        "waitUntil": [
            "networkidle2"
        ],
        "breakpointLocation": "NONE",
        "debugLog": false,
        "browserLog": false
    };

    // Now let's metamorph into actor apify/web-scraper using the created input.
    await Apify.metamorph('apify/web-scraper', metamorphInput);
});
package.json
{
    "name": "extract-signatures-chambre-depute-luxembourg",
    "version": "0.0.1",
    "dependencies": {
        "apify": "^2.1.0"
    },
    "scripts": {
        "start": "node main.js"
    },
    "author": "Thibault Milan"
}