OnTheList Scraper avatar
OnTheList Scraper
Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
OnTheList Scraper

OnTheList Scraper

mshopik/onthelist-scraper

Scrape OnTheList and extract data from onthelist-store.com. Our OnTheList API lets you crawl product information and pricing. The saved data can be downloaded as HTML, JSON, CSV, Excel, and XML.

INPUT_SCHEMA.json

1{
2    "title": "onthelist-scraper",
3    "description": "",
4    "type": "object",
5    "schemaVersion": 1,
6    "properties": {
7        "maxRequestsPerCrawl": {
8            "title": "Max items",
9            "description": "How many items to extract from onthelist-store.com",
10            "default": 20,
11            "prefill": 20,
12            "type": "integer",
13            "editor": "number"
14        },
15        "extendOutputFunction": {
16            "title": "Extend Output Function",
17            "description": "Add or remove properties on the output object, or omit the output by returning null",
18            "type": "string",
19            "default": "async ({ data, item, product, images, fns, name, request, variants, context, customData, input, Apify }) => {\n  return item;\n}",
20            "prefill": "async ({ data, item, product, images, fns, name, request, variants, context, customData, input, Apify }) => {\n  return item;\n}",
21            "editor": "javascript",
22            "sectionCaption": "Extend scraper functionality",
23            "sectionDescription": "You can change the output of the items for your dataset here, or add additional behavior to the scraper."
24        },
25        "extendScraperFunction": {
26            "title": "Extend Scraper Function",
27            "description": "Advanced function that extends the default scraper functionality, letting you manually perform actions on the page",
28            "type": "string",
29            "default": "async ({ fns, customData, Apify, label }) => {\n \n}",
30            "prefill": "async ({ fns, customData, Apify, label }) => {\n \n}",
31            "editor": "javascript"
32        },
33        "customData": {
34            "title": "Custom data",
35            "description": "Any data that you want to have available inside the Extend Output/Scraper Function",
36            "default": {},
37            "prefill": {},
38            "type": "object",
39            "editor": "json"
40        },
41        "fetchHtml": {
42            "title": "Fetch HTML",
43            "description": "Fetching the HTML of the pages makes the run take twice as long. This is enabled by default, so turn it off if you don't need the HTML",
44            "default": true,
45            "editor": "checkbox",
46            "type": "boolean"
47        },
48        "maxConcurrency": {
49            "title": "Max concurrency",
50            "description": "Max concurrency to use",
51            "default": 20,
52            "prefill": 20,
53            "type": "integer",
54            "editor": "number"
55        },
56        "maxRequestRetries": {
57            "title": "Max request retries",
58            "description": "Set the max request retries",
59            "default": 3,
60            "prefill": 3,
61            "type": "integer",
62            "editor": "number"
63        },
64        "debugLog": {
65            "title": "Debug Log",
66            "description": "Enable more verbose logging to help you understand what's happening during the scraping",
67            "type": "boolean",
68            "default": false,
69            "editor": "checkbox"
70        }
71    }
72}

main.js

1import Apify from 'apify';
2
/**
 * Actor entry point.
 *
 * Reads the input given to this run and hands execution over to the
 * `pocesar/shopify-scraper` actor via `Apify.metamorph`, forwarding the
 * input fields and pinning `startUrls` to the OnTheList storefront.
 */
Apify.main(async function run() {
    const STORE_URL = 'http://www.onthelist-store.com';

    const actorInput = await Apify.getInput();

    // Sources are merged left to right, so the `startUrls` defined here always
    // replaces any `startUrls` that came in with the input.
    const forwardedInput = Object.assign({}, actorInput, {
        startUrls: [{ url: STORE_URL }],
    });

    await Apify.metamorph('pocesar/shopify-scraper', forwardedInput);
});

package.json

1{
2    "name": "onthelist-scraper",
3    "version": "0.0.1",
4    "type": "module",
5    "dependencies": {
6        "apify": "^2.3.2"
7    },
8    "scripts": {
9        "start": "node main.js"
10    }
11}

Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Second, copy just package.json and package-lock.json since those are the only
# files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./

# Third, install production dependencies only (no dev or optional packages),
# then print the installed package tree and the Node.js / NPM versions into the
# build log. "npm list" is wrapped in "|| true" so that a non-zero exit status
# from the listing does not abort the build.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --only=prod --no-optional --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files. Doing this after "npm install" means that a
# source-only change does not invalidate the cached dependency layer above.
COPY . ./

# Environment for the built image. Both variables use the KEY=VALUE form; the
# space-separated "ENV KEY VALUE" form is a legacy syntax that Docker discourages.
ENV APIFY_DISABLE_OUTDATED_WARNING=1
ENV npm_config_loglevel=silent
Developer
Maintained by Community
Categories