Katy Perry Collections Scraper

mshopik/katy-perry-collections-scraper

Note: This Actor is currently under maintenance and may be unreliable.

Scrape Katy Perry Collections and extract footwear data from katyperrycollections.com. Our Katy Perry Collections API lets you crawl product information and pricing. The scraped data can be downloaded as HTML, JSON, CSV, Excel, or XML.
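
The Actor can also be started programmatically. Below is a minimal sketch using the official apify-client package; the token is a placeholder, and the input fields mirror INPUT_SCHEMA.json shown below:

import { ApifyClient } from 'apify-client';

const client = new ApifyClient({ token: '<YOUR_APIFY_TOKEN>' });

// Start the Actor and wait for the run to finish.
const run = await client.actor('mshopik/katy-perry-collections-scraper').call({
    maxRequestsPerCrawl: 100,
    proxyConfig: { useApifyProxy: true },
});

// Fetch the scraped items from the run's default dataset as JSON.
const { items } = await client.dataset(run.defaultDatasetId).listItems();
console.log(items);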

INPUT_SCHEMA.json

{
    "title": "katy-perry-collections-scraper",
    "description": "",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "maxRequestsPerCrawl": {
            "title": "Max items",
            "description": "How many items to extract from katyperrycollections.com",
            "default": 100,
            "prefill": 100,
            "type": "integer",
            "editor": "number"
        },
        "proxyConfig": {
            "title": "Proxy configuration",
            "type": "object",
            "description": "Proxies are required when running on the platform.",
            "prefill": {
                "useApifyProxy": true
            },
            "default": {
                "useApifyProxy": true
            },
            "editor": "proxy"
        },
        "extendOutputFunction": {
            "title": "Extend Output Function",
            "description": "Add or remove properties on the output object, or omit the item entirely by returning null",
            "type": "string",
            "default": "async ({ data, item, product, images, fns, name, request, variants, context, customData, input, Apify }) => {\n  return item;\n}",
            "prefill": "async ({ data, item, product, images, fns, name, request, variants, context, customData, input, Apify }) => {\n  return item;\n}",
            "editor": "javascript",
            "sectionCaption": "Extend scraper functionality",
            "sectionDescription": "You can change the output of the items for your dataset here, or add additional behavior to the scraper."
        },
        "extendScraperFunction": {
            "title": "Extend Scraper Function",
            "description": "Advanced function that extends the default scraper functionality, allowing you to manually perform actions on the page",
            "type": "string",
            "default": "async ({ fns, customData, Apify, label }) => {\n \n}",
            "prefill": "async ({ fns, customData, Apify, label }) => {\n \n}",
            "editor": "javascript"
        },
        "customData": {
            "title": "Custom data",
            "description": "Any data that you want to have available inside the Extend Output/Scraper Function",
            "default": {},
            "prefill": {},
            "type": "object",
            "editor": "json"
        },
        "fetchHtml": {
            "title": "Fetch HTML",
            "description": "Fetching the HTML of the pages roughly doubles the run time. Make sure to only enable this if needed",
            "default": true,
            "editor": "checkbox",
            "type": "boolean"
        },
        "maxConcurrency": {
            "title": "Max concurrency",
            "description": "Maximum concurrency to use",
            "default": 20,
            "prefill": 20,
            "type": "integer",
            "editor": "number"
        },
        "maxRequestRetries": {
            "title": "Max request retries",
            "description": "Maximum number of times a failed request is retried",
            "default": 3,
            "prefill": 3,
            "type": "integer",
            "editor": "number"
        },
        "debugLog": {
            "title": "Debug Log",
            "description": "Enable more verbose logging to understand what's happening during the scrape",
            "type": "boolean",
            "default": false,
            "editor": "checkbox"
        }
    },
    "required": [
        "proxyConfig"
    ]
}
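
As the schema notes, the Extend Output Function can add or remove properties on each item, or drop an item entirely by returning null. A hypothetical sketch (the "price" field and the maxPrice key in customData are illustrative; actual field names depend on the scraper's output):

// Skips items above a price cap passed via the customData input,
// stamps each kept item, and removes a field we don't need.
async ({ item, customData }) => {
    if (customData.maxPrice && item.price > customData.maxPrice) {
        return null; // returning null omits the item from the dataset
    }
    item.scrapedAt = new Date().toISOString(); // add a property
    delete item.images; // remove one, if present
    return item;
};

The matching Custom data input would then be, for example, { "maxPrice": 100 }.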

main.js

import Apify from 'apify';

Apify.main(async () => {
    const input = await Apify.getInput();

    // This Actor does no scraping itself. It metamorphs the current run
    // into the generic Shopify scraper, forwarding the user's input and
    // pinning the start URL to katyperrycollections.com.
    await Apify.metamorph('pocesar/shopify-scraper', {
        ...input,
        startUrls: [{
            url: 'http://www.katyperrycollections.com',
        }],
    });
});
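
Apify.metamorph() replaces the running Actor with pocesar/shopify-scraper while keeping the same run and default dataset, so the Shopify scraper's results appear under this Actor's run. If you instead wanted to keep this run alive and launch the Shopify scraper as a separate child run, a sketch of the alternative would be:

// Spawns pocesar/shopify-scraper as a separate run instead of metamorphing.
// Note: results would then land in the child run's default dataset.
const run = await Apify.call('pocesar/shopify-scraper', {
    ...input,
    startUrls: [{ url: 'http://www.katyperrycollections.com' }],
});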

package.json

{
    "name": "katy-perry-collections-scraper",
    "version": "0.0.1",
    "type": "module",
    "dependencies": {
        "apify": "^2.1.0"
    },
    "scripts": {
        "start": "node main.js"
    }
}

Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Second, copy just package.json and package-lock.json since those are the only
# files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

ENV APIFY_DISABLE_OUTDATED_WARNING 1
ENV npm_config_loglevel=silent

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start

Maintained by Community
Actor metrics
  • 1 monthly user
  • 1 star
  • 96.6% of runs succeeded
  • Created in Nov 2021
  • Modified almost 3 years ago