JB Hi-Fi Scraper
Try for free
No credit card required
View all Actors
JB Hi-Fi Scraper
mshopik/jb-hi-fi-scraper
Try for free
No credit card required
Scrape JB Hi-Fi and extract data on TV and video equipment from jbhifi.com.au. Our JB Hi-Fi API lets you crawl product information and pricing. The saved data can be downloaded as HTML, JSON, CSV, Excel, and XML.
INPUT_SCHEMA.json
1{
2 "title": "jb-hi-fi-scraper",
3 "description": "Input for the JB Hi-Fi Scraper, which extracts product data from jbhifi.com.au.",
4 "type": "object",
5 "schemaVersion": 1,
6 "properties": {
7 "maxRequestsPerCrawl": {
8 "title": "Max items",
9 "description": "How many items to extract from jbhifi.com.au",
10 "default": 20,
11 "prefill": 20,
12 "type": "integer",
13 "editor": "number"
14 },
15 "extendOutputFunction": {
16 "title": "Extend Output Function",
17 "description": "Add or remove properties on the output object, or omit the output by returning null",
18 "type": "string",
19 "default": "async ({ data, item, product, images, fns, name, request, variants, context, customData, input, Apify }) => {\n return item;\n}",
20 "prefill": "async ({ data, item, product, images, fns, name, request, variants, context, customData, input, Apify }) => {\n return item;\n}",
21 "editor": "javascript",
22 "sectionCaption": "Extend scraper functionality",
23 "sectionDescription": "You can change the output of the items for your dataset here, or add additional behavior to the scraper."
24 },
25 "extendScraperFunction": {
26 "title": "Extend Scraper Function",
27 "description": "Advanced function that extends the default scraper functionality, letting you manually perform actions on the page",
28 "type": "string",
29 "default": "async ({ fns, customData, Apify, label }) => {\n \n}",
30 "prefill": "async ({ fns, customData, Apify, label }) => {\n \n}",
31 "editor": "javascript"
32 },
33 "customData": {
34 "title": "Custom data",
35 "description": "Any data that you want to have available inside the Extend Output/Scraper Function",
36 "default": {},
37 "prefill": {},
38 "type": "object",
39 "editor": "json"
40 },
41 "fetchHtml": {
42 "title": "Fetch HTML",
43 "description": "If you decide to fetch the HTML of the pages, it will take twice as long. Make sure to only enable this if needed",
44 "default": true,
45 "editor": "checkbox",
46 "type": "boolean"
47 },
48 "maxConcurrency": {
49 "title": "Max concurrency",
50 "description": "Max concurrency to use",
51 "default": 20,
52 "prefill": 20,
53 "type": "integer",
54 "editor": "number"
55 },
56 "maxRequestRetries": {
57 "title": "Max request retries",
58 "description": "Set the max request retries",
59 "default": 3,
60 "prefill": 3,
61 "type": "integer",
62 "editor": "number"
63 },
64 "debugLog": {
65 "title": "Debug Log",
66 "description": "Enable more verbose logging to help you understand what's happening during scraping",
67 "type": "boolean",
68 "default": false,
69 "editor": "checkbox"
70 }
71 }
72}
main.js
1import Apify from 'apify';
2
/**
 * Actor entry point.
 *
 * Reads the actor input and hands the run over (via `Apify.metamorph`) to the
 * `pocesar/shopify-scraper` actor, forwarding every input field as-is and
 * fixing `startUrls` to the JB Hi-Fi storefront.
 */
Apify.main(async () => {
    const input = await Apify.getInput();

    await Apify.metamorph('pocesar/shopify-scraper', {
        ...input,
        // Listed after the spread so it always overrides any `startUrls`
        // that may be present in the input.
        startUrls: [{
            // Use HTTPS so the storefront is requested over TLS rather than
            // plain HTTP.
            url: 'https://www.jbhifi.com.au',
        }],
    });
});
package.json
1{
2 "name": "jb-hi-fi-scraper",
3 "version": "0.0.1",
4 "type": "module",
5 "dependencies": {
6 "apify": "^2.3.2"
7 },
8 "scripts": {
9 "start": "node main.js"
10 }
11}
Dockerfile
# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Second, copy just package.json and package-lock.json since those are the only
# files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./

# Install production dependencies only (dev and optional packages are omitted),
# then print the installed package tree and the Node.js / npm versions to the
# build log. "npm list" is wrapped in "|| true" so that a non-zero exit from it
# does not fail the build.
# "--omit=dev --omit=optional" is the current spelling of the deprecated
# "--only=prod --no-optional" flags.
RUN npm --quiet set progress=false \
 && npm install --omit=dev --omit=optional \
 && echo "Installed NPM packages:" \
 && (npm list --omit=dev --omit=optional --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files of the build context (the actor source code).
# Doing this after "npm install" lets source-only changes reuse the dependency
# layer from the build cache.
COPY . ./

# Environment for the container, written in the KEY=value form for both
# variables. APIFY_DISABLE_OUTDATED_WARNING presumably silences the SDK's
# outdated-version notice (confirm against the Apify SDK docs);
# npm_config_loglevel=silent sets npm's log level to silent.
# NOTE(review): no CMD is declared here, so the base image's default command
# is what starts the actor -- confirm it runs "npm start".
ENV APIFY_DISABLE_OUTDATED_WARNING=1
ENV npm_config_loglevel=silent
Developer
Maintained by Community
Actor metrics
- 2 monthly users
- 1 star
- 96.6% runs succeeded
- Created in Nov 2021
- Modified about 2 years ago
Categories