
Bzj Amazon Actor
Deprecated
Crawl and extract unlimited data using Actors integrated with the Scrapeless Amazon Scraper API.
Rating: 0.0 (0)
Pricing: Pay per usage
Total users: 2
Monthly users: 2
Last modified: 3 months ago
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20 AS builder

# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install all dependencies. Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false

# Next, copy the source files using the user set
# in the base image.
COPY . ./

# Install all dependencies and build the project.
# Don't audit to speed up the installation.
RUN npm run build

# Create final image
FROM apify/actor-node:20

# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm
# Copy built JS files from builder image
COPY --from=builder /usr/src/app/dist ./dist
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image.
CMD npm run start:prod --silent
.actor/actor.json
{ "actorSpecification": 1, "name": "bzj-amazon-actor", "title": "Scrape single page in TypeScript", "description": "Scrape data from single page with provided URL.", "version": "0.0", "meta": { "templateId": "ts-start" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile", "storages": { "dataset": "./dataset_schema.json" }}
.actor/dataset_schema.json
{ "actorSpecification": 1, "views": { "overview": { "title": "Overview", "transformation": { "fields": [ "count", "data", "code", "message" ] }, "display": { "component": "table", "properties": { "count": { "label": "count", "format": "text" }, "data": { "label": "data", "format": "object" }, "code": { "label": "code", "format": "text" }, "message": { "label": "message", "format": "text" } } } } }}
.actor/input_schema.json
{ "title": "Actor BZJ", "type": "object", "schemaVersion": 1, "properties": { "apiKey": { "title": "API Key", "type": "string", "editor": "textfield", "description": "Start getting your [API KEY](https://app.scrapeless.com/dashboard/account?tab=apiKey) for free" }, "action": { "title": "Scraper Action", "type": "string", "enum": [ "keywords", "product", "seller" ], "description": "Amazon Scraper action types used for crawling", "prefill": "keywords" }, "webhook": { "title": "webhook", "type": "string", "editor": "textfield", "description": "webhook URL to send the data to", "default": "" }, "keywords": { "title": "Keywords", "sectionCaption": "keywords options", "sectionDescription": "Configuration of action Product", "type": "string", "editor": "textfield", "description": "Amazon keywords to search for", "default": "iphone 12", "prefill": "iPhone 12" }, "maxConcurrency": { "title": "Maximum concurrency", "type": "integer", "maximum": 100, "description": "Maximum concurrency to use for crawling", "default": 10, "prefill": 10 }, "pages": { "title": "Pages", "type": "integer", "maximum": 100, "description": "Total number of pages crawled", "default": 3, "prefill": 3 }, "domain": { "title": "Domain", "type": "string", "editor": "textfield", "description": "Amazon domain", "default": "com", "prefill": "com" }, "productUrl": { "title": "Product details URL", "sectionCaption": "product options", "sectionDescription": "Configuration of action Product", "type": "string", "editor": "textfield", "description": "Amazon product details URL", "prefill": "https://www.amazon.com/dp/B0BQXHK363" }, "sellerUrl": { "title": "seller details URL", "sectionCaption": "seller options", "sectionDescription": "Configuration of action seller", "type": "string", "editor": "textfield", "description": "Amazon seller details URL", "prefill": "https://www.amazon.com/dp/B0BQXHK363" } }, "required": [ "apiKey", "action" ]}
src/main.ts
import { Actor, log } from 'apify';
import Scrapeless from 'scrapeless-sdk-node';

await Actor.init();

enum AmazonActionEnum {
    product = 'product',
    seller = 'seller',
    keywords = 'keywords',
}

interface Input {
    apiKey: string;
    action: AmazonActionEnum;
    webhook: string;
    productUrl: string;
    sellerUrl: string;
    keywords: string;
    maxConcurrency: number;
    pages: number;
    domain: string;
}

const {
    apiKey = '',
    action = AmazonActionEnum.keywords,
    webhook = '',
    keywords = 'iPhone 12',
    domain = 'com',
    pages = 3,
    maxConcurrency = 10,
    productUrl = 'https://www.amazon.com/dp/B0BQXHK363',
    sellerUrl = 'https://www.amazon.com/dp/B0BQXHK363',
} = await Actor.getInput<Input>() ?? {};

// Never spawn more workers than there are pages to fetch.
const CONCURRENCY_LIMIT = pages < maxConcurrency ? pages : maxConcurrency;

// @ts-expect-error scrapeless-sdk-node ships no type declarations
const scrapeless = new Scrapeless({ apiKey });

// Build the Scrapeless input payload for the selected action.
function getScrapelessInput(currentPage = 1) {
    const baseInput = { action };
    if (action === AmazonActionEnum.seller) {
        return { ...baseInput, url: sellerUrl };
    }
    if (action === AmazonActionEnum.product) {
        return { ...baseInput, url: productUrl };
    }
    // keywords
    return { ...baseInput, keywords, page: currentPage.toString(), domain };
}

// Single request: used for the product and seller actions.
async function scraperFetch() {
    const response = await scrapeless.scraper({
        actor: 'scraper.amazon',
        webhook,
        input: getScrapelessInput(),
    });
    await Actor.pushData(response as object);
}

// Keyword search: fetch all result pages with a bounded worker pool.
async function keywordsConcurrencyScraperFetch() {
    const requestQueue: (() => Promise<object>)[] = [];
    for (let page = 1; page <= pages; page++) {
        requestQueue.push(() => {
            return scrapeless.scraper({
                actor: 'scraper.amazon',
                webhook,
                input: getScrapelessInput(page),
            });
        });
    }

    const successfulResults: object[] = [];
    let currentIndex = 0;
    async function worker() {
        while (currentIndex < requestQueue.length) {
            try {
                log.info(`[Current page number]: ${currentIndex + 1}`);
                const result = await requestQueue[currentIndex++]();
                await Actor.pushData(result);
                successfulResults.push(result);
            } catch (error) {
                log.error(`[Request failed]: ${error}`);
            }
        }
    }

    const workers = [];
    for (let i = 1; i <= CONCURRENCY_LIMIT; i++) {
        workers.push(worker());
    }
    await Promise.all(workers);
    log.info(`[🎉 Successfully captured ${successfulResults.length} pages of data]`);
    await Actor.setValue('OUTPUT', successfulResults);
}

if (action === AmazonActionEnum.keywords) {
    await keywordsConcurrencyScraperFetch();
} else {
    await scraperFetch();
}

await Actor.exit();
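keywordsConcurrencyScraperFetch implements a bounded worker pool: CONCURRENCY_LIMIT workers drain a shared task queue, so at most that many Scrapeless requests are in flight at once. Stripped of the Actor plumbing, the pattern is roughly the following sketch (the names here are illustrative, not part of the Actor):

// Generic sketch of the worker-pool pattern used above: `limit` workers
// pull tasks via a shared cursor. The cursor is claimed synchronously
// (before any await), so two workers can never grab the same task.
async function runWithConcurrency<T>(tasks: (() => Promise<T>)[], limit: number): Promise<T[]> {
    const results: T[] = [];
    let next = 0;
    async function worker(): Promise<void> {
        while (next < tasks.length) {
            const index = next++; // claim a task before awaiting
            results[index] = await tasks[index]();
        }
    }
    const workers = Array.from({ length: Math.min(limit, tasks.length) }, () => worker());
    await Promise.all(workers);
    return results;
}

Unlike the Actor's version, this sketch lets a single failed task reject the whole pool; the Actor instead catches errors per request, logs them, and simply skips the failed page.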
.dockerignore
# configurations
.idea
.vscode

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git

# dist folder
dist
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.eslintrc
{ "root": true, "env": { "browser": true, "es2020": true, "node": true }, "extends": [ "@apify/eslint-config-ts" ], "parserOptions": { "project": "./tsconfig.json", "ecmaVersion": 2020 }, "ignorePatterns": [ "node_modules", "dist", "**/*.d.ts" ]}
.gitignore
# This file tells Git which files shouldn't be added to source control
.idea
.vscode
storage
apify_storage
crawlee_storage
node_modules
dist
tsconfig.tsbuildinfo
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json

# Added by Apify CLI
.venv
package.json
{ "name": "bzj-amazon-actor", "version": "0.0.1", "type": "module", "description": "This is an example of an Apify actor.", "engines": { "node": ">=18.0.0" }, "dependencies": { "apify": "^3.2.6", "axios": "^1.5.0", "cheerio": "^1.0.0-rc.12", "scrapeless-sdk-node": "^0.0.3" }, "devDependencies": { "@apify/eslint-config-ts": "^0.3.0", "@apify/tsconfig": "^0.1.0", "@typescript-eslint/eslint-plugin": "^7.18.0", "@typescript-eslint/parser": "^7.18.0", "eslint": "^8.50.0", "tsx": "^4.6.2", "typescript": "^5.3.3" }, "scripts": { "start": "npm run start:dev", "start:prod": "node dist/main.js", "start:dev": "tsx src/main.ts", "build": "tsc", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" }, "author": "It's not you it's me", "license": "ISC"}
tsconfig.json
{ "extends": "@apify/tsconfig", "compilerOptions": { "module": "NodeNext", "moduleResolution": "NodeNext", "target": "ES2022", "outDir": "dist", "noUnusedLocals": false, "skipLibCheck": true, "lib": ["DOM"] }, "include": [ "./src/**/*" ]}