Bzj Amazon Actor

This Actor is under maintenance and may be unreliable.

fateful_orangerie/bzj-amazon-actor

Crawl and extract Amazon data at scale using an Actor integrated with the Scrapeless Amazon Scraper API.

Maintained by Community

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20 AS builder

# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright

# Copy just package.json and package-lock.json
# to speed up the build using the Docker layer cache.
COPY package*.json ./

# Install all dependencies. Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false

# Next, copy the source files using the user set
# in the base image.
COPY . ./

# Build the project. The dev dependencies installed
# above provide the TypeScript compiler.
RUN npm run build

# Create the final image
FROM apify/actor-node:20

# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright

# Copy just package.json and package-lock.json
# to speed up the build using the Docker layer cache.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies
# to keep the image small. Avoid logging too much and print the
# dependency tree for debugging.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Copy the built JS files from the builder image
COPY --from=builder /usr/src/app/dist ./dist

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick builds will be really fast
# for most source file changes.
COPY . ./

# Run the image.
CMD npm run start:prod --silent
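
The Dockerfile is a two-stage build: the builder stage installs the dev dependencies and compiles the TypeScript sources, while the final stage installs only production dependencies and copies the compiled dist folder across, keeping the runtime image small.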

.actor/actor.json

{
  "actorSpecification": 1,
  "name": "bzj-amazon-actor",
  "title": "Bzj Amazon Actor",
  "description": "Scrape Amazon data via the Scrapeless Amazon Scraper API.",
  "version": "0.0",
  "meta": {
    "templateId": "ts-start"
  },
  "input": "./input_schema.json",
  "dockerfile": "./Dockerfile",
  "storages": {
    "dataset": "./dataset_schema.json"
  }
}

.actor/dataset_schema.json

{
  "actorSpecification": 1,
  "views": {
    "overview": {
      "title": "Overview",
      "transformation": {
        "fields": [
          "count",
          "data",
          "code",
          "message"
        ]
      },
      "display": {
        "component": "table",
        "properties": {
          "count": {
            "label": "count",
            "format": "text"
          },
          "data": {
            "label": "data",
            "format": "object"
          },
          "code": {
            "label": "code",
            "format": "text"
          },
          "message": {
            "label": "message",
            "format": "text"
          }
        }
      }
    }
  }
}
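
Based on the fields in the overview view, each dataset record appears to mirror the Scrapeless response envelope. A purely illustrative record might look like the following; the actual shape of the data field is determined by the Scrapeless API and is assumed here:

{
  "count": 20,
  "data": { "results": [] },
  "code": 200,
  "message": "success"
}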

.actor/input_schema.json

{
  "title": "Actor BZJ",
  "type": "object",
  "schemaVersion": 1,
  "properties": {
    "apiKey": {
      "title": "API Key",
      "type": "string",
      "editor": "textfield",
      "description": "Get your [API key](https://app.scrapeless.com/dashboard/account?tab=apiKey) for free"
    },
    "action": {
      "title": "Scraper Action",
      "type": "string",
      "enum": [
        "keywords",
        "product",
        "seller"
      ],
      "description": "The Amazon Scraper action type to use for crawling",
      "prefill": "keywords"
    },
    "webhook": {
      "title": "Webhook",
      "type": "string",
      "editor": "textfield",
      "description": "Webhook URL to send the scraped data to",
      "default": ""
    },
    "keywords": {
      "title": "Keywords",
      "sectionCaption": "Keywords options",
      "sectionDescription": "Configuration of the Keywords action",
      "type": "string",
      "editor": "textfield",
      "description": "Amazon keywords to search for",
      "default": "iPhone 12",
      "prefill": "iPhone 12"
    },
    "maxConcurrency": {
      "title": "Maximum concurrency",
      "type": "integer",
      "maximum": 100,
      "description": "Maximum concurrency to use for crawling",
      "default": 10,
      "prefill": 10
    },
    "pages": {
      "title": "Pages",
      "type": "integer",
      "maximum": 100,
      "description": "Total number of pages to crawl",
      "default": 3,
      "prefill": 3
    },
    "domain": {
      "title": "Domain",
      "type": "string",
      "editor": "textfield",
      "description": "Amazon top-level domain (e.g. com, co.uk, de)",
      "default": "com",
      "prefill": "com"
    },
    "productUrl": {
      "title": "Product details URL",
      "sectionCaption": "Product options",
      "sectionDescription": "Configuration of the Product action",
      "type": "string",
      "editor": "textfield",
      "description": "Amazon product details URL",
      "prefill": "https://www.amazon.com/dp/B0BQXHK363"
    },
    "sellerUrl": {
      "title": "Seller details URL",
      "sectionCaption": "Seller options",
      "sectionDescription": "Configuration of the Seller action",
      "type": "string",
      "editor": "textfield",
      "description": "Amazon seller details URL",
      "prefill": "https://www.amazon.com/dp/B0BQXHK363"
    }
  },
  "required": [
    "apiKey",
    "action"
  ]
}
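
For reference, here is a complete input for the default keywords action, assembled from the defaults and prefills in the schema above (the API key value is a placeholder):

{
  "apiKey": "<YOUR_SCRAPELESS_API_KEY>",
  "action": "keywords",
  "keywords": "iPhone 12",
  "domain": "com",
  "pages": 3,
  "maxConcurrency": 10,
  "webhook": ""
}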

src/main.ts

import { Actor, log } from 'apify';
import Scrapeless from 'scrapeless-sdk-node';

await Actor.init();

enum AmazonActionEnum {
    product = 'product',
    seller = 'seller',
    keywords = 'keywords',
}

interface Input {
    apiKey: string;
    action: AmazonActionEnum;
    webhook: string;
    productUrl: string;
    sellerUrl: string;
    keywords: string;
    maxConcurrency: number;
    pages: number;
    domain: string;
}

const {
    apiKey = '',
    action = AmazonActionEnum.keywords,
    webhook = '',
    keywords = 'iPhone 12',
    domain = 'com',
    pages = 3,
    maxConcurrency = 10,
    productUrl = 'https://www.amazon.com/dp/B0BQXHK363',
    sellerUrl = 'https://www.amazon.com/dp/B0BQXHK363',
} = await Actor.getInput<Input>() ?? {};

// Never spawn more workers than there are pages to fetch.
const CONCURRENCY_LIMIT = pages < maxConcurrency ? pages : maxConcurrency;

// @ts-expect-error scrapeless-sdk-node
const scrapeless = new Scrapeless({ apiKey });

// Build the Scrapeless input payload for the selected action.
function getScrapelessInput(currentPage = 1) {
    const baseInput = { action };
    if (action === AmazonActionEnum.seller) {
        return { ...baseInput, url: sellerUrl };
    }
    if (action === AmazonActionEnum.product) {
        return { ...baseInput, url: productUrl };
    }
    // keywords
    return { ...baseInput, keywords, page: currentPage.toString(), domain };
}

// Single request, used for the product and seller actions.
async function scraperFetch() {
    const response = await scrapeless.scraper({
        actor: 'scraper.amazon',
        webhook,
        input: getScrapelessInput(),
    });
    await Actor.pushData(response as object);
}

// Fetch all result pages for the keywords action using a simple worker pool.
async function keywordsConcurrencyScraperFetch() {
    const requestQueue: (() => Promise<object>)[] = [];
    for (let page = 1; page <= pages; page++) {
        requestQueue.push(() => {
            return scrapeless.scraper({
                actor: 'scraper.amazon',
                webhook,
                input: getScrapelessInput(page),
            });
        });
    }

    const successfulResults: object[] = [];
    let currentIndex = 0;
    // Each worker keeps pulling the next pending request until the queue is drained.
    async function worker() {
        while (currentIndex < requestQueue.length) {
            try {
                log.info(`[Current page number]: ${currentIndex + 1}`);
                const result = await requestQueue[currentIndex++]();
                await Actor.pushData(result);
                successfulResults.push(result);
            } catch (error) {
                log.error(`[Request failed]: ${error}`);
            }
        }
    }

    const workers: Promise<void>[] = [];
    for (let i = 1; i <= CONCURRENCY_LIMIT; i++) {
        workers.push(worker());
    }
    await Promise.all(workers);
    log.info(`[🎉 Successfully captured ${successfulResults.length} pages of data]`);
    await Actor.setValue('OUTPUT', successfulResults);
}

if (action === AmazonActionEnum.keywords) {
    await keywordsConcurrencyScraperFetch();
} else {
    await scraperFetch();
}

await Actor.exit();
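
Once deployed, the Actor can also be called programmatically. A minimal sketch using the separate apify-client package (not a dependency of this repo; both credential values are placeholders):

import { ApifyClient } from 'apify-client';

// Both credentials below are placeholders.
const client = new ApifyClient({ token: '<YOUR_APIFY_TOKEN>' });

// Start the Actor and wait for the run to finish.
const run = await client.actor('fateful_orangerie/bzj-amazon-actor').call({
    apiKey: '<YOUR_SCRAPELESS_API_KEY>',
    action: 'keywords',
    keywords: 'iPhone 12',
    pages: 3,
});

// Read the scraped records from the run's default dataset.
const { items } = await client.dataset(run.defaultDatasetId).listItems();
console.log(`Fetched ${items.length} records`);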

.dockerignore

# configurations
.idea
.vscode

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git

# dist folder
dist

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
    "root": true,
    "env": {
        "browser": true,
        "es2020": true,
        "node": true
    },
    "extends": [
        "@apify/eslint-config-ts"
    ],
    "parserOptions": {
        "project": "./tsconfig.json",
        "ecmaVersion": 2020
    },
    "ignorePatterns": [
        "node_modules",
        "dist",
        "**/*.d.ts"
    ]
}

.gitignore

# This file tells Git which files shouldn't be added to source control

.idea
.vscode
storage
apify_storage
crawlee_storage
node_modules
dist
tsconfig.tsbuildinfo
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json

# Added by Apify CLI
.venv

package.json

{
	"name": "bzj-amazon-actor",
	"version": "0.0.1",
	"type": "module",
	"description": "Apify Actor that scrapes Amazon data via the Scrapeless Amazon Scraper API.",
	"engines": {
		"node": ">=18.0.0"
	},
	"dependencies": {
		"apify": "^3.2.6",
		"axios": "^1.5.0",
		"cheerio": "^1.0.0-rc.12",
		"scrapeless-sdk-node": "^0.0.3"
	},
	"devDependencies": {
		"@apify/eslint-config-ts": "^0.3.0",
		"@apify/tsconfig": "^0.1.0",
		"@typescript-eslint/eslint-plugin": "^7.18.0",
		"@typescript-eslint/parser": "^7.18.0",
		"eslint": "^8.50.0",
		"tsx": "^4.6.2",
		"typescript": "^5.3.3"
	},
	"scripts": {
		"start": "npm run start:dev",
		"start:prod": "node dist/main.js",
		"start:dev": "tsx src/main.ts",
		"build": "tsc",
		"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
	},
	"author": "It's not you it's me",
	"license": "ISC"
}
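
Note how the scripts line up with the Dockerfile: start:dev runs the TypeScript sources directly via tsx, build compiles them into dist/, and start:prod runs the compiled output, which is what the final image's CMD invokes.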

tsconfig.json

{
    "extends": "@apify/tsconfig",
    "compilerOptions": {
        "module": "NodeNext",
        "moduleResolution": "NodeNext",
        "target": "ES2022",
        "outDir": "dist",
        "noUnusedLocals": false,
        "skipLibCheck": true,
        "lib": ["DOM"]
    },
    "include": [
        "./src/**/*"
    ]
}
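
One caveat of this setup: because package.json declares "type": "module" and the compiler uses NodeNext module resolution, any relative imports added under src/ must carry explicit .js extensions so the emitted ES modules resolve at runtime.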