
Apify Scraper
Pricing
Pay per usage
Go to Store

Apify Scraper
Extract actor data from the Apify Store, including pricing, stats, and user info. Supports multiple queries, filters, and crash-safe state persistence. 🧑💻 Open-source and ideal for research, analysis, and discovering tools.
5.0 (2)
Pricing
Pay per usage
3
Total users
2
Monthly users
2
Last modified
2 days ago
.actor/Dockerfile
1# Specify the base Docker image. You can read more about
2# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:20
5
6# Check preinstalled packages
# NOTE(review): `npm ls` exits non-zero when any listed package is missing and
# would fail the build — confirm all four (crawlee, apify, puppeteer,
# playwright) are actually preinstalled in apify/actor-node:20; this project's
# package.json itself declares neither puppeteer nor playwright.
7RUN npm ls crawlee apify puppeteer playwright
8
9# Copy just package.json and package-lock.json
10# to speed up the build using Docker layer cache.
11COPY package*.json ./
12
13# Install NPM packages, skip optional and development dependencies to
14# keep the image small. Avoid logging too much and print the dependency
15# tree for debugging
16RUN npm --quiet set progress=false \
17 && npm install --omit=dev --omit=optional \
18 && echo "Installed NPM packages:" \
19 && (npm list --omit=dev --all || true) \
20 && echo "Node.js version:" \
21 && node --version \
22 && echo "NPM version:" \
23 && npm --version \
24 && rm -r ~/.npm
25
26# Next, copy the remaining files and directories with the source code.
27# Since we do this after NPM install, quick build will be really fast
28# for most source file changes.
29COPY . ./
30
31# Create and run as a non-root user.
# NOTE(review): `adduser -h … -D` is BusyBox/Alpine syntax — assumes the base
# image is Alpine-based; verify if the base image tag is ever changed.
32RUN adduser -h /home/apify -D apify && \
33 chown -R apify:apify ./
34USER apify
35
36# Run the image.
37CMD npm start --silent
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "apify-scraper",
4 "title": "Apify Store Scraper",
5 "description": "Scrape data from Apify Store with all available filters",
6 "version": "0.1",
7 "meta": {
8 "templateId": "js"
9 },
10 "input": "./input_schema.json",
11 "dockerfile": "./Dockerfile",
12 "readme": "./README.md",
13 "storages": {
14 "dataset": {
15 "actorSpecification": 1,
16 "title": "Apify Store Results",
17 "views": {
18 "results": {
19 "title": "📦 Scraped Actors Overview",
20 "transformation": {
21 "fields": [
22 "pictureUrl",
23 "price",
24 "stats",
25 "title",
26 "url",
27 "username",
28 "actorReviewRating",
29 "bookmarkCount"
30 ]
31 },
32 "display": {
33 "component": "table",
34 "properties": {
35 "pictureUrl": {
36 "label": "Thumbnail",
37 "format": "image"
38 },
39 "title": {
40 "label": "Actor Title",
41 "format": "text"
42 },
43 "username": {
44 "label": "Developer",
45 "format": "text"
46 },
47 "url": {
48 "label": "Actor URL",
49 "format": "link"
50 },
51 "stats": {
52                             "label": "Stats"
53 },
54 "price": {
55 "label": "Price (USD)",
56 "format": "text"
57 },
58 "actorReviewRating": {
59 "label": "Rating ⭐",
60 "format": "text"
61 },
62 "bookmarkCount": {
63 "label": "Bookmarks",
64 "format": "text"
65 }
66 }
67 }
68 }
69 }
70 }
71 }
72}
.actor/input_schema.json
1{
2 "title": "🚀 Apify Store Scraper",
3 "type": "object",
4 "schemaVersion": 1,
5 "description": "🔍 Scrape Actors from the Apify Store with smart filters like category, pricing, and popularity — now with support for multiple search terms and automatic resume on crash.",
6 "properties": {
7 "search": {
8 "title": "🔎 Search Terms",
9 "type": "array",
10 "description": "Enter one or more keywords to search Actors (e.g., 'YouTube Scraper', 'GitHub').",
11 "editor": "stringList",
12 "default": [
13 "YouTube Scraper"
14 ]
15 },
16 "sortBy": {
17 "title": "📊 Sort By",
18 "type": "string",
19 "description": "Choose how the results should be sorted.",
20 "default": "relevance",
21 "editor": "select",
22 "enum": ["relevance", "popularity", "newest", "lastUpdate"],
23 "enumTitles": [
24 "🎯 Most Relevant",
25 "🔥 Popular",
26 "🆕 Newest",
27 "♻️ Recently Updated"
28 ]
29 },
30 "category": {
31 "title": "📁 Category",
32 "type": "string",
33 "description": "🎯 Narrow down results by selecting a specific category.",
34 "default": "",
35 "editor": "select",
36 "enum": [
37 "",
38 "AI",
39 "AGENTS",
40 "AUTOMATION",
41 "DEVELOPER_TOOLS",
42 "ECOMMERCE",
43 "JOBS",
44 "LEAD_GENERATION",
45 "NEWS",
46 "SEO_TOOLS",
47 "SOCIAL_MEDIA",
48 "TRAVEL",
49 "VIDEOS",
50 "REAL_ESTATE",
51 "INTEGRATIONS",
52 "OTHER",
53 "OPEN_SOURCE"
54 ],
55 "enumTitles": [
56 "📦 All Categories",
57 "🧠 AI",
58 "🤖 Agents",
59 "⚙️ Automation",
60 "👨💻 Developer Tools",
61 "🛒 E-commerce",
62 "💼 Jobs",
63 "📞 Lead Generation",
64 "🗞 News",
65 "🔍 SEO Tools",
66 "📱 Social Media",
67 "✈️ Travel",
68 "🎥 Videos",
69 "🏘 Real Estate",
70 "🔌 Integrations",
71 "📦 Other",
72 "🌐 Open Source"
73 ]
74 },
75 "pricingModel": {
76 "title": "💰 Pricing Model",
77 "type": "string",
78 "description": "Filter results by pricing type (free, monthly, etc.).",
79 "default": "",
80 "editor": "select",
81 "enum": [
82 "",
83 "PRICE_PER_DATASET_ITEM",
84 "PAY_PER_EVENT",
85 "FREE",
86 "FLAT_PRICE_PER_MONTH"
87 ],
88 "enumTitles": [
89 "💲 All Pricing Models",
90 "📈 Pay Per Result",
91 "📊 Pay Per Event",
92 "🆓 Free",
93 "📅 Monthly Rental"
94 ]
95 },
96 "managedBy": {
97 "title": "🛠 Managed By",
98 "type": "string",
99 "description": "Filter by the developer type (Apify official or community).",
100 "default": "",
101 "editor": "select",
102 "enum": [
103 "",
104 "APIFY",
105 "COMMUNITY"
106 ],
107 "enumTitles": [
108 "👥 All Developers",
109 "🏢 Apify Official",
110 "🧑💻 Community"
111 ]
112 },
113 "limit": {
114 "title": "📦 Items Per Request",
115 "type": "integer",
116 "description": "Number of items to fetch in each API call (max 100).",
117 "default": 24,
118 "minimum": 1,
119 "maximum": 100,
120 "unit": "items"
121 },
122 "maxResults": {
123 "title": "📈 Max Results",
124 "type": "integer",
125 "description": "Maximum total number of results to fetch (0 = unlimited).",
126 "default": 1,
127 "minimum": 0,
128 "unit": "items"
129 },
130 "batchDelay": {
131 "title": "⏱ Delay Between Searches",
132 "type": "integer",
133 "description": "Pause (in milliseconds) between each search query to reduce rate limits or load.",
134 "default": 100,
135 "minimum": 0,
136 "maximum": 10000,
137 "unit": "ms"
138 }
139 },
140 "required": []
141}
src/main.js
import { Actor } from 'apify';
import { scrapeStore } from './scraper.js';

// Key under which the cross-search progress object is persisted so an
// interrupted run can resume where it left off.
const STATE_KEY = 'SEARCH_PROGRESS';

// Upper bound on retries for a single search term. The previous version
// re-queued a failed term unconditionally, so one permanently failing term
// (bad input, persistent 4xx) made the while-loop spin forever.
const MAX_ATTEMPTS_PER_TERM = 3;

await Actor.init();

const rawInput = await Actor.getInput() ?? {};

// Normalize input, falling back to safe defaults for anything missing or
// mistyped. After this, `input.search` is guaranteed to be a non-empty array,
// so no separate validation step is needed.
// NOTE(review): the input schema's default for maxResults is 1 while this
// fallback is 100 — confirm which default is intended.
const input = {
    search: Array.isArray(rawInput.search) && rawInput.search.length > 0 ? rawInput.search : ['YouTube Scraper'],
    sortBy: typeof rawInput.sortBy === 'string' ? rawInput.sortBy : 'relevance',
    category: typeof rawInput.category === 'string' ? rawInput.category : '',
    pricingModel: typeof rawInput.pricingModel === 'string' ? rawInput.pricingModel : '',
    managedBy: typeof rawInput.managedBy === 'string' ? rawInput.managedBy : '',
    limit: Number.isInteger(rawInput.limit) ? rawInput.limit : 24,
    maxResults: Number.isInteger(rawInput.maxResults) ? rawInput.maxResults : 100,
    batchDelay: Number.isInteger(rawInput.batchDelay) ? rawInput.batchDelay : 100,
};

// Resume persisted progress if present; otherwise start a fresh queue.
const progress = await Actor.getValue(STATE_KEY) ?? {
    remainingTerms: [...input.search],
    completedTerms: [],
    failedTerms: [],
    perSearchStats: {},
    attempts: {},
};
// A state persisted by an older version may lack the newer fields — backfill.
progress.failedTerms ??= [];
progress.attempts ??= {};

Actor.on('migrating', async () => {
    console.log('🚨 Migration triggered, saving queue state...');
    await Actor.setValue(STATE_KEY, progress);
    await Actor.reboot();
});

try {
    while (progress.remainingTerms.length > 0) {
        const term = progress.remainingTerms.shift();
        console.log(`\n🔍 Searching for: "${term}"`);

        try {
            const result = await scrapeStore({ ...input, search: term });
            progress.perSearchStats[term] = result.stats;
            progress.completedTerms.push(term);
        } catch (error) {
            // Retry a bounded number of times, then give up on the term so
            // the run can still finish the rest of the queue.
            progress.attempts[term] = (progress.attempts[term] ?? 0) + 1;
            if (progress.attempts[term] < MAX_ATTEMPTS_PER_TERM) {
                console.error(`❌ Failed to process term "${term}" (attempt ${progress.attempts[term]}), re-adding to queue.`);
                progress.remainingTerms.push(term);
            } else {
                console.error(`❌ Term "${term}" failed ${MAX_ATTEMPTS_PER_TERM} times, giving up on it.`);
                progress.failedTerms.push(term);
            }
        }

        // Persist after every term so a crash loses at most one search.
        await Actor.setValue(STATE_KEY, progress);
    }

    console.log('\n✅ All searches completed!');
    console.log('📊 Final stats:', progress.perSearchStats);

    await Actor.setValue('SUMMARY', progress.perSearchStats);
    // Clear the resume state so the next run starts fresh.
    await Actor.setValue(STATE_KEY, null);
} catch (err) {
    console.error('❌ Scraping failed:', err);
    await Actor.fail(err);
} finally {
    await Actor.exit();
}
src/scraper.js
1import { Actor } from 'apify';
2import { gotScraping } from 'got-scraping';
3import { delay } from './utils.js';
4
5const BASE_URL = 'https://console-backend.apify.com/public/store/memoized-search';
6
7
/**
 * Crash-safe progress snapshot for one store search, persisted in the Actor
 * key-value store under `persistenceKey`. Fields:
 *  - processedItems: reserved list of handled items
 *  - currentOffset:  pagination offset reached so far
 *  - totalResults:   total result count reported by the API
 *  - stats:          { success, failures } counters
 */
export class ScraperState {
    constructor(persistenceKey) {
        this.persistenceKey = persistenceKey;
        this.processedItems = [];
        this.currentOffset = 0;
        this.totalResults = 0;
        this.stats = { success: 0, failures: 0 };
    }

    // Restore a previously persisted snapshot (if any) on top of the defaults.
    async load() {
        const saved = await Actor.getValue(this.persistenceKey);
        if (saved) Object.assign(this, saved);
    }

    // Persist only the resumable fields; the key itself is never stored.
    async save() {
        const { processedItems, currentOffset, totalResults, stats } = this;
        await Actor.setValue(this.persistenceKey, {
            processedItems,
            currentOffset,
            totalResults,
            stats,
        });
    }
}
31
/**
 * Maps a raw store item from the API response into the flat record pushed to
 * the dataset. Pure data reshaping; never throws on missing optional fields.
 */
function buildItemRecord(item) {
    return {
        id: item.id,
        title: item.title,
        name: item.name,
        username: item.username,
        userFullName: item.userFullName,
        userPictureUrl: item.userPictureUrl,
        description: item.description,
        pictureUrl: item.pictureUrl,
        notice: item.notice,
        actorReviewRating: item.actorReviewRating,
        bookmarkCount: item.bookmarkCount,
        url: `https://console.apify.com/actors/${item.id}`,
        price: item.currentPricingInfo
            ? `${item.currentPricingInfo.pricingModel || 'UNKNOWN'} - $${item.currentPricingInfo.pricePerUnitUsd || 0}`
            : 'Free or Unknown',
        stats: {
            totalBuilds: item.stats?.totalBuilds,
            totalRuns: item.stats?.totalRuns,
            totalUsers: item.stats?.totalUsers,
            totalUsers7Days: item.stats?.totalUsers7Days,
            totalUsers30Days: item.stats?.totalUsers30Days,
            totalUsers90Days: item.stats?.totalUsers90Days,
            lastRunStartedAt: item.stats?.lastRunStartedAt,
            publicActorRunStats30Days: item.stats?.publicActorRunStats30Days,
        },
        currentPricingInfo: {
            pricingModel: item.currentPricingInfo?.pricingModel,
            pricePerUnitUsd: item.currentPricingInfo?.pricePerUnitUsd,
            trialMinutes: item.currentPricingInfo?.trialMinutes,
            startedAt: item.currentPricingInfo?.startedAt,
            createdAt: item.currentPricingInfo?.createdAt,
            apifyMarginPercentage: item.currentPricingInfo?.apifyMarginPercentage,
            notifiedAboutChangeAt: item.currentPricingInfo?.notifiedAboutChangeAt,
            notifiedAboutFutureChangeAt: item.currentPricingInfo?.notifiedAboutFutureChangeAt,
        },
    };
}

/**
 * Scrapes the Apify Store for a single search term, pushing each item to the
 * default dataset and persisting pagination state so a migrated/crashed run
 * can resume at the last saved offset.
 *
 * @param {object} input - normalized input with a single `search` string plus
 *   sortBy/category/pricingModel/managedBy filters, `limit` (page size),
 *   `maxResults` (0 = unlimited) and `batchDelay` (ms between pages).
 * @returns {Promise<{stats: {success: number, failures: number}}>}
 * @throws rethrows any batch-level fetch/parse error after persisting state.
 */
export async function scrapeStore(input) {
    // Persist state under a per-term key so resuming one query can never pick
    // up the offset/stats left behind by a different query (the previous
    // single shared key cross-contaminated searches after a mid-run failure).
    const persistenceKey = `SEARCH_STATE-${String(input.search || 'default').replace(/[^a-zA-Z0-9!\-_.'()]/g, '_')}`;
    const state = new ScraperState(persistenceKey);
    await state.load();

    // Named handler so it can be detached below; the previous code attached a
    // fresh anonymous 'migrating' listener on every call and never removed it,
    // leaking one listener per processed search term.
    const onMigrating = async () => {
        console.log(`Migration detected - saving state for "${input.search}"`);
        await state.save();
    };
    Actor.on('migrating', onMigrating);

    const itemsPerRequest = input.limit;
    const maxResults = input.maxResults;

    try {
        let hasMore = true;
        while (hasMore) {
            // How many items are still wanted (maxResults 0 means unlimited).
            const remainingItems = maxResults > 0
                ? Math.max(0, maxResults - state.stats.success)
                : itemsPerRequest;
            const currentLimit = maxResults > 0
                ? Math.min(itemsPerRequest, remainingItems)
                : itemsPerRequest;

            if (maxResults > 0 && remainingItems <= 0) {
                console.log('Reached maximum requested results');
                break;
            }

            // NOTE(review): the input schema uses lowercase sort values
            // ("relevance") while this fallback is uppercase — confirm which
            // casing the backend expects.
            const params = {
                search: input.search || '',
                sortBy: input.sortBy || 'RELEVANCE',
                category: input.category || '',
                pricingModel: input.pricingModel || '',
                limit: currentLimit.toString(),
                offset: state.currentOffset.toString(),
            };
            // Only send managedBy when it has a value.
            if (input.managedBy) params.managedBy = input.managedBy;

            const url = `${BASE_URL}?${new URLSearchParams(params)}`;
            console.log(`Fetching ${currentLimit} items from offset ${state.currentOffset}...`);

            try {
                const response = await gotScraping.get(url, {
                    headers: {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
                        'Accept': 'application/json',
                        'Referer': 'https://console.apify.com/',
                        'Origin': 'https://console.apify.com',
                    },
                    responseType: 'json',
                });

                const data = response.body;
                if (!data.items || !Array.isArray(data.items)) {
                    throw new Error(`Invalid response format - missing items array at offset ${state.currentOffset}`);
                }

                for (const item of data.items) {
                    if (maxResults > 0 && state.stats.success >= maxResults) {
                        hasMore = false;
                        break;
                    }
                    try {
                        await Actor.pushData(buildItemRecord(item));
                        state.stats.success++;
                    } catch (error) {
                        console.error(`Failed to process item ${item.id}:`, error.message);
                        state.stats.failures++;
                    }
                }

                state.currentOffset += data.items.length;
                await state.save();
                console.log(`Progress: ${state.stats.success} items fetched (${maxResults > 0 ? `${state.stats.success}/${maxResults}` : 'unlimited'})`);

                // A short page or hitting maxResults both end pagination.
                if (data.items.length < currentLimit
                    || (maxResults > 0 && state.stats.success >= maxResults)) {
                    hasMore = false;
                    console.log('Reached end condition');
                }

                if (input.batchDelay > 0 && hasMore) {
                    await delay(input.batchDelay);
                }
            } catch (error) {
                // Persist before rethrowing so the caller's retry resumes here.
                state.stats.failures++;
                await state.save();
                console.error(`Failed to fetch batch at offset ${state.currentOffset}:`, error.message);
                throw error;
            }
        }
    } finally {
        Actor.off('migrating', onMigrating);
    }

    // Successful completion: clear the resume state for this term.
    await Actor.setValue(persistenceKey, null);
    return {
        stats: state.stats,
    };
}
src/utils.js
/**
 * Resolves after approximately `ms` milliseconds. Used to throttle
 * successive API requests between pages/search terms.
 *
 * @param {number} ms - pause duration in milliseconds
 * @returns {Promise<void>}
 */
export function delay(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
4
/**
 * Validates optional numeric input fields, throwing on out-of-range values.
 * Fields that are absent (undefined/null) are skipped.
 *
 * @param {object} input - raw actor input (limit, batchDelay, maxResults, minRating)
 * @throws {Error} when any present field is outside its allowed range
 */
export function validateInput(input) {
    // Use explicit null checks instead of truthiness so falsy-but-invalid
    // values are still validated: the old `input.limit && ...` guard let
    // `limit: 0` slip through even though the minimum is 1.
    if (input.limit != null && (input.limit < 1 || input.limit > 100)) {
        throw new Error('Limit must be between 1 and 100');
    }
    if (input.batchDelay != null && input.batchDelay < 0) {
        throw new Error('Batch delay must be a positive number');
    }
    if (input.maxResults != null && input.maxResults < 0) {
        throw new Error('Max results must be a positive number');
    }
    if (input.minRating != null && (input.minRating < 0 || input.minRating > 5)) {
        throw new Error('Minimum rating must be between 0 and 5');
    }
}
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
.eslintrc
1{
2 "extends": "@apify",
3 "root": true
4}
.gitignore
1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv
package.json
1{
2    "name": "apify-scraper",
3 "version": "0.0.1",
4 "type": "module",
5    "description": "An Apify Store actor scraper",
6 "engines": {
7 "node": ">=18.0.0"
8 },
9 "dependencies": {
10 "apify": "^3.2.6",
11        "got-scraping": "^4.0.5"
12 },
13 "devDependencies": {
14 "@apify/eslint-config": "^0.4.0",
15 "eslint": "^8.50.0"
16 },
17 "scripts": {
18 "start": "node src/main.js",
19 "lint": "eslint ./src --ext .js,.jsx",
20 "lint:fix": "eslint ./src --ext .js,.jsx --fix",
21 "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
22 },
23 "author": " DZ-ABDLHAKIM",
24 "license": "ISC"
25}