Apify Scraper

Developed by dz_omar

Maintained by Community

Extract actor data from the Apify Store, including pricing, stats, and user info. Supports multiple queries, filters, and crash-safe state persistence. 🧑‍💻 Open-source and ideal for research, analysis, and discovering tools.

Rating: 5.0 (2)
Pricing: Pay per usage
Total users: 6
Monthly users: 4
Runs succeeded: >99%
Last modified: a month ago
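
For a quick sense of how the multi-query input described above can be used, here is a minimal sketch that runs this Actor through the apify-client package. It is an illustration only: the Actor ID string is an assumption and should be replaced with the real one shown on the Store listing.

import { ApifyClient } from 'apify-client';

// Token is read from the environment; the Actor ID below is a placeholder assumption.
const client = new ApifyClient({ token: process.env.APIFY_TOKEN });

const run = await client.actor('dz_omar/apify-scraper').call({
    search: ['YouTube Scraper', 'GitHub'], // multiple search terms
    sortBy: 'popularity',
    pricingModel: 'FREE',
    maxResults: 50,
});

// Scraped Actors are pushed to the run's default dataset.
const { items } = await client.dataset(run.defaultDatasetId).listItems();
console.log(`Fetched ${items.length} Actors`);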

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20
# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
# Create and run as a non-root user.
RUN adduser -h /home/apify -D apify && \
    chown -R apify:apify ./
USER apify
# Run the image.
CMD npm start --silent

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "apify-scraper",
    "title": "Apify Store Scraper",
    "description": "Scrape data from Apify Store with all available filters",
    "version": "0.1",
    "meta": {
        "templateId": "js"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile",
    "readme": "./README.md",
    "storages": {
        "dataset": {
            "actorSpecification": 1,
            "title": "Apify Store Results",
            "views": {
                "results": {
                    "title": "📦 Scraped Actors Overview",
                    "transformation": {
                        "fields": [
                            "pictureUrl",
                            "price",
                            "totalUsers7Days",
                            "totalUsers30Days",
                            "title",
                            "actorReviewRating",
                            "bookmarkCount",
                            "url"
                        ]
                    },
                    "display": {
                        "component": "table",
                        "properties": {
                            "pictureUrl": {
                                "label": "Thumbnail",
                                "format": "image"
                            },
                            "title": {
                                "label": "Actor Title",
                                "format": "text"
                            },
                            "url": {
                                "label": "Actor URL",
                                "format": "link"
                            },
                            "price": {
                                "label": "Price (USD)",
                                "format": "text"
                            },
                            "actorReviewRating": {
                                "label": "Rating ⭐",
                                "format": "text"
                            },
                            "bookmarkCount": {
                                "label": "Bookmarks",
                                "format": "text"
                            },
                            "totalUsers7Days": {
                                "label": "Users (7d)",
                                "format": "text"
                            },
                            "totalUsers30Days": {
                                "label": "Users (30d)",
                                "format": "text"
                            }
                        }
                    }
                }
            }
        }
    }
}

.actor/input_schema.json

{
    "title": "🚀 Apify Store Scraper",
    "type": "object",
    "schemaVersion": 1,
    "description": "🔍 Scrape Actors from the Apify Store with smart filters like category, pricing, and popularity — now with support for multiple search terms and automatic resume on crash.",
    "properties": {
        "search": {
            "title": "🔎 Search Terms",
            "type": "array",
            "description": "Enter one or more keywords to search Actors (e.g., 'YouTube Scraper', 'GitHub').",
            "editor": "stringList",
            "default": [
                "YouTube Scraper"
            ]
        },
        "sortBy": {
            "title": "📊 Sort By",
            "type": "string",
            "description": "Choose how the results should be sorted.",
            "default": "relevance",
            "editor": "select",
            "enum": ["relevance", "popularity", "newest", "lastUpdate"],
            "enumTitles": [
                "🎯 Most Relevant",
                "🔥 Popular",
                "🆕 Newest",
                "♻️ Recently Updated"
            ]
        },
        "category": {
            "title": "📁 Category",
            "type": "string",
            "description": "🎯 Narrow down results by selecting a specific category.",
            "default": "",
            "editor": "select",
            "enum": [
                "",
                "AI",
                "AGENTS",
                "AUTOMATION",
                "DEVELOPER_TOOLS",
                "ECOMMERCE",
                "JOBS",
                "LEAD_GENERATION",
                "NEWS",
                "SEO_TOOLS",
                "SOCIAL_MEDIA",
                "TRAVEL",
                "VIDEOS",
                "REAL_ESTATE",
                "INTEGRATIONS",
                "OTHER",
                "OPEN_SOURCE"
            ],
            "enumTitles": [
                "📦 All Categories",
                "🧠 AI",
                "🤖 Agents",
                "⚙️ Automation",
                "👨‍💻 Developer Tools",
                "🛒 E-commerce",
                "💼 Jobs",
                "📞 Lead Generation",
                "🗞 News",
                "🔍 SEO Tools",
                "📱 Social Media",
                "✈️ Travel",
                "🎥 Videos",
                "🏘 Real Estate",
                "🔌 Integrations",
                "📦 Other",
                "🌐 Open Source"
            ]
        },
        "pricingModel": {
            "title": "💰 Pricing Model",
            "type": "string",
            "description": "Filter results by pricing type (free, monthly, etc.).",
            "default": "",
            "editor": "select",
            "enum": [
                "",
                "PRICE_PER_DATASET_ITEM",
                "PAY_PER_EVENT",
                "FREE",
                "FLAT_PRICE_PER_MONTH"
            ],
            "enumTitles": [
                "💲 All Pricing Models",
                "📈 Pay Per Result",
                "📊 Pay Per Event",
                "🆓 Free",
                "📅 Monthly Rental"
            ]
        },
        "managedBy": {
            "title": "🛠 Managed By",
            "type": "string",
            "description": "Filter by the developer type (Apify official or community).",
            "default": "",
            "editor": "select",
            "enum": [
                "",
                "APIFY",
                "COMMUNITY"
            ],
            "enumTitles": [
                "👥 All Developers",
                "🏢 Apify Official",
                "🧑‍💻 Community"
            ]
        },
        "limit": {
            "title": "📦 Items Per Request",
            "type": "integer",
            "description": "Number of items to fetch in each API call (max 100).",
            "default": 24,
            "minimum": 1,
            "maximum": 100,
            "unit": "items"
        },
        "maxResults": {
            "title": "📈 Max Results",
            "type": "integer",
            "description": "Maximum total number of results to fetch (0 = unlimited).",
            "default": 1,
            "minimum": 0,
            "unit": "items"
        },
        "batchDelay": {
            "title": "⏱ Delay Between Searches",
            "type": "integer",
            "description": "Pause (in milliseconds) between each search query to reduce rate limits or load.",
            "default": 100,
            "minimum": 0,
            "maximum": 10000,
            "unit": "ms"
        }
    },
    "required": []
}

src/main.js

import { Actor } from 'apify';
import { scrapeStore } from './scraper.js';

const STATE_KEY = 'SEARCH_PROGRESS';

await Actor.init();

const rawInput = await Actor.getInput() ?? {};

const input = {
    search: Array.isArray(rawInput.search) && rawInput.search.length > 0 ? rawInput.search : ["YouTube Scraper"],
    sortBy: typeof rawInput.sortBy === 'string' ? rawInput.sortBy : "relevance",
    category: typeof rawInput.category === 'string' ? rawInput.category : "",
    pricingModel: typeof rawInput.pricingModel === 'string' ? rawInput.pricingModel : "",
    managedBy: typeof rawInput.managedBy === 'string' ? rawInput.managedBy : "",
    limit: Number.isInteger(rawInput.limit) ? rawInput.limit : 24,
    maxResults: Number.isInteger(rawInput.maxResults) ? rawInput.maxResults : 100,
    batchDelay: Number.isInteger(rawInput.batchDelay) ? rawInput.batchDelay : 100,
};

const searchTerms = input.search;

try {
    if (!Array.isArray(searchTerms) || searchTerms.length === 0) {
        throw new Error('Input "search" must be a non-empty array.');
    }
} catch (error) {
    console.error('❌ Validation failed:', error.message);
    throw error;
}

let progress = await Actor.getValue(STATE_KEY) ?? {
    remainingTerms: [...searchTerms],
    completedTerms: [],
    perSearchStats: {}
};

Actor.on('migrating', async () => {
    console.log('🚨 Migration triggered, saving queue state...');
    await Actor.setValue(STATE_KEY, progress);
    await Actor.reboot();
});

try {
    while (progress.remainingTerms.length > 0) {
        const term = progress.remainingTerms.shift();
        console.log(`\n🔍 Searching for: "${term}"`);

        const queryInput = {
            ...input,
            search: term,
        };

        try {
            const result = await scrapeStore(queryInput);

            progress.perSearchStats[term] = result.stats;
            progress.completedTerms.push(term);
        } catch (error) {
            console.error(`❌ Failed to process term "${term}", re-adding to queue.`);
            progress.remainingTerms.push(term);
        }

        await Actor.setValue(STATE_KEY, progress);
    }

    console.log('\n✅ All searches completed!');
    console.log('📊 Final stats:', progress.perSearchStats);

    await Actor.setValue('SUMMARY', progress.perSearchStats);
    await Actor.setValue(STATE_KEY, null);
} catch (err) {
    console.error('❌ Scraping failed:', err);
    await Actor.fail(err);
} finally {
    await Actor.exit();
}
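
The pattern above (persist the term queue on the 'migrating' event, checkpoint after each term, clear the key on a clean finish) is what makes the run resumable. As a rough standalone sketch of just that resume mechanism, with a generic job queue standing in for the search terms:

import { Actor } from 'apify';

await Actor.init();

// Resume from a previous run if state exists; otherwise start fresh.
// 'QUEUE_STATE' and the job names are illustrative placeholders.
const state = (await Actor.getValue('QUEUE_STATE')) ?? { remaining: ['job-a', 'job-b'], done: [] };

Actor.on('migrating', async () => {
    // Persist the queue before the platform moves the run to another server.
    await Actor.setValue('QUEUE_STATE', state);
});

while (state.remaining.length > 0) {
    const job = state.remaining.shift();
    // ... do the work for `job` here ...
    state.done.push(job);
    await Actor.setValue('QUEUE_STATE', state); // checkpoint after every job
}

await Actor.setValue('QUEUE_STATE', null); // clear state on a clean finish
await Actor.exit();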

src/scraper.js

import { Actor } from 'apify';
import { gotScraping } from 'got-scraping';
import { delay } from './utils.js';

const BASE_URL = 'https://console-backend.apify.com/public/store/memoized-search';

export class ScraperState {
    constructor(persistenceKey) {
        this.persistenceKey = persistenceKey;
        this.processedItems = [];
        this.currentOffset = 0;
        this.totalResults = 0;
        this.stats = { success: 0, failures: 0 };
    }

    async load() {
        const data = await Actor.getValue(this.persistenceKey) || {};
        Object.assign(this, data);
    }

    async save() {
        await Actor.setValue(this.persistenceKey, {
            processedItems: this.processedItems,
            currentOffset: this.currentOffset,
            totalResults: this.totalResults,
            stats: this.stats
        });
    }
}

export async function scrapeStore(input) {
    const PERSISTENCE_KEY = `SEARCH_STATE`;
    const state = new ScraperState(PERSISTENCE_KEY);
    await state.load();

    Actor.on('migrating', async () => {
        console.log(`Migration detected - saving state for "${input.search}"`);
        await state.save();
    });

    let hasMore = true;
    const itemsPerRequest = input.limit;
    const maxResults = input.maxResults;
    while (hasMore) {
        const remainingItems = maxResults > 0
            ? Math.max(0, maxResults - state.stats.success)
            : itemsPerRequest;

        const currentLimit = maxResults > 0
            ? Math.min(itemsPerRequest, remainingItems)
            : itemsPerRequest;

        if (maxResults > 0 && remainingItems <= 0) {
            console.log('Reached maximum requested results');
            break;
        }

        const params = {
            search: input.search || '',
            sortBy: input.sortBy || 'RELEVANCE',
            category: input.category || '',
            pricingModel: input.pricingModel || '',
            limit: currentLimit.toString(),
            offset: state.currentOffset.toString()
        };

        // Only add managedBy if it has a value
        if (input.managedBy) {
            params.managedBy = input.managedBy;
        }

        const url = `${BASE_URL}?${new URLSearchParams(params)}`;
        console.log(`Fetching ${currentLimit} items from offset ${state.currentOffset}...`);

        // await delay(5000); // 5s pause, useful for debugging

        try {
            const response = await gotScraping.get(url, {
                headers: {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
                    'Accept': 'application/json',
                    'Referer': 'https://console.apify.com/',
                    'Origin': 'https://console.apify.com'
                },
                responseType: 'json'
            });

            const data = response.body;

            if (!data.items || !Array.isArray(data.items)) {
                const errorMsg = `Invalid response format - missing items array at offset ${state.currentOffset}`;
                console.error(errorMsg);
                throw new Error(errorMsg);
            }

            for (const item of data.items) {
                if (maxResults > 0 && state.stats.success >= maxResults) {
                    hasMore = false;
                    break;
                }
                try {
                    const itemData = {
                        id: item.id,
                        title: item.title,
                        name: item.name,
                        username: item.username,
                        userFullName: item.userFullName,
                        userPictureUrl: item.userPictureUrl,
                        description: item.description,
                        pictureUrl: item.pictureUrl,
                        notice: item.notice,
                        actorReviewRating: item.actorReviewRating,
                        bookmarkCount: item.bookmarkCount,
                        url: `https://console.apify.com/actors/${item.id}`,
                        price: item.currentPricingInfo
                            ? `${item.currentPricingInfo.pricingModel || 'UNKNOWN'} - $${item.currentPricingInfo.pricePerUnitUsd || 0}`
                            : 'Free or Unknown',
                        totalUsers7Days: item.stats?.totalUsers7Days,
                        totalUsers30Days: item.stats?.totalUsers30Days,
                        stats: {
                            totalBuilds: item.stats?.totalBuilds,
                            totalRuns: item.stats?.totalRuns,
                            totalUsers: item.stats?.totalUsers,
                            totalUsers90Days: item.stats?.totalUsers90Days,
                            lastRunStartedAt: item.stats?.lastRunStartedAt,
                            publicActorRunStats30Days: item.stats?.publicActorRunStats30Days
                        },
                        currentPricingInfo: {
                            pricingModel: item.currentPricingInfo?.pricingModel,
                            pricePerUnitUsd: item.currentPricingInfo?.pricePerUnitUsd,
                            trialMinutes: item.currentPricingInfo?.trialMinutes,
                            startedAt: item.currentPricingInfo?.startedAt,
                            createdAt: item.currentPricingInfo?.createdAt,
                            apifyMarginPercentage: item.currentPricingInfo?.apifyMarginPercentage,
                            notifiedAboutChangeAt: item.currentPricingInfo?.notifiedAboutChangeAt,
                            notifiedAboutFutureChangeAt: item.currentPricingInfo?.notifiedAboutFutureChangeAt
                        }
                    };
                    await Actor.pushData(itemData);
                    state.stats.success++;
                } catch (error) {
                    console.error(`Failed to process item ${item.id}:`, error.message);
                    state.stats.failures++;
                }
            }

            state.currentOffset += data.items.length;
            await state.save();
            console.log(`Progress: ${state.stats.success} items fetched (${maxResults > 0 ? `${state.stats.success}/${maxResults}` : 'unlimited'})`);

            if (data.items.length < currentLimit ||
                (maxResults > 0 && state.stats.success >= maxResults)) {
                hasMore = false;
                console.log('Reached end condition');
            }

            if (input.batchDelay > 0 && hasMore) {
                await delay(input.batchDelay);
            }
        } catch (error) {
            state.stats.failures++;
            await state.save();
            console.error(`Failed to fetch batch at offset ${state.currentOffset}:`, error.message);
            throw error;
        }
    }

    await Actor.setValue(PERSISTENCE_KEY, null);
    return {
        stats: state.stats
    };
}
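
To inspect the raw response shape that scrapeStore() consumes, the same memoized-search request can be issued on its own. This is a minimal sketch; the query values are illustrative and only the fields the scraper itself reads (items, item.name, item.title) are assumed to exist.

import { gotScraping } from 'got-scraping';

// Same public endpoint scrapeStore() paginates over, queried once.
const params = new URLSearchParams({
    search: 'YouTube Scraper',
    sortBy: 'relevance',
    limit: '5',
    offset: '0',
});

const { body } = await gotScraping.get(
    `https://console-backend.apify.com/public/store/memoized-search?${params}`,
    {
        headers: { Accept: 'application/json', Origin: 'https://console.apify.com' },
        responseType: 'json',
    },
);

console.log(body.items?.length, body.items?.map((item) => item.name));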

src/utils.js

export function delay(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
}

export function validateInput(input) {
    if (input.limit && (input.limit < 1 || input.limit > 100)) {
        throw new Error('Limit must be between 1 and 100');
    }
    if (input.batchDelay && input.batchDelay < 0) {
        throw new Error('Batch delay must be a positive number');
    }
    if (input.maxResults && input.maxResults < 0) {
        throw new Error('Max results must be a positive number');
    }
    if (input.minRating && (input.minRating < 0 || input.minRating > 5)) {
        throw new Error('Minimum rating must be between 0 and 5');
    }
}

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"extends": "@apify",
"root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage
# Added by Apify CLI
.venv

package.json

{
    "name": "apify-scraper",
    "version": "0.0.1",
    "type": "module",
    "description": "An Apify Actor that scrapes Actor data from the Apify Store",
    "engines": {
        "node": ">=18.0.0"
    },
    "dependencies": {
        "apify": "^3.2.6",
        "got-scraping": "^4.0.0"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.4.0",
        "eslint": "^8.50.0"
    },
    "scripts": {
        "start": "node src/main.js",
        "lint": "eslint ./src --ext .js,.jsx",
        "lint:fix": "eslint ./src --ext .js,.jsx --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "DZ-ABDLHAKIM",
    "license": "ISC"
}