
Fast Scraper
$5.00/month

Fast Scraper
$5.00/month
Fast Scraper is a blazingly fast web scraper powered by Rust on the backend. It allows you to scrape static HTML pages extremely quickly while using less than 128 MB of memory. With this scraper, you can maximize the efficiency of your credits on Apify.
Actor Metrics
1 monthly user
No reviews yet
1 bookmark
>99% runs succeeded
Created in Jun 2024
Modified 9 months ago
You can access the Fast Scraper programmatically from your own applications by using the Apify API. You can also choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, found in the Integrations settings in Apify Console.
1{
2 "openapi": "3.0.1",
3 "info": {
4 "version": "1.0",
5 "x-build-id": "FGaLrA0sVMW8tC1uk"
6 },
7 "servers": [
8 {
9 "url": "https://api.apify.com/v2"
10 }
11 ],
12 "paths": {
13 "/acts/danielherman~fast-scraper/run-sync-get-dataset-items": {
14 "post": {
15 "operationId": "run-sync-get-dataset-items-danielherman-fast-scraper",
16 "x-openai-isConsequential": false,
17 "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
18 "tags": [
19 "Run Actor"
20 ],
21 "requestBody": {
22 "required": true,
23 "content": {
24 "application/json": {
25 "schema": {
26 "$ref": "#/components/schemas/inputSchema"
27 }
28 }
29 }
30 },
31 "parameters": [
32 {
33 "name": "token",
34 "in": "query",
35 "required": true,
36 "schema": {
37 "type": "string"
38 },
39 "description": "Enter your Apify token here"
40 }
41 ],
42 "responses": {
43 "200": {
44 "description": "OK"
45 }
46 }
47 }
48 },
49 "/acts/danielherman~fast-scraper/runs": {
50 "post": {
51 "operationId": "runs-sync-danielherman-fast-scraper",
52 "x-openai-isConsequential": false,
53 "summary": "Executes an Actor and returns information about the initiated run in response.",
54 "tags": [
55 "Run Actor"
56 ],
57 "requestBody": {
58 "required": true,
59 "content": {
60 "application/json": {
61 "schema": {
62 "$ref": "#/components/schemas/inputSchema"
63 }
64 }
65 }
66 },
67 "parameters": [
68 {
69 "name": "token",
70 "in": "query",
71 "required": true,
72 "schema": {
73 "type": "string"
74 },
75 "description": "Enter your Apify token here"
76 }
77 ],
78 "responses": {
79 "200": {
80 "description": "OK",
81 "content": {
82 "application/json": {
83 "schema": {
84 "$ref": "#/components/schemas/runsResponseSchema"
85 }
86 }
87 }
88 }
89 }
90 }
91 },
92 "/acts/danielherman~fast-scraper/run-sync": {
93 "post": {
94 "operationId": "run-sync-danielherman-fast-scraper",
95 "x-openai-isConsequential": false,
96 "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
97 "tags": [
98 "Run Actor"
99 ],
100 "requestBody": {
101 "required": true,
102 "content": {
103 "application/json": {
104 "schema": {
105 "$ref": "#/components/schemas/inputSchema"
106 }
107 }
108 }
109 },
110 "parameters": [
111 {
112 "name": "token",
113 "in": "query",
114 "required": true,
115 "schema": {
116 "type": "string"
117 },
118 "description": "Enter your Apify token here"
119 }
120 ],
121 "responses": {
122 "200": {
123 "description": "OK"
124 }
125 }
126 }
127 }
128 },
129 "components": {
130 "schemas": {
131 "inputSchema": {
132 "type": "object",
133 "required": [
134 "requests"
135 ],
136 "properties": {
137 "requests": {
138 "title": "Requests for scraping",
139 "type": "array",
140 "description": "Requests with given fields"
141 },
142 "proxy_settings": {
143 "title": "Proxy configuration",
144 "type": "object",
145 "description": "Select proxies to be used by your crawler. For most use cases we recommend the default Apify automatic proxy."
146 },
147 "headers": {
148 "title": "Global Headers",
149 "type": "object",
150 "description": "Global headers to be used for all requests."
151 },
152 "user_agent": {
153 "title": "Global User Agent",
154 "type": "string",
155 "description": "Global user agent used for all requests.",
156 "default": "ApifyFastScraper/1.0"
157 },
158 "force_cloud": {
159 "title": "Force Cloud",
160 "type": "boolean",
161 "description": "Allows local runs to use cloud storage, mainly for testing. On Apify platform, this has no effect.",
162 "default": false
163 },
164 "debug_log": {
165 "title": "Debug Log",
166 "type": "boolean",
167 "description": "Shows when each URL starts and ends scraping, with timings. Not recommended for larger runs as the log can get filled quickly.",
168 "default": false
169 },
170 "push_data_size": {
171 "title": "Push Data Buffer Size",
172 "type": "integer",
173 "description": "Buffers results into an array before pushing to a dataset to prevent overwhelming the Apify API. The default number is usually a good choice.",
174 "default": 50
175 },
176 "max_concurrency": {
177 "title": "Max Concurrency",
178 "minimum": 1,
179 "type": "integer",
180 "description": "Sets the maximum concurrency (parallelism) for the crawl. Keep this to a reasonable level because this scraper can go really fast.",
181 "default": 10
182 },
183 "max_request_retries": {
184 "title": "Max Request Retries",
185 "minimum": 1,
186 "type": "integer",
187 "description": "Sets the maximum number of retries for each request (URL).",
188 "default": 3
189 },
190 "max_request_retry_timeout_ms": {
191 "title": "Max Request Retry Timeout (ms)",
192 "minimum": 1,
193 "type": "integer",
194 "description": "Sets the maximum timeout in milliseconds to retry a failed request.",
195 "default": 10000
196 },
197 "request_retry_wait_ms": {
198 "title": "Request Retry Wait (ms)",
199 "minimum": 1,
200 "type": "integer",
201 "description": "Sets the wait time in milliseconds before retrying a failed request.",
202 "default": 5000
203 }
204 }
205 },
206 "runsResponseSchema": {
207 "type": "object",
208 "properties": {
209 "data": {
210 "type": "object",
211 "properties": {
212 "id": {
213 "type": "string"
214 },
215 "actId": {
216 "type": "string"
217 },
218 "userId": {
219 "type": "string"
220 },
221 "startedAt": {
222 "type": "string",
223 "format": "date-time",
224 "example": "2025-01-08T00:00:00.000Z"
225 },
226 "finishedAt": {
227 "type": "string",
228 "format": "date-time",
229 "example": "2025-01-08T00:00:00.000Z"
230 },
231 "status": {
232 "type": "string",
233 "example": "READY"
234 },
235 "meta": {
236 "type": "object",
237 "properties": {
238 "origin": {
239 "type": "string",
240 "example": "API"
241 },
242 "userAgent": {
243 "type": "string"
244 }
245 }
246 },
247 "stats": {
248 "type": "object",
249 "properties": {
250 "inputBodyLen": {
251 "type": "integer",
252 "example": 2000
253 },
254 "rebootCount": {
255 "type": "integer",
256 "example": 0
257 },
258 "restartCount": {
259 "type": "integer",
260 "example": 0
261 },
262 "resurrectCount": {
263 "type": "integer",
264 "example": 0
265 },
266 "computeUnits": {
267 "type": "integer",
268 "example": 0
269 }
270 }
271 },
272 "options": {
273 "type": "object",
274 "properties": {
275 "build": {
276 "type": "string",
277 "example": "latest"
278 },
279 "timeoutSecs": {
280 "type": "integer",
281 "example": 300
282 },
283 "memoryMbytes": {
284 "type": "integer",
285 "example": 1024
286 },
287 "diskMbytes": {
288 "type": "integer",
289 "example": 2048
290 }
291 }
292 },
293 "buildId": {
294 "type": "string"
295 },
296 "defaultKeyValueStoreId": {
297 "type": "string"
298 },
299 "defaultDatasetId": {
300 "type": "string"
301 },
302 "defaultRequestQueueId": {
303 "type": "string"
304 },
305 "buildNumber": {
306 "type": "string",
307 "example": "1.0.0"
308 },
309 "containerUrl": {
310 "type": "string"
311 },
312 "usage": {
313 "type": "object",
314 "properties": {
315 "ACTOR_COMPUTE_UNITS": {
316 "type": "integer",
317 "example": 0
318 },
319 "DATASET_READS": {
320 "type": "integer",
321 "example": 0
322 },
323 "DATASET_WRITES": {
324 "type": "integer",
325 "example": 0
326 },
327 "KEY_VALUE_STORE_READS": {
328 "type": "integer",
329 "example": 0
330 },
331 "KEY_VALUE_STORE_WRITES": {
332 "type": "integer",
333 "example": 1
334 },
335 "KEY_VALUE_STORE_LISTS": {
336 "type": "integer",
337 "example": 0
338 },
339 "REQUEST_QUEUE_READS": {
340 "type": "integer",
341 "example": 0
342 },
343 "REQUEST_QUEUE_WRITES": {
344 "type": "integer",
345 "example": 0
346 },
347 "DATA_TRANSFER_INTERNAL_GBYTES": {
348 "type": "integer",
349 "example": 0
350 },
351 "DATA_TRANSFER_EXTERNAL_GBYTES": {
352 "type": "integer",
353 "example": 0
354 },
355 "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
356 "type": "integer",
357 "example": 0
358 },
359 "PROXY_SERPS": {
360 "type": "integer",
361 "example": 0
362 }
363 }
364 },
365 "usageTotalUsd": {
366 "type": "number",
367 "example": 0.00005
368 },
369 "usageUsd": {
370 "type": "object",
371 "properties": {
372 "ACTOR_COMPUTE_UNITS": {
373 "type": "integer",
374 "example": 0
375 },
376 "DATASET_READS": {
377 "type": "integer",
378 "example": 0
379 },
380 "DATASET_WRITES": {
381 "type": "integer",
382 "example": 0
383 },
384 "KEY_VALUE_STORE_READS": {
385 "type": "integer",
386 "example": 0
387 },
388 "KEY_VALUE_STORE_WRITES": {
389 "type": "number",
390 "example": 0.00005
391 },
392 "KEY_VALUE_STORE_LISTS": {
393 "type": "integer",
394 "example": 0
395 },
396 "REQUEST_QUEUE_READS": {
397 "type": "integer",
398 "example": 0
399 },
400 "REQUEST_QUEUE_WRITES": {
401 "type": "integer",
402 "example": 0
403 },
404 "DATA_TRANSFER_INTERNAL_GBYTES": {
405 "type": "integer",
406 "example": 0
407 },
408 "DATA_TRANSFER_EXTERNAL_GBYTES": {
409 "type": "integer",
410 "example": 0
411 },
412 "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
413 "type": "integer",
414 "example": 0
415 },
416 "PROXY_SERPS": {
417 "type": "integer",
418 "example": 0
419 }
420 }
421 }
422 }
423 }
424 }
425 }
426 }
427 }
428}
Fast Scraper OpenAPI definition
OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.
OpenAPI works well with AI agents and GPTs because it standardizes how these systems interact with various APIs, enabling reliable integrations and efficient communication.
By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.
You can download the OpenAPI definitions for Fast Scraper from the options below:
If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.
You can also check out our other API clients: