Rust Scraper avatar

Rust Scraper

Try for free

No credit card required

Go to Store
Rust Scraper

Rust Scraper

lukaskrivka/rust-scraper
Try for free

No credit card required

Speed-of-light scraping with the Rust programming language! This is an early alpha version for experimenting; use at your own risk!

Developer
Maintained by Community

Actor Metrics

  • 1 Monthly user

  • No reviews yet

  • 3 bookmarks

  • >99% runs succeeded

  • Created in Feb 2019

  • Modified 4 years ago

You can access the Rust Scraper programmatically from your own applications by using the Apify API. You can also choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, found in the Integrations settings in Apify Console.

1{
2  "openapi": "3.0.1",
3  "info": {
4    "version": "0.0",
5    "x-build-id": "aj51YAG2AaKYIXepf"
6  },
7  "servers": [
8    {
9      "url": "https://api.apify.com/v2"
10    }
11  ],
12  "paths": {
13    "/acts/lukaskrivka~rust-scraper/run-sync-get-dataset-items": {
14      "post": {
15        "operationId": "run-sync-get-dataset-items-lukaskrivka-rust-scraper",
16        "x-openai-isConsequential": false,
17        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
18        "tags": [
19          "Run Actor"
20        ],
21        "requestBody": {
22          "required": true,
23          "content": {
24            "application/json": {
25              "schema": {
26                "$ref": "#/components/schemas/inputSchema"
27              }
28            }
29          }
30        },
31        "parameters": [
32          {
33            "name": "token",
34            "in": "query",
35            "required": true,
36            "schema": {
37              "type": "string"
38            },
39            "description": "Enter your Apify token here"
40          }
41        ],
42        "responses": {
43          "200": {
44            "description": "OK"
45          }
46        }
47      }
48    },
49    "/acts/lukaskrivka~rust-scraper/runs": {
50      "post": {
51        "operationId": "runs-sync-lukaskrivka-rust-scraper",
52        "x-openai-isConsequential": false,
53        "summary": "Executes an Actor and returns information about the initiated run in response.",
54        "tags": [
55          "Run Actor"
56        ],
57        "requestBody": {
58          "required": true,
59          "content": {
60            "application/json": {
61              "schema": {
62                "$ref": "#/components/schemas/inputSchema"
63              }
64            }
65          }
66        },
67        "parameters": [
68          {
69            "name": "token",
70            "in": "query",
71            "required": true,
72            "schema": {
73              "type": "string"
74            },
75            "description": "Enter your Apify token here"
76          }
77        ],
78        "responses": {
79          "200": {
80            "description": "OK",
81            "content": {
82              "application/json": {
83                "schema": {
84                  "$ref": "#/components/schemas/runsResponseSchema"
85                }
86              }
87            }
88          }
89        }
90      }
91    },
92    "/acts/lukaskrivka~rust-scraper/run-sync": {
93      "post": {
94        "operationId": "run-sync-lukaskrivka-rust-scraper",
95        "x-openai-isConsequential": false,
96        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
97        "tags": [
98          "Run Actor"
99        ],
100        "requestBody": {
101          "required": true,
102          "content": {
103            "application/json": {
104              "schema": {
105                "$ref": "#/components/schemas/inputSchema"
106              }
107            }
108          }
109        },
110        "parameters": [
111          {
112            "name": "token",
113            "in": "query",
114            "required": true,
115            "schema": {
116              "type": "string"
117            },
118            "description": "Enter your Apify token here"
119          }
120        ],
121        "responses": {
122          "200": {
123            "description": "OK"
124          }
125        }
126      }
127    }
128  },
129  "components": {
130    "schemas": {
131      "inputSchema": {
132        "type": "object",
133        "required": [
134          "urls",
135          "extract"
136        ],
137        "properties": {
138          "urls": {
139            "title": "Start URLs",
140            "type": "array",
141            "description": "URLs that will be scraped. Must be an array of objects with \"url\" property.",
142            "items": {
143              "type": "object",
144              "required": [
145                "url"
146              ],
147              "properties": {
148                "url": {
149                  "type": "string",
150                  "title": "URL of a web page",
151                  "format": "uri"
152                }
153              }
154            }
155          },
156          "extract": {
157            "title": "Extraction config",
158            "type": "array",
159            "description": "Array that defines what should be scraped from a page's HTML and how. See the readme for more info."
160          },
161          "proxy_settings": {
162            "title": "Proxy configuration",
163            "type": "object",
164            "description": "Select proxies to be used by your crawler. For most use cases we recommend the default Apify automatic proxy."
165          },
166          "max_concurrency": {
167            "title": "Max concurrency",
168            "minimum": 1,
169            "type": "integer",
170            "description": "Sets the maximum concurrency (parallelism) for the crawl. Keep this at a reasonable level because this scraper can go really fast.",
171            "default": 50
172          },
173          "max_request_retries": {
174            "title": "Max request retries",
175            "minimum": 1,
176            "type": "integer",
177            "description": "Sets the maximum number of retries for each request (URL).",
178            "default": 3
179          },
180          "debug_log": {
181            "title": "Debug log",
182            "type": "boolean",
183            "description": "Shows when each URL starts and ends scraping with timings. Don't use for larger runs as the log gets filled quickly.",
184            "default": false
185          },
186          "push_data_size": {
187            "title": "Push data buffer size",
188            "type": "integer",
189            "description": "Buffers results into a vector (array) before pushing them to a dataset. This prevents overwhelming the Apify API. The default number is usually a good choice.",
190            "default": 500
191          },
192          "force_cloud": {
193            "title": "Force cloud",
194            "type": "boolean",
195            "description": "This allows local runs to use cloud storage, mainly for testing. On the Apify platform this has no effect.",
196            "default": false
197          }
198        }
199      },
200      "runsResponseSchema": {
201        "type": "object",
202        "properties": {
203          "data": {
204            "type": "object",
205            "properties": {
206              "id": {
207                "type": "string"
208              },
209              "actId": {
210                "type": "string"
211              },
212              "userId": {
213                "type": "string"
214              },
215              "startedAt": {
216                "type": "string",
217                "format": "date-time",
218                "example": "2025-01-08T00:00:00.000Z"
219              },
220              "finishedAt": {
221                "type": "string",
222                "format": "date-time",
223                "example": "2025-01-08T00:00:00.000Z"
224              },
225              "status": {
226                "type": "string",
227                "example": "READY"
228              },
229              "meta": {
230                "type": "object",
231                "properties": {
232                  "origin": {
233                    "type": "string",
234                    "example": "API"
235                  },
236                  "userAgent": {
237                    "type": "string"
238                  }
239                }
240              },
241              "stats": {
242                "type": "object",
243                "properties": {
244                  "inputBodyLen": {
245                    "type": "integer",
246                    "example": 2000
247                  },
248                  "rebootCount": {
249                    "type": "integer",
250                    "example": 0
251                  },
252                  "restartCount": {
253                    "type": "integer",
254                    "example": 0
255                  },
256                  "resurrectCount": {
257                    "type": "integer",
258                    "example": 0
259                  },
260                  "computeUnits": {
261                    "type": "integer",
262                    "example": 0
263                  }
264                }
265              },
266              "options": {
267                "type": "object",
268                "properties": {
269                  "build": {
270                    "type": "string",
271                    "example": "latest"
272                  },
273                  "timeoutSecs": {
274                    "type": "integer",
275                    "example": 300
276                  },
277                  "memoryMbytes": {
278                    "type": "integer",
279                    "example": 1024
280                  },
281                  "diskMbytes": {
282                    "type": "integer",
283                    "example": 2048
284                  }
285                }
286              },
287              "buildId": {
288                "type": "string"
289              },
290              "defaultKeyValueStoreId": {
291                "type": "string"
292              },
293              "defaultDatasetId": {
294                "type": "string"
295              },
296              "defaultRequestQueueId": {
297                "type": "string"
298              },
299              "buildNumber": {
300                "type": "string",
301                "example": "1.0.0"
302              },
303              "containerUrl": {
304                "type": "string"
305              },
306              "usage": {
307                "type": "object",
308                "properties": {
309                  "ACTOR_COMPUTE_UNITS": {
310                    "type": "integer",
311                    "example": 0
312                  },
313                  "DATASET_READS": {
314                    "type": "integer",
315                    "example": 0
316                  },
317                  "DATASET_WRITES": {
318                    "type": "integer",
319                    "example": 0
320                  },
321                  "KEY_VALUE_STORE_READS": {
322                    "type": "integer",
323                    "example": 0
324                  },
325                  "KEY_VALUE_STORE_WRITES": {
326                    "type": "integer",
327                    "example": 1
328                  },
329                  "KEY_VALUE_STORE_LISTS": {
330                    "type": "integer",
331                    "example": 0
332                  },
333                  "REQUEST_QUEUE_READS": {
334                    "type": "integer",
335                    "example": 0
336                  },
337                  "REQUEST_QUEUE_WRITES": {
338                    "type": "integer",
339                    "example": 0
340                  },
341                  "DATA_TRANSFER_INTERNAL_GBYTES": {
342                    "type": "integer",
343                    "example": 0
344                  },
345                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
346                    "type": "integer",
347                    "example": 0
348                  },
349                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
350                    "type": "integer",
351                    "example": 0
352                  },
353                  "PROXY_SERPS": {
354                    "type": "integer",
355                    "example": 0
356                  }
357                }
358              },
359              "usageTotalUsd": {
360                "type": "number",
361                "example": 0.00005
362              },
363              "usageUsd": {
364                "type": "object",
365                "properties": {
366                  "ACTOR_COMPUTE_UNITS": {
367                    "type": "integer",
368                    "example": 0
369                  },
370                  "DATASET_READS": {
371                    "type": "integer",
372                    "example": 0
373                  },
374                  "DATASET_WRITES": {
375                    "type": "integer",
376                    "example": 0
377                  },
378                  "KEY_VALUE_STORE_READS": {
379                    "type": "integer",
380                    "example": 0
381                  },
382                  "KEY_VALUE_STORE_WRITES": {
383                    "type": "number",
384                    "example": 0.00005
385                  },
386                  "KEY_VALUE_STORE_LISTS": {
387                    "type": "integer",
388                    "example": 0
389                  },
390                  "REQUEST_QUEUE_READS": {
391                    "type": "integer",
392                    "example": 0
393                  },
394                  "REQUEST_QUEUE_WRITES": {
395                    "type": "integer",
396                    "example": 0
397                  },
398                  "DATA_TRANSFER_INTERNAL_GBYTES": {
399                    "type": "integer",
400                    "example": 0
401                  },
402                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
403                    "type": "integer",
404                    "example": 0
405                  },
406                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
407                    "type": "integer",
408                    "example": 0
409                  },
410                  "PROXY_SERPS": {
411                    "type": "integer",
412                    "example": 0
413                  }
414                }
415              }
416            }
417          }
418        }
419      }
420    }
421  }
422}

Rust Scraper OpenAPI definition

OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.

OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.

By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.

You can download the OpenAPI definitions for Rust Scraper from the options below:

If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.

You can also check out our other API clients: