Dataset Image Downloader & Uploader avatar

Dataset Image Downloader & Uploader

Try for free

No credit card required

Go to Store
Dataset Image Downloader & Uploader

Dataset Image Downloader & Uploader

lukaskrivka/images-download-upload
Try for free

No credit card required

Download image files from image URLs in your datasets and save them to a Zip file, a Key-Value store, or directly to your AWS S3 bucket.

You can access the Dataset Image Downloader & Uploader programmatically from your own applications by using the Apify API. You can choose the language preference from below. To use the Apify API, you’ll need an Apify account and your API token, found in Integrations settings in Apify Console.

1{
2  "openapi": "3.0.1",
3  "info": {
4    "version": "0.2",
5    "x-build-id": "nzLWxl6FB5RdkcQhI"
6  },
7  "servers": [
8    {
9      "url": "https://api.apify.com/v2"
10    }
11  ],
12  "paths": {
13    "/acts/lukaskrivka~images-download-upload/run-sync-get-dataset-items": {
14      "post": {
15        "operationId": "run-sync-get-dataset-items-lukaskrivka-images-download-upload",
16        "x-openai-isConsequential": false,
17        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
18        "tags": [
19          "Run Actor"
20        ],
21        "requestBody": {
22          "required": true,
23          "content": {
24            "application/json": {
25              "schema": {
26                "$ref": "#/components/schemas/inputSchema"
27              }
28            }
29          }
30        },
31        "parameters": [
32          {
33            "name": "token",
34            "in": "query",
35            "required": true,
36            "schema": {
37              "type": "string"
38            },
39            "description": "Enter your Apify token here"
40          }
41        ],
42        "responses": {
43          "200": {
44            "description": "OK"
45          }
46        }
47      }
48    },
49    "/acts/lukaskrivka~images-download-upload/runs": {
50      "post": {
51        "operationId": "runs-sync-lukaskrivka-images-download-upload",
52        "x-openai-isConsequential": false,
53        "summary": "Executes an Actor and returns information about the initiated run in response.",
54        "tags": [
55          "Run Actor"
56        ],
57        "requestBody": {
58          "required": true,
59          "content": {
60            "application/json": {
61              "schema": {
62                "$ref": "#/components/schemas/inputSchema"
63              }
64            }
65          }
66        },
67        "parameters": [
68          {
69            "name": "token",
70            "in": "query",
71            "required": true,
72            "schema": {
73              "type": "string"
74            },
75            "description": "Enter your Apify token here"
76          }
77        ],
78        "responses": {
79          "200": {
80            "description": "OK",
81            "content": {
82              "application/json": {
83                "schema": {
84                  "$ref": "#/components/schemas/runsResponseSchema"
85                }
86              }
87            }
88          }
89        }
90      }
91    },
92    "/acts/lukaskrivka~images-download-upload/run-sync": {
93      "post": {
94        "operationId": "run-sync-lukaskrivka-images-download-upload",
95        "x-openai-isConsequential": false,
96        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
97        "tags": [
98          "Run Actor"
99        ],
100        "requestBody": {
101          "required": true,
102          "content": {
103            "application/json": {
104              "schema": {
105                "$ref": "#/components/schemas/inputSchema"
106              }
107            }
108          }
109        },
110        "parameters": [
111          {
112            "name": "token",
113            "in": "query",
114            "required": true,
115            "schema": {
116              "type": "string"
117            },
118            "description": "Enter your Apify token here"
119          }
120        ],
121        "responses": {
122          "200": {
123            "description": "OK"
124          }
125        }
126      }
127    }
128  },
129  "components": {
130    "schemas": {
131      "inputSchema": {
132        "type": "object",
133        "properties": {
134          "datasetId": {
135            "title": "Dataset Id",
136            "type": "string",
137            "description": "Id of the dataset where the data are located. Image URLs will be extracted from there."
138          },
139          "pathToImageUrls": {
140            "title": "Path to image URLs",
141            "type": "string",
142            "description": "Path from item object to an array or string where the URL(s) is/are located. Provide in \"javascript style\", e.g. \"details[0].images\""
143          },
144          "fileNameFunction": {
145            "title": "Filename function",
146            "type": "string",
147            "description": "Function that specifies how the image filename is created from its URL. If you keep this empty, it will be the md5 hash of the URL."
148          },
149          "limit": {
150            "title": "Limit",
151            "minimum": 0,
152            "type": "integer",
153            "description": "Max items to load from the dataset. Use with `offset` to paginate over the data (can reduce memory requirement of large loads)."
154          },
155          "offset": {
156            "title": "Offset",
157            "minimum": 0,
158            "type": "integer",
159            "description": "How many items to skip from the dataset. Use with `limit` to paginate over the data (can reduce memory requirement of large loads)"
160          },
161          "outputTo": {
162            "title": "Output to",
163            "enum": [
164              "no-output",
165              "key-value-store",
166              "dataset"
167            ],
168            "type": "string",
169            "description": "Where to save the data from input after possibly transforming them during the download process."
170          },
171          "outputDatasetId": {
172            "title": "Output dataset Name or ID",
173            "type": "string",
174            "description": "Name or ID of the dataset where the data will be saved. Only relevant if you want to output to dataset!"
175          },
176          "storeInput": {
177            "title": "Key Value store input",
178            "type": "string",
179            "description": "If you want to input the data from key-value store instead of dataset. Notation: `storeId-recordKey`, e.g. - `kWdGzuXuKfYkrntWw-OUTPUT`"
180          },
181          "uploadTo": {
182            "title": "Upload to",
183            "enum": [
184              "zip-file",
185              "key-value-store",
186              "s3",
187              "no-upload"
188            ],
189            "type": "string",
190            "description": "Where do you want to upload the image files"
191          },
192          "uploadStoreName": {
193            "title": "Key-value store name",
194            "type": "string",
195            "description": "Key-value store name where the images will be uploaded. Empty field means it will be uploaded to the default key-value store"
196          },
197          "s3Bucket": {
198            "title": "S3 Bucket",
199            "type": "string",
200            "description": "Only relevant if you want to upload to S3! Name of the bucket where to upload."
201          },
202          "s3AccessKeyId": {
203            "title": "S3 Access key id",
204            "type": "string",
205            "description": "Only relevant if you want to upload to S3! You can create these credentials for IAM user."
206          },
207          "s3SecretAccessKey": {
208            "title": "S3 Secret access key",
209            "type": "string",
210            "description": "Only relevant if you want to upload to S3! You can create these credentials for IAM user."
211          },
212          "s3CheckIfAlreadyThere": {
213            "title": "Check if key is already on S3",
214            "type": "boolean",
215            "description": "This option is useful if you don't want to rewrite the same image. GET requests are also cheaper than PUT requests"
216          },
217          "preDownloadFunction": {
218            "title": "Pre-download function",
219            "type": "string",
220            "description": "Function that specifies how the data will be transformed before downloading the image. The input and output of the function is the whole data array. You can skip downloading images of any item if you add a skipItem: true field to it."
221          },
222          "postDownloadFunction": {
223            "title": "Post-download function",
224            "type": "string",
225            "description": "Function that specifies how the data will be transformed after downloading the image. The input and output of the function is the whole data array. By default it adds either the file URL or errors array depending on whether the download was successful."
226          },
227          "imageCheckMaxRetries": {
228            "title": "Max retries",
229            "minimum": 1,
230            "type": "integer",
231            "description": "How many times should actor retry if the file it tries to download fails to pass the tests. Setting this too high can lead to unnecessary loops.",
232            "default": 6
233          },
234          "imageCheckType": {
235            "title": "Image check type",
236            "enum": [
237              "none",
238              "content-type",
239              "image-size"
240            ],
241            "type": "string",
242            "description": "Type of the image check. If the image will not pass, the download will be retried with proxy and if that doesn't pass, the image is not uploaded."
243          },
244          "imageCheckMinSize": {
245            "title": "Min size in KB",
246            "minimum": 1,
247            "type": "integer",
248            "description": "Minimum size of the image to pass the image check test"
249          },
250          "imageCheckMinWidth": {
251            "title": "Min width",
252            "minimum": 1,
253            "type": "integer",
254            "description": "Minimum width of the image in pixels to pass the image check. Works only if the image check type is 'jimp'."
255          },
256          "imageCheckMinHeight": {
257            "title": "Min height",
258            "minimum": 1,
259            "type": "integer",
260            "description": "Minimum height of the image in pixels to pass the image check. Works only if the image check type is 'jimp'."
261          },
262          "proxyConfiguration": {
263            "title": "Proxy configuration",
264            "type": "object",
265            "description": "Select proxies to be used."
266          },
267          "maxConcurrency": {
268            "title": "Max concurrency",
269            "type": "integer",
270            "description": "You can specify how many maximum parallel downloading/uploading requests will be running. Keep in mind that the limit is here to not overload the host server.",
271            "default": 40
272          },
273          "downloadTimeout": {
274            "title": "Download timeout in ms",
275            "minimum": 1000,
276            "type": "integer",
277            "description": "How long we will wait to download each image",
278            "default": 15000
279          },
280          "batchSize": {
281            "title": "Batch Size",
282            "minimum": 1,
283            "type": "integer",
284            "description": "Number of items loaded from dataset in one batch.",
285            "default": 10000
286          },
287          "convertWebpToPng": {
288            "title": "Convert webp to png",
289            "type": "boolean",
290            "description": "If checked, the actor will automatically convert all webp type images to standard png. This increases the size of the image."
291          },
292          "stateFields": {
293            "title": "State fields",
294            "type": "array",
295            "description": "You can specify fields that you want in your state to make it more readable and use less memory. By default it uses all."
296          },
297          "noDownloadRun": {
298            "title": "Run without download",
299            "type": "boolean",
300            "description": "If checked, the actor will not download and upload the images. Useful for checking duplicates or transformations."
301          }
302        }
303      },
304      "runsResponseSchema": {
305        "type": "object",
306        "properties": {
307          "data": {
308            "type": "object",
309            "properties": {
310              "id": {
311                "type": "string"
312              },
313              "actId": {
314                "type": "string"
315              },
316              "userId": {
317                "type": "string"
318              },
319              "startedAt": {
320                "type": "string",
321                "format": "date-time",
322                "example": "2025-01-08T00:00:00.000Z"
323              },
324              "finishedAt": {
325                "type": "string",
326                "format": "date-time",
327                "example": "2025-01-08T00:00:00.000Z"
328              },
329              "status": {
330                "type": "string",
331                "example": "READY"
332              },
333              "meta": {
334                "type": "object",
335                "properties": {
336                  "origin": {
337                    "type": "string",
338                    "example": "API"
339                  },
340                  "userAgent": {
341                    "type": "string"
342                  }
343                }
344              },
345              "stats": {
346                "type": "object",
347                "properties": {
348                  "inputBodyLen": {
349                    "type": "integer",
350                    "example": 2000
351                  },
352                  "rebootCount": {
353                    "type": "integer",
354                    "example": 0
355                  },
356                  "restartCount": {
357                    "type": "integer",
358                    "example": 0
359                  },
360                  "resurrectCount": {
361                    "type": "integer",
362                    "example": 0
363                  },
364                  "computeUnits": {
365                    "type": "integer",
366                    "example": 0
367                  }
368                }
369              },
370              "options": {
371                "type": "object",
372                "properties": {
373                  "build": {
374                    "type": "string",
375                    "example": "latest"
376                  },
377                  "timeoutSecs": {
378                    "type": "integer",
379                    "example": 300
380                  },
381                  "memoryMbytes": {
382                    "type": "integer",
383                    "example": 1024
384                  },
385                  "diskMbytes": {
386                    "type": "integer",
387                    "example": 2048
388                  }
389                }
390              },
391              "buildId": {
392                "type": "string"
393              },
394              "defaultKeyValueStoreId": {
395                "type": "string"
396              },
397              "defaultDatasetId": {
398                "type": "string"
399              },
400              "defaultRequestQueueId": {
401                "type": "string"
402              },
403              "buildNumber": {
404                "type": "string",
405                "example": "1.0.0"
406              },
407              "containerUrl": {
408                "type": "string"
409              },
410              "usage": {
411                "type": "object",
412                "properties": {
413                  "ACTOR_COMPUTE_UNITS": {
414                    "type": "integer",
415                    "example": 0
416                  },
417                  "DATASET_READS": {
418                    "type": "integer",
419                    "example": 0
420                  },
421                  "DATASET_WRITES": {
422                    "type": "integer",
423                    "example": 0
424                  },
425                  "KEY_VALUE_STORE_READS": {
426                    "type": "integer",
427                    "example": 0
428                  },
429                  "KEY_VALUE_STORE_WRITES": {
430                    "type": "integer",
431                    "example": 1
432                  },
433                  "KEY_VALUE_STORE_LISTS": {
434                    "type": "integer",
435                    "example": 0
436                  },
437                  "REQUEST_QUEUE_READS": {
438                    "type": "integer",
439                    "example": 0
440                  },
441                  "REQUEST_QUEUE_WRITES": {
442                    "type": "integer",
443                    "example": 0
444                  },
445                  "DATA_TRANSFER_INTERNAL_GBYTES": {
446                    "type": "integer",
447                    "example": 0
448                  },
449                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
450                    "type": "integer",
451                    "example": 0
452                  },
453                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
454                    "type": "integer",
455                    "example": 0
456                  },
457                  "PROXY_SERPS": {
458                    "type": "integer",
459                    "example": 0
460                  }
461                }
462              },
463              "usageTotalUsd": {
464                "type": "number",
465                "example": 0.00005
466              },
467              "usageUsd": {
468                "type": "object",
469                "properties": {
470                  "ACTOR_COMPUTE_UNITS": {
471                    "type": "integer",
472                    "example": 0
473                  },
474                  "DATASET_READS": {
475                    "type": "integer",
476                    "example": 0
477                  },
478                  "DATASET_WRITES": {
479                    "type": "integer",
480                    "example": 0
481                  },
482                  "KEY_VALUE_STORE_READS": {
483                    "type": "integer",
484                    "example": 0
485                  },
486                  "KEY_VALUE_STORE_WRITES": {
487                    "type": "number",
488                    "example": 0.00005
489                  },
490                  "KEY_VALUE_STORE_LISTS": {
491                    "type": "integer",
492                    "example": 0
493                  },
494                  "REQUEST_QUEUE_READS": {
495                    "type": "integer",
496                    "example": 0
497                  },
498                  "REQUEST_QUEUE_WRITES": {
499                    "type": "integer",
500                    "example": 0
501                  },
502                  "DATA_TRANSFER_INTERNAL_GBYTES": {
503                    "type": "integer",
504                    "example": 0
505                  },
506                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
507                    "type": "integer",
508                    "example": 0
509                  },
510                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
511                    "type": "integer",
512                    "example": 0
513                  },
514                  "PROXY_SERPS": {
515                    "type": "integer",
516                    "example": 0
517                  }
518                }
519              }
520            }
521          }
522        }
523      }
524    }
525  }
526}

Dataset Image Downloader & Uploader OpenAPI definition

OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.

OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.

By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.

You can download the OpenAPI definitions for Dataset Image Downloader & Uploader from the options below:

If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.

You can also check out our other API clients:

Developer
Maintained by Community

Actor Metrics

  • 68 monthly users

  • 15 bookmarks

  • >99% runs succeeded

  • Created in Nov 2018

  • Modified 22 days ago