Download Images From Dataset

Deprecated

Developed by Zuzka Pelechová

Maintained by Community

Download the actual pictures from a scraped dataset, based on the image URLs it contains. The actor creates a zip file for you to download with one click.

Rating: 0.0 (0)
Pricing: Pay per usage
Total users: 252
Monthly users: 3
Runs succeeded: >99%
Last modified: a year ago
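
If you want to start the actor programmatically instead of from the Apify Console, a minimal sketch using the apify-client package could look like the following. The actor identifier and the APIFY_TOKEN environment variable are placeholders, not values taken from this listing; the input fields mirror INPUT_SCHEMA.json below.

const { ApifyClient } = require('apify-client');

const client = new ApifyClient({ token: process.env.APIFY_TOKEN });

(async () => {
    // Replace '<ACTOR_ID>' with the ID or technical name shown on the actor's page.
    const run = await client.actor('<ACTOR_ID>').call({
        datasetId: '<YOUR_DATASET_ID>',              // dataset with the scraped items
        pathToImageUrls: 'image',                    // field holding the image URL(s)
        limit: 10,
        proxyConfiguration: { useApifyProxy: true },
    });
    console.log(`Run ${run.id} finished with status ${run.status}`);
})();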

Dockerfile

# Base image with Node.js 16 provided by Apify.
FROM apify/actor-node:16

# Install production dependencies first so this layer is cached between builds.
COPY package*.json ./
RUN npm --quiet set progress=false && npm install --only=prod --no-optional && echo "Installed NPM packages:" && (npm list --only=prod --no-optional --all || true) && echo "Node.js version:" && node --version && echo "NPM version:" && npm --version

# Copy the rest of the actor's source code.
COPY . ./

INPUT_SCHEMA.json

{
    "title": "Images download input",
    "type": "object",
    "schemaVersion": 1,
    "required": ["datasetId", "pathToImageUrls", "proxyConfiguration"],
    "properties": {
        "datasetId": {
            "title": "Dataset ID",
            "type": "string",
            "description": "ID of the Apify dataset where the data are located. Image URLs will be extracted from there. You can find it under the run or in the dataset URL.",
            "editor": "textfield",
            "prefill": "BJ1QEU6wepLqFPerV"
        },
        "pathToImageUrls": {
            "title": "Name of the field where image URLs are located",
            "type": "string",
            "description": "Name of the field (array or string) where the image URL(s) is/are located, most often `image` or `images`. For nested fields, use the \"JavaScript style\" path, e.g. \"details[0].images\".",
            "editor": "textfield",
            "prefill": "image"
        },
        "limit": {
            "title": "Limit",
            "type": "integer",
            "description": "Maximum number of items to load from the dataset. Use together with `offset` to paginate over the data (this can reduce the memory requirements of large loads).",
            "minimum": 1,
            "prefill": 10
        },
        "fileNameFunction": {
            "title": "Filename function",
            "type": "string",
            "description": "Function that specifies how the image filename is created from its URL. If you leave this empty, the MD5 hash of the URL is used.",
            "editor": "javascript",
            "prefill": "({url, md5}) => md5(url)",
            "sectionCaption": "Advanced settings"
        },
        "proxyConfiguration": {
            "title": "Proxy configuration",
            "type": "object",
            "description": "Select the proxies to be used.",
            "prefill": {
                "useApifyProxy": true
            },
            "editor": "proxy"
        }
    }
}
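
As an illustration of the two less obvious fields, here is a hypothetical input for a dataset whose items nest the image URLs, together with an alternative fileNameFunction that keeps the original file name instead of hashing the URL. Both values are made-up examples, not defaults.

// Example dataset item:
// { "title": "...", "details": [{ "images": ["https://example.com/a.jpg"] }] }
const exampleInput = {
    datasetId: '<YOUR_DATASET_ID>',
    pathToImageUrls: 'details[0].images',                      // "JavaScript style" path to the URL array
    limit: 100,
    fileNameFunction: '({url, md5}) => url.split("/").pop()',  // keep the original file name
    proxyConfiguration: { useApifyProxy: true },
};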

main.js

// Main Node.js source file of the actor.

// Import the Apify SDK. For more information, see https://sdk.apify.com/
const { Actor } = require('apify');

Actor.main(async () => {
    // Get the actor input (datasetId, pathToImageUrls, limit, proxyConfiguration, ...).
    const input = await Actor.getInput();

    // Hand the run over to the downstream image-downloading actor.
    // Metamorph keeps the same run and its default storages, and
    // uploadTo: 'zip-file' makes the downstream actor pack the images into a zip file.
    // Note: the hard-coded fileNameFunction overrides any value from the input.
    await Actor.metamorph('SEQBnEA5oe2R9Hgdj', {
        ...input,
        fileNameFunction: '({url, md5}) => md5(url)',
        offset: 0,
        outputTo: 'no-output',
        uploadTo: 'zip-file',
    });
});
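
Once the metamorphed run finishes, the zip file can be fetched from the run's default key-value store. Below is a sketch using apify-client, assuming the zip record's key ends with ".zip"; the exact key name is chosen by the downstream actor, so adjust the filter if needed.

const fs = require('fs');
const { ApifyClient } = require('apify-client');

const client = new ApifyClient({ token: process.env.APIFY_TOKEN });

async function saveZips(runId) {
    // The metamorphed run keeps its original default key-value store.
    const store = client.run(runId).keyValueStore();
    const { items } = await store.listKeys();

    for (const { key } of items) {
        if (!key.endsWith('.zip')) continue;                    // assumption: zip records end with ".zip"
        const record = await store.getRecord(key, { buffer: true });
        fs.writeFileSync(key, record.value);
        console.log(`Saved ${key}`);
    }
}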

package.json

{
    "name": "download-images-from-dataset",
    "version": "0.0.1",
    "description": "This is a boilerplate of an Apify actor.",
    "dependencies": {
        "apify": "^3.0.1"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.3.1",
        "eslint": "^8.20.0"
    },
    "scripts": {
        "start": "node main.js",
        "lint": "./node_modules/.bin/eslint ./src --ext .js,.jsx",
        "lint:fix": "./node_modules/.bin/eslint ./src --ext .js,.jsx --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}