Download Images From Dataset avatar
Download Images From Dataset
Try for free

No credit card required

View all Actors
Download Images From Dataset

Download Images From Dataset

zuzka/download-images-from-dataset
Try for free

No credit card required

Download the actual pictures from a scraped dataset based on its image URLs. The actor creates a zip file that you can download with one click.

Dockerfile

# Build on the Apify actor base image with Node.js 16.
FROM apify/actor-node:16

# Copy the package manifests (package.json and any other package*.json file)
# before the rest of the sources.
COPY package*.json ./

# Install production dependencies only, skipping optional packages, then print
# the installed package tree and the Node.js / NPM versions to the build log.
# `npm list` is wrapped in `|| true` so a non-zero exit from it does not fail
# the build.
RUN npm --quiet set progress=false \
    && npm install --only=prod --no-optional \
    && echo "Installed NPM packages:" \
    && (npm list --only=prod --no-optional --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version

# Copy the remaining files of the build context into the image.
COPY . ./

INPUT_SCHEMA.json

1{
2  "title": "Images upload input",
3  "type": "object",
4  "schemaVersion": 1,
5  "required": ["datasetId", "pathToImageUrls", "proxyConfiguration"],
6  "properties": {
7    "datasetId": {
8      "title": "Dataset Id",
9      "type": "string",
10      "description": "ID of the dataset on Apify where the data is located. Image URLs will be extracted from there. You can find it under the run or in the dataset URL.",
11      "editor": "textfield",
12      "prefill": "BJ1QEU6wepLqFPerV"
13    },
14    "pathToImageUrls": {
15      "title": "Name of the field where image URLs are located",
16      "type": "string",
17      "description": "Name of the field (array or string) where the image URL(s) is/are located. Most often image or images. If the path is more complicated, use JavaScript-style notation, e.g. \"details[0].images\".",
18      "editor": "textfield",
19      "prefill": "image"
20    },
21    "limit": {
22      "title": "Limit",
23      "type": "integer",
24      "description": "Max items to load from the dataset. Use with `offset` to paginate over the data (can reduce memory requirement of large loads).",
25      "minimum": 1,
26      "prefill": 10
27    },
28    "fileNameFunction": {
29      "title": "Filename function",
30      "type": "string",
31      "description": "Function that specifies how the image filename is created from its URL. If you keep this empty, the filename will be the MD5 hash of the URL.",
32      "editor": "javascript",
33      "prefill": "({url, md5}) => md5(url)",
34      "sectionCaption": "Advanced settings"
35    },
36    "proxyConfiguration": {
37      "title": "Proxy configuration",
38      "type": "object",
39      "description": "Select proxies to be used.",
40      "prefill": {
41        "useApifyProxy": true
42      },
43      "editor": "proxy"
44    }
45  }
46}

main.js

// This is the main Node.js source code file of your actor.

// Import Apify SDK. For more information, see https://sdk.apify.com/
const { Actor } = require('apify');

// ID of the actor this run is metamorphed into.
const TARGET_ACTOR_ID = 'SEQBnEA5oe2R9Hgdj';

// Filename function used when the input does not provide one
// (names each image by the md5 hash of its URL).
const DEFAULT_FILE_NAME_FUNCTION = '({url, md5}) => md5(url)';

/**
 * Actor entry point: reads the actor input and metamorphs the run into the
 * target actor, forwarding the input with zip-file upload and no other output.
 *
 * - `fileNameFunction` from the input is used when it is a non-empty string;
 *   otherwise the md5 default is applied. (Previously the default was written
 *   after the input spread, so a user-supplied function was always discarded.)
 * - `offset` defaults to 0 but can be supplied in the input, since the
 *   `limit` option is documented to be used together with `offset`.
 * - `outputTo` and `uploadTo` are always forced, regardless of the input.
 */
Actor.main(async () => {
    // Fall back to an empty object so the property reads below are safe
    // when the run was started without any input.
    const input = (await Actor.getInput()) || {};

    const hasCustomFileNameFunction = typeof input.fileNameFunction === 'string'
        && input.fileNameFunction.trim() !== '';

    // TODO: Transform the input properly
    await Actor.metamorph(TARGET_ACTOR_ID, {
        // Default that the input may override.
        offset: 0,
        ...input,
        fileNameFunction: hasCustomFileNameFunction
            ? input.fileNameFunction
            : DEFAULT_FILE_NAME_FUNCTION,
        // Fixed settings: only produce the downloadable zip file.
        outputTo: 'no-output',
        uploadTo: 'zip-file',
    });
});

package.json

1{
2  "name": "download-images-from-dataset",
3  "version": "0.0.1",
4  "description": "This is a boilerplate of an Apify actor.",
5  "dependencies": {
6    "apify": "^3.0.1"
7  },
8  "devDependencies": {
9    "@apify/eslint-config": "^0.3.1",
10    "eslint": "^8.20.0"
11  },
12  "scripts": {
13    "start": "node main.js",
14    "lint": "./node_modules/.bin/eslint ./src --ext .js,.jsx",
15    "lint:fix": "./node_modules/.bin/eslint ./src --ext .js,.jsx --fix",
16    "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
17  },
18  "author": "It's not you it's me",
19  "license": "ISC"
20}
Developer
Maintained by Community
Actor metrics
  • 12 monthly users
  • 99.9% runs succeeded
  • 15.4 days response time
  • Created in Feb 2023
  • Modified about 2 months ago