Append to dataset


Developed by Josef Válek
Maintained by Community

Utility actor that allows you to build a single large dataset from individual default datasets of other actor runs.
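For example, a one-time copy of an existing dataset into a named target dataset could be triggered with apify-client. This is a minimal sketch; the actor id, dataset names, and token handling are placeholders for illustration, not values from this repository:

const { ApifyClient } = require('apify-client');

(async () => {
    const client = new ApifyClient({ token: process.env.APIFY_TOKEN });

    // Run the append-to-dataset actor once: copy the items of an existing
    // dataset into the (possibly already populated) target dataset.
    await client.actor('<append-to-dataset-actor-id>').call({
        datasetIdOrName: 'my-aggregated-dataset',
        sourceDatasetId: '<source-dataset-id>',
    });
})();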


Pricing: Pay per usage
Total users: 36
Monthly users: 5
Runs succeeded: 84%
Last modified: a year ago

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
    "extends": "@apify"
}

.gitignore

# This file tells Git which files shouldn't be added to source control

.idea
node_modules

Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Second, copy just package.json and package-lock.json since they should be
# the only files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --only=prod --no-optional --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick builds will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start

INPUT_SCHEMA.json

{
    "title": "Input schema for the append-to-dataset actor.",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "datasetIdOrName": {
            "title": "Target Dataset (id or name)",
            "type": "string",
            "description": "Dataset that the items should be appended to",
            "editor": "textfield"
        },
        "sourceDatasetId": {
            "sectionCaption": "Advanced settings",
            "title": "Source Dataset (id or name)",
            "description": "For a one-time use case, fill in the dataset to be appended",
            "type": "string",
            "editor": "textfield"
        },
        "eventData": {
            "title": "Event Data",
            "description": "If the actor is run via a webhook, eventData.actorRunId will be determined from the webhook payload and that run's default dataset will be appended",
            "type": "object",
            "editor": "json"
        }
    },
    "required": ["datasetIdOrName"]
}
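
For illustration, the two supported input shapes could look like this (the dataset names and ids below are hypothetical).

One-time use, appending an explicitly given dataset:

{
    "datasetIdOrName": "my-aggregated-dataset",
    "sourceDatasetId": "<source-dataset-id>"
}

Webhook use, where the platform fills in eventData and the actor appends the default dataset of the run identified by eventData.actorRunId:

{
    "datasetIdOrName": "my-aggregated-dataset",
    "eventData": { "actorRunId": "<actor-run-id>" }
}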

apify.json

{
    "env": { "npm_config_loglevel": "silent" }
}

main.js

const Apify = require('apify');

Apify.main(async () => {
    const { eventData, datasetIdOrName, sourceDatasetId, pageSize = 100 } = await Apify.getInput();

    // Check that the input really contains sufficient identification of the source dataset
    if (!eventData?.actorRunId && !sourceDatasetId) {
        throw new Error('Missing source dataset id or actor run id in event data');
    }

    const client = Apify.newClient();

    // Open the target dataset
    const targetDataset = await Apify.openDataset(datasetIdOrName);

    // Prepare the source dataset client, either directly from its id
    // or as the default dataset of the run that triggered the webhook
    const sourceDatasetClient = sourceDatasetId
        ? client.dataset(sourceDatasetId)
        : client.run(eventData.actorRunId).dataset();

    let currentOffset = 0;
    // eslint-disable-next-line no-constant-condition
    while (true) {
        // Get one page of items from the source dataset
        const { items, total, offset } = await sourceDatasetClient.listItems({
            clean: true,
            limit: pageSize,
            offset: currentOffset,
        });

        // Push the items to the target dataset
        await targetDataset.pushData(items);

        Apify.utils.log.info('Transferred items', {
            count: items.length,
            total,
            offset,
        });

        // Increase the offset to go to the next page
        currentOffset += pageSize;

        // If we got all the items, we can stop
        if (offset + items.length >= total) {
            Apify.utils.log.info('All items were transferred');
            break;
        }
    }
});
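
The eventData input is what makes the actor chainable behind other actors. A minimal sketch of wiring it up with a platform webhook via apify-client follows; the actor ids, dataset name, and token handling are assumptions for illustration, not part of this repository:

const { ApifyClient } = require('apify-client');

(async () => {
    const client = new ApifyClient({ token: process.env.APIFY_TOKEN });

    // Whenever the source actor finishes successfully, the platform calls
    // the append-to-dataset actor's run endpoint. The default {{eventData}}
    // webhook variable contains actorRunId, which main.js uses to locate
    // the finished run's default dataset.
    await client.webhooks().create({
        eventTypes: ['ACTOR.RUN.SUCCEEDED'],
        condition: { actorId: '<source-actor-id>' },
        requestUrl: 'https://api.apify.com/v2/acts/<append-to-dataset-actor-id>/runs?token=<APIFY_TOKEN>',
        payloadTemplate: '{"datasetIdOrName": "my-aggregated-dataset", "eventData": {{eventData}}}',
    });
})();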

package.json

{
    "name": "project-empty",
    "version": "0.0.1",
    "description": "This is a boilerplate of an Apify actor.",
    "dependencies": {
        "apify": "^2.3.2"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.1.3",
        "eslint": "^7.0.0"
    },
    "scripts": {
        "start": "node main.js",
        "lint": "./node_modules/.bin/eslint ./src --ext .js,.jsx",
        "lint:fix": "./node_modules/.bin/eslint ./src --ext .js,.jsx --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}

Pricing

Pricing model: Pay per usage

This Actor is paid per platform usage. The Actor itself is free to use; you only pay for the underlying Apify platform usage.