Forward Dataset to Actor or Task avatar

Forward Dataset to Actor or Task

Try for free

No credit card required

Go to Store
Forward Dataset to Actor or Task

Forward Dataset to Actor or Task

valek.josef/forward-dataset-to-actor-or-task
Try for free

No credit card required

Forwards contents of specified dataset to a specified field on the input of another Actor or task.

.actor/Dockerfile

# Specify the base Docker image (pinned so builds are reproducible).
FROM oven/bun:1.0.7

# Copy only the dependency manifests first, so the install layer below is
# served from Docker's build cache until the dependencies themselves change.
# The lockfile is optional: the glob simply matches nothing when it is absent.
COPY package.json bun.lockb* ./

# Install all dependencies.
RUN bun install

# Next, copy the remaining source files. Editing sources no longer
# invalidates the dependency layer above.
COPY . ./

# Run the image. Exec form makes bun the container's main process, so it
# receives stop signals directly instead of through an intermediate shell.
CMD ["bun", "run", "start"]

.actor/actor.json

1{
2    "actorSpecification": 1,
3    "name": "forward-dataset-to-actor-or-task",
4    "title": "Forward dataset to Actor or task",
5    "description": "Forwards specified field of dataset to Actor or task",
6    "version": "0.0",
7    "meta": {
8        "templateId": "ts-start-bun"
9    },
10    "input": "./input_schema.json",
11    "dockerfile": "./Dockerfile"
12}

.actor/input_schema.json

1{
2    "title": "Forward dataset to Actor or task",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "datasetId": {
7            "sectionCaption": "Source dataset",
8            "title": "Dataset ID",
9            "type": "string",
10            "description": "ID of the dataset that should be forwarded.",
11            "editor": "textfield"
12        },
13        "datasetFieldName": {
14            "title": "Dataset field name",
15            "type": "string",
16            "description": "Name of the dataset field whose values should be forwarded.",
17            "editor": "textfield"
18        },
19        "targetType": {
20            "sectionCaption": "Target",
21            "title": "Type",
22            "type": "string",
23            "description": "Select if the target is Actor or task",
24            "editor": "select",
25            "enum": ["ACTOR", "TASK"],
26            "enumTitles": ["Actor", "Task"],
27            "prefill": "TASK"
28        },
29        "targetId": {
30            "title": "Target ID or name",
31            "type": "string",
32            "description": "ID or name of the target Actor or task that should be started.",
33            "editor": "textfield"
34        },
35        "targetFieldName": {
36            "title": "Field name",
37            "type": "string",
38            "description": "Name of the field on input of target Actor or task that the dataset should be mapped to.",
39            "editor": "textfield"
40        },
41        "format": {
42            "title": "Format",
43            "description": "Pick the format that should be used. Corresponds to target field format.",
44            "type": "string",
45            "editor": "select",
46            "enum": ["stringList", "requestListSources"],
47            "enumTitles": ["String list", "Request list sources"],
48            "prefill": "stringList"
49        },
50        "inputOverride": {
51            "title": "Input override",
52            "type": "object",
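53            "description": "Additional input passed to the target Actor or task. The target field is overwritten with the forwarded dataset values.",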
54            "editor": "json"
55        },
56        "optionsOverride": {
57            "title": "Options override",
58            "type": "object",
59            "description": "Run options passed when starting the target Actor or task. The waitForFinish option is always set to 0.",
60            "editor": "json"
61        }
62    },
63    "required": ["datasetId", "datasetFieldName", "format", "targetFieldName", "targetType", "targetId"]
64}

src/main.ts

import { Actor, log } from 'apify';

/**
 * Input of this Actor, mirroring `.actor/input_schema.json`.
 * The two override objects are optional in the schema, so they are optional here.
 */
interface Input {
    /** Dataset whose items are read. */
    datasetId: string;
    /** Field that is picked from every dataset item. */
    datasetFieldName: string;
    /** Shape each picked value is converted to before forwarding. */
    format: 'stringList' | 'requestListSources';
    /** Whether `targetId` refers to an Actor or to a task. */
    targetType: 'ACTOR' | 'TASK';
    targetId: string;
    /** Field of the target's input that receives the collected values. */
    targetFieldName: string;
    inputOverride?: Record<string, unknown>;
    optionsOverride?: Record<string, unknown>;
}

/** Number of dataset items requested per API call. */
const PAGE_SIZE = 200;

await Actor.init();

const input = await Actor.getInput<Input>();
if (!input) throw new Error("Input is missing!");

const {
    datasetId,
    datasetFieldName,
    format,
    targetType,
    targetId,
    targetFieldName,
    inputOverride = {},
    optionsOverride = {},
} = input;

const { apifyClient } = Actor;

// Get Actor or task client
const targetClient = targetType === 'TASK' ? apifyClient.task(targetId) : apifyClient.actor(targetId);
// Check if the target actually exists before spending time on the dataset
if (!await targetClient.get()) {
    await Actor.fail(`The ${targetType} "${targetId}" was not found.`);
}

/**
 * Converts one raw field value to the shape selected by `format`:
 * wrapped as `{ url }` for request list sources, passed through otherwise.
 */
const formatValue = (value: unknown): unknown => (
    format === 'requestListSources' ? { url: value } : value
);

// Collected (and formatted) values of the requested field from all dataset items.
const forwardedValues: unknown[] = [];
// Items that had no usable value in the requested field.
let skippedCount = 0;

await Actor.setStatusMessage('Loading data from dataset...');

// Page through the dataset until an empty batch signals the end.
const datasetClient = apifyClient.dataset(datasetId);
let offset = 0;
for (;;) {
    const { items, total } = await datasetClient.listItems({
        fields: [datasetFieldName],
        offset,
        limit: PAGE_SIZE,
    });
    if (items.length === 0) break;
    offset += items.length;

    for (const sourceItem of items) {
        const value = sourceItem[datasetFieldName];
        // An item without the field would otherwise be forwarded as
        // `undefined` (or `{ url: undefined }`), which is useless to the target.
        if (value === undefined || value === null) {
            skippedCount += 1;
            continue;
        }
        forwardedValues.push(formatValue(value));
    }

    await Actor.setStatusMessage(`Loaded ${offset}/${total} dataset items.`);
}

if (skippedCount > 0) {
    log.warning(`Skipped ${skippedCount} dataset items without a value in field "${datasetFieldName}".`);
}

// Build the target input without mutating the object received on input.
const targetInput = { ...inputOverride, [targetFieldName]: forwardedValues };

await Actor.setStatusMessage(`Dataset loaded, starting target ${targetType} ${targetId}`);

// Start the target with specified input and options, don't wait for finish.
// `waitForFinish` is spread last so an override cannot make this Actor block.
await targetClient.start(targetInput, { ...optionsOverride, waitForFinish: 0 });

await Actor.exit();

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
14
15# dist folder
16dist

.gitignore

1storage
2apify_storage
3crawlee_storage
4node_modules
5dist
6tsconfig.tsbuildinfo
7storage/*
8!storage/key_value_stores
9storage/key_value_stores/*
10!storage/key_value_stores/default
11storage/key_value_stores/default/*
12!storage/key_value_stores/default/INPUT.json

package.json

1{
2    "name": "ts-start-bun",
3    "version": "0.0.1",
4    "type": "module",
5    "description": "Forwards a field of a dataset to the input of another Apify Actor or task.",
6    "engines": {
7        "bun": ">=1.0.0"
8    },
9    "dependencies": {
10        "apify": "^3.1.10"
11    },
12    "devDependencies": {
13        "@apify/tsconfig": "^0.1.0"
14    },
15    "scripts": {
16        "start": "bun src/main.ts",
17        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
18    },
19    "author": "It's not you it's me",
20    "license": "ISC"
21}

tsconfig.json

1{
2    "extends": "@apify/tsconfig",
3    "compilerOptions": {
4        "module": "ES2022",
5        "target": "ES2022",
6        "outDir": "dist",
7        "noUnusedLocals": false,
8        "lib": ["DOM"]
9    },
10    "include": [
11        "./src/**/*"
12    ]
13}
Developer
Maintained by Community

Actor Metrics

  • 2 monthly users

  • 4 stars

  • Created in Nov 2023

  • Modified a year ago