Forward Dataset to Actor or Task
Try for free
No credit card required
Go to Store
Forward Dataset to Actor or Task
valek.josef/forward-dataset-to-actor-or-task
Try for free
No credit card required
Forwards the contents of a specified dataset field to a specified field on the input of another Actor or task.
.actor/Dockerfile
1# Specify the base Docker image
2FROM oven/bun:1.0.7
3
4# Next, copy the source files using the user set
5# in the base image.
6COPY . ./
7
8# Install all dependencies.
9RUN bun install
10
11# Run the image.
12CMD bun run start
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "forward-dataset-to-actor-or-task",
4 "title": "Forward dataset to Actor or task",
5 "description": "Forwards a specified field of a dataset to the input of an Actor or task",
6 "version": "0.0",
7 "meta": {
8 "templateId": "ts-start-bun"
9 },
10 "input": "./input_schema.json",
11 "dockerfile": "./Dockerfile"
12}
.actor/input_schema.json
1{
2 "title": "Forward dataset to Actor or task",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "datasetId": {
7 "sectionCaption": "Source dataset",
8 "title": "Dataset ID",
9 "type": "string",
10 "description": "ID of the dataset whose items should be forwarded",
11 "editor": "textfield"
12 },
13 "datasetFieldName": {
14 "title": "Dataset field name",
15 "type": "string",
16 "description": "Name of the dataset field whose value is read from each item and forwarded to the target",
17 "editor": "textfield"
18 },
19 "targetType": {
20 "sectionCaption": "Target",
21 "title": "Type",
22 "type": "string",
23 "description": "Select if the target is Actor or task",
24 "editor": "select",
25 "enum": ["ACTOR", "TASK"],
26 "enumTitles": ["Actor", "Task"],
27 "prefill": "TASK"
28 },
29 "targetId": {
30 "title": "Target ID or name",
31 "type": "string",
32 "description": "ID or name of the target Actor or task that should be started",
33 "editor": "textfield"
34 },
35 "targetFieldName": {
36 "title": "Field name",
37 "type": "string",
38 "description": "Name of the field on input of target Actor or task that the dataset should be mapped to.",
39 "editor": "textfield"
40 },
41 "format": {
42 "title": "Format",
43 "description": "Pick the format that should be used. Corresponds to target field format.",
44 "type": "string",
45 "editor": "select",
46 "enum": ["stringList", "requestListSources"],
47 "enumTitles": ["String list", "Request list sources"],
48 "prefill": "stringList"
49 },
50 "inputOverride": {
51 "title": "Input override",
52 "type": "object",
53 "description": "Additional input passed to the target Actor or task. The target field is set on top of this object.",
54 "editor": "json"
55 },
56 "optionsOverride": {
57 "title": "Options override",
58 "type": "object",
59 "description": "Options used when starting the target Actor or task. The waitForFinish option is always set to 0.",
60 "editor": "json"
61 }
62 },
63 "required": ["datasetId", "datasetFieldName", "format", "targetFieldName", "targetType", "targetId"]
64}
src/main.ts
import { Actor, log } from 'apify';

await Actor.init();

/** Shape of the Actor input, mirroring `.actor/input_schema.json`. */
interface Input {
    /** ID of the dataset whose items are forwarded. */
    datasetId: string;
    /** Name of the field read from every dataset item. */
    datasetFieldName: string;
    /** Shape of each forwarded entry: the raw value, or `{ url: value }`. */
    format: 'stringList' | 'requestListSources';
    /** Whether `targetId` refers to an Actor or to a task. */
    targetType: 'ACTOR' | 'TASK';
    /** ID or name of the Actor or task to start. */
    targetId: string;
    /** Name of the field on the target input that receives the forwarded list. */
    targetFieldName: string;
    /** Extra input for the target; not required by the input schema, so it may be absent. */
    inputOverride?: Record<string, unknown>;
    /** Extra start options for the target; not required by the input schema, so it may be absent. */
    optionsOverride?: Record<string, unknown>;
}

/** Number of dataset items requested per API call. */
const PAGE_SIZE = 200;

const input = await Actor.getInput<Input>();
if (!input) throw new Error('Input is missing!');

const {
    datasetId,
    datasetFieldName,
    format,
    targetType,
    targetId,
    targetFieldName,
    inputOverride = {},
    optionsOverride = {},
} = input;

const { apifyClient } = Actor;

// Get the Actor or task client and fail early when the target does not exist.
const targetClient = targetType === 'TASK' ? apifyClient.task(targetId) : apifyClient.actor(targetId);
if (!(await targetClient.get())) {
    await Actor.fail(`The ${targetType} "${targetId}" was not found.`);
}

// Apply the same early existence check to the source dataset, so a wrong ID
// produces a clear failure message instead of an unhandled API error.
const datasetClient = apifyClient.dataset(datasetId);
if (!(await datasetClient.get())) {
    await Actor.fail(`The dataset "${datasetId}" was not found.`);
}

/** Converts one dataset value into the shape selected by `format`. */
const formatValue = (value: unknown): unknown => (format === 'requestListSources' ? { url: value } : value);

// Collects the formatted field value of every dataset item that has one.
const forwardedValues: unknown[] = [];
// Counts items without a value in the field; they are not forwarded.
let skippedCount = 0;

await Actor.setStatusMessage('Loading data from dataset...');

// Page through the dataset until the API returns an empty page.
let offset = 0;
let pageLength = 0;
do {
    const { items, total } = await datasetClient.listItems({
        fields: [datasetFieldName],
        offset,
        limit: PAGE_SIZE,
    });
    pageLength = items.length;
    offset += pageLength;

    for (const item of items) {
        const value = item[datasetFieldName];
        // An item without the field would otherwise end up as `null` (or as a
        // source object without `url`) in the target input.
        if (value == null) {
            skippedCount += 1;
            continue;
        }
        forwardedValues.push(formatValue(value));
    }

    await Actor.setStatusMessage(`Loaded ${offset}/${total} dataset items.`);
} while (pageLength > 0);

if (skippedCount > 0) {
    log.warning(`Skipped ${skippedCount} dataset items with no value in field "${datasetFieldName}".`);
}

// Build the target input as a new object instead of mutating the Actor's own input.
const targetInput: Record<string, unknown> = { ...inputOverride, [targetFieldName]: forwardedValues };

await Actor.setStatusMessage(`Dataset loaded, starting target ${targetType} ${targetId}`);

// Start the target with the prepared input; `waitForFinish: 0` is applied last
// so the override cannot make this Actor wait for the target run to finish.
await targetClient.start(targetInput, { ...optionsOverride, waitForFinish: 0 });

await Actor.exit();
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
14
15# dist folder
16dist
.gitignore
1storage
2apify_storage
3crawlee_storage
4node_modules
5dist
6tsconfig.tsbuildinfo
7storage/*
8!storage/key_value_stores
9storage/key_value_stores/*
10!storage/key_value_stores/default
11storage/key_value_stores/default/*
12!storage/key_value_stores/default/INPUT.json
package.json
1{
2 "name": "ts-start-bun",
3 "version": "0.0.1",
4 "type": "module",
5 "description": "Apify Actor that forwards a dataset field to the input of another Actor or task.",
6 "engines": {
7 "bun": ">=1.0.0"
8 },
9 "dependencies": {
10 "apify": "^3.1.10"
11 },
12 "devDependencies": {
13 "@apify/tsconfig": "^0.1.0"
14 },
15 "scripts": {
16 "start": "bun src/main.ts",
17 "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
18 },
19 "author": "It's not you it's me",
20 "license": "ISC"
21}
tsconfig.json
1{
2 "extends": "@apify/tsconfig",
3 "compilerOptions": {
4 "module": "ES2022",
5 "target": "ES2022",
6 "outDir": "dist",
7 "noUnusedLocals": false,
8 "lib": ["DOM"]
9 },
10 "include": [
11 "./src/**/*"
12 ]
13}
Developer
Maintained by Community
Actor Metrics
2 monthly users
-
4 stars
Created in Nov 2023
Modified a year ago
Categories