Rename Dataset Fields avatar
Rename Dataset Fields

Pricing

Pay per usage

Go to Store
Rename Dataset Fields

Rename Dataset Fields

Developed by

Lukáš Křivka

Lukáš Křivka

Maintained by Community

Rename fields/columns of any dataset with a simple mapping

0.0 (0)

Pricing

Pay per usage

1

Total users

2

Monthly users

2

Last modified

8 months ago

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"root": true,
"env": {
"browser": true,
"es2020": true,
"node": true
},
"extends": [
"@apify/eslint-config-ts"
],
"parserOptions": {
"project": "./tsconfig.json",
"ecmaVersion": 2020
},
"ignorePatterns": [
"node_modules",
"dist",
"**/*.d.ts"
]
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage
# Added by Apify CLI
.venv

package.json

{
"name": "rename-dataset-fields",
"version": "0.0.1",
"type": "module",
"description": "Renames fields/columns of an Apify dataset using a simple old-name to new-name mapping.",
"engines": {
"node": ">=18.0.0"
},
"dependencies": {
"apify": "^3.1.10",
"crawlee": "^3.5.4"
},
"devDependencies": {
"@apify/eslint-config-ts": "^0.3.0",
"@apify/tsconfig": "^0.1.0",
"@typescript-eslint/eslint-plugin": "^6.7.2",
"@typescript-eslint/parser": "^6.7.2",
"eslint": "^8.50.0",
"tsx": "^4.6.2",
"typescript": "^5.3.3"
},
"scripts": {
"start": "npm run start:dev",
"start:prod": "node dist/main.js",
"start:dev": "tsx src/main.ts",
"build": "tsc",
"lint": "eslint ./src --ext .ts",
"lint:fix": "eslint ./src --ext .ts --fix",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
"license": "ISC"
}

tsconfig.json

{
"extends": "@apify/tsconfig",
"compilerOptions": {
"module": "NodeNext",
"moduleResolution": "NodeNext",
"target": "ES2022",
"outDir": "dist",
"noUnusedLocals": false,
"skipLibCheck": true,
"lib": ["DOM"]
},
"include": [
"./src/**/*"
]
}

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20 AS builder
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
# Install all dependencies. Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false
# Next, copy the source files using the user set
# in the base image.
COPY . ./
# Build the project (runs the "build" script from package.json, i.e. tsc).
# Dependencies were already installed by the npm install step above.
RUN npm run build
# Create final image
FROM apify/actor-node:20
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version \
&& rm -r ~/.npm
# Copy built JS files from builder image
COPY --from=builder /usr/src/app/dist ./dist
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
# Run the image.
CMD npm run start:prod --silent

.actor/actor.json

{
"actorSpecification": 1,
"name": "rename-dataset-fields",
"title": "Rename Dataset Fields",
"description": "Rename fields/columns of any dataset with a simple mapping.",
"version": "0.0",
"meta": {
"templateId": "ts-crawlee-cheerio"
},
"input": "./input_schema.json",
"dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
"title": "Rename Dataset Fields input",
"type": "object",
"schemaVersion": 1,
"properties": {
"datasetId": {
"title": "Dataset ID",
"type": "string",
"description": "ID of the dataset you want to rename the columns in. Can also use run ID.",
"editor": "textfield"
},
"renameMapping": {
"title": "Rename fields mapping",
"type": "array",
"editor": "keyValue",
"placeholderKey": "old_field_name",
"placeholderValue": "new_field_name",
"description": "Mapping of old field names to new field names."
}
}
}

src/main.ts

1// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/)
2import { Actor, ActorRun } from 'apify';
3
/**
 * Shape of the Actor input.
 *
 * `renameMapping` and `datasetId` correspond to the fields declared in
 * input_schema.json. `payload` and `resource` are not part of the schema;
 * they let the dataset ID be picked up from a run object, e.g. when the
 * Actor is started from a webhook.
 */
interface Input {
    /** Pairs of old field name (`key`) and new field name (`value`). */
    renameMapping: Array<{ key: string; value: string }>;
    /** ID of the dataset whose fields should be renamed. */
    datasetId?: string;

    // Alternative sources of the dataset ID (e.g. a webhook payload)
    /** Run object wrapped in a `payload` property. */
    payload?: { resource: ActorRun };
    /** Run object passed directly at the top level of the input. */
    resource?: ActorRun;
}
12
// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init()
await Actor.init();

// Structure of input is defined in input_schema.json.
// getInput() can resolve to null when the run has no input, so fall back to an
// empty object instead of asserting non-null and crashing in the destructuring.
const input: Partial<Input> = (await Actor.getInput<Input>()) ?? {};
const {
    datasetId,
    renameMapping = [],
    payload,
    resource,
} = input;

// Resolve the dataset to process. An explicit datasetId wins; otherwise use the
// default dataset of a run object found under `payload.resource` or `resource`.
// `||` (not `??`) is intentional so that an empty string is treated as "not provided".
const realDatasetId = datasetId || payload?.resource?.defaultDatasetId || resource?.defaultDatasetId;

// Without a dataset ID there is nothing to rename; fail with a clear message
// rather than forwarding `datasetIds: [undefined]` to the target Actor.
if (!realDatasetId) {
    await Actor.fail('Missing dataset ID: provide "datasetId" in the input, or a run object with "defaultDatasetId" under "resource" or "payload.resource".');
}
25
/**
 * Renames fields of every item according to `customInputData.renameMapping`.
 *
 * For each mapping entry `{ key, value }`, a field named `key` whose value is not
 * `undefined` is emitted under the name `value`. Renamed fields come first in the
 * resulting object (in mapping order), followed by the untouched fields in their
 * original order.
 *
 * If an item already has a field with the target name, the renamed value wins;
 * the pre-existing field is dropped instead of silently overwriting the rename.
 * The input items are not mutated.
 *
 * NOTE(review): this function is passed inside the input of another Actor, so it
 * is presumably serialized to source text and executed there. Keep it fully
 * self-contained (no references to outer-scope variables or helpers).
 *
 * @param items Dataset items to transform.
 * @param options Object carrying `customInputData.renameMapping`, the list of
 *   old-name/new-name pairs.
 * @returns New array of new item objects with the fields renamed.
 */
const preDedupTransformFunction = (items: Record<string, unknown>[], { customInputData }: { customInputData: { renameMapping: Input['renameMapping'] } }) => {
    return items.map((item) => {
        const renamedFields: Record<string, unknown> = {};
        // Source fields that were already moved; a field can only be renamed once
        const consumed = new Set<string>();
        // Target names produced so far; used to keep leftovers from overriding them
        const produced = new Set<string>();

        for (const { key: from, value: to } of customInputData.renameMapping) {
            if (consumed.has(from) || item[from] === undefined) continue;
            renamedFields[to] = item[from];
            consumed.add(from);
            produced.add(to);
        }

        // We want the renamed fields to be the first in that order
        const result: Record<string, unknown> = { ...renamedFields };
        for (const [field, fieldValue] of Object.entries(item)) {
            if (!consumed.has(field) && !produced.has(field)) {
                result[field] = fieldValue;
            }
        }
        return result;
    });
};
43
// Actor that does the actual work; this Actor only prepares its input
const DEDUP_ACTOR_ID = 'lukaskrivka/dedup-datasets';

// Input for the target Actor: the dataset to process, the transform function
// that renames the fields, and the mapping the function reads via customInputData
const dedupActorInput = {
    datasetIds: [realDatasetId],
    preDedupTransformFunction,
    customInputData: { renameMapping },
};

if (!Actor.isAtHome()) {
    // Outside the Apify platform: start the target Actor without waiting for it to finish
    await Actor.call(DEDUP_ACTOR_ID, dedupActorInput, { waitSecs: 0 });
} else {
    // On the Apify platform: metamorph this run into the target Actor
    await Actor.metamorph(DEDUP_ACTOR_ID, dedupActorInput);
}

// Finish with exit() so the Actor process shuts down gracefully
await Actor.exit();