Easy Data Processor: Merge, Clean, and Transform Your Data avatar

Easy Data Processor: Merge, Clean, and Transform Your Data

Deprecated
Go to Store
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
Easy Data Processor: Merge, Clean, and Transform Your Data

Easy Data Processor: Merge, Clean, and Transform Your Data

dainty_screw/easy-data-processor-merge-clean-and-transform-your-data

Meet the Ultimate Data Processor, a human-friendly tool that simplifies your data tasks. With this Apify Actor, you can merge datasets, remove duplicates, and transform data quickly and effortlessly, all in one go. Say goodbye to complex processes and hello to streamlined data management.

You can access the Easy Data Processor: Merge, Clean, and Transform Your Data programmatically from your own applications by using the Apify API. Choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, found in the Integrations settings in Apify Console.

1{
2  "openapi": "3.0.1",
3  "info": {
4    "version": "0.0",
5    "x-build-id": "JVcWg7aJrNvY2Mr3C"
6  },
7  "servers": [
8    {
9      "url": "https://api.apify.com/v2"
10    }
11  ],
12  "paths": {
13    "/acts/dainty_screw~easy-data-processor-merge-clean-and-transform-your-data/run-sync-get-dataset-items": {
14      "post": {
15        "operationId": "run-sync-get-dataset-items-dainty_screw-easy-data-processor-merge-clean-and-transform-your-data",
16        "x-openai-isConsequential": false,
17        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
18        "tags": [
19          "Run Actor"
20        ],
21        "requestBody": {
22          "required": true,
23          "content": {
24            "application/json": {
25              "schema": {
26                "$ref": "#/components/schemas/inputSchema"
27              }
28            }
29          }
30        },
31        "parameters": [
32          {
33            "name": "token",
34            "in": "query",
35            "required": true,
36            "schema": {
37              "type": "string"
38            },
39            "description": "Enter your Apify token here"
40          }
41        ],
42        "responses": {
43          "200": {
44            "description": "OK"
45          }
46        }
47      }
48    },
49    "/acts/dainty_screw~easy-data-processor-merge-clean-and-transform-your-data/runs": {
50      "post": {
51        "operationId": "runs-sync-dainty_screw-easy-data-processor-merge-clean-and-transform-your-data",
52        "x-openai-isConsequential": false,
53        "summary": "Executes an Actor and returns information about the initiated run in response.",
54        "tags": [
55          "Run Actor"
56        ],
57        "requestBody": {
58          "required": true,
59          "content": {
60            "application/json": {
61              "schema": {
62                "$ref": "#/components/schemas/inputSchema"
63              }
64            }
65          }
66        },
67        "parameters": [
68          {
69            "name": "token",
70            "in": "query",
71            "required": true,
72            "schema": {
73              "type": "string"
74            },
75            "description": "Enter your Apify token here"
76          }
77        ],
78        "responses": {
79          "200": {
80            "description": "OK",
81            "content": {
82              "application/json": {
83                "schema": {
84                  "$ref": "#/components/schemas/runsResponseSchema"
85                }
86              }
87            }
88          }
89        }
90      }
91    },
92    "/acts/dainty_screw~easy-data-processor-merge-clean-and-transform-your-data/run-sync": {
93      "post": {
94        "operationId": "run-sync-dainty_screw-easy-data-processor-merge-clean-and-transform-your-data",
95        "x-openai-isConsequential": false,
96        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
97        "tags": [
98          "Run Actor"
99        ],
100        "requestBody": {
101          "required": true,
102          "content": {
103            "application/json": {
104              "schema": {
105                "$ref": "#/components/schemas/inputSchema"
106              }
107            }
108          }
109        },
110        "parameters": [
111          {
112            "name": "token",
113            "in": "query",
114            "required": true,
115            "schema": {
116              "type": "string"
117            },
118            "description": "Enter your Apify token here"
119          }
120        ],
121        "responses": {
122          "200": {
123            "description": "OK"
124          }
125        }
126      }
127    }
128  },
129  "components": {
130    "schemas": {
131      "inputSchema": {
132        "type": "object",
133        "properties": {
134          "datasetIds": {
135            "title": "Dataset IDs",
136            "type": "array",
137            "description": "Datasets that should be deduplicated and merged",
138            "items": {
139              "type": "string"
140            }
141          },
142          "fields": {
143            "title": "Fields for deduplication",
144            "type": "array",
145            "description": "Fields whose combination should be unique for the item to be considered unique. If none are provided, the actor does not perform deduplication.",
146            "items": {
147              "type": "string"
148            }
149          },
150          "output": {
151            "title": "What to output",
152            "enum": [
153              "unique-items",
154              "duplicate-items",
155              "nothing"
156            ],
157            "type": "string",
158            "description": "What will be pushed to the dataset from this actor",
159            "default": "unique-items"
160          },
161          "mode": {
162            "title": "Mode",
163            "enum": [
164              "dedup-after-load",
165              "dedup-as-loading"
166            ],
167            "type": "string",
168            "description": "How the loading and deduplication process will work.",
169            "default": "dedup-after-load"
170          },
171          "outputDatasetId": {
172            "title": "Output dataset ID or name (optional)",
173            "type": "string",
174            "description": "Optionally, results can be pushed into a dataset of your choice. If you provide a dataset name that doesn't exist, a new named dataset will be created."
175          },
176          "fieldsToLoad": {
177            "title": "Limit fields to load",
178            "type": "array",
179            "description": "You can choose to load only specific fields. Useful for speeding up loading and reducing memory needs.",
180            "items": {
181              "type": "string"
182            }
183          },
184          "preDedupTransformFunction": {
185            "title": "Pre dedup transform function",
186            "type": "string",
187            "description": "Function to transform items before deduplication is applied. For 'dedup-after-load' mode this is done for all items at once. For 'dedup-as-loading' this is applied to each batch separately."
188          },
189          "postDedupTransformFunction": {
190            "title": "Post dedup transform function",
191            "type": "string",
192            "description": "Function to transform items after deduplication is applied. For 'dedup-after-load' mode this is done for all items at once. For 'dedup-as-loading' this is applied to each batch separately."
193          },
194          "actorOrTaskId": {
195            "title": "Actor or Task ID (or name)",
196            "type": "string",
197            "description": "Use Actor or Task ID (e.g. `nwua9Gu5YrADL7ZDj`) or full name (e.g. `apify/instagram-scraper`)."
198          },
199          "onlyRunsNewerThan": {
200            "title": "Only runs newer than",
201            "type": "string",
202            "description": "Use a date format of either `YYYY-MM-DD` or with time `YYYY-MM-DDTHH:mm:ss`."
203          },
204          "onlyRunsOlderThan": {
205            "title": "Only runs older than",
206            "type": "string",
207            "description": "Use a date format of either `YYYY-MM-DD` or with time `YYYY-MM-DDTHH:mm:ss`."
208          },
209          "outputTo": {
210            "title": "Where to output",
211            "enum": [
212              "dataset",
213              "key-value-store"
214            ],
215            "type": "string",
216            "description": "Either can output to a single dataset or to split data into KV records depending on upload batch size. KV is upload is much faster but data end up in many files.",
217            "default": "dataset"
218          },
219          "parallelLoads": {
220            "title": "Parallel loads",
221            "maximum": 100,
222            "type": "integer",
223            "description": "Datasets can be loaded in parallel batches to speed things up if needed.",
224            "default": 10
225          },
226          "parallelPushes": {
227            "title": "Parallel pushes",
228            "minimum": 1,
229            "maximum": 50,
230            "type": "integer",
231            "description": "Deduped data can be pushed in parallel batches to speed things up if needed. If you want the data to be in the exact same order, you need to set this to 1.",
232            "default": 5
233          },
234          "uploadBatchSize": {
235            "title": "Upload batch size",
236            "minimum": 10,
237            "maximum": 1000,
238            "type": "integer",
239            "description": "How many items to upload in one pushData call. Useful for not overloading the Apify API. Only relevant for dataset upload.",
240            "default": 500
241          },
242          "batchSizeLoad": {
243            "title": "Download batch size",
244            "type": "integer",
245            "description": "How many items it will load in a single batch.",
246            "default": 50000
247          },
248          "offset": {
249            "title": "Offset (how many items to skip from start)",
250            "type": "integer",
251            "description": "By default, no items are skipped, which is the same as setting offset to 0. For multiple datasets, the offset is applied to the sum of their item counts, which is not very useful."
252          },
253          "limit": {
254            "title": "Limit (how many items to load)",
255            "type": "integer",
256            "description": "By default, the number of loaded items is not limited."
257          },
258          "verboseLog": {
259            "title": "Verbose log",
260            "type": "boolean",
261            "description": "Good for smaller runs. Large runs might run out of log space.",
262            "default": false
263          },
264          "nullAsUnique": {
265            "title": "Null fields are unique",
266            "type": "boolean",
267            "description": "Enable this to treat items with null (or missing) fields as always unique.",
268            "default": false
269          },
270          "datasetIdsOfFilterItems": {
271            "title": "Dataset IDs for just deduping",
272            "type": "array",
273            "description": "The items from these datasets will be used only as a dedup filter for the main datasets. These items are loaded first and then the main datasets are compared for uniqueness and pushed.",
274            "items": {
275              "type": "string"
276            }
277          },
278          "customInputData": {
279            "title": "Custom input data",
280            "type": "object",
281            "description": "You can pass custom data as a JSON object to be accessible in the transform functions as part of the 2nd parameter object."
282          }
283        }
284      },
285      "runsResponseSchema": {
286        "type": "object",
287        "properties": {
288          "data": {
289            "type": "object",
290            "properties": {
291              "id": {
292                "type": "string"
293              },
294              "actId": {
295                "type": "string"
296              },
297              "userId": {
298                "type": "string"
299              },
300              "startedAt": {
301                "type": "string",
302                "format": "date-time",
303                "example": "2025-01-08T00:00:00.000Z"
304              },
305              "finishedAt": {
306                "type": "string",
307                "format": "date-time",
308                "example": "2025-01-08T00:00:00.000Z"
309              },
310              "status": {
311                "type": "string",
312                "example": "READY"
313              },
314              "meta": {
315                "type": "object",
316                "properties": {
317                  "origin": {
318                    "type": "string",
319                    "example": "API"
320                  },
321                  "userAgent": {
322                    "type": "string"
323                  }
324                }
325              },
326              "stats": {
327                "type": "object",
328                "properties": {
329                  "inputBodyLen": {
330                    "type": "integer",
331                    "example": 2000
332                  },
333                  "rebootCount": {
334                    "type": "integer",
335                    "example": 0
336                  },
337                  "restartCount": {
338                    "type": "integer",
339                    "example": 0
340                  },
341                  "resurrectCount": {
342                    "type": "integer",
343                    "example": 0
344                  },
345                  "computeUnits": {
346                    "type": "integer",
347                    "example": 0
348                  }
349                }
350              },
351              "options": {
352                "type": "object",
353                "properties": {
354                  "build": {
355                    "type": "string",
356                    "example": "latest"
357                  },
358                  "timeoutSecs": {
359                    "type": "integer",
360                    "example": 300
361                  },
362                  "memoryMbytes": {
363                    "type": "integer",
364                    "example": 1024
365                  },
366                  "diskMbytes": {
367                    "type": "integer",
368                    "example": 2048
369                  }
370                }
371              },
372              "buildId": {
373                "type": "string"
374              },
375              "defaultKeyValueStoreId": {
376                "type": "string"
377              },
378              "defaultDatasetId": {
379                "type": "string"
380              },
381              "defaultRequestQueueId": {
382                "type": "string"
383              },
384              "buildNumber": {
385                "type": "string",
386                "example": "1.0.0"
387              },
388              "containerUrl": {
389                "type": "string"
390              },
391              "usage": {
392                "type": "object",
393                "properties": {
394                  "ACTOR_COMPUTE_UNITS": {
395                    "type": "integer",
396                    "example": 0
397                  },
398                  "DATASET_READS": {
399                    "type": "integer",
400                    "example": 0
401                  },
402                  "DATASET_WRITES": {
403                    "type": "integer",
404                    "example": 0
405                  },
406                  "KEY_VALUE_STORE_READS": {
407                    "type": "integer",
408                    "example": 0
409                  },
410                  "KEY_VALUE_STORE_WRITES": {
411                    "type": "integer",
412                    "example": 1
413                  },
414                  "KEY_VALUE_STORE_LISTS": {
415                    "type": "integer",
416                    "example": 0
417                  },
418                  "REQUEST_QUEUE_READS": {
419                    "type": "integer",
420                    "example": 0
421                  },
422                  "REQUEST_QUEUE_WRITES": {
423                    "type": "integer",
424                    "example": 0
425                  },
426                  "DATA_TRANSFER_INTERNAL_GBYTES": {
427                    "type": "integer",
428                    "example": 0
429                  },
430                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
431                    "type": "integer",
432                    "example": 0
433                  },
434                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
435                    "type": "integer",
436                    "example": 0
437                  },
438                  "PROXY_SERPS": {
439                    "type": "integer",
440                    "example": 0
441                  }
442                }
443              },
444              "usageTotalUsd": {
445                "type": "number",
446                "example": 0.00005
447              },
448              "usageUsd": {
449                "type": "object",
450                "properties": {
451                  "ACTOR_COMPUTE_UNITS": {
452                    "type": "integer",
453                    "example": 0
454                  },
455                  "DATASET_READS": {
456                    "type": "integer",
457                    "example": 0
458                  },
459                  "DATASET_WRITES": {
460                    "type": "integer",
461                    "example": 0
462                  },
463                  "KEY_VALUE_STORE_READS": {
464                    "type": "integer",
465                    "example": 0
466                  },
467                  "KEY_VALUE_STORE_WRITES": {
468                    "type": "number",
469                    "example": 0.00005
470                  },
471                  "KEY_VALUE_STORE_LISTS": {
472                    "type": "integer",
473                    "example": 0
474                  },
475                  "REQUEST_QUEUE_READS": {
476                    "type": "integer",
477                    "example": 0
478                  },
479                  "REQUEST_QUEUE_WRITES": {
480                    "type": "integer",
481                    "example": 0
482                  },
483                  "DATA_TRANSFER_INTERNAL_GBYTES": {
484                    "type": "integer",
485                    "example": 0
486                  },
487                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
488                    "type": "integer",
489                    "example": 0
490                  },
491                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
492                    "type": "integer",
493                    "example": 0
494                  },
495                  "PROXY_SERPS": {
496                    "type": "integer",
497                    "example": 0
498                  }
499                }
500              }
501            }
502          }
503        }
504      }
505    }
506  }
507}

Easy Data Processor: Merge, Clean, and Transform Your Data OpenAPI definition

OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.

OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.

By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.

You can download the OpenAPI definitions for Easy Data Processor: Merge, Clean, and Transform Your Data from the options below:

If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.

You can also check out our other API clients:

Developer
Maintained by Community