Data Change Monitoring avatar
Data Change Monitoring

Deprecated

Pricing

$5.00/month + usage

Go to Store
Data Change Monitoring

Data Change Monitoring

Deprecated

Developed by

Juro Oravec

Juro Oravec

Maintained by Community

Monitor data changes between scraper runs or other datasets. Get a report on what fields changed. This actor takes two datasets, and verifies that a sample of entries that are common to both datasets are identical. Output is a list of discrepancies between the two datasets.

0.0 (0)

Pricing

$5.00/month + usage

1

Total users

1

Monthly users

1

Last modified

a year ago

You can access the Data Change Monitoring programmatically from your own applications by using the Apify API. You can also choose the language preference from below. To use the Apify API, you’ll need an Apify account and your API token, found in Integrations settings in Apify Console.

{
"openapi": "3.0.1",
"info": {
"version": "0.0",
"x-build-id": "vN1RaxaR1qB6Grfbp"
},
"servers": [
{
"url": "https://api.apify.com/v2"
}
],
"paths": {
"/acts/jurooravec~data-change-monitoring/run-sync-get-dataset-items": {
"post": {
"operationId": "run-sync-get-dataset-items-jurooravec-data-change-monitoring",
"x-openai-isConsequential": false,
"summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
"tags": [
"Run Actor"
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/inputSchema"
}
}
}
},
"parameters": [
{
"name": "token",
"in": "query",
"required": true,
"schema": {
"type": "string"
},
"description": "Enter your Apify token here"
}
],
"responses": {
"200": {
"description": "OK"
}
}
}
},
"/acts/jurooravec~data-change-monitoring/runs": {
"post": {
"operationId": "runs-sync-jurooravec-data-change-monitoring",
"x-openai-isConsequential": false,
"summary": "Executes an Actor and returns information about the initiated run in response.",
"tags": [
"Run Actor"
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/inputSchema"
}
}
}
},
"parameters": [
{
"name": "token",
"in": "query",
"required": true,
"schema": {
"type": "string"
},
"description": "Enter your Apify token here"
}
],
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/runsResponseSchema"
}
}
}
}
}
}
},
"/acts/jurooravec~data-change-monitoring/run-sync": {
"post": {
"operationId": "run-sync-jurooravec-data-change-monitoring",
"x-openai-isConsequential": false,
"summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
"tags": [
"Run Actor"
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/inputSchema"
}
}
}
},
"parameters": [
{
"name": "token",
"in": "query",
"required": true,
"schema": {
"type": "string"
},
"description": "Enter your Apify token here"
}
],
"responses": {
"200": {
"description": "OK"
}
}
}
}
},
"components": {
"schemas": {
"inputSchema": {
"type": "object",
"properties": {
"runType": {
"title": "Run type (actor or task)",
"enum": [
"ACTOR",
"TASK"
],
"type": "string",
"description": "Whether to call an actor or a task",
"default": "ACTOR"
},
"actorOrTaskId": {
"title": "Actor or Task ID",
"type": "string",
"description": "Actor or task to call. Allowed formats are `username/actor-name`, `userId/actor-name` or actor ID.<br/><br/>\n Can be omitted if you already have an existing Dataset and you don't need to run an Actor to generate the Dataset.<br/><br/>\n Either `actorOrTaskId` or `actorOrTaskDatasetIdOrName` MUST be given."
},
"actorOrTaskBuild": {
"title": "Actor or Task build",
"type": "string",
"description": "Tag or number of the actor build to run (e.g. `beta` or `1.2.345`).<br/><br/>\n If not provided, the run uses build tag or number from the default actor run configuration (typically `latest`)."
},
"actorOrTaskInput": {
"title": "Actor or Task input",
"type": "object",
"description": "Input for the actor. An object is expected, which will be stringified to JSON and its content type set to `application/json; charset=utf-8`."
},
"actorOrTaskDatasetIdOrName": {
"title": "Actor or Task output Dataset ID",
"pattern": "^[a-zA-Z0-9][a-zA-Z0-9-]*$",
"type": "string",
"description": "ID or name of the dataset that stores entries scraped by the given actor or task.<br/><br/>\n Either `actorOrTaskId` or `actorOrTaskDatasetIdOrName` MUST be given.<br/><br/>\n Default: Run's default dataset.<br/><br/>\n <strong>NOTE:</strong> Dataset name can only contain letters 'a' through 'z', the digits '0' through '9', and the hyphen ('-') but only in the middle of the string (e.g. 'my-value-1').\n <a href=\"https://docs.apify.com/sdk/python/docs/concepts/storages#opening-named-and-unnamed-storages\">Learn more</a>"
},
"comparisonDatasetIdOrName": {
"title": "Comparison Dataset ID",
"pattern": "^[a-zA-Z0-9][a-zA-Z0-9-]*$",
"type": "string",
"description": "ID or name of the dataset that stores entries from previous runs used for comparison.\n <a href=\"https://docs.apify.com/sdk/python/docs/concepts/storages#opening-named-and-unnamed-storages\">Learn more</a><br/><br/>\n <strong>NOTE:</strong> Dataset name can only contain letters 'a' through 'z', the digits '0' through '9', and the hyphen ('-') but only in the middle of the string (e.g. 'my-value-1')"
},
"comparisonDatasetPrimaryKeys": {
"title": "Comparison - Primary keys",
"type": "array",
"description": "Define fields used for matching entries between scraped and comparison datasets.<br/><br/>\n <strong>NOTE:</strong> If not set, the entries are hashed based on all fields",
"items": {
"type": "string"
}
},
"comparisonDatasetRemoveStaleEntries": {
"title": "Comparison - Replace stale entries",
"type": "boolean",
"description": "Scraped entries naturally get stale (e.g. a job offer is closed and removed from website). In such case, the entries in the comparison dataset can no longer be found in the scraped dataset, so we can't use them for comparison anymore. <br/><br/>\n Instead, we can replace these \"stale\" entries, so that the next time we run the comparison, we will again be able to find all entries.<br/><br/>\n If `true`, stale entries are automatically replaced if detected.<br/><br/>\n You might want to set this to `false` if you have a referential dataset that you want to update manually.",
"default": true
},
"comparisonDatasetMaxEntries": {
"title": "Comparison - Max entries",
"minimum": 1,
"type": "integer",
"description": "How many entries should be stored in the comparison dataset.<br/><br/>\n Even with a dataset of thousands of entries, you should need only lower tens of entries to test the data integrity (assuming that these entries are well-diverse).",
"default": 20
},
"comparisonFieldsIgnore": {
"title": "Comparison - Ignored fields",
"type": "array",
"description": "Some fields may change with every run (e.g. extraction timestamp). Such fields should be ignored from the data integrity check to avoid false alerts.",
"items": {
"type": "string"
}
},
"comparisonFieldsWarn": {
"title": "Comparison - Warning fields",
"type": "array",
"description": "Some fields are either not as important, or their value may change more often than other fields (e.g. a job ad description may be corrected a few times over its lifetime). You can mark such fields to be classified as \"warnings\" instead of \"errors\".",
"items": {
"type": "string"
}
},
"outputPickFields": {
"title": "Pick dataset fields",
"type": "array",
"description": "Select a subset of fields of an entry that will be pushed to the dataset.<br/><br/>\n If not set, all fields on an entry will be pushed to the dataset.<br/><br/>\n This is done before `outputRenameFields`.<br/><br/>\n Keys can be nested, e.g. `\"someProp.value[0]\"`.\n Nested path is resolved using <a href=\"https://lodash.com/docs/4.17.15#get\">Lodash.get()</a>.",
"items": {
"type": "string"
}
},
"outputRenameFields": {
"title": "Rename dataset fields",
"type": "object",
"description": "Rename fields (columns) of the output data.<br/><br/>\n If not set, all fields will have their original names.<br/><br/>\n This is done after `outputPickFields`.<br/><br/>\n Keys can be nested, e.g. `\"someProp.value[0]\"`.\n Nested path is resolved using <a href=\"https://lodash.com/docs/4.17.15#get\">Lodash.get()</a>."
},
"outputTransform": {
"title": "Transform entries",
"type": "string",
"description": "Freely transform the output data object using a custom function.<br/><br/>\n If not set, the data will remain as is.<br/><br/>\n This is done after `outputPickFields` and `outputRenameFields`.<br/><br/>\n The function has access to Apify's Actor class, and actor's input and a shared state in the second argument.<br/><br/>\n `async (entry, { Actor, input, state, itemCacheKey }) => { ... }`\n "
},
"outputTransformBefore": {
"title": "Transform entries - Setup",
"type": "string",
"description": "Use this if you need to run one-time initialization code before `outputTransform`.<br/><br/>\n The function has access to Apify's Actor class, and actor's input and a shared state in the first argument.<br/><br/>\n `async ({ Actor, input, state, itemCacheKey }) => { ... }`\n "
},
"outputTransformAfter": {
"title": "Transform entries - Teardown",
"type": "string",
"description": "Use this if you need to run one-time teardown code after `outputTransform`.<br/><br/>\n The function has access to Apify's Actor class, and actor's input and a shared state in the first argument.<br/><br/>\n `async ({ Actor, input, state, itemCacheKey }) => { ... }`\n "
},
"outputFilter": {
"title": "Filter entries",
"type": "string",
"description": "Decide which scraped entries should be included in the output by using a custom function.<br/><br/>\n If not set, all scraped entries will be included.<br/><br/>\n This is done after `outputPickFields`, `outputRenameFields`, and `outputTransform`.<br/><br/>\n The function has access to Apify's Actor class, and actor's input and a shared state in the second argument.<br/><br/>\n `async (entry, { Actor, input, state, itemCacheKey }) => boolean`\n "
},
"outputFilterBefore": {
"title": "Filter entries - Setup",
"type": "string",
"description": "Use this if you need to run one-time initialization code before `outputFilter`.<br/><br/>\n The function has access to Apify's Actor class, and actor's input and a shared state in the first argument.<br/><br/>\n `async (entry, { Actor, input, state, itemCacheKey }) => boolean`\n "
},
"outputFilterAfter": {
"title": "Filter entries - Teardown",
"type": "string",
"description": "Use this if you need to run one-time teardown code after `outputFilter`.<br/><br/>\n The function has access to Apify's Actor class, and actor's input and a shared state in the first argument.<br/><br/>\n `async ({ Actor, input, state, itemCacheKey }) => boolean`\n "
},
"outputDatasetIdOrName": {
"title": "Dataset ID or name",
"pattern": "^[a-zA-Z0-9][a-zA-Z0-9-]*$",
"type": "string",
"description": "By default, data is written to Default dataset.\n Set this option if you want to write data to non-default dataset.\n <a href=\"https://docs.apify.com/sdk/python/docs/concepts/storages#opening-named-and-unnamed-storages\">Learn more</a><br/><br/>\n <strong>NOTE:</strong> Dataset name can only contain letters 'a' through 'z', the digits '0' through '9', and the hyphen ('-') but only in the middle of the string (e.g. 'my-value-1')"
},
"outputCacheStoreIdOrName": {
"title": "Cache ID or name",
"pattern": "^[a-zA-Z0-9][a-zA-Z0-9-]*$",
"type": "string",
"description": "Set this option if you want to cache scraped entries in <a href=\"https://docs.apify.com/sdk/js/docs/guides/result-storage#key-value-store\">Apify's Key-value store</a>.<br/><br/>\n This is useful for example when you want to scrape only NEW entries. In such case, you can use the `outputFilter` option to define a custom function to filter out entries already found in the cache.\n <a href=\"https://docs.apify.com/sdk/python/docs/concepts/storages#working-with-key-value-stores\">Learn more</a><br/><br/>\n <strong>NOTE:</strong> Cache name can only contain letters 'a' through 'z', the digits '0' through '9', and the hyphen ('-') but only in the middle of the string (e.g. 'my-value-1')"
},
"outputCachePrimaryKeys": {
"title": "Cache primary keys",
"type": "array",
"description": "Specify fields that uniquely identify entries (primary keys), so entries can be compared against the cache.<br/><br/>\n <strong>NOTE:</strong> If not set, the entries are hashed based on all fields",
"items": {
"type": "string"
}
},
"outputCacheActionOnResult": {
"title": "Cache action on result",
"enum": [
"add",
"remove",
"overwrite"
],
"type": "string",
"description": "Specify whether scraped results should be added to, removed from, or overwrite the cache.<br/><br/>\n - <strong>add</strong> - Adds scraped results to the cache<br/><br/>\n - <strong>remove</strong> - Removes scraped results from the cache<br/><br/>\n - <strong>overwrite</strong> - First clears all entries from the cache, then adds scraped results to the cache<br/><br/>\n <strong>NOTE:</strong> No action happens when this field is empty."
},
"metamorphActorId": {
"title": "Metamorph actor ID - metamorph to another actor at the end",
"type": "string",
"description": "Use this option if you want to run another actor with the same dataset after this actor has finished (AKA metamorph into another actor). <a href=\"https://docs.apify.com/sdk/python/docs/concepts/interacting-with-other-actors#actormetamorph\">Learn more</a> <br/><br/>New actor is identified by its ID, e.g. \"apify/web-scraper\"."
},
"metamorphActorBuild": {
"title": "Metamorph actor build",
"type": "string",
"description": "Tag or number of the target actor build to metamorph into (e.g. 'beta' or '1.2.345')"
},
"metamorphActorInput": {
"title": "Metamorph actor input",
"type": "object",
"description": "Input object passed to the follow-up (metamorph) actor. <a href=\"https://docs.apify.com/sdk/python/docs/concepts/interacting-with-other-actors#actormetamorph\">Learn more</a>"
}
}
},
"runsResponseSchema": {
"type": "object",
"properties": {
"data": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"actId": {
"type": "string"
},
"userId": {
"type": "string"
},
"startedAt": {
"type": "string",
"format": "date-time",
"example": "2025-01-08T00:00:00.000Z"
},
"finishedAt": {
"type": "string",
"format": "date-time",
"example": "2025-01-08T00:00:00.000Z"
},
"status": {
"type": "string",
"example": "READY"
},
"meta": {
"type": "object",
"properties": {
"origin": {
"type": "string",
"example": "API"
},
"userAgent": {
"type": "string"
}
}
},
"stats": {
"type": "object",
"properties": {
"inputBodyLen": {
"type": "integer",
"example": 2000
},
"rebootCount": {
"type": "integer",
"example": 0
},
"restartCount": {
"type": "integer",
"example": 0
},
"resurrectCount": {
"type": "integer",
"example": 0
},
"computeUnits": {
"type": "integer",
"example": 0
}
}
},
"options": {
"type": "object",
"properties": {
"build": {
"type": "string",
"example": "latest"
},
"timeoutSecs": {
"type": "integer",
"example": 300
},
"memoryMbytes": {
"type": "integer",
"example": 1024
},
"diskMbytes": {
"type": "integer",
"example": 2048
}
}
},
"buildId": {
"type": "string"
},
"defaultKeyValueStoreId": {
"type": "string"
},
"defaultDatasetId": {
"type": "string"
},
"defaultRequestQueueId": {
"type": "string"
},
"buildNumber": {
"type": "string",
"example": "1.0.0"
},
"containerUrl": {
"type": "string"
},
"usage": {
"type": "object",
"properties": {
"ACTOR_COMPUTE_UNITS": {
"type": "integer",
"example": 0
},
"DATASET_READS": {
"type": "integer",
"example": 0
},
"DATASET_WRITES": {
"type": "integer",
"example": 0
},
"KEY_VALUE_STORE_READS": {
"type": "integer",
"example": 0
},
"KEY_VALUE_STORE_WRITES": {
"type": "integer",
"example": 1
},
"KEY_VALUE_STORE_LISTS": {
"type": "integer",
"example": 0
},
"REQUEST_QUEUE_READS": {
"type": "integer",
"example": 0
},
"REQUEST_QUEUE_WRITES": {
"type": "integer",
"example": 0
},
"DATA_TRANSFER_INTERNAL_GBYTES": {
"type": "integer",
"example": 0
},
"DATA_TRANSFER_EXTERNAL_GBYTES": {
"type": "integer",
"example": 0
},
"PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
"type": "integer",
"example": 0
},
"PROXY_SERPS": {
"type": "integer",
"example": 0
}
}
},
"usageTotalUsd": {
"type": "number",
"example": 0.00005
},
"usageUsd": {
"type": "object",
"properties": {
"ACTOR_COMPUTE_UNITS": {
"type": "integer",
"example": 0
},
"DATASET_READS": {
"type": "integer",
"example": 0
},
"DATASET_WRITES": {
"type": "integer",
"example": 0
},
"KEY_VALUE_STORE_READS": {
"type": "integer",
"example": 0
},
"KEY_VALUE_STORE_WRITES": {
"type": "number",
"example": 0.00005
},
"KEY_VALUE_STORE_LISTS": {
"type": "integer",
"example": 0
},
"REQUEST_QUEUE_READS": {
"type": "integer",
"example": 0
},
"REQUEST_QUEUE_WRITES": {
"type": "integer",
"example": 0
},
"DATA_TRANSFER_INTERNAL_GBYTES": {
"type": "integer",
"example": 0
},
"DATA_TRANSFER_EXTERNAL_GBYTES": {
"type": "integer",
"example": 0
},
"PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
"type": "integer",
"example": 0
},
"PROXY_SERPS": {
"type": "integer",
"example": 0
}
}
}
}
}
}
}
}
}
}

Data Change Monitoring OpenAPI definition

OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.

OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.

By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.

You can download the OpenAPI definitions for Data Change Monitoring from the options below:

If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.

You can also check out our other API clients: