Sitemap Change Orchestrator avatar
Sitemap Change Orchestrator

Pricing

Pay per usage

Go to Store
Sitemap Change Orchestrator

Sitemap Change Orchestrator

Developed by

Tri⟁angle

Tri⟁angle

Maintained by Apify

Monitor website sitemaps for new, updated, or removed URLs. Integration with the Website Content Crawler (WCC) allows feeding only relevant URLs. This ensures your web crawls are efficient, targeted, and resource-optimized, keeping your datasets fresh for any application.

0.0 (0)

Pricing

Pay per usage

1

Total users

2

Monthly users

2

Runs succeeded

88%

Last modified

a day ago

You can access the Sitemap Change Orchestrator programmatically from your own applications by using the Apify API. You can also choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, which you can find in the Integrations settings in Apify Console.

{
"openapi": "3.0.1",
"info": {
"version": "0.0",
"x-build-id": "nO4CTW350m82Ef5q2"
},
"servers": [
{
"url": "https://api.apify.com/v2"
}
],
"paths": {
"/acts/tri_angle~sitemap-change-orchestrator/run-sync-get-dataset-items": {
"post": {
"operationId": "run-sync-get-dataset-items-tri_angle-sitemap-change-orchestrator",
"x-openai-isConsequential": false,
"summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
"tags": [
"Run Actor"
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/inputSchema"
}
}
}
},
"parameters": [
{
"name": "token",
"in": "query",
"required": true,
"schema": {
"type": "string"
},
"description": "Enter your Apify token here"
}
],
"responses": {
"200": {
"description": "OK"
}
}
}
},
"/acts/tri_angle~sitemap-change-orchestrator/runs": {
"post": {
"operationId": "runs-sync-tri_angle-sitemap-change-orchestrator",
"x-openai-isConsequential": false,
"summary": "Executes an Actor and returns information about the initiated run in response.",
"tags": [
"Run Actor"
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/inputSchema"
}
}
}
},
"parameters": [
{
"name": "token",
"in": "query",
"required": true,
"schema": {
"type": "string"
},
"description": "Enter your Apify token here"
}
],
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/runsResponseSchema"
}
}
}
}
}
}
},
"/acts/tri_angle~sitemap-change-orchestrator/run-sync": {
"post": {
"operationId": "run-sync-tri_angle-sitemap-change-orchestrator",
"x-openai-isConsequential": false,
"summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
"tags": [
"Run Actor"
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/inputSchema"
}
}
}
},
"parameters": [
{
"name": "token",
"in": "query",
"required": true,
"schema": {
"type": "string"
},
"description": "Enter your Apify token here"
}
],
"responses": {
"200": {
"description": "OK"
}
}
}
}
},
"components": {
"schemas": {
"inputSchema": {
"type": "object",
"required": [
"wccInput"
],
"properties": {
"startUrls": {
"title": "Start URLs",
"type": "array",
"description": "List of start URLs to scrape. These can be direct sitemap URLs or websites on which sitemaps will be discovered when `discoverSitemaps` is enabled.",
"items": {
"type": "object",
"required": [
"url"
],
"properties": {
"url": {
"type": "string",
"title": "URL of a web page",
"format": "uri"
}
}
}
},
"discoverSitemaps": {
"title": "Discover sitemaps",
"type": "boolean",
"description": "If enabled, the actor will fetch each start URL's robots.txt and enqueue any sitemap URL it finds. This is useful if you don't want to enter direct sitemap URLs. Please note that this will only work if the website has a robots.txt file.",
"default": true
},
"changeTypes": {
"title": "Change types",
"type": "array",
"description": "Which change types to include in the output.",
"items": {
"type": "string",
"enum": [
"NEW",
"REMOVED",
"UPDATED",
"SAME"
]
},
"default": [
"NEW",
"UPDATED"
]
},
"snapshotKeyPrefix": {
"title": "Snapshot key prefix",
"type": "string",
"description": "Prefix for the snapshot record key stored in the snapshots key-value store, to separate runs by website or project.",
"default": "DEFAULT"
},
"urlFilterRegex": {
"title": "URL filter regex",
"type": "string",
"description": "Regex pattern to filter which URLs are included in the output and snapshot. This filter applies only to the final URLs and not to intermediate sitemap URLs."
},
"addRemovedUrlsToKvs": {
"title": "Add removed URLs to key-value store",
"type": "boolean",
"description": "If enabled, the actor will also always add URLs that were removed since the previous snapshot to the key-value store.",
"default": false
},
"proxyConfiguration": {
"title": "Proxy configuration",
"type": "object",
"description": "Proxy configuration used for crawling.",
"default": {
"useApifyProxy": true
}
},
"scdMemory": {
"title": "Memory",
"enum": [
"32768",
"16384",
"8192",
"4096",
"2048",
"1024",
"512"
],
"type": "string",
"description": "Amount of memory (RAM) allocated to the actor run in megabytes.",
"default": "4096"
},
"scdTimeout": {
"title": "Timeout",
"minimum": 0,
"type": "integer",
"description": "Timeout for the actor run in seconds. A value of zero means there is no timeout, and the Actor runs until it completes, possibly indefinitely. The default is 360,000 seconds (100 hours).",
"default": 360000
},
"wccInput": {
"title": "Input",
"type": "object",
"description": "Input JSON for the Website Content Crawler actor."
},
"addWccUrlsToScd": {
"title": "Search for sitemaps on start URLs from the WCC input",
"type": "boolean",
"description": "If enabled, start URLs from Website Content Crawler will be treated as start URLs defined within this orchestrator.",
"default": true
},
"wccMaxUrlsPerRun": {
"title": "Maximum URLs per run",
"minimum": 1,
"maximum": 50000,
"type": "integer",
"description": "Maximum number of URLs from the Sitemap Change Detector to include in a single Website Content Crawler run. Note that the default datasets of all runs are merged and output after all runs complete.",
"default": 50000
},
"wccMemory": {
"title": "Memory",
"enum": [
"32768",
"16384",
"8192",
"4096",
"2048",
"1024",
"512"
],
"type": "string",
"description": "Amount of memory (RAM) allocated to the actor run in megabytes.",
"default": "4096"
},
"wccTimeout": {
"title": "Timeout",
"minimum": 0,
"type": "integer",
"description": "Timeout for the actor run in seconds. A value of zero means there is no timeout, and the Actor runs until it completes, possibly indefinitely. The default is 360,000 seconds (100 hours).",
"default": 360000
},
"skipWcc": {
"title": "Skip Website Content Crawler",
"type": "boolean",
"description": "If checked, the Website Content Crawler won't run after detecting sitemap changes. This is useful if you want to only initialize the sitemap snapshot without scraping the URLs.",
"default": false
}
}
},
"runsResponseSchema": {
"type": "object",
"properties": {
"data": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"actId": {
"type": "string"
},
"userId": {
"type": "string"
},
"startedAt": {
"type": "string",
"format": "date-time",
"example": "2025-01-08T00:00:00.000Z"
},
"finishedAt": {
"type": "string",
"format": "date-time",
"example": "2025-01-08T00:00:00.000Z"
},
"status": {
"type": "string",
"example": "READY"
},
"meta": {
"type": "object",
"properties": {
"origin": {
"type": "string",
"example": "API"
},
"userAgent": {
"type": "string"
}
}
},
"stats": {
"type": "object",
"properties": {
"inputBodyLen": {
"type": "integer",
"example": 2000
},
"rebootCount": {
"type": "integer",
"example": 0
},
"restartCount": {
"type": "integer",
"example": 0
},
"resurrectCount": {
"type": "integer",
"example": 0
},
"computeUnits": {
"type": "integer",
"example": 0
}
}
},
"options": {
"type": "object",
"properties": {
"build": {
"type": "string",
"example": "latest"
},
"timeoutSecs": {
"type": "integer",
"example": 300
},
"memoryMbytes": {
"type": "integer",
"example": 1024
},
"diskMbytes": {
"type": "integer",
"example": 2048
}
}
},
"buildId": {
"type": "string"
},
"defaultKeyValueStoreId": {
"type": "string"
},
"defaultDatasetId": {
"type": "string"
},
"defaultRequestQueueId": {
"type": "string"
},
"buildNumber": {
"type": "string",
"example": "1.0.0"
},
"containerUrl": {
"type": "string"
},
"usage": {
"type": "object",
"properties": {
"ACTOR_COMPUTE_UNITS": {
"type": "integer",
"example": 0
},
"DATASET_READS": {
"type": "integer",
"example": 0
},
"DATASET_WRITES": {
"type": "integer",
"example": 0
},
"KEY_VALUE_STORE_READS": {
"type": "integer",
"example": 0
},
"KEY_VALUE_STORE_WRITES": {
"type": "integer",
"example": 1
},
"KEY_VALUE_STORE_LISTS": {
"type": "integer",
"example": 0
},
"REQUEST_QUEUE_READS": {
"type": "integer",
"example": 0
},
"REQUEST_QUEUE_WRITES": {
"type": "integer",
"example": 0
},
"DATA_TRANSFER_INTERNAL_GBYTES": {
"type": "integer",
"example": 0
},
"DATA_TRANSFER_EXTERNAL_GBYTES": {
"type": "integer",
"example": 0
},
"PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
"type": "integer",
"example": 0
},
"PROXY_SERPS": {
"type": "integer",
"example": 0
}
}
},
"usageTotalUsd": {
"type": "number",
"example": 0.00005
},
"usageUsd": {
"type": "object",
"properties": {
"ACTOR_COMPUTE_UNITS": {
"type": "integer",
"example": 0
},
"DATASET_READS": {
"type": "integer",
"example": 0
},
"DATASET_WRITES": {
"type": "integer",
"example": 0
},
"KEY_VALUE_STORE_READS": {
"type": "integer",
"example": 0
},
"KEY_VALUE_STORE_WRITES": {
"type": "number",
"example": 0.00005
},
"KEY_VALUE_STORE_LISTS": {
"type": "integer",
"example": 0
},
"REQUEST_QUEUE_READS": {
"type": "integer",
"example": 0
},
"REQUEST_QUEUE_WRITES": {
"type": "integer",
"example": 0
},
"DATA_TRANSFER_INTERNAL_GBYTES": {
"type": "integer",
"example": 0
},
"DATA_TRANSFER_EXTERNAL_GBYTES": {
"type": "integer",
"example": 0
},
"PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
"type": "integer",
"example": 0
},
"PROXY_SERPS": {
"type": "integer",
"example": 0
}
}
}
}
}
}
}
}
}
}

Sitemap Change Orchestrator OpenAPI definition

OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.

OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.

By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.

You can download the OpenAPI definitions for Sitemap Change Orchestrator from the options below:

If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.

You can also check out our other API clients: