Wayback Machine Archive Scraper
Pricing
$1.00 / 1,000 snapshots retrieved
Wayback Machine Archive Scraper
Fetch historical snapshots of any webpage from the Internet Archive. Perfect for digital forensics and tracking deleted content.
Wayback Machine Archive Scraper
Pricing
$1.00 / 1,000 snapshots retrieved
Fetch historical snapshots of any webpage from the Internet Archive. Perfect for digital forensics and tracking deleted content.
You can access the Wayback Machine Archive Scraper programmatically from your own applications by using the Apify API. You can also choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, which you can find under Integrations settings in Apify Console.
{
  "openapi": "3.0.1",
  "info": {
    "title": "Wayback Machine Archive Scraper",
    "version": "1.0",
    "x-build-id": "BUn7GdOvecqCPfCvI"
  },
  "servers": [
    { "url": "https://api.apify.com/v2" }
  ],
  "paths": {
    "/acts/andok~wayback-machine-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-andok-wayback-machine-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [ "Run Actor" ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/inputSchema" }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": { "type": "string" },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": { "description": "OK" }
        }
      }
    },
    "/acts/andok~wayback-machine-scraper/runs": {
      "post": {
        "operationId": "runs-sync-andok-wayback-machine-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [ "Run Actor" ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/inputSchema" }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": { "type": "string" },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/runsResponseSchema" }
              }
            }
          }
        }
      }
    },
    "/acts/andok~wayback-machine-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-andok-wayback-machine-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [ "Run Actor" ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/inputSchema" }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": { "type": "string" },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": { "description": "OK" }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [ "urls" ],
        "properties": {
          "urls": {
            "title": "URLs to Look Up",
            "type": "array",
            "description": "List of URLs to look up in the Wayback Machine. Each URL will return its archived snapshot history.",
            "default": [ "https://example.com" ],
            "items": { "type": "string" }
          },
          "url": {
            "title": "Single URL (legacy)",
            "type": "string",
            "description": "Optional single URL for backwards compatibility. Will be merged with the URLs list above."
          },
          "from": {
            "title": "From Date",
            "type": "string",
            "description": "Start date for the snapshot range. Accepts YYYY (e.g. 2020) or full timestamp YYYYMMDDhhmmss."
          },
          "to": {
            "title": "To Date",
            "type": "string",
            "description": "End date for the snapshot range. Accepts YYYY (e.g. 2025) or full timestamp YYYYMMDDhhmmss."
          },
          "limit": {
            "title": "Max Snapshots per URL",
            "minimum": 1,
            "maximum": 5000,
            "type": "integer",
            "description": "Maximum number of snapshots to return for each URL. Higher values increase run time.",
            "default": 50
          },
          "collapse": {
            "title": "Collapse Mode",
            "type": "string",
            "description": "Deduplicate snapshots using the Wayback CDX collapse parameter. Use \"digest\" to skip identical page versions, or \"timestamp:8\" to keep one per day.",
            "default": "digest"
          },
          "filterStatus": {
            "title": "HTTP Status Filter",
            "type": "string",
            "description": "Filter snapshots by HTTP status code. Default \"statuscode:200\" returns only successful snapshots. Remove to include redirects and errors.",
            "default": "statuscode:200"
          },
          "includeHtml": {
            "title": "Include Archived HTML",
            "type": "boolean",
            "description": "Fetch the full archived HTML content for the latest snapshot of each URL. Experimental — may fail for very large pages.",
            "default": false
          },
          "timeoutSeconds": {
            "title": "Timeout (seconds)",
            "minimum": 1,
            "maximum": 120,
            "type": "integer",
            "description": "Per-request timeout for Wayback Machine API calls in seconds.",
            "default": 20
          },
          "concurrency": {
            "title": "Concurrency",
            "minimum": 1,
            "maximum": 25,
            "type": "integer",
            "description": "Number of URLs to process in parallel. Lower values are safer for large batches to avoid rate limiting.",
            "default": 5
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": { "type": "string" },
              "actId": { "type": "string" },
              "userId": { "type": "string" },
              "startedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" },
              "finishedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" },
              "status": { "type": "string", "example": "READY" },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": { "type": "string", "example": "API" },
                  "userAgent": { "type": "string" }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": { "type": "integer", "example": 2000 },
                  "rebootCount": { "type": "integer", "example": 0 },
                  "restartCount": { "type": "integer", "example": 0 },
                  "resurrectCount": { "type": "integer", "example": 0 },
                  "computeUnits": { "type": "number", "example": 0 }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": { "type": "string", "example": "latest" },
                  "timeoutSecs": { "type": "integer", "example": 300 },
                  "memoryMbytes": { "type": "integer", "example": 1024 },
                  "diskMbytes": { "type": "integer", "example": 2048 }
                }
              },
              "buildId": { "type": "string" },
              "defaultKeyValueStoreId": { "type": "string" },
              "defaultDatasetId": { "type": "string" },
              "defaultRequestQueueId": { "type": "string" },
              "buildNumber": { "type": "string", "example": "1.0.0" },
              "containerUrl": { "type": "string" },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": { "type": "number", "example": 0 },
                  "DATASET_READS": { "type": "integer", "example": 0 },
                  "DATASET_WRITES": { "type": "integer", "example": 0 },
                  "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 },
                  "KEY_VALUE_STORE_WRITES": { "type": "integer", "example": 1 },
                  "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 },
                  "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 },
                  "REQUEST_QUEUE_WRITES": { "type": "integer", "example": 0 },
                  "DATA_TRANSFER_INTERNAL_GBYTES": { "type": "number", "example": 0 },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "number", "example": 0 },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "number", "example": 0 },
                  "PROXY_SERPS": { "type": "integer", "example": 0 }
                }
              },
              "usageTotalUsd": { "type": "number", "example": 0.00005 },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": { "type": "number", "example": 0 },
                  "DATASET_READS": { "type": "number", "example": 0 },
                  "DATASET_WRITES": { "type": "number", "example": 0 },
                  "KEY_VALUE_STORE_READS": { "type": "number", "example": 0 },
                  "KEY_VALUE_STORE_WRITES": { "type": "number", "example": 0.00005 },
                  "KEY_VALUE_STORE_LISTS": { "type": "number", "example": 0 },
                  "REQUEST_QUEUE_READS": { "type": "number", "example": 0 },
                  "REQUEST_QUEUE_WRITES": { "type": "number", "example": 0 },
                  "DATA_TRANSFER_INTERNAL_GBYTES": { "type": "number", "example": 0 },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "number", "example": 0 },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "number", "example": 0 },
                  "PROXY_SERPS": { "type": "number", "example": 0 }
                }
              }
            }
          }
        }
      }
    }
  }
}
OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.
OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.
By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.
You can download the OpenAPI definitions for Wayback Machine Archive Scraper from the options below:
If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.
You can also check out our other API clients: