Crawl4ai
Pricing
Pay per usage
Go to Apify Store
Crawl4ai
Extract page content (markdown/HTML/text), metadata, and link stats. Uses crawl4ai.
Crawl4ai
Pricing
Pay per usage
Extract page content (markdown/HTML/text), metadata, and link stats. Uses crawl4ai.
You can access Crawl4ai programmatically from your own applications by using the Apify API. You can also choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, which you can find under Integrations settings in Apify Console.
{ "openapi": "3.0.1", "info": { "version": "0.0", "x-build-id": "imCF6jEyhbU3KTVZD" }, "servers": [ { "url": "https://api.apify.com/v2" } ], "paths": { "/acts/kael_odin~crawl4ai/run-sync-get-dataset-items": { "post": { "operationId": "run-sync-get-dataset-items-kael_odin-crawl4ai", "x-openai-isConsequential": false, "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK" } } } }, "/acts/kael_odin~crawl4ai/runs": { "post": { "operationId": "runs-sync-kael_odin-crawl4ai", "x-openai-isConsequential": false, "summary": "Executes an Actor and returns information about the initiated run in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/runsResponseSchema" } } } } } } }, "/acts/kael_odin~crawl4ai/run-sync": { "post": { "operationId": "run-sync-kael_odin-crawl4ai", "x-openai-isConsequential": false, "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your 
Apify token here" } ], "responses": { "200": { "description": "OK" } } } } }, "components": { "schemas": { "inputSchema": { "type": "object", "required": [ "startUrls" ], "properties": { "startUrls": { "title": "Start URLs", "type": "array", "description": "Starting URLs to visit.", "items": { "type": "string" } }, "maxPages": { "title": "Max Pages", "minimum": 1, "maximum": 10000, "type": "integer", "description": "Maximum pages to process in total.", "default": 50 }, "maxDepth": { "title": "Max Depth", "minimum": 0, "maximum": 10, "type": "integer", "description": "Maximum link depth from each start URL.", "default": 2 }, "concurrency": { "title": "Concurrency", "minimum": 1, "maximum": 50, "type": "integer", "description": "Number of concurrent tasks.", "default": 5 }, "requestTimeoutSecs": { "title": "Request Timeout (seconds)", "minimum": 5, "maximum": 600, "type": "integer", "description": "Timeout per page request.", "default": 60 }, "headless": { "title": "Headless", "type": "boolean", "description": "Run browser headless.", "default": true }, "useProxy": { "title": "Use Proxy", "type": "boolean", "description": "Enable Apify proxy for requests.", "default": false }, "proxyGroups": { "title": "Proxy Groups", "type": "array", "description": "Apify proxy groups to use.", "items": { "type": "string" } }, "extractMode": { "title": "Extract Mode", "enum": [ "markdown", "html", "text" ], "type": "string", "description": "Output format.", "default": "markdown" }, "maxResults": { "title": "Max Results", "minimum": 1, "maximum": 200000, "type": "integer", "description": "Maximum output items to push.", "default": 1000 }, "sameDomainOnly": { "title": "Same Domain Only", "type": "boolean", "description": "Only follow links within start URL domains.", "default": true }, "includePatterns": { "title": "Include URL Patterns", "type": "array", "description": "Only include URLs matching these regex patterns (optional).", "items": { "type": "string" } }, "excludePatterns": { 
"title": "Exclude URL Patterns", "type": "array", "description": "Exclude URLs matching these regex patterns.", "items": { "type": "string" } }, "maxRetries": { "title": "Max Retries", "minimum": 0, "maximum": 10, "type": "integer", "description": "Retry failed pages up to this count.", "default": 2 }, "retryBackoffSecs": { "title": "Retry Backoff (seconds)", "minimum": 0, "maximum": 120, "type": "integer", "description": "Base backoff in seconds, doubled each retry.", "default": 2 }, "maxRequestsPerMinute": { "title": "Max Requests Per Minute", "minimum": 0, "maximum": 6000, "type": "integer", "description": "Global rate limit. Set 0 for unlimited.", "default": 0 }, "enableStealth": { "title": "Enable Stealth", "type": "boolean", "description": "Enable stealth mode for tougher sites.", "default": false }, "userAgent": { "title": "User Agent", "type": "string", "description": "Custom user agent string (optional)." }, "cleanContent": { "title": "Clean Content", "type": "boolean", "description": "Remove navigation-heavy lines and normalize whitespace.", "default": true }, "includeRawContent": { "title": "Include Raw Content", "type": "boolean", "description": "Include unmodified content output in a separate field.", "default": false }, "maxContentChars": { "title": "Max Content Characters", "minimum": 0, "maximum": 500000, "type": "integer", "description": "Truncate content to this length (0 = unlimited).", "default": 0 }, "contentExcerptChars": { "title": "Content Excerpt Characters", "minimum": 0, "maximum": 5000, "type": "integer", "description": "Length of the content excerpt for quick previews.", "default": 300 }, "wordCountThreshold": { "title": "Word Count Threshold", "minimum": 0, "maximum": 1000, "type": "integer", "description": "Ignore text blocks with fewer words (0 = off). 
Reduces noise from empty or stub pages.", "default": 0 }, "virtualScrollSelector": { "title": "Virtual Scroll Selector", "type": "string", "description": "CSS selector for infinite-scroll container (e.g. #feed). When set, the crawler scrolls to load more content before extraction." }, "virtualScrollCount": { "title": "Virtual Scroll Count", "minimum": 1, "maximum": 100, "type": "integer", "description": "Max scroll steps when virtual scroll is enabled.", "default": 10 }, "waitUntil": { "title": "Wait Until", "enum": [ "domcontentloaded", "load", "networkidle" ], "type": "string", "description": "Page load strategy: domcontentloaded (fast), load (full load), or networkidle (SPA/slow sites).", "default": "domcontentloaded" }, "pageLoadWaitSecs": { "title": "Page Load Wait (seconds)", "minimum": 0, "maximum": 60, "type": "number", "description": "Extra delay in seconds after load before capturing HTML. Use for slow/SPA sites.", "default": 0 }, "waitForSelector": { "title": "Wait For Selector", "type": "string", "description": "CSS selector to wait for before extraction (e.g. .article-body or #main). Use css: or js: prefix for advanced conditions." }, "waitForTimeoutSecs": { "title": "Wait For Timeout (seconds)", "minimum": 1, "maximum": 300, "type": "integer", "description": "Max seconds to wait for Wait For Selector. Ignored if Wait For Selector is empty.", "default": 30 }, "cssSelector": { "title": "CSS Selector (extract region only)", "type": "string", "description": "Extract only content inside this CSS selector (e.g. main, .content, #article)." 
}, "crawlMode": { "title": "Crawl Mode", "enum": [ "full", "discover_only" ], "type": "string", "description": "full = extract content; discover_only = only URLs and links (no content, fast).", "default": "full" }, "includeLinkUrls": { "title": "Include Link URLs", "type": "boolean", "description": "Include links_internal and links_external arrays in each item (full mode only).", "default": false } } }, "runsResponseSchema": { "type": "object", "properties": { "data": { "type": "object", "properties": { "id": { "type": "string" }, "actId": { "type": "string" }, "userId": { "type": "string" }, "startedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" }, "finishedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" }, "status": { "type": "string", "example": "READY" }, "meta": { "type": "object", "properties": { "origin": { "type": "string", "example": "API" }, "userAgent": { "type": "string" } } }, "stats": { "type": "object", "properties": { "inputBodyLen": { "type": "integer", "example": 2000 }, "rebootCount": { "type": "integer", "example": 0 }, "restartCount": { "type": "integer", "example": 0 }, "resurrectCount": { "type": "integer", "example": 0 }, "computeUnits": { "type": "integer", "example": 0 } } }, "options": { "type": "object", "properties": { "build": { "type": "string", "example": "latest" }, "timeoutSecs": { "type": "integer", "example": 300 }, "memoryMbytes": { "type": "integer", "example": 1024 }, "diskMbytes": { "type": "integer", "example": 2048 } } }, "buildId": { "type": "string" }, "defaultKeyValueStoreId": { "type": "string" }, "defaultDatasetId": { "type": "string" }, "defaultRequestQueueId": { "type": "string" }, "buildNumber": { "type": "string", "example": "1.0.0" }, "containerUrl": { "type": "string" }, "usage": { "type": "object", "properties": { "ACTOR_COMPUTE_UNITS": { "type": "integer", "example": 0 }, "DATASET_READS": { "type": "integer", "example": 0 }, 
"DATASET_WRITES": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_WRITES": { "type": "integer", "example": 1 }, "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_WRITES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_INTERNAL_GBYTES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "integer", "example": 0 }, "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "integer", "example": 0 }, "PROXY_SERPS": { "type": "integer", "example": 0 } } }, "usageTotalUsd": { "type": "number", "example": 0.00005 }, "usageUsd": { "type": "object", "properties": { "ACTOR_COMPUTE_UNITS": { "type": "integer", "example": 0 }, "DATASET_READS": { "type": "integer", "example": 0 }, "DATASET_WRITES": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_WRITES": { "type": "number", "example": 0.00005 }, "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_WRITES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_INTERNAL_GBYTES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "integer", "example": 0 }, "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "integer", "example": 0 }, "PROXY_SERPS": { "type": "integer", "example": 0 } } } } } } } } }}OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.
OpenAPI works well with AI agents and GPTs because it standardizes how these systems interact with various APIs, enabling reliable integrations and efficient communication.
By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.
You can download the OpenAPI definitions for Crawl4ai from the options below:
If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.
You can also check out our other API clients: