Image to Text (OCR)
Pricing
$5.20 / 1,000 image ocrs
Go to Apify Store
Image to Text (OCR)
Extract text from images using Tesseract.js OCR engine. Supports 100+ languages, PDFs, and bulk image processing.
Image to Text (OCR)
Pricing
$5.20 / 1,000 image ocrs
Extract text from images using Tesseract.js OCR engine. Supports 100+ languages, PDFs, and bulk image processing.
You can access the Image to Text (OCR) Actor programmatically from your own applications by using the Apify API. You can also choose your preferred programming language from the options below. To use the Apify API, you’ll need an Apify account and your API token, found in Integrations settings in Apify Console.
{ "openapi": "3.0.1", "info": { "title": "Image to Text (OCR)", "version": "1.0", "x-build-id": "t3qG6DrcgIpJXkViS" }, "servers": [ { "url": "https://api.apify.com/v2" } ], "paths": { "/acts/junipr~image-to-text/run-sync-get-dataset-items": { "post": { "operationId": "run-sync-get-dataset-items-junipr-image-to-text", "x-openai-isConsequential": false, "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK" } } } }, "/acts/junipr~image-to-text/runs": { "post": { "operationId": "runs-sync-junipr-image-to-text", "x-openai-isConsequential": false, "summary": "Executes an Actor and returns information about the initiated run in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/runsResponseSchema" } } } } } } }, "/acts/junipr~image-to-text/run-sync": { "post": { "operationId": "run-sync-junipr-image-to-text", "x-openai-isConsequential": false, "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": 
"Enter your Apify token here" } ], "responses": { "200": { "description": "OK" } } } } }, "components": { "schemas": { "inputSchema": { "type": "object", "required": [ "images" ], "properties": { "images": { "title": "Images", "type": "array", "description": "List of images to process. Each entry must have either a 'url' (image URL) or 'kvStoreKey' (key in the actor's key-value store). Minimum 1, maximum 500.", "default": [ { "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Camponotus_flavomarginatus_ant.jpg/400px-Camponotus_flavomarginatus_ant.jpg" } ] }, "language": { "title": "Language", "type": "string", "description": "Tesseract language code for OCR. Common codes: 'eng' (English), 'fra' (French), 'deu' (German), 'spa' (Spanish), 'chi_sim' (Simplified Chinese), 'jpn' (Japanese), 'kor' (Korean), 'ara' (Arabic). Combine multiple: 'eng+fra'. Full list at https://tesseract-ocr.github.io/tessdoc/Data-Files.", "default": "eng" }, "ocrEngine": { "title": "OCR Engine", "enum": [ "lstm", "legacy", "combined" ], "type": "string", "description": "OCR engine mode. 'lstm' uses the neural network engine for best accuracy. 'legacy' uses the traditional engine — faster but less accurate. 'combined' uses both for highest accuracy but slowest speed.", "default": "lstm" }, "pageSegMode": { "title": "Page Segmentation Mode", "minimum": 0, "maximum": 13, "type": "integer", "description": "Controls how Tesseract segments the image. 3 (auto) works for most images. Use 6 for single text blocks, 7 for single lines, 8 for single words, 11 for sparse text, 1 for auto with orientation detection (good for rotated text).", "default": 3 }, "whitelist": { "title": "Character Whitelist", "type": "string", "description": "Only recognize these characters. Useful for extracting specific data types. Example: '0123456789.$,' for receipt amounts only. Leave empty to recognize all characters." 
}, "blacklist": { "title": "Character Blacklist", "type": "string", "description": "Never output these characters. Ignored if a whitelist is also specified." }, "preprocess": { "title": "Enable Preprocessing", "type": "boolean", "description": "Enable automatic image preprocessing (deskew, contrast enhancement, binarization) for improved OCR accuracy. Recommended for most images, especially scanned documents.", "default": true }, "deskew": { "title": "Deskew", "type": "boolean", "description": "Correct image rotation based on EXIF orientation data. Only applies when preprocessing is enabled.", "default": true }, "enhanceContrast": { "title": "Enhance Contrast", "type": "boolean", "description": "Normalize image histogram to enhance contrast for faded or low-contrast text. Only applies when preprocessing is enabled.", "default": true }, "binarize": { "title": "Binarize", "type": "boolean", "description": "Convert image to black and white using thresholding. Improves OCR on images with complex backgrounds. Only applies when preprocessing is enabled.", "default": true }, "scale": { "title": "Scale Factor", "minimum": 0.5, "maximum": 4, "type": "number", "description": "Scale the image before OCR. 2.0 doubles the size (improves accuracy on small text). Leave empty for auto-scaling (doubles if image width < 800px). Min: 0.5, Max: 4.0." }, "denoise": { "title": "Denoise", "type": "boolean", "description": "Apply median denoising filter for noisy or scanned images. Adds extra processing time. Recommended for low-quality scans.", "default": false }, "outputLevel": { "title": "Output Level", "enum": [ "text", "lines", "words", "full" ], "type": "string", "description": "Detail level of OCR output. 'text' returns plain extracted text only. 'lines' adds line-level confidence scores. 'words' adds word-level positions and confidence. 
'full' includes complete structure with bounding boxes for blocks, paragraphs, lines, and words.", "default": "text" }, "minConfidence": { "title": "Minimum Confidence", "minimum": 0, "maximum": 100, "type": "integer", "description": "Minimum OCR confidence threshold (0-100). Words with confidence below this value are excluded from output. 0 includes all results. 70 keeps only high-confidence text.", "default": 0 }, "includeRawHocr": { "title": "Include Raw hOCR", "type": "boolean", "description": "Include the raw hOCR XML output from Tesseract in the 'hocr' field. hOCR contains full positional data in standard XML format for advanced processing. Only available with outputLevel 'full'.", "default": false }, "stripExtraWhitespace": { "title": "Strip Extra Whitespace", "type": "boolean", "description": "Collapse multiple consecutive spaces and newlines into single characters. Cleans up common OCR artifacts for cleaner output.", "default": true }, "maxConcurrency": { "title": "Max Concurrency", "minimum": 1, "maximum": 5, "type": "integer", "description": "Maximum number of images processed simultaneously. Tesseract is CPU-intensive — keep this low (2-3) to avoid memory issues on large batches. Min: 1, Max: 5.", "default": 2 }, "imageTimeout": { "title": "Image Download Timeout (ms)", "minimum": 5000, "maximum": 120000, "type": "integer", "description": "Timeout in milliseconds for downloading each source image. Increase for slow servers or large images. Min: 5000, Max: 120000.", "default": 30000 }, "ocrTimeout": { "title": "OCR Timeout (ms)", "minimum": 10000, "maximum": 300000, "type": "integer", "description": "Timeout in milliseconds for OCR processing per image. Increase for very complex, high-resolution images. 
Min: 10000, Max: 300000.", "default": 60000 } } }, "runsResponseSchema": { "type": "object", "properties": { "data": { "type": "object", "properties": { "id": { "type": "string" }, "actId": { "type": "string" }, "userId": { "type": "string" }, "startedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" }, "finishedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" }, "status": { "type": "string", "example": "READY" }, "meta": { "type": "object", "properties": { "origin": { "type": "string", "example": "API" }, "userAgent": { "type": "string" } } }, "stats": { "type": "object", "properties": { "inputBodyLen": { "type": "integer", "example": 2000 }, "rebootCount": { "type": "integer", "example": 0 }, "restartCount": { "type": "integer", "example": 0 }, "resurrectCount": { "type": "integer", "example": 0 }, "computeUnits": { "type": "integer", "example": 0 } } }, "options": { "type": "object", "properties": { "build": { "type": "string", "example": "latest" }, "timeoutSecs": { "type": "integer", "example": 300 }, "memoryMbytes": { "type": "integer", "example": 1024 }, "diskMbytes": { "type": "integer", "example": 2048 } } }, "buildId": { "type": "string" }, "defaultKeyValueStoreId": { "type": "string" }, "defaultDatasetId": { "type": "string" }, "defaultRequestQueueId": { "type": "string" }, "buildNumber": { "type": "string", "example": "1.0.0" }, "containerUrl": { "type": "string" }, "usage": { "type": "object", "properties": { "ACTOR_COMPUTE_UNITS": { "type": "integer", "example": 0 }, "DATASET_READS": { "type": "integer", "example": 0 }, "DATASET_WRITES": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_WRITES": { "type": "integer", "example": 1 }, "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_WRITES": { "type": "integer", "example": 0 }, 
"DATA_TRANSFER_INTERNAL_GBYTES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "integer", "example": 0 }, "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "integer", "example": 0 }, "PROXY_SERPS": { "type": "integer", "example": 0 } } }, "usageTotalUsd": { "type": "number", "example": 0.00005 }, "usageUsd": { "type": "object", "properties": { "ACTOR_COMPUTE_UNITS": { "type": "integer", "example": 0 }, "DATASET_READS": { "type": "integer", "example": 0 }, "DATASET_WRITES": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_WRITES": { "type": "number", "example": 0.00005 }, "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_WRITES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_INTERNAL_GBYTES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "integer", "example": 0 }, "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "integer", "example": 0 }, "PROXY_SERPS": { "type": "integer", "example": 0 } } } } } } } } }}
OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.
OpenAPI is effective when used with AI agents and GPTs because it standardizes how these systems interact with various APIs, enabling reliable integrations and efficient communication.
By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.
You can download the OpenAPI definitions for Image to Text (OCR) from the options below:
If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.
You can also check out our other API clients: