Sonartext Speech To Text
Pricing
Pay per event
Go to Apify Store
Sonartext Speech To Text
Under maintenance
SonarText Speech to Text Transcription Service
0.0 (0)
Pricing
Pay per event
1
2
2
Last modified
20 days ago
Pricing
Pay per event
SonarText Speech to Text Transcription Service
0.0 (0)
Pricing
Pay per event
1
2
2
Last modified
20 days ago
You can access the Sonartext Speech To Text programmatically from your own applications by using the Apify API. You can also choose the language preference from below. To use the Apify API, you'll need an Apify account and your API token, found in Integrations settings in Apify Console.
{ "openapi": "3.0.1", "info": { "title": "Sonartext Speech To Text", "version": "0.0", "x-build-id": "QHmSyd3TGEZ8GCCPT" }, "servers": [ { "url": "https://api.apify.com/v2" } ], "paths": { "/acts/cheerful_jive~sonartext-speech-to-text/run-sync-get-dataset-items": { "post": { "operationId": "run-sync-get-dataset-items-cheerful_jive-sonartext-speech-to-text", "x-openai-isConsequential": false, "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK" } } } }, "/acts/cheerful_jive~sonartext-speech-to-text/runs": { "post": { "operationId": "runs-sync-cheerful_jive-sonartext-speech-to-text", "x-openai-isConsequential": false, "summary": "Executes an Actor and returns information about the initiated run in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/runsResponseSchema" } } } } } } }, "/acts/cheerful_jive~sonartext-speech-to-text/run-sync": { "post": { "operationId": "run-sync-cheerful_jive-sonartext-speech-to-text", "x-openai-isConsequential": false, "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, 
"parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK" } } } } }, "components": { "schemas": { "inputSchema": { "type": "object", "required": [ "inputMethod" ], "properties": { "inputMethod": { "title": "Input Method", "enum": [ "file_upload", "url", "youtube", "twitter", "gdrive", "s3" ], "type": "string", "description": "How to provide the audio/video file", "default": "file_upload" }, "audioFile": { "title": "Audio/Video File", "type": "string", "description": "Upload your audio or video file (up to 2GB). Required when Input Method is 'file_upload'." }, "fileUrl": { "title": "File URL", "type": "string", "description": "Direct URL to audio/video file. Required when Input Method is 'url'." }, "youtubeUrl": { "title": "YouTube URL", "type": "string", "description": "YouTube video URL (e.g. https://youtube.com/watch?v=...). Required when Input Method is 'youtube'." }, "twitterUrl": { "title": "Twitter/X URL", "type": "string", "description": "Twitter or X post URL with video. Required when Input Method is 'twitter'." }, "gdriveUrl": { "title": "Google Drive URL", "type": "string", "description": "Google Drive shareable link to audio/video file. Required when Input Method is 'gdrive'." }, "s3Url": { "title": "AWS S3 URL", "type": "string", "description": "AWS S3 URL or presigned URL to file. Required when Input Method is 's3'." 
}, "language": { "title": "Language", "enum": [ "", "en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh", "ar", "hi", "nl", "sv", "no", "da", "fi", "pl", "cs", "hu", "ro", "bg", "hr", "sk", "sl", "et", "lv", "lt" ], "type": "string", "description": "Language of the audio (leave blank for auto-detect)", "default": "" }, "timestamps": { "title": "Timestamps", "enum": [ "none", "segment", "word", "both" ], "type": "string", "description": "Include timestamps in the transcription", "default": "segment" }, "speakerDiarization": { "title": "Speaker Diarization", "type": "boolean", "description": "Identify and separate different speakers in the audio", "default": false }, "minSpeakers": { "title": "Minimum Speakers", "minimum": 1, "maximum": 10, "type": "integer", "description": "Minimum number of speakers expected (only used when Speaker Diarization is enabled)", "default": 1 }, "maxSpeakers": { "title": "Maximum Speakers", "minimum": 1, "maximum": 20, "type": "integer", "description": "Maximum number of speakers expected (only used when Speaker Diarization is enabled)", "default": 5 }, "responseFormat": { "title": "Response Format", "enum": [ "json", "text", "srt", "vtt" ], "type": "string", "description": "Output format for the transcription", "default": "json" }, "maxCostCents": { "title": "Maximum Cost (cents)", "minimum": 1, "maximum": 10000, "type": "integer", "description": "Optional cost limit in cents to prevent unexpected charges", "default": 500 }, "webhookUrl": { "title": "Webhook URL", "type": "string", "description": "Optional URL to receive completion notification" } } }, "runsResponseSchema": { "type": "object", "properties": { "data": { "type": "object", "properties": { "id": { "type": "string" }, "actId": { "type": "string" }, "userId": { "type": "string" }, "startedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" }, "finishedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" 
}, "status": { "type": "string", "example": "READY" }, "meta": { "type": "object", "properties": { "origin": { "type": "string", "example": "API" }, "userAgent": { "type": "string" } } }, "stats": { "type": "object", "properties": { "inputBodyLen": { "type": "integer", "example": 2000 }, "rebootCount": { "type": "integer", "example": 0 }, "restartCount": { "type": "integer", "example": 0 }, "resurrectCount": { "type": "integer", "example": 0 }, "computeUnits": { "type": "integer", "example": 0 } } }, "options": { "type": "object", "properties": { "build": { "type": "string", "example": "latest" }, "timeoutSecs": { "type": "integer", "example": 300 }, "memoryMbytes": { "type": "integer", "example": 1024 }, "diskMbytes": { "type": "integer", "example": 2048 } } }, "buildId": { "type": "string" }, "defaultKeyValueStoreId": { "type": "string" }, "defaultDatasetId": { "type": "string" }, "defaultRequestQueueId": { "type": "string" }, "buildNumber": { "type": "string", "example": "1.0.0" }, "containerUrl": { "type": "string" }, "usage": { "type": "object", "properties": { "ACTOR_COMPUTE_UNITS": { "type": "integer", "example": 0 }, "DATASET_READS": { "type": "integer", "example": 0 }, "DATASET_WRITES": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_WRITES": { "type": "integer", "example": 1 }, "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_WRITES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_INTERNAL_GBYTES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "integer", "example": 0 }, "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "integer", "example": 0 }, "PROXY_SERPS": { "type": "integer", "example": 0 } } }, "usageTotalUsd": { "type": "number", "example": 0.00005 }, "usageUsd": { "type": "object", "properties": { "ACTOR_COMPUTE_UNITS": { "type": "integer", "example": 0 }, 
"DATASET_READS": { "type": "integer", "example": 0 }, "DATASET_WRITES": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_WRITES": { "type": "number", "example": 0.00005 }, "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_WRITES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_INTERNAL_GBYTES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "integer", "example": 0 }, "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "integer", "example": 0 }, "PROXY_SERPS": { "type": "integer", "example": 0 } } } } } } } } }}
OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.
OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.
By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.
You can download the OpenAPI definitions for Sonartext Speech To Text from the options below:
If you'd like to learn more about how OpenAPI powers GPTs, read our blog post.
You can also check out our other API clients: