AI YouTube Transcript Analyzer
Pricing
$25.00 / 1,000 transcript analyses
AI YouTube Transcript Analyzer
Pricing
$25.00 / 1,000 transcript analyses
WORKER_BASE_URL=https://your-ai-yt-transcript-worker.your-subdomain.workers.dev
# Optional. Must match WORKER_AUTH_TOKEN on the worker if enabled.
WORKER_API_TOKEN=your-shared-secret
FROM apify/actor-node:20
COPY package*.json ./
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Finished installing NPM packages"
COPY . ./
CMD ["npm", "start"]{ "name": "ai-yt-transcript-actor", "version": "0.1.0", "private": true, "type": "module", "scripts": { "start": "node src/main.js" }, "dependencies": { "apify": "^3.4.2" }}{ "actorSpecification": 1, "name": "youtube-transcript-ai-analyzer", "title": "AI YouTube Transcript Analyzer", "description": "Analyze a single YouTube transcript with a custom prompt and optional JSON schema, powered by Hexa API's Cloudflare Worker delivery layer.", "version": "0.1", "minMemoryMbytes": 256, "maxMemoryMbytes": 256, "buildTag": "latest", "dockerfile": "../Dockerfile", "input": "./input_schema.json", "output": "./output_schema.json", "storages": { "dataset": "./dataset_schema.json", "keyValueStore": "./key_value_store_schema.json" }}{ "actorSpecification": 1, "fields": { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "properties": { "result": { "title": "LLM result", "description": "The successful final analysis result returned by the worker. This may be plain text or structured JSON depending on the input.", "oneOf": [ { "type": "string", "example": "The video focuses on..." }, { "type": "object", "additionalProperties": true } ] } }, "required": ["result"] }, "views": { "overview": { "title": "Overview", "description": "Successful AI analysis results produced by the Actor.", "transformation": { "fields": ["result"] }, "display": { "component": "table", "properties": { "result": { "label": "Result", "format": "text" } } } } }}{ "title": "AI YouTube Transcript Analyzer", "type": "object", "schemaVersion": 1, "properties": { "videoUrl": { "title": "YouTube video URL", "type": "string", "description": "One direct YouTube video URL. 
Supports watch, shorts, embed, and youtu.be links.", "editor": "textfield", "prefill": "https://www.youtube.com/watch?v=dQw4w9WgXcQ" }, "prompt": { "title": "Prompt", "type": "string", "description": "The analysis instruction that will be applied to the transcript.", "editor": "textarea", "prefill": "Summarize the video in one sentence." }, "jsonschema": { "title": "JSON schema", "type": "object", "description": "Optional strict JSON schema for structured LLM output.", "editor": "json", "prefill": { "title": "video_summary", "type": "object", "additionalProperties": false, "properties": { "summary": { "type": "string" } }, "required": ["summary"] } } }, "required": ["videoUrl", "prompt"]}{ "actorKeyValueStoreSchemaVersion": 1, "title": "AI YouTube Transcript Analyzer key-value store", "description": "Structured records stored in the default key-value store for AI YouTube Transcript Analyzer runs.", "collections": { "runOutput": { "title": "Run output", "description": "The OUTPUT record containing the run summary, worker metadata, and any failure details from the run.", "key": "OUTPUT", "contentTypes": ["application/json"], "jsonSchema": { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "properties": { "summary": { "type": "object", "properties": { "requested": { "type": "integer", "example": 1 }, "succeeded": { "type": "integer", "example": 1 }, "failed": { "type": "integer", "example": 0 }, "chunkCount": { "type": "integer", "example": 3 }, "fallbackUsed": { "type": "boolean", "example": false }, "llmCallCount": { "type": "integer", "example": 4 }, "llmAttemptCount": { "type": "integer", "example": 5 }, "aiCallsRequested": { "type": "integer", "example": 4 }, "aiCallsCharged": { "type": "integer", "example": 4 }, "chargeLimitReached": { "type": "boolean", "example": false }, "finalModel": { "type": ["string", "null"], "example": "mistralai/mistral-medium-3-instruct" } }, "required": ["requested", "succeeded", "failed", "chunkCount", 
"fallbackUsed", "llmCallCount", "llmAttemptCount", "aiCallsRequested", "aiCallsCharged", "chargeLimitReached"] }, "error": { "type": ["object", "null"], "properties": { "type": { "type": "string", "example": "TRANSCRIPT_NOT_FOUND" }, "message": { "type": "string", "example": "Transcript is not available for this video." } }, "required": ["type", "message"] }, "meta": { "type": ["object", "null"], "additionalProperties": true } }, "required": ["summary", "error", "meta"] } } }}{ "actorOutputSchemaVersion": 1, "title": "AI YouTube Transcript Analyzer output", "description": "Output locations for successful AI analysis results and the run summary generated by the Actor.", "properties": { "results": { "type": "string", "title": "Successful analysis results", "description": "Successful LLM results written to the run's default dataset. Each item contains a single `result` field.", "template": "{{links.apiDefaultDatasetUrl}}/items?view=overview" }, "runOutput": { "type": "string", "title": "Run summary and worker metadata", "description": "The OUTPUT record in the default key-value store containing the run summary, worker metadata, and any error details.", "template": "{{links.apiDefaultKeyValueStoreUrl}}/records/OUTPUT" } }}1export function normalizeActorInput(input) {2 const videoUrl = normalizeRequiredString(input?.videoUrl, 'videoUrl');3 const prompt = normalizeRequiredString(input?.prompt, 'prompt');4 const jsonschema = normalizeOptionalJsonObject(input?.jsonschema, 'jsonschema');5
6 return {7 videoUrl,8 prompt,9 ...(jsonschema ? { jsonschema } : {}),10 };11}12
/**
 * Validates that an input field is a non-blank string and returns it trimmed.
 *
 * @param {unknown} value - Raw value to validate.
 * @param {string} fieldName - Field name, used only to build the error message.
 * @returns {string} The value with leading and trailing whitespace removed.
 * @throws {Error} When the value is not a string or contains only whitespace.
 */
function normalizeRequiredString(value, fieldName) {
  // Non-strings collapse to '' so a single check covers both failure modes.
  const trimmed = typeof value === 'string' ? value.trim() : '';

  if (trimmed === '') {
    throw new Error(`Input field "${fieldName}" must be a non-empty string.`);
  }

  return trimmed;
}
/**
 * Validates an optional input field that, when present, must be a JSON object.
 *
 * @param {unknown} value - Raw value to validate.
 * @param {string} fieldName - Field name, used only to build the error message.
 * @returns {object | undefined} The same object reference, or `undefined` when
 *   the value is `undefined` or `null`.
 * @throws {Error} When the value is present but is a primitive or an array.
 */
function normalizeOptionalJsonObject(value, fieldName) {
  if (value === undefined || value === null) {
    return undefined;
  }

  // `null` is already handled above, so `typeof` alone rejects every primitive.
  if (typeof value !== 'object' || Array.isArray(value)) {
    throw new Error(`Input field "${fieldName}" must be a JSON object when provided.`);
  }

  return value;
}

// Kept from the original line, where this import was fused onto the closing brace above.
import { Actor } from 'apify';
3import { normalizeActorInput } from './input.js';4import { fetchWorkerResult } from './workerClient.js';5
// Actor entry point: reads and validates the input, requests the analysis from
// the worker, charges the `ai-call` event, pushes a successful result to the
// default dataset, and always writes a run summary to the OUTPUT record.
await Actor.main(async () => {
  const input = (await Actor.getInput()) ?? {};
  const normalizedInput = normalizeActorInput(input);
  const workerResponse = await fetchWorkerResult(normalizedInput, process.env);
  const succeeded = workerResponse.result?.status === 'ok';

  // Prefer the count of successful LLM calls; fall back to the total call count.
  const requestedTranscriptAnalyses = normalizeAiCallCount(
    workerResponse.meta?.successfulLlmCallCount
      ?? workerResponse.meta?.llmCallCount,
  );
  const chargeResult = await chargeForAiCalls(requestedTranscriptAnalyses);

  if (succeeded) {
    await Actor.pushData({
      result: workerResponse.result.output,
    });
  }

  await Actor.setValue('OUTPUT', {
    summary: {
      requested: 1,
      succeeded: succeeded ? 1 : 0,
      failed: succeeded ? 0 : 1,
      chunkCount: workerResponse.meta?.chunkCount ?? 0,
      fallbackUsed: Boolean(workerResponse.meta?.fallbackUsed),
      llmCallCount: workerResponse.meta?.llmCallCount ?? 0,
      llmAttemptCount: workerResponse.meta?.llmAttemptCount ?? 0,
      aiCallsRequested: requestedTranscriptAnalyses,
      aiCallsCharged: chargeResult.chargedCount,
      chargeLimitReached: chargeResult.eventChargeLimitReached,
      finalModel: workerResponse.meta?.finalModel ?? null,
    },
    error: succeeded ? null : (workerResponse.result?.error ?? null),
    meta: {
      ...(workerResponse.meta ?? {}),
      pricing: {
        eventName: 'ai-call',
        requestedAiCalls: requestedTranscriptAnalyses,
        chargedAiCalls: chargeResult.chargedCount,
        chargeLimitReached: chargeResult.eventChargeLimitReached,
        skippedReason: chargeResult.skippedReason ?? null,
      },
    },
  });

  if (succeeded) {
    console.log(
      `Stored one successful LLM result in the default dataset. Requested ${requestedTranscriptAnalyses} transcript analysis charge(s), charged ${chargeResult.chargedCount}.`,
    );
    return;
  }

  console.warn(
    `Worker returned an error result. Stored details in the OUTPUT record. Requested ${requestedTranscriptAnalyses} transcript analysis charge(s), charged ${chargeResult.chargedCount}.`,
  );
});
/**
 * Coerces a value into a non-negative integer call count.
 *
 * The value is stringified and parsed as a base-10 integer, so fractional
 * values are truncated. Missing, unparseable, zero, or negative values all
 * yield 0.
 *
 * @param {unknown} value - Raw count.
 * @returns {number} A positive integer, or 0.
 */
function normalizeAiCallCount(value) {
  const parsed = Number.parseInt(String(value ?? '0'), 10);

  // NaN fails the comparison, so unparseable input falls through to 0 as well.
  return parsed > 0 ? parsed : 0;
}
/**
 * Charges the `ai-call` event once per requested call.
 *
 * Charging stops early when a charge reports that the event charge limit was
 * reached. A thrown charging error is logged and reported via `skippedReason`
 * rather than rethrown; charges that succeeded before the error are still
 * counted in `chargedCount`.
 *
 * @param {number} requestedAiCalls - Number of `ai-call` events to charge.
 * @returns {Promise<{
 *   chargedCount: number,
 *   eventChargeLimitReached: boolean,
 *   skippedReason: string | null,
 * }>} Charging outcome. `skippedReason` is `'no-ai-calls'` when nothing was
 *   requested, the error message when charging threw, and `null` otherwise.
 */
async function chargeForAiCalls(requestedAiCalls) {
  if (requestedAiCalls <= 0) {
    return {
      chargedCount: 0,
      eventChargeLimitReached: false,
      skippedReason: 'no-ai-calls',
    };
  }

  // Declared outside the try block so the catch branch can still report
  // charges that succeeded before a later call failed.
  let chargedCount = 0;
  let eventChargeLimitReached = false;

  try {
    for (let index = 0; index < requestedAiCalls; index += 1) {
      const chargeResult = await Actor.charge({
        eventName: 'ai-call',
      });

      chargedCount += chargeResult.chargedCount;

      if (chargeResult.eventChargeLimitReached) {
        eventChargeLimitReached = true;
        break;
      }
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);

    console.warn(`Skipping ai-call charging. ${message}`);

    return {
      chargedCount,
      eventChargeLimitReached,
      skippedReason: message,
    };
  }

  if (chargedCount < requestedAiCalls) {
    console.warn(
      `Requested ${requestedAiCalls} ai-call charge(s), but only ${chargedCount} were charged within the user's limit.`,
    );
  }

  return {
    chargedCount,
    eventChargeLimitReached,
    skippedReason: null,
  };
}

// Kept from the original line, where this constant was fused onto the closing brace above.
const DEFAULT_REQUEST_TIMEOUT_MS = 120_000;
/**
 * Sends the normalized Actor input to the worker and returns its JSON payload.
 *
 * @param {object} input - Request body; serialized with `JSON.stringify`.
 * @param {Record<string, string | undefined>} env - Environment variables.
 *   Reads `WORKER_BASE_URL` (required), `WORKER_API_TOKEN` (optional bearer
 *   token) and `REQUEST_TIMEOUT_MS` (optional; a missing, non-numeric, or
 *   non-positive value falls back to `DEFAULT_REQUEST_TIMEOUT_MS`).
 * @returns {Promise<object>} The worker payload, which has `success === true`,
 *   a truthy `result`, and a `meta` of type `object`.
 * @throws {Error} `'Worker request timed out.'` when the timeout elapsed, a
 *   `"<type>: <message>"` error for non-2xx responses, or an
 *   unexpected-payload error when the response shape is wrong.
 */
export async function fetchWorkerResult(input, env) {
  const endpointUrl = buildWorkerEndpointUrl(env.WORKER_BASE_URL);

  // A NaN delay would make the timer fire almost immediately, so anything
  // that is not a positive integer falls back to the default.
  const parsedTimeoutMs = Number.parseInt(env.REQUEST_TIMEOUT_MS ?? '', 10);
  const requestTimeoutMs = parsedTimeoutMs > 0 ? parsedTimeoutMs : DEFAULT_REQUEST_TIMEOUT_MS;

  const headers = {
    'content-type': 'application/json',
  };

  if (env.WORKER_API_TOKEN) {
    headers.authorization = `Bearer ${env.WORKER_API_TOKEN}`;
  }

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), requestTimeoutMs);

  try {
    const response = await fetch(endpointUrl, {
      method: 'POST',
      headers,
      body: JSON.stringify(input),
      signal: controller.signal,
    });
    const payload = await safeParseJson(response);

    if (!response.ok) {
      const errorType = payload?.error?.type ?? 'WORKER_ERROR';
      const errorMessage = payload?.error?.message ?? `Worker request failed with status ${response.status}.`;
      throw new Error(`${errorType}: ${errorMessage}`);
    }

    if (!payload || payload.success !== true || !payload.result || typeof payload.meta !== 'object') {
      throw new Error('Worker returned an unexpected response payload.');
    }

    return payload;
  } catch (error) {
    // Only the timer above can abort this controller, so the signal state is
    // the reliable timeout indicator. Matching on the error text would also
    // rewrite genuine worker errors that merely mention "timeout".
    if (controller.signal.aborted) {
      throw new Error('Worker request timed out.', { cause: error });
    }

    throw error;
  } finally {
    clearTimeout(timeoutId);
  }
}
/**
 * Resolves the worker's `analyze` endpoint from the configured base URL.
 *
 * Surrounding whitespace and trailing slashes are ignored. A base URL that
 * already ends in `/analyze` (with or without a trailing slash) is returned
 * without a second `analyze` segment; otherwise `analyze` is appended as a
 * path segment.
 *
 * @param {unknown} baseUrl - Value of `WORKER_BASE_URL`.
 * @returns {string} The full endpoint URL.
 * @throws {Error} When `baseUrl` is missing, not a string, or blank.
 * @throws {TypeError} From `new URL` when the base is not a valid absolute URL.
 */
function buildWorkerEndpointUrl(baseUrl) {
  if (typeof baseUrl !== 'string' || !baseUrl.trim()) {
    throw new Error('Missing required environment variable WORKER_BASE_URL.');
  }

  // Drop trailing slashes first so ".../analyze/" is recognized as already
  // pointing at the endpoint instead of becoming ".../analyze/analyze".
  const normalizedBaseUrl = baseUrl.trim().replace(/\/+$/, '');

  if (normalizedBaseUrl.endsWith('/analyze')) {
    return normalizedBaseUrl;
  }

  // The base needs a trailing slash, or `new URL` would replace its last
  // path segment instead of appending to it.
  return new URL('analyze', ensureTrailingSlash(normalizedBaseUrl)).toString();
}
/**
 * Returns the string unchanged if it already ends with `/`, otherwise with a
 * single `/` appended.
 *
 * @param {string} value - String to normalize.
 * @returns {string} The value, guaranteed to end with `/`.
 */
function ensureTrailingSlash(value) {
  if (value.endsWith('/')) {
    return value;
  }

  return `${value}/`;
}
/**
 * Reads a response body as JSON without ever throwing.
 *
 * @param {{ headers: { get(name: string): string | null }, json(): Promise<unknown> }} response
 *   A fetch-style response.
 * @returns {Promise<unknown | null>} The parsed body, or `null` when the
 *   `content-type` header does not mention `application/json` or when parsing
 *   fails.
 */
async function safeParseJson(response) {
  // Media types are case-insensitive, so compare in lower case.
  const contentType = (response.headers.get('content-type') ?? '').toLowerCase();

  if (!contentType.includes('application/json')) {
    return null;
  }

  try {
    return await response.json();
  } catch {
    // Deliberate best effort: callers treat an unreadable body as "no payload".
    return null;
  }
}