AI YouTube Transcript Analyzer
Pricing
$25.00 / 1,000 transcript analyses
AI YouTube Transcript Analyzer
Pricing
$25.00 / 1,000 transcript analyses
WORKER_BASE_URL=https://your-ai-yt-transcript-worker.your-subdomain.workers.dev
# Optional. Must match WORKER_AUTH_TOKEN on the worker if enabled.
WORKER_API_TOKEN=your-shared-secret
FROM apify/actor-node:20
COPY package*.json ./
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Finished installing NPM packages"
COPY . ./
CMD ["npm", "start"]{ "name": "ai-yt-transcript-actor", "version": "0.1.0", "private": true, "type": "module", "scripts": { "start": "node src/main.js" }, "dependencies": { "apify": "^3.4.2" }}{ "actorSpecification": 1, "name": "youtube-transcript-ai-analyzer", "title": "AI YouTube Transcript Analyzer", "description": "Analyze a single YouTube transcript with a custom prompt and optional JSON schema, powered by Hexa API's Cloudflare Worker delivery layer.", "version": "0.1", "minMemoryMbytes": 256, "maxMemoryMbytes": 256, "buildTag": "latest", "dockerfile": "../Dockerfile", "input": "./input_schema.json", "output": "./output_schema.json", "storages": { "dataset": "./dataset_schema.json", "keyValueStore": "./key_value_store_schema.json" }}{ "actorSpecification": 1, "fields": { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "properties": { "result": { "title": "LLM result", "description": "The successful final analysis result returned by the worker. This may be plain text or structured JSON depending on the input.", "oneOf": [ { "type": "string", "example": "The video focuses on..." }, { "type": "object", "additionalProperties": true } ] } }, "required": ["result"] }, "views": { "overview": { "title": "Overview", "description": "Successful AI analysis results produced by the Actor.", "transformation": { "fields": ["result"] }, "display": { "component": "table", "properties": { "result": { "label": "Result", "format": "text" } } } } }}{ "title": "AI YouTube Transcript Analyzer", "type": "object", "schemaVersion": 1, "properties": { "videoUrl": { "title": "YouTube video URL", "type": "string", "description": "One direct YouTube video URL. 
Supports watch, shorts, embed, and youtu.be links.", "editor": "textfield", "prefill": "https://www.youtube.com/watch?v=dQw4w9WgXcQ" }, "prompt": { "title": "Prompt", "type": "string", "description": "The analysis instruction that will be applied to the transcript.", "editor": "textarea", "prefill": "Summarize the video in one sentence." }, "jsonschema": { "title": "JSON schema", "type": "object", "description": "Optional strict JSON schema for structured LLM output.", "editor": "json", "prefill": { "title": "video_summary", "type": "object", "additionalProperties": false, "properties": { "summary": { "type": "string" } }, "required": ["summary"] } } }, "required": ["videoUrl", "prompt"]}{ "actorKeyValueStoreSchemaVersion": 1, "title": "AI YouTube Transcript Analyzer key-value store", "description": "Structured records stored in the default key-value store for AI YouTube Transcript Analyzer runs.", "collections": { "runOutput": { "title": "Run output", "description": "The OUTPUT record containing the run summary, worker metadata, and any failure details from the run.", "key": "OUTPUT", "contentTypes": ["application/json"], "jsonSchema": { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "properties": { "summary": { "type": "object", "properties": { "requested": { "type": "integer", "example": 1 }, "succeeded": { "type": "integer", "example": 1 }, "failed": { "type": "integer", "example": 0 }, "chunkCount": { "type": "integer", "example": 3 }, "fallbackUsed": { "type": "boolean", "example": false }, "llmCallCount": { "type": "integer", "example": 4 }, "aiCallsRequested": { "type": "integer", "example": 4 }, "aiCallsCharged": { "type": "integer", "example": 4 }, "chargeLimitReached": { "type": "boolean", "example": false }, "finalModel": { "type": ["string", "null"], "example": "mistralai/mistral-medium-3-instruct" } }, "required": ["requested", "succeeded", "failed", "chunkCount", "fallbackUsed", "llmCallCount", "aiCallsRequested", 
"aiCallsCharged", "chargeLimitReached"] }, "error": { "type": ["object", "null"], "properties": { "type": { "type": "string", "example": "TRANSCRIPT_NOT_FOUND" }, "message": { "type": "string", "example": "Transcript is not available for this video." } }, "required": ["type", "message"] }, "meta": { "type": ["object", "null"], "additionalProperties": true } }, "required": ["summary", "error", "meta"] } } }}{ "actorOutputSchemaVersion": 1, "title": "AI YouTube Transcript Analyzer output", "description": "Output locations for successful AI analysis results and the run summary generated by the Actor.", "properties": { "results": { "type": "string", "title": "Successful analysis results", "description": "Successful LLM results written to the run's default dataset. Each item contains a single `result` field.", "template": "{{links.apiDefaultDatasetUrl}}/items?view=overview" }, "runOutput": { "type": "string", "title": "Run summary and worker metadata", "description": "The OUTPUT record in the default key-value store containing the run summary, worker metadata, and any error details.", "template": "{{links.apiDefaultKeyValueStoreUrl}}/records/OUTPUT" } }}1export function normalizeActorInput(input) {2 const videoUrl = normalizeRequiredString(input?.videoUrl, 'videoUrl');3 const prompt = normalizeRequiredString(input?.prompt, 'prompt');4 const jsonschema = normalizeOptionalJsonObject(input?.jsonschema, 'jsonschema');5
6 return {7 videoUrl,8 prompt,9 ...(jsonschema ? { jsonschema } : {}),10 };11}12
/**
 * Returns the trimmed value of a required string input field.
 *
 * @param {unknown} value - Raw field value.
 * @param {string} fieldName - Field name used in the error message.
 * @returns {string} The value with surrounding whitespace removed.
 * @throws {Error} When the value is not a string or is blank after trimming.
 */
function normalizeRequiredString(value, fieldName) {
  const trimmed = typeof value === 'string' ? value.trim() : '';

  if (!trimmed) {
    throw new Error(`Input field "${fieldName}" must be a non-empty string.`);
  }

  return trimmed;
}

/**
 * Validates an optional JSON-object input field.
 *
 * @param {unknown} value - Raw field value.
 * @param {string} fieldName - Field name used in the error message.
 * @returns {object | undefined} The same object when one was provided, or
 *   `undefined` when the value is `undefined` or `null`.
 * @throws {Error} When a value is provided that is not a non-array object.
 */
function normalizeOptionalJsonObject(value, fieldName) {
  if (value === undefined || value === null) {
    return undefined;
  }

  // Arrays report typeof 'object', so they are rejected explicitly.
  if (typeof value !== 'object' || Array.isArray(value)) {
    throw new Error(`Input field "${fieldName}" must be a JSON object when provided.`);
  }

  return value;
}

import { Actor } from 'apify';

import { normalizeActorInput } from './input.js';
import { fetchWorkerResult } from './workerClient.js';

// Actor entry point: validate the input, call the worker once, charge for the
// LLM calls the worker reports, then store the result and a run summary.
await Actor.main(async () => {
  const input = (await Actor.getInput()) ?? {};
  const normalizedInput = normalizeActorInput(input);
  const workerResponse = await fetchWorkerResult(normalizedInput, process.env);
  const succeeded = workerResponse.result?.status === 'ok';
  const requestedAiCalls = normalizeAiCallCount(workerResponse.meta?.llmCallCount);
  // Charging happens whether or not the worker result is 'ok'.
  const chargeResult = await chargeForAiCalls(requestedAiCalls);

  // Only successful results are written to the default dataset.
  if (succeeded) {
    await Actor.pushData({
      result: workerResponse.result.output,
    });
  }

  // The OUTPUT record is written for both outcomes and carries the summary,
  // the worker's error (if any) and the worker metadata plus pricing details.
  await Actor.setValue('OUTPUT', {
    summary: {
      requested: 1,
      succeeded: succeeded ? 1 : 0,
      failed: succeeded ? 0 : 1,
      chunkCount: workerResponse.meta?.chunkCount ?? 0,
      fallbackUsed: Boolean(workerResponse.meta?.fallbackUsed),
      llmCallCount: workerResponse.meta?.llmCallCount ?? 0,
      aiCallsRequested: requestedAiCalls,
      aiCallsCharged: chargeResult.chargedCount,
      chargeLimitReached: chargeResult.eventChargeLimitReached,
      finalModel: workerResponse.meta?.finalModel ?? null,
    },
    error: succeeded ? null : (workerResponse.result?.error ?? null),
    meta: {
      ...(workerResponse.meta ?? {}),
      pricing: {
        eventName: 'ai-call',
        requestedAiCalls,
        chargedAiCalls: chargeResult.chargedCount,
        chargeLimitReached: chargeResult.eventChargeLimitReached,
        skippedReason: chargeResult.skippedReason ?? null,
      },
    },
  });

  if (succeeded) {
    console.log(
      `Stored one successful LLM result in the default dataset. Requested ${requestedAiCalls} AI call charge(s), charged ${chargeResult.chargedCount}.`,
    );
    return;
  }

  console.warn(
    `Worker returned an error result. Stored details in the OUTPUT record. Requested ${requestedAiCalls} AI call charge(s), charged ${chargeResult.chargedCount}.`,
  );
});

/**
 * Converts the worker-reported LLM call count into a non-negative integer.
 *
 * The value is stringified and parsed as a base-10 integer, so numeric
 * strings are accepted and fractional parts are dropped.
 *
 * @param {unknown} value - Raw call count (nullish is treated as 0).
 * @returns {number} The parsed count, or 0 when it is missing, unparsable,
 *   zero, or negative.
 */
function normalizeAiCallCount(value) {
  const parsed = Number.parseInt(String(value ?? '0'), 10);

  return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
}

/**
 * Charges one 'ai-call' event per requested AI call via `Actor.charge`.
 *
 * Charging is best-effort: a failure is logged and reported through
 * `skippedReason` instead of being thrown. Charges that succeeded before a
 * failure are still reflected in `chargedCount`.
 *
 * @param {number} requestedAiCalls - Number of 'ai-call' events to charge.
 * @returns {Promise<{ chargedCount: number, eventChargeLimitReached: boolean, skippedReason: string | null }>}
 *   `skippedReason` is 'no-ai-calls' when nothing was requested, the error
 *   message when charging failed, and `null` otherwise.
 */
async function chargeForAiCalls(requestedAiCalls) {
  if (requestedAiCalls <= 0) {
    return {
      chargedCount: 0,
      eventChargeLimitReached: false,
      skippedReason: 'no-ai-calls',
    };
  }

  // Declared outside the try block so the catch branch can report charges
  // that already went through before an error occurred.
  let chargedCount = 0;
  let eventChargeLimitReached = false;

  try {
    for (let index = 0; index < requestedAiCalls; index += 1) {
      const chargeResult = await Actor.charge({
        eventName: 'ai-call',
      });

      chargedCount += chargeResult.chargedCount;

      // Stop as soon as the charge limit is reported as reached.
      if (chargeResult.eventChargeLimitReached) {
        eventChargeLimitReached = true;
        break;
      }
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);

    console.warn(`Skipping ai-call charging. ${message}`);

    return {
      chargedCount,
      eventChargeLimitReached,
      skippedReason: message,
    };
  }

  if (chargedCount < requestedAiCalls) {
    console.warn(
      `Requested ${requestedAiCalls} ai-call charge(s), but only ${chargedCount} were charged within the user's limit.`,
    );
  }

  return {
    chargedCount,
    eventChargeLimitReached,
    skippedReason: null,
  };
}

// Default time budget, in milliseconds, for a single worker request.
const DEFAULT_REQUEST_TIMEOUT_MS = 120_000;

/**
 * POSTs the normalized Actor input to the worker's analyze endpoint and
 * returns the parsed response payload.
 *
 * @param {object} input - Request body; sent as JSON.
 * @param {object} env - Environment map. Reads `WORKER_BASE_URL` (required),
 *   `WORKER_API_TOKEN` (optional bearer token) and `REQUEST_TIMEOUT_MS`
 *   (optional; invalid or non-positive values fall back to the default).
 * @returns {Promise<object>} The payload, guaranteed to have
 *   `success === true`, a truthy `result` and a `meta` of type 'object'.
 * @throws {Error} 'Worker request timed out.' when the timeout fires,
 *   '<type>: <message>' for non-2xx responses, or
 *   'Worker returned an unexpected response payload.' for malformed bodies.
 */
export async function fetchWorkerResult(input, env) {
  const endpointUrl = buildWorkerEndpointUrl(env.WORKER_BASE_URL);

  // A NaN delay would make setTimeout fire almost immediately, so anything
  // that is not a positive integer falls back to the default.
  const parsedTimeoutMs = Number.parseInt(env.REQUEST_TIMEOUT_MS ?? '', 10);
  const requestTimeoutMs = Number.isFinite(parsedTimeoutMs) && parsedTimeoutMs > 0
    ? parsedTimeoutMs
    : DEFAULT_REQUEST_TIMEOUT_MS;

  const headers = {
    'content-type': 'application/json',
  };

  if (env.WORKER_API_TOKEN) {
    headers.authorization = `Bearer ${env.WORKER_API_TOKEN}`;
  }

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort('timeout'), requestTimeoutMs);

  try {
    const response = await fetch(endpointUrl, {
      method: 'POST',
      headers,
      body: JSON.stringify(input),
      signal: controller.signal,
    });
    const payload = await safeParseJson(response);

    if (!response.ok) {
      const errorType = payload?.error?.type ?? 'WORKER_ERROR';
      const errorMessage = payload?.error?.message ?? `Worker request failed with status ${response.status}.`;
      throw new Error(`${errorType}: ${errorMessage}`);
    }

    if (!payload || payload.success !== true || !payload.result || typeof payload.meta !== 'object') {
      throw new Error('Worker returned an unexpected response payload.');
    }

    return payload;
  } catch (error) {
    // Only the timer above aborts this controller, so an aborted signal means
    // the request timed out. Inspecting the signal (rather than the error
    // text) also works when the rejection value is the bare abort reason, and
    // avoids relabelling worker errors whose message mentions "timeout".
    if (controller.signal.aborted) {
      throw new Error('Worker request timed out.', { cause: error });
    }

    throw error;
  } finally {
    clearTimeout(timeoutId);
  }
}

/**
 * Resolves the worker's analyze endpoint from the configured base URL.
 *
 * A base URL that already points at `/analyze` (with or without trailing
 * slashes) is returned without the trailing slashes; otherwise `analyze` is
 * resolved relative to the base URL.
 *
 * @param {unknown} baseUrl - Value of the WORKER_BASE_URL environment variable.
 * @returns {string} The endpoint URL.
 * @throws {Error} When `baseUrl` is not a non-blank string.
 * @throws {TypeError} When the base URL cannot be parsed by `URL`.
 */
function buildWorkerEndpointUrl(baseUrl) {
  if (typeof baseUrl !== 'string' || !baseUrl.trim()) {
    throw new Error('Missing required environment variable WORKER_BASE_URL.');
  }

  const normalizedBaseUrl = baseUrl.trim();

  // Compare without trailing slashes so ".../analyze/" is recognised as the
  // endpoint instead of being expanded to ".../analyze/analyze".
  const withoutTrailingSlashes = normalizedBaseUrl.replace(/\/+$/, '');

  if (withoutTrailingSlashes.endsWith('/analyze')) {
    return withoutTrailingSlashes;
  }

  return new URL('analyze', ensureTrailingSlash(normalizedBaseUrl)).toString();
}

/**
 * Appends a '/' unless the value already ends with one, so that a relative
 * URL resolved against it keeps the last path segment.
 *
 * @param {string} value - URL string.
 * @returns {string} The value ending in '/'.
 */
function ensureTrailingSlash(value) {
  return value.endsWith('/') ? value : `${value}/`;
}

/**
 * Parses a response body as JSON without throwing.
 *
 * @param {Response} response - Response whose `content-type` header and body
 *   are read.
 * @returns {Promise<unknown | null>} The parsed body, or `null` when the
 *   content type is not `application/json` or the body fails to parse.
 */
async function safeParseJson(response) {
  // Media types are case-insensitive, so compare in lower case.
  const contentType = (response.headers.get('content-type') ?? '').toLowerCase();

  if (!contentType.includes('application/json')) {
    return null;
  }

  try {
    return await response.json();
  } catch {
    // A malformed body is treated the same as a non-JSON response.
    return null;
  }
}