Pricing

$25.00 / 1,000 transcript analyses

Try for free

Go to Apify Store

AI YouTube Transcript Analyzer

Try for free

Prompt + YouTube URL -> { ❤️‍🔥 }

Pricing

$25.00 / 1,000 transcript analyses

Rating

5.0

(2)

Developer

Hexa API

Actor stats

Bookmarked

Total users

Monthly active users

3 months ago

Last modified

.env.example

WORKER_BASE_URL=https://your-ai-yt-transcript-worker.your-subdomain.workers.dev
# Optional. Must match WORKER_AUTH_TOKEN on the worker if enabled.
WORKER_API_TOKEN=your-shared-secret

Dockerfile

# Base image (presumably Apify's Node.js 20 Actor image, going by its name).
FROM apify/actor-node:20

# Copy only the package manifests first, ahead of the rest of the sources.
COPY package*.json ./

# Install dependencies without dev and optional packages, then print the
# installed dependency tree to the build log. The listing is informational
# only: `|| true` keeps a non-zero `npm list` exit code from failing the build.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Finished installing NPM packages"

# Copy the remaining project files into the image.
COPY . ./

# Launch the Actor through the package's "start" script.
CMD ["npm", "start"]

package.json

{
  "name": "ai-yt-transcript-actor",
  "version": "0.1.0",
  "private": true,
  "type": "module",
  "scripts": {
    "start": "node src/main.js"
  },
  "dependencies": {
    "apify": "^3.4.2"
  }
}

.actor/actor.json

{
  "actorSpecification": 1,
  "name": "youtube-transcript-ai-analyzer",
  "title": "AI YouTube Transcript Analyzer",
  "description": "Analyze a single YouTube transcript with a custom prompt and optional JSON schema, powered by Hexa API's Cloudflare Worker delivery layer.",
  "version": "0.1",
  "minMemoryMbytes": 256,
  "maxMemoryMbytes": 256,
  "buildTag": "latest",
  "dockerfile": "../Dockerfile",
  "input": "./input_schema.json",
  "output": "./output_schema.json",
  "storages": {
    "dataset": "./dataset_schema.json",
    "keyValueStore": "./key_value_store_schema.json"
  }
}

.actor/dataset_schema.json

{
  "actorSpecification": 1,
  "fields": {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {
      "result": {
        "title": "LLM result",
        "description": "The successful final analysis result returned by the worker. This may be plain text or structured JSON depending on the input.",
        "oneOf": [
          {
            "type": "string",
            "example": "The video focuses on..."
          },
          {
            "type": "object",
            "additionalProperties": true
          }
        ]
      }
    },
    "required": ["result"]
  },
  "views": {
    "overview": {
      "title": "Overview",
      "description": "Successful AI analysis results produced by the Actor.",
      "transformation": {
        "fields": ["result"]
      },
      "display": {
        "component": "table",
        "properties": {
          "result": {
            "label": "Result",
            "format": "text"
          }
        }
      }
    }
  }
}

.actor/input_schema.json

{
  "title": "AI YouTube Transcript Analyzer",
  "type": "object",
  "schemaVersion": 1,
  "properties": {
    "videoUrl": {
      "title": "YouTube video URL",
      "type": "string",
      "description": "One direct YouTube video URL. Supports watch, shorts, embed, and youtu.be links.",
      "editor": "textfield",
      "prefill": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
    },
    "prompt": {
      "title": "Prompt",
      "type": "string",
      "description": "The analysis instruction that will be applied to the transcript.",
      "editor": "textarea",
      "prefill": "Summarize the video in one sentence."
    },
    "jsonschema": {
      "title": "JSON schema",
      "type": "object",
      "description": "Optional strict JSON schema for structured LLM output.",
      "editor": "json",
      "prefill": {
        "title": "video_summary",
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "summary": {
            "type": "string"
          }
        },
        "required": ["summary"]
      }
    }
  },
  "required": ["videoUrl", "prompt"]
}

.actor/key_value_store_schema.json

{
  "actorKeyValueStoreSchemaVersion": 1,
  "title": "AI YouTube Transcript Analyzer key-value store",
  "description": "Structured records stored in the default key-value store for AI YouTube Transcript Analyzer runs.",
  "collections": {
    "runOutput": {
      "title": "Run output",
      "description": "The OUTPUT record containing the run summary, worker metadata, and any failure details from the run.",
      "key": "OUTPUT",
      "contentTypes": ["application/json"],
      "jsonSchema": {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": {
          "summary": {
            "type": "object",
            "properties": {
              "requested": {
                "type": "integer",
                "example": 1
              },
              "succeeded": {
                "type": "integer",
                "example": 1
              },
              "failed": {
                "type": "integer",
                "example": 0
              },
              "chunkCount": {
                "type": "integer",
                "example": 3
              },
              "fallbackUsed": {
                "type": "boolean",
                "example": false
              },
              "llmCallCount": {
                "type": "integer",
                "example": 4
              },
              "llmAttemptCount": {
                "type": "integer",
                "example": 5
              },
              "aiCallsRequested": {
                "type": "integer",
                "example": 4
              },
              "aiCallsCharged": {
                "type": "integer",
                "example": 4
              },
              "chargeLimitReached": {
                "type": "boolean",
                "example": false
              },
              "finalModel": {
                "type": ["string", "null"],
                "example": "mistralai/mistral-medium-3-instruct"
              }
            },
            "required": ["requested", "succeeded", "failed", "chunkCount", "fallbackUsed", "llmCallCount", "llmAttemptCount", "aiCallsRequested", "aiCallsCharged", "chargeLimitReached"]
          },
          "error": {
            "type": ["object", "null"],
            "properties": {
              "type": {
                "type": "string",
                "example": "TRANSCRIPT_NOT_FOUND"
              },
              "message": {
                "type": "string",
                "example": "Transcript is not available for this video."
              }
            },
            "required": ["type", "message"]
          },
          "meta": {
            "type": ["object", "null"],
            "additionalProperties": true
          }
        },
        "required": ["summary", "error", "meta"]
      }
    }
  }
}

.actor/output_schema.json

{
  "actorOutputSchemaVersion": 1,
  "title": "AI YouTube Transcript Analyzer output",
  "description": "Output locations for successful AI analysis results and the run summary generated by the Actor.",
  "properties": {
    "results": {
      "type": "string",
      "title": "Successful analysis results",
      "description": "Successful LLM results written to the run's default dataset. Each item contains a single `result` field.",
      "template": "{{links.apiDefaultDatasetUrl}}/items?view=overview"
    },
    "runOutput": {
      "type": "string",
      "title": "Run summary and worker metadata",
      "description": "The OUTPUT record in the default key-value store containing the run summary, worker metadata, and any error details.",
      "template": "{{links.apiDefaultKeyValueStoreUrl}}/records/OUTPUT"
    }
  }
}

src/input.js

/**
 * Validates the raw Actor input and reduces it to the fields sent to the worker.
 *
 * @param {object | null | undefined} input - Raw Actor input.
 * @returns {{ videoUrl: string, prompt: string, jsonschema?: object }} Trimmed
 *   `videoUrl` and `prompt`, plus `jsonschema` only when one was supplied.
 * @throws {Error} When a required field is missing/blank or `jsonschema` is not an object.
 */
export function normalizeActorInput(input) {
    const normalized = {
        videoUrl: normalizeRequiredString(input?.videoUrl, 'videoUrl'),
        prompt: normalizeRequiredString(input?.prompt, 'prompt'),
    };

    const jsonschema = normalizeOptionalJsonObject(input?.jsonschema, 'jsonschema');

    // The key is left out entirely (rather than set to undefined) when no schema is given.
    if (jsonschema) {
        normalized.jsonschema = jsonschema;
    }

    return normalized;
}
12
/**
 * Returns the trimmed value of a mandatory string input field.
 *
 * @param {unknown} value - Candidate field value.
 * @param {string} fieldName - Field name used in the error message.
 * @returns {string} The value with surrounding whitespace removed.
 * @throws {Error} When the value is not a string or is blank after trimming.
 */
function normalizeRequiredString(value, fieldName) {
    // Non-strings collapse to '' so a single emptiness check covers both failure modes.
    const trimmed = typeof value === 'string' ? value.trim() : '';

    if (trimmed.length === 0) {
        throw new Error(`Input field "${fieldName}" must be a non-empty string.`);
    }

    return trimmed;
}
20
/**
 * Validates an optional input field that, when present, must be a JSON object.
 *
 * @param {unknown} value - Candidate field value.
 * @param {string} fieldName - Field name used in the error message.
 * @returns {object | undefined} The same object, or `undefined` when the field is absent.
 * @throws {Error} When the value is present but is a primitive or an array.
 */
function normalizeOptionalJsonObject(value, fieldName) {
    // `== null` matches both null and undefined: the field was simply not provided.
    if (value == null) {
        return undefined;
    }

    const isJsonObject = typeof value === 'object' && !Array.isArray(value);

    if (!isJsonObject) {
        throw new Error(`Input field "${fieldName}" must be a JSON object when provided.`);
    }

    return value;
}

src/main.js

1import { Actor } from 'apify';
2
3import { normalizeActorInput } from './input.js';
4import { fetchWorkerResult } from './workerClient.js';
5
// Actor entry point. Order of operations: read and normalize the input, call the
// worker, charge for the reported AI calls, push the result to the dataset on
// success, always write the OUTPUT record, then log the outcome.
await Actor.main(async () => {
    const rawInput = await Actor.getInput();
    const workerResponse = await fetchWorkerResult(normalizeActorInput(rawInput ?? {}), process.env);
    const { result, meta } = workerResponse;

    const succeeded = result?.status === 'ok';

    // Prefer the count of successful LLM calls; fall back to the total call count.
    const requestedTranscriptAnalyses = normalizeAiCallCount(
        meta?.successfulLlmCallCount ?? meta?.llmCallCount,
    );
    const chargeResult = await chargeForAiCalls(requestedTranscriptAnalyses);

    if (succeeded) {
        await Actor.pushData({ result: result.output });
    }

    const summary = {
        requested: 1,
        succeeded: succeeded ? 1 : 0,
        failed: succeeded ? 0 : 1,
        chunkCount: meta?.chunkCount ?? 0,
        fallbackUsed: Boolean(meta?.fallbackUsed),
        llmCallCount: meta?.llmCallCount ?? 0,
        llmAttemptCount: meta?.llmAttemptCount ?? 0,
        aiCallsRequested: requestedTranscriptAnalyses,
        aiCallsCharged: chargeResult.chargedCount,
        chargeLimitReached: chargeResult.eventChargeLimitReached,
        finalModel: meta?.finalModel ?? null,
    };

    const pricing = {
        eventName: 'ai-call',
        requestedAiCalls: requestedTranscriptAnalyses,
        chargedAiCalls: chargeResult.chargedCount,
        chargeLimitReached: chargeResult.eventChargeLimitReached,
        skippedReason: chargeResult.skippedReason ?? null,
    };

    await Actor.setValue('OUTPUT', {
        summary,
        error: succeeded ? null : result?.error ?? null,
        // Worker metadata is passed through as-is, with the pricing details added alongside.
        meta: {
            ...(meta ?? {}),
            pricing,
        },
    });

    if (!succeeded) {
        console.warn(
            `Worker returned an error result. Stored details in the OUTPUT record. Requested ${requestedTranscriptAnalyses} transcript analysis charge(s), charged ${chargeResult.chargedCount}.`,
        );
        return;
    }

    console.log(
        `Stored one successful LLM result in the default dataset. Requested ${requestedTranscriptAnalyses} transcript analysis charge(s), charged ${chargeResult.chargedCount}.`,
    );
});
61
/**
 * Coerces a worker-reported call count into a non-negative integer.
 *
 * @param {unknown} value - Reported count; may be a number, a numeric string, or absent.
 * @returns {number} The base-10 integer prefix of the value when it is positive, otherwise 0.
 */
function normalizeAiCallCount(value) {
    const text = String(value ?? '0');
    const count = Number.parseInt(text, 10);

    // Unparseable input yields NaN; that and non-positive counts both mean "nothing to charge".
    if (!Number.isFinite(count) || count <= 0) {
        return 0;
    }

    return count;
}
66
/**
 * Charges the `ai-call` pay-per-event for the given number of AI calls.
 *
 * The whole quantity is charged in a single `Actor.charge` call using `count`
 * instead of one request per call. Besides saving round-trips, this keeps the
 * reported `chargedCount` truthful: with per-call charging, an exception thrown
 * midway discarded the tally of charges that had already gone through.
 *
 * Charging is best-effort: a failure is logged and reported through
 * `skippedReason` rather than thrown, so the run can still store its output.
 *
 * @param {number} requestedAiCalls - Number of `ai-call` events to charge.
 * @returns {Promise<{ chargedCount: number, eventChargeLimitReached: boolean, skippedReason: string | null }>}
 *   How many events were charged, whether the charge limit was hit, and why
 *   charging was skipped (`null` when it was attempted successfully).
 */
async function chargeForAiCalls(requestedAiCalls) {
    if (requestedAiCalls <= 0) {
        return {
            chargedCount: 0,
            eventChargeLimitReached: false,
            skippedReason: 'no-ai-calls',
        };
    }

    try {
        const chargeResult = await Actor.charge({
            eventName: 'ai-call',
            count: requestedAiCalls,
        });

        const chargedCount = chargeResult.chargedCount;
        const eventChargeLimitReached = Boolean(chargeResult.eventChargeLimitReached);

        if (chargedCount < requestedAiCalls) {
            console.warn(
                `Requested ${requestedAiCalls} ai-call charge(s), but only ${chargedCount} were charged within the user's limit.`,
            );
        }

        return {
            chargedCount,
            eventChargeLimitReached,
            skippedReason: null,
        };
    } catch (error) {
        const message = error instanceof Error ? error.message : String(error);

        console.warn(`Skipping ai-call charging. ${message}`);

        return {
            chargedCount: 0,
            eventChargeLimitReached: false,
            skippedReason: message,
        };
    }
}

src/workerClient.js

const DEFAULT_REQUEST_TIMEOUT_MS = 120_000;

/**
 * POSTs the normalized Actor input to the worker's analyze endpoint and returns
 * the parsed response payload.
 *
 * @param {object} input - Normalized Actor input; sent as the JSON request body.
 * @param {Record<string, string | undefined>} env - Environment variables. Reads
 *   `WORKER_BASE_URL` (required), `WORKER_API_TOKEN` (optional bearer token) and
 *   `REQUEST_TIMEOUT_MS` (optional; defaults to 120000).
 * @returns {Promise<object>} The payload, which has `success === true`, a truthy
 *   `result`, and a `meta` whose `typeof` is `'object'`.
 * @throws {Error} `Worker request timed out.` when the timeout aborted the request;
 *   `<type>: <message>` when the worker answers with a non-2xx status;
 *   `Worker returned an unexpected response payload.` when the body has the wrong shape.
 */
export async function fetchWorkerResult(input, env) {
    const endpointUrl = buildWorkerEndpointUrl(env.WORKER_BASE_URL);

    // A blank, non-numeric, or non-positive override must never reach setTimeout:
    // a NaN or 0 delay fires almost immediately and would abort every request.
    const parsedTimeoutMs = Number.parseInt(env.REQUEST_TIMEOUT_MS ?? '', 10);
    const requestTimeoutMs = Number.isFinite(parsedTimeoutMs) && parsedTimeoutMs > 0
        ? parsedTimeoutMs
        : DEFAULT_REQUEST_TIMEOUT_MS;

    const headers = {
        'content-type': 'application/json',
    };

    if (env.WORKER_API_TOKEN) {
        headers.authorization = `Bearer ${env.WORKER_API_TOKEN}`;
    }

    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), requestTimeoutMs);

    try {
        const response = await fetch(endpointUrl, {
            method: 'POST',
            headers,
            body: JSON.stringify(input),
            signal: controller.signal,
        });
        const payload = await safeParseJson(response);

        if (!response.ok) {
            const errorType = payload?.error?.type ?? 'WORKER_ERROR';
            const errorMessage = payload?.error?.message ?? `Worker request failed with status ${response.status}.`;
            throw new Error(`${errorType}: ${errorMessage}`);
        }

        if (!payload || payload.success !== true || !payload.result || typeof payload.meta !== 'object') {
            throw new Error('Worker returned an unexpected response payload.');
        }

        return payload;
    } catch (error) {
        // Only this function's timer aborts the signal, so its state is the reliable
        // timeout indicator. fetch rejects with the signal's abort reason, which need
        // not be an Error with a recognizable message, and a genuine worker error may
        // itself mention "timeout" — so the error text is not inspected.
        if (controller.signal.aborted) {
            throw new Error('Worker request timed out.', { cause: error });
        }

        throw error;
    } finally {
        clearTimeout(timeoutId);
    }
}
52
/**
 * Derives the worker's analyze endpoint from the configured base URL.
 *
 * @param {unknown} baseUrl - Value of `WORKER_BASE_URL`.
 * @returns {string} The trimmed base URL unchanged when it already ends in
 *   `/analyze`; otherwise the base URL with `analyze` resolved beneath it.
 * @throws {Error} When the base URL is not a string or is blank.
 */
function buildWorkerEndpointUrl(baseUrl) {
    const trimmedBaseUrl = typeof baseUrl === 'string' ? baseUrl.trim() : '';

    if (trimmedBaseUrl === '') {
        throw new Error('Missing required environment variable WORKER_BASE_URL.');
    }

    const alreadyPointsAtEndpoint = trimmedBaseUrl.endsWith('/analyze');

    if (alreadyPointsAtEndpoint) {
        return trimmedBaseUrl;
    }

    // The trailing slash makes URL resolution append to the last path segment
    // instead of replacing it.
    const endpoint = new URL('analyze', ensureTrailingSlash(trimmedBaseUrl));

    return endpoint.toString();
}
66
/**
 * Appends a `/` to the value unless it already ends with one.
 *
 * @param {string} value - String to terminate with a slash.
 * @returns {string} The value, guaranteed to end in `/`.
 */
function ensureTrailingSlash(value) {
    if (value.endsWith('/')) {
        return value;
    }

    return `${value}/`;
}
70
/**
 * Reads a response body as JSON without ever throwing.
 *
 * @param {{ headers: { get(name: string): string | null }, json(): Promise<unknown> }} response
 *   Response-like object.
 * @returns {Promise<unknown | null>} The parsed body, or `null` when the
 *   `content-type` header does not mention `application/json` or parsing fails.
 */
async function safeParseJson(response) {
    const declaredType = response.headers.get('content-type') ?? '';
    let parsedBody = null;

    if (declaredType.includes('application/json')) {
        try {
            parsedBody = await response.json();
        } catch {
            // A malformed body is treated the same as a non-JSON response.
            parsedBody = null;
        }
    }

    return parsedBody;
}

Youtube Transcript Scraper

scrapium/youtube-transcript-scraper

Scrapium

Youtube Transcript Scraper

scrapebase/youtube-transcript-scraper

ScrapeBase

Youtube Transcript Scraper

scraperforge/youtube-transcript-scraper

ScraperForge

Youtube Transcript API

vivid_astronaut/youtube-transcript

Fabio Suizu

YouTube Transcript Scraper PRO

intelscrape/youtube-transcript-scraper-pro

YouTube Transcript Scraper PRO, YouTube transcript scraper, get YouTube transcripts, download YouTube captions, extract subtitles, YouTube comments scraper, YouTube video text extractor, YouTube API alternative, LLM training data, datasets, Whisper AI transcription, scrape YouTube transcripts