1import { ApifyClient } from 'apify-client';
2
3
4
// API client used for every request below.
// The token is read from the APIFY_TOKEN environment variable when one is
// available, so the secret does not have to be committed with the script; the
// placeholder literal remains as the fallback for quick copy-and-paste use.
// `||` (not `??`) is deliberate: an empty-string token is never usable.
// `globalThis.process?.` keeps the lookup from throwing where `process` is not defined.
const client = new ApifyClient({
    token: globalThis.process?.env?.APIFY_TOKEN || '<YOUR_API_TOKEN>',
});
8
9
// Input object passed to the Actor: a single `text` field holding three
// paragraphs, each separated from the next by one blank line.
const input = {
    text: [
        "Retrieval-Augmented Generation (RAG) combines a language model with an external knowledge base. Instead of relying only on what the model memorized during training, RAG retrieves relevant chunks of text and feeds them to the model as context.",
        "To build a RAG system you first split your documents into chunks, create embeddings for each chunk, and store them in a vector database. At query time you embed the user's question, find the most similar chunks, and pass them to the model alongside the prompt.",
        "Chunking matters a lot. Chunks that are too large dilute relevance and waste tokens, while chunks that are too small lose context. A common starting point is around 1000 characters per chunk with a small overlap, so that ideas spanning a boundary are not lost between neighbouring chunks.",
    ].join('\n\n'),
};
17
18
// Start the Actor with the prepared input and await the resulting run record.
const run = await client.actor("zenomastro/text-splitter-for-llm").call(input);

// NOTE(review): per the apify-client docs, `call` resolves once the run has
// finished, whatever its outcome — confirm against the installed version.
// Warn (without aborting) on any non-success status so an empty or partial
// dataset printed below is not mistaken for a complete result.
if (run.status !== 'SUCCEEDED') {
    console.warn(`Actor run ${run.id} ended with status ${run.status}; the dataset may be empty or incomplete.`);
}

// Print a link to the run's default dataset, then dump every item it returned.
console.log('Results from dataset');
console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);
const { items } = await client.dataset(run.defaultDatasetId).listItems();
for (const item of items) {
    console.dir(item);
}
28
29