Rag Pipeline
Pricing
from $5.00 / 1,000 results
Rag Pipeline
One-click RAG pipeline: chunks text, generates embeddings, and stores vectors in Pinecone or Qdrant. Provide your content and API keys — the orchestrator handles the rest.
Rag Pipeline
Pricing
from $5.00 / 1,000 results
One-click RAG pipeline: chunks text, generates embeddings, and stores vectors in Pinecone or Qdrant. Provide your content and API keys — the orchestrator handles the rest.
You can access the Rag Pipeline programmatically from your own applications by using the Apify API. You can also choose the language preference from below. To use the Apify API, you’ll need an Apify account and your API token, found in Integrations settings in Apify Console.
{ "openapi": "3.0.1", "info": { "title": "Rag Pipeline API", "version": "0.0", "x-build-id": "f1ctb0Bd8ZnkqttNA" }, "servers": [ { "url": "https://api.apify.com/v2" } ], "paths": { "/acts/labrat011~rag-pipeline/run-sync-get-dataset-items": { "post": { "operationId": "run-sync-get-dataset-items-labrat011-rag-pipeline", "x-openai-isConsequential": false, "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK" } } } }, "/acts/labrat011~rag-pipeline/runs": { "post": { "operationId": "runs-sync-labrat011-rag-pipeline", "x-openai-isConsequential": false, "summary": "Executes an Actor and returns information about the initiated run in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/runsResponseSchema" } } } } } } }, "/acts/labrat011~rag-pipeline/run-sync": { "post": { "operationId": "run-sync-labrat011-rag-pipeline", "x-openai-isConsequential": false, "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, 
"description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK" } } } } }, "components": { "schemas": { "inputSchema": { "type": "object", "required": [ "embedding_api_key", "vector_db_api_key", "index_name" ], "properties": { "text": { "title": "Text to Process", "type": "string", "description": "Plain text or Markdown content to chunk, embed, and store. For bulk processing from a crawler, use source_dataset_id instead." }, "source_dataset_id": { "title": "Source Dataset ID (from Crawler)", "type": "string", "description": "Apify dataset ID from a previous actor run (e.g., Website Content Crawler). Each item's text field will be chunked, embedded, and stored. Takes priority over the text input." }, "source_dataset_field": { "title": "Source Dataset Text Field", "type": "string", "description": "Which field to read from source dataset items. Common values: 'text', 'markdown', 'content'.", "default": "text" }, "chunking_strategy": { "title": "Chunking Strategy", "enum": [ "recursive", "markdown", "sentence" ], "type": "string", "description": "How to split content. 'recursive' (default): paragraphs, then sentences, then words. 'markdown': splits on headers. 'sentence': sentence boundaries only.", "default": "recursive" }, "chunk_size": { "title": "Chunk Size (tokens)", "minimum": 64, "maximum": 8192, "type": "integer", "description": "Target size for each chunk in tokens. Recommended: 256-1024 for most RAG use cases.", "default": 512 }, "chunk_overlap": { "title": "Chunk Overlap (tokens)", "minimum": 0, "maximum": 2048, "type": "integer", "description": "Overlapping tokens between consecutive chunks. Helps preserve context across boundaries.", "default": 64 }, "embedding_api_key": { "title": "Embedding API Key (OpenAI or Cohere)", "type": "string", "description": "Your OpenAI or Cohere API key for generating embeddings. Never logged or stored beyond this run." 
}, "embedding_provider": { "title": "Embedding Provider", "enum": [ "openai", "cohere" ], "type": "string", "description": "Which embedding provider to use.", "default": "openai" }, "embedding_model": { "title": "Embedding Model", "type": "string", "description": "Which embedding model to use. OpenAI: 'text-embedding-3-small' (1536d), 'text-embedding-3-large' (3072d). Cohere: 'embed-english-v3.0' (1024d), 'embed-multilingual-v3.0' (1024d).", "default": "text-embedding-3-small" }, "embedding_batch_size": { "title": "Embedding Batch Size", "minimum": 1, "maximum": 2048, "type": "integer", "description": "Texts per embedding API request. Higher = faster but more memory. OpenAI max: 2048, Cohere max: 96.", "default": 128 }, "vector_db_api_key": { "title": "Vector DB API Key", "type": "string", "description": "Your Pinecone or Qdrant API key. Never logged or stored beyond this run." }, "vector_db_provider": { "title": "Vector Database Provider", "enum": [ "pinecone", "qdrant" ], "type": "string", "description": "Which vector database to write to.", "default": "pinecone" }, "index_name": { "title": "Index / Collection Name", "type": "string", "description": "Pinecone: the index name. Qdrant: the collection name (auto-created if it doesn't exist)." }, "qdrant_url": { "title": "Qdrant Cluster URL", "type": "string", "description": "Qdrant Cloud only: your full cluster URL (e.g., 'https://xyz.us-west-1.aws.cloud.qdrant.io:6333'). Not needed for Pinecone." }, "pinecone_namespace": { "title": "Namespace (Pinecone only)", "type": "string", "description": "Pinecone namespace. Leave empty for the default namespace. Ignored for Qdrant.", "default": "" }, "qdrant_distance_metric": { "title": "Distance Metric (Qdrant only)", "enum": [ "Cosine", "Dot", "Euclid" ], "type": "string", "description": "Distance metric for Qdrant collection creation. 
Only used when auto-creating a collection.", "default": "Cosine" } } }, "runsResponseSchema": { "type": "object", "properties": { "data": { "type": "object", "properties": { "id": { "type": "string" }, "actId": { "type": "string" }, "userId": { "type": "string" }, "startedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" }, "finishedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" }, "status": { "type": "string", "example": "READY" }, "meta": { "type": "object", "properties": { "origin": { "type": "string", "example": "API" }, "userAgent": { "type": "string" } } }, "stats": { "type": "object", "properties": { "inputBodyLen": { "type": "integer", "example": 2000 }, "rebootCount": { "type": "integer", "example": 0 }, "restartCount": { "type": "integer", "example": 0 }, "resurrectCount": { "type": "integer", "example": 0 }, "computeUnits": { "type": "integer", "example": 0 } } }, "options": { "type": "object", "properties": { "build": { "type": "string", "example": "latest" }, "timeoutSecs": { "type": "integer", "example": 300 }, "memoryMbytes": { "type": "integer", "example": 1024 }, "diskMbytes": { "type": "integer", "example": 2048 } } }, "buildId": { "type": "string" }, "defaultKeyValueStoreId": { "type": "string" }, "defaultDatasetId": { "type": "string" }, "defaultRequestQueueId": { "type": "string" }, "buildNumber": { "type": "string", "example": "1.0.0" }, "containerUrl": { "type": "string" }, "usage": { "type": "object", "properties": { "ACTOR_COMPUTE_UNITS": { "type": "integer", "example": 0 }, "DATASET_READS": { "type": "integer", "example": 0 }, "DATASET_WRITES": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_WRITES": { "type": "integer", "example": 1 }, "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_WRITES": { "type": 
"integer", "example": 0 }, "DATA_TRANSFER_INTERNAL_GBYTES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "integer", "example": 0 }, "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "integer", "example": 0 }, "PROXY_SERPS": { "type": "integer", "example": 0 } } }, "usageTotalUsd": { "type": "number", "example": 0.00005 }, "usageUsd": { "type": "object", "properties": { "ACTOR_COMPUTE_UNITS": { "type": "integer", "example": 0 }, "DATASET_READS": { "type": "integer", "example": 0 }, "DATASET_WRITES": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_WRITES": { "type": "number", "example": 0.00005 }, "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_WRITES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_INTERNAL_GBYTES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "integer", "example": 0 }, "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "integer", "example": 0 }, "PROXY_SERPS": { "type": "integer", "example": 0 } } } } } } } } }}
OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.
OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.
By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.
You can download the OpenAPI definitions for Rag Pipeline from the options below:
If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.
You can also check out our other API clients: