
RAG Web Browser
No credit card required

RAG Web Browser
No credit card required
Web browser for OpenAI Assistants API and RAG pipelines, similar to a web browser in ChatGPT. It queries Google Search, scrapes the top N pages from the results, and returns their cleaned content as Markdown for further processing by an LLM. It can also scrape individual URLs.
You can access the RAG Web Browser programmatically from your own applications by using the Apify API. You can choose your preferred language below. To use the Apify API, you'll need an Apify account and your API token, found in Integrations settings in Apify Console.
1{
2 "openapi": "3.0.1",
3 "info": {
4 "version": "1.0",
5 "x-build-id": "eAiYQ9kb63kbindrU"
6 },
7 "servers": [
8 {
9 "url": "https://api.apify.com/v2"
10 }
11 ],
12 "paths": {
13 "/acts/apify~rag-web-browser/run-sync-get-dataset-items": {
14 "post": {
15 "operationId": "run-sync-get-dataset-items-apify-rag-web-browser",
16 "x-openai-isConsequential": false,
17 "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
18 "tags": [
19 "Run Actor"
20 ],
21 "requestBody": {
22 "required": true,
23 "content": {
24 "application/json": {
25 "schema": {
26 "$ref": "#/components/schemas/inputSchema"
27 }
28 }
29 }
30 },
31 "parameters": [
32 {
33 "name": "token",
34 "in": "query",
35 "required": true,
36 "schema": {
37 "type": "string"
38 },
39 "description": "Enter your Apify token here"
40 }
41 ],
42 "responses": {
43 "200": {
44 "description": "OK"
45 }
46 }
47 }
48 },
49 "/acts/apify~rag-web-browser/runs": {
50 "post": {
51 "operationId": "runs-sync-apify-rag-web-browser",
52 "x-openai-isConsequential": false,
53 "summary": "Executes an Actor and returns information about the initiated run in response.",
54 "tags": [
55 "Run Actor"
56 ],
57 "requestBody": {
58 "required": true,
59 "content": {
60 "application/json": {
61 "schema": {
62 "$ref": "#/components/schemas/inputSchema"
63 }
64 }
65 }
66 },
67 "parameters": [
68 {
69 "name": "token",
70 "in": "query",
71 "required": true,
72 "schema": {
73 "type": "string"
74 },
75 "description": "Enter your Apify token here"
76 }
77 ],
78 "responses": {
79 "200": {
80 "description": "OK",
81 "content": {
82 "application/json": {
83 "schema": {
84 "$ref": "#/components/schemas/runsResponseSchema"
85 }
86 }
87 }
88 }
89 }
90 }
91 },
92 "/acts/apify~rag-web-browser/run-sync": {
93 "post": {
94 "operationId": "run-sync-apify-rag-web-browser",
95 "x-openai-isConsequential": false,
96 "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
97 "tags": [
98 "Run Actor"
99 ],
100 "requestBody": {
101 "required": true,
102 "content": {
103 "application/json": {
104 "schema": {
105 "$ref": "#/components/schemas/inputSchema"
106 }
107 }
108 }
109 },
110 "parameters": [
111 {
112 "name": "token",
113 "in": "query",
114 "required": true,
115 "schema": {
116 "type": "string"
117 },
118 "description": "Enter your Apify token here"
119 }
120 ],
121 "responses": {
122 "200": {
123 "description": "OK"
124 }
125 }
126 }
127 }
128 },
129 "components": {
130 "schemas": {
131 "inputSchema": {
132 "type": "object",
133 "required": [
134 "query"
135 ],
136 "properties": {
137 "query": {
138 "title": "Search term or URL",
139 "pattern": "[^\\s]+",
140 "type": "string",
141 "description": "Enter Google Search keywords or a URL of a specific web page. The keywords might include the [advanced search operators](https://blog.apify.com/how-to-scrape-google-like-a-pro/). Examples:\n\n- <code>san francisco weather</code>\n- <code>https://www.cnn.com</code>\n- <code>function calling site:openai.com</code>"
142 },
143 "maxResults": {
144 "title": "Maximum results",
145 "minimum": 1,
146 "maximum": 100,
147 "type": "integer",
148 "description": "The maximum number of top organic Google Search results whose web pages will be extracted. If `query` is a URL, then this field is ignored and the Actor only fetches the specific web page.",
149 "default": 3
150 },
151 "outputFormats": {
152 "title": "Output formats",
153 "type": "array",
154 "description": "Select one or more formats to which the target web pages will be extracted and saved in the resulting dataset.",
155 "items": {
156 "type": "string",
157 "enum": [
158 "text",
159 "markdown",
160 "html"
161 ],
162 "enumTitles": [
163 "Plain text",
164 "Markdown",
165 "HTML"
166 ]
167 },
168 "default": [
169 "markdown"
170 ]
171 },
172 "requestTimeoutSecs": {
173 "title": "Request timeout",
174 "minimum": 1,
175 "maximum": 300,
176 "type": "integer",
177 "description": "The maximum time in seconds available for the request, including querying Google Search and scraping the target web pages. For example, OpenAI allows only [45 seconds](https://platform.openai.com/docs/actions/production#timeouts) for custom actions. If a target page loading and extraction exceeds this timeout, the corresponding page will be skipped in results to ensure at least some results are returned within the timeout. If no page is extracted within the timeout, the whole request fails.",
178 "default": 40
179 },
180 "serpProxyGroup": {
181 "title": "SERP proxy group",
182 "enum": [
183 "GOOGLE_SERP",
184 "SHADER"
185 ],
186 "type": "string",
187 "description": "Enables overriding the default Apify Proxy group used for fetching Google Search results.",
188 "default": "GOOGLE_SERP"
189 },
190 "serpMaxRetries": {
191 "title": "SERP max retries",
192 "minimum": 0,
193 "maximum": 5,
194 "type": "integer",
195 "description": "The maximum number of times the Actor will retry fetching the Google Search results on error. If the last attempt fails, the entire request fails.",
196 "default": 2
197 },
198 "proxyConfiguration": {
199 "title": "Proxy configuration",
200 "type": "object",
201 "description": "Apify Proxy configuration used for scraping the target web pages.",
202 "default": {
203 "useApifyProxy": true
204 }
205 },
206 "removeElementsCssSelector": {
207 "title": "Remove HTML elements (CSS selector)",
208 "type": "string",
209 "description": "A CSS selector matching HTML elements that will be removed from the DOM, before converting it to text, Markdown, or saving as HTML. This is useful to skip irrelevant page content. The value must be a valid CSS selector as accepted by the `document.querySelectorAll()` function. \n\nBy default, the Actor removes common navigation elements, headers, footers, modals, scripts, and inline images. You can disable the removal by setting this value to some non-existent CSS selector like `dummy_keep_everything`.",
210 "default": "nav, footer, script, style, noscript, svg, img[src^='data:'],\n[role=\"alert\"],\n[role=\"banner\"],\n[role=\"dialog\"],\n[role=\"alertdialog\"],\n[role=\"region\"][aria-label*=\"skip\" i],\n[aria-modal=\"true\"]"
211 },
212 "htmlTransformer": {
213 "title": "HTML transformer",
214 "type": "string",
215 "description": "Specify how to transform the HTML to extract meaningful content without any extra fluff, like navigation or modals. The HTML transformation happens after removing and clicking the DOM elements.\n\n- **None** (default) - Only removes the HTML elements specified via 'Remove HTML elements' option.\n\n- **Readable text** - Extracts the main contents of the webpage, without navigation and other fluff.",
216 "default": "none"
217 },
218 "initialConcurrency": {
219 "title": "Initial browsing concurrency",
220 "minimum": 0,
221 "maximum": 50,
222 "type": "integer",
223 "description": "The initial number of web browsers running in parallel. The system automatically scales the number based on the CPU and memory usage, in the range specified by `minConcurrency` and `maxConcurrency`. If the initial value is `0`, the Actor picks the number automatically based on the available memory.",
224 "default": 4
225 },
226 "minConcurrency": {
227 "title": "Minimum browsing concurrency",
228 "minimum": 1,
229 "maximum": 50,
230 "type": "integer",
231 "description": "The minimum number of web browsers running in parallel.",
232 "default": 1
233 },
234 "maxConcurrency": {
235 "title": "Maximum browsing concurrency",
236 "minimum": 1,
237 "maximum": 100,
238 "type": "integer",
239 "description": "The maximum number of web browsers running in parallel.",
240 "default": 50
241 },
242 "maxRequestRetries": {
243 "title": "Target page max retries",
244 "minimum": 0,
245 "maximum": 3,
246 "type": "integer",
247 "description": "The maximum number of times the Actor will retry loading the target web page on error. If the last attempt fails, the page will be skipped in the results.",
248 "default": 1
249 },
250 "dynamicContentWaitSecs": {
251 "title": "Target page dynamic content timeout",
252 "type": "integer",
253 "description": "The maximum time in seconds to wait for dynamic page content to load. The Actor considers the web page as fully loaded once this time elapses or when the network becomes idle.",
254 "default": 10
255 },
256 "removeCookieWarnings": {
257 "title": "Remove cookie warnings",
258 "type": "boolean",
259 "description": "If enabled, the Actor attempts to close or remove cookie consent dialogs to improve the quality of extracted text. Note that this setting increases the latency.",
260 "default": true
261 },
262 "debugMode": {
263 "title": "Enable debug mode",
264 "type": "boolean",
265 "description": "If enabled, the Actor will store debugging information into the resulting dataset under the `debug` field.",
266 "default": false
267 }
268 }
269 },
270 "runsResponseSchema": {
271 "type": "object",
272 "properties": {
273 "data": {
274 "type": "object",
275 "properties": {
276 "id": {
277 "type": "string"
278 },
279 "actId": {
280 "type": "string"
281 },
282 "userId": {
283 "type": "string"
284 },
285 "startedAt": {
286 "type": "string",
287 "format": "date-time",
288 "example": "2025-01-08T00:00:00.000Z"
289 },
290 "finishedAt": {
291 "type": "string",
292 "format": "date-time",
293 "example": "2025-01-08T00:00:00.000Z"
294 },
295 "status": {
296 "type": "string",
297 "example": "READY"
298 },
299 "meta": {
300 "type": "object",
301 "properties": {
302 "origin": {
303 "type": "string",
304 "example": "API"
305 },
306 "userAgent": {
307 "type": "string"
308 }
309 }
310 },
311 "stats": {
312 "type": "object",
313 "properties": {
314 "inputBodyLen": {
315 "type": "integer",
316 "example": 2000
317 },
318 "rebootCount": {
319 "type": "integer",
320 "example": 0
321 },
322 "restartCount": {
323 "type": "integer",
324 "example": 0
325 },
326 "resurrectCount": {
327 "type": "integer",
328 "example": 0
329 },
330 "computeUnits": {
331 "type": "integer",
332 "example": 0
333 }
334 }
335 },
336 "options": {
337 "type": "object",
338 "properties": {
339 "build": {
340 "type": "string",
341 "example": "latest"
342 },
343 "timeoutSecs": {
344 "type": "integer",
345 "example": 300
346 },
347 "memoryMbytes": {
348 "type": "integer",
349 "example": 1024
350 },
351 "diskMbytes": {
352 "type": "integer",
353 "example": 2048
354 }
355 }
356 },
357 "buildId": {
358 "type": "string"
359 },
360 "defaultKeyValueStoreId": {
361 "type": "string"
362 },
363 "defaultDatasetId": {
364 "type": "string"
365 },
366 "defaultRequestQueueId": {
367 "type": "string"
368 },
369 "buildNumber": {
370 "type": "string",
371 "example": "1.0.0"
372 },
373 "containerUrl": {
374 "type": "string"
375 },
376 "usage": {
377 "type": "object",
378 "properties": {
379 "ACTOR_COMPUTE_UNITS": {
380 "type": "integer",
381 "example": 0
382 },
383 "DATASET_READS": {
384 "type": "integer",
385 "example": 0
386 },
387 "DATASET_WRITES": {
388 "type": "integer",
389 "example": 0
390 },
391 "KEY_VALUE_STORE_READS": {
392 "type": "integer",
393 "example": 0
394 },
395 "KEY_VALUE_STORE_WRITES": {
396 "type": "integer",
397 "example": 1
398 },
399 "KEY_VALUE_STORE_LISTS": {
400 "type": "integer",
401 "example": 0
402 },
403 "REQUEST_QUEUE_READS": {
404 "type": "integer",
405 "example": 0
406 },
407 "REQUEST_QUEUE_WRITES": {
408 "type": "integer",
409 "example": 0
410 },
411 "DATA_TRANSFER_INTERNAL_GBYTES": {
412 "type": "integer",
413 "example": 0
414 },
415 "DATA_TRANSFER_EXTERNAL_GBYTES": {
416 "type": "integer",
417 "example": 0
418 },
419 "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
420 "type": "integer",
421 "example": 0
422 },
423 "PROXY_SERPS": {
424 "type": "integer",
425 "example": 0
426 }
427 }
428 },
429 "usageTotalUsd": {
430 "type": "number",
431 "example": 0.00005
432 },
433 "usageUsd": {
434 "type": "object",
435 "properties": {
436 "ACTOR_COMPUTE_UNITS": {
437 "type": "integer",
438 "example": 0
439 },
440 "DATASET_READS": {
441 "type": "integer",
442 "example": 0
443 },
444 "DATASET_WRITES": {
445 "type": "integer",
446 "example": 0
447 },
448 "KEY_VALUE_STORE_READS": {
449 "type": "integer",
450 "example": 0
451 },
452 "KEY_VALUE_STORE_WRITES": {
453 "type": "number",
454 "example": 0.00005
455 },
456 "KEY_VALUE_STORE_LISTS": {
457 "type": "integer",
458 "example": 0
459 },
460 "REQUEST_QUEUE_READS": {
461 "type": "integer",
462 "example": 0
463 },
464 "REQUEST_QUEUE_WRITES": {
465 "type": "integer",
466 "example": 0
467 },
468 "DATA_TRANSFER_INTERNAL_GBYTES": {
469 "type": "integer",
470 "example": 0
471 },
472 "DATA_TRANSFER_EXTERNAL_GBYTES": {
473 "type": "integer",
474 "example": 0
475 },
476 "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
477 "type": "integer",
478 "example": 0
479 },
480 "PROXY_SERPS": {
481 "type": "integer",
482 "example": 0
483 }
484 }
485 }
486 }
487 }
488 }
489 }
490 }
491 }
492}
RAG Web Browser OpenAPI definition
OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.
OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.
By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.
You can download the OpenAPI definitions for RAG Web Browser from the options below:
If you'd like to learn more about how OpenAPI powers GPTs, read our blog post.
You can also check out our other API clients:
Actor Metrics
231 monthly users
-
49 bookmarks
78% runs succeeded
20 days response time
Created in Sep 2024
Modified 18 hours ago