Algolia Website Indexer avatar

Algolia Website Indexer

Try for free

No credit card required

Go to Store
Algolia Website Indexer

Algolia Website Indexer

apify/algolia-website-indexer
Try for free

No credit card required

The Indexer recursively crawls a website using the Puppeteer browser (headless Chrome) and indexes the selected pages into an Algolia index.

You can access the Algolia Website Indexer programmatically from your own applications by using the Apify API. You can choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, found in Integrations settings in Apify Console.

1{
2  "openapi": "3.0.1",
3  "info": {
4    "version": "0.0",
5    "x-build-id": "7genhiZx9fqtjeScb"
6  },
7  "servers": [
8    {
9      "url": "https://api.apify.com/v2"
10    }
11  ],
12  "paths": {
13    "/acts/apify~algolia-website-indexer/run-sync-get-dataset-items": {
14      "post": {
15        "operationId": "run-sync-get-dataset-items-apify-algolia-website-indexer",
16        "x-openai-isConsequential": false,
17        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
18        "tags": [
19          "Run Actor"
20        ],
21        "requestBody": {
22          "required": true,
23          "content": {
24            "application/json": {
25              "schema": {
26                "$ref": "#/components/schemas/inputSchema"
27              }
28            }
29          }
30        },
31        "parameters": [
32          {
33            "name": "token",
34            "in": "query",
35            "required": true,
36            "schema": {
37              "type": "string"
38            },
39            "description": "Enter your Apify token here"
40          }
41        ],
42        "responses": {
43          "200": {
44            "description": "OK"
45          }
46        }
47      }
48    },
49    "/acts/apify~algolia-website-indexer/runs": {
50      "post": {
51        "operationId": "runs-sync-apify-algolia-website-indexer",
52        "x-openai-isConsequential": false,
53        "summary": "Executes an Actor and returns information about the initiated run in response.",
54        "tags": [
55          "Run Actor"
56        ],
57        "requestBody": {
58          "required": true,
59          "content": {
60            "application/json": {
61              "schema": {
62                "$ref": "#/components/schemas/inputSchema"
63              }
64            }
65          }
66        },
67        "parameters": [
68          {
69            "name": "token",
70            "in": "query",
71            "required": true,
72            "schema": {
73              "type": "string"
74            },
75            "description": "Enter your Apify token here"
76          }
77        ],
78        "responses": {
79          "200": {
80            "description": "OK",
81            "content": {
82              "application/json": {
83                "schema": {
84                  "$ref": "#/components/schemas/runsResponseSchema"
85                }
86              }
87            }
88          }
89        }
90      }
91    },
92    "/acts/apify~algolia-website-indexer/run-sync": {
93      "post": {
94        "operationId": "run-sync-apify-algolia-website-indexer",
95        "x-openai-isConsequential": false,
96        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
97        "tags": [
98          "Run Actor"
99        ],
100        "requestBody": {
101          "required": true,
102          "content": {
103            "application/json": {
104              "schema": {
105                "$ref": "#/components/schemas/inputSchema"
106              }
107            }
108          }
109        },
110        "parameters": [
111          {
112            "name": "token",
113            "in": "query",
114            "required": true,
115            "schema": {
116              "type": "string"
117            },
118            "description": "Enter your Apify token here"
119          }
120        ],
121        "responses": {
122          "200": {
123            "description": "OK"
124          }
125        }
126      }
127    }
128  },
129  "components": {
130    "schemas": {
131      "inputSchema": {
132        "type": "object",
133        "required": [
134          "algoliaAppId",
135          "algoliaApiKey",
136          "algoliaIndexName",
137          "startUrls"
138        ],
139        "properties": {
140          "algoliaAppId": {
141            "title": "Algolia App ID",
142            "type": "string",
143            "description": "Your Algolia Application ID"
144          },
145          "algoliaApiKey": {
146            "title": "Algolia API Key",
147            "type": "string",
148            "description": "Your Algolia API key"
149          },
150          "algoliaIndexName": {
151            "title": "Algolia Index Name",
152            "type": "string",
153            "description": "Your Algolia index name"
154          },
155          "crawlerName": {
156            "title": "Crawler Name",
157            "type": "string",
158            "description": "Crawler name. Pages are added to, updated in, or removed from the index based on this name, so a single index can hold more than one website."
159          },
160          "startUrls": {
161            "title": "Start URLs",
162            "type": "array",
163            "description": "URLs where to start crawling",
164            "items": {
165              "type": "object",
166              "required": [
167                "url"
168              ],
169              "properties": {
170                "url": {
171                  "type": "string",
172                  "title": "URL of a web page",
173                  "format": "uri"
174                }
175              }
176            }
177          },
178          "selectors": {
179            "title": "Selectors",
180            "type": "array",
181            "description": "Pick the selectors whose text content you want to index. The key is the attribute name and the value is a CSS selector.",
182            "items": {
183              "type": "object",
184              "required": [
185                "key",
186                "value"
187              ],
188              "properties": {
189                "key": {
190                  "type": "string",
191                  "title": "Key"
192                },
193                "value": {
194                  "type": "string",
195                  "title": "Value"
196                }
197              }
198            }
199          },
200          "requiredAttributes": {
201            "title": "Required attributes",
202            "type": "array",
203            "description": "Pick the attributes that are required in the index. By default, all attributes from selectors are required."
204          },
205          "waitForElement": {
206            "title": "Wait for element",
207            "type": "string",
208            "description": "Selector of the element to wait for on each page."
209          },
210          "additionalPageAttrs": {
211            "title": "Additional attributes",
212            "type": "object",
213            "description": "Pick additional attributes you want to attach to each record in index.",
214            "default": {}
215          },
216          "skipIndexUpdate": {
217            "title": "Skip Index Update",
218            "type": "boolean",
219            "description": "If checked, the crawler will not update the Algolia index.",
220            "default": true
221          },
222          "pseudoUrls": {
223            "title": "Pseudo-URLs",
224            "type": "array",
225            "description": "Overrides default pseudoUrls",
226            "items": {
227              "type": "object",
228              "required": [
229                "purl"
230              ],
231              "properties": {
232                "purl": {
233                  "type": "string",
234                  "title": "Pseudo-URL of a web page"
235                }
236              }
237            }
238          },
239          "pageFunction": {
240            "title": "Page function",
241            "type": "string",
242            "description": "Overrides default pageFunction"
243          },
244          "clickableElements": {
245            "title": "Clickable elements",
246            "type": "string",
247            "description": "Overrides default clickableElements"
248          },
249          "keepUrlFragments": {
250            "title": "Keep URL fragments",
251            "type": "boolean",
252            "description": "Option to switch on enqueueing of URLs with URL fragments",
253            "default": false
254          },
255          "omitSearchParamsFromUrl": {
256            "title": "Omit search params from URLs",
257            "type": "boolean",
258            "description": "Option to switch off enqueueing with search params.",
259            "default": false
260          }
261        }
262      },
263      "runsResponseSchema": {
264        "type": "object",
265        "properties": {
266          "data": {
267            "type": "object",
268            "properties": {
269              "id": {
270                "type": "string"
271              },
272              "actId": {
273                "type": "string"
274              },
275              "userId": {
276                "type": "string"
277              },
278              "startedAt": {
279                "type": "string",
280                "format": "date-time",
281                "example": "2025-01-08T00:00:00.000Z"
282              },
283              "finishedAt": {
284                "type": "string",
285                "format": "date-time",
286                "example": "2025-01-08T00:00:00.000Z"
287              },
288              "status": {
289                "type": "string",
290                "example": "READY"
291              },
292              "meta": {
293                "type": "object",
294                "properties": {
295                  "origin": {
296                    "type": "string",
297                    "example": "API"
298                  },
299                  "userAgent": {
300                    "type": "string"
301                  }
302                }
303              },
304              "stats": {
305                "type": "object",
306                "properties": {
307                  "inputBodyLen": {
308                    "type": "integer",
309                    "example": 2000
310                  },
311                  "rebootCount": {
312                    "type": "integer",
313                    "example": 0
314                  },
315                  "restartCount": {
316                    "type": "integer",
317                    "example": 0
318                  },
319                  "resurrectCount": {
320                    "type": "integer",
321                    "example": 0
322                  },
323                  "computeUnits": {
324                    "type": "integer",
325                    "example": 0
326                  }
327                }
328              },
329              "options": {
330                "type": "object",
331                "properties": {
332                  "build": {
333                    "type": "string",
334                    "example": "latest"
335                  },
336                  "timeoutSecs": {
337                    "type": "integer",
338                    "example": 300
339                  },
340                  "memoryMbytes": {
341                    "type": "integer",
342                    "example": 1024
343                  },
344                  "diskMbytes": {
345                    "type": "integer",
346                    "example": 2048
347                  }
348                }
349              },
350              "buildId": {
351                "type": "string"
352              },
353              "defaultKeyValueStoreId": {
354                "type": "string"
355              },
356              "defaultDatasetId": {
357                "type": "string"
358              },
359              "defaultRequestQueueId": {
360                "type": "string"
361              },
362              "buildNumber": {
363                "type": "string",
364                "example": "1.0.0"
365              },
366              "containerUrl": {
367                "type": "string"
368              },
369              "usage": {
370                "type": "object",
371                "properties": {
372                  "ACTOR_COMPUTE_UNITS": {
373                    "type": "integer",
374                    "example": 0
375                  },
376                  "DATASET_READS": {
377                    "type": "integer",
378                    "example": 0
379                  },
380                  "DATASET_WRITES": {
381                    "type": "integer",
382                    "example": 0
383                  },
384                  "KEY_VALUE_STORE_READS": {
385                    "type": "integer",
386                    "example": 0
387                  },
388                  "KEY_VALUE_STORE_WRITES": {
389                    "type": "integer",
390                    "example": 1
391                  },
392                  "KEY_VALUE_STORE_LISTS": {
393                    "type": "integer",
394                    "example": 0
395                  },
396                  "REQUEST_QUEUE_READS": {
397                    "type": "integer",
398                    "example": 0
399                  },
400                  "REQUEST_QUEUE_WRITES": {
401                    "type": "integer",
402                    "example": 0
403                  },
404                  "DATA_TRANSFER_INTERNAL_GBYTES": {
405                    "type": "integer",
406                    "example": 0
407                  },
408                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
409                    "type": "integer",
410                    "example": 0
411                  },
412                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
413                    "type": "integer",
414                    "example": 0
415                  },
416                  "PROXY_SERPS": {
417                    "type": "integer",
418                    "example": 0
419                  }
420                }
421              },
422              "usageTotalUsd": {
423                "type": "number",
424                "example": 0.00005
425              },
426              "usageUsd": {
427                "type": "object",
428                "properties": {
429                  "ACTOR_COMPUTE_UNITS": {
430                    "type": "integer",
431                    "example": 0
432                  },
433                  "DATASET_READS": {
434                    "type": "integer",
435                    "example": 0
436                  },
437                  "DATASET_WRITES": {
438                    "type": "integer",
439                    "example": 0
440                  },
441                  "KEY_VALUE_STORE_READS": {
442                    "type": "integer",
443                    "example": 0
444                  },
445                  "KEY_VALUE_STORE_WRITES": {
446                    "type": "number",
447                    "example": 0.00005
448                  },
449                  "KEY_VALUE_STORE_LISTS": {
450                    "type": "integer",
451                    "example": 0
452                  },
453                  "REQUEST_QUEUE_READS": {
454                    "type": "integer",
455                    "example": 0
456                  },
457                  "REQUEST_QUEUE_WRITES": {
458                    "type": "integer",
459                    "example": 0
460                  },
461                  "DATA_TRANSFER_INTERNAL_GBYTES": {
462                    "type": "integer",
463                    "example": 0
464                  },
465                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
466                    "type": "integer",
467                    "example": 0
468                  },
469                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
470                    "type": "integer",
471                    "example": 0
472                  },
473                  "PROXY_SERPS": {
474                    "type": "integer",
475                    "example": 0
476                  }
477                }
478              }
479            }
480          }
481        }
482      }
483    }
484  }
485}

Algolia Website Indexer OpenAPI definition

OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.

OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.

By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.

You can download the OpenAPI definitions for Algolia Website Indexer from the options below:

If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.

You can also check out our other API clients:

Developer
Maintained by Apify

Actor Metrics

  • 1 monthly user

  • 2 bookmarks

  • Created in Jul 2019

  • Modified 8 months ago