You can access the Dataset Schema Super Actor programmatically from your own applications by using the Apify API. You can also choose the language preference from below. To use the Apify API, you’ll need an Apify account and your API token, found in Integrations settings in Apify Console.

Python

JavaScript

CLI

OpenAPI

HTTP

MCP

{
  "openapi": "3.0.1",
  "info": {
    "title": "Dataset Schema Super Actor",
    "version": "1.0",
    "x-build-id": "oGsnxJ1gsKhZLIub7"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/zuzka~dataset-schema-super-actor/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-zuzka-dataset-schema-super-actor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/zuzka~dataset-schema-super-actor/runs": {
      "post": {
        "operationId": "runs-sync-zuzka-dataset-schema-super-actor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/zuzka~dataset-schema-super-actor/run-sync": {
      "post": {
        "operationId": "run-sync-zuzka-dataset-schema-super-actor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "actorTechnicalName"
        ],
        "properties": {
          "actorTechnicalName": {
            "title": "Actor Technical Name",
            "pattern": "^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+$",
            "type": "string",
            "description": "The technical name of the Actor to generate schema for (e.g., 'compass/Instagram-Scraper')"
          },
          "generateInputs": {
            "title": "Generate Test Inputs",
            "type": "boolean",
            "description": "Generate test inputs for the Actor (Step 1)"
          },
          "generateSchema": {
            "title": "Generate Initial Schema",
            "type": "boolean",
            "description": "Generate initial dataset schema from Actor runs (Step 2)"
          },
          "existingMinimalInput": {
            "title": "Existing Minimal Input (JSON)",
            "type": "string",
            "description": "Provide existing minimal test input as JSON. Leave empty to use generated input."
          },
          "existingNormalInput": {
            "title": "Existing Normal Input (JSON)",
            "type": "string",
            "description": "Provide existing normal test input as JSON. Leave empty to use generated input."
          },
          "existingMaximalInput": {
            "title": "Existing Maximal Input (JSON)",
            "type": "string",
            "description": "Provide existing maximal test input as JSON. Leave empty to use generated input."
          },
          "existingEdgeInput": {
            "title": "Existing Edge Input (JSON)",
            "type": "string",
            "description": "Provide existing edge test input as JSON. Leave empty to use generated input."
          },
          "useRealDatasetIds": {
            "title": "Use Real Dataset IDs Instead",
            "type": "boolean",
            "description": "Generate schema from real Redash datasets instead of test inputs. Requires Redash credentials from Step 4."
          },
          "enhanceSchema": {
            "title": "Enhance Schema with AI",
            "type": "boolean",
            "description": "Enhance schema using Claude Sonnet 4 (Step 3)"
          },
          "existingEnhancedSchema": {
            "title": "Existing Enhanced Schema (JSON)",
            "type": "string",
            "description": "Provide existing enhanced schema as JSON to skip Step 3. Leave empty to generate new enhanced schema."
          },
          "generateViews": {
            "title": "Generate Views",
            "type": "boolean",
            "description": "Whether to generate dataset views in the schema. Skip if you already have views in the schema."
          },
          "validateSchema": {
            "title": "Validate Schema",
            "type": "boolean",
            "description": "Validate schema against real dataset data (Step 4)"
          },
          "daysBack": {
            "title": "Days Back for Validation",
            "minimum": 1,
            "maximum": 14,
            "type": "integer",
            "description": "Number of days back to look for datasets in validation",
            "default": 5
          },
          "maximumResults": {
            "title": "Maximum Results for Validation",
            "minimum": 1,
            "maximum": 100,
            "type": "integer",
            "description": "Maximum number of results to fetch for validation",
            "default": 10
          },
          "minimumResults": {
            "title": "Minimum Results for Validation",
            "minimum": 1,
            "maximum": 100,
            "type": "integer",
            "description": "Minimum number of results required for validation",
            "default": 1
          },
          "runsPerUser": {
            "title": "Runs Per User for Validation",
            "minimum": 1,
            "maximum": 10,
            "type": "integer",
            "description": "Number of runs per user to consider in validation",
            "default": 2
          },
          "maxResultsPerQuery": {
            "title": "Max results per query",
            "minimum": 0,
            "maximum": 300,
            "type": "integer",
            "description": "Maximum number of rows to fetch from Redash chart query (0 = unlimited)",
            "default": 100
          },
          "createPR": {
            "title": "Create GitHub PR",
            "type": "boolean",
            "description": "Create GitHub pull request with the schema (Step 5)"
          },
          "githubLink": {
            "title": "GitHub Repository Link",
            "pattern": "^https://github\\.com/[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+/?$",
            "type": "string",
            "description": "URL to the GitHub repository where the PR should be created"
          },
          "githubToken": {
            "title": "GitHub Personal Access Token",
            "type": "string",
            "format": "password",
            "description": "GitHub Personal Access Token for creating PRs"
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "number",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "number",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "number",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "number",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

Dataset Schema Super Actor OpenAPI definition

OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.

OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.

By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.

You can download the OpenAPI definitions for Dataset Schema Super Actor from the options below:

OpenAPI.json

If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.

You can also check out our other API clients:

Dataset Schema Super Actor API in Python

Dataset Schema Super Actor API in JavaScript

Dataset Schema Super Actor API through CLI

Dataset Schema Super Actor API

INSTAGRAM SUPER FAST SCRAPER

macheta/instagram-super-fast-scraper

ALL INSTAGRAM DATA [ POSTS , COMMENTS , USER INFO ] WITH A SUPER FAST SPEED ...

Anass

Super Fast Google News Scraper (pay per result)

emt_crawler/super-fast-google-news-scraper-pay-per-result

Efficiently extract direct links to the latest Google News articles from the past 24 hours.

Danis Arthur

5.0

(1)

Flipkart Advanced Product Scraper

scrapeai/flipkart-advanced-product-scraper

An Apify actor that fetches structured product data directly from the Flipkart API. Search by keyword or category URL to extract details like product name, brand, price, MRP, discounts, ratings, reviews, specifications, and images—ideal for e-commerce research, price tracking, and market analysis.

ScrapeAI

5.0

(3)

$1/1K 🔥 Hiring.cafe Scraper PRO (By Search URL)

azzouzana/hiring-cafe-scraper-pro-by-search-url

#1 🔥 Super simple! Paste your hiring.cafe search URL get thousands of jobs in seconds! ⚡ Comprehensive: title, company, location, salary, description, apply link, requirements & much more! 120+ datapoints per job! Export to JSON, CSV, Excel or API. Grab your search URLs and you're good to go 🚀

Azzouzana

5.0

(5)

Merge, Dedup & Transform Datasets

lukaskrivka/dedup-datasets

The ultimate dataset processor. Extremely fast merging, deduplications & transformations all in a single run.

Lukáš Křivka

5.0

(1)

Stack Exchange Questions Search Scraper

parseforge/stackexchange-questions-scraper

Search questions across Stack Overflow, Server Fault, Super User, Mathematics, Cross Validated, Ask Ubuntu and 170+ other Stack Exchange sites. Returns title, full body, tags, score, view count, answer count, accepted answer, asker info, dates and direct URL. Filter by tag, score, sort or site.

ParseForge

Rotten Tomatoes Reviews Scraper

plowdata/rotten-tomatoes-reviews-scraper

Scrape Rotten Tomatoes critic and audience reviews from movie pages. Export quotes, star ratings, sentiment, publication metadata, verified flags, reaction counts, and raw API data without proxies.

Frederic

Super CSV Crawler

proloser/super-csv-crawler

Upload or remote CSV file parser, with ability to specify custom column names

Dean Sofer

Cardmarket Pokémon Trend Scraper

scrap_them_all/cardmarket-pokemon-trend-scraper

Scrape Cardmarket Pokémon Weekly Top Cards and Best Bargains. Get card name, expansion, current price, price change, rarity and product URL.

scrap_them_all

Bulk Linkedin Email Finder⚡ $0.8/1K Emails, Super Cheap.

snipercoder/bulk-linkedin-email-finder

|Input: linkedIn| |Output: Name, Email, Title, Company, etc. In Bulk| Perfect for Email campaigns, Data Enrichment, Linkedin/Linkedin sales nav Leads. ✅Forget Instantly.ai, apollo.io, and hunter.io, they are all to break the Bank.

Sniper Coder

1.1K

4.7

(8)

Linkedin Email Finder⚡ $1/1K Emails, Super Cheap.

snipercoder/linkedin-email-finder

|Input: linkedin| |Output: Name, Email, Title, Company, etc.| Perfect for Email campaigns, Data Enrichment, Linkedin/Linkedin sales nav Leads. ✅Forget Instantly.ai, apollo.io, and hunter.io, they are all to break the Bank.

Sniper Coder

518

4.7

(4)