You can access the Ai-ML-scraper programmatically from your own applications by using the Apify API. You can also choose your preferred language from the options below. To use the Apify API, you’ll need an Apify account and your API token, found in the Integrations settings in Apify Console.

Python

JavaScript

CLI

OpenAPI

HTTP

MCP

{
  "openapi": "3.0.1",
  "info": {
    "version": "0.0",
    "x-build-id": "fJhNUsObZt2eNhuwA"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/labrat011~ai-ml-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-labrat011-ai-ml-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/labrat011~ai-ml-scraper/runs": {
      "post": {
        "operationId": "runs-sync-labrat011-ai-ml-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/labrat011~ai-ml-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-labrat011-ai-ml-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "mode"
        ],
        "properties": {
          "mode": {
            "title": "Scraping mode",
            "enum": [
              "search_models",
              "search_papers",
              "trending_papers"
            ],
            "type": "string",
            "description": "What type of AI/ML data to search for.",
            "default": "search_models"
          },
          "query": {
            "title": "Search query",
            "type": "string",
            "description": "Search keyword for models or papers. For arXiv, searches titles and abstracts."
          },
          "sort": {
            "title": "Sort by",
            "enum": [
              "downloads",
              "likes",
              "trending",
              "relevance",
              "submittedDate",
              "lastUpdatedDate"
            ],
            "type": "string",
            "description": "How to sort results. Options depend on the mode.",
            "default": "downloads"
          },
          "pipelineTag": {
            "title": "Pipeline task (Models only)",
            "enum": [
              "",
              "text-generation",
              "text-classification",
              "token-classification",
              "question-answering",
              "summarization",
              "translation",
              "fill-mask",
              "text2text-generation",
              "conversational",
              "text-to-image",
              "image-to-text",
              "image-classification",
              "object-detection",
              "image-segmentation",
              "automatic-speech-recognition",
              "text-to-speech",
              "audio-classification",
              "feature-extraction",
              "sentence-similarity",
              "zero-shot-classification",
              "reinforcement-learning",
              "depth-estimation",
              "image-to-image",
              "video-classification"
            ],
            "type": "string",
            "description": "Filter models by pipeline task. Leave empty for all tasks.",
            "default": ""
          },
          "libraryFilter": {
            "title": "Framework / Library (Models only)",
            "enum": [
              "",
              "transformers",
              "diffusers",
              "pytorch",
              "tensorflow",
              "jax",
              "onnx",
              "safetensors",
              "gguf",
              "spacy",
              "keras",
              "sklearn",
              "sentence-transformers",
              "peft",
              "adapter-transformers"
            ],
            "type": "string",
            "description": "Filter models by ML framework. Leave empty for all frameworks.",
            "default": ""
          },
          "arxivCategory": {
            "title": "arXiv category",
            "enum": [
              "",
              "cs.AI",
              "cs.LG",
              "cs.CL",
              "cs.CV",
              "cs.NE",
              "cs.RO",
              "cs.IR",
              "cs.MA",
              "stat.ML",
              "cs.SD",
              "eess.AS",
              "cs.HC",
              "cs.CR"
            ],
            "type": "string",
            "description": "Filter papers by arXiv category. Leave empty to search all AI/ML categories.",
            "default": ""
          },
          "author": {
            "title": "Author name (Papers only)",
            "type": "string",
            "description": "Filter arXiv papers by author name."
          },
          "dateFrom": {
            "title": "Date from (Papers only)",
            "type": "string",
            "description": "Filter papers published from this date (YYYY-MM-DD)."
          },
          "dateTo": {
            "title": "Date to (Papers only)",
            "type": "string",
            "description": "Filter papers published up to this date (YYYY-MM-DD)."
          },
          "maxResults": {
            "title": "Max results",
            "minimum": 1,
            "maximum": 10000,
            "type": "integer",
            "description": "Maximum number of results to return. Free users are limited to 25 results per run.",
            "default": 100
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

Ai-ML-scraper OpenAPI definition

OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.

OpenAPI works well with AI agents and GPTs because it standardizes how these systems interact with various APIs, enabling reliable integrations and efficient communication.

By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.

You can download the OpenAPI definitions for Ai-ML-scraper from the options below:

OpenAPI.json

If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.

You can also check out our other API clients:

Ai-ML-scraper API in Python

Ai-ML-scraper API in JavaScript

Ai-ML-scraper API through CLI

Ai-ML-scraper API

arXiv Papers Scraper - AI/ML Research at Scale

wetyr_corporation/arxiv-papers-scraper

Search and bulk extract arXiv research papers with abstracts, authors, categories, and PDF links. Built for AI/ML researchers, RAG knowledge bases, and citation tracking.

WETYR

arXiv Paper Scraper - AI ML Research Papers

openclawmara/arxiv-paper-scraper

Scrape arXiv research papers by keyword, category, or author. Extracts titles, abstracts, authors, citations, and metadata. Perfect for AI/ML research monitoring, literature reviews, and LLM training data collection.

OpenClaw Mara

HuggingFace Daily Papers Scraper

tzmyk/huggingface-daily-papers-scraper

Scrapes AI/ML research papers from HuggingFace Daily Papers (huggingface.co/papers). Extracts title, authors, abstract, GitHub repo, star count, upvotes, AI summary, and keywords.

tzmyk

Papers with Code Scraper

crawlerbros/papers-with-code-scraper

Scrape Papers with Code: search ML papers, fetch paper details with repos and results, browse ML tasks and leaderboards, search datasets, and find ML methods.

Crawler Bros

arXiv Papers Scraper

resounding_diplomacy/arxiv-papers-scraper

Scrape academic papers from arXiv by category, keyword, or author. Extract titles, authors, abstracts, PDF URLs, DOIs, categories, and more. Perfect for AI/ML research datasets.

alars num

GitHub and HuggingFace AI Research Monitor

ghostgrid/github-huggingface-research-monitor

Track trending AI repositories, models, datasets, and papers from GitHub and HuggingFace.

GhostGrid

ML Scout

nytemairqt/ml-scout

Search ML models and repos across HuggingFace Hub and GitHub. Filter by task, framework, license, language, author, stars, and more.

Cody

arXiv Research Papers Tracker

wsgcjj/arxiv-papers-scraper

Search and extract academic papers from arXiv by category, keyword, or date range. Returns paper title, authors, abstract, categories, published date, and PDF URL. Ideal for AI/ML research monitoring and training data collection.

陈俊杰

Huggingface Models

david_flagg/huggingface-models

Scrape model metadata from HuggingFace Hub — the largest open-source ML model registry. Get downloads, likes, trending scores, licenses, tags, and architecture info for 1M+ models. Filter by task type, ML library, or author. Uses the official HF API — no auth required.