
Web Scraper
Pricing
$35.00 / 1,000 results

Web Scraper
Simple web scraper. Extract titles, paragraphs, links, images, tables and more from websites. Supports custom CSS selectors and batch collection. For large-scale needs, try Apify's Web Content Crawler.
0.0 (0)
Pricing
$35.00 / 1,000 results
2
3
3
Last modified
2 days ago
You can access the Web Scraper programmatically from your own applications by using the Apify API. You can also choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, found in Integrations settings in Apify Console.
{ "openapi": "3.0.1", "info": { "version": "0.1", "x-build-id": "DDTmhMJ0TScRhm3oL" }, "servers": [ { "url": "https://api.apify.com/v2" } ], "paths": { "/acts/futurizerush~web-scraper/run-sync-get-dataset-items": { "post": { "operationId": "run-sync-get-dataset-items-futurizerush-web-scraper", "x-openai-isConsequential": false, "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK" } } } }, "/acts/futurizerush~web-scraper/runs": { "post": { "operationId": "runs-sync-futurizerush-web-scraper", "x-openai-isConsequential": false, "summary": "Executes an Actor and returns information about the initiated run in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": "string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/runsResponseSchema" } } } } } } }, "/acts/futurizerush~web-scraper/run-sync": { "post": { "operationId": "run-sync-futurizerush-web-scraper", "x-openai-isConsequential": false, "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.", "tags": [ "Run Actor" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/inputSchema" } } } }, "parameters": [ { "name": "token", "in": "query", "required": true, "schema": { "type": 
"string" }, "description": "Enter your Apify token here" } ], "responses": { "200": { "description": "OK" } } } } }, "components": { "schemas": { "inputSchema": { "type": "object", "required": [ "startUrls" ], "properties": { "startUrls": { "title": "Start URLs", "type": "array", "description": "Enter the URLs you want to scrape (can be multiple)", "items": { "type": "object", "required": [ "url" ], "properties": { "url": { "type": "string", "title": "URL of a web page", "format": "uri" } } } }, "maxRequestsPerCrawl": { "title": "Max Requests", "minimum": 1, "type": "integer", "description": "Maximum number of pages to scrape (to avoid running too long)", "default": 10 }, "maxConcurrency": { "title": "Max Concurrency", "minimum": 1, "maximum": 10, "type": "integer", "description": "Number of pages to scrape simultaneously (1=stable but slow, 5=fast)", "default": 3 }, "smartMode": { "title": "Smart Mode", "type": "boolean", "description": "Automatically optimize speed and save costs when enabled", "default": true }, "cacheEnabled": { "title": "Enable Cache", "type": "boolean", "description": "Remember scraped pages to avoid duplicate scraping", "default": true }, "extractionRules": { "title": "🎨 Custom Extraction Rules", "type": "object", "description": "Use CSS selectors to specify elements to extract. \nExamples include common selectors that can be directly modified", "default": {} }, "customData": { "title": "📌 Custom Tags", "type": "object", "description": "Add your tags to each data record (e.g., source, category, etc.)", "default": {} }, "waitForSelector": { "title": "⏳ Wait for Element", "type": "string", "description": "Wait for specific element to appear before extracting (for dynamic websites)" }, "scrollToBottom": { "title": "📜 Auto Scroll to Bottom", "type": "boolean", "description": "Automatically scroll the page to load more content (for infinite scroll websites)", "default": false }, "pageLoadTimeoutSecs": { "title": "⏱️ Page Load Timeout", "minimum": 5, "maximum": 120, "type": "integer", "description": "Maximum time to wait for page to load (seconds)", "default": 25 }, "blockResources": { "title": "🚫 Block Resource Types", "type": "array", "description": "Block unnecessary resources to speed up (images, CSS, fonts, etc.). By default, no resources are blocked to ensure complete extraction", "items": { "type": "string", "enum": [ "image", "stylesheet", "font", "media", "script", "other" ] }, "default": [] } } }, "runsResponseSchema": { "type": "object", "properties": { "data": { "type": "object", "properties": { "id": { "type": "string" }, "actId": { "type": "string" }, "userId": { "type": "string" }, "startedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" }, "finishedAt": { "type": "string", "format": "date-time", "example": "2025-01-08T00:00:00.000Z" }, "status": { "type": "string", "example": "READY" }, "meta": { "type": "object", "properties": { "origin": { "type": "string", "example": "API" }, "userAgent": { "type": "string" } } }, "stats": { "type": "object", "properties": { "inputBodyLen": { "type": "integer", "example": 2000 }, "rebootCount": { "type": "integer", "example": 0 }, "restartCount": { "type": "integer", "example": 0 }, "resurrectCount": { "type": "integer", "example": 0 }, "computeUnits": {
"type": "integer", "example": 0 } } }, "options": { "type": "object", "properties": { "build": { "type": "string", "example": "latest" }, "timeoutSecs": { "type": "integer", "example": 300 }, "memoryMbytes": { "type": "integer", "example": 1024 }, "diskMbytes": { "type": "integer", "example": 2048 } } }, "buildId": { "type": "string" }, "defaultKeyValueStoreId": { "type": "string" }, "defaultDatasetId": { "type": "string" }, "defaultRequestQueueId": { "type": "string" }, "buildNumber": { "type": "string", "example": "1.0.0" }, "containerUrl": { "type": "string" }, "usage": { "type": "object", "properties": { "ACTOR_COMPUTE_UNITS": { "type": "integer", "example": 0 }, "DATASET_READS": { "type": "integer", "example": 0 }, "DATASET_WRITES": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_WRITES": { "type": "integer", "example": 1 }, "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_WRITES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_INTERNAL_GBYTES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "integer", "example": 0 }, "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "integer", "example": 0 }, "PROXY_SERPS": { "type": "integer", "example": 0 } } }, "usageTotalUsd": { "type": "number", "example": 0.00005 }, "usageUsd": { "type": "object", "properties": { "ACTOR_COMPUTE_UNITS": { "type": "integer", "example": 0 }, "DATASET_READS": { "type": "integer", "example": 0 }, "DATASET_WRITES": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_READS": { "type": "integer", "example": 0 }, "KEY_VALUE_STORE_WRITES": { "type": "number", "example": 0.00005 }, "KEY_VALUE_STORE_LISTS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_READS": { "type": "integer", "example": 0 }, "REQUEST_QUEUE_WRITES": { "type": "integer", "example": 0 }, "DATA_TRANSFER_INTERNAL_GBYTES": { "type": 
"integer", "example": 0 }, "DATA_TRANSFER_EXTERNAL_GBYTES": { "type": "integer", "example": 0 }, "PROXY_RESIDENTIAL_TRANSFER_GBYTES": { "type": "integer", "example": 0 }, "PROXY_SERPS": { "type": "integer", "example": 0 } } } } } } } } }}
Web Scraper OpenAPI definition
OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.
OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.
By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.
You can download the OpenAPI definitions for Web Scraper from the options below:
If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.
You can also check out our other API clients: