You can access ScrapyFy programmatically from your own applications by using the Apify API. You can also choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, found in the Integrations settings in Apify Console.

Python

JavaScript

CLI

OpenAPI

HTTP

MCP

// Example: run the "jupri/scrapyfy" Actor through the Apify API client and
// print the items stored in the run's default dataset.
import { ApifyClient } from 'apify-client';

// Initialize the ApifyClient with your Apify API token
// Replace the '<YOUR_API_TOKEN>' with your token
const client = new ApifyClient({
    token: '<YOUR_API_TOKEN>',
});

// Python source of the spider(s) passed to the Actor as "spiders_code".
// The template literal content deliberately starts at column 0: everything
// between the backticks is sent verbatim, and Python treats leading whitespace
// as block structure, so indenting these lines to match the surrounding
// JavaScript would make the `class` statement an unexpected indent.
// NOTE(review): `scrapy` is referenced without an import in the spider code;
// presumably the Actor provides it in scope — confirm against the Actor's README.
const spidersCode = `from urllib.parse import urljoin

### multiple spiders can be specified

class TitleSpider(scrapy.Spider):

    name = 'title_spider'
    allowed_domains = ["apify.com"]
    start_urls = ["https://apify.com"]

    custom_settings = {
        'REQUEST_FINGERPRINTER_IMPLEMENTATION'  : '2.7',
        # Obey robots.txt rules
        'ROBOTSTXT_OBEY'                        : True,
        'DEPTH_LIMIT'                           : 2,
        'LOG_ENABLED'                           : False,
        #'CLOSESPIDER_PAGECOUNT'                 : 5,
        'CLOSESPIDER_ITEMCOUNT'                 : 5,
    }

    def parse(self, response):
        yield {
            'url': response.url,
            'title': response.css('title::text').extract_first(),
        }
        for link_href in response.css('a::attr("href")'):
            link_url = urljoin(response.url, link_href.get())
            if link_url.startswith(('http://', 'https://')):
                yield scrapy.Request(link_url)`;

// Prepare Actor input
// Besides "spiders_code", the keys below are named after Scrapy settings;
// empty objects/arrays are passed through exactly as in the generated example.
const input = {
    "spiders_code": spidersCode,
    "DEFAULT_REQUEST_HEADERS": {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en"
    },
    "DOWNLOADER_MIDDLEWARES": {},
    "DOWNLOADER_MIDDLEWARES_BASE": {
        "scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware": 100,
        "scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware": 300,
        "scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware": 350,
        "scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware": 400,
        "scrapy.downloadermiddlewares.useragent.UserAgentMiddleware": 500,
        "scrapy.downloadermiddlewares.retry.RetryMiddleware": 550,
        "scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware": 560,
        "scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware": 580,
        "scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware": 590,
        "scrapy.downloadermiddlewares.redirect.RedirectMiddleware": 600,
        "scrapy.downloadermiddlewares.cookies.CookiesMiddleware": 700,
        "scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware": 750,
        "scrapy.downloadermiddlewares.stats.DownloaderStats": 850,
        "scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware": 900
    },
    "DOWNLOAD_HANDLERS": {},
    "DOWNLOAD_HANDLERS_BASE": {
        "data": "scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler",
        "file": "scrapy.core.downloader.handlers.file.FileDownloadHandler",
        "http": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
        "https": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
        "s3": "scrapy.core.downloader.handlers.s3.S3DownloadHandler",
        "ftp": "scrapy.core.downloader.handlers.ftp.FTPDownloadHandler"
    },
    "EXTENSIONS": {},
    "EXTENSIONS_BASE": {
        "scrapy.extensions.corestats.CoreStats": 0,
        "scrapy.extensions.telnet.TelnetConsole": 0,
        "scrapy.extensions.memusage.MemoryUsage": 0,
        "scrapy.extensions.memdebug.MemoryDebugger": 0,
        "scrapy.extensions.closespider.CloseSpider": 0,
        "scrapy.extensions.feedexport.FeedExporter": 0,
        "scrapy.extensions.logstats.LogStats": 0,
        "scrapy.extensions.spiderstate.SpiderState": 0,
        "scrapy.extensions.throttle.AutoThrottle": 0
    },
    "FEEDS": {},
    "FEED_EXPORTERS": {},
    "FEED_EXPORTERS_BASE": {
        "json": "scrapy.exporters.JsonItemExporter",
        "jsonlines": "scrapy.exporters.JsonLinesItemExporter",
        "jsonl": "scrapy.exporters.JsonLinesItemExporter",
        "jl": "scrapy.exporters.JsonLinesItemExporter",
        "csv": "scrapy.exporters.CsvItemExporter",
        "xml": "scrapy.exporters.XmlItemExporter",
        "marshal": "scrapy.exporters.MarshalItemExporter",
        "pickle": "scrapy.exporters.PickleItemExporter"
    },
    "FEED_STORAGES": {},
    "FEED_STORAGES_BASE": {
        "": "scrapy.extensions.feedexport.FileFeedStorage",
        "file": "scrapy.extensions.feedexport.FileFeedStorage",
        "ftp": "scrapy.extensions.feedexport.FTPFeedStorage",
        "gs": "scrapy.extensions.feedexport.GCSFeedStorage",
        "s3": "scrapy.extensions.feedexport.S3FeedStorage",
        "stdout": "scrapy.extensions.feedexport.StdoutFeedStorage"
    },
    "HTTPCACHE_IGNORE_HTTP_CODES": [],
    "HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS": [],
    "HTTPCACHE_IGNORE_SCHEMES": [
        "file"
    ],
    "ITEM_PIPELINES": {},
    "ITEM_PIPELINES_BASE": {},
    "MEMDEBUG_NOTIFY": [],
    "MEMUSAGE_NOTIFY_MAIL": [],
    "METAREFRESH_IGNORE_TAGS": [],
    "RETRY_HTTP_CODES": [
        500,
        502,
        503,
        504,
        522,
        524,
        408,
        429
    ],
    "SPIDER_CONTRACTS": {},
    "SPIDER_CONTRACTS_BASE": {
        "scrapy.contracts.default.UrlContract": 1,
        "scrapy.contracts.default.CallbackKeywordArgumentsContract": 1,
        "scrapy.contracts.default.ReturnsContract": 2,
        "scrapy.contracts.default.ScrapesContract": 3
    },
    "SPIDER_MIDDLEWARES": {},
    "SPIDER_MIDDLEWARES_BASE": {
        "scrapy.spidermiddlewares.httperror.HttpErrorMiddleware": 50,
        "scrapy.spidermiddlewares.offsite.OffsiteMiddleware": 500,
        "scrapy.spidermiddlewares.referer.RefererMiddleware": 700,
        "scrapy.spidermiddlewares.urllength.UrlLengthMiddleware": 800,
        "scrapy.spidermiddlewares.depth.DepthMiddleware": 900
    },
    "SPIDER_MODULES": [],
    "STATSMAILER_RCPTS": [],
    "TELNETCONSOLE_PORT": [
        6023,
        6073
    ]
};

// Run the Actor and wait for it to finish
const run = await client.actor("jupri/scrapyfy").call(input);

// Fetch and print Actor results from the run's dataset (if any)
console.log('Results from dataset');
console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);
const { items } = await client.dataset(run.defaultDatasetId).listItems();
for (const item of items) {
    console.dir(item);
}

// 📚 Want to learn more 📖? Go to → https://docs.apify.com/api/client/js/docs

ScrapyFy API in JavaScript

The Apify API client for JavaScript is the official library that allows you to use the ScrapyFy API in JavaScript or TypeScript, providing convenience functions and automatic retries on errors.

Install the apify-client

$ npm install apify-client

Other API clients include:

ScrapyFy API in Python

ScrapyFy API through CLI

ScrapyFy OpenAPI definition

ScrapyFy API

Linkedin Jobs Scraper

crawlerforge/linkedin-jobs-scraper

Powerful LinkedIn job scraper that extracts detailed listings in seconds. Collect titles, companies, descriptions, and dates automatically — ideal for market research, job analytics, or HR insights. Fast, reliable, and easy to use on Apify.

CrawlerForge

5.0

(1)

Website Content Crawler

apify/website-content-crawler

Crawl websites and extract text content to feed AI models, LLM applications, vector databases, or RAG pipelines. The Actor supports rich formatting using Markdown, cleans the HTML, downloads files, and integrates well with 🦜🔗 LangChain, LlamaIndex, and the wider LLM ecosystem.

Apify

89K

4.4

Twitter (X.com) Scraper Unlimited: No Limits

apidojo/twitter-scraper-lite

Introducing Twitter Scraper Unlimited, the most comprehensive Twitter data extraction solution available. Our enterprise-grade scraper offers unmatched capabilities with a transparent event-based pricing model, making it perfect for both small-scale and large-scale data extraction needs.

API Dojo

14K

3.2

Youtube Video Downloader

epctex/youtube-video-downloader

Effortlessly download YouTube videos of your preferred quality with our user-friendly Video Downloader. Try it now!

epctex

2.2K

3.9

🔥 LinkedIn Jobs Scraper

bebity/linkedin-jobs-scraper

ℹ️ Designed for both personal and professional use, simply enter your desired job title and location to receive a tailored list of job opportunities. Try it today!

Bebity

19K

4.1

Linkedin Profile Posts Scraper [NO COOKIES]

apimaestro/linkedin-profile-posts

Scrape LinkedIn posts data for a given LinkedIn profile including post content, reactions, comments count, and media attachments

API Maestro

11K

4.8

Web Scraper

apify/web-scraper

Crawls arbitrary websites using a web browser and extracts structured data from web pages using a provided JavaScript function. The Actor supports both recursive crawling and lists of URLs, and automatically manages concurrency for maximum performance.

Apify

97K

4.8

Linkedin Profile Details Scraper + EMAIL (No Cookies Required)

apimaestro/linkedin-profile-detail

Scrape comprehensive LinkedIn profile data including work experience, education history, certifications, and location details. Get structured information from any public LinkedIn profile using their username.

API Maestro

6.2K

3.9

Linkedin Posts Search Scraper | No Cookies

apimaestro/linkedin-posts-search-scraper-no-cookies

Scrape LinkedIn posts by keyword without login. Get post content, reactions, author details, and media. Sort by relevance or date. Perfect for research, analysis, and monitoring trends.

API Maestro

4.4K

4.6

Cheerio Scraper

apify/cheerio-scraper

Crawls websites using raw HTTP requests, parses the HTML with the Cheerio library, and extracts data from the pages using Node.js code. Supports both recursive crawling and lists of URLs. This Actor is a high-performance alternative to apify/web-scraper for websites that do not require JavaScript.

Apify

11K

4.9