You can access ScrapyFy programmatically from your own applications by using the Apify API. You can also choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, found in Integrations settings in Apify Console.

Python

JavaScript

CLI

OpenAPI

HTTP

MCP

# Example: run the "jupri/scrapyfy" Actor through the Apify API client and
# print every item stored in the run's default dataset.
from apify_client import ApifyClient

# Initialize the ApifyClient with your Apify API token
# Replace '<YOUR_API_TOKEN>' with your token.
client = ApifyClient("<YOUR_API_TOKEN>")

# Prepare the Actor input
#
# "spiders_code" carries the spider source as a plain string. Each line of
# that string ends with an explicit \r escape followed by the literal newline,
# so the text sent to the Actor uses CRLF line endings.
# NOTE(review): the spider text references `scrapy` without importing it --
# presumably the Actor provides that name when it runs the code; confirm.
#
# The remaining keys appear to mirror Scrapy setting names and are passed
# through to the Actor unchanged.
run_input = {
    "spiders_code": """from urllib.parse import urljoin\r
\r
### multiple spiders can be specified\r
\r
class TitleSpider(scrapy.Spider):\r
\r
    name = 'title_spider'\r
    allowed_domains = [\"apify.com\"]\r
    start_urls = [\"https://apify.com\"]\r
\r
    custom_settings = {\r
        'REQUEST_FINGERPRINTER_IMPLEMENTATION'  : '2.7',\r
        # Obey robots.txt rules\r
        'ROBOTSTXT_OBEY'                        : True,\r
        'DEPTH_LIMIT'                           : 2,\r
        'LOG_ENABLED'                           : False,\r
        #'CLOSESPIDER_PAGECOUNT'                 : 5,\r
        'CLOSESPIDER_ITEMCOUNT'                 : 5,\r
    }\r
\r
    def parse(self, response):\r
        yield {\r
            'url': response.url,\r
            'title': response.css('title::text').extract_first(),\r
        }\r
        for link_href in response.css('a::attr(\"href\")'):\r
            link_url = urljoin(response.url, link_href.get())\r
            if link_url.startswith(('http://', 'https://')):\r
                yield scrapy.Request(link_url)""",
    "DEFAULT_REQUEST_HEADERS": {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en",
    },
    "DOWNLOADER_MIDDLEWARES": {},
    "DOWNLOADER_MIDDLEWARES_BASE": {
        "scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware": 100,
        "scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware": 300,
        "scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware": 350,
        "scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware": 400,
        "scrapy.downloadermiddlewares.useragent.UserAgentMiddleware": 500,
        "scrapy.downloadermiddlewares.retry.RetryMiddleware": 550,
        "scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware": 560,
        "scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware": 580,
        "scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware": 590,
        "scrapy.downloadermiddlewares.redirect.RedirectMiddleware": 600,
        "scrapy.downloadermiddlewares.cookies.CookiesMiddleware": 700,
        "scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware": 750,
        "scrapy.downloadermiddlewares.stats.DownloaderStats": 850,
        "scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware": 900,
    },
    "DOWNLOAD_HANDLERS": {},
    "DOWNLOAD_HANDLERS_BASE": {
        "data": "scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler",
        "file": "scrapy.core.downloader.handlers.file.FileDownloadHandler",
        "http": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
        "https": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
        "s3": "scrapy.core.downloader.handlers.s3.S3DownloadHandler",
        "ftp": "scrapy.core.downloader.handlers.ftp.FTPDownloadHandler",
    },
    "EXTENSIONS": {},
    "EXTENSIONS_BASE": {
        "scrapy.extensions.corestats.CoreStats": 0,
        "scrapy.extensions.telnet.TelnetConsole": 0,
        "scrapy.extensions.memusage.MemoryUsage": 0,
        "scrapy.extensions.memdebug.MemoryDebugger": 0,
        "scrapy.extensions.closespider.CloseSpider": 0,
        "scrapy.extensions.feedexport.FeedExporter": 0,
        "scrapy.extensions.logstats.LogStats": 0,
        "scrapy.extensions.spiderstate.SpiderState": 0,
        "scrapy.extensions.throttle.AutoThrottle": 0,
    },
    "FEEDS": {},
    "FEED_EXPORTERS": {},
    "FEED_EXPORTERS_BASE": {
        "json": "scrapy.exporters.JsonItemExporter",
        "jsonlines": "scrapy.exporters.JsonLinesItemExporter",
        "jsonl": "scrapy.exporters.JsonLinesItemExporter",
        "jl": "scrapy.exporters.JsonLinesItemExporter",
        "csv": "scrapy.exporters.CsvItemExporter",
        "xml": "scrapy.exporters.XmlItemExporter",
        "marshal": "scrapy.exporters.MarshalItemExporter",
        "pickle": "scrapy.exporters.PickleItemExporter",
    },
    "FEED_STORAGES": {},
    "FEED_STORAGES_BASE": {
        "": "scrapy.extensions.feedexport.FileFeedStorage",
        "file": "scrapy.extensions.feedexport.FileFeedStorage",
        "ftp": "scrapy.extensions.feedexport.FTPFeedStorage",
        "gs": "scrapy.extensions.feedexport.GCSFeedStorage",
        "s3": "scrapy.extensions.feedexport.S3FeedStorage",
        "stdout": "scrapy.extensions.feedexport.StdoutFeedStorage",
    },
    "HTTPCACHE_IGNORE_HTTP_CODES": [],
    "HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS": [],
    "HTTPCACHE_IGNORE_SCHEMES": ["file"],
    "ITEM_PIPELINES": {},
    "ITEM_PIPELINES_BASE": {},
    "MEMDEBUG_NOTIFY": [],
    "MEMUSAGE_NOTIFY_MAIL": [],
    "METAREFRESH_IGNORE_TAGS": [],
    "RETRY_HTTP_CODES": [
        500,
        502,
        503,
        504,
        522,
        524,
        408,
        429,
    ],
    "SPIDER_CONTRACTS": {},
    "SPIDER_CONTRACTS_BASE": {
        "scrapy.contracts.default.UrlContract": 1,
        "scrapy.contracts.default.CallbackKeywordArgumentsContract": 1,
        "scrapy.contracts.default.ReturnsContract": 2,
        "scrapy.contracts.default.ScrapesContract": 3,
    },
    "SPIDER_MIDDLEWARES": {},
    "SPIDER_MIDDLEWARES_BASE": {
        "scrapy.spidermiddlewares.httperror.HttpErrorMiddleware": 50,
        "scrapy.spidermiddlewares.offsite.OffsiteMiddleware": 500,
        "scrapy.spidermiddlewares.referer.RefererMiddleware": 700,
        "scrapy.spidermiddlewares.urllength.UrlLengthMiddleware": 800,
        "scrapy.spidermiddlewares.depth.DepthMiddleware": 900,
    },
    "SPIDER_MODULES": [],
    "STATSMAILER_RCPTS": [],
    "TELNETCONSOLE_PORT": [
        6023,
        6073,
    ],
}

# Run the Actor and wait for it to finish
run = client.actor("jupri/scrapyfy").call(run_input=run_input)

# Fetch and print Actor results from the run's dataset (if there are any)
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)

# 📚 Want to learn more 📖? Go to → https://docs.apify.com/api/client/python/docs/quick-start

ScrapyFy API in Python

The Apify API client for Python is the official library that allows you to use the ScrapyFy API in Python, providing convenience functions and automatic retries on errors.

Install the apify-client

$ pip install apify-client

Other API clients include:

ScrapyFy API in JavaScript

ScrapyFy API through CLI

ScrapyFy OpenAPI definition

ScrapyFy API

Scrape And Bypass Any Url Using Scrappey

dormic/apify-scrappey

A template for scraping data from web pages using the Scrappey.com API service integrated with an Apify Actor. This actor provides a robust solution for handling complex web scraping scenarios, including sites with anti-bot protection such as Cloudflare, Datadome, PerimeterX and all other forms.

Pim

5.0

Best Linkedin Jobs Scrapy

lads.yc/easy-linkedin-jobs-scrapy

Easy way to get jobs and details

YC W

Scrapy Books Example

vdusek/scrapy-books-example

Example of Python Scrapy project. It scrapes book data from https://books.toscrape.com/.

Vlada Dusek

Youtube Video, Channel, Transcript

scrappy-scraper/YoutubeScraper-Apify

The most efficient YouTube parser in terms of cost and performance

Scrappy Scraper

Google News Scraper

lhotanova/google-news-scraper

Gets featured articles from Google News with title, link, source, publication date and image.

Kristýna Lhoťanová

1.9K

3.2

Linkedin Profile Scraper - People & Company

saswave/linkedin-profile

Scrape LinkedIn People & Company profile URLs at scale. The input can also be a search URL. Get information such as: connection, follower, location, all experience, education, language, about, last activities, and personal contact info (first name, last name, email, phone, birthday, creation date, picture URL, ...).

SASWAVE

972

4.0

Spotify Monthly Listeners

augeas/spotify-monthly-listeners

Get an artist's unique monthly listeners, featured playlists, releases, top cities, and more besides.

Giles Greenway

385

5.0

SEO Checker

louisdeconinck/seo-checker

SEO Checker is an advanced Actor that performs comprehensive on-site SEO analysis for any website. It crawls web pages and extracts crucial SEO elements, providing detailed insights to help improve your website's search engine optimization.

Louis Deconinck

124

5.0

Website Content to Markdown for LLM Training

easyapi/website-content-to-markdown-for-llm-training

🚀 Transform web content into clean, LLM-ready Markdown! 📘 Scrape multiple pages, extract main content, and convert to Markdown format. Perfect for AI researchers, data scientists, and LLM developers. Fast, efficient, and customizable. Supercharge your AI training data today! 🌐📝🧠

EasyApi

5.0

Fast LinkedIn Job Scraper

ivanvs/fast-linkedin-job-scraper

Extract job listings from LinkedIn. Scrape job details such as when the job was posted, number of applicants, location and salary range. Download listings data in JSON, XML, Excel.