

ScrapyFy (Deprecated)
Scrapy Runner
Rating: 0.0 (0)
Pricing: Pay per usage
Total users: 16
Monthly users: 2
Last modified: 2 years ago
You can access ScrapyFy programmatically from your own applications by using the Apify API. You can also choose your preferred language below. To use the Apify API, you'll need an Apify account and your API token, which you can find under Integrations settings in Apify Console.
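The full example below hard-codes the token as a placeholder. If you prefer, you can read it from an environment variable instead; a minimal sketch, where the variable name APIFY_TOKEN is only an illustration and not required by the Actor:

import os

from apify_client import ApifyClient

# Read the API token from the environment instead of hard-coding it.
# APIFY_TOKEN is an example variable name chosen for this sketch.
token = os.environ["APIFY_TOKEN"]
client = ApifyClient(token)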
from apify_client import ApifyClient

# Initialize the ApifyClient with your Apify API token
# Replace '<YOUR_API_TOKEN>' with your token.
client = ApifyClient("<YOUR_API_TOKEN>")

# Prepare the Actor input
run_input = {
    "spiders_code": """from urllib.parse import urljoin\r
\r
### multiple spiders can be specified\r
\r
class TitleSpider(scrapy.Spider):\r
\r
    name = 'title_spider'\r
    allowed_domains = [\"apify.com\"]\r
    start_urls = [\"https://apify.com\"]\r
\r
    custom_settings = {\r
        'REQUEST_FINGERPRINTER_IMPLEMENTATION' : '2.7',\r
        # Obey robots.txt rules\r
        'ROBOTSTXT_OBEY' : True,\r
        'DEPTH_LIMIT' : 2,\r
        'LOG_ENABLED' : False,\r
        #'CLOSESPIDER_PAGECOUNT' : 5,\r
        'CLOSESPIDER_ITEMCOUNT' : 5,\r
    }\r
\r
    def parse(self, response):\r
        yield {\r
            'url': response.url,\r
            'title': response.css('title::text').extract_first(),\r
        }\r
        for link_href in response.css('a::attr(\"href\")'):\r
            link_url = urljoin(response.url, link_href.get())\r
            if link_url.startswith(('http://', 'https://')):\r
                yield scrapy.Request(link_url)""",
    "DEFAULT_REQUEST_HEADERS": {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en",
    },
    "DOWNLOADER_MIDDLEWARES": {},
    "DOWNLOADER_MIDDLEWARES_BASE": {
        "scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware": 100,
        "scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware": 300,
        "scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware": 350,
        "scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware": 400,
        "scrapy.downloadermiddlewares.useragent.UserAgentMiddleware": 500,
        "scrapy.downloadermiddlewares.retry.RetryMiddleware": 550,
        "scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware": 560,
        "scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware": 580,
        "scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware": 590,
        "scrapy.downloadermiddlewares.redirect.RedirectMiddleware": 600,
        "scrapy.downloadermiddlewares.cookies.CookiesMiddleware": 700,
        "scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware": 750,
        "scrapy.downloadermiddlewares.stats.DownloaderStats": 850,
        "scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware": 900,
    },
    "DOWNLOAD_HANDLERS": {},
    "DOWNLOAD_HANDLERS_BASE": {
        "data": "scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler",
        "file": "scrapy.core.downloader.handlers.file.FileDownloadHandler",
        "http": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
        "https": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
        "s3": "scrapy.core.downloader.handlers.s3.S3DownloadHandler",
        "ftp": "scrapy.core.downloader.handlers.ftp.FTPDownloadHandler",
    },
    "EXTENSIONS": {},
    "EXTENSIONS_BASE": {
        "scrapy.extensions.corestats.CoreStats": 0,
        "scrapy.extensions.telnet.TelnetConsole": 0,
        "scrapy.extensions.memusage.MemoryUsage": 0,
        "scrapy.extensions.memdebug.MemoryDebugger": 0,
        "scrapy.extensions.closespider.CloseSpider": 0,
        "scrapy.extensions.feedexport.FeedExporter": 0,
        "scrapy.extensions.logstats.LogStats": 0,
        "scrapy.extensions.spiderstate.SpiderState": 0,
        "scrapy.extensions.throttle.AutoThrottle": 0,
    },
    "FEEDS": {},
    "FEED_EXPORTERS": {},
    "FEED_EXPORTERS_BASE": {
        "json": "scrapy.exporters.JsonItemExporter",
        "jsonlines": "scrapy.exporters.JsonLinesItemExporter",
        "jsonl": "scrapy.exporters.JsonLinesItemExporter",
        "jl": "scrapy.exporters.JsonLinesItemExporter",
        "csv": "scrapy.exporters.CsvItemExporter",
        "xml": "scrapy.exporters.XmlItemExporter",
        "marshal": "scrapy.exporters.MarshalItemExporter",
        "pickle": "scrapy.exporters.PickleItemExporter",
    },
    "FEED_STORAGES": {},
    "FEED_STORAGES_BASE": {
        "": "scrapy.extensions.feedexport.FileFeedStorage",
        "file": "scrapy.extensions.feedexport.FileFeedStorage",
        "ftp": "scrapy.extensions.feedexport.FTPFeedStorage",
        "gs": "scrapy.extensions.feedexport.GCSFeedStorage",
        "s3": "scrapy.extensions.feedexport.S3FeedStorage",
        "stdout": "scrapy.extensions.feedexport.StdoutFeedStorage",
    },
    "HTTPCACHE_IGNORE_HTTP_CODES": [],
    "HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS": [],
    "HTTPCACHE_IGNORE_SCHEMES": ["file"],
    "ITEM_PIPELINES": {},
    "ITEM_PIPELINES_BASE": {},
    "MEMDEBUG_NOTIFY": [],
    "MEMUSAGE_NOTIFY_MAIL": [],
    "METAREFRESH_IGNORE_TAGS": [],
    "RETRY_HTTP_CODES": [
        500,
        502,
        503,
        504,
        522,
        524,
        408,
        429,
    ],
    "SPIDER_CONTRACTS": {},
    "SPIDER_CONTRACTS_BASE": {
        "scrapy.contracts.default.UrlContract": 1,
        "scrapy.contracts.default.CallbackKeywordArgumentsContract": 1,
        "scrapy.contracts.default.ReturnsContract": 2,
        "scrapy.contracts.default.ScrapesContract": 3,
    },
    "SPIDER_MIDDLEWARES": {},
    "SPIDER_MIDDLEWARES_BASE": {
        "scrapy.spidermiddlewares.httperror.HttpErrorMiddleware": 50,
        "scrapy.spidermiddlewares.offsite.OffsiteMiddleware": 500,
        "scrapy.spidermiddlewares.referer.RefererMiddleware": 700,
        "scrapy.spidermiddlewares.urllength.UrlLengthMiddleware": 800,
        "scrapy.spidermiddlewares.depth.DepthMiddleware": 900,
    },
    "SPIDER_MODULES": [],
    "STATSMAILER_RCPTS": [],
    "TELNETCONSOLE_PORT": [
        6023,
        6073,
    ],
}

# Run the Actor and wait for it to finish
run = client.actor("jupri/scrapyfy").call(run_input=run_input)

# Fetch and print Actor results from the run's dataset (if there are any)
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)

# 📚 Want to learn more 📖? Go to → https://docs.apify.com/api/client/python/docs/quick-start
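The call() method above blocks until the run finishes. If you would rather start the run and check on it yourself, the Python client also exposes start() on Actors and wait_for_finish() on runs; the sketch below assumes those methods behave as in recent apify-client releases.

# Start the run without blocking; `run` holds the run's metadata, including its ID.
run = client.actor("jupri/scrapyfy").start(run_input=run_input)

# ... do other work here ...

# Block until the run completes (or poll its status periodically instead).
finished_run = client.run(run["id"]).wait_for_finish()
print(finished_run["status"])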
ScrapyFy API in Python
The Apify API client for Python is the official library that allows you to use the ScrapyFy API from Python, providing convenience functions and automatic retries on errors.
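The retry behaviour can also be tuned when the client is constructed. A minimal sketch, assuming the max_retries and min_delay_between_retries_millis keyword arguments accepted by recent apify-client releases:

from apify_client import ApifyClient

# Client with explicit retry settings (keyword names are assumptions based on
# recent apify-client releases; check your installed version's documentation).
client = ApifyClient(
    "<YOUR_API_TOKEN>",
    max_retries=8,                         # retry failed API calls up to 8 times
    min_delay_between_retries_millis=500,  # back-off starts at 500 ms
)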
Install the apify-client
$ pip install apify-client
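To confirm the package is installed, you can print its version with the standard library (nothing Apify-specific here):

$ python -c "from importlib.metadata import version; print(version('apify-client'))"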
Other API clients include: