ScrapyFy avatar
ScrapyFy
Under maintenance
Try for free

No credit card required

View all Actors
This Actor is under maintenance.

This Actor may be unreliable while under maintenance. Would you like to try a similar Actor instead?

See alternative Actors
ScrapyFy

ScrapyFy

jupri/scrapyfy
Try for free

No credit card required

Scrapy Runner

The code examples below show how to run the Actor and get its results. To run the code, you need to have an Apify account. Replace <YOUR_API_TOKEN> in the code with your API token, which you can find under Settings > Integrations in Apify Console. Learn more

Node.js

Python

curl

import { ApifyClient } from 'apify-client';

// Initialize the ApifyClient with your Apify API token
const client = new ApifyClient({
    token: '<YOUR_API_TOKEN>',
});

// Prepare Actor input: the spider source code (as a string) followed by the
// Scrapy settings that are passed to the Actor alongside it.
const input = {
    "spiders_code": "from urllib.parse import urljoin\r\n\r\n### multiple spiders can be specified\r\n\r\nclass TitleSpider(scrapy.Spider):\r\n\r\n    name = 'title_spider'\r\n    allowed_domains = [\"apify.com\"]\r\n    start_urls = [\"https://apify.com\"]\r\n\r\n    custom_settings = {\r\n        'REQUEST_FINGERPRINTER_IMPLEMENTATION'  : '2.7',\r\n        # Obey robots.txt rules\r\n        'ROBOTSTXT_OBEY'                        : True,\r\n        'DEPTH_LIMIT'                           : 2,\r\n        'LOG_ENABLED'                           : False,\r\n        #'CLOSESPIDER_PAGECOUNT'                 : 5,\r\n        'CLOSESPIDER_ITEMCOUNT'                 : 5,\r\n    }\r\n\r\n    def parse(self, response):\r\n        yield {\r\n            'url': response.url,\r\n            'title': response.css('title::text').extract_first(),\r\n        }\r\n        for link_href in response.css('a::attr(\"href\")'):\r\n            link_url = urljoin(response.url, link_href.get())\r\n            if link_url.startswith(('http://', 'https://')):\r\n                yield scrapy.Request(link_url)",
    "DEFAULT_REQUEST_HEADERS": {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en"
    },
    "DOWNLOADER_MIDDLEWARES": {},
    "DOWNLOADER_MIDDLEWARES_BASE": {
        "scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware": 100,
        "scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware": 300,
        "scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware": 350,
        "scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware": 400,
        "scrapy.downloadermiddlewares.useragent.UserAgentMiddleware": 500,
        "scrapy.downloadermiddlewares.retry.RetryMiddleware": 550,
        "scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware": 560,
        "scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware": 580,
        "scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware": 590,
        "scrapy.downloadermiddlewares.redirect.RedirectMiddleware": 600,
        "scrapy.downloadermiddlewares.cookies.CookiesMiddleware": 700,
        "scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware": 750,
        "scrapy.downloadermiddlewares.stats.DownloaderStats": 850,
        "scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware": 900
    },
    "DOWNLOAD_HANDLERS": {},
    "DOWNLOAD_HANDLERS_BASE": {
        "data": "scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler",
        "file": "scrapy.core.downloader.handlers.file.FileDownloadHandler",
        "http": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
        "https": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
        "s3": "scrapy.core.downloader.handlers.s3.S3DownloadHandler",
        "ftp": "scrapy.core.downloader.handlers.ftp.FTPDownloadHandler"
    },
    "EXTENSIONS": {},
    "EXTENSIONS_BASE": {
        "scrapy.extensions.corestats.CoreStats": 0,
        "scrapy.extensions.telnet.TelnetConsole": 0,
        "scrapy.extensions.memusage.MemoryUsage": 0,
        "scrapy.extensions.memdebug.MemoryDebugger": 0,
        "scrapy.extensions.closespider.CloseSpider": 0,
        "scrapy.extensions.feedexport.FeedExporter": 0,
        "scrapy.extensions.logstats.LogStats": 0,
        "scrapy.extensions.spiderstate.SpiderState": 0,
        "scrapy.extensions.throttle.AutoThrottle": 0
    },
    "FEEDS": {},
    "FEED_EXPORTERS": {},
    "FEED_EXPORTERS_BASE": {
        "json": "scrapy.exporters.JsonItemExporter",
        "jsonlines": "scrapy.exporters.JsonLinesItemExporter",
        "jsonl": "scrapy.exporters.JsonLinesItemExporter",
        "jl": "scrapy.exporters.JsonLinesItemExporter",
        "csv": "scrapy.exporters.CsvItemExporter",
        "xml": "scrapy.exporters.XmlItemExporter",
        "marshal": "scrapy.exporters.MarshalItemExporter",
        "pickle": "scrapy.exporters.PickleItemExporter"
    },
    "FEED_STORAGES": {},
    "FEED_STORAGES_BASE": {
        "": "scrapy.extensions.feedexport.FileFeedStorage",
        "file": "scrapy.extensions.feedexport.FileFeedStorage",
        "ftp": "scrapy.extensions.feedexport.FTPFeedStorage",
        "gs": "scrapy.extensions.feedexport.GCSFeedStorage",
        "s3": "scrapy.extensions.feedexport.S3FeedStorage",
        "stdout": "scrapy.extensions.feedexport.StdoutFeedStorage"
    },
    "HTTPCACHE_IGNORE_HTTP_CODES": [],
    "HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS": [],
    "HTTPCACHE_IGNORE_SCHEMES": [
        "file"
    ],
    "ITEM_PIPELINES": {},
    "ITEM_PIPELINES_BASE": {},
    "MEMDEBUG_NOTIFY": [],
    "MEMUSAGE_NOTIFY_MAIL": [],
    "METAREFRESH_IGNORE_TAGS": [],
    "RETRY_HTTP_CODES": [
        500,
        502,
        503,
        504,
        522,
        524,
        408,
        429
    ],
    "SPIDER_CONTRACTS": {},
    "SPIDER_CONTRACTS_BASE": {
        "scrapy.contracts.default.UrlContract": 1,
        "scrapy.contracts.default.CallbackKeywordArgumentsContract": 1,
        "scrapy.contracts.default.ReturnsContract": 2,
        "scrapy.contracts.default.ScrapesContract": 3
    },
    "SPIDER_MIDDLEWARES": {},
    "SPIDER_MIDDLEWARES_BASE": {
        "scrapy.spidermiddlewares.httperror.HttpErrorMiddleware": 50,
        "scrapy.spidermiddlewares.offsite.OffsiteMiddleware": 500,
        "scrapy.spidermiddlewares.referer.RefererMiddleware": 700,
        "scrapy.spidermiddlewares.urllength.UrlLengthMiddleware": 800,
        "scrapy.spidermiddlewares.depth.DepthMiddleware": 900
    },
    "SPIDER_MODULES": [],
    "STATSMAILER_RCPTS": [],
    "TELNETCONSOLE_PORT": [
        6023,
        6073
    ]
};

/**
 * Runs the Actor with `input`, waits for the run to finish, and prints every
 * item from the run's default dataset.
 *
 * @returns {Promise<void>} Resolves once all items have been printed; rejects
 *   if starting the run or reading the dataset fails.
 */
async function main() {
    // Run the Actor and wait for it to finish
    const run = await client.actor("jupri/scrapyfy").call(input);

    // NOTE(review): call() appears to resolve with the run object even when the
    // run did not succeed (confirm against the client docs), so surface a
    // non-successful status instead of silently printing a partial dataset.
    if (run.status !== 'SUCCEEDED') {
        console.warn(`Actor run ${run.id} finished with status ${run.status}`);
    }

    // Fetch and print Actor results from the run's dataset (if any)
    console.log('Results from dataset');
    console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);
    const { items } = await client.dataset(run.defaultDatasetId).listItems();
    for (const item of items) {
        console.dir(item);
    }
}

// Never leave the promise floating: report the failure and exit non-zero.
main().catch((err) => {
    console.error('Failed to run the Actor or fetch its results:', err);
    process.exitCode = 1;
});

// 📚 Want to learn more 📖? Go to → https://docs.apify.com/api/client/js/docs
Developer
Maintained by Community
Actor metrics
  • 2 monthly users
  • 44.4% runs succeeded
  • days response time
  • Created in Jun 2023
  • Modified 11 months ago