1import { readFile } from 'node:fs/promises';
2import { dirname } from 'node:path';
3import { setTimeout } from 'node:timers/promises';
4import { fileURLToPath, URL } from 'node:url';
5
6import type {
7 AutoscaledPool,
8 Awaitable,
9 Dictionary,
10 ProxyConfiguration,
11 PuppeteerCrawlerOptions,
12 PuppeteerCrawlingContext,
13 Request,
14} from '@crawlee/puppeteer';
15import {
16 Dataset,
17 KeyValueStore,
18 log,
19 PuppeteerCrawler,
20 puppeteerUtils,
21 RequestList,
22 RequestQueueV2,
23} from '@crawlee/puppeteer';
24import type { ApifyEnv } from 'apify';
25import { Actor } from 'apify';
26import contentType from 'content-type';
27
28import DevToolsServer from 'devtools-server';
29import { getInjectableScript } from 'idcac-playwright';
30import type { HTTPResponse, Page } from 'puppeteer';
31
32import type { CrawlerSetupOptions, createContext } from '@apify/scraper-tools';
33import {
34 browserTools,
35 constants as scraperToolsConstants,
36 tools,
37} from '@apify/scraper-tools';
38
39import { createBundle } from './bundle.browser.js';
40import type { Input } from './consts.js';
41import {
42 BreakpointLocation,
43 CHROME_DEBUGGER_PORT,
44 ProxyRotation,
45 RunMode,
46} from './consts.js';
47import { GlobalStore } from './global_store.js';
48
// Name of the key-value store used to persist the session pool's state
// when the user supplies a `sessionPoolName` input.
const SESSION_STORE_NAME = 'APIFY-WEB-SCRAPER-SESSION-STORE';
// Key-value store flag marking that the request queue was already seeded
// with the start URLs, so resurrected/migrated runs don't enqueue them twice.
const REQUEST_QUEUE_INIT_FLAG_KEY = 'REQUEST_QUEUE_INITIALIZED';

// Development runs are capped to a single concurrent page so the attached
// DevTools debugger only ever deals with one tab.
const MAX_CONCURRENCY_IN_DEVELOPMENT = 1;
const {
    SESSION_MAX_USAGE_COUNTS,
    DEFAULT_VIEWPORT,
    DEVTOOLS_TIMEOUT_SECS,
    META_KEY,
} = scraperToolsConstants;
// Input schema, loaded once at module load time (top-level await). Used to
// validate input on local runs only — see the `Actor.isAtHome()` check below.
const SCHEMA = JSON.parse(
    await readFile(new URL('../../INPUT_SCHEMA.json', import.meta.url), 'utf8'),
);
62
/**
 * Per-page state shared between the pre/post navigation hooks and the
 * request handler. Stored in a WeakMap keyed by the Puppeteer Page so it
 * is garbage-collected together with the page.
 */
interface PageContext {
    // Random hash under which the scraper's browser-side API is exposed on `window`.
    apifyNamespace: string;
    // Flipped to true from the browser (via the skipLinks handle) to suppress link enqueueing.
    skipLinks: boolean;
    // process.hrtime() timestamps used for performance logging.
    timers: {
        start: [number, number];
        navStart?: [number, number];
    };

    // Handles injected into the browser context; see CrawlerSetup._injectBrowserHandles.
    browserHandles?: Awaited<ReturnType<CrawlerSetup['_injectBrowserHandles']>>;
}
73
/**
 * Shape of the object returned by the in-browser pageFunction wrapper
 * (built inside `page.evaluate` in `_requestHandler`).
 */
interface Output {
    // Value returned by the user's pageFunction, if it completed.
    pageFunctionResult?: Dictionary;
    // Serialized error (own enumerable props copied), if the pageFunction threw.
    pageFunctionError?: tools.ErrorLike;
    // The context's request object as (possibly) mutated by the pageFunction in the browser.
    requestFromBrowser: Request;
}
79
80
81
82
83
/**
 * Holds all the information necessary to build a {@link PuppeteerCrawler}
 * from the actor's input and to create the context passed to the user's
 * pageFunction inside the browser.
 *
 * Construction is synchronous; storage handles (key-value store, request
 * queue, dataset) are opened asynchronously in `_initializeAsync()`, whose
 * promise is awaited at the top of `createCrawler()`.
 */
export class CrawlerSetup implements CrawlerSetupOptions {
    name = 'Web Scraper';
    // Original input serialized to JSON, passed verbatim into the browser context.
    rawInput: string;
    env: ApifyEnv;

    // Cross-navigation storage exposed to the pageFunction through browser handles.
    globalStore = new GlobalStore<unknown>();
    requestQueue: RequestQueueV2;
    keyValueStore: KeyValueStore;
    customData: unknown;
    input: Input;
    // Max pages a single browser session may serve; derived from the proxy rotation setting.
    maxSessionUsageCount: number;
    evaledPreNavigationHooks: ((...args: unknown[]) => Awaitable<void>)[];
    evaledPostNavigationHooks: ((...args: unknown[]) => Awaitable<void>)[];

    // Per-page state keyed by the Puppeteer Page. A WeakMap so entries are
    // collected together with their pages instead of leaking across the crawl.
    pageContexts = new WeakMap<Page, PageContext>();

    blockedUrlPatterns: string[] = [];
    isDevRun: boolean;
    datasetName?: string;
    keyValueStoreName?: string;
    requestQueueName?: string;

    crawler!: PuppeteerCrawler;
    dataset!: Dataset;
    // Count of items already pushed to the dataset; enforces maxResultsPerCrawl.
    pagesOutputted!: number;
    // Resolves when _initializeAsync() finishes; awaited in createCrawler().
    private initPromise: Promise<void>;

    constructor(input: Input) {
        // Switch to verbose logging as early as possible.
        if (input.debugLog) log.setLevel(log.LEVELS.DEBUG);

        // Keep a serialized copy of the raw input for the browser context.
        this.rawInput = JSON.stringify(input);

        // (dev) Optionally load the pageFunction from disk instead of input.
        tools.maybeLoadPageFunctionFromDisk(
            input,
            dirname(fileURLToPath(import.meta.url)),
        );

        // Validate input against the schema only on local runs; the platform
        // performs this validation itself before the run starts.
        if (!Actor.isAtHome()) tools.checkInputOrThrow(input, SCHEMA);

        this.input = input;
        this.env = Actor.getEnv();

        // Input validations that the JSON schema cannot express.
        this.input.pseudoUrls.forEach((purl) => {
            if (!tools.isPlainObject(purl)) {
                throw new Error(
                    'The pseudoUrls Array must only contain Objects.',
                );
            }
            if (purl.userData && !tools.isPlainObject(purl.userData)) {
                throw new Error(
                    'The userData property of a pseudoUrl must be an Object.',
                );
            }
        });

        this.input.initialCookies.forEach((cookie) => {
            if (!tools.isPlainObject(cookie)) {
                throw new Error(
                    'The initialCookies Array must only contain Objects.',
                );
            }
        });

        // Only events accepted by Puppeteer's page.goto `waitUntil` are valid.
        this.input.waitUntil.forEach((event) => {
            if (
                !/^(domcontentloaded|load|networkidle2|networkidle0)$/.test(
                    event,
                )
            ) {
                throw new Error(
                    'Navigation wait until events must be valid. See tooltip.',
                );
            }
        });

        // Session reuse limit follows the selected proxy rotation strategy.
        this.maxSessionUsageCount =
            SESSION_MAX_USAGE_COUNTS[this.input.proxyRotation];

        // Fail fast if the pageFunction source is not evaluable JavaScript.
        tools.evalFunctionOrThrow(this.input.pageFunction);

        if (this.input.preNavigationHooks) {
            this.evaledPreNavigationHooks = tools.evalFunctionArrayOrThrow(
                this.input.preNavigationHooks,
                'preNavigationHooks',
            );
        } else {
            this.evaledPreNavigationHooks = [];
        }

        if (this.input.postNavigationHooks) {
            this.evaledPostNavigationHooks = tools.evalFunctionArrayOrThrow(
                this.input.postNavigationHooks,
                'postNavigationHooks',
            );
        } else {
            this.evaledPostNavigationHooks = [];
        }

        // Block media resources by URL suffix unless the user opted in.
        if (!this.input.downloadMedia) {
            this.blockedUrlPatterns = [
                '.jpg',
                '.jpeg',
                '.png',
                '.svg',
                '.gif',
                '.webp',
                '.webm',
                '.ico',
                '.woff',
                '.eot',
            ];
        }

        if (!this.input.downloadCss) {
            this.blockedUrlPatterns.push('.css');
        }

        this.isDevRun = this.input.runMode === RunMode.Development;

        // Named (user-provided) storages, if any.
        this.datasetName = this.input.datasetName;
        this.keyValueStoreName = this.input.keyValueStoreName;
        this.requestQueueName = this.input.requestQueueName;

        // Placeholders replaced by _initializeAsync(); the `null!` pattern keeps
        // the fields non-optional. Consumers must await initPromise first
        // (createCrawler() does so).
        this.crawler = null!;
        this.requestQueue = null!;
        this.dataset = null!;
        this.keyValueStore = null!;
        this.initPromise = this._initializeAsync();
    }

    /**
     * Opens the storages and seeds the request queue with the start URLs.
     * Seeding happens only once per run — guarded by a key-value store flag —
     * so a resurrected run does not re-enqueue them.
     */
    private async _initializeAsync() {
        // Force extended unique keys / fragment handling on every start URL.
        const startUrls = this.input.startUrls.map((req) => {
            req.useExtendedUniqueKey = true;
            req.keepUrlFragment = this.input.keepUrlFragments;
            return req;
        });

        // KeyValueStore
        this.keyValueStore = await KeyValueStore.open(this.keyValueStoreName);

        // RequestQueue
        this.requestQueue = await RequestQueueV2.open(this.requestQueueName);

        if (
            !(await this.keyValueStore.recordExists(
                REQUEST_QUEUE_INIT_FLAG_KEY,
            ))
        ) {
            const requests: Request[] = [];
            for await (const request of await RequestList.open(
                null,
                startUrls,
            )) {
                // Cap the seeded requests at 1.5x maxResultsPerCrawl — enough
                // headroom for failures while avoiding enqueueing a huge list
                // that can never be consumed.
                if (
                    this.input.maxResultsPerCrawl > 0 &&
                    requests.length >= 1.5 * this.input.maxResultsPerCrawl
                ) {
                    break;
                }
                requests.push(request);
            }

            const { waitForAllRequestsToBeAdded } =
                await this.requestQueue.addRequestsBatched(requests);

            // Fire-and-forget: only set the "initialized" flag once every
            // request has actually landed in the queue.
            void waitForAllRequestsToBeAdded.then(async () => {
                await this.keyValueStore.setValue(
                    REQUEST_QUEUE_INIT_FLAG_KEY,
                    '1',
                );
            });
        }

        // Dataset — remember how many items already exist so that resumed runs
        // keep respecting maxResultsPerCrawl.
        this.dataset = await Dataset.open(this.datasetName);
        const info = await this.dataset.getInfo();
        this.pagesOutputted = info?.itemCount ?? 0;
    }

    /**
     * Resolves to a new instance of PuppeteerCrawler configured from the
     * actor input, after all async initialization completes.
     */
    async createCrawler() {
        await this.initPromise;

        const args = ['--disable-dev-shm-usage'];
        if (this.input.ignoreCorsAndCsp) args.push('--disable-web-security');
        if (this.isDevRun)
            args.push(`--remote-debugging-port=${CHROME_DEBUGGER_PORT}`);

        const options: PuppeteerCrawlerOptions = {
            requestHandler: this._requestHandler.bind(this),
            requestQueue: this.requestQueue,
            // Dev runs get a long timeout so a paused debugger doesn't kill the request.
            requestHandlerTimeoutSecs: this.isDevRun
                ? DEVTOOLS_TIMEOUT_SECS
                : this.input.pageFunctionTimeoutSecs,
            preNavigationHooks: [],
            postNavigationHooks: [],
            failedRequestHandler: this._failedRequestHandler.bind(this),
            respectRobotsTxtFile: this.input.respectRobotsTxtFile,
            maxConcurrency: this.isDevRun
                ? MAX_CONCURRENCY_IN_DEVELOPMENT
                : this.input.maxConcurrency,
            maxRequestRetries: this.input.maxRequestRetries,
            // 0 means "unlimited" in the input; Crawlee expects undefined for that.
            maxRequestsPerCrawl:
                this.input.maxPagesPerCrawl === 0
                    ? undefined
                    : this.input.maxPagesPerCrawl,
            proxyConfiguration: (await Actor.createProxyConfiguration(
                this.input.proxyConfiguration,
            )) as any as ProxyConfiguration,
            browserPoolOptions: {
                preLaunchHooks: [
                    async () => {
                        if (!this.isDevRun) {
                            return;
                        }

                        // Expose Chrome's remote-debugging endpoint through the
                        // actor's web server so DevTools can attach from outside.
                        const devToolsServer = new DevToolsServer({
                            containerHost: new URL(
                                process.env.ACTOR_WEB_SERVER_URL!,
                            ).host,
                            devToolsServerPort:
                                process.env.ACTOR_WEB_SERVER_PORT,
                            chromeRemoteDebuggingPort: CHROME_DEBUGGER_PORT,
                        });
                        await devToolsServer.start();
                    },
                ],
            },
            launchContext: {
                useChrome: this.input.useChrome,
                launchOptions: {
                    acceptInsecureCerts: this.input.ignoreSslErrors,
                    defaultViewport: DEFAULT_VIEWPORT,
                    args,
                    headless: this.input.headless,
                },
            },
            // Session pool and cookie persistence are disabled in dev mode.
            useSessionPool: !this.isDevRun,
            persistCookiesPerSession: !this.isDevRun,
            sessionPoolOptions: {
                persistStateKeyValueStoreId: this.input.sessionPoolName
                    ? SESSION_STORE_NAME
                    : undefined,
                persistStateKey: this.input.sessionPoolName,
                sessionOptions: {
                    maxUsageCount: this.maxSessionUsageCount,
                },
            },
            experiments: {
                requestLocking: true,
            },
        };

        this._createNavigationHooks(options);

        // A single session means the same proxy is reused until it fails.
        if (this.input.proxyRotation === ProxyRotation.UntilFailure) {
            options.sessionPoolOptions!.maxPoolSize = 1;
        }
        // Keep the dev browser alive indefinitely so the debugger connection survives.
        if (this.isDevRun) {
            options.browserPoolOptions!.retireBrowserAfterPageCount = Infinity;
        }

        this.crawler = new PuppeteerCrawler(options);

        if (this.isDevRun) logDevRunWarning();
        return this.crawler;
    }

    /**
     * Registers the scraper's own pre/post navigation hooks plus the
     * user-supplied hooks (wrapped with an enhanced context) on `options`.
     */
    private _createNavigationHooks(options: PuppeteerCrawlerOptions) {
        options.preNavigationHooks!.push(
            async ({ request, page, session }, gotoOptions) => {
                const start = process.hrtime();

                // Create a fresh page context; the namespace is a random hash
                // so page scripts cannot guess/clobber the injected API.
                const pageContext: PageContext = {
                    apifyNamespace: await tools.createRandomHash(),
                    skipLinks: false,
                    timers: { start },
                };
                this.pageContexts.set(page, pageContext);

                // Mirror the browser console into the actor log if requested.
                if (this.input.browserLog) browserTools.dumpConsole(page);

                // Block media/CSS requests configured in the constructor.
                if (this.blockedUrlPatterns.length) {
                    await puppeteerUtils.blockRequests(page, {
                        urlPatterns: this.blockedUrlPatterns,
                    });
                }

                // Apply initial cookies, skipping ones already in the session.
                if (
                    this.input.initialCookies &&
                    this.input.initialCookies.length
                ) {
                    const cookiesToSet = session
                        ? tools.getMissingCookiesFromSession(
                              session,
                              this.input.initialCookies,
                              request.url,
                          )
                        : this.input.initialCookies;
                    if (cookiesToSet && cookiesToSet.length) {
                        // Record them on the session first, then on the page.
                        session?.setCookies(cookiesToSet, request.url);
                        await page.setCookie(...cookiesToSet);
                    }
                }

                // Disable content security policy when CORS/CSP are ignored.
                if (this.input.ignoreCorsAndCsp) await page.setBypassCSP(true);

                tools.logPerformance(request, 'gotoFunction INIT', start);
                const handleStart = process.hrtime();
                pageContext.browserHandles = await this._injectBrowserHandles(
                    page,
                    pageContext,
                );
                tools.logPerformance(
                    request,
                    'gotoFunction INJECTION HANDLES',
                    handleStart,
                );

                // Inject the scraper bundle and the wrapped pageFunction so
                // they run before any page script on every new document.
                const evalStart = process.hrtime();
                await Promise.all([
                    page.evaluateOnNewDocument(
                        createBundle,
                        pageContext.apifyNamespace,
                    ),
                    page.evaluateOnNewDocument(
                        browserTools.wrapPageFunction(
                            this.input.pageFunction,
                            pageContext.apifyNamespace,
                        ),
                    ),
                ]);
                tools.logPerformance(
                    request,
                    'gotoFunction INJECTION EVAL',
                    evalStart,
                );

                if (this.isDevRun) {
                    // Pause in the debugger before navigation when requested.
                    const cdpClient = await page.target().createCDPSession();
                    await cdpClient.send('Debugger.enable');
                    if (
                        this.input.breakpointLocation ===
                        BreakpointLocation.BeforeGoto
                    ) {
                        await cdpClient.send('Debugger.pause');
                    }
                }

                pageContext.timers.navStart = process.hrtime();
                // Propagate the user's navigation timeout and waitUntil events.
                if (gotoOptions) {
                    gotoOptions.timeout = this.input.pageLoadTimeoutSecs * 1000;
                    gotoOptions.waitUntil = this.input.waitUntil;
                }
            },
        );

        options.preNavigationHooks!.push(
            ...this._runHookWithEnhancedContext(this.evaledPreNavigationHooks),
        );
        options.postNavigationHooks!.push(
            ...this._runHookWithEnhancedContext(this.evaledPostNavigationHooks),
        );

        options.postNavigationHooks!.push(
            async ({ request, page, response }) => {
                await this._waitForLoadEventWhenXml(page, response);
                const pageContext = this.pageContexts.get(page)!;
                tools.logPerformance(
                    request,
                    'gotoFunction NAVIGATION',
                    pageContext.timers.navStart!,
                );

                const delayStart = process.hrtime();
                // Make sure the injected namespace survived navigation.
                await this._assertNamespace(page, pageContext.apifyNamespace);

                // Inject JQuery after navigation when the user asked for it.
                if (this.input.injectJQuery)
                    await puppeteerUtils.injectJQuery(page);

                tools.logPerformance(
                    request,
                    'gotoFunction INJECTION DELAY',
                    delayStart,
                );
                tools.logPerformance(
                    request,
                    'gotoFunction EXECUTION',
                    pageContext.timers.start,
                );
            },
        );
    }

    /**
     * Wraps user hooks so the context they receive is extended with the
     * `customData` input value.
     */
    private _runHookWithEnhancedContext(
        hooks: ((...args: unknown[]) => Awaitable<void>)[],
    ) {
        return hooks.map((hook) => (ctx: Dictionary, ...args: unknown[]) => {
            const { customData } = this.input;
            return hook({ ...ctx, customData }, ...args);
        });
    }

    /**
     * Invoked when a request exhausts its retries: logs the last error and
     * pushes a failure record to the dataset.
     */
    private async _failedRequestHandler({ request }: PuppeteerCrawlingContext) {
        const lastError =
            request.errorMessages[request.errorMessages.length - 1];
        // Only the first line of the error — the rest is stack trace noise.
        const errorMessage = lastError ? lastError.split('\n')[0] : 'no error';
        log.error(
            `Request ${request.url} failed and will not be retried anymore. Marking as failed.\nLast Error Message: ${errorMessage}`,
        );
        return this._handleResult(request, undefined, undefined, true);
    }

    /**
     * First, it initializes the state and context of the pageFunction, then
     * it invokes the user-provided pageFunction inside the browser and,
     * finally, it processes its return value: enqueues links (unless the
     * pageFunction skipped them) and saves the result to the dataset.
     */
    private async _requestHandler(crawlingContext: PuppeteerCrawlingContext) {
        const { request, response, page, crawler, proxyInfo } = crawlingContext;
        const start = process.hrtime();
        const pageContext = this.pageContexts.get(page)!;

        // Make sure the request carries the crawl-depth metadata under META_KEY.
        tools.ensureMetaData(request);

        // Abort the crawl (not just this request) once the result limit is hit.
        const aborted = await this._handleMaxResultsPerCrawl(
            crawler.autoscaledPool,
        );
        if (aborted) return;

        // Serializable data passed into the browser to build the pageFunction context.
        const contextOptions = {
            crawlerSetup: {
                rawInput: this.rawInput,
                env: this.env,
                customData: this.input.customData,
                injectJQuery: this.input.injectJQuery,
                META_KEY,
            },
            browserHandles: pageContext.browserHandles,
            pageFunctionArguments: {
                request,
                proxyInfo,
                response: {
                    status: response && response.status(),
                    headers: response && response.headers(),
                },
            },
        };

        tools.logPerformance(request, 'requestHandler PREPROCESSING', start);

        // Dev mode: pause in the page's own debugger just before the pageFunction.
        if (
            this.isDevRun &&
            this.input.breakpointLocation ===
                BreakpointLocation.BeforePageFunction
        ) {
            await page.evaluate(async () => {
                // eslint-disable-next-line no-debugger
                debugger;
            });
        }

        // Best-effort dismissal of cookie consent modals (idcac script).
        if (this.input.closeCookieModals) {
            await setTimeout(500);
            await page.evaluate(getInjectableScript());
            await setTimeout(2000);
        }

        if (this.input.maxScrollHeightPixels > 0) {
            await crawlingContext.infiniteScroll({
                maxScrollHeight: this.input.maxScrollHeightPixels,
            });
        }

        const startUserFn = process.hrtime();

        // Run the wrapped pageFunction inside the browser under the random
        // namespace and collect its result/error plus the (possibly mutated)
        // request object.
        const namespace = pageContext.apifyNamespace;
        const output = await page.evaluate(
            async (ctxOpts: typeof contextOptions, namespc: string) => {
                const context: ReturnType<typeof createContext>['context'] =
                    window[namespc].createContext(ctxOpts);

                const output = {} as Output;
                try {
                    output.pageFunctionResult =
                        await window[namespc].pageFunction(context);
                } catch (err) {
                    // Serialize the error manually — Error instances do not
                    // survive the evaluate boundary.
                    const casted = err as Error;
                    output.pageFunctionError = Object.getOwnPropertyNames(
                        casted,
                    ).reduce((memo, name) => {
                        memo[name] = casted[name as keyof Error];
                        return memo;
                    }, {} as Dictionary);
                }

                output.requestFromBrowser = context.request as Request;

                // Dates also don't survive the evaluate boundary intact, so
                // convert them all (recursively) to ISO strings in place.
                function replaceAllDatesInObjectWithISOStrings(obj: Output) {
                    for (const [key, value] of Object.entries(obj)) {
                        if (
                            value instanceof Date &&
                            typeof value.toISOString === 'function'
                        ) {
                            Reflect.set(obj, key, value.toISOString());
                        } else if (value && typeof value === 'object') {
                            replaceAllDatesInObjectWithISOStrings(
                                value as Output,
                            );
                        }
                    }
                    return obj;
                }

                return replaceAllDatesInObjectWithISOStrings(output);
            },
            contextOptions,
            namespace,
        );

        tools.logPerformance(
            request,
            'requestHandler USER FUNCTION',
            startUserFn,
        );
        const finishUserFn = process.hrtime();

        const { pageFunctionResult, requestFromBrowser, pageFunctionError } =
            output;

        // Merge changes the pageFunction made to the request back into the
        // server-side request object.
        Object.assign(request, requestFromBrowser);

        // Rehydrate and rethrow the pageFunction's error, if any.
        if (pageFunctionError) throw tools.createError(pageFunctionError);

        // Enqueue links unless context.skipLinks() was called in the browser.
        if (!pageContext.skipLinks) {
            await this._handleLinks(crawlingContext);
        }

        // Save the result.
        await this._handleResult(request, response, pageFunctionResult);

        tools.logPerformance(
            request,
            'requestHandler POSTPROCESSING',
            finishUserFn,
        );
        tools.logPerformance(request, 'requestHandler EXECUTION', start);

        // Dev mode: pause in the page's debugger after the pageFunction ran.
        if (
            this.isDevRun &&
            this.input.breakpointLocation ===
                BreakpointLocation.AfterPageFunction
        ) {
            await page.evaluate(async () => {
                // eslint-disable-next-line no-debugger
                debugger;
            });
        }
    }

    /**
     * Aborts the whole crawl once the dataset holds maxResultsPerCrawl items.
     * Returns true when the crawl was aborted (caller should stop processing).
     */
    private async _handleMaxResultsPerCrawl(autoscaledPool?: AutoscaledPool) {
        if (
            !this.input.maxResultsPerCrawl ||
            this.pagesOutputted < this.input.maxResultsPerCrawl
        )
            return false;
        if (!autoscaledPool) return false;
        log.info(
            `User set limit of ${this.input.maxResultsPerCrawl} results was reached. Finishing the crawl.`,
        );
        await autoscaledPool.abort();
        return true;
    }

    /**
     * Enqueues links matched by `linkSelector` (filtered by globs/pseudoUrls/
     * excludes), unless the request already reached `maxCrawlingDepth`.
     */
    private async _handleLinks({
        request,
        enqueueLinks,
    }: PuppeteerCrawlingContext) {
        if (!(this.input.linkSelector && this.requestQueue)) return;
        const start = process.hrtime();

        const currentDepth = (
            request.userData![META_KEY] as tools.RequestMetadata
        ).depth;
        const hasReachedMaxDepth =
            this.input.maxCrawlingDepth &&
            currentDepth >= this.input.maxCrawlingDepth;
        if (hasReachedMaxDepth) {
            log.debug(
                `Request ${request.url} reached the maximum crawling depth of ${currentDepth}.`,
            );
            return;
        }

        await enqueueLinks({
            selector: this.input.linkSelector,
            globs: this.input.globs,
            pseudoUrls: this.input.pseudoUrls,
            exclude: this.input.excludes,
            transformRequestFunction: (requestOptions) => {
                // Stamp each child with its parent id and incremented depth.
                requestOptions.userData ??= {};
                requestOptions.userData[META_KEY] = {
                    parentRequestId: request.id || request.uniqueKey,
                    depth: currentDepth + 1,
                };

                requestOptions.useExtendedUniqueKey = true;
                requestOptions.keepUrlFragment = this.input.keepUrlFragments;
                return requestOptions;
            },
        });

        tools.logPerformance(request, 'handleLinks EXECUTION', start);
    }

    /**
     * Pushes one item (result or failure record) to the dataset and bumps
     * the output counter used by _handleMaxResultsPerCrawl.
     */
    private async _handleResult(
        request: Request,
        response?: HTTPResponse,
        pageFunctionResult?: Dictionary,
        isError?: boolean,
    ) {
        const start = process.hrtime();
        const payload = tools.createDatasetPayload(
            request,
            response,
            pageFunctionResult,
            isError,
        );
        await this.dataset.pushData(payload);
        this.pagesOutputted++;
        tools.logPerformance(request, 'handleResult EXECUTION', start);
    }

    /**
     * Waits until the injected namespace object appears on `window`,
     * converting a timeout into a user-actionable error message.
     */
    private async _assertNamespace(page: Page, namespace: string) {
        try {
            await page.waitForFunction(
                (nmspc: string) => !!window[nmspc],
                { timeout: this.input.pageLoadTimeoutSecs * 1000 },
                namespace,
            );
        } catch (err) {
            const casted = err as Error;
            // NOTE(review): distinguishes timeouts by stack prefix — assumes
            // Puppeteer's TimeoutError stack starts with 'TimeoutError'.
            if (casted.stack!.startsWith('TimeoutError')) {
                throw new Error(
                    'Injection of environment into the browser context timed out. ' +
                        'If this persists even after retries, try increasing the Page load timeout input setting.',
                );
            } else {
                throw err;
            }
        }
    }

    /**
     * XML responses are rendered by the browser's XML viewer, which can keep
     * loading after goto resolves; wait for document.readyState === 'complete'
     * before touching the page. Non-XML content types return immediately.
     */
    private async _waitForLoadEventWhenXml(
        page: Page,
        response?: HTTPResponse,
    ) {
        // Response can be undefined (e.g. about:blank); nothing to wait for.
        if (!response) return;

        const cTypeHeader = response.headers()['content-type'];
        try {
            const { type } = contentType.parse(cTypeHeader);
            if (!/^(text|application)\/xml$|\+xml$/.test(type)) return;
        } catch (err) {
            // Invalid or missing content type header — skip the XML wait.
            return;
        }

        try {
            const timeout = this.input.pageLoadTimeoutSecs * 1000;
            await page.waitForFunction(
                () => document.readyState === 'complete',
                { timeout },
            );
        } catch (err) {
            const casted = err as Error;
            if (casted.stack!.startsWith('TimeoutError')) {
                throw new Error(
                    "Parsing of XML in the page timed out. If you're expecting a large XML file, " +
                        ' such as a site map, try increasing the Page load timeout input setting.',
                );
            } else {
                throw err;
            }
        }
    }

    /**
     * Creates browser-side handles for node-side functionality the
     * pageFunction context needs: snapshotting, skipLinks, the global store,
     * logging, and (when available) the request queue and key-value store.
     */
    private async _injectBrowserHandles(page: Page, pageContext: PageContext) {
        const saveSnapshotP = browserTools.createBrowserHandle(page, async () =>
            browserTools.saveSnapshot({ page }),
        );
        const skipLinksP = browserTools.createBrowserHandle(page, () => {
            pageContext.skipLinks = true;
        });
        const globalStoreP = browserTools.createBrowserHandlesForObject(
            page,
            this.globalStore,
            [
                'size',
                'clear',
                'delete',
                'entries',
                'get',
                'has',
                'keys',
                'set',
                'values',
            ],
            ['size'],
        );
        const logP = browserTools.createBrowserHandlesForObject(page, log, [
            'LEVELS',
            'setLevel',
            'getLevel',
            'debug',
            'info',
            'warning',
            'error',
            'exception',
        ]);
        const requestQueueP = this.requestQueue
            ? browserTools.createBrowserHandlesForObject(
                  page,
                  this.requestQueue,
                  ['addRequest'],
              )
            : null;
        const keyValueStoreP = this.keyValueStore
            ? browserTools.createBrowserHandlesForObject(
                  page,
                  this.keyValueStore,
                  ['getValue', 'setValue'],
              )
            : null;

        const [
            saveSnapshot,
            skipLinks,
            globalStore,
            logHandle,
            requestQueue,
            keyValueStore,
        ] = await Promise.all([
            saveSnapshotP,
            skipLinksP,
            globalStoreP,
            logP,
            requestQueueP,
            keyValueStoreP,
        ]);

        return {
            saveSnapshot,
            skipLinks,
            globalStore,
            log: logHandle,
            keyValueStore,
            requestQueue,
        };
    }
}
902
903function logDevRunWarning() {
904 log.warning(`
905*****************************************************************
906* Web Scraper is running in DEVELOPMENT MODE! *
907* Concurrency is limited, sessionPool is not available, *
908* timeouts are increased and debugger is enabled. *
909* If you want full control and performance switch *
910* Run type to PRODUCTION! *
911*****************************************************************
912`);
913}