import { ApifyClient } from 'apify-client';
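// Initialize the ApifyClient with your Apify API token.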
const client = new ApifyClient({
    token: '<YOUR_API_TOKEN>',
});
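// Prepare Actor input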
const input = {
    "startUrls": [
        {
            "url": "https://crawlee.dev/js"
        }
    ],
    "respectRobotsTxtFile": true,
    "globs": [
        {
            "glob": "https://crawlee.dev/js/*/*"
        }
    ],
    "pseudoUrls": [],
    "excludes": [
        {
            "glob": "/**/*.{png,jpg,jpeg,pdf}"
        }
    ],
    "linkSelector": "a[href]",
    "runScripts": false,
30 "pageFunction": async function pageFunction(context) {
31 const { window, request, log } = context;
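        // The "window" property exposes the JSDOM window object of the loaded page.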
        const pageTitle = window.document.title;
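        // The "request" property holds information about the URL currently being processed.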
        const url = request.url;
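        // Use the "log" object to write messages to the Actor run log.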
        log.info('Page scraped', { url, pageTitle });
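        // The returned object is stored in the run's default dataset.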
        return {
            url,
            pageTitle
        };
    },
50 "proxyConfiguration": {
51 "useApifyProxy": true
52 },
53 "initialCookies": [],
54 "additionalMimeTypes": [],
55 "preNavigationHooks": `// We need to return array of (possibly async) functions here.
56 // The functions accept two arguments: the "crawlingContext" object
57 // and "requestAsBrowserOptions" which are passed to the `requestAsBrowser()`
58 // function the crawler calls to navigate..
59 [
60 async (crawlingContext, requestAsBrowserOptions) => {
61 // ...
62 }
63 ]`,
64 "postNavigationHooks": `// We need to return array of (possibly async) functions here.
65 // The functions accept a single argument: the "crawlingContext" object.
66 [
67 async (crawlingContext) => {
68 // ...
69 },
70 ]`,
71 "customData": {}
72};
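// Run the Actor and wait for it to finish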
const run = await client.actor("apify/jsdom-scraper").call(input);
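// Fetch and print Actor results from the run's dataset (if any)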
console.log('Results from dataset');
console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);
const { items } = await client.dataset(run.defaultDatasetId).listItems();
items.forEach((item) => {
    console.dir(item);
});