import { ApifyClient } from 'apify-client';

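// Initialize the ApifyClient with your Apify API token.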
const client = new ApifyClient({
    token: '<YOUR_API_TOKEN>',
});

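// Prepare the Actor input.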
const input = {
    "startUrls": [
        {
            "url": "https://crawlee.dev/js"
        }
    ],
    "globs": [
        {
            "glob": "https://crawlee.dev/js/*/*"
        }
    ],
    "pseudoUrls": [],
    "excludes": [
        {
            "glob": "/**/*.{png,jpg,jpeg,pdf}"
        }
    ],
    "linkSelector": "a",
    "respectRobotsTxtFile": true,
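    // The page function is executed by the Actor for every crawled page.
    // It is passed as a string, because the Actor input is serialized to
    // JSON and a bare function would be dropped during serialization.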
    "pageFunction": `async function pageFunction(context) {
        const { page, request, log } = context;
        const title = await page.title();
        log.info(\`URL: \${request.url} TITLE: \${title}\`);
        return {
            url: request.url,
            title
        };
    }`,
    "proxyConfiguration": {
        "useApifyProxy": true
    },
    "initialCookies": [],
    "waitUntil": [
        "networkidle2"
    ],
    "preNavigationHooks": `// We need to return an array of (possibly async) functions here.
        // The functions accept two arguments: the "crawlingContext" object
        // and "gotoOptions".
        [
            async (crawlingContext, gotoOptions) => {
                const { page } = crawlingContext;
                // ...
            },
        ]`,
    "postNavigationHooks": `// We need to return an array of (possibly async) functions here.
        // The functions accept a single argument: the "crawlingContext" object.
        [
            async (crawlingContext) => {
                const { page } = crawlingContext;
                // ...
            },
        ]`,
    "customData": {}
};

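// Run the Actor and wait for it to finish.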
const run = await client.actor("apify/puppeteer-scraper").call(input);

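// Fetch and print Actor results from the run's default dataset.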
console.log('Results from dataset');
console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);
const { items } = await client.dataset(run.defaultDatasetId).listItems();
items.forEach((item) => {
    console.dir(item);
});