1import { ApifyClient } from 'apify-client';
2
3
4
5const client = new ApifyClient({
6 token: '<YOUR_API_TOKEN>',
7});
8
9
10const input = {
11 "startUrls": [
12 {
13 "url": "https://crawlee.dev/js"
14 }
15 ],
16 "globs": [
17 {
18 "glob": "https://crawlee.dev/js/*/*"
19 }
20 ],
21 "pseudoUrls": [],
22 "excludes": [
23 {
24 "glob": "/**/*.{png,jpg,jpeg,pdf}"
25 }
26 ],
27 "linkSelector": "a",
28 "respectRobotsTxtFile": true,
29 "pageFunction": async function pageFunction(context) {
30 const { page, request, log } = context;
31 const title = await page.title();
32 log.info(`URL: ${request.url} TITLE: ${title}`);
33 return {
34 url: request.url,
35 title
36 };
37 },
38 "proxyConfiguration": {
39 "useApifyProxy": true
40 },
41 "initialCookies": [],
42 "launcher": "chromium",
43 "waitUntil": "networkidle",
44 "preNavigationHooks": `// We need to return array of (possibly async) functions here.
45 // The functions accept two arguments: the "crawlingContext" object
46 // and "gotoOptions".
47 [
48 async (crawlingContext, gotoOptions) => {
49 const { page } = crawlingContext;
50 // ...
51 },
52 ]`,
53 "postNavigationHooks": `// We need to return array of (possibly async) functions here.
54 // The functions accept a single argument: the "crawlingContext" object.
55 [
56 async (crawlingContext) => {
57 const { page } = crawlingContext;
58 // ...
59 },
60 ]`,
61 "customData": {}
62};
63
64
65const run = await client.actor("apify/playwright-scraper").call(input);
66
67
68console.log('Results from dataset');
69console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);
70const { items } = await client.dataset(run.defaultDatasetId).listItems();
71items.forEach((item) => {
72 console.dir(item);
73});
74
75