1import { ApifyClient } from 'apify-client';
2
3
4
// Initialize the Apify API client.
// NOTE(review): '<YOUR_API_TOKEN>' is a placeholder — replace it with a real
// API token (see your Apify console) before running, otherwise every call
// below will fail with an authentication error.
const client = new ApifyClient({
    token: '<YOUR_API_TOKEN>',
});
8
9
// Input for the "undrtkr984/web-scraper-task" actor run.
//
// The `pageFunction`, `preNavigationHooks` and `postNavigationHooks` values are
// JavaScript source code shipped as *strings*: they are evaluated inside the
// actor's browser context, not in this process.
//
// BUG FIX: the inner template literal on the `context.log.info(...)` line used
// raw backticks and `${...}` inside the enclosing template literal. The first
// unescaped backtick terminated the outer literal, so the file did not parse
// (and the `${context...}` interpolations would have been evaluated here,
// where `context`/`pageTitle` are undefined). They are now escaped with
// `\`` and `\${` so the actor receives them verbatim.
const input = {
    "runMode": "DEVELOPMENT",
    "startUrls": [
        {
            "url": "https://crawlee.dev"
        }
    ],
    "linkSelector": "a[href]",
    "globs": [
        {
            "glob": "https://crawlee.dev/*/*"
        }
    ],
    "pseudoUrls": [],
    "pageFunction": `// The function accepts a single argument: the "context" object.
// For a complete list of its properties and functions,
// see https://apify.com/apify/web-scraper#page-function
async function pageFunction(context) {
    // This statement works as a breakpoint when you're trying to debug your code. Works only with Run mode: DEVELOPMENT!
    // debugger;

    // jQuery is handy for finding DOM elements and extracting data from them.
    // To use it, make sure to enable the "Inject jQuery" option.
    const $ = context.jQuery;
    const pageTitle = $('title').first().text();
    const h1 = $('h1').first().text();
    const first_h2 = $('h2').first().text();
    const random_text_from_the_page = $('p').first().text();

    // Print some information to actor log.
    // Backticks and \${...} are escaped so this template literal is evaluated
    // inside the actor, not in this file.
    context.log.info(\`URL: \${context.request.url}, TITLE: \${pageTitle}\`);

    // Manually add a new page to the queue for scraping.
    await context.enqueueRequest({ url: 'http://www.example.com' });

    // Return an object with the data extracted from the page.
    // It will be stored to the resulting dataset.
    return {
        url: context.request.url,
        pageTitle,
        h1,
        first_h2,
        random_text_from_the_page
    };
}`,
    "proxyConfiguration": {
        "useApifyProxy": true
    },
    "initialCookies": [],
    "waitUntil": [
        "networkidle2"
    ],
    "preNavigationHooks": `// We need to return array of (possibly async) functions here.
// The functions accept two arguments: the "crawlingContext" object
// and "gotoOptions".
[
    async (crawlingContext, gotoOptions) => {
        // ...
    },
]`,
    "postNavigationHooks": `// We need to return array of (possibly async) functions here.
// The functions accept a single argument: the "crawlingContext" object.
[
    async (crawlingContext) => {
        // ...
    },
]`,
    "breakpointLocation": "NONE",
    "customData": {}
};
81
82
// Start the actor task and block until the run finishes.
const run = await client.actor("undrtkr984/web-scraper-task").call(input);

// Fetch the items the run stored in its default dataset and print each one.
console.log('Results from dataset');
console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);
const { items } = await client.dataset(run.defaultDatasetId).listItems();
for (const item of items) {
    console.dir(item);
}
92
93