To run the code examples, you need an Apify account. Replace <YOUR_API_TOKEN> in the code with your API token. For a more detailed explanation, see running Actors via the API in the Apify Docs.
import { ApifyClient } from 'apify-client';
// Initialize the ApifyClient with your Apify API token
const client = new ApifyClient({
token: '<YOUR_API_TOKEN>',
});
// Prepare Actor input
const input = {
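    // "DEVELOPMENT" enables debugging features such as the "debugger" statement
    // in the page function below; switch to "PRODUCTION" for real runs.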
"runMode": "DEVELOPMENT",
"startUrls": [
{
"url": "https://crawlee.dev"
}
],
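    // CSS selector matching the links the scraper should follow.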
"linkSelector": "a[href]",
"globs": [
{
"glob": "https://crawlee.dev/*/*"
}
],
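    // Pseudo-URLs are an older alternative to globs for matching URLs to enqueue; left empty here.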
"pseudoUrls": [],
"pageFunction": // The function accepts a single argument: the "context" object.
// For a complete list of its properties and functions,
// see https://apify.com/apify/web-scraper#page-function
async function pageFunction(context) {
// This statement works as a breakpoint when you're trying to debug your code. Works only with Run mode: DEVELOPMENT!
// debugger;
// jQuery is handy for finding DOM elements and extracting data from them.
// To use it, make sure to enable the "Inject jQuery" option.
const $ = context.jQuery;
const pageTitle = $('title').first().text();
const h1 = $('h1').first().text();
const first_h2 = $('h2').first().text();
const random_text_from_the_page = $('p').first().text();
// Print some information to the Actor log
context.log.info(`URL: ${context.request.url}, TITLE: ${pageTitle}`);
// Manually add a new page to the queue for scraping.
await context.enqueueRequest({ url: 'http://www.example.com' });
// Return an object with the data extracted from the page.
// It will be stored to the resulting dataset.
return {
url: context.request.url,
pageTitle,
h1,
first_h2,
random_text_from_the_page
};
},
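    // Route the scraper's requests through Apify Proxy.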
"proxyConfiguration": {
"useApifyProxy": true
},
"initialCookies": [],
"waitUntil": [
"networkidle2"
],
"preNavigationHooks": `// We need to return array of (possibly async) functions here.
// The functions accept two arguments: the "crawlingContext" object
// and "gotoOptions".
[
async (crawlingContext, gotoOptions) => {
// ...
},
]`,
"postNavigationHooks": `// We need to return array of (possibly async) functions here.
// The functions accept a single argument: the "crawlingContext" object.
[
async (crawlingContext) => {
// ...
},
]`,
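    // Where a DEVELOPMENT run should pause automatically; "NONE" disables the predefined breakpoint.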
"breakpointLocation": "NONE",
"customData": {}
};
(async () => {
// Run the Actor and wait for it to finish
const run = await client.actor("undrtkr984/web-scraper-task").call(input);
// Fetch and print Actor results from the run's dataset (if any)
console.log('Results from dataset');
const { items } = await client.dataset(run.defaultDatasetId).listItems();
items.forEach((item) => {
console.dir(item);
});
})();
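
If you would rather not block in a single call while the Actor runs, you can start the run and wait for it separately, and page through large datasets instead of fetching all items at once. Below is a minimal sketch of that variant using the same apify-client methods (start(), waitForFinish(), and the offset/limit options of listItems()); the page size of 1,000 items is an arbitrary value chosen for illustration.

(async () => {
    // Start the run without waiting for it to finish.
    const { id: runId } = await client.actor("undrtkr984/web-scraper-task").start(input);

    // Block until the run finishes (waitForFinish polls the run's status).
    const finishedRun = await client.run(runId).waitForFinish();

    // Page through the dataset instead of loading everything into memory at once.
    const pageSize = 1000; // arbitrary page size, for illustration only
    let offset = 0;
    for (;;) {
        const { items, total } = await client
            .dataset(finishedRun.defaultDatasetId)
            .listItems({ offset, limit: pageSize });
        items.forEach((item) => console.dir(item));
        offset += items.length;
        if (offset >= total || items.length === 0) break;
    }
})();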