import { ApifyClient } from 'apify-client';
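
// Initialize the ApifyClient with your Apify API token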
const client = new ApifyClient({
    token: '<YOUR_API_TOKEN>',
});
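
// Prepare the Actor input: start URLs, crawl depth, link-following
// rules, and the Python page function run on every crawled page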
const input = {
    "startUrls": [
        {
            "url": "https://crawlee.dev"
        }
    ],
    "maxCrawlingDepth": 1,
    "requestTimeout": 10,
    "linkSelector": "a[href]",
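    // Only links whose URLs match one of these regexes are followed;
    // '\\.' in the JS string literal produces '\.' in the pattern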
    "linkPatterns": [
        ".*crawlee\\.dev.*"
    ],
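    // Python page function, shipped to the Actor as a template-literal
    // string; module-level lines must stay flush-left so the Python
    // indentation remains valid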
    "pageFunction": `from typing import Any
from crawlee.crawlers import BeautifulSoupCrawlingContext

# See the Context section in the readme to find out which fields you can access:
# https://apify.com/apify/beautifulsoup-scraper#context
def page_function(context: BeautifulSoupCrawlingContext) -> Any:
    url = context.request.url
    title = context.soup.title.string if context.soup.title else None
    return {'url': url, 'title': title}`,
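    // Parser BeautifulSoup should use (its 'features' argument),
    // e.g. "html.parser" or "lxml"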
    "soupFeatures": "html.parser",
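    // Route the scraper's HTTP requests through Apify Proxy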
    "proxyConfiguration": {
        "useApifyProxy": true
    }
};
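
// Run the Actor and wait for it to finish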
const run = await client.actor("apify/beautifulsoup-scraper").call(input);
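
// Fetch and print Actor results from the run's default dataset (if any)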
console.log('Results from dataset');
console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);
const { items } = await client.dataset(run.defaultDatasetId).listItems();
items.forEach((item) => {
    console.dir(item);
});