1import { ApifyClient } from 'apify-client';
2
3
4
5const client = new ApifyClient({
6 token: '<YOUR_API_TOKEN>',
7});
8
9
10const input = {
11 "startUrls": [
12 {
13 "url": "https://www.cnbc.com/2025/03/12/googles-deepmind-says-it-will-use-ai-models-to-power-physical-robots.html"
14 }
15 ],
16 "browserConfig": {
17 "browser_type": "chromium",
18 "headless": true,
19 "verbose_logging": false,
20 "ignore_https_errors": true,
21 "user_agent": "random",
22 "proxy": "",
23 "viewport_width": 1280,
24 "viewport_height": 720,
25 "accept_downloads": false,
26 "extra_headers": {}
27 },
28 "crawlerConfig": {
29 "cache_mode": "BYPASS",
30 "page_timeout": 20000,
31 "simulate_user": true,
32 "override_navigator": true,
33 "magic": true,
34 "remove_overlay_elements": true,
35 "delay_before_return_html": 0.75,
36 "wait_for": "",
37 "screenshot": false,
38 "pdf": false,
39 "enable_rate_limiting": false,
40 "memory_threshold_percent": 90,
41 "word_count_threshold": 200,
42 "css_selector": "",
43 "excluded_tags": [],
44 "excluded_selector": "",
45 "only_text": false,
46 "prettify": false,
47 "keep_data_attributes": false,
48 "remove_forms": false,
49 "bypass_cache": false,
50 "disable_cache": false,
51 "no_cache_read": false,
52 "no_cache_write": false,
53 "wait_until": "domcontentloaded",
54 "wait_for_images": false,
55 "check_robots_txt": false,
56 "mean_delay": 0.1,
57 "max_range": 0.3,
58 "js_code": "",
59 "js_only": false,
60 "ignore_body_visibility": true,
61 "scan_full_page": false,
62 "scroll_delay": 0.2,
63 "process_iframes": false,
64 "adjust_viewport_to_content": false,
65 "screenshot_wait_for": 0,
66 "screenshot_height_threshold": 20000,
67 "image_description_min_word_threshold": 50,
68 "image_score_threshold": 3,
69 "exclude_external_images": false,
70 "exclude_social_media_domains": [],
71 "exclude_external_links": false,
72 "exclude_social_media_links": false,
73 "exclude_domains": [],
74 "verbose": true,
75 "log_console": false,
76 "stream": false
77 },
78 "deepCrawlConfig": {
79 "max_pages": 100,
80 "max_depth": 3,
81 "include_external": false,
82 "score_threshold": 0.5,
83 "filter_chain": [],
84 "keywords": [
85 "crawl",
86 "example",
87 "async",
88 "configuration"
89 ],
90 "weight": 0.7
91 },
92 "markdownConfig": {
93 "ignore_links": false,
94 "ignore_images": false,
95 "escape_html": true,
96 "skip_internal_links": false,
97 "include_sup_sub": false,
98 "citations": false,
99 "body_width": 80,
100 "fit_markdown": false
101 },
102 "contentFilterConfig": {
103 "type": "pruning",
104 "user_query": "",
105 "threshold": 0.45,
106 "min_word_threshold": 5,
107 "bm25_threshold": 1.2,
108 "apply_llm_filter": false,
109 "semantic_filter": "",
110 "word_count_threshold": 10,
111 "sim_threshold": 0.3,
112 "max_dist": 0.2,
113 "top_k": 3,
114 "linkage_method": "ward"
115 },
116 "userAgentConfig": {
117 "user_agent_mode": "random",
118 "device_type": "desktop",
119 "browser_type": "chrome",
120 "num_browsers": 1
121 },
122 "llmConfig": {
123 "provider": "groq/deepseek-r1-distill-llama-70b",
124 "api_token": "",
125 "instruction": "Summarize content in clean markdown.",
126 "base_url": "",
127 "chunk_token_threshold": 2048,
128 "apply_chunking": true,
129 "input_format": "markdown",
130 "temperature": 0.7,
131 "max_tokens": 4096
132 },
133 "extractionSchema": {
134 "name": "Custom Extraction",
135 "baseSelector": "div.article",
136 "fields": [
137 {
138 "name": "title",
139 "selector": "h1",
140 "type": "text"
141 },
142 {
143 "name": "link",
144 "selector": "a",
145 "type": "attribute",
146 "attribute": "href"
147 }
148 ]
149 }
150};
151
152
153const run = await client.actor("raizen/ai-web-scraper").call(input);
154
155
156console.log('Results from dataset');
157console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);
158const { items } = await client.dataset(run.defaultDatasetId).listItems();
159items.forEach((item) => {
160 console.dir(item);
161});
162
163