from apify_client import ApifyClient

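# Initialize the ApifyClient with your Apify API token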
client = ApifyClient("<YOUR_API_TOKEN>")

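# Prepare the Actor input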
run_input = {
    "startUrls": [{ "url": "https://crawlee.dev/js" }],
    "globs": [{ "glob": "https://crawlee.dev/js/*/*" }],
    "pseudoUrls": [],
    "excludes": [{ "glob": "/**/*.{png,jpg,jpeg,pdf}" }],
    "linkSelector": "a",
    "respectRobotsTxtFile": True,
    "pageFunction": """async function pageFunction(context) {
    const { page, request, log } = context;
    const title = await page.title();
    log.info(`URL: ${request.url} TITLE: ${title}`);
    return {
        url: request.url,
        title
    };
}""",
    "proxyConfiguration": { "useApifyProxy": True },
    "initialCookies": [],
    "waitUntil": "networkidle",
    "preNavigationHooks": """// We need to return an array of (possibly async) functions here.
// The functions accept two arguments: the "crawlingContext" object
// and "gotoOptions".
[
    async (crawlingContext, gotoOptions) => {
        const { page } = crawlingContext;
        // ...
    },
]""",
    "postNavigationHooks": """// We need to return an array of (possibly async) functions here.
// The functions accept a single argument: the "crawlingContext" object.
[
    async (crawlingContext) => {
        const { page } = crawlingContext;
        // ...
    },
]""",
    "customData": {},
}

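# Run the Actor and wait for it to finish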
run = client.actor("apify/camoufox-scraper").call(run_input=run_input)

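# Fetch and print Actor results from the run's dataset (if there are any)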
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)
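
# Optional sketch (not part of the generated template): export the same
# dataset to a local CSV file. This assumes apify-client's
# DatasetClient.get_items_as_bytes(), which serializes dataset items in the
# requested format; the "results.csv" filename is just an illustrative choice.
with open("results.csv", "wb") as f:
    f.write(client.dataset(run["defaultDatasetId"]).get_items_as_bytes(item_format="csv"))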