from apify_client import ApifyClient

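# Initialize the ApifyClient with your API token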
client = ApifyClient("<YOUR_API_TOKEN>")

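# Prepare the Actor input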
run_input = {
    "startUrls": [{ "url": "https://crawlee.dev/js" }],
    "globs": [{ "glob": "https://crawlee.dev/js/*/*" }],
    "pseudoUrls": [],
    "excludes": [{ "glob": "/**/*.{png,jpg,jpeg,pdf}" }],
    "linkSelector": "a",
    "respectRobotsTxtFile": True,
    "pageFunction": """async function pageFunction(context) {
    const { page, request, log } = context;
    const title = await page.title();
    log.info(`URL: ${request.url} TITLE: ${title}`);
    return {
        url: request.url,
        title
    };
}""",
    "proxyConfiguration": { "useApifyProxy": True },
    "initialCookies": [],
    "launcher": "chromium",
    "waitUntil": "networkidle",
    "preNavigationHooks": """// We need to return an array of (possibly async) functions here.
// The functions accept two arguments: the "crawlingContext" object
// and "gotoOptions".
[
    async (crawlingContext, gotoOptions) => {
        const { page } = crawlingContext;
        // ...
    },
]""",
    "postNavigationHooks": """// We need to return an array of (possibly async) functions here.
// The functions accept a single argument: the "crawlingContext" object.
[
    async (crawlingContext) => {
        const { page } = crawlingContext;
        // ...
    },
]""",
    "customData": {},
}

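# Run the Actor and wait for it to finish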
run = client.actor("apify/playwright-scraper").call(run_input=run_input)

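# Fetch and print Actor results from the run's dataset (if there are any)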
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)