1from apify_client import ApifyClient
2
3
4
5client = ApifyClient("<YOUR_API_TOKEN>")
6
7
8run_input = {
9 "requests": [{ "url": "https://apify.com" }],
10 "pseudoUrls": [{ "purl": "https://apify.com[(/[\\w-]+)?]" }],
11 "linkSelector": "a[href]",
12 "pageFunction": """async function pageFunction(context) {
13 const { window, document, crawler, enqueueRequest, request, response, userData, json, body, kvStore, customData } = context;
14
15 const title = document.querySelector('title').textContent
16
17 const responseHeaders = response.headers
18
19 return {
20 title,
21 responseHeaders
22 };
23}""",
24 "preNavigationHooks": """// We need to return array of (possibly async) functions here.
25// The functions accept two arguments: the \"crawlingContext\" object
26// and \"requestAsBrowserOptions\" which are passed to the `requestAsBrowser()`
27// function the crawler calls to navigate..
28[
29 async (crawlingContext, requestAsBrowserOptions) => {
30 // ...
31 }
32]""",
33 "postNavigationHooks": """// We need to return array of (possibly async) functions here.
34// The functions accept a single argument: the \"crawlingContext\" object.
35[
36 async (crawlingContext) => {
37 // ...
38 },
39]""",
40 "proxy": { "useApifyProxy": True },
41 "additionalMimeTypes": [],
42 "customData": {},
43}
44
45
46run = client.actor("mstephen190/vanilla-js-scraper").call(run_input=run_input)
47
48
49print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
50for item in client.dataset(run["defaultDatasetId"]).iterate_items():
51 print(item)
52
53