import os

from apify_client import ApifyClient
2
3
4
# Authenticate against the Apify platform. Prefer the APIFY_TOKEN environment
# variable so real credentials never have to be committed to source control;
# the placeholder is kept as a fallback for this example snippet.
client = ApifyClient(os.environ.get("APIFY_TOKEN", "<YOUR_API_TOKEN>"))

# Input for the drobnikj/gpt-scraper actor: crawl Hacker News and have the
# LLM extract the highest-scoring post as structured JSON.
run_input = {
    "startUrls": [{"url": "https://news.ycombinator.com/"}],
    "instructions": """Gets the post with the most points from the page and returns it as JSON in this format:
postTitle
postUrl
pointsCount""",
    "includeUrlGlobs": [],
    "excludeUrlGlobs": [],
    "linkSelector": "a[href]",
    "initialCookies": [],
    "proxyConfiguration": {"useApifyProxy": True},
    "targetSelector": "",
    # Strip non-content markup before page text is handed to the model.
    "removeElementsCssSelector": "script, style, noscript, path, svg, xlink",
    # JSON schema describing the structured output the actor should produce.
    "schema": {
        "type": "object",
        "properties": {
            "title": {
                "type": "string",
                "description": "Page title",
            },
            "description": {
                "type": "string",
                "description": "Page description",
            },
        },
        "required": [
            "title",
            "description",
        ],
    },
    "schemaDescription": "",
}
40
41
# Start the GPT Scraper actor and block until the run finishes.
run = client.actor("drobnikj/gpt-scraper").call(run_input=run_input)
if run is None:
    # .call() can come back empty (e.g. the wait timed out) — fail loudly
    # here instead of crashing with a TypeError on the subscript below.
    raise RuntimeError("Actor run did not finish; no run object was returned.")

dataset_id = run["defaultDatasetId"]
print(f"💾 Check your data here: https://console.apify.com/storage/datasets/{dataset_id}")

# Stream every scraped record from the run's default dataset.
for item in client.dataset(dataset_id).iterate_items():
    print(item)
48
49