"""Run the Apify "Extended GPT Scraper" actor against Hacker News and print its output.

Starts the `drobnikj/extended-gpt-scraper` actor with GPT instructions to
extract the top-voted post, waits for the run to finish, then iterates the
resulting dataset and prints each item.

NOTE(review): the original file had editor line numbers pasted into every
line (e.g. `1from apify_client ...`), which made it a SyntaxError; they have
been stripped here.
"""
from apify_client import ApifyClient

# Client for the Apify platform API.
# Replace the placeholder with a real token (or load it from an env var) before running.
client = ApifyClient("<YOUR_API_TOKEN>")

# Input document for the drobnikj/extended-gpt-scraper actor.
run_input = {
    "startUrls": [{"url": "https://news.ycombinator.com/"}],
    # Natural-language instructions handed to the GPT model for each crawled page.
    "instructions": """Gets the post with the most points from the page and returns it as JSON in this format: 
postTitle
postUrl
pointsCount""",
    "model": "gpt-3.5-turbo",
    "includeUrlGlobs": [],
    "excludeUrlGlobs": [],
    "linkSelector": "a[href]",
    "initialCookies": [],
    "proxyConfiguration": {"useApifyProxy": True},
    "targetSelector": "",
    # Strip non-content elements before the page text is sent to the model.
    "removeElementsCssSelector": "script, style, noscript, path, svg, xlink",
    "skipGptGlobs": [],
    # JSON schema the model's structured output must conform to.
    "schema": {
        "type": "object",
        "properties": {
            "title": {
                "type": "string",
                "description": "Page title",
            },
            "description": {
                "type": "string",
                "description": "Page description",
            },
        },
        "required": [
            "title",
            "description",
        ],
    },
    "schemaDescription": "",
}

# Start the actor run and block until it finishes.
run = client.actor("drobnikj/extended-gpt-scraper").call(run_input=run_input)

# Point the user at the dataset in the Apify console, then print every scraped item.
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + run["defaultDatasetId"])
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)