1from apify_client import ApifyClient
2
3
4
# Client for the Apify API.
# NOTE(review): token is hard-coded as a placeholder — consider loading it
# from an environment variable instead of committing it to source.
client = ApifyClient("<YOUR_API_TOKEN>")


# Input for the "drobnikj/extended-gpt-scraper" actor run (see the call below).
run_input = {
    # Page(s) where the crawl starts.
    "startUrls": [{ "url": "https://news.ycombinator.com/" }],
    # Natural-language instructions for the GPT model; the actor is asked to
    # return the top-scoring post as JSON with the three listed fields.
    "instructions": """Gets the post with the most points from the page and returns it as JSON in this format:
postTitle
postUrl
pointsCount""",
    # OpenAI model the actor uses for extraction.
    "model": "gpt-3.5-turbo",
    # URL glob filters for which discovered links to follow / skip
    # (empty here, so no include/exclude filtering).
    "includeUrlGlobs": [],
    "excludeUrlGlobs": [],
    # CSS selector used to discover links on each page.
    "linkSelector": "a[href]",
    # Cookies pre-set before crawling (none).
    "initialCookies": [],
    # Route requests through the Apify proxy pool.
    "proxyConfiguration": { "useApifyProxy": True },
    # Restrict scraping to a sub-element of the page (empty = whole page).
    "targetSelector": "",
    # Elements stripped from the HTML before it is sent to the model.
    "removeElementsCssSelector": "script, style, noscript, path, svg, xlink",
    # URL globs for pages that should skip the GPT step (none).
    "skipGptGlobs": [],
    # JSON schema the model's structured output must conform to.
    "schema": {
        "type": "object",
        "properties": {
            "title": {
                "type": "string",
                "description": "Page title",
            },
            "description": {
                "type": "string",
                "description": "Page description",
            },
        },
        "required": [
            "title",
            "description",
        ],
    },
    # Optional prose description of the schema (unused here).
    "schemaDescription": "",
}
42
43
# Start the actor and wait for the run to finish.
run = client.actor("drobnikj/extended-gpt-scraper").call(run_input=run_input)

# Actor.call() returns None when the run could not be completed (e.g. the
# wait timed out or starting the run failed). The original code would then
# crash with an opaque TypeError on the subscript below — fail loudly with
# a clear message instead.
if run is None:
    raise RuntimeError("Actor run did not finish successfully.")

# Point the user at the stored results, then stream and print each item
# from the run's default dataset.
dataset_id = run["defaultDatasetId"]
print(f"💾 Check your data here: https://console.apify.com/storage/datasets/{dataset_id}")
for item in client.dataset(dataset_id).iterate_items():
    print(item)
50
51