1import asyncio
2import httpx
3
async def scrape_hospitals(state_filter, max_records):
    """Query the CMS provider-data datastore and return its result rows.

    Args:
        state_filter: Optional state value. When truthy it is upper-cased and
            sent as the value of a ``state`` property filter; when falsy no
            filter parameters are sent.
        max_records: Forwarded unchanged as the ``limit`` query parameter.

    Returns:
        The value stored under ``"results"`` in the JSON response, or an
        empty list when that key is absent.

    Raises:
        Whatever ``raise_for_status()`` raises for an error HTTP status.
    """
    endpoint = "https://data.cms.gov/provider-data/api/1/datastore/query/xubh-q36u/0"
    query = {"limit": max_records, "offset": 0}
    if state_filter:
        query.update(
            {
                "filters[0][property]": "state",
                "filters[0][value]": state_filter.upper(),
            }
        )

    async with httpx.AsyncClient(timeout=30) as client:
        response = await client.get(endpoint, params=query)
        response.raise_for_status()
        payload = response.json()
        return payload.get("results", [])
14
async def scrape_drug_shortages(max_records):
    """Fetch drug-shortage records from the openFDA shortages endpoint.

    Args:
        max_records: Requested record count; the ``limit`` query parameter
            sent is ``min(max_records, 100)``.

    Returns:
        The value stored under ``"results"`` in the JSON response, or an
        empty list when that key is absent.

    Raises:
        Whatever ``raise_for_status()`` raises for an error HTTP status.
    """
    endpoint = "https://api.fda.gov/drug/shortages.json"
    # Never ask for more than 100 records in the single request made here.
    page_limit = min(max_records, 100)

    async with httpx.AsyncClient(timeout=30) as client:
        response = await client.get(endpoint, params={"limit": page_limit})
        response.raise_for_status()
        payload = response.json()
        return payload.get("results", [])
22
async def scrape_clinical_trials(condition, max_records):
    """Fetch one page of studies from the ClinicalTrials.gov v2 API.

    Args:
        condition: Sent as the ``query.cond`` query parameter.
        max_records: Requested record count; the ``pageSize`` query parameter
            sent is ``min(max_records, 100)``.

    Returns:
        The value stored under ``"studies"`` in the JSON response, or an
        empty list when that key is absent.

    Raises:
        Whatever ``raise_for_status()`` raises for an error HTTP status.
    """
    endpoint = "https://clinicaltrials.gov/api/v2/studies"
    query = {
        "query.cond": condition,
        "pageSize": min(max_records, 100),
        "format": "json",
    }

    async with httpx.AsyncClient(timeout=30) as client:
        response = await client.get(endpoint, params=query)
        response.raise_for_status()
        payload = response.json()
        return payload.get("studies", [])
30
async def main():
    """Run the Actor: fetch the selected sources and push them as one batch.

    Reads ``dataSources``, ``stateFilter``, ``trialCondition`` and
    ``maxRecords`` from the Actor input, falling back to all three sources,
    ``""``, ``"cancer"`` and ``100`` respectively when a key is missing.
    Each fetched record is wrapped as ``{"source": <label>, "data": <record>}``
    and the combined list is pushed with a single ``Actor.push_data`` call.
    """
    from apify import Actor

    async with Actor:
        actor_input = await Actor.get_input() or {}
        sources = actor_input.get(
            "dataSources", ["hospitals", "drug_shortages", "clinical_trials"]
        )
        state = actor_input.get("stateFilter", "")
        condition = actor_input.get("trialCondition", "cancer")
        max_records = actor_input.get("maxRecords", 100)

        # (input key, progress message, output label, fetch call), in run order.
        jobs = (
            (
                "hospitals",
                "Scraping CMS hospital data...",
                "cms_hospitals",
                lambda: scrape_hospitals(state, max_records),
            ),
            (
                "drug_shortages",
                "Scraping FDA drug shortages...",
                "fda_drug_shortages",
                lambda: scrape_drug_shortages(max_records),
            ),
            (
                "clinical_trials",
                "Scraping ClinicalTrials.gov...",
                "clinical_trials",
                lambda: scrape_clinical_trials(condition, max_records),
            ),
        )

        results = []
        for key, message, label, fetch in jobs:
            if key not in sources:
                continue
            print(message)
            records = await fetch()
            results.extend({"source": label, "data": record} for record in records)

        await Actor.push_data(results)
        print(f"Done. Pushed {len(results)} records.")
58
if __name__ == "__main__":
    # Start the Actor only when this file is executed as a script, so that
    # importing the module (e.g. to reuse the scrape_* helpers) has no side
    # effects and does not trigger network requests.
    asyncio.run(main())