1"""USGS Earthquakes — Apify Actor.
2
3Fetches earthquake data from the official USGS Earthquake Hazards Program API
4and returns clean, structured JSON for each event with:
5 - normalized ISO timestamps (USGS returns epoch milliseconds)
6 - split-out coordinates (USGS bundles lon/lat/depth in a single array)
7 - a human-readable severity classification derived from magnitude
8 - all the rich properties (felt reports, tsunami flag, alert level, etc.)
9
10Supports both the simple summary feeds (significant_week.geojson and friends)
11and the flexible `query` endpoint with magnitude/time/region filters.
12"""
13
14from __future__ import annotations
15
16import asyncio
17from datetime import datetime, timezone
18from urllib.parse import urlencode
19
20import httpx
21from apify import Actor
22
# Root of the pre-built USGS summary feeds; a feed filename is appended to it.
SUMMARY_BASE = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary"
# Endpoint used in query mode, where filter parameters are appended.
QUERY_URL = "https://earthquake.usgs.gov/fdsnws/event/1/query"


# Accepted `summaryFeed` input values mapped to the filename of the feed.
# Every filename is the feed name plus a ".geojson" suffix, so the mapping is
# generated from the tuple of supported names (insertion order is preserved).
SUMMARY_FEEDS = {
    feed_name: f"{feed_name}.geojson"
    for feed_name in (
        "significant_hour",
        "significant_day",
        "significant_week",
        "significant_month",
        "4.5_hour",
        "4.5_day",
        "4.5_week",
        "4.5_month",
        "2.5_day",
        "2.5_week",
        "2.5_month",
        "1.0_day",
        "1.0_week",
        "all_hour",
        "all_day",
        "all_week",
    )
}
45
46
def classify_magnitude(mag: float | None) -> str | None:
    """Map a magnitude to a human-readable severity label.

    Bands are open on the upper side: below 2.5 is "Micro", below 4.0
    "Minor", below 5.0 "Light", below 6.0 "Moderate", below 7.0 "Strong",
    below 8.0 "Major", and 8.0 or above "Great".

    Args:
        mag: Event magnitude, or None when it is not reported.

    Returns:
        The label of the band containing *mag*, or None when *mag* is None
        or NaN.
    """
    # NaN is the only value that is not equal to itself. It fails every "<"
    # test below, so without this guard it would be mislabelled "Great".
    if mag is None or mag != mag:
        return None

    bands = (
        (2.5, "Micro"),
        (4.0, "Minor"),
        (5.0, "Light"),
        (6.0, "Moderate"),
        (7.0, "Strong"),
        (8.0, "Major"),
    )
    for upper_bound, label in bands:
        if mag < upper_bound:
            return label
    return "Great"
64
65
def _epoch_ms_to_iso(ms: int | None) -> str | None:
    """Convert epoch milliseconds to an ISO 8601 timestamp in UTC.

    Args:
        ms: Milliseconds since the Unix epoch, or None.

    Returns:
        A string such as "2023-11-14T22:13:20+00:00", or None when *ms* is
        None or cannot be converted (wrong type, or outside the range that
        ``datetime`` can represent).
    """
    if ms is None:
        return None
    try:
        moment = datetime.fromtimestamp(ms / 1000.0, tz=timezone.utc)
    except (ValueError, TypeError, OverflowError, OSError):
        # fromtimestamp() reports out-of-range input with OverflowError as
        # well as ValueError/OSError; treat all of them as "no usable time"
        # so one malformed value cannot abort the whole run.
        return None
    return moment.isoformat()
73
74
def transform_feature(f: dict) -> dict:
    """Flatten one USGS GeoJSON feature into a plain output record.

    Timestamps are converted to ISO strings, the coordinate array is split
    into separate longitude/latitude/depth fields, and a severity label is
    derived from the magnitude. Missing properties come through as None.
    """
    props = f.get("properties") or {}
    geometry = f.get("geometry") or {}
    position = geometry.get("coordinates") or [None, None, None]

    def _component(index: int):
        # The array is ordered [longitude, latitude, depth]; a short array
        # yields None for the components it lacks.
        return position[index] if len(position) > index else None

    magnitude = props.get("mag")
    record = {
        "id": f.get("id"),
        "magnitude": magnitude,
        "magnitudeClass": classify_magnitude(magnitude),
        "magnitudeType": props.get("magType"),
        "place": props.get("place"),
        "time": _epoch_ms_to_iso(props.get("time")),
        "updated": _epoch_ms_to_iso(props.get("updated")),
        "latitude": _component(1),
        "longitude": _component(0),
        "depthKm": _component(2),
        "tsunamiFlag": bool(props.get("tsunami")),
    }

    # Remaining fields are copied verbatim under their output names.
    passthrough = (
        ("alertLevel", "alert"),
        ("feltReports", "felt"),
        ("cdi", "cdi"),
        ("mmi", "mmi"),
        ("significance", "sig"),
        ("eventType", "type"),
        ("status", "status"),
        ("url", "url"),
        ("detailUrl", "detail"),
    )
    for out_key, usgs_key in passthrough:
        record[out_key] = props.get(usgs_key)
    return record
108
109
def _numeric_or_none(value) -> float | None:
    """Parse a (possibly string) numeric input into a finite float.

    Args:
        value: Raw input value; it is converted with ``str()`` and stripped
            before parsing, so numbers and numeric strings both work.

    Returns:
        The parsed float, or None when *value* is None, blank, not a number,
        or not finite ("nan", "inf", or an overflowing literal like "1e999").
    """
    import math  # function-scope import: only this finiteness check needs it

    if value is None:
        return None
    text = str(value).strip()
    if not text:
        return None
    try:
        parsed = float(text)
    except ValueError:
        return None
    # float() accepts "nan"/"inf", but they are not usable filter bounds and
    # make a later int() conversion raise, so treat them as invalid input.
    return parsed if math.isfinite(parsed) else None
121
122
def build_query_url(actor_input: dict) -> str:
    """Build a /query URL from filter inputs (more flexible than summary feeds).

    Numeric filters arrive as strings (Apify input schema has no float type), so
    each is parsed and skipped if blank, non-numeric, or not finite.

    Args:
        actor_input: Actor input mapping. Keys read: ``startTime``,
            ``endTime``, ``minMagnitude``, ``maxMagnitude``, ``minLatitude``,
            ``maxLatitude``, ``minLongitude``, ``maxLongitude``, ``maxItems``.

    Returns:
        ``QUERY_URL`` followed by the URL-encoded parameters. ``format``,
        ``limit`` and ``orderby`` are always present; every other parameter
        appears only when its input is set and valid.
    """
    import math  # function-scope import: only the finiteness check needs it

    params: dict = {"format": "geojson"}

    # Time bounds are passed through as stripped text when non-blank.
    for in_key, api_key in (("startTime", "starttime"), ("endTime", "endtime")):
        text = str(actor_input.get(in_key) or "").strip()
        if text:
            params[api_key] = text

    numeric_filters = (
        ("minMagnitude", "minmagnitude"),
        ("maxMagnitude", "maxmagnitude"),
        ("minLatitude", "minlatitude"),
        ("maxLatitude", "maxlatitude"),
        ("minLongitude", "minlongitude"),
        ("maxLongitude", "maxlongitude"),
    )
    for in_key, api_key in numeric_filters:
        val = _numeric_or_none(actor_input.get(in_key))
        # "nan"/"inf" parse as floats but are not valid bounds, and int() on
        # them raises, so they are skipped like any other invalid input.
        if val is None or not math.isfinite(val):
            continue
        # Whole numbers are sent without a trailing ".0" (5 rather than 5.0).
        params[api_key] = int(val) if val.is_integer() else val

    try:
        limit = int(actor_input.get("maxItems", 1000))
    except (TypeError, ValueError, OverflowError):
        limit = 1000
    # Keep the limit inside 1..20000, the range the query service accepts.
    params["limit"] = min(max(limit, 1), 20000)
    params["orderby"] = "time"
    return f"{QUERY_URL}?{urlencode(params)}"
156
157
def _resolve_max_items(actor_input: dict, default: int = 1000) -> int:
    """Return the ``maxItems`` input as an int of at least 1, or *default* if unusable."""
    try:
        requested = int(actor_input.get("maxItems", default))
    except (TypeError, ValueError, OverflowError):
        return default
    # A negative cap would make the later slice drop events from the END of
    # the list instead of limiting it, so the cap is floored at 1.
    return max(requested, 1)


async def main() -> None:
    """Actor entry point: fetch one USGS feed or query and push the events.

    Reads the actor input, picks the URL (a summary feed when ``mode`` is
    "summary", otherwise a filtered query), downloads the GeoJSON with up to
    three attempts, transforms each feature, and pushes at most ``maxItems``
    records to the dataset. An unknown ``summaryFeed`` or a failed download
    is logged as an error and ends the run without pushing events.
    """
    async with Actor:
        actor_input = await Actor.get_input() or {}

        mode = actor_input.get("mode", "summary")
        if mode == "summary":
            feed = actor_input.get("summaryFeed", "significant_week")
            filename = SUMMARY_FEEDS.get(feed)
            if not filename:
                Actor.log.error(f"Unknown summaryFeed {feed!r}. See input docs for valid values.")
                await Actor.push_data([])
                return
            url = f"{SUMMARY_BASE}/{filename}"
        else:
            url = build_query_url(actor_input)

        # Parsed defensively so a null or non-numeric maxItems falls back to
        # the default instead of crashing the run.
        max_items = _resolve_max_items(actor_input)
        Actor.log.info(f"USGS earthquake fetch (mode={mode}): {url}")

        async with httpx.AsyncClient(
            timeout=40.0,
            headers={"User-Agent": "scrapeworks-usgs-earthquakes/0.1", "Accept": "application/json"},
        ) as client:
            data = None
            for attempt in range(1, 4):
                try:
                    resp = await client.get(url)
                    resp.raise_for_status()
                    data = resp.json()
                    break
                except (httpx.HTTPError, ValueError) as exc:
                    Actor.log.warning(f"Attempt {attempt} failed: {exc}")
                    # Back off 2s, then 4s; no wait after the final attempt.
                    if attempt < 3:
                        await asyncio.sleep(attempt * 2)

        if data is None:
            Actor.log.error("Failed to fetch USGS data.")
            return

        metadata = data.get("metadata") or {}
        total = metadata.get("count")
        features = data.get("features") or []
        Actor.log.info(f"USGS reports {total} events. Fetched {len(features)}, will return up to {max_items}.")

        batch = [transform_feature(f) for f in features[:max_items]]
        if batch:
            await Actor.push_data(batch)

        Actor.log.info(f"Done. Returned {len(batch)} events.")