1"""This module defines the main entry point for the Apify Actor.
2
3Feel free to modify this file to suit your specific needs.
4
5To build Apify Actors, utilize the Apify SDK toolkit, read more at the official documentation:
6https://docs.apify.com/sdk/python
7"""
8
9
10
11
12
13from apify import Actor
14from bs4 import BeautifulSoup
15
16
17
18from httpx import AsyncClient
19
20
# Placeholder stored in a field when the page does not provide a value.
_MISSING_VALUE = 'N/A'

# Page scraped when the Actor input does not supply a URL.
_DEFAULT_START_URL = 'https://www.python.org/events/'

# Base used to resolve relative event links when no page URL is supplied.
_DEFAULT_BASE_URL = 'https://www.python.org'


def _resolve_event_url(href, base_url: str = _DEFAULT_BASE_URL) -> str:
    """Return the absolute URL for an event link, or ``'N/A'`` when it is missing.

    Args:
        href: Value of the anchor's ``href`` attribute; may be ``None`` or empty.
        base_url: URL against which a relative ``href`` is resolved.

    Returns:
        The absolute URL, or the ``'N/A'`` placeholder if ``href`` is falsy.
    """
    # Function-scope import: the file's import header does not provide urljoin.
    from urllib.parse import urljoin

    if not href:
        # Check the raw attribute, not a placeholder string: a truthy 'N/A'
        # would otherwise be glued onto the base URL.
        return _MISSING_VALUE
    # urljoin handles root-relative, page-relative and already-absolute links.
    return urljoin(base_url, href)


def _extract_event_data(html, base_url: str = _DEFAULT_BASE_URL) -> list:
    """Parse the events listing markup into a list of event records.

    Args:
        html: Page markup (``str`` or ``bytes``) handed to BeautifulSoup.
        base_url: URL used to turn relative event links into absolute ones.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``date`` and
        ``location``. Any value missing from the markup is ``'N/A'``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    events = []

    for item in soup.select('.list-recent-events.menu li'):
        title_tag = item.select_one('.event-title a')
        date_tag = item.select_one('time')
        location_tag = item.select_one('.event-location')

        if title_tag is not None:
            title = title_tag.get_text(strip=True)
            href = title_tag.get('href')
        else:
            title = _MISSING_VALUE
            href = None

        if date_tag is not None:
            # The separator keeps the pieces of the <time> element apart.
            date = date_tag.get_text(separator=' ', strip=True)
        else:
            date = _MISSING_VALUE

        if location_tag is not None:
            location = location_tag.get_text(strip=True)
        else:
            location = _MISSING_VALUE

        events.append({
            'title': title,
            'url': _resolve_event_url(href, base_url),
            'date': date,
            'location': location,
        })

    return events


async def main() -> None:
    """Main entry point for the Apify Actor.

    Reads the target URL from the Actor input (falling back to the python.org
    events page), downloads it, extracts the listed events and pushes them to
    the default dataset.

    This coroutine is executed using `asyncio.run()`, so it must remain an
    asynchronous function for proper execution. Asynchronous execution is
    required for communication with the Apify platform.

    Raises:
        httpx.HTTPStatusError: If the page responds with a 4xx/5xx status.
    """
    async with Actor:
        actor_input = await Actor.get_input() or {}
        # Fall back to the default page both when there is no input at all and
        # when the input simply lacks a usable 'url' value.
        url = actor_input.get('url') or _DEFAULT_START_URL

        async with AsyncClient() as client:
            Actor.log.info(f'Sending a request to {url}')
            response = await client.get(url, follow_redirects=True)

        # Fail the run on an error page instead of silently pushing nothing.
        response.raise_for_status()

        # Resolve links against the final URL, i.e. after any redirects.
        events = _extract_event_data(response.content, base_url=str(response.url))
        Actor.log.info(f'Extracted {len(events)} events')

        await Actor.push_data(events)