1"""This module defines the main entry point for the Apify Actor.
2
3Feel free to modify this file to suit your specific needs.
4
5To build Apify Actors, utilize the Apify SDK toolkit, read more at the official documentation:
6https://docs.apify.com/sdk/python
7"""
8
9
10
11from bs4 import BeautifulSoup
12
13
14
15from httpx import AsyncClient
16
17
18
19from apify import Actor
20
21
def _extract_event_data(html: 'str | bytes', base_url: str) -> list[dict[str, str]]:
    """Parse an events listing page into a list of event records.

    Args:
        html: Markup of the listing page, as text or raw bytes.
        base_url: URL the page was fetched from; relative event links are
            resolved against it.

    Returns:
        One dict per `.list-recent-events.menu li` element, with the keys
        `title`, `url`, `date` and `location`. Any piece of information that
        is missing from the markup is reported as the string `'N/A'`.
    """
    # Imported locally so the helper is self-contained and the module's
    # import section does not have to change.
    from urllib.parse import urljoin

    soup = BeautifulSoup(html, 'html.parser')
    events = []

    for item in soup.select('.list-recent-events.menu li'):
        title_tag = item.select_one('.event-title a')
        date_tag = item.select_one('time')
        location_tag = item.select_one('.event-location')

        # Keep the raw href separate from the 'N/A' placeholder: the
        # placeholder is a truthy string and must never be glued onto the
        # base URL.
        href = title_tag.get('href') if title_tag else None

        events.append({
            'title': title_tag.get_text(strip=True) if title_tag else 'N/A',
            # urljoin handles both site-relative and already-absolute hrefs.
            'url': urljoin(base_url, href) if href else 'N/A',
            'date': date_tag.get_text(separator=' ', strip=True) if date_tag else 'N/A',
            'location': location_tag.get_text(strip=True) if location_tag else 'N/A',
        })

    return events


async def main() -> None:
    """Main entry point for the Apify Actor.

    Reads the target URL from the Actor input (falling back to the python.org
    events page when the input is empty or has no `url`), downloads the page,
    extracts the listed events and pushes them to the default dataset.

    This coroutine is executed using `asyncio.run()`, so it must remain an
    asynchronous function for proper execution.
    """
    default_url = 'https://www.python.org/events/'

    async with Actor:
        actor_input = await Actor.get_input() or {}
        # An input object without a `url` key would otherwise pass None to
        # the HTTP client, so fall back to the default in that case too.
        url = actor_input.get('url') or default_url

        async with AsyncClient() as client:
            Actor.log.info(f'Sending a request to {url}')
            response = await client.get(url, follow_redirects=True)

        # Resolve links against the URL that was actually fetched (after
        # redirects) rather than a hard-coded site root.
        events = _extract_event_data(response.content, str(response.url))

        await Actor.push_data(events)