1from urllib.parse import urljoin
2from bs4 import BeautifulSoup
3from httpx import AsyncClient
4from apify import Actor
5
async def main() -> None:
    """Scrape the member roster of a FFXIV Free Company from the Lodestone.

    Reads ``fc_id`` from the actor input, then walks the paginated member
    list starting at the NA Lodestone, pushing one record per page
    (``{'url': ..., 'members': [...]}``) to the default dataset. Each
    member dict carries ``name``, ``id`` and ``avatar_url``.
    """
    async with Actor:
        actor_input = await Actor.get_input() or {}
        fc_id = actor_input.get('fc_id')

        if not fc_id:
            Actor.log.error('Free Company ID is missing in the actor input.')
            await Actor.exit()
            # Actor.exit() may not abort this coroutine on every SDK
            # version; return explicitly so we never build a request
            # URL with fc_id=None.
            return

        start_url = f'https://na.finalfantasyxiv.com/lodestone/freecompany/{fc_id}/member/'

        default_queue = await Actor.open_request_queue()
        await default_queue.add_request({'url': start_url})

        # One shared HTTP client (and connection pool) for the whole
        # crawl, instead of opening a fresh client per queued page.
        async with AsyncClient() as client:
            while request := await default_queue.fetch_next_request():
                url = request['url']
                Actor.log.info(f'Scraping {url} ...')

                try:
                    response = await client.get(url, follow_redirects=True)
                    soup = BeautifulSoup(response.content, 'html.parser')

                    members = []
                    for member in soup.select('li.entry'):
                        name_element = member.select_one('.entry__name')
                        link_element = member.select_one('.entry__bg')
                        avatar_element = member.select_one('.entry__chara__face img')
                        # Skip a malformed entry instead of raising and
                        # losing every other member on the page.
                        if not (name_element and link_element and avatar_element):
                            continue

                        # Profile links look like
                        # /lodestone/character/<id>/ — the id is the
                        # second-to-last path segment.
                        member_id = link_element['href'].split('/')[-2]
                        members.append({
                            'name': name_element.text.strip(),
                            'id': member_id,
                            'avatar_url': avatar_element['src'],
                        })

                    await Actor.push_data({'url': url, 'members': members})

                    # Enqueue the next page unless the "next" button is
                    # disabled (Lodestone marks it with btn__pager__no).
                    next_page = soup.select_one('.btn__pager__next')
                    if next_page and 'btn__pager__no' not in next_page.get('class', []):
                        next_url = urljoin(url, next_page['href'])
                        await default_queue.add_request({'url': next_url})

                except Exception:
                    # Best-effort crawl: log the failure and move on to
                    # the remaining queued pages.
                    Actor.log.exception(f'Cannot extract data from {url}.')

                finally:
                    # Mark handled even on failure so the queue drains
                    # and the actor terminates.
                    await default_queue.mark_request_as_handled(request)