FFXIV FC
Go to Store
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative ActorsFFXIV FC
hol/ffxiv-fc
Scrapes the FFXIV Lodestone page of a free company for member names, IDs, and avatars.
.actor/Dockerfile
1# First, specify the base Docker image.
2# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
3# You can also use any other image from Docker Hub.
4FROM apify/actor-python:3.11
5
6# Second, copy just requirements.txt into the Actor image,
7# since it should be the only file that affects the dependency install in the next step,
8# in order to speed up the build
9COPY requirements.txt ./
10
11# Install the packages specified in requirements.txt,
12# Print the installed Python version, pip version
13# and all installed packages with their versions for debugging
14RUN echo "Python version:" \
15 && python --version \
16 && echo "Pip version:" \
17 && pip --version \
18 && echo "Installing dependencies:" \
19 && pip install -r requirements.txt \
20 && echo "All installed Python packages:" \
21 && pip freeze
22
23# Next, copy the remaining files and directories with the source code.
24# Since we do this after installing the dependencies, quick build will be really fast
25# for most source file changes.
26COPY . ./
27
28# Use compileall to ensure the runnability of the Actor Python code.
29RUN python3 -m compileall -q .
30
31# Specify how to launch the source code of your Actor.
32# By default, the "python3 -m src" command is run
33CMD ["python3", "-m", "src"]
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "my-actor",
4    "title": "FFXIV Free Company Member Scraper",
5    "description": "Scrapes the FFXIV Lodestone member list of a free company using BeautifulSoup.",
6 "version": "0.0",
7 "meta": {
8 "templateId": "python-beautifulsoup"
9 },
10 "input": "./input_schema.json",
11 "dockerfile": "./Dockerfile",
12 "storages": {
13 "dataset": {
14 "actorSpecification": 1,
15 "title": "URLs and their titles",
16 "views": {
17 "titles": {
18 "title": "URLs and their titles",
19 "transformation": {
20 "fields": [
21 "url",
22 "title"
23 ]
24 },
25 "display": {
26 "component": "table",
27 "properties": {
28 "url": {
29 "label": "URL",
30 "format": "text"
31 },
32 "title": {
33 "label": "Title",
34 "format": "text"
35 }
36 }
37 }
38 }
39 }
40 }
41 }
42}
.actor/input_schema.json
1{
2 "title": "Python BeautifulSoup Scraper",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "start_urls": {
7 "title": "Start URLs",
8 "type": "array",
9 "description": "URLs to start with",
10 "prefill": [
11 { "url": "https://apify.com" }
12 ],
13 "editor": "requestListSources"
14 },
15 "max_depth": {
16 "title": "Maximum depth",
17 "type": "integer",
18        "description": "Maximum depth to which to scrape",
19 "default": 1
20 }
21 },
22 "required": ["start_urls"]
23}
src/__main__.py
"""Entry point for executing the Apify Actor.

Configures logging for the Apify SDK loggers and then runs the ``main()``
coroutine via ``asyncio.run()``.

Feel free to modify this file to suit your specific needs.
"""

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Route SDK log records through a handler that applies Apify's log formatting.
log_handler = logging.StreamHandler()
log_handler.setFormatter(ActorLogFormatter())

# Attach the handler to both SDK loggers, each at its own verbosity level.
for logger_name, level in (('apify_client', logging.INFO), ('apify', logging.DEBUG)):
    sdk_logger = logging.getLogger(logger_name)
    sdk_logger.setLevel(level)
    sdk_logger.addHandler(log_handler)

# Execute the Actor main coroutine
asyncio.run(main())
src/main.py
1from urllib.parse import urljoin
2from bs4 import BeautifulSoup
3from httpx import AsyncClient
4from apify import Actor
5
async def main() -> None:
    """Scrape the member roster of an FFXIV free company from the Lodestone.

    Reads the free-company ID (``fc_id``) from the Actor input, crawls the
    Lodestone member-list pages (following pagination) and pushes one dataset
    record per page containing the members found on it, each with ``name``,
    ``id`` and ``avatar_url``.
    """
    async with Actor:
        # Read the Actor input
        actor_input = await Actor.get_input() or {}
        fc_id = actor_input.get('fc_id')

        if not fc_id:
            Actor.log.error('Free Company ID is missing in the actor input.')
            await Actor.exit()
            # Guard against falling through and building a URL with a None ID
            # in case Actor.exit() does not terminate execution immediately.
            return

        start_url = f'https://na.finalfantasyxiv.com/lodestone/freecompany/{fc_id}/member/'

        # Enqueue the starting URL in the default request queue
        default_queue = await Actor.open_request_queue()
        await default_queue.add_request({'url': start_url})

        # Reuse one HTTP client (and its connection pool) for the whole crawl
        # instead of opening a fresh client for every queued request.
        async with AsyncClient() as client:
            # Process the requests in the queue one by one
            while request := await default_queue.fetch_next_request():
                url = request['url']
                Actor.log.info(f'Scraping {url} ...')

                try:
                    # Fetch the URL using `httpx`
                    response = await client.get(url, follow_redirects=True)

                    # Parse the response using `BeautifulSoup`
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Extract member data from the page. Skip entries with
                    # missing elements instead of letting an AttributeError
                    # abort extraction for the entire page.
                    members = []
                    for member in soup.select('li.entry'):
                        name_element = member.select_one('.entry__name')
                        link_element = member.select_one('.entry__bg')
                        avatar_element = member.select_one('.entry__chara__face img')
                        if not (name_element and link_element and avatar_element):
                            continue

                        members.append({
                            'name': name_element.text.strip(),
                            # Profile links look like /lodestone/character/<id>/,
                            # so the ID is the second-to-last path segment.
                            'id': link_element['href'].split('/')[-2],
                            'avatar_url': avatar_element['src'],
                        })

                    # Push the extracted data into the default dataset
                    await Actor.push_data({'url': url, 'members': members})

                    # Check for pagination and enqueue the next page URL.
                    # A "btn__pager__no" class marks a disabled next button.
                    next_page = soup.select_one('.btn__pager__next')
                    if next_page and 'btn__pager__no' not in next_page.get('class', []):
                        next_url = urljoin(url, next_page['href'])
                        await default_queue.add_request({'url': next_url})

                except Exception:
                    Actor.log.exception(f'Cannot extract data from {url}.')

                finally:
                    # Mark the request as handled so it's not processed again
                    await default_queue.mark_request_as_handled(request)
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10.venv
11
12# git folder
13.git
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
.gitignore
1# This file tells Git which files shouldn't be added to source control
2
3.idea
4.DS_Store
5
6apify_storage
7storage
8
9.venv/
10.env/
11__pypackages__
12dist/
13build/
14*.egg-info/
15*.egg
16
17__pycache__
18
19.mypy_cache
20.dmypy.json
21dmypy.json
22.pytest_cache
23.ruff_cache
24
25.scrapy
26*.log
requirements.txt
1# Feel free to add your Python dependencies below. For formatting guidelines, see:
2# https://pip.pypa.io/en/latest/reference/requirements-file-format/
3
4apify ~= 1.7.0
5beautifulsoup4 ~= 4.12.2
6httpx ~= 0.25.2
7types-beautifulsoup4 ~= 4.12.0.7
Developer
Maintained by Community
Categories