
FFXIV FC
Deprecated
Scrapes the FFXIV Lodestone page of a free company for member names, IDs, and avatars.
Rating: 0.0 (0 reviews)
Pricing: Pay per usage
Total users: 1
Monthly users: 2
Runs succeeded: >99%
Last modified: a year ago
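
For context, here is a minimal sketch of calling this Actor from the Apify Python client and reading its dataset output. The API token, Actor ID, and free company ID below are placeholders; the only input field the Actor's code reads is fc_id.

from apify_client import ApifyClient

# Placeholder API token and Actor ID -- substitute your own.
client = ApifyClient('<YOUR_API_TOKEN>')

# Start the Actor with a hypothetical Lodestone free company ID and wait for it to finish.
run = client.actor('<username>/ffxiv-fc').call(run_input={'fc_id': '9231112345678901234'})

# Each dataset item holds the scraped page URL and the members found on that page.
for item in client.dataset(run['defaultDatasetId']).iterate_items():
    for member in item['members']:
        print(member['name'], member['id'], member['avatar_url'])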
.actor/Dockerfile
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.11

# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./

# Use compileall to ensure the runnability of the Actor Python code.
RUN python3 -m compileall -q .

# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]
.actor/actor.json
{ "actorSpecification": 1, "name": "my-actor", "title": "Getting started with Python and BeautifulSoup", "description": "Scrapes titles of websites using BeautifulSoup.", "version": "0.0", "meta": { "templateId": "python-beautifulsoup" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile", "storages": { "dataset": { "actorSpecification": 1, "title": "URLs and their titles", "views": { "titles": { "title": "URLs and their titles", "transformation": { "fields": [ "url", "title" ] }, "display": { "component": "table", "properties": { "url": { "label": "URL", "format": "text" }, "title": { "label": "Title", "format": "text" } } } } } } }}
.actor/input_schema.json
{ "title": "Python BeautifulSoup Scraper", "type": "object", "schemaVersion": 1, "properties": { "start_urls": { "title": "Start URLs", "type": "array", "description": "URLs to start with", "prefill": [ { "url": "https://apify.com" } ], "editor": "requestListSources" }, "max_depth": { "title": "Maximum depth", "type": "integer", "description": "Depth to which to scrape to", "default": 1 } }, "required": ["start_urls"]}
src/__main__.py
1"""2This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging3settings. The `main()` coroutine is then executed using `asyncio.run()`.4
5Feel free to modify this file to suit your specific needs.6"""7
8import asyncio9import logging10
11from apify.log import ActorLogFormatter12
13from .main import main14
15# Configure loggers16handler = logging.StreamHandler()17handler.setFormatter(ActorLogFormatter())18
19apify_client_logger = logging.getLogger('apify_client')20apify_client_logger.setLevel(logging.INFO)21apify_client_logger.addHandler(handler)22
23apify_logger = logging.getLogger('apify')24apify_logger.setLevel(logging.DEBUG)25apify_logger.addHandler(handler)26
27# Execute the Actor main coroutine28asyncio.run(main())
src/main.py
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from httpx import AsyncClient

from apify import Actor


async def main() -> None:
    async with Actor:
        # Read the Actor input
        actor_input = await Actor.get_input() or {}
        fc_id = actor_input.get('fc_id')

        if not fc_id:
            Actor.log.error('Free Company ID is missing in the actor input.')
            await Actor.exit()

        start_url = f'https://na.finalfantasyxiv.com/lodestone/freecompany/{fc_id}/member/'

        # Enqueue the starting URL in the default request queue
        default_queue = await Actor.open_request_queue()
        await default_queue.add_request({'url': start_url})

        # Process the requests in the queue one by one
        while request := await default_queue.fetch_next_request():
            url = request['url']
            Actor.log.info(f'Scraping {url} ...')

            try:
                # Fetch the URL using `httpx`
                async with AsyncClient() as client:
                    response = await client.get(url, follow_redirects=True)

                # Parse the response using `BeautifulSoup`
                soup = BeautifulSoup(response.content, 'html.parser')

                # Extract member data from the page
                members = []
                for member in soup.select('li.entry'):
                    name_element = member.select_one('.entry__name')
                    if name_element:
                        name = name_element.text.strip()
                        id_link = member.select_one('.entry__bg')['href']
                        member_id = id_link.split('/')[-2]
                        avatar_url = member.select_one('.entry__chara__face img')['src']

                        members.append({
                            'name': name,
                            'id': member_id,
                            'avatar_url': avatar_url
                        })

                # Push the extracted data into the default dataset
                await Actor.push_data({'url': url, 'members': members})

                # Check for pagination and enqueue the next page URL
                next_page = soup.select_one('.btn__pager__next')
                if next_page and 'btn__pager__no' not in next_page.get('class', []):
                    next_url = urljoin(url, next_page['href'])
                    await default_queue.add_request({'url': next_url})

            except Exception:
                Actor.log.exception(f'Cannot extract data from {url}.')

            finally:
                # Mark the request as handled so it's not processed again
                await default_queue.mark_request_as_handled(request)
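
The extraction above hinges on the Lodestone member-list markup (li.entry, .entry__name, .entry__bg, .entry__chara__face img). Here is a standalone sketch of that selector logic against a hand-written fragment; the HTML below is an illustrative assumption, not captured Lodestone output.

from bs4 import BeautifulSoup

# Hypothetical fragment mimicking the structure the selectors above expect.
SAMPLE_HTML = '''
<ul>
  <li class="entry">
    <a href="/lodestone/character/12345678/" class="entry__bg">
      <div class="entry__chara__face"><img src="https://img.example.com/face.jpg"></div>
      <p class="entry__name">Warrior Of Light</p>
    </a>
  </li>
</ul>
'''

soup = BeautifulSoup(SAMPLE_HTML, 'html.parser')
for member in soup.select('li.entry'):
    name = member.select_one('.entry__name').text.strip()
    # href like /lodestone/character/12345678/ -> the ID is the second-to-last path segment
    member_id = member.select_one('.entry__bg')['href'].split('/')[-2]
    avatar_url = member.select_one('.entry__chara__face img')['src']
    print(name, member_id, avatar_url)  # Warrior Of Light 12345678 https://img.example.com/face.jpg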
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
.venv

# git folder
.git
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.gitignore
# This file tells Git which files shouldn't be added to source control
.idea
.DS_Store

apify_storage
storage

.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg

__pycache__

.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache
.ruff_cache

.scrapy
*.log
requirements.txt
# Feel free to add your Python dependencies below. For formatting guidelines, see:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify ~= 1.7.0
beautifulsoup4 ~= 4.12.2
httpx ~= 0.25.2
types-beautifulsoup4 ~= 4.12.0.7