# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.11

# Second, copy just requirements.txt into the Actor image.
# It is the only file that affects the dependency install in the next step,
# so the (slow) install layer stays cached until the requirements change.
COPY requirements.txt ./

# Install the packages specified in requirements.txt.
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install --no-cache-dir -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since this happens after the dependency install, rebuilds are fast
# for most source file changes.
COPY . ./

# Byte-compile the sources so that syntax errors fail the build
# instead of surfacing when the Actor starts.
RUN python3 -m compileall -q .

# Specify how to launch the source code of your Actor.
# "python3 -m src" runs the package entry point in src/__main__.py.
CMD ["python3", "-m", "src"]

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "my-actor",
    "title": "Getting started with Python and BeautifulSoup",
    "description": "Scrapes the member list of a Final Fantasy XIV Free Company from the Lodestone using BeautifulSoup.",
    "version": "0.0",
    "meta": {
        "templateId": "python-beautifulsoup"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile",
    "storages": {
        "dataset": {
            "actorSpecification": 1,
            "title": "Member pages and their members",
            "views": {
                "titles": {
                    "title": "Member pages and their members",
                    "transformation": {
                        "fields": [
                            "url",
                            "members"
                        ]
                    },
                    "display": {
                        "component": "table",
                        "properties": {
                            "url": {
                                "label": "URL",
                                "format": "text"
                            },
                            "members": {
                                "label": "Members",
                                "format": "array"
                            }
                        }
                    }
                }
            }
        }
    }
}

.actor/input_schema.json

{
    "title": "FFXIV Free Company member scraper input",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "fc_id": {
            "title": "Free Company ID",
            "type": "string",
            "description": "ID of the Free Company whose member list should be scraped. It is inserted into https://na.finalfantasyxiv.com/lodestone/freecompany/<fc_id>/member/",
            "editor": "textfield"
        }
    },
    "required": ["fc_id"]
}

src/__main__.py

1"""
2This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging
3settings. The `main()` coroutine is then executed using `asyncio.run()`.
4
5Feel free to modify this file to suit your specific needs.
6"""
7
8import asyncio
9import logging
10
11from apify.log import ActorLogFormatter
12
13from .main import main
14
# Send log records of the Apify SDK and the Apify API client through a single
# stream handler that formats them with ActorLogFormatter.
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

for logger_name, log_level in (
    ('apify_client', logging.INFO),
    ('apify', logging.DEBUG),
):
    configured_logger = logging.getLogger(logger_name)
    configured_logger.setLevel(log_level)
    configured_logger.addHandler(handler)

# Run the Actor's main coroutine to completion.
asyncio.run(main())

src/main.py

1from urllib.parse import urljoin
2from bs4 import BeautifulSoup
3from httpx import AsyncClient
4from apify import Actor
5
def _extract_member_id(profile_href: str) -> str:
    """Return the last non-empty path segment of a member profile link.

    Stripping the trailing slash first makes the result the same whether or
    not the link ends with '/'.
    """
    return profile_href.rstrip('/').split('/')[-1]


def _parse_members(soup: 'BeautifulSoup') -> list[dict]:
    """Collect name, ID and avatar URL of every member entry in a parsed page.

    Entries without a name element or without a profile link are skipped,
    because no ID can be derived for them. A missing avatar image is reported
    as `avatar_url: None` instead of discarding the entry.
    """
    members = []
    for entry in soup.select('li.entry'):
        name_element = entry.select_one('.entry__name')
        profile_link = entry.select_one('.entry__bg')
        profile_href = profile_link.get('href') if profile_link else None
        if not name_element or not profile_href:
            continue

        avatar = entry.select_one('.entry__chara__face img')
        members.append({
            'name': name_element.text.strip(),
            'id': _extract_member_id(profile_href),
            'avatar_url': avatar.get('src') if avatar else None,
        })
    return members


async def main() -> None:
    """Scrape the member list pages of the Free Company given by the `fc_id` input.

    The first member page is enqueued in the default request queue. Every
    fetched page is parsed, pushed to the default dataset as
    `{'url': ..., 'members': [...]}`, and its "next page" link (if enabled)
    is enqueued. A page that cannot be fetched or parsed is logged and
    marked as handled; it is not retried.
    """
    async with Actor:
        # Read the Actor input
        actor_input = await Actor.get_input() or {}
        fc_id = actor_input.get('fc_id')

        if not fc_id:
            Actor.log.error('Free Company ID is missing in the actor input.')
            # Finish the run as failed rather than successful: nothing can be scraped.
            await Actor.fail()
            # Guard in case the call above returns instead of ending the
            # process -- TODO confirm against the SDK version in use.
            return

        start_url = f'https://na.finalfantasyxiv.com/lodestone/freecompany/{fc_id}/member/'

        # Enqueue the starting URL in the default request queue
        default_queue = await Actor.open_request_queue()
        await default_queue.add_request({'url': start_url})

        # One HTTP client is shared by all requests instead of creating a new one per page.
        async with AsyncClient() as client:
            # Process the requests in the queue one by one
            while request := await default_queue.fetch_next_request():
                url = request['url']
                Actor.log.info(f'Scraping {url} ...')

                try:
                    response = await client.get(url, follow_redirects=True)
                    # Treat HTTP error statuses as failures instead of parsing an error page.
                    response.raise_for_status()

                    # Parse the response using `BeautifulSoup`
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Push the extracted data into the default dataset
                    await Actor.push_data({'url': url, 'members': _parse_members(soup)})

                    # Enqueue the next page unless the pager button is absent,
                    # has no link, or carries the 'btn__pager__no' class.
                    next_page = soup.select_one('.btn__pager__next')
                    next_href = next_page.get('href') if next_page else None
                    if next_href and 'btn__pager__no' not in next_page.get('class', []):
                        await default_queue.add_request({'url': urljoin(url, next_href)})

                except Exception:
                    Actor.log.exception(f'Cannot extract data from {url}.')

                finally:
                    # Mark the request as handled so it's not processed again
                    await default_queue.mark_request_as_handled(request)

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
.venv

# git folder
.git

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.gitignore

# This file tells Git which files shouldn't be added to source control

.idea
.DS_Store

apify_storage
storage

.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg

__pycache__

.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache
.ruff_cache

.scrapy
*.log

requirements.txt

1# Feel free to add your Python dependencies below. For formatting guidelines, see:
2# https://pip.pypa.io/en/latest/reference/requirements-file-format/
3
4apify ~= 1.7.0
5beautifulsoup4 ~= 4.12.2
6httpx ~= 0.25.2
7types-beautifulsoup4 ~= 4.12.0.7

Twitter followers scraper

curious_coder/twitter-scraper

Scrape followers, following and subscriptions list of any twitter profile

Curious Coder

2.4K

5.0

(2)

🔍🚗 Mobile.de Scraper

3x1t/mobile-de-scraper

Effortlessly scrape car data from Germany's largest vehicle marketplace, Mobile.de. Get access to millions of entries of cars, motorbikes, etc. across Europe. Fast, cheap & reliable. Rental version for larger use cases.

3x1t

205

5.0

(3)

🔥Advanced Company Details (Business Intelligence Scraper)

tech_gear/advanced-company-details

✅Extract detailed company data.✅Access organizational structure, technologies, employees, funding events, and more.✅Get comprehensive insights, including jobs added and employee count by location, all in one place.

Tech Gear

1.0

(1)

Snapchat Scraper

tri_angle/snapchat-scraper

Actor for scraping public profile data from Snapchat.

Tri⟁angle

899

5.0

(3)

Snapchat Profile Scraper

easyapi/snapchat-profile-scraper

Extract comprehensive public profile data from Snapchat accounts. Get detailed information including subscriber counts, bio, location, and more. Perfect for social media analysis, influencer research, and market intelligence. 🚀

EasyApi

243

5.0

(1)

Twitter Search Scraper

epctex/twitter-search-scraper

Scrape any keyword or hashtag from Twitter. Extract tweets, replies, favorites, retweets, and conversation threads without limit. Gather user-related information such as verification, location, profile image, friends, followers, following, and much more! Get everything with no limits on Twitter!!

epctex

959

Article Content Extractor 📄

easyapi/article-content-extractor

Extract clean article content, metadata and structured information from any web page. Supports multiple URLs and returns well-formatted JSON with title, description, content, author, publish date and more. 🔍📄

EasyApi

Snapchat User Spotlight Scraper

easyapi/snapchat-user-spotlight-scraper

Extract public spotlight content from Snapchat profiles with comprehensive metadata. Get video URLs, engagement metrics, hashtags, and more. Perfect for content analysis and social media monitoring.🎥

EasyApi

5.0

(1)

🔍🚗 Mobile.de Scraper (PPR)

3x1t/mobile-de-scraper-ppr

3x1t

132

Forebet Bet Prediction Scraper

rikunk/forebet-scraper

Forebet Bet Prediction Scraper is a specialized tool designed to fetch betting predictions exclusively from Forebet, one of the reputable betting prediction websites. This scraper's aim is to provide accurate and current betting predictions, focusing on the insights from Forebet's platform.

rikunk

179

Twitter Scraper - Unlimited Tweets, Fixed Price

epctex/twitter-scraper

Extract unlimited Twitter data with fixed monthly pricing. Scrape user tweets, conversations, and search results with advanced filtering options. Features multi-lang support, geo-targeting, and detailed user analytics. Perfect for researchers, analysts, and businesses needing reliable Twitter data.

epctex

FFXIV FC

FFXIV FC

.actor/Dockerfile

.actor/actor.json

.actor/input_schema.json

src/__main__.py

src/main.py

.dockerignore

.editorconfig

.gitignore

requirements.txt

You might also like

Twitter followers scraper

🔍🚗 Mobile.de Scraper

🔥Advanced Company Details (Business Intelligence Scraper)

Snapchat Scraper

Snapchat Profile Scraper

Twitter Search Scraper

Article Content Extractor 📄

Snapchat User Spotlight Scraper

🔍🚗 Mobile.de Scraper (PPR)

Forebet Bet Prediction Scraper

Twitter Scraper - Unlimited Tweets, Fixed Price

.actor/Dockerfile

.actor/actor.json

.actor/input_schema.json

src/__main__.py

src/main.py

.dockerignore

.editorconfig

.gitignore

requirements.txt

src/main.py

src/main.py