Fanfix.com scraper
Deprecated

This Actor is unavailable because the developer has decided to deprecate it.

iskander/fanfix-com-scraper

.actor/Dockerfile

# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.11

# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./

# Use compileall to ensure the runnability of the Actor Python code.
RUN python3 -m compileall -q .

# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "fanfix-com-scraper",
    "title": "Fanfix.com scraper",
    "description": "Scrape data from model profiles",
    "version": "0.1",
    "meta": {
        "templateId": "python-start"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
    "title": "Scrape data from a web page",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "url": {
            "title": "URLs of the user profiles",
            "type": "array",
            "description": "The URLs of the user profiles you want to get the data from.",
            "editor": "requestListSources",
            "prefill": [
                { "url": "https://app.fanfix.io/@kaylavoid" }
            ]
        }
    },
    "required": ["url"]
}
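
For reference, an input matching this schema can also be supplied when starting the Actor from Python with the official Apify API client. The following is a minimal sketch, not part of the Actor's source; the API token is a placeholder and the Actor ID is taken from the listing above.

from apify_client import ApifyClient

# Minimal sketch: call the Actor with an input matching the schema above.
# '<YOUR_APIFY_TOKEN>' is a placeholder for your own API token.
client = ApifyClient('<YOUR_APIFY_TOKEN>')

run_input = {
    'url': [
        {'url': 'https://app.fanfix.io/@kaylavoid'},
    ],
}

# Start the Actor run and wait for it to finish.
run = client.actor('iskander/fanfix-com-scraper').call(run_input=run_input)

# Read the items the run pushed to its default dataset.
for item in client.dataset(run['defaultDatasetId']).iterate_items():
    print(item)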

src/__main__.py

1"""
2This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging
3settings. The `main()` coroutine is then executed using `asyncio.run()`.
4
5Feel free to modify this file to suit your specific needs.
6"""
7
8import asyncio
9import logging
10
11from apify.log import ActorLogFormatter
12
13from .main import main
14
15# Configure loggers
16handler = logging.StreamHandler()
17handler.setFormatter(ActorLogFormatter())
18
19apify_client_logger = logging.getLogger('apify_client')
20apify_client_logger.setLevel(logging.INFO)
21apify_client_logger.addHandler(handler)
22
23apify_logger = logging.getLogger('apify')
24apify_logger.setLevel(logging.DEBUG)
25apify_logger.addHandler(handler)
26
27# Execute the Actor main coroutine
28asyncio.run(main())

src/main.py

# Beautiful Soup - library for pulling data out of HTML and XML files, read more at
# https://www.crummy.com/software/BeautifulSoup/bs4/doc
from bs4 import BeautifulSoup

# HTTPX - library for making asynchronous HTTP requests in Python, read more at https://www.python-httpx.org/
from httpx import AsyncClient

# Apify SDK - toolkit for building Apify Actors, read more at https://docs.apify.com/sdk/python
from apify import Actor


async def main() -> None:
    async with Actor:
        # Read the Actor input; the "url" field is a list of request objects from the input schema.
        actor_input = await Actor.get_input() or {}
        urls = actor_input.get('url', [])

        for url in urls:
            # Fetch the profile page, following redirects (TLS certificate verification is disabled).
            async with AsyncClient(verify=False) as client:
                response = await client.get(url['url'], follow_redirects=True)

            soup = BeautifulSoup(response.content, 'html.parser')

            # Extract the profile fields from the page.
            full_name = soup.select('div[data-testid="creator-fullname-stack-ds"]')[0].text
            bio = soup.select('div[data-testid="profile-header-card-content-bio-ds"]')[0].text
            profile_image = soup.select('img[alt="Profile Picture"]')[0]['src']
            model_object = {'full_name': full_name, 'bio': bio, 'profile_image': profile_image}
            Actor.log.info(f'Scraped: {full_name}')

            # Store the scraped record in the Actor's default dataset.
            await Actor.push_data(model_object)
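
The CSS selectors used above can also be checked outside the Apify runtime. The following is a minimal sketch, not part of the Actor's source; it assumes a profile page saved locally as profile.html (a hypothetical file name used only for illustration).

from bs4 import BeautifulSoup

# Parse a locally saved profile page instead of fetching it over HTTP.
# 'profile.html' is a hypothetical file used only to illustrate the selectors.
with open('profile.html', encoding='utf-8') as f:
    soup = BeautifulSoup(f.read(), 'html.parser')

# The same data-testid selectors as in src/main.py; select() returns a list,
# so a missing element yields an empty list instead of raising an exception.
full_name = soup.select('div[data-testid="creator-fullname-stack-ds"]')
bio = soup.select('div[data-testid="profile-header-card-content-bio-ds"]')
profile_image = soup.select('img[alt="Profile Picture"]')

print('full_name:', full_name[0].text if full_name else None)
print('bio:', bio[0].text if bio else None)
print('profile_image:', profile_image[0]['src'] if profile_image else None)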

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
.venv

# git folder
.git

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.gitignore

# This file tells Git which files shouldn't be added to source control

.idea
.DS_Store

apify_storage
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json

.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg

__pycache__

.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache
.ruff_cache

.scrapy
*.log

requirements.txt

# Feel free to add your Python dependencies below. For formatting guidelines, see:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify ~= 1.6.0
beautifulsoup4 ~= 4.12.2
httpx ~= 0.25.2
types-beautifulsoup4 ~= 4.12.0.7
Maintained by Community