Tiktok ads scraper avatar
Tiktok ads scraper

Deprecated

Pricing

Pay per usage

Go to Store
Tiktok ads scraper

Tiktok ads scraper

Deprecated

Developed by

Tarun

Tarun

Maintained by Community

0.0 (0)

Pricing

Pay per usage

1

Total users

2

Monthly users

2

Last modified

a year ago

.actor/Dockerfile

# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python-selenium:3.11
# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
# (dependencies land in their own layer, so rebuilds after source-only changes reuse the cache)
COPY requirements.txt ./
# Install the packages specified in requirements.txt,
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
&& python --version \
&& echo "Pip version:" \
&& pip --version \
&& echo "Installing dependencies:" \
&& pip install -r requirements.txt \
&& echo "All installed Python packages:" \
&& pip freeze
# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./
# Use compileall to ensure the runnability of the Actor Python code.
# (fails the build early on syntax errors instead of at container start)
RUN python3 -m compileall -q .
# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]

.actor/actor.json

{
"actorSpecification": 1,
"name": "TiktokAdsScrapper",
"title": "TikTok Ads Scraper with Selenium",
"description": "Scrapes TikTok ads information and displays the data.",
"version": "0.0",
"meta": {
"templateId": "python-selenium"
},
"input": "./input_schema.json",
"dockerfile": "./Dockerfile",
"storages": {
"dataset": {
"actorSpecification": 1,
"fields" : {},
"views": {
"tiktok-ads-data": {
"title": "tiktok-ads-data",
"transformation": {
"fields": [
"ad_id",
"ad_advertiser",
"first_shown",
"last_shown",
"unique_user_views",
"target_audience",
"country_list",
"gender",
"age",
"additional_parameters",
"video_link"
]
},
"display": {
"component": "table",
"properties": {
"ad_id": {
"label": "Ad ID",
"format": "text"
},
"ad_advertiser": {
"label": "Advertiser",
"format": "text"
},
"first_shown": {
"label": "First Shown",
"format": "text"
},
"last_shown": {
"label": "Last Shown",
"format": "text"
},
"unique_user_views": {
"label": "Unique User Views",
"format": "text"
},
"target_audience": {
"label": "Target Audience",
"format": "text"
},
"country_list": {
"label": "Country List",
"format": "text"
},
"gender": {
"label": "Gender Distribution",
"format": "text"
},
"age": {
"label": "Age Distribution",
"format": "text"
},
"additional_parameters": {
"label": "Additional Parameters",
"format": "text"
},
"video_link": {
"label": "Video Link",
"format": "link"
}
}
}
}
}
}
}
}

.actor/input_schema.json

{
"title": "TikTok Ads Scraper",
"type": "object",
"schemaVersion": 1,
"properties": {
"start_urls": {
"title": "Start URLs",
"type": "array",
"description": "URLs to start with",
"prefill": [
{ "url": "https://apify.com" }
],
"editor": "requestListSources"
},
"max_depth": {
"title": "Maximum depth",
"type": "integer",
"description": "Depth to which to scrape to",
"default": 1
}
},
"required": ["start_urls"]
}

src/__main__.py

"""
This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging
settings. The `main()` coroutine is then executed using `asyncio.run()`.

Feel free to modify this file to suit your specific needs.
"""

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Wire both Apify loggers through one stream handler that uses the
# Actor-aware formatter, each at its own verbosity level.
_handler = logging.StreamHandler()
_handler.setFormatter(ActorLogFormatter())

for _name, _level in (('apify_client', logging.INFO), ('apify', logging.DEBUG)):
    _logger = logging.getLogger(_name)
    _logger.setLevel(_level)
    _logger.addHandler(_handler)

# Execute the Actor main coroutine
asyncio.run(main())

src/main.py

import asyncio
import csv
import logging
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

from apify import Actor
12
13
# SVG path TikTok renders for an "empty" bar in the gender/age tables; a cell
# containing this path means the segment is NOT targeted.
# NOTE(review): taken from the scraped markup — verify it is still current.
_EMPTY_BAR_PATH = "M6 23a1 1 0 0 1 1-1h34a1 1 0 0 1 1 1v2a1 1 0 0 1-1 1H7a1 1 0 0 1-1-1v-2Z"

# Age buckets in the order their columns appear in the age table.
_AGE_BUCKETS = ('13-17', '18-24', '25-34', '35-44', '45-54', '55+')


def _make_chrome_options():
    """Build headless Chrome options suitable for a containerized run."""
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    return chrome_options


def _bar_is_empty(cell):
    """Return True when a table cell renders the "empty bar" icon."""
    path = cell.find('path')
    return path is not None and path.get('d') == _EMPTY_BAR_PATH


def _collect_ad_ids(driver, start_url, actor):
    """Load the listing page, click "View More" until it disappears, and
    return the ad ids extracted from the fully expanded page."""
    driver.get(start_url)
    try:
        wait = WebDriverWait(driver, 10)
        while True:
            # Keep clicking "View More"; the TimeoutException / missing
            # element ends the loop once everything is loaded.
            view_more_button = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, ".loading_more")))
            driver.execute_script("arguments[0].scrollIntoView();", view_more_button)
            driver.execute_script("window.scrollBy(0, -100);")
            driver.execute_script("arguments[0].click();", view_more_button)
            # Wait briefly for more ad cards to render.
            WebDriverWait(driver, 2).until(
                lambda d: d.find_element(By.CSS_SELECTOR, ".ad_card"))
    except (TimeoutException, NoSuchElementException):
        actor.log.info("All content loaded or button not found.")

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # Each ad card links to its detail page; the ad id is the last query value.
    return [link['href'].split('=')[-1] for link in soup.find_all('a', class_='link')]


def _parse_ad_detail(ad_soup, ad_id):
    """Parse one ad-detail page into the flat record pushed to the dataset."""
    details = [tag.text for tag in ad_soup.find_all('span', {'class': 'item_value'})]
    # Expected order: first shown / last shown / unique user views.
    # Pad so a partially rendered page cannot raise IndexError.
    while len(details) < 3:
        details.append('')

    advertiser_tag = ad_soup.find('div', {'class': 'ad_advertiser_value'})
    advertiser = advertiser_tag.text if advertiser_tag else f"no advertiser available for ad_id: {ad_id}"

    video_tag = ad_soup.find('video')
    video_link = video_tag['src'] if video_tag else f"no video available for ad_id: {ad_id}"

    audience_tag = ad_soup.find('span', {'class': 'ad_target_audience_size_value'})
    target_audience = audience_tag.text if audience_tag else f"no views available for ad_id: {ad_id}"

    gender, age, country_list = [], [], []
    rows = ad_soup.find_all('tbody', class_='byted-Table-Body')

    # First table body: per-country gender targeting.
    if len(rows) > 0:
        for row in rows[0].find_all('tr'):
            cells = row.find_all('td')
            country = cells[1].text.strip()
            country_list.append(country)
            gender.append({
                'country': country,
                'gender': {
                    'Male': not _bar_is_empty(cells[2]),
                    'Female': not _bar_is_empty(cells[3]),
                    'Unknown': not _bar_is_empty(cells[4]),
                },
            })

    # Second table body: per-country age-bucket targeting.
    if len(rows) > 1:
        for row in rows[1].find_all('tr'):
            cells = row.find_all('td')
            age.append({
                'country': cells[1].text.strip(),
                'ages': {
                    bucket: not _bar_is_empty(cells[2 + i])
                    for i, bucket in enumerate(_AGE_BUCKETS)
                },
            })

    # "Additional parameters" table: parameter name -> status text.
    entry = {}
    for p in ad_soup.find_all('tr', class_="targeting_additional_parameters_table_row"):
        param = p.find('td', class_="targeting_additional_parameters_table_first_col")
        if param is None:
            continue
        status = p.find('td', class_='')
        entry[param.text] = status.text if status is not None else 'None'

    return {
        'ad_id': ad_id,
        'ad_advertiser': advertiser,
        'first_shown': details[0],
        'last_shown': details[1],
        'unique_user_views': details[2],
        'target_audience': target_audience,
        'country_list': country_list,
        'gender': gender,
        'age': age,
        'additional_parameters': [entry],
        'video_link': video_link,
    }


async def main():
    """Actor entry point.

    For each start URL: expand the TikTok ad-library listing page, harvest all
    ad ids, scrape every ad's detail page, push the parsed records to the
    'tiktok-ads-data' dataset, and export them as CSV to a key-value store.
    """
    async with Actor() as actor:
        input_data = await actor.get_input() or {}
        start_urls = input_data.get('start_urls', [])

        dataset = await actor.open_dataset(name='tiktok-ads-data')

        for url_obj in start_urls:
            start_url = url_obj.get('url')
            if not start_url:
                continue

            chrome_options = _make_chrome_options()

            # Phase 1: expand the listing page and harvest the ad ids.
            driver = webdriver.Chrome(chrome_options)
            try:
                ad_ids = _collect_ad_ids(driver, start_url, actor)
            finally:
                driver.quit()  # always release the browser, even on errors

            # Phase 2: visit each ad's detail page and parse it.
            base_url = 'https://library.tiktok.com/ads/detail/?ad_id='
            for ad_id in ad_ids:
                driver = webdriver.Chrome(chrome_options)
                try:
                    driver.get(base_url + ad_id)
                    time.sleep(2)  # let the single-page app render the detail view
                    ad_soup = BeautifulSoup(driver.page_source, 'html.parser')
                finally:
                    driver.quit()
                await dataset.push_data(_parse_ad_detail(ad_soup, ad_id))

            # Export the data as CSV and print the exported records.
            await dataset.export_to_csv('data.csv', to_key_value_store_name='my-key-value-store')
            store = await actor.open_key_value_store(name='my-key-value-store')
            print(await store.get_value('data.csv'))
            actor.log.info(f"Ad IDs: {ad_ids}")
182
183
def download_video(url, ad_id, headers=None):
    """Download an ad video to `<ad_id>.mp4` in the working directory.

    Args:
        url: Direct video URL (e.g. the `src` of the ad's <video> tag).
        ad_id: Ad identifier used to name the output file.
        headers: Optional HTTP headers for the request (e.g. a User-Agent).
            Previously this function read an undefined global `headers`, which
            raised NameError at runtime; it is now an explicit parameter.
    """
    response = requests.get(url, headers=headers, stream=True)
    if response.status_code == 200:
        filename = f"{ad_id}.mp4"
        with open(filename, "wb") as f:
            # Stream in 1 KiB chunks so large videos are never held in memory.
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
        # Module-level logging: `actor` is not in scope outside main().
        logging.info(f"{filename} downloaded successfully.")
    else:
        logging.info(f"Failed to download video from {url}. Status code: {response.status_code}")
195
196
# Allow running this module directly (outside the `python -m src` entry point).
if __name__ == "__main__":
    asyncio.run(main())

    # Example usage:
    # url = 'https://library.tiktok.com/api/v1/cdn/.../video/...'  # a <video> src scraped from an ad detail page
    # ad_id = 123
    # download_video(url, ad_id)

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
.venv
# git folder
.git

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.gitignore

# This file tells Git which files shouldn't be added to source control
.idea
.DS_Store
apify_storage
storage
.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg
__pycache__
.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache
.ruff_cache
.scrapy
*.log

requirements.txt

1beautifulsoup4
2pandas
3selenium
4requests