# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python-selenium:3.11

# Second, copy just requirements.txt into the Actor image.
# It is the only file that affects the dependency install in the next step,
# so keeping it in its own layer lets Docker reuse the cached install layer
# whenever only the source code changes.
COPY requirements.txt ./

# Install the packages specified in requirements.txt.
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer,
# where it would only add size and never be reused.
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install --no-cache-dir -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, rebuilds are fast
# for most source file changes.
COPY . ./

# Specify how to launch the source code of your Actor.
# The "python3 -m src" command runs the src package (src/__main__.py).
CMD ["python3", "-m", "src"]

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "my-actor-1",
    "title": "Getting started with Python and Selenium",
    "description": "Scrapes titles of websites using Selenium.",
    "version": "0.0",
    "meta": {
        "templateId": "python-selenium"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile",
    "storages": {
        "dataset": {
            "actorSpecification": 1,
            "title": "URLs and their titles",
            "views": {
                "titles": {
                    "title": "URLs and their titles",
                    "transformation": {
                        "fields": [
                            "url",
                            "title"
                        ]
                    },
                    "display": {
                        "component": "table",
                        "properties": {
                            "url": {
                                "label": "URL",
                                "format": "text"
                            },
                            "title": {
                                "label": "Title",
                                "format": "text"
                            }
                        }
                    }
                }
            }
        }
    }
}

.actor/input_schema.json

{
    "title": "Python Selenium Scraper",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "start_urls": {
            "title": "Start URLs",
            "type": "array",
            "description": "URLs to start with",
            "prefill": [
                { "url": "https://apify.com" }
            ],
            "editor": "requestListSources"
        },
        "max_depth": {
            "title": "Maximum depth",
            "type": "integer",
            "description": "Depth to which to scrape to",
            "default": 1
        }
    },
    "required": ["start_urls"]
}

src/__init__.py

src/__main__.py

1import asyncio
2import logging
3
4from apify.log import ActorLogFormatter
5
6from .main import main
7
# Package entry point, executed by "python3 -m src".
# A single stream handler with the Apify log formatter is shared by both loggers below.
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

# Messages from the 'apify_client' logger are shown from INFO level upwards.
apify_client_logger = logging.getLogger('apify_client')
apify_client_logger.setLevel(logging.INFO)
apify_client_logger.addHandler(handler)

# Messages from the 'apify' logger are shown from DEBUG level upwards.
apify_logger = logging.getLogger('apify')
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)

# Run the Actor's async main() coroutine until it completes.
asyncio.run(main())

src/main.py

1from urllib.parse import urljoin
2from apify import Actor
3from seleniumwire import webdriver as sdriver
4from selenium import webdriver
5from selenium.webdriver.chrome.options import Options as ChromeOptions
6from selenium.webdriver.common.by import By
7from selenium.webdriver.common.keys import Keys
8from selenium.webdriver.support import expected_conditions as EC
9from selenium.webdriver.support.ui import WebDriverWait
10import asyncio
11
12# To run this Actor locally, you need to have the Selenium Chromedriver installed.
13# https://www.selenium.dev/documentation/webdriver/getting_started/install_drivers/
14# When running on the Apify platform, it is already included in the Actor's Docker image.
15
16# proxy1 = 'http://groups-RESIDENTIAL:<APIFY_PROXY_PASSWORD>@proxy.apify.com:8000'  # password redacted: never commit real proxy credentials
17                             
def wiredriver(PROXY):
    """Start a Selenium Wire Chrome driver that uses PROXY as its upstream proxy.

    Args:
        PROXY: Proxy URL, handed to Selenium Wire under the 'http' proxy key.

    Returns:
        The started ``seleniumwire.webdriver.Chrome`` instance. The caller owns it
        and is responsible for calling ``quit()`` on it.
    """
    # Selenium Wire only reads its own option names. 'verify_ssl' is a top-level
    # option (it was previously nested inside the 'proxy' dict), and Chrome
    # switches do not belong here at all.
    seleniumwire_options = {
        'proxy': {'http': PROXY},
        'verify_ssl': False,
    }

    # Chrome switches go through ChromeOptions. 'headless' and 'start-maximized'
    # used to be passed as Selenium Wire options, where they never reached Chrome.
    chrome_options = ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--start-maximized')
    chrome_options.add_argument('--no-sandbox')

    return sdriver.Chrome(options=chrome_options, seleniumwire_options=seleniumwire_options)
40
# Page that main() loads on every iteration of its loop.
url_adsite = 'https://a000.ex16.repl.co/'
# driver = wiredriver(proxy1)
43
async def main():
    """Actor entry point: repeatedly load ``url_adsite`` in a proxied Chrome session.

    Creates a proxy configuration for the 'RESIDENTIAL' group, requests one proxy
    URL from it, starts a driver through ``wiredriver()`` and then loops without an
    exit condition: load the page, scroll it, log the URL. The loop only ends when
    an exception propagates or the run is stopped from outside; in both cases the
    browser is shut down before the Actor context exits.
    """
    async with Actor:
        proxy_configuration = await Actor.create_proxy_configuration(groups=['RESIDENTIAL'])
        proxy1 = await proxy_configuration.new_url()
        driver = wiredriver(proxy1)
        try:
            driver.maximize_window()
            while True:
                try:
                    driver.get(url_adsite)
                except Exception:
                    # Best effort: a failed page load is reported (with traceback)
                    # and the iteration carries on, as before. Unlike a bare
                    # "except:", this lets KeyboardInterrupt, SystemExit and task
                    # cancellation propagate so the run can actually be stopped.
                    Actor.log.exception('url failed')

                driver.execute_script("window.scrollTo(0, 1080);")
                Actor.log.info(f'Scraping {url_adsite} ...')

                # The Selenium calls above are blocking; yield once per iteration
                # so other tasks scheduled on the event loop get a chance to run.
                await asyncio.sleep(0)
        finally:
            # Always release the browser, however the loop is left.
            driver.quit()

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
.venv

# git folder
.git

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.gitignore

# This file tells Git which files shouldn't be added to source control

.idea
.DS_Store

apify_storage
storage

.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg

__pycache__

.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache

.scrapy
*.log

requirements.txt

# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
apify ~= 1.1.1
selenium ~= 4.9.1
# Pinned like the entries above so that rebuilds install a known release line.
# NOTE(review): selenium-wire 5.1.0 is reported to fail on import with blinker >= 1.8;
# if the build hits that, add "blinker < 1.8" here - confirm against a fresh build.
selenium-wire ~= 5.1.0

Career Site Job Listing API

fantastic-jobs/career-site-job-listing-api

The perfect Job Posting API for high-quality real jobs! Direct postings from over 105k company career sites across 35 ATS platforms like: Workday, Greenhouse, and Ashby. Enriched with AI and LinkedIn company data, with up to 60 fields per job! Includes Lever, Rippling, SuccessFactors, iCIMS

Fantastic.jobs B.V.

159

5.0

Pagesjaunes Scraper

saswave/pagesjaunes-scraper

Unofficial French Yellow Pages scraper for Pagesjaunes.fr. Extracts unprotected, publicly available information from search results: city, name, email, phone number, SIRET, NAF, creation date and more. Helps you research your local professional

SASWAVE

124

3.6

Negative Reviews Analyzer

lead.gen.labs/negative-reviews-analyzer

Extract and analyze 1–2 star reviews from platforms like Trustpilot, Yelp, Booking, and Google Maps. This actor filters negative feedback, identifies key pain points, and provides actionable insights for improving customer experience and managing online reputation

LeadGen Labs

Web Scraper

apify/web-scraper

Crawls arbitrary websites using a web browser and extracts structured data from web pages using a provided JavaScript function. The Actor supports both recursive crawling and lists of URLs, and automatically manages concurrency for maximum performance.

Apify

93K

4.4

Cheerio Scraper

apify/cheerio-scraper

Crawls websites using raw HTTP requests, parses the HTML with the Cheerio library, and extracts data from the pages using a Node.js code. Supports both recursive crawling and lists of URLs. This actor is a high-performance alternative to apify/web-scraper for websites that do not require JavaScript.

Apify

9.4K

4.7

🔥 LinkedIn Jobs Scraper

bebity/linkedin-jobs-scraper

ℹ️ Designed for both personal and professional use, simply enter your desired job title and location to receive a tailored list of job opportunities. Try it today!

Bebity

13K

3.7

Twitter (X.com) Scraper Unlimited: No Limits

apidojo/twitter-scraper-lite

Introducing Twitter Scraper Unlimited, the most comprehensive Twitter data extraction solution available. Our enterprise-grade scraper offers unmatched capabilities with a transparent event-based pricing model, making it perfect for both small-scale and large-scale data extraction needs.

API Dojo

10K

3.0

Youtube Video Downloader

epctex/youtube-video-downloader

Effortlessly download YouTube videos of your preferred quality with our user-friendly Video Downloader. Try it now!

epctex

1.7K

3.0

Linkedin Profile Posts Scraper [NO COOKIES]

apimaestro/linkedin-profile-posts

Scrape LinkedIn posts data for a given LinkedIn profile including post content, reactions, comments count, and media attachments

API Maestro

5.6K

4.1

Puppeteer Scraper

apify/puppeteer-scraper

Crawls websites with the headless Chrome and Puppeteer library using a provided server-side Node.js code. This crawler is an alternative to apify/web-scraper that gives you finer control over the process. Supports both recursive crawling and list of URLs. Supports login to website.

Apify

8.9K

5.0

Botsel

Botsel

.actor/Dockerfile

.actor/actor.json

.actor/input_schema.json

src/__init__.py

src/__main__.py

src/main.py

.dockerignore

.editorconfig

.gitignore

requirements.txt

You might also like

Career Site Job Listing API

Pagesjaunes Scraper

Negative Reviews Analyzer

Web Scraper

Cheerio Scraper

🔥 LinkedIn Jobs Scraper

Twitter (X.com) Scraper Unlimited: No Limits

Youtube Video Downloader

Linkedin Profile Posts Scraper [NO COOKIES]

Puppeteer Scraper

.actor/Dockerfile

.actor/actor.json

.actor/input_schema.json

src/__init__.py

src/__main__.py

src/main.py

.dockerignore

.editorconfig

.gitignore

requirements.txt

src/init.py

src/main.py

src/init.py

src/main.py