Botsel avatar
Botsel

Deprecated

Pricing

Pay per usage

Go to Store
Botsel

Botsel

Deprecated

Developed by

forbit

forbit

Maintained by Community

sel bot

0.0 (0)

Pricing

Pay per usage

1

Total users

1

Monthly users

1

Last modified

2 years ago

.actor/Dockerfile

# Base image: Apify's Python 3.11 image with Selenium support.
# Other Apify images are listed at https://hub.docker.com/r/apify/,
# and any image from Docker Hub works as well.
FROM apify/actor-python-selenium:3.11

# Copy requirements.txt on its own first. It is the only file the dependency
# install below depends on, so the installed layer can be reused by later
# builds when only source files change.
COPY requirements.txt ./

# Install the dependencies, printing the Python version, the pip version and
# the full list of installed packages to make build logs easier to debug.
RUN echo "Python version:" \
    && python --version \
    && echo "Pip version:" \
    && pip --version \
    && echo "Installing dependencies:" \
    && pip install -r requirements.txt \
    && echo "All installed Python packages:" \
    && pip freeze

# Copy the rest of the project. This happens after the dependency install so
# that most source changes only rebuild from this step onwards.
COPY . ./

# Start the actor by running the "src" package as a module.
CMD ["python3", "-m", "src"]

.actor/actor.json

{
"actorSpecification": 1,
"name": "my-actor-1",
"title": "Getting started with Python and Selenium",
"description": "Scrapes titles of websites using Selenium.",
"version": "0.0",
"meta": {
"templateId": "python-selenium"
},
"input": "./input_schema.json",
"dockerfile": "./Dockerfile",
"storages": {
"dataset": {
"actorSpecification": 1,
"title": "URLs and their titles",
"views": {
"titles": {
"title": "URLs and their titles",
"transformation": {
"fields": [
"url",
"title"
]
},
"display": {
"component": "table",
"properties": {
"url": {
"label": "URL",
"format": "text"
},
"title": {
"label": "Title",
"format": "text"
}
}
}
}
}
}
}
}

.actor/input_schema.json

{
"title": "Python Selenium Scraper",
"type": "object",
"schemaVersion": 1,
"properties": {
"start_urls": {
"title": "Start URLs",
"type": "array",
"description": "URLs to start with",
"prefill": [
{ "url": "https://apify.com" }
],
"editor": "requestListSources"
},
"max_depth": {
"title": "Maximum depth",
"type": "integer",
"description": "Depth to which to scrape",
"default": 1
}
},
"required": ["start_urls"]
}

src/__init__.py

1

src/__main__.py

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# One stream handler, formatted with the Apify log formatter, is shared by
# both loggers configured below.
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

apify_client_logger = logging.getLogger('apify_client')
apify_logger = logging.getLogger('apify')

# The API client logs at INFO, the SDK itself at DEBUG.
for _logger, _level in (
    (apify_client_logger, logging.INFO),
    (apify_logger, logging.DEBUG),
):
    _logger.setLevel(_level)
    _logger.addHandler(handler)

# Run the actor's async entry point to completion.
asyncio.run(main())

src/main.py

1from urllib.parse import urljoin
2from apify import Actor
3from seleniumwire import webdriver as sdriver
4from selenium import webdriver
5from selenium.webdriver.chrome.options import Options as ChromeOptions
6from selenium.webdriver.common.by import By
7from selenium.webdriver.common.keys import Keys
8from selenium.webdriver.support import expected_conditions as EC
9from selenium.webdriver.support.ui import WebDriverWait
10import asyncio
11
12# To run this Actor locally, you need to have the Selenium Chromedriver installed.
13# https://www.selenium.dev/documentation/webdriver/getting_started/install_drivers/
14# When running on the Apify platform, it is already included in the Actor's Docker image.
15
16# proxy1 = 'http://groups-RESIDENTIAL:<APIFY_PROXY_PASSWORD>@proxy.apify.com:8000'  (credential redacted - never commit real proxy passwords)
17
def wiredriver(PROXY):
    """Create a selenium-wire Chrome driver that routes traffic through PROXY.

    Args:
        PROXY: Upstream proxy URL (for example
            ``http://user:password@host:port``).

    Returns:
        A started ``seleniumwire.webdriver.Chrome`` instance. The caller owns
        the driver and must call ``quit()`` on it when finished.
    """
    # Only keys that selenium-wire itself understands belong in this dict.
    # 'verify_ssl' is a top-level selenium-wire option (it was previously
    # nested inside 'proxy', where it is not read). The same upstream is
    # given for both schemes, as in the selenium-wire proxy examples.
    seleniumwire_options = {
        'proxy': {'http': PROXY, 'https': PROXY},
        'verify_ssl': False,
    }

    # Browser behaviour is configured through Chrome arguments. 'headless'
    # and 'start-maximized' used to be passed as selenium-wire options, which
    # had no effect on the browser; they are applied here instead.
    chrome_options = ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--start-maximized')
    chrome_options.add_argument('--no-sandbox')

    return sdriver.Chrome(
        options=chrome_options,
        seleniumwire_options=seleniumwire_options,
    )
40
# Page that main() loads with driver.get() on every iteration of its loop.
41url_adsite = 'https://a000.ex16.repl.co/'
42# driver = wiredriver(proxy1)
43
async def main():
    """Actor entry point: repeatedly load ``url_adsite`` through a proxy.

    Creates a proxy configuration for the ``RESIDENTIAL`` group, starts one
    Chrome driver bound to a single proxy URL, then loops: load the page,
    scroll down, log. The driver is always shut down when the loop is left,
    whether by an error or by cancellation.
    """
    async with Actor:
        proxy_configuration = await Actor.create_proxy_configuration(groups=['RESIDENTIAL'])
        proxy1 = await proxy_configuration.new_url()
        driver = wiredriver(proxy1)
        try:
            driver.maximize_window()
            # NOTE(review): the loop has no exit condition, so the run ends
            # only when it is aborted, times out, or a call outside the inner
            # try raises. Confirm that this is intended.
            while True:
                try:
                    driver.get(url_adsite)
                except Exception:
                    # A failed page load should not end the run; record the
                    # traceback instead of discarding it.
                    Actor.log.exception('url failed')
                driver.execute_script("window.scrollTo(0, 1080);")
                Actor.log.info(f'Scraping {url_adsite} ...')
                # The Selenium calls above block the event loop; yield once
                # per iteration so other pending tasks get a chance to run.
                await asyncio.sleep(0)
        finally:
            # Always release the browser and the selenium-wire proxy process.
            driver.quit()
109# asyncio.run(main())

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
.venv
# git folder
.git

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.gitignore

# This file tells Git which files shouldn't be added to source control
.idea
.DS_Store
apify_storage
storage
.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg
__pycache__
.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache
.scrapy
*.log

requirements.txt

1# Add your dependencies here.
2# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
3# for how to format them
4apify ~= 1.1.1
5selenium ~= 4.9.1
6selenium-wire