Botsel
Deprecated
Pricing
Pay per usage
Go to Store
Botsel
Deprecated
sel bot
0.0 (0)
Pricing
Pay per usage
1
Total users
1
Monthly users
1
Last modified
2 years ago
.actor/Dockerfile
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python-selenium:3.11

# Second, copy just requirements.txt into the actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./

# Specify how to launch the source code of your actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]
.actor/actor.json
{ "actorSpecification": 1, "name": "my-actor-1", "title": "Getting started with Python and Selenium", "description": "Scrapes titles of websites using Selenium.", "version": "0.0", "meta": { "templateId": "python-selenium" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile", "storages": { "dataset": { "actorSpecification": 1, "title": "URLs and their titles", "views": { "titles": { "title": "URLs and their titles", "transformation": { "fields": [ "url", "title" ] }, "display": { "component": "table", "properties": { "url": { "label": "URL", "format": "text" }, "title": { "label": "Title", "format": "text" } } } } } } }}
.actor/input_schema.json
{ "title": "Python Selenium Scraper", "type": "object", "schemaVersion": 1, "properties": { "start_urls": { "title": "Start URLs", "type": "array", "description": "URLs to start with", "prefill": [ { "url": "https://apify.com" } ], "editor": "requestListSources" }, "max_depth": { "title": "Maximum depth", "type": "integer", "description": "Depth to which to scrape to", "default": 1 } }, "required": ["start_urls"]}
src/__init__.py
1
src/__main__.py
"""Entry point for the Actor package: wire up logging, then run main()."""
import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Single stderr handler formatted for the Apify platform log viewer.
_handler = logging.StreamHandler()
_handler.setFormatter(ActorLogFormatter())

# Attach the handler to both SDK loggers. The API client is noisier, so it
# stays at INFO while the actor's own logger runs at DEBUG.
for _name, _level in (('apify_client', logging.INFO), ('apify', logging.DEBUG)):
    _logger = logging.getLogger(_name)
    _logger.setLevel(_level)
    _logger.addHandler(_handler)

asyncio.run(main())
src/main.py
1from urllib.parse import urljoin2from apify import Actor3from seleniumwire import webdriver as sdriver4from selenium import webdriver5from selenium.webdriver.chrome.options import Options as ChromeOptions6from selenium.webdriver.common.by import By7from selenium.webdriver.common.keys import Keys8from selenium.webdriver.support import expected_conditions as EC9from selenium.webdriver.support.ui import WebDriverWait10import asyncio11
12# To run this Actor locally, you need to have the Selenium Chromedriver installed.13# https://www.selenium.dev/documentation/webdriver/getting_started/install_drivers/14# When running on the Apify platform, it is already included in the Actor's Docker image.15
16# proxy1 = 'http://groups-RESIDENTIAL:apify_proxy_kLL2nn1MA5Wd468LmDvPExVB8Np06o0fryYf@proxy.apify.com:8000'17 18def wiredriver(PROXY):19 seleniumwire_options = {20 'proxy': {'http': PROXY,'verify_ssl': False,},'start-maximized': True,'headless': True,}21 chrome_options = ChromeOptions()22 # if Actor.config.headless:23 # 375 81224 mobile_emulation = {25 "deviceMetrics": { "width": 1920, "height": 1080, "pixelRatio": 3.0 },26 "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"}27 # chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)28 # chrome_options.add_argument("user-agent = Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19")29 # chrome_options.add_argument('--window-size=1920x1080')30 # chrome_options.add_argument("--start-maximized")31 # chrome_options.add_argument('user-agent=MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1')32 # chrome_options.add_argument('--allow-running-insecure-content')33 # chrome_options.add_argument('--headless')34 chrome_options.add_argument('--no-sandbox')35 # chrome_options.add_argument('--disable-dev-shm-usage')36 # driver = webdriver.Chrome(options=chrome_options)37 driver = sdriver.Chrome(options=chrome_options, seleniumwire_options=seleniumwire_options)38 # driver = sdriver.Chrome(options=chrome_options)39 return driver40
41url_adsite = 'https://a000.ex16.repl.co/'42# driver = wiredriver(proxy1)43
async def main():
    """Actor entry point: open the target page through a residential proxy
    and keep reloading + scrolling it until the Actor run is aborted.

    Raises:
        Propagates errors from proxy configuration or driver start-up;
        individual page-load failures are logged and retried instead.
    """
    async with Actor:
        # Route the browser through Apify's residential proxy pool.
        proxy_configuration = await Actor.create_proxy_configuration(groups=['RESIDENTIAL'])
        proxy_url = await proxy_configuration.new_url()
        driver = wiredriver(proxy_url)
        try:
            driver.maximize_window()
            # Intentionally endless: the run only stops when the platform
            # aborts it (original used `run = True; while run:` with no exit).
            while True:
                try:
                    driver.get(url_adsite)
                except Exception:
                    # Narrowed from a bare `except:` which would also swallow
                    # KeyboardInterrupt/SystemExit; log via the Actor logger
                    # instead of print() so failures show up in platform logs.
                    Actor.log.exception('Page load failed, retrying...')
                    continue  # don't scroll a page that never loaded
                # Scroll one viewport down so below-the-fold content renders.
                driver.execute_script("window.scrollTo(0, 1080);")
                Actor.log.info(f'Scraping {url_adsite} ...')
        finally:
            # Always release the browser process, even on abort/error —
            # the original never called quit() and leaked the driver.
            driver.quit()
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
.venv

# git folder
.git
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.gitignore
# This file tells Git which files shouldn't be added to source control
.idea
.DS_Store

apify_storage
storage

.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg

__pycache__

.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache

.scrapy
*.log
requirements.txt
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
apify ~= 1.1.1
selenium ~= 4.9.1
selenium-wire