1"""This module defines the main entry point for the Apify Actor.
2
3Feel free to modify this file to suit your specific needs.
4
To build Apify Actors, use the Apify SDK; read more in the official documentation:
6https://docs.apify.com/sdk/python
7"""
8
9import asyncio
10from urllib.parse import urljoin
11
12from time import sleep
13
14from apify import Actor, Request
15from selenium import webdriver
16from selenium.webdriver.chrome.service import Service
17from selenium.webdriver.chrome.options import Options
18from selenium.webdriver.common.by import By
19from selenium.webdriver.common.keys import Keys
20from selenium.webdriver.support.ui import WebDriverWait
21from selenium.webdriver.support import expected_conditions as EC
22from webdriver_manager.chrome import ChromeDriverManager
23
24
25
26
27
28
29
30
async def main() -> None:
    """Log in to a site with headless Chrome and push the session cookies.

    The Actor input is read as a mapping with these keys:

    * ``start_urls`` - list of objects carrying a ``url`` key (the login page).
    * ``credentials`` - mapping with ``user`` and ``password`` (each holding
      ``value`` and ``xpath``), ``submit`` (holding ``xpath``) and an optional
      ``extra`` mapping (``wait_time`` in seconds, default 5, and
      ``ok_page_xpath``, an element expected on the page after a successful
      login).

    For every request taken from the queue the login form is filled in and
    submitted, and ``{'url': ..., 'cookies': ...}`` is pushed to the default
    dataset. Nothing is returned.

    This coroutine is executed using `asyncio.run()`, so it must remain an
    asynchronous function; blocking calls such as `time.sleep()` must be
    avoided because they stall the event loop the Apify SDK relies on.
    """
    async with Actor:
        actor_input = await Actor.get_input() or {}
        start_urls = actor_input.get('start_urls')
        credentials = actor_input.get('credentials')

        # NOTE: Actor.exit() normally terminates the process. The explicit
        # `return` after each call guards the environments where it does not,
        # so that execution never continues with invalid input.
        if not start_urls:
            Actor.log.error('No start_urls specified in actor input, exiting...')
            await Actor.exit()
            return

        login_url = start_urls[0]
        if not login_url:
            Actor.log.error('No login url specified in actor input, exiting...')
            await Actor.exit()
            return

        if not credentials:
            Actor.log.error('No credentials specified in actor input, exiting...')
            await Actor.exit()
            return

        # `or {}` also covers keys that are present but explicitly null.
        user_info = credentials.get("user") or {}
        password_info = credentials.get("password") or {}
        submit_info = credentials.get("submit") or {}
        extra_info = credentials.get("extra") or {}

        user = user_info.get("value")
        password = password_info.get("value")

        if not (user and password):
            Actor.log.error('User/Password info is not complete, exiting...')
            await Actor.exit()
            return

        xpath_user = user_info.get("xpath")
        xpath_password = password_info.get("xpath")
        xpath_submit = submit_info.get("xpath")

        if not (xpath_user and xpath_password and xpath_submit):
            Actor.log.error('XPath info for user/password/submit is not complete, exiting...')
            await Actor.exit()
            return

        Actor.log.debug("All info ok")

        wait_time = extra_info.get("wait_time", 5)
        if isinstance(wait_time, (str, float)):
            # Go through float() so that strings such as "2.5" are accepted too.
            wait_time = int(float(wait_time))

        ok_page_xpath = extra_info.get("ok_page_xpath")

        # Drop and re-open the named queue so that every run starts from an
        # empty queue instead of inheriting requests from a previous run.
        queue_name = "my-login-queue"
        request_queue = await Actor.open_request_queue(name=queue_name)
        await request_queue.drop()
        request_queue = await Actor.open_request_queue(name=queue_name)

        for start_url in start_urls:
            url = start_url.get('url')
            if not url:
                Actor.log.warning(f'Skipping start URL entry without a url: {start_url!r}')
                continue
            Actor.log.info(f'Enqueuing {url} ...')
            # NOTE(review): every request shares the unique key "login", so the
            # queue presumably keeps only the first URL - confirm this is intended.
            new_request = Request.from_url(url, user_data={'depth': 0}, unique_key="login")
            await request_queue.add_request(new_request)

        Actor.log.info('Launching Chrome Headless WebDriver...')
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome(options=chrome_options)

        try:
            while request := await request_queue.fetch_next_request():
                url = request.url
                Actor.log.info(f'Login check: {url} ...')

                try:
                    driver.get(url)
                    Actor.log.info(f'Sleeping for this much time: {wait_time} seconds ...')

                    # One waiter is enough: each `until()` call starts its own timeout.
                    wait = WebDriverWait(driver, wait_time)

                    username_input = wait.until(EC.presence_of_element_located((By.XPATH, xpath_user)))
                    password_input = wait.until(EC.presence_of_element_located((By.XPATH, xpath_password)))
                    submit_button = wait.until(EC.element_to_be_clickable((By.XPATH, xpath_submit)))

                    username_input.send_keys(user)
                    password_input.send_keys(password)
                    submit_button.click()

                    if ok_page_xpath:
                        # Wait for an element that only exists after a successful login.
                        wait.until(EC.presence_of_element_located((By.XPATH, ok_page_xpath)))
                    else:
                        # No success marker configured: give the page a fixed amount
                        # of time, without blocking the event loop.
                        await asyncio.sleep(wait_time)
                        Actor.log.info("Wake up!")

                    await Actor.push_data({'url': url, 'cookies': driver.get_cookies()})
                except Exception:
                    # Best effort: a failed login is logged and the request is still
                    # marked as handled below so the queue can drain.
                    Actor.log.exception(f'Cannot login: URL is {url}.')
                finally:
                    await request_queue.mark_request_as_handled(request)
        finally:
            # Always shut the browser down, even when queue operations fail.
            driver.quit()