1from urllib.parse import urljoin
2from apify import Actor
3from apify_client import ApifyClient
4from selenium import webdriver
5from selenium.webdriver.chrome.options import Options as ChromeOptions
6from selenium.webdriver.common.by import By
7from selenium.webdriver.support.ui import WebDriverWait
8from selenium.webdriver.support import expected_conditions as EC
9from selenium.webdriver.common.keys import Keys
10import time
11
# Page the actor logs in to and scrapes.
_START_URL = 'https://dietly.pl/'

# Upper bound, in seconds, for every explicit Selenium wait below.
_WAIT_SECONDS = 10

# Common XPath prefix of the panel holding the day summary row and the meal list.
_DAY_PANEL_XPATH = "//div[@id='__next']/main/div[2]/div/div[4]/div[2]/main/div/div[4]"

# Meal headings in list order; position N (1-based) is the N-th meal of the day.
_MEAL_NAMES = ('Śniadanie', 'II śniadanie', 'Obiad', 'Podwieczorek', 'Kolacja')

# (output key, on-page label, element that follows the label and holds the value).
_MEAL_FIELDS = (
    ('MealKcal', 'Wartości odżywcze', 'h3[1]'),
    ('Bialko', 'Białko', 'span[1]'),
    ('Tluszcz', 'Tłuszcz', 'span[1]'),
    ('Weglowodany', 'Węglowodany', 'span[1]'),
    ('Blonnik', 'Błonnik', 'span[1]'),
    ('Cukry', 'Cukry', 'span[1]'),
    ('Sol', 'Sól', 'span[1]'),
    ('NKT', 'NKT', 'span[1]'),
    ('Ingredients', 'NKT', 'span[2]'),
)


def _wait_present(driver, xpath):
    """Return the element at *xpath* once it is present in the DOM (raises on timeout)."""
    return WebDriverWait(driver, _WAIT_SECONDS).until(
        EC.presence_of_element_located((By.XPATH, xpath))
    )


def _wait_clickable(driver, xpath):
    """Return the element at *xpath* once Selenium reports it clickable (raises on timeout)."""
    return WebDriverWait(driver, _WAIT_SECONDS).until(
        EC.element_to_be_clickable((By.XPATH, xpath))
    )


def _log_in(driver, email, password):
    """Open the login form on the current page, fill in the credentials and submit."""
    # Positional XPath; presumably the "log in" button on the landing page.
    _wait_clickable(
        driver, "//div[@id='__next']/main/div/div/div[2]/div[2]/button/div"
    ).click()

    email_field = WebDriverWait(driver, _WAIT_SECONDS).until(
        EC.presence_of_element_located((By.ID, "email"))
    )
    email_field.click()
    email_field.clear()
    email_field.send_keys(email)

    password_field = driver.find_element(By.ID, "password")
    password_field.click()
    password_field.clear()
    password_field.send_keys(password)

    driver.find_element(By.XPATH, "//button[@type='submit']").click()


def _open_day_panel(driver):
    """Navigate from the post-login page to the meal list and open its first entry."""
    # Positional XPath; presumably the link leading to the current order/menu.
    _wait_clickable(
        driver, "//div[@id='__next']/main/div/div/div[2]/div[2]/div/a/button/div"
    ).click()

    # Block until the summary row cells exist so the panel is rendered before the
    # meal list is touched. The values themselves are not used. (The original
    # bound them to Number_of_meals / KcalSum / BSum, with BSum re-reading the
    # KcalSum cell.)
    for cell in (2, 3):
        _wait_present(driver, f"{_DAY_PANEL_XPATH}/div[3]/div[{cell}]")

    first_meal_xpath = f"{_DAY_PANEL_XPATH}/ul/li/div/div/div"
    driver.execute_script(
        "arguments[0].scrollIntoView();", _wait_present(driver, first_meal_xpath)
    )
    # Re-locate after scrolling, as the original did, in case the node was re-rendered.
    driver.execute_script(
        "arguments[0].click();", driver.find_element(By.XPATH, first_meal_xpath)
    )


def _read_meal(driver, index, meal_name):
    """Open the *index*-th meal (1-based) and return its details as a dataset record."""
    # The first list item has a different inner structure than the following ones.
    if index == 1:
        trigger_xpath = f"{_DAY_PANEL_XPATH}/ul/li/div/div/div"
    else:
        trigger_xpath = f"{_DAY_PANEL_XPATH}/ul/li[{index}]/div/div[2]/span"

    # JavaScript click, so an overlay or off-screen position cannot intercept it.
    driver.execute_script("arguments[0].click();", _wait_present(driver, trigger_xpath))

    # Fixed pause kept from the original; presumably lets the details view settle.
    time.sleep(1)

    record = {
        'MealIndex': index,
        'MealTitle': _wait_present(
            driver,
            f"(.//*[normalize-space(text()) and normalize-space(.)='{meal_name}'])/following::h4[1]",
        ).text,
    }
    # Each label occurs once per meal on the page, so the index-th match belongs to this meal.
    for key, label, value_node in _MEAL_FIELDS:
        record[key] = _wait_present(
            driver,
            f"(.//*[normalize-space(text()) and normalize-space(.)='{label}'])[{index}]/following::{value_node}",
        ).text
    return record


async def main():
    """Log in to dietly.pl with the credentials from the actor input and scrape the meals.

    Reads ``email`` and ``password`` from the actor input, enqueues the start URL,
    drives a Chrome WebDriver through the login form and pushes one dataset item
    per meal (title, kcal, macro-nutrients, ingredients).

    Differences from the previous implementation:
      * no built-in fallback credentials - a missing ``email``/``password`` now
        triggers the "exiting" branch that the log message always described;
      * credentials are no longer written into the request queue's ``userData``,
        so the plaintext password is not persisted in actor storage;
      * the failure log uses the request URL instead of an undefined ``url`` name;
      * only ``Exception`` is caught, so interrupts and exits propagate;
      * the WebDriver is always shut down, even if the queue loop raises.
    """
    async with Actor:
        actor_input = await Actor.get_input() or {}
        email = actor_input.get('email')
        password = actor_input.get('password')

        if not email or not password:
            Actor.log.info('No email or password specified in actor input, exiting...')
            await Actor.exit()
            # Do not fall through if exit() returns instead of terminating the process.
            return

        default_queue = await Actor.open_request_queue()
        await default_queue.add_request({'url': _START_URL})

        Actor.log.info('Launching Chrome WebDriver...')
        chrome_options = ChromeOptions()
        if Actor.config.headless:
            chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome(options=chrome_options)

        try:
            while request := await default_queue.fetch_next_request():
                url = request['url']
                Actor.log.info(f'Scraping {url} ...')

                try:
                    driver.get(url)
                    _log_in(driver, email, password)
                    _open_day_panel(driver)

                    for index, meal_name in enumerate(_MEAL_NAMES, start=1):
                        await Actor.push_data(_read_meal(driver, index, meal_name))
                        # Dismiss the meal details before opening the next one.
                        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.ESCAPE)
                except Exception:
                    Actor.log.exception(f'Cannot extract data from {url}.')
                finally:
                    # Failed requests are marked handled too, so they are not retried.
                    await default_queue.mark_request_as_handled(request)
        finally:
            driver.quit()
147
# Script entry point: run the actor coroutine to completion on a fresh event loop.
if __name__ == "__main__":
    from asyncio import run as run_event_loop

    run_event_loop(main())