1from selenium import webdriver
2from selenium.webdriver.chrome.options import Options as ChromeOptions
3from selenium.webdriver.common.by import By
4from selenium.webdriver.support.ui import WebDriverWait
5from selenium.webdriver.support import expected_conditions as EC
6from apify import Actor
7
def _text_or_default(wait: 'WebDriverWait', css_selector: str, default: str) -> str:
    """Return the stripped text of the element matching *css_selector*.

    ``WebDriverWait.until`` raises ``TimeoutException`` when the element does
    not appear in time; it never returns a falsy value. The fallback therefore
    has to live in an ``except`` clause rather than in a truthiness check.

    Args:
        wait: Configured ``WebDriverWait`` bound to the active driver.
        css_selector: CSS selector of the element whose text is wanted.
        default: Value returned when the element is not found in time.

    Returns:
        The element's text with surrounding whitespace removed, or *default*.
    """
    # Function-scope import: selenium is already a dependency of this file,
    # and this keeps the block self-contained.
    from selenium.common.exceptions import TimeoutException

    try:
        element = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))
        )
    except TimeoutException:
        return default
    return element.text.strip()


async def main() -> None:
    """Scrape the apify/crawlee GitHub page and push its name and star count.

    Opens the repository page in Chrome via Selenium, reads the repository
    name and the stargazers count, prints them, and stores them with
    ``actor.push_data``. Any other failure is logged through the Actor logger;
    the browser is always closed.
    """
    async with Actor() as actor:
        repo_url = 'https://github.com/apify/crawlee'

        chrome_options = ChromeOptions()
        if actor.config.headless:
            chrome_options.add_argument('--headless')
        # These two flags are applied regardless of the headless setting.
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome(options=chrome_options)

        try:
            # NOTE: Selenium calls are synchronous, so they run to completion
            # before control returns to the event loop.
            driver.get(repo_url)

            wait = WebDriverWait(driver, 10)
            repo_name = _text_or_default(
                wait,
                "strong a[href*='/apify/crawlee']",
                'Repo name not found',
            )
            stars_count = _text_or_default(
                wait,
                "a.Link--muted[href*='/stargazers'] strong",
                '0',
            )

            print(f"Repository: {repo_name}, Stars: {stars_count}")
            await actor.push_data({
                'repo_url': repo_url,
                'repo_name': repo_name,
                'stars_count': stars_count
            })

        except Exception as e:
            # Top-level boundary: log with traceback and let the Actor exit cleanly.
            actor.log.exception(f'Cannot extract data from {repo_url}. Exception: {e}')
        finally:
            # Always release the browser process, even when scraping fails.
            driver.quit()