6thstreet Selenium Link Scraper

Under maintenance

Pricing: Pay per usage

Developed by Muhammed Magdy

Maintained by Community

Rating: 0.0 (0)

Total users: 2

Monthly users: 2

Runs succeeded: >99%

Last modified: 19 days ago

.gitignore

storage
node_modules
.venv

dockerfile

# Use the official Apify Python base image
FROM apify/actor-python-selenium:latest
# Copy everything into the container
COPY . ./
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Run the main Python script
CMD ["python", "scraper.py"]

requirements.txt

selenium
apify  # Apify SDK; scraper.py imports `from apify import Actor`
apify-client
webdriver-manager

scraper.py

import asyncio
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from apify import Actor


def init_driver():
    """Create a headless Chrome driver suitable for running inside a container."""
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--no-sandbox")
    return webdriver.Chrome(options=chrome_options)


def extract_products(driver, category, max_results):
    """Collect product details from the product cards currently loaded on the page."""
    products = []
    items = driver.find_elements(By.CSS_SELECTOR, "li.ProductItem")
    print(f"[INFO] Found {len(items)} raw items")

    for item in items:
        if len(products) >= max_results:
            break
        try:
            brand = item.find_element(By.CLASS_NAME, "ProductItem-Brand").text
            title = item.find_element(By.CLASS_NAME, "ProductItem-Title").text
            price = item.find_element(By.CLASS_NAME, "Price").text
            image = item.find_element(By.CSS_SELECTOR, "img.Image-Image").get_attribute("src")
            product_url = item.find_element(By.CSS_SELECTOR, "a.ProductItem-ImgBlock").get_attribute("href")

            products.append({
                "brand": brand,
                "product_name": title,
                "price": price,
                "image": image,
                "product_url": product_url,
                "category": category,
            })
        except Exception as e:
            print(f"[WARN] Skipping product due to error: {e}")
    return products


def click_all_load_more(driver, wait, max_results):
    """Click the 'Load More' button until enough products are loaded or the button disappears."""
    step = 1
    while True:
        try:
            items = driver.find_elements(By.CSS_SELECTOR, "li.ProductItem")
            print(f"[INFO] Current number of products: {len(items)}")
            if len(items) >= max_results:
                print(f"[INFO] Reached max results limit ({max_results})")
                break
            print(f"[INFO] Clicking 'Load More' button, step {step}")
            load_more_btn = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, ".LoadMore button"))
            )
            driver.execute_script("arguments[0].click();", load_more_btn)
            time.sleep(2)
            step += 1
        except Exception:
            print("[INFO] No more 'Load More' button. Finished loading products.")
            break


async def main():
    async with Actor:
        input_data = await Actor.get_input() or {}
        start_urls = input_data.get("startUrls", {})
        max_results = input_data.get("maxResults", 30)

        for category, url in start_urls.items():
            print(f"[INFO] Scraping category: {category} - {url}")
            driver = init_driver()
            try:
                driver.get(url)
                wait = WebDriverWait(driver, 10)

                click_all_load_more(driver, wait, max_results)
                products = extract_products(driver, category, max_results)

                print(f"[INFO] Extracted {len(products)} products for category: {category}")
                for product in products:
                    await Actor.push_data(product)
            finally:
                # Always close the browser, even if scraping a category fails.
                driver.quit()

        print("[DONE] Scraping complete.")


if __name__ == "__main__":
    asyncio.run(main())
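
The scraper pushes one record per product to the actor's default dataset. A minimal sketch of the record shape, with field names taken from extract_products() above and purely illustrative values:

# Shape of a single dataset record produced by extract_products().
# All values below are made-up examples, not real scraped data.
example_record = {
    "brand": "Example Brand",
    "product_name": "Example Midi Dress",
    "price": "AED 199",
    "image": "https://example.com/images/product.jpg",
    "product_url": "https://example.com/product/12345.html",
    "category": "women-dresses",
}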

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "6thstreet-selenium-link-scraper",
    "version": "1.0",
    "title": "6thStreet Scraper",
    "input": "input_schema.json"
}

.actor/input_schema.json

{
    "title": "Scraper input",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs by Category",
            "type": "object",
            "editor": "json",
            "description": "A dictionary of categories with their corresponding URLs to scrape."
        },
        "maxResults": {
            "title": "Max Results",
            "type": "integer",
            "description": "The maximum number of products to extract per category.",
            "default": 100,
            "minimum": 1
        }
    },
    "required": ["startUrls"]
}
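
For reference, a sketch of how a run with this input schema could be started from Python with the apify-client package; the token, actor reference, category names, and URLs below are placeholders, not values taken from this actor's documentation:

from apify_client import ApifyClient

# Hypothetical token and actor reference; substitute your own.
client = ApifyClient("<YOUR_APIFY_TOKEN>")

run_input = {
    # startUrls is an object: keys are category labels, values are listing URLs.
    "startUrls": {
        "women-dresses": "https://en-ae.6thstreet.com/women/clothing/dresses.html",
        "men-shoes": "https://en-ae.6thstreet.com/men/shoes.html",
    },
    # Maximum number of products to extract per category.
    "maxResults": 50,
}

# Start the run and wait for it to finish.
run = client.actor("<USERNAME>/6thstreet-selenium-link-scraper").call(run_input=run_input)

# Read the scraped products from the run's default dataset.
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item["brand"], item["product_name"], item["price"])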