1"""This module defines the main entry point for the Apify Actor.
2
3Feel free to modify this file to suit your specific needs.
4
5To build Apify Actors, utilize the Apify SDK toolkit, read more at the official documentation:
6https://docs.apify.com/sdk/python
7"""
8
9
10
11from bs4 import BeautifulSoup
12import urllib.request
13import requests
14
15
16
17from httpx import AsyncClient
18
19
20
21from apify import Actor
22
23
def _extract_products(html_content, max_results):
    """Parse product cards out of a listing page.

    Args:
        html_content: Raw HTML text of the listing page.
        max_results: Maximum number of product cards to read from the page.

    Returns:
        A list of dicts with the keys ``title``, ``Link`` and ``price``. Any
        field that cannot be located inside a card is reported as ``"N/A"``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    # `limit` stops the search once enough cards were found, which yields the
    # same cards as slicing the full result list.
    elements = soup.find_all(
        "div",
        class_='product-info flex flex-col flex-grow sm:flex-grow-0 px-4 lg:px-6',
        limit=max_results,
    )

    products = []
    for element in elements:
        title = element.find(
            "a",
            class_='product-item-link line-clamp-2 text-black min-h-[42px] md:min-h-[50px]',
        )
        # Look the price tag up first and test *it*: testing `element` (always
        # present inside this loop) would crash on a card without a price.
        price = element.find("span", class_='price')

        products.append({
            "title": title.get_text(strip=True) if title is not None else "N/A",
            "Link": title.get("href", "N/A") if title is not None else "N/A",
            "price": price.get_text(strip=True) if price is not None else "N/A",
        })
    return products


async def main() -> None:
    """Main entry point for the Apify Actor.

    Downloads a fixed product listing page, extracts the title, link and price
    of up to 100 products from it and pushes them to the Actor's default
    dataset.

    This coroutine is executed using `asyncio.run()`, so it must remain an asynchronous function for proper execution.
    Asynchronous execution is required for communication with Apify platform, and it also enhances performance in
    the field of web scraping significantly.

    Raises:
        httpx.HTTPStatusError: If the listing page responds with a 4xx/5xx status.
    """
    async with Actor:
        actor_input = await Actor.get_input() or {'Search_Word': 'Teddy Bear'}
        Actor.log.info(f'Received Input: {actor_input}')
        # NOTE(review): the 'Search_Word' input is not used for anything yet;
        # the listing URL below is fixed.
        url = "https://www.haldirams.com/sweets-73.html"
        first_n_results = 100

        async with AsyncClient() as client:
            Actor.log.info(f'Sending a request to {url}')
            # Await the async client instead of calling the blocking `requests`
            # library, so the event loop stays responsive. Redirects are
            # followed explicitly because httpx does not do so by default.
            response = await client.get(url, follow_redirects=True)
            # Fail the run loudly rather than parsing an error page and
            # silently pushing an empty dataset.
            response.raise_for_status()

        extracted_data = _extract_products(response.text, first_n_results)
        Actor.log.info(f'Extracted {len(extracted_data)} products')
        for product in extracted_data:
            Actor.log.debug(product["title"])

        await Actor.push_data(extracted_data)