1from parsel.selector import Selector
2
3
def extract_data_lists(driver_source, search_type="buy"):
    """Collect one dict per property card found in a rendered results page.

    The markup is parsed with ``Selector`` and every element whose class
    contains ``PropertyCardLayout`` is treated as one listing card.

    Args:
        driver_source: Page markup; it is passed through ``str()`` before
            being parsed.
        search_type: ``"buy"`` adds the ``added`` and ``inspection`` keys,
            ``"sold"`` adds ``sold_status`` and ``sold_on``; any other value
            adds neither pair. The value itself is stored under the
            ``search_type`` key of every dict.

    Returns:
        A list of dicts, one per card, in document order. Single-value
        fields hold whatever ``extract_first()`` yields (``None`` by parsel's
        default when nothing matches); ``added`` and ``property_details``
        are joined strings and are ``""`` when nothing matches. If any
        exception is raised while parsing, ``Error: <exception>`` is printed
        and the listings gathered up to that point are returned.
    """
    listings = []

    def first_of(node, query):
        # First match of ``query`` relative to ``node``.
        return node.xpath(query).extract_first()

    try:
        page = Selector(text=str(driver_source))

        for card in page.xpath('//*[contains(@class, "PropertyCardLayout")]'):
            # Sub-sections of the card that most of the fields are read from.
            body = card.xpath(
                './/*[contains(@class, "residential-card__content-wrapper")]')
            banner = card.xpath(
                './/*[contains(@class, "residential-card__banner-strip")]')
            primary = body.xpath(
                './/*[contains(@class, "residential-card__primary")]')

            heading = first_of(
                body,
                './/*[contains(@class, "residential-card__address-heading")]//text()')
            asking_price = first_of(
                body, './/*[contains(@class, "property-price")]//text()')
            kind = first_of(primary, './p//text()')
            features = ", ".join(
                primary.xpath('.//li//@aria-label').extract())

            # Only the banner text fragments mentioning "Added" are kept.
            banner_texts = banner.xpath(
                './/*[contains(@class, "styles__Content")]//text()').extract()
            added_text = "".join(
                text for text in banner_texts if "Added" in str(text))

            inspection_label = first_of(
                body, './/*[contains(@class, "inspection__long-label")]//text()')
            agency = first_of(
                card, './/*[contains(@class, "branding__image")]//@alt')
            agent = first_of(
                card, './/*[contains(@class, "agent__name")]//text()')
            agent_photo = first_of(
                card,
                './/*[contains(@class, "agent__name")]//following-sibling::img//@src')
            photo = first_of(
                card, './/*[@class="property-image"]//@data-url')

            status = first_of(
                banner, './/span[contains(@class, "styles__Content")]//text()')
            sold_date = first_of(
                body, './/span[contains(text(), "Sold on")]/text()')

            # Keys are inserted in a fixed order; the search-type specific
            # pair sits between ``price`` and ``property_img``.
            record = {'address_heading': heading, 'price': asking_price}
            if search_type == "buy":
                record['added'] = added_text
                record['inspection'] = inspection_label
            if search_type == "sold":
                record['sold_status'] = status
                record['sold_on'] = sold_date
            record.update({
                'property_img': photo,
                'property_type': kind,
                'property_details': features,
                'branding_title': agency,
                'agent_name': agent,
                'agent_img': agent_photo,
                'search_type': search_type,
            })

            listings.append(record)

    except Exception as e:
        # Best-effort scrape: report the problem and fall through so the
        # cards parsed before the failure are still returned.
        print(f"Error: {e}")

    return listings
93
def generate_url_pattern(input_address, search_type="buy", max_pages=1):
    """
    Build one URL pattern that covers result pages 1 through ``max_pages``.

    The page segment is written as ``list-[1-<max_pages>]``, a range
    placeholder that WebScraper.io expands into multiple URLs for
    sitemap-based scraping. Every space in ``input_address`` becomes ``+``.
    The query string always carries ``activeSort=solddate``, whatever
    ``search_type`` is.
    """
    location_slug = "+".join(input_address.split(" "))

    site_path = f"https://www.realestate.com.au/{search_type}/in-{location_slug}"
    page_range = f"list-[1-{max_pages}]"
    query = "includeSurrounding=false&activeSort=solddate"
    return f"{site_path}/{page_range}?{query}"
103
def generate_url(input_address, search_type="buy", page=1):
    """Return the results URL for a single page of a location search.

    Every space in ``input_address`` becomes ``+``; ``search_type`` and
    ``page`` are interpolated into the path as given. The query string always
    carries ``activeSort=list-date``.
    """
    location_slug = "+".join(input_address.split(" "))
    site_path = f"https://www.realestate.com.au/{search_type}/in-{location_slug}"
    query = "includeSurrounding=false&activeSort=list-date"
    return f"{site_path}/list-{page}?{query}"
108
109
def properties_pages_count(driver_source):
    """Return how many full pages of 25 results a results page reports.

    The first text node found under an element whose class contains
    ``StyledResultsCount`` is split on whitespace, and its second-to-last
    token is read as the total number of results.

    Thousands separators in that token (for example ``"1,234"``) are removed
    before conversion; without that, ``int()`` rejects the token and large
    result sets are reported as 0 pages.

    Args:
        driver_source: Page markup; it is passed through ``str()`` before
            being parsed.

    Returns:
        The total divided by 25, rounded down. If anything goes wrong (no
        matching element, too few tokens, non-numeric token, parse failure),
        ``Error: <exception>`` is printed and 0 is returned.
    """
    results_per_page = 25

    try:
        page = Selector(text=str(driver_source))
        summary = page.xpath(
            '//*[contains(@class, "StyledResultsCount")]//text()').extract_first()

        # The total is the token just before the trailing word of the summary.
        total_token = summary.split()[-2]
        total_results = int(total_token.replace(",", ""))

        # NOTE(review): floor division means a final partial page is not
        # counted (e.g. 30 results -> 1) — confirm callers account for that.
        return total_results // results_per_page
    except Exception as e:
        # Best-effort: any failure is reported and treated as "no pages".
        print(f"Error: {e}")
        return 0