Autoscout24 Germany / Deutschland - Scraper

Developed by Alex
Maintained by Community

Autoscout24 scraper for the latest listings across Germany, including leasing data, price data, additional vehicle data, address data, Google Maps links, and more.

Rating: 0.0 (0)

Pricing: $9.99 / 1,000 listings

Total users: 1
Monthly users: 1
Runs succeeded: >99%
Last modified: 16 days ago
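To give a sense of the output, below is an illustrative sketch of a single dataset item. The field names are taken from src/main.py further down; the values are placeholders, not a real listing, and each item additionally gets one key per dt/dd pair from the vehicle detail table.

# Illustrative only: field names from src/main.py, placeholder values.
example_item = {
    "url": "https://www.autoscout24.de/angebote/...",  # full listing URL
    "price_raw": "€ 19.990,-",        # price text as shown on the page
    "price": "19990",                 # digits extracted from price_raw
    "price_text": "...",              # additional price info, if present
    "price_rating": "...",            # Autoscout24 price label, if present
    "is_leasing_only": False,
    "leasing_monthly": None,          # filled when a leasing section exists
    "seller_type": "Händler",         # "Händler" (dealer) or "Privat" (private)
    "is_dealer": True,
    "is_private": False,
    "address": "...",                 # seller address, when available
    "maps_link": "...",               # Google Maps link from the listing page
    "ausstattung_liste": ["..."],     # equipment list
    "date": "2025-01-01T12:00:00",    # scrape timestamp (ISO format)
    "listing_hash": "...",            # MD5 hash used for de-duplication
    # ...plus one key per dt/dd pair from the vehicle detail table
}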

.actor/Dockerfile

# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.13
# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./
# Install the packages specified in requirements.txt,
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
&& python --version \
&& echo "Pip version:" \
&& pip --version \
&& echo "Installing dependencies:" \
&& pip install -r requirements.txt \
&& echo "All installed Python packages:" \
&& pip freeze
# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick builds will be really fast
# for most source file changes.
COPY . ./
# Use compileall to ensure the runnability of the Actor Python code.
RUN python3 -m compileall -q .
# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "autoscout24-scraper",
    "title": "Autoscout24 Car Listings Scraper",
    "description": "Professioneller Scraper für Fahrzeugangebote auf Autoscout24. Extrahiert Preise, technische Daten, Verkäuferinformationen und Leasing-Konditionen in Echtzeit.",
    "version": "1.0.0",
    "buildTag": "latest",
    "meta": {
        "templateId": "python-start",
        "categories": ["DATA EXTRACTION", "CARS"],
        "features": [
            "proxy support",
            "dataset output",
            "pagination"
        ]
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
    "title": "Autoscout24 Scraper Konfiguration",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "countries": {
            "title": "Länderauswahl",
            "type": "object",
            "description": "Länder und ISO-Codes (z.B. {'Deutschland': 'D'})",
            "editor": "json",
            "default": {"Deutschland": "D"}
        },
        "max_page": {
            "title": "Maximale Seiten",
            "type": "integer",
            "description": "Maximal zu scrapende Seiten pro Land (1-50)",
            "minimum": 1,
            "maximum": 50,
            "default": 5
        },
        "delay_min": {
            "title": "Minimale Verzögerung (Sekunden)",
            "type": "integer",
            "description": "Mindestverzögerung zwischen Anfragen",
            "default": 1,
            "minimum": 0
        },
        "delay_max": {
            "title": "Maximale Verzögerung (Sekunden)",
            "type": "integer",
            "description": "Maximalverzögerung zwischen Anfragen",
            "default": 3,
            "minimum": 1
        },
        "batch_size": {
            "title": "Batch-Größe",
            "type": "integer",
            "description": "Anzahl der Datensätze pro Speichervorgang (min. 10)",
            "default": 100,
            "minimum": 10
        },
        "base_url": {
            "title": "Basis-URL",
            "type": "string",
            "description": "Autoscout24 Basis-URL",
            "default": "https://www.autoscout24.de",
            "editor": "textfield"
        },
        "brand": {
            "title": "Marke",
            "type": "string",
            "description": "Fahrzeugmarke (z.B. 'mercedes-benz' oder 'bmw')",
            "editor": "select",
            "enumTitles": [
                "Alle Marken",
                "Abarth",
                "Aiways",
                "Alfa Romeo",
                "Alpine",
                "Aston Martin",
                "Audi",
                "BMW",
                "BYD",
                "Bentley",
                "Chevrolet",
                "Chrysler",
                "Citroën",
                "Cupra",
                "DFSK",
                "DS Automobiles",
                "Dacia",
                "Daewoo",
                "Daihatsu",
                "Dodge",
                "Ferrari",
                "Fiat",
                "Ford",
                "Honda",
                "Hyundai",
                "Jaguar",
                "Jeep",
                "Kia",
                "Lada",
                "Lamborghini",
                "Lancia",
                "Land Rover",
                "Lexus",
                "Lotus",
                "MG",
                "MINI",
                "Maserati",
                "Mazda",
                "McLaren",
                "Mercedes-Benz",
                "Mitsubishi",
                "Nissan",
                "Opel",
                "Peugeot",
                "Polestar",
                "Porsche",
                "Renault",
                "Rolls-Royce",
                "SEAT",
                "SKODA",
                "Saab",
                "Smart",
                "SsangYong",
                "Subaru",
                "Suzuki",
                "Tesla",
                "Toyota",
                "Volkswagen",
                "Volvo"
            ],
            "enum": [
                "",
                "abarth",
                "aiways",
                "alfa-romeo",
                "alpine",
                "aston-martin",
                "audi",
                "bmw",
                "byd",
                "bentley",
                "chevrolet",
                "chrysler",
                "citroen",
                "cupra",
                "dfsk",
                "ds-automobiles",
                "dacia",
                "daewoo",
                "daihatsu",
                "dodge",
                "ferrari",
                "fiat",
                "ford",
                "honda",
                "hyundai",
                "jaguar",
                "jeep",
                "kia",
                "lada",
                "lamborghini",
                "lancia",
                "land-rover",
                "lexus",
                "lotus",
                "mg",
                "mini",
                "maserati",
                "mazda",
                "mclaren",
                "mercedes-benz",
                "mitsubishi",
                "nissan",
                "opel",
                "peugeot",
                "polestar",
                "porsche",
                "renault",
                "rolls-royce",
                "seat",
                "skoda",
                "saab",
                "smart",
                "ssangyong",
                "subaru",
                "suzuki",
                "tesla",
                "toyota",
                "volkswagen",
                "volvo"
            ]
        },
        "location": {
            "title": "Standort",
            "type": "string",
            "description": "PLZ oder PLZ-Stadt (z.B. '10969' oder '10969-berlin')",
            "editor": "textfield"
        },
        "price_to": {
            "title": "Maximaler Preis",
            "type": "integer",
            "description": "Maximaler Preis in Euro",
            "minimum": 0
        },
        "search_urls": {
            "title": "Eigene Such-URLs",
            "type": "array",
            "description": "Liste von kompletten Autoscout24 Such-URLs (überschreibt andere Suchparameter)",
            "editor": "stringList"
        }
    },
    "required": []
}
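As a quick orientation, the sketch below shows one way to run the Actor via the Apify Python client with an input that matches this schema. The API token, the actor ID, and the concrete input values are placeholders; substitute your own.

from apify_client import ApifyClient

# Placeholder token -- replace with your own.
client = ApifyClient("<APIFY_API_TOKEN>")

run_input = {
    "countries": {"Deutschland": "D"},   # schema default
    "brand": "bmw",                      # one of the enum values above
    "location": "10969-berlin",          # PLZ or PLZ-city, as described above
    "price_to": 20000,                   # maximum price in EUR
    "max_page": 2,                       # pages per country (1-50)
    "delay_min": 1,
    "delay_max": 3,
}

# "<username>/autoscout24-scraper" is a placeholder actor ID.
run = client.actor("<username>/autoscout24-scraper").call(run_input=run_input)

# Iterate over the scraped listings in the run's default dataset.
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item.get("url"), item.get("price"))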

src/__init__.py


src/__main__.py

import asyncio

from .main import main

# Execute the Actor entry point.
asyncio.run(main())

src/main.py

from apify import Actor
from bs4 import BeautifulSoup
from httpx import AsyncClient
import logging
import re
import json
import hashlib
from urllib.parse import urljoin, urlparse, parse_qs, unquote, quote
import random
import time
from datetime import datetime
from tenacity import retry, stop_after_attempt, wait_exponential
import asyncio

# Configure logging
logging.basicConfig(level=logging.INFO)


class ApifyScraperConfig:
    """Configuration class adapted for Apify"""
    def __init__(self, actor_input):
        self.countries = actor_input.get('countries', {'Deutschland': 'D'})
        self.base_url = actor_input.get('base_url', 'https://www.autoscout24.de')
        self.max_page = actor_input.get('max_page', 5)
        self.delay_min = actor_input.get('delay_min', 1)
        self.delay_max = actor_input.get('delay_max', 3)
        self.delay_range = (self.delay_min, self.delay_max)
        self.batch_size = actor_input.get('batch_size', 100)

        # New parameters
        self.brand = actor_input.get('brand')
        self.location = actor_input.get('location')
        self.price_to = actor_input.get('price_to')
        self.search_urls = actor_input.get('search_urls', [])


def _sanitize_string(raw_str: str) -> str:
    """
    Sanitizes a string for use as a column name:
    - Converts German umlauts and ß to ASCII equivalents.
    - Replaces any other non-alphanumeric characters (except underscore) with an underscore.
    - Converts to lowercase.
    """
    # Convert German umlauts and ß
    replacements = {
        'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss',
        'Ä': 'ae', 'Ö': 'oe', 'Ü': 'ue', "€": 'eur'
    }
    for umlaut, ascii_eq in replacements.items():
        raw_str = raw_str.replace(umlaut, ascii_eq)

    raw_str = raw_str.strip()

    # Replace any character that's not alphanumeric or underscore with an underscore
    sanitized = re.sub(r'[^0-9a-zA-Z_]', '_', raw_str)

    return sanitized.lower()


def _class_starts_with(prefix):
    """Helper function to check if a CSS class starts with a specific prefix."""
    def checker(css_class):
        return css_class and css_class.startswith(prefix)
    return checker


def _remove_leasing_configurator(tag):
    """Filter function to exclude leasing configurator tags."""
    return not any("LeasingConfigurator" in class_name for class_name in tag.get("class", []))


def _get_data_hash(car_data):
    """Generate a consistent hash for car data to identify duplicates."""
    # Exclude 'scrape_date' and 'url' for hash to identify same car across listings
    data_to_hash = {k: v for k, v in car_data.items() if k not in ('scrape_date', 'url')}
    # Sort keys to ensure consistent JSON string
    data_str = json.dumps(data_to_hash, sort_keys=True)
    return hashlib.md5(data_str.encode()).hexdigest()


def _safe_get_text(tag):
    """Safely get text from a tag, returning None if tag is None."""
    if tag:
        return tag.text.strip()
    return None


def _parse_currency(text):
    """Parse currency values from text."""
    if not text:
        return None
    numbers = re.findall(r'[0-9,.]+', text)
    if numbers:
        # Replace comma with dot and remove thousands separators
        return re.sub(r'\.(?=\d{3})', '', numbers[0].replace(',', '.'))
    return None


def _scrape_price(soup):
    """Extract price information from the car detail page."""
    # Find the main container
    main_container = soup.find("div", {"data-testid": "price-section"})
    if not main_container:
        return "Not found", "Not found", "Not found"

    # Extract price
    price_span = main_container.find("span", class_=_class_starts_with("PriceInfo_price__"))
    price = price_span.find(text=True).strip() if price_span else "Not found"

    # Extract additional info
    additional_info = main_container.find("p", class_=_class_starts_with("AdditionalPriceInfo_additionalPrice__"))
    additional_text = additional_info.text.strip() if additional_info else "Not found"

    # Extract price rating
    price_rating_div = main_container.find("div", class_=_class_starts_with("scr-price-label"))
    price_rating = price_rating_div.find("p").text.strip() if price_rating_div and price_rating_div.find("p") else "Not found"

    return price, additional_text, price_rating


def build_search_url(config, country_code, page=1):
    """
    Builds a search URL based on the provided configuration parameters.

    This function dynamically constructs search URLs for Autoscout24 based on brand,
    location, and other search parameters provided in the config.
    """
    # Base parameters that are always included
    params = {
        'sort': 'standard',
        'desc': '0',
        'ustate': 'N,U',
        'atype': 'C',
        'cy': country_code,
        'page': str(page)
    }

    # Add price limit if specified
    if config.price_to:
        params['priceto'] = str(config.price_to)

    # Start with the base URL
    url = config.base_url

    # Add brand and/or location path if specified
    path_parts = ['/lst']

    if config.brand:
        # Convert brand name to URL-friendly format (lowercase with hyphens)
        brand_slug = config.brand.lower().replace(' ', '-')
        path_parts.append(brand_slug)

    if config.location:
        # For location, we assume it's already in the correct format (e.g., "10969-berlin")
        # If it's just a postal code, we add it as is
        path_parts.append(config.location)

    # If no brand or location, use the default path
    if len(path_parts) == 1:
        url += "/lst/"
    else:
        url += '/'.join(path_parts)

    # Add query parameters
    url += '?' + '&'.join([f"{k}={quote(v, safe='')}" for k, v in params.items()])

    return url


@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
async def fetch_overview_page(url, client):
    """Async version of overview page fetcher"""
    try:
        response = await client.get(url, follow_redirects=True)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")
        return soup.find_all("a")
    except Exception as e:
        logging.error(f"Error fetching overview page {url}: {str(e)}")
        return []


@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
async def fetch_car_details(car_url, base_url, client):
    """Async version of car details fetcher"""
    full_url = urljoin(base_url, car_url)
    try:
        response = await client.get(full_url, follow_redirects=True)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")

        car_dict = {"url": full_url}  # Store the full URL for reference

        # Extract dt/dd pairs
        all_dt_tags = soup.find_all("dt")
        dt_tags = list(filter(_remove_leasing_configurator, all_dt_tags))
        dd_tags = soup.find_all("dd")

        if len(dt_tags) == len(dd_tags):
            for dt, dd in zip(dt_tags, dd_tags):
                raw_key = dt.get_text(strip=True)
                sanitized_key = _sanitize_string(raw_key)
                value = dd.get_text(separator=";", strip=True)
                car_dict[sanitized_key] = value
        else:
            logging.warning("Mismatch in dt/dd pairs in %s", full_url)

        # Additional fields
        # seller type - Initialize with a default value
        seller_type = None

        # Strategy 1: SCR tag with aria-label
        seller_tag = soup.find("span", class_="scr-tag", attrs={"aria-label": lambda x: x and x.lower() in ["händler", "privat"]})
        if seller_tag:
            seller_type = seller_tag.text.strip()
        else:
            # Strategy 2: context-based search in the vendor section
            vendor_section = soup.find("div", id="vendor-and-cta-section") or soup.find(class_=lambda x: x and "VendorData" in x)
            if vendor_section:
                vendor_tag = vendor_section.find("span", class_="scr-tag")
                seller_type = _safe_get_text(vendor_tag)

        car_dict["seller_type"] = seller_type if seller_type else "Not found"
        car_dict["is_dealer"] = (seller_type == "Händler") if seller_type else "Not found"
        car_dict["is_private"] = (seller_type == "Privat") if seller_type else "Not found"

        # leasing section
        leasing_data = {}
        try:
            leasing_section = soup.find("div", {"data-cy": "leasing-section"}) or soup.find("section", id="leasing-section")

            if leasing_section:
                # General leasing data
                leasing_tags = leasing_section.find_all("dl", class_=lambda x: x and "DataGrid_asTable" in x)
                for dl in leasing_tags:
                    for dt, dd in zip(dl.find_all("dt"), dl.find_all("dd")):
                        key = _sanitize_string(dt.get_text(strip=True))
                        value = dd.get_text(strip=True)
                        leasing_data[key] = value

                # Key figures
                monthly_rate_text = leasing_section.find(text=re.compile(r"Monatliche Rate", re.IGNORECASE))
                if monthly_rate_text and monthly_rate_text.find_next("dd"):
                    leasing_data["monthly_rate"] = _parse_currency(monthly_rate_text.find_next("dd").text)

                # Target group from the SCR tag
                target_group_tag = leasing_section.find("span", class_="scr-tag")
                if target_group_tag:
                    leasing_data["target_group"] = [x.strip() for x in target_group_tag.text.split("/")]

                # Raw data for debugging
                car_dict["leasing_raw"] = json.dumps(leasing_data)

                # Normalized fields
                car_dict["leasing_monthly"] = leasing_data.get("monthly_rate")
                car_dict["leasing_total"] = _parse_currency(leasing_data.get("leasinggesamtbetrag"))
                car_dict["leasing_contract_type"] = leasing_data.get("Vertragsart")
                car_dict["leasing_mileage"] = leasing_data.get("Fahrleistung p.a.")
        except Exception as e:
            logging.warning(f"Error extracting leasing data: {str(e)}")
            car_dict["leasing_error"] = str(e)

        # location
        location_a = soup.find("a", class_="scr-link Department_link__xMUEe")
        if location_a:
            maps_link = location_a.get("href", "")
            address_text = location_a.get_text(separator=" ", strip=True)  # replaces <br> with spaces
            address_clean = re.sub(r'\s+,', ',', address_text)
            car_dict["address"] = address_clean if address_clean else None
            car_dict["maps_link"] = maps_link
        else:
            car_dict["address"] = None
            car_dict["maps_link"] = None

        # price
        price_raw, additional_text, price_rating = _scrape_price(soup)
        car_dict["price_raw"] = price_raw
        # leasing info
        car_dict["is_leasing_only"] = False
        if price_raw and price_raw != "Not found":
            # Case-insensitive search for leasing indicators
            if re.search(r'(?i)\b(leasing|mtl?\.?)\b', price_raw):
                car_dict["is_leasing_only"] = True

        # Add numeric price
        if price_raw and price_raw != "Not found":
            price_numbers = "".join(re.findall(r'[0-9]+', price_raw))
            car_dict["price"] = price_numbers
        else:
            car_dict["price"] = None
        car_dict["price_text"] = additional_text
        car_dict["price_rating"] = price_rating

        # Extract equipment list
        ausstattung_set = set()
        equip_divs = soup.find_all("div", attrs={"class": re.compile(r"cldt-equipment-block")})
        for div in equip_divs:
            text = div.get_text(separator="\n", strip=True)
            for line in text.split("\n"):
                if line:
                    ausstattung_set.add(line)
        car_dict["ausstattung_liste"] = sorted(ausstattung_set)

        car_dict["date"] = datetime.now().isoformat()
        car_dict["listing_hash"] = _get_data_hash(car_dict)

        # Store search parameters that led to this listing
        search_info = urlparse(full_url).path.split('/')
        if len(search_info) > 2:
            for segment in search_info[2:]:  # Skip the first two segments (['', 'lst'])
                if '-' in segment and segment[0:5].isdigit():  # Looks like a PLZ-city
                    car_dict["search_location"] = segment
                elif segment and segment != 'angebote':
                    car_dict["search_brand"] = segment

        return car_dict
    except Exception as e:
        logging.error(f"Error processing {full_url}: {str(e)}")
        return None


async def process_search_url(search_url, client, config):
    """Process a single search URL"""
    # The visited-URL set is persisted in the key-value store as a JSON list.
    visited_urls = set(await Actor.get_value('visited-urls') or [])

    for page in range(1, config.max_page + 1):
        # Add page parameter to URL
        page_url = search_url
        if '?' in page_url:
            page_url += f"&page={page}"
        else:
            page_url += f"?page={page}"

        Actor.log.info(f"Processing page {page} for URL: {page_url}")

        a_tags = await fetch_overview_page(page_url, client)
        car_urls = {link["href"] for link in a_tags if "/angebote/" in link.get("href", "")}

        new_urls = car_urls - visited_urls
        if not new_urls:
            Actor.log.info(f"No new URLs found on page {page}, stopping pagination")
            break

        Actor.log.info(f"Found {len(new_urls)} new car listings on page {page}")

        for url in new_urls:
            car_data = await fetch_car_details(url, config.base_url, client)
            if car_data:
                await Actor.push_data(car_data)
                visited_urls.add(url)
                await Actor.set_value('visited-urls', list(visited_urls))

            # Use proper async sleep
            await asyncio.sleep(random.uniform(*config.delay_range))


async def process_car_listings(actor_input):
    """Main processing logic adapted for Apify"""
    async with AsyncClient() as client:
        config = ApifyScraperConfig(actor_input)

        # Create a list to store all search URLs to process
        search_urls = []

        # If custom search URLs are provided, use those
        if config.search_urls:
            search_urls.extend(config.search_urls)
        else:
            # Otherwise build URLs from parameters
            for country, country_code in config.countries.items():
                # If brand or location is specified, build a specific search URL
                if config.brand or config.location:
                    url = build_search_url(config, country_code)
                    search_urls.append(url)
                else:
                    # Otherwise use the default URL format
                    url = f"{config.base_url}/lst/?sort=age&desc=1&ustate=N%2CU&size=20&cy={country_code}&atype=C&"
                    search_urls.append(url)

        # Process each search URL
        for search_url in search_urls:
            await process_search_url(search_url, client, config)


async def main() -> None:
    async with Actor:
        actor_input = await Actor.get_input() or {}

        # Initialize visited URLs dataset
        await Actor.open_dataset(name='visited-urls')

        # Run main processing
        await process_car_listings(actor_input)
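For reference, a minimal sketch of how the URL builder above behaves when only a brand and a price cap are given. It assumes the module is importable as src.main and that the project's dependencies are installed; the expected URL in the comment follows directly from the parameter order in build_search_url.

from src.main import ApifyScraperConfig, build_search_url

# Brand-only search, capped at 20,000 EUR, first page for Germany ("D").
config = ApifyScraperConfig({"brand": "bmw", "price_to": 20000})
print(build_search_url(config, "D", page=1))
# Expected:
# https://www.autoscout24.de/lst/bmw?sort=standard&desc=0&ustate=N%2CU&atype=C&cy=D&page=1&priceto=20000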

src/py.typed

.dockerignore

.git
.mise.toml
.nvim.lua
storage
# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

.gitignore

.mise.toml
.nvim.lua
storage
# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

requirements.txt

# Feel free to add your Python dependencies below. For formatting guidelines, see:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify < 3.0
apify-client
beautifulsoup4[lxml]
httpx
tenacity
types-beautifulsoup4
python-dateutil