
Autoscout24 Germany / Deutschland - Scraper

An Autoscout24 scraper for the latest listings across all of Germany, with leasing data, price data, further vehicle data, address data, and Google Maps links.

Pricing: $9.99 / 1,000 listings
Rating: 0.0 (0)
Total users: 1
Monthly users: 1
Runs succeeded: >99%
Last modified: 2 days ago
.actor/Dockerfile
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.13

# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build.
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# then print the installed Python version, pip version,
# and all installed packages with their versions, for debugging.
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, a quick build will be really fast
# for most source file changes.
COPY . ./

# Use compileall to ensure the runnability of the Actor Python code.
RUN python3 -m compileall -q .

# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run.
CMD ["python3", "-m", "src"]
.actor/actor.json
{ "actorSpecification": 1, "name": "autoscout24-scraper", "title": "Autoscout24 Car Listings Scraper", "description": "Professioneller Scraper für Fahrzeugangebote auf Autoscout24. Extrahiert Preise, technische Daten, Verkäuferinformationen und Leasing-Konditionen in Echtzeit.", "version": "1.0.0", "buildTag": "latest", "meta": { "templateId": "python-start", "categories": ["DATA EXTRACTION", "CARS"], "features": [ "proxy support", "dataset output", "pagination" ] }, "input": "./input_schema.json", "dockerfile": "./Dockerfile"}
.actor/input_schema.json
{ "title": "Autoscout24 Scraper Konfiguration", "type": "object", "schemaVersion": 1, "properties": { "countries": { "title": "Länderauswahl", "type": "object", "description": "Länder und ISO-Codes (z.B. {'Deutschland': 'DE'})", "editor": "json", "default": {"Deutschland": "DE"} }, "max_page": { "title": "Maximale Seiten", "type": "integer", "description": "Maximal zu scrapende Seiten pro Land (1-50)", "minimum": 1, "maximum": 50, "default": 5 }, "delay_min": { "title": "Minimale Verzögerung (Sekunden)", "type": "integer", "description": "Mindestverzögerung zwischen Anfragen", "default": 1, "minimum": 0 }, "delay_max": { "title": "Maximale Verzögerung (Sekunden)", "type": "integer", "description": "Maximalverzögerung zwischen Anfragen", "default": 3, "minimum": 1 }, "batch_size": { "title": "Batch-Größe", "type": "integer", "description": "Anzahl der Datensätze pro Speichervorgang (min. 10)", "default": 100, "minimum": 10 }, "base_url": { "title": "Basis-URL", "type": "string", "description": "Autoscout24 Basis-URL", "default": "https://www.autoscout24.de", "editor": "textfield" } }, "required": []}
src/__init__.py
src/__main__.py
import asyncio

from .main import main

# Execute the Actor entry point.
asyncio.run(main())
src/main.py
from apify import Actor
from bs4 import BeautifulSoup
from httpx import AsyncClient
import logging
import re
import json
import hashlib
from urllib.parse import urljoin
import random
from datetime import datetime
from tenacity import retry, stop_after_attempt, wait_exponential
import asyncio

# Configure logging
logging.basicConfig(level=logging.INFO)

class ApifyScraperConfig:
    """Configuration class adapted for Apify"""

    def __init__(self, actor_input):
        self.countries = actor_input.get('countries', {'Deutschland': 'D'})
        self.base_url = actor_input.get('base_url', 'https://www.autoscout24.de')
        self.max_page = actor_input.get('max_page', 5)
        # Honour the delay_min/delay_max fields defined in the input schema
        self.delay_range = (
            actor_input.get('delay_min', 1),
            actor_input.get('delay_max', 3),
        )
        self.batch_size = actor_input.get('batch_size', 100)

def _sanitize_string(raw_str: str) -> str:
    """
    Sanitizes a string for use as a column name:
    - Converts German umlauts, ß and the euro sign to ASCII equivalents.
    - Replaces any other non-alphanumeric characters (except underscore) with an underscore.
    - Converts to lowercase.
    """
    # Convert German umlauts, ß and the euro sign
    replacements = {
        'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss',
        'Ä': 'ae', 'Ö': 'oe', 'Ü': 'ue', '€': 'eur'
    }
    for umlaut, ascii_eq in replacements.items():
        raw_str = raw_str.replace(umlaut, ascii_eq)

    raw_str = raw_str.strip()

    # Replace any character that's not alphanumeric or underscore with an underscore
    sanitized = re.sub(r'[^0-9a-zA-Z_]', '_', raw_str)

    return sanitized.lower()

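# A few illustrative inputs/outputs for the sanitizer (hedged: derived from the
# rules above, not from recorded scraper output):
#   _sanitize_string("Fahrleistung p.a.")   -> "fahrleistung_p_a_"
#   _sanitize_string("Leasinggesamtbetrag") -> "leasinggesamtbetrag"
#   _sanitize_string("Türen")               -> "tueren"
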
def _class_starts_with(prefix):
    """Helper function to check if a CSS class starts with a specific prefix."""
    def checker(css_class):
        return css_class and css_class.startswith(prefix)
    return checker

def _remove_leasing_configurator(tag):
    """Filter function to exclude leasing configurator tags."""
    # Keep the tag only if none of its classes belong to the leasing configurator;
    # tags without any class attribute are kept as well
    return all("LeasingConfigurator" not in class_name for class_name in tag.get("class", []))

def _get_data_hash(car_data):
    """Generate a consistent hash for car data to identify duplicates."""
    # Exclude 'date' and 'url' from the hash to identify the same car across listings
    data_to_hash = {k: v for k, v in car_data.items() if k not in ('date', 'url')}
    # Sort keys to ensure a consistent JSON string
    data_str = json.dumps(data_to_hash, sort_keys=True)
    return hashlib.md5(data_str.encode()).hexdigest()

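# Illustrative behaviour (hedged example, not recorded output): two snapshots of
# the same car that differ only in scrape date and listing URL hash identically,
# which is what makes re-listings detectable as duplicates:
#   a = {"marke": "BMW", "preis": "19999", "date": "2024-01-01", "url": "https://.../1"}
#   b = {"marke": "BMW", "preis": "19999", "date": "2024-02-01", "url": "https://.../2"}
#   _get_data_hash(a) == _get_data_hash(b)  # True
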
def _safe_get_text(tag):
    """Safely get text from a tag, returning None if the tag is None."""
    if tag:
        return tag.text.strip()
    return None

def _parse_currency(text):
    """Parse currency values from text."""
    if not text:
        return None
    numbers = re.findall(r'[0-9,.]+', text)
    if numbers:
        # Replace the decimal comma with a dot and remove thousands separators
        return re.sub(r'\.(?=\d{3})', '', numbers[0].replace(',', '.'))
    return None

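# Illustrative conversions for German-formatted values (hedged examples):
#   _parse_currency("19.999,50 €")  -> "19999.50"  (decimal comma becomes a dot)
#   _parse_currency("1.234.567 km") -> "1234567"   (thousands dots removed)
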
def _scrape_price(soup):
    """Extract price information from the car detail page."""
    # Find the main container
    main_container = soup.find("div", {"data-testid": "price-section"})
    if not main_container:
        return "Not found", "Not found", "Not found"

    # Extract price
    price_span = main_container.find("span", class_=_class_starts_with("PriceInfo_price__"))
    price = price_span.find(string=True).strip() if price_span else "Not found"

    # Extract additional info
    additional_info = main_container.find("p", class_=_class_starts_with("AdditionalPriceInfo_additionalPrice__"))
    additional_text = additional_info.text.strip() if additional_info else "Not found"

    # Extract price rating
    price_rating_div = main_container.find("div", class_=_class_starts_with("scr-price-label"))
    price_rating = price_rating_div.find("p").text.strip() if price_rating_div and price_rating_div.find("p") else "Not found"

    return price, additional_text, price_rating

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
async def fetch_overview_page(country_code, page, base_url, client):
    """Async overview page fetcher."""
    url = f"{base_url}/lst/?sort=age&desc=1&ustate=N%2CU&size=20&page={page}&cy={country_code}&atype=C&"
    try:
        response = await client.get(url, follow_redirects=True)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")
        return soup.find_all("a")
    except Exception as e:
        logging.error(f"Error fetching overview page {url}: {str(e)}")
        # Re-raise so the @retry decorator actually retries; swallowing the
        # exception here would disable tenacity's retry logic entirely
        raise

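# For reference, the overview URL built above expands, for Germany ("D") and page 1, to:
#   https://www.autoscout24.de/lst/?sort=age&desc=1&ustate=N%2CU&size=20&page=1&cy=D&atype=C&
# i.e. listings sorted by age in descending order (newest first), new and used
# cars (ustate=N,U), 20 results per page.
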
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
async def fetch_car_details(car_url, base_url, client):
    """Async car details fetcher."""
    full_url = urljoin(base_url, car_url)
    try:
        response = await client.get(full_url, follow_redirects=True)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")

        car_dict = {"url": full_url}  # Store the full URL for reference

        # Extract dt/dd pairs
        all_dt_tags = soup.find_all("dt")
        dt_tags = list(filter(_remove_leasing_configurator, all_dt_tags))
        dd_tags = soup.find_all("dd")

        if len(dt_tags) == len(dd_tags):
            for dt, dd in zip(dt_tags, dd_tags):
                raw_key = dt.get_text(strip=True)
                sanitized_key = _sanitize_string(raw_key)
                value = dd.get_text(separator=";", strip=True)
                car_dict[sanitized_key] = value
        else:
            logging.warning("Mismatch in dt/dd pairs in %s", full_url)

        # Additional fields
        # Seller type - initialize with a default value
        seller_type = None

        # Strategy 1: scr-tag with an aria-label
        seller_tag = soup.find("span", class_="scr-tag", attrs={"aria-label": lambda x: x and x.lower() in ["händler", "privat"]})
        if seller_tag:
            seller_type = seller_tag.text.strip()
        else:
            # Strategy 2: context-based search within the vendor section
            vendor_section = soup.find("div", id="vendor-and-cta-section") or soup.find(class_=lambda x: x and "VendorData" in x)
            if vendor_section:
                vendor_tag = vendor_section.find("span", class_="scr-tag")
                seller_type = _safe_get_text(vendor_tag)

        car_dict["seller_type"] = seller_type if seller_type else "Not found"
        car_dict["is_dealer"] = (seller_type == "Händler") if seller_type else "Not found"
        car_dict["is_private"] = (seller_type == "Privat") if seller_type else "Not found"

        # Leasing section
        leasing_data = {}
        try:
            leasing_section = soup.find("div", {"data-cy": "leasing-section"}) or soup.find("section", id="leasing-section")

            if leasing_section:
                # General leasing data
                leasing_tags = leasing_section.find_all("dl", class_=lambda x: x and "DataGrid_asTable" in x)
                for dl in leasing_tags:
                    for dt, dd in zip(dl.find_all("dt"), dl.find_all("dd")):
                        key = _sanitize_string(dt.get_text(strip=True))
                        value = dd.get_text(strip=True)
                        leasing_data[key] = value

                # Key figures
                monthly_rate_text = leasing_section.find(string=re.compile(r"Monatliche Rate", re.IGNORECASE))
                if monthly_rate_text and monthly_rate_text.find_next("dd"):
                    leasing_data["monthly_rate"] = _parse_currency(monthly_rate_text.find_next("dd").text)

                # Target group from the scr-tag
                target_group_tag = leasing_section.find("span", class_="scr-tag")
                if target_group_tag:
                    leasing_data["target_group"] = [x.strip() for x in target_group_tag.text.split("/")]

                # Raw data for debugging
                car_dict["leasing_raw"] = json.dumps(leasing_data)

                # Normalized fields; the keys were run through _sanitize_string above,
                # so they must be looked up in their sanitized (lowercase) form
                car_dict["leasing_monthly"] = leasing_data.get("monthly_rate")
                car_dict["leasing_total"] = _parse_currency(leasing_data.get("leasinggesamtbetrag"))
                car_dict["leasing_contract_type"] = leasing_data.get("vertragsart")
                car_dict["leasing_mileage"] = leasing_data.get("fahrleistung_p_a_")
        except Exception as e:
            logging.warning(f"Error extracting leasing data: {str(e)}")
            car_dict["leasing_error"] = str(e)

        # Location
        location_a = soup.find("a", class_="scr-link Department_link__xMUEe")
        if location_a:
            maps_link = location_a.get("href", "")
            address_text = location_a.get_text(separator=" ", strip=True)  # replaces <br> with spaces
            address_clean = re.sub(r'\s+,', ',', address_text)
            car_dict["address"] = address_clean if address_clean else None
            car_dict["maps_link"] = maps_link
        else:
            car_dict["address"] = None
            car_dict["maps_link"] = None

        # Price
        price_raw, additional_text, price_rating = _scrape_price(soup)
        car_dict["price_raw"] = price_raw
        # Leasing-only detection
        car_dict["is_leasing_only"] = False
        if price_raw and price_raw != "Not found":
            # Case-insensitive search for leasing indicators
            if re.search(r'(?i)\b(leasing|mtl?\.?)\b', price_raw):
                car_dict["is_leasing_only"] = True

        # Add numeric price
        if price_raw and price_raw != "Not found":
            price_numbers = "".join(re.findall(r'[0-9]+', price_raw))
            car_dict["price"] = price_numbers
        else:
            car_dict["price"] = None
        car_dict["price_text"] = additional_text
        car_dict["price_rating"] = price_rating

        # Extract equipment list
        ausstattung_set = set()
        equip_divs = soup.find_all("div", attrs={"class": re.compile(r"cldt-equipment-block")})
        for div in equip_divs:
            text = div.get_text(separator="\n", strip=True)
            for line in text.split("\n"):
                if line:
                    ausstattung_set.add(line)
        car_dict["ausstattung_liste"] = sorted(ausstattung_set)

        car_dict["date"] = datetime.now().isoformat()
        car_dict["listing_hash"] = _get_data_hash(car_dict)

        return car_dict
    except Exception as e:
        logging.error(f"Error processing {full_url}: {str(e)}")
        # Re-raise so the @retry decorator can retry transient failures
        raise

async def process_car_listings(actor_input):
    """Main processing logic adapted for Apify."""
    async with AsyncClient() as client:
        config = ApifyScraperConfig(actor_input)
        # Key-value store values must be JSON-serializable, so the visited URLs
        # are persisted as a list and handled as a set in memory
        visited_urls = set(await Actor.get_value('visited-urls') or [])

        for country, country_code in config.countries.items():
            for page in range(1, config.max_page + 1):
                Actor.log.info(f"Processing page {page} for {country}")
                try:
                    a_tags = await fetch_overview_page(country_code, page, config.base_url, client)
                except Exception:
                    Actor.log.exception(f"Skipping page {page} for {country} after repeated failures")
                    continue
                car_urls = {link["href"] for link in a_tags if "/angebote/" in link.get("href", "")}

                new_urls = car_urls - visited_urls
                if not new_urls:
                    break

                for url in new_urls:
                    try:
                        car_data = await fetch_car_details(url, config.base_url, client)
                    except Exception:
                        Actor.log.exception(f"Skipping {url} after repeated failures")
                        continue
                    if car_data:
                        await Actor.push_data(car_data)
                        visited_urls.add(url)
                        await Actor.set_value('visited-urls', sorted(visited_urls))

                # Use a proper async sleep with a randomized delay between pages
                await asyncio.sleep(random.uniform(*config.delay_range))

async def main() -> None:
    async with Actor:
        actor_input = await Actor.get_input() or {}

        # Run the main processing; the set of visited URLs is persisted under the
        # 'visited-urls' key in the default key-value store
        await process_car_listings(actor_input)
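For quick local debugging outside the Apify platform, the overview fetcher can be exercised directly with httpx. This is a sketch, not part of the actor: it assumes the package layout above, that it is run from the repository root, and that autoscout24.de is reachable:

import asyncio
from httpx import AsyncClient
from src.main import fetch_overview_page

async def smoke() -> None:
    async with AsyncClient() as client:
        # "D" is the country code the scraper uses for Germany
        a_tags = await fetch_overview_page("D", 1, "https://www.autoscout24.de", client)
        links = {a.get("href") for a in a_tags if "/angebote/" in (a.get("href") or "")}
        print(f"Found {len(links)} listing links on page 1")

asyncio.run(smoke())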
src/py.typed
.dockerignore
.git
.mise.toml
.nvim.lua
storage

# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
.gitignore
.mise.toml
.nvim.lua
storage

# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
requirements.txt
# Feel free to add your Python dependencies below. For formatting guidelines, see:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify < 3.0
apify-client
beautifulsoup4[lxml]
httpx
tenacity
python-dateutil
types-beautifulsoup4