Cars & Bids Scraper

Extract data about car auctions from the Cars & Bids website. You can filter the scrape by body style, year, mileage, etc., so the output contains only the cars you need.

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
    "extends": "@apify",
    "root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control

.DS_Store
.idea
dist
node_modules
apify_storage
storage
storage

package-lock.json

This file is 9508 lines long. Only the first 50 are shown. Show all

{
	"name": "carsNbids",
	"version": "0.0.1",
	"lockfileVersion": 2,
	"requires": true,
	"packages": {
		"": {
			"name": "carsNbids",
			"version": "0.0.1",
			"license": "ISC",
			"dependencies": {
				"apify": "^3.0.0",
				"crawlee": "^3.0.0",
				"playwright": "*"
			},
			"devDependencies": {
				"@apify/eslint-config": "^0.3.1",
				"eslint": "^8.20.0"
			}
		},
		"node_modules/@apify/consts": {
			"version": "2.8.0",
			"resolved": "https://registry.npmjs.org/@apify/consts/-/consts-2.8.0.tgz",
			"integrity": "sha512-uQBRggMka8HtpkLWJeA9ZITDxgAgpwDoL1Uj8QXGaZDj0Rb7Z7YcEAJ8KNRoj0yuIlqoS6NwNWEwfpFIjSYt3Q=="
		},
		"node_modules/@apify/datastructures": {
			"version": "2.0.0",
			"resolved": "https://registry.npmjs.org/@apify/datastructures/-/datastructures-2.0.0.tgz",
			"integrity": "sha512-O7I31PvG4Qb/Zc2lAIkSUBRDLKDKLrmqtWG3Ea8To5xvbPKdiLuVx3IuAzjCs1UQVTbhN590Sw5xBoFghreAYA=="
		},
		"node_modules/@apify/eslint-config": {
			"version": "0.3.1",
			"resolved": "https://registry.npmjs.org/@apify/eslint-config/-/eslint-config-0.3.1.tgz",
			"integrity": "sha512-YPf6oG8iIRcXflbgILEcC9ujTtDrFISkR8/tt9yI2CWc5NxWiRMs2RDMYTWwj8bfYrpVgDPEmcIMr7Mi1v6QcA==",
			"dev": true,
			"dependencies": {
				"eslint-config-airbnb": "^19.0.0",
				"eslint-config-airbnb-base": "^15.0.0",
				"eslint-import-resolver-typescript": "^2.5.0",
				"eslint-plugin-import": "^2.25.3",
				"eslint-plugin-jsx-a11y": "^6.5.1",
				"eslint-plugin-react": "^7.27.0",
				"eslint-plugin-react-hooks": "^4.3.0"
			},
			"peerDependencies": {
				"eslint": "*"
			}
		},
		"node_modules/@apify/input_secrets": {
			"version": "1.1.6",

package.json

{
	"name": "carsNbids",
	"version": "0.0.1",
	"type": "module",
	"description": "This is an example of an Apify actor.",
	"dependencies": {
		"apify": "^3.0.0",
		"crawlee": "^3.0.0",
		"playwright": "*"
	},
	"devDependencies": {
		"@apify/eslint-config": "^0.3.1",
		"eslint": "^8.20.0"
	},
	"scripts": {
		"start": "node src/main.js",
		"lint": "eslint ./src --ext .js,.jsx",
		"lint:fix": "eslint ./src --ext .js,.jsx --fix",
		"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
	},
	"author": "It's not you it's me",
	"license": "ISC"
}

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:16

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY --chown=myuser package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY --chown=myuser . ./


# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent

.actor/INPUT_SCHEMA.json

This file is 103 lines long. Only the first 50 are shown. Show all

{
    "title": "cars&bids inputs",
    "description": "Inputs",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startYear": {
            "title": "start_year",
            "type": "integer",
            "description": "Select the start year of the search",
            "default": 1981,
            "maximum": 2023,
            "minimum": 1981
        },
        "endYear": {
            "title": "end_year",
            "type": "integer",
            "description": "Select the end year of the search",
            "default": 2023,
            "maximum": 2023,
            "minimum": 1981
        },
        "transmission": {
            "title": "transmission",
            "type": "string",
            "description": "Select transmission type",
            "editor": "select",
            "default": "All",
            "enum": [
                "All",
                "1",
                "2"
            ],
            "enumTitles": [
                "All",
                "Automatic",
                "Manual"
            ]
        },
        "bodyStyles": {
            "title": "Body style",
            "type": "string",
            "description": "Select the body style",
            "editor": "select",
            "default": "All",
            "enum": [
                "All",
                "1",
                "2",
                "3",

.actor/README.md


## Input Parameters
__startYear__ - The earliest year to include in the search. Defaults to 1981.

__endYear__ - The latest year to include in the search. Defaults to 2023.

__transmission__ - The transmission type to filter by. Defaults to All.

__bodyStyles__ - The body style to filter by. Defaults to All.

__sort__ - The order of the results (Ending soon, listed, no reserve, lowest mileage). Not all of these options work with **pastAuctions**.

__maxItems__ - The maximum number of results to return. Leave it null for an unlimited number of results.

__pastAuctions__ - Set this to true to scrape past auctions. We recommend setting a maxItems limit, because Cars & Bids currently has more than 7000 past auctions.

## Output example
```json
{
  "title": "2000 Pontiac Firebird Trans Am",
  "url": "https://carsandbids.com/auctions/3qJpkbv1/2000-pontiac-firebird-trans-am",
  "ending": "July 13th at 2:42 PM",
  "bidValue": 1632,
  "timeLeft": "6 Days",
  "info": {
    "Make": "Pontiac",
    "Model": "Firebird",
    "Mileage": 71300,
    "VIN": "2G2FV22G7Y2165446",
    "Title Status": "Clean (WI)",
    "Location": "Kenosha, WI 53143",
    "Seller": "Distilled",
    "Engine": "5.7L V8",
    "Drivetrain": "Rear-wheel drive",
    "Transmission": "Automatic (4-Speed)",
    "Body Style": "Coupe",
    "Exterior Color": "Bright Red",
    "Interior Color": "Black",
    "Seller Type": "Private Party"
  },
  "images": [
    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-85qzXWfse2-(edit).jpg?t=165666471604",
    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-ag8m-CtUIc-(edit).jpg?t=165666493962",
    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-2BMvb1XozL-(edit).jpg?t=165666454289",
    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-xvmRy1ra0p-(edit).jpg?t=165666480968",
    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-c0SyTe0FKZ-(edit).jpg?t=165666309084",
    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-j-RToo3A6h-(edit).jpg?t=165666401775",
    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-97o2ZOlEA5-(edit).jpg?t=165666418980",
    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-LkiEROXi4y-(edit).jpg?t=165666428423"
  ]
}
``` 
##### The output is saved in the default dataset.
## TO DO

* Sort by closest to me

* Search for cars by input text

.actor/actor.json

{
	"actorSpecification": 1,
	"name": "carsNbids",
	"version": "0.0",
	"buildTag": "latest",
	"environmentVariables": {},
	"storages": {
        "dataset": "./dataset_schema.json"
    }
}

.actor/dataset_schema.json

This file is 103 lines long. Only the first 50 are shown. Show all

{
    "actorSpecification": 1,
    "views": {
        "overview": {
            "title": "Overview",
            "transformation": {
                "fields": [
                    "title",
                    "url",
                    "ending",
                    "bidValue",
                    "timeLeft",
                    "info",
                    "images"
                ]
            },
            "display": {
                "component": "table",
                "properties": {
                    "title": {
                        "label": "Text",
                        "format": "text"
                    },
                    "url": {
                        "label": "Link",
                        "format": "link"
                    },
                    "ending": {
                        "label": "Text",
                        "format": "text"
                    },
                    "bidValue": {
                        "label": "Number",
                        "format": "number"
                    },
                    "timeLeft": {
                        "label": "Text",
                        "format": "text"
                    },
                    "info.Make": {
                        "label": "Text",
                        "format": "text"
                    },
                    "info.Model": {
                        "label": "Text",
                        "format": "text"
                    },
                    "info.Mileage": {
                        "label": "Number",
                        "format": "number"

src/main.js

// For more information, see https://crawlee.dev/
import { PlaywrightCrawler, ProxyConfiguration, KeyValueStore } from 'crawlee';
import { router } from './routes.js';
import { Actor, log } from 'apify';

await Actor.init();

// Read the actor input; fall back to an empty object so that a missing input
// does not make the destructuring below throw.
const {
    endYear,
    startYear,
    bodyStyles,
    transmission,
    sort,
    pastAuctions,
} = (await KeyValueStore.getInput()) ?? {};

// Past auctions live under /past-auctions/, live auctions on the site root.
const url = new URL(pastAuctions ? '/past-auctions/' : '/', 'https://carsandbids.com/');

// [query parameter, input value, value treated as "no filter"].
// The year and "All" defaults mirror the defaults declared in INPUT_SCHEMA.json;
// 'Ending soon' is presumably the schema default for `sort` — TODO confirm.
const filters = [
    ['end_year', endYear, 2023],
    ['start_year', startYear, 1981],
    ['body_style', bodyStyles, 'All'],
    ['transmission', transmission, 'All'],
    ['sort', sort, 'Ending soon'],
];
for (const [param, value, defaultValue] of filters) {
    // Skip unset inputs (otherwise the literal string "undefined" would end up
    // in the query) and values equal to the default.
    if (value != null && value !== defaultValue) {
        url.searchParams.set(param, value);
    }
}

// The label must match a handler name registered on the router in routes.js.
const label = pastAuctions ? 'pastAuctions' : 'liveAuctions';

const startUrl = url.toString();

const crawler = new PlaywrightCrawler({
    // proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }),
    requestHandler: router,
    headless: false,
});

log.info(startUrl);
await crawler.run([{
    url: startUrl,
    userData: { label },
}]);

await Actor.exit();

src/routes.js

This file is 116 lines long. Only the first 50 are shown. Show all

import { Dataset, createPlaywrightRouter, KeyValueStore, utils, RequestQueue, sleep } from 'crawlee';
// Router that dispatches each request to the handler registered for its label.
// The first handler registered below handles pagination for past auctions.
export const router = createPlaywrightRouter();
// Module-wide count of auction URLs collected so far; the liveAuctions handler
// compares it against the `maxItems` input.
let itemsCounter = 0;

/**
 * Handler for the past-auctions listing page.
 *
 * Listens for the site's auctions API response, reads its `total` field and
 * enqueues one 'pagination' request per result page (50 items per page).
 * When the `maxItems` input is set, it determines the number of pages instead
 * of `total`. Does nothing unless the `pastAuctions` input is truthy.
 */
router.addHandler('pastAuctions', async ({ request, log, page }) => {
    const { maxItems, pastAuctions } = (await KeyValueStore.getInput()) ?? {};
    if (!pastAuctions) return;

    const queue = await RequestQueue.open();

    page.on('response', async (res) => {
        if (!res.url().includes('carsandbids.com/v2/autos/auctions?')) return;
        // This listener runs detached from the handler's promise chain, so
        // failures are caught here rather than surfacing as unhandled rejections.
        try {
            const { total } = await res.json();
            const totalPages = Math.ceil((maxItems || total) / 50);
            for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
                // Build the page URL through URL/searchParams so it is valid
                // whether or not the start URL already carries a query string.
                const pageUrl = new URL(request.url);
                pageUrl.searchParams.set('page', String(pageNumber));
                await queue.addRequest({
                    url: pageUrl.href,
                    userData: {
                        label: 'pagination',
                    },
                });
            }
        } catch (err) {
            log.error(`Failed to enqueue past-auction pages: ${err.message}`);
        }
    });

    // Keep the page open until the listing has rendered.
    await page.waitForSelector('ul[class="auctions-list past-auctions "]');
});

/**
 * Handler for the live-auctions listing page.
 *
 * Waits for the auction cards to render, collects the auction links and
 * enqueues them with the 'detail' label. Collection stops once the
 * module-wide `itemsCounter` reaches the `maxItems` input; an unset (or zero)
 * `maxItems` means no limit.
 */
router.addHandler('liveAuctions', async ({ enqueueLinks, page, parseWithCheerio, blockRequests }) => {
    await blockRequests();
    const { maxItems } = (await KeyValueStore.getInput()) ?? {};
    await page.waitForSelector('article[class="min"]');
    await page.waitForSelector('.auction-title');
    await sleep(1000);

    const $ = await parseWithCheerio();
    const urls = [];
    for (const anchor of $('.auction-title > a').toArray()) {
        // `itemsCounter < maxItems` is always false for a null/undefined
        // maxItems, so only apply the limit when one is actually set.
        if (maxItems && itemsCounter >= maxItems) break;

        const href = $(anchor).attr('href');
        // Anchors without an href cannot be turned into a request URL.
        if (!href) continue;

        urls.push(new URL(href, 'https://carsandbids.com').href);
        itemsCounter += 1;
    }

    await enqueueLinks({
        urls,
        label: 'detail',
    });
});