
Cars & Bids Scraper
- enco/carsandbids
- Modified
- Users 10
- Runs 218
- Created by
Enrique Carvajal Otárola
Extract data about car auctions from the Cars & Bids website. You can filter the scrape by body style, year, mileage, etc., so the output contains only the cars you need.
.dockerignore
# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.eslintrc
{
"extends": "@apify",
"root": true
}
.gitignore
# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage
storage
package-lock.json
This file is 9508 lines long. Only the first 50 are shown. Show all
{
"name": "carsNbids",
"version": "0.0.1",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "carsNbids",
"version": "0.0.1",
"license": "ISC",
"dependencies": {
"apify": "^3.0.0",
"crawlee": "^3.0.0",
"playwright": "*"
},
"devDependencies": {
"@apify/eslint-config": "^0.3.1",
"eslint": "^8.20.0"
}
},
"node_modules/@apify/consts": {
"version": "2.8.0",
"resolved": "https://registry.npmjs.org/@apify/consts/-/consts-2.8.0.tgz",
"integrity": "sha512-uQBRggMka8HtpkLWJeA9ZITDxgAgpwDoL1Uj8QXGaZDj0Rb7Z7YcEAJ8KNRoj0yuIlqoS6NwNWEwfpFIjSYt3Q=="
},
"node_modules/@apify/datastructures": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/@apify/datastructures/-/datastructures-2.0.0.tgz",
"integrity": "sha512-O7I31PvG4Qb/Zc2lAIkSUBRDLKDKLrmqtWG3Ea8To5xvbPKdiLuVx3IuAzjCs1UQVTbhN590Sw5xBoFghreAYA=="
},
"node_modules/@apify/eslint-config": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/@apify/eslint-config/-/eslint-config-0.3.1.tgz",
"integrity": "sha512-YPf6oG8iIRcXflbgILEcC9ujTtDrFISkR8/tt9yI2CWc5NxWiRMs2RDMYTWwj8bfYrpVgDPEmcIMr7Mi1v6QcA==",
"dev": true,
"dependencies": {
"eslint-config-airbnb": "^19.0.0",
"eslint-config-airbnb-base": "^15.0.0",
"eslint-import-resolver-typescript": "^2.5.0",
"eslint-plugin-import": "^2.25.3",
"eslint-plugin-jsx-a11y": "^6.5.1",
"eslint-plugin-react": "^7.27.0",
"eslint-plugin-react-hooks": "^4.3.0"
},
"peerDependencies": {
"eslint": "*"
}
},
"node_modules/@apify/input_secrets": {
"version": "1.1.6",
package.json
{
"name": "carsNbids",
"version": "0.0.1",
"type": "module",
"description": "This is an example of an Apify actor.",
"dependencies": {
"apify": "^3.0.0",
"crawlee": "^3.0.0",
"playwright": "*"
},
"devDependencies": {
"@apify/eslint-config": "^0.3.1",
"eslint": "^8.20.0"
},
"scripts": {
"start": "node src/main.js",
"lint": "eslint ./src --ext .js,.jsx",
"lint:fix": "eslint ./src --ext .js,.jsx --fix",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
"license": "ISC"
}
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:16
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY --chown=myuser package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version \
&& rm -r ~/.npm
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY --chown=myuser . ./
# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent
.actor/INPUT_SCHEMA.json
This file is 103 lines long. Only the first 50 are shown. Show all
{
"title": "cars&bids inputs",
"description": "Inputs",
"type": "object",
"schemaVersion": 1,
"properties": {
"startYear": {
"title": "start_year",
"type": "integer",
"description": "Select the start year of the search",
"default": 1981,
"maximum": 2023,
"minimum": 1981
},
"endYear": {
"title": "end_year",
"type": "integer",
"description": "Select the end year of the search",
"default": 2023,
"maximum": 2023,
"minimum": 1981
},
"transmission": {
"title": "transmission",
"type": "string",
"description": "Select transmission type",
"editor": "select",
"default": "All",
"enum": [
"All",
"1",
"2"
],
"enumTitles": [
"All",
"Automatic",
"Manual"
]
},
"bodyStyles": {
"title": "Body style",
"type": "string",
"description": "Select the body style",
"editor": "select",
"default": "All",
"enum": [
"All",
"1",
"2",
"3",
.actor/README.md
## Input Parameters
__startYear__ - The earliest model year to include in the search. Defaults to 1981.
__endYear__ - The latest model year to include in the search. Defaults to 2023.
__transmission__ - The transmission type by default is All.
__bodyStyles__ - You can set the body style of the search default value is All.
__sort__ - The order in which you want the results (Ending soon, listed, no reserve, lowest mileage). Not all of these options work with **pastAuctions**.
__maxItems__ - The maximum number of results to return. If you want an unlimited number of results, leave it null.
__pastAuctions__ - Set this to true to scrape past auctions. We recommend setting a **maxItems** limit, because Cars & Bids currently has more than 7,000 past auctions.
## Output example
```json
{
"title": "2000 Pontiac Firebird Trans Am",
"url": "https://carsandbids.com/auctions/3qJpkbv1/2000-pontiac-firebird-trans-am",
"ending": "July 13th at 2:42 PM",
"bidValue": 1632,
"timeLeft": "6 Days",
"info": {
"Make": "Pontiac",
"Model": "Firebird",
"Mileage": 71300,
"VIN": "2G2FV22G7Y2165446",
"Title Status": "Clean (WI)",
"Location": "Kenosha, WI 53143",
"Seller": "Distilled",
"Engine": "5.7L V8",
"Drivetrain": "Rear-wheel drive",
"Transmission": "Automatic (4-Speed)",
"Body Style": "Coupe",
"Exterior Color": "Bright Red",
"Interior Color": "Black",
"Seller Type": "Private Party"
},
"images": [
"https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-85qzXWfse2-(edit).jpg?t=165666471604",
"https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-ag8m-CtUIc-(edit).jpg?t=165666493962",
"https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-2BMvb1XozL-(edit).jpg?t=165666454289",
"https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-xvmRy1ra0p-(edit).jpg?t=165666480968",
"https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-c0SyTe0FKZ-(edit).jpg?t=165666309084",
"https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-j-RToo3A6h-(edit).jpg?t=165666401775",
"https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-97o2ZOlEA5-(edit).jpg?t=165666418980",
"https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-LkiEROXi4y-(edit).jpg?t=165666428423"
]
}
```
##### The output is saved in the default dataset.
## TO DO
* Sort by closest to me
* Search for cars by input text
.actor/actor.json
{
"actorSpecification": 1,
"name": "carsNbids",
"version": "0.0",
"buildTag": "latest",
"environmentVariables": {},
"storages": {
"dataset": "./dataset_schema.json"
}
}
.actor/dataset_schema.json
This file is 103 lines long. Only the first 50 are shown. Show all
{
"actorSpecification": 1,
"views": {
"overview": {
"title": "Overview",
"transformation": {
"fields": [
"title",
"url",
"ending",
"bidValue",
"timeLeft",
"info",
"images"
]
},
"display": {
"component": "table",
"properties": {
"title": {
"label": "Text",
"format": "text"
},
"url": {
"label": "Link",
"format": "link"
},
"ending": {
"label": "Text",
"format": "text"
},
"bidValue": {
"label": "Number",
"format": "number"
},
"timeLeft": {
"label": "Text",
"format": "text"
},
"info.Make": {
"label": "Text",
"format": "text"
},
"info.Model": {
"label": "Text",
"format": "text"
},
"info.Mileage": {
"label": "Number",
"format": "number"
src/main.js
// For more information, see https://crawlee.dev/
import { PlaywrightCrawler, ProxyConfiguration, KeyValueStore } from 'crawlee';
import { router } from './routes.js';
import { Actor, log } from 'apify';
// Entry point: builds the Cars & Bids start URL from the actor input and runs
// a single Playwright crawl that starts from that URL.
await Actor.init();

// Fall back to an empty object so the destructuring does not throw when the
// actor is started without any input.
const {
    endYear,
    startYear,
    bodyStyles,
    transmission,
    sort,
    pastAuctions,
} = (await KeyValueStore.getInput()) ?? {};

const url = pastAuctions
    ? new URL('/past-auctions/', 'https://carsandbids.com/')
    : new URL('https://carsandbids.com/');

// [query parameter, input value, default value]. A filter is only added to the
// URL when the input provides it and it differs from the default; a missing
// value would otherwise be serialised as the literal string "undefined".
const filters = [
    ['end_year', endYear, 2023],
    ['start_year', startYear, 1981],
    ['body_style', bodyStyles, 'All'],
    ['transmission', transmission, 'All'],
    ['sort', sort, 'Ending soon'],
];
for (const [param, value, defaultValue] of filters) {
    if (value != null && value !== defaultValue) {
        url.searchParams.set(param, value);
    }
}

// The label selects the route handler for the start request, so it has to
// match the names registered in routes.js ('pastAuctions' / 'liveAuctions').
const label = pastAuctions ? 'pastAuctions' : 'liveAuctions';
const startUrl = url.toString();

const crawler = new PlaywrightCrawler({
    // proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }),
    requestHandler: router,
    headless: false,
});

log.info(startUrl);
await crawler.run([{
    url: startUrl,
    userData: { label },
}]);
await Actor.exit();
src/routes.js
This file is 116 lines long. Only the first 50 are shown. Show all
import { Dataset, createPlaywrightRouter, KeyValueStore, utils, RequestQueue, sleep } from 'crawlee';
//PAGINATION FOR PAST AUCTIONS
// Router shared by all route handlers in this file; exported so it can be used
// as the crawler's request handler.
export const router = createPlaywrightRouter();
// Module-level count of detail URLs collected so far; the 'liveAuctions'
// handler compares it against the `maxItems` input and increments it.
let itemsCounter = 0;
/**
 * Handler for the past-auctions listing page.
 *
 * Listens for the site's auctions API response, reads the `total` field from
 * its JSON body and enqueues one request labelled 'pagination' per page of 50
 * results. When the `maxItems` input is set, the page count is derived from it
 * instead of from `total`.
 */
router.addHandler('pastAuctions', async ({ request, log, page }) => {
    const queue = await RequestQueue.open();
    const { maxItems, pastAuctions } = await KeyValueStore.getInput();
    if (!pastAuctions) return;

    page.on('response', async (res) => {
        if (!res.url().includes('carsandbids.com/v2/autos/auctions?')) return;

        // Nothing awaits this listener, so errors are handled here to avoid
        // an unhandled promise rejection.
        try {
            const { total } = JSON.parse(await res.body());
            const totalPages = maxItems ? Math.ceil(maxItems / 50) : Math.ceil(total / 50);
            for (let index = 1; index <= totalPages; index++) {
                // Use the URL API so the page parameter is appended correctly
                // whether or not the listing URL already has a query string.
                const pageUrl = new URL(request.url);
                pageUrl.searchParams.set('page', index);
                await queue.addRequest({
                    url: pageUrl.toString(),
                    userData: {
                        label: 'pagination',
                    },
                });
            }
        } catch (err) {
            log.error(`Failed to enqueue past auction pages: ${err.message}`);
        }
    });

    await page.waitForSelector('ul[class="auctions-list past-auctions "]');
});
/**
 * Handler for the live-auctions listing page.
 *
 * Waits for the auction cards to render, collects the detail-page link of each
 * auction title and enqueues those links with the 'detail' label. Collection
 * stops once the module-level `itemsCounter` reaches the `maxItems` input; a
 * null or missing `maxItems` means no limit.
 */
router.addHandler('liveAuctions', async ({ enqueueLinks, page, parseWithCheerio, blockRequests }) => {
    await blockRequests();
    const { maxItems } = await KeyValueStore.getInput();

    await page.waitForSelector('article[class="min"]');
    await page.waitForSelector('.auction-title');
    await sleep(1000);

    const $ = await parseWithCheerio();
    const urls = [];
    $('.auction-title > a').each((i, e) => {
        // `itemsCounter < null` is always false, so the "no limit" case has to
        // be checked explicitly or nothing would ever be collected.
        const hasRoom = maxItems == null || itemsCounter < maxItems;
        const href = $(e).attr('href');
        // Skip anchors without an href instead of building a bogus URL.
        if (hasRoom && href) {
            urls.push(`https://carsandbids.com${href}`);
            itemsCounter += 1;
        }
    });

    await enqueueLinks({
        urls,
        label: 'detail',
    });
});