Cars & Bids Scraper avatar
Cars & Bids Scraper

Deprecated

Pricing

Pay per usage

Go to Store
Cars & Bids Scraper

Cars & Bids Scraper

Deprecated

Developed by

Enrique Carvajal Otárola

Enrique Carvajal Otárola

Maintained by Community

Extract data about car auctions from the Cars & Bids website. When scraping, you can filter by body style, year, mileage, etc., so the output contains only the cars you need.

0.0 (0)

Pricing

Pay per usage

1

Total users

32

Monthly users

1

Last modified

2 years ago

.dockerignore

# Paths listed here are left out of the Docker build context.
# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git

.editorconfig

# Top-most EditorConfig file: editors stop looking in parent directories.
root = true
# The settings below apply to every file in the project.
[*]
# Indent with 4 spaces.
indent_style = space
indent_size = 4
charset = utf-8
# Keep files tidy: no trailing whitespace, a newline at EOF, Unix line endings.
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"extends": "@apify",
"root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
# crawlee and apify storage folders (kept in sync with .dockerignore)
apify_storage
crawlee_storage
storage

package.json

{
    "name": "carsNbids",
    "version": "0.0.1",
    "type": "module",
    "description": "Apify actor that scrapes live and past car auctions from carsandbids.com.",
    "dependencies": {
        "apify": "^3.0.0",
        "crawlee": "^3.0.0",
        "playwright": "*"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.3.1",
        "eslint": "^8.20.0"
    },
    "scripts": {
        "start": "node src/main.js",
        "lint": "eslint ./src --ext .js,.jsx",
        "lint:fix": "eslint ./src --ext .js,.jsx --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:16
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY --chown=myuser package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. The last step deletes ~/.npm so that directory
# is not kept in the resulting image layer.
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version \
&& rm -r ~/.npm
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY --chown=myuser . ./
# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
# NOTE(review): src/main.js currently creates the crawler with
# `headless: false`, so keep the XVFB script unless that option changes.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent

.actor/INPUT_SCHEMA.json

{
    "title": "Cars & Bids scraper input",
    "description": "Filters and limits applied to the Cars & Bids auction search.",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startYear": {
            "title": "Start year",
            "type": "integer",
            "description": "Earliest model year to include in the search.",
            "default": 1981,
            "maximum": 2023,
            "minimum": 1981
        },
        "endYear": {
            "title": "End year",
            "type": "integer",
            "description": "Latest model year to include in the search.",
            "default": 2023,
            "maximum": 2023,
            "minimum": 1981
        },
        "transmission": {
            "title": "Transmission",
            "type": "string",
            "description": "Select the transmission type.",
            "editor": "select",
            "default": "All",
            "enum": [
                "All",
                "1",
                "2"
            ],
            "enumTitles": [
                "All",
                "Automatic",
                "Manual"
            ]
        },
        "bodyStyles": {
            "title": "Body style",
            "type": "string",
            "description": "Select the body style.",
            "editor": "select",
            "default": "All",
            "enum": [
                "All",
                "1",
                "2",
                "3",
                "4",
                "5",
                "6",
                "7",
                "8"
            ],
            "enumTitles": [
                "All",
                "Coupe",
                "Convertible",
                "Hatchback",
                "Sedan",
                "SUV/Crossover",
                "Truck",
                "Van/Minivan",
                "Wagon"
            ]
        },
        "sort": {
            "title": "Sort",
            "type": "string",
            "description": "Select how the results are ordered. Not every option works together with past auctions.",
            "editor": "select",
            "default": "Ending soon",
            "enum": [
                "All",
                "Ending soon",
                "listed",
                "no_reserve",
                "lowest_mileage"
            ],
            "enumTitles": [
                "All",
                "Ending soon",
                "Newly listed",
                "No reserve",
                "Lowest mileage"
            ]
        },
        "maxItems": {
            "title": "Max items",
            "type": "integer",
            "description": "Maximum number of auctions to scrape. Leave empty for no limit.",
            "minimum": 1,
            "nullable": true
        },
        "pastAuctions": {
            "title": "Past auctions",
            "type": "boolean",
            "description": "Scrape the results of past auctions instead of live auctions.",
            "default": false
        }
    },
    "required": []
}

.actor/README.md

1## Input Parameters
__startYear__ - The earliest model year to include in the search. Defaults to 1981.
3
__endYear__ - The latest model year to include in the search. Defaults to 2023.
5
__transmission__ - The transmission type to filter by. Defaults to All.
7
__bodyStyles__ - The body style to filter by. Defaults to All.
9
__sort__ - The order in which you want the results (Ending soon, listed, no reserve, lowest mileage). Not all of these options work with **pastAuctions**.
11
__maxItems__ - The maximum number of results to return. If you want an unlimited number of results, leave it null.
13
__pastAuctions__ - Set this to true to get past auctions. We recommend also setting a maxItems limit, because at the moment Cars & Bids has more than 7000 past auctions.
15
16## Output example
17```json
18{
19 "title": "2000 Pontiac Firebird Trans Am",
20 "url": "https://carsandbids.com/auctions/3qJpkbv1/2000-pontiac-firebird-trans-am",
21 "ending": "July 13th at 2:42 PM",
22 "bidValue": 1632,
23 "timeLeft": "6 Days",
24 "info": {
25 "Make": "Pontiac",
26 "Model": "Firebird",
27 "Mileage": 71300,
28 "VIN": "2G2FV22G7Y2165446",
29 "Title Status": "Clean (WI)",
30 "Location": "Kenosha, WI 53143",
31 "Seller": "Distilled",
32 "Engine": "5.7L V8",
33 "Drivetrain": "Rear-wheel drive",
34 "Transmission": "Automatic (4-Speed)",
35 "Body Style": "Coupe",
36 "Exterior Color": "Bright Red",
37 "Interior Color": "Black",
38 "Seller Type": "Private Party"
39 },
40 "images": [
41 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-85qzXWfse2-(edit).jpg?t=165666471604",
42 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-ag8m-CtUIc-(edit).jpg?t=165666493962",
43 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-2BMvb1XozL-(edit).jpg?t=165666454289",
44 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-xvmRy1ra0p-(edit).jpg?t=165666480968",
45 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-c0SyTe0FKZ-(edit).jpg?t=165666309084",
46 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-j-RToo3A6h-(edit).jpg?t=165666401775",
47 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-97o2ZOlEA5-(edit).jpg?t=165666418980",
48 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-LkiEROXi4y-(edit).jpg?t=165666428423"
49 ]
50}
51```
52##### The output is saved in the default dataset.
53## TO DO
54
55* Sort by closest to me
56
57* Search for cars by input text

.actor/actor.json

{
"actorSpecification": 1,
"name": "carsNbids",
"version": "0.0",
"buildTag": "latest",
"environmentVariables": {},
"storages": {
"dataset": "./dataset_schema.json"
}
}

.actor/dataset_schema.json

{
    "actorSpecification": 1,
    "views": {
        "overview": {
            "title": "Overview",
            "transformation": {
                "fields": [
                    "title",
                    "url",
                    "ending",
                    "bidValue",
                    "timeLeft",
                    "info",
                    "images"
                ]
            },
            "display": {
                "component": "table",
                "properties": {
                    "title": {
                        "label": "Title",
                        "format": "text"
                    },
                    "url": {
                        "label": "URL",
                        "format": "link"
                    },
                    "ending": {
                        "label": "Ending",
                        "format": "text"
                    },
                    "bidValue": {
                        "label": "Bid value",
                        "format": "number"
                    },
                    "timeLeft": {
                        "label": "Time left",
                        "format": "text"
                    },
                    "info.Make": {
                        "label": "Make",
                        "format": "text"
                    },
                    "info.Model": {
                        "label": "Model",
                        "format": "text"
                    },
                    "info.Mileage": {
                        "label": "Mileage",
                        "format": "number"
                    },
                    "info.VIN": {
                        "label": "VIN",
                        "format": "text"
                    },
                    "info.Title Status": {
                        "label": "Title status",
                        "format": "text"
                    },
                    "info.Location": {
                        "label": "Location",
                        "format": "text"
                    },
                    "info.Seller": {
                        "label": "Seller",
                        "format": "text"
                    },
                    "info.Engine": {
                        "label": "Engine",
                        "format": "text"
                    },
                    "info.Drivetrain": {
                        "label": "Drivetrain",
                        "format": "text"
                    },
                    "info.Transmission": {
                        "label": "Transmission",
                        "format": "text"
                    },
                    "info.Body Style": {
                        "label": "Body style",
                        "format": "text"
                    },
                    "info.Exterior Color": {
                        "label": "Exterior color",
                        "format": "text"
                    },
                    "info.Interior Color": {
                        "label": "Interior color",
                        "format": "text"
                    },
                    "info.Seller Type": {
                        "label": "Seller type",
                        "format": "text"
                    },
                    "images": {
                        "label": "Images",
                        "format": "array"
                    }
                }
            }
        }
    }
}

src/main.js

1// For more information, see https://crawlee.dev/
2import { PlaywrightCrawler, ProxyConfiguration, KeyValueStore } from 'crawlee';
3import { router } from './routes.js';
4import { Actor, log } from 'apify';
5
// Initialise the Apify SDK before the actor input and storages are used.
await Actor.init();

// The input may be absent (e.g. a local run without INPUT.json); fall back to an empty object.
const input = (await KeyValueStore.getInput()) ?? {};
const pastAuctions = Boolean(input.pastAuctions);

// Past auctions live under their own path; live auctions are listed on the home page.
const url = pastAuctions
    ? new URL('/past-auctions/', 'https://carsandbids.com/')
    : new URL('https://carsandbids.com/');

/**
 * Adds `param` to the start URL unless the value is missing or equals one of the
 * values that mean "no filter". Skipping missing values keeps the literal strings
 * "undefined"/"null" out of the query string.
 */
const addFilter = (param, value, ...noFilterValues) => {
    if (value === undefined || value === null || noFilterValues.includes(value)) return;
    url.searchParams.set(param, String(value));
};

// The "no filter" values mirror the defaults declared in .actor/INPUT_SCHEMA.json
// (startYear defaults to 1981 there, not 1982).
addFilter('end_year', input.endYear, 2023);
addFilter('start_year', input.startYear, 1981);
addFilter('body_style', input.bodyStyles, 'All');
addFilter('transmission', input.transmission, 'All');
// 'All' is offered by the input schema but is not a sort key, so it is treated like the default.
addFilter('sort', input.sort, 'Ending soon', 'All');

// The label must match a handler registered in routes.js ('pastAuctions' / 'liveAuctions');
// with no matching route the start request cannot be handled.
const label = pastAuctions ? 'pastAuctions' : 'liveAuctions';
const startUrl = url.toString();

const crawler = new PlaywrightCrawler({
    // proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }),
    requestHandler: router,
    headless: false,
});

log.info(startUrl);
await crawler.run([{
    url: startUrl,
    userData: { label },
}]);

await Actor.exit();

src/routes.js

1import { Dataset, createPlaywrightRouter, KeyValueStore, utils, RequestQueue, sleep } from 'crawlee';
// Router shared by all handlers in this file (live auctions, past-auction pagination, auction detail).
export const router = createPlaywrightRouter();
// Number of auction URLs enqueued so far. Kept at module level so the `maxItems`
// cap is applied across all listing pages, not per page.
let itemsCounter = 0;
5
/**
 * Entry handler for the past-auctions listing.
 *
 * Reads the total number of past auctions from the site's auctions API response,
 * works out how many listing pages are needed and enqueues one 'pagination'
 * request per page.
 */
router.addHandler('pastAuctions', async ({ request, log, page }) => {
    const { maxItems, pastAuctions } = (await KeyValueStore.getInput()) ?? {};
    if (!pastAuctions) return;

    // Page size used by the pagination arithmetic.
    // NOTE(review): presumably the number of auctions per listing page — confirm against the site.
    const PAGE_SIZE = 50;
    const isAuctionsApiResponse = (res) => res.url().includes('carsandbids.com/v2/autos/auctions?');

    // The handler runs after the initial navigation, so the API response may already
    // have been received. Reloading with the waiter armed first guarantees it is seen,
    // and awaiting it means every page is enqueued before the handler returns.
    const [apiResponse] = await Promise.all([
        page.waitForResponse(isAuctionsApiResponse),
        page.reload(),
    ]);
    const { total } = await apiResponse.json();
    if (!Number.isFinite(total)) {
        throw new Error(`Auctions API response for ${request.url} did not contain a numeric "total".`);
    }
    await page.waitForSelector('ul[class="auctions-list past-auctions "]');

    // Never request more pages than exist, even when maxItems is larger than the total.
    const wantedItems = maxItems ? Math.min(maxItems, total) : total;
    const totalPages = Math.ceil(wantedItems / PAGE_SIZE);
    log.info(`Enqueuing ${totalPages} past-auction page(s) for ${wantedItems} auction(s).`);

    const queue = await RequestQueue.open();
    for (let index = 1; index <= totalPages; index++) {
        // Build the URL through searchParams so it is valid whether or not the
        // start URL already carries filter parameters (`?` vs `&`).
        const pageUrl = new URL(request.url);
        pageUrl.searchParams.set('page', String(index));
        await queue.addRequest({
            url: pageUrl.toString(),
            userData: { label: 'pagination' },
        });
    }
});
30
/**
 * Handler for the live-auctions listing: collects the auction links shown on the
 * page (up to `maxItems` in total) and enqueues them for the 'detail' handler.
 */
router.addHandler('liveAuctions', async ({ enqueueLinks, page, parseWithCheerio, blockRequests }) => {
    await blockRequests();
    const { maxItems } = (await KeyValueStore.getInput()) ?? {};
    // A missing/null maxItems means "no limit". Comparing the counter with
    // null/undefined directly is always false, which would enqueue nothing.
    const limit = maxItems ?? Infinity;

    await page.waitForSelector('article[class="min"]');
    await page.waitForSelector('.auction-title');
    await sleep(1000);
    const $ = await parseWithCheerio();

    // Anchors without an href are dropped (cheerio's map skips undefined results).
    const hrefs = $('.auction-title > a').map((i, e) => $(e).attr('href')).get();
    const urls = [];
    for (const href of hrefs) {
        if (itemsCounter >= limit) break;
        urls.push(`https://carsandbids.com${href}`);
        itemsCounter += 1;
    }

    if (urls.length > 0) {
        await enqueueLinks({
            urls,
            label: 'detail',
        });
    }
});
50
// Removes every thousands separator; String.replace with a string pattern only removes the first one.
const stripThousandsSeparators = (text) => String(text ?? '').replace(/,/g, '');

/**
 * Handler for a single auction page: extracts the title, end time, current bid,
 * the "quick facts" table and the exterior/interior image URLs, then stores the
 * result in the default dataset.
 */
router.addHandler('detail', async ({ request, page, parseWithCheerio, blockRequests }) => {
    await blockRequests();
    await page.waitForSelector('div[class="auction-title "]');
    await page.waitForSelector('div[class="quick-facts"]');
    await page.waitForSelector('span[class="bid-value"]');
    const $ = await parseWithCheerio();

    const bidText = $('span[class="value"] > span[class="bid-value"]').text();
    const bid = parseInt(stripThousandsSeparators(bidText).replace('$', ''), 10);

    const item = {
        title: $('div[class="auction-title "] > h1').text(),
        url: request.url,
        ending: $('p[class="end-time"]').text().replace('Ending', '').trim(),
        // null instead of NaN when the bid cannot be parsed (NaN is not valid JSON).
        bidValue: Number.isNaN(bid) ? null : bid,
        timeLeft: $('li[class="time-left"] > span[class="value"]').text(),
        info: {},
        images: [],
    };

    // The quick-facts list is a <dl>: the dt elements hold labels, the dd elements the matching values.
    const labels = $('div[class="quick-facts"] > dl > dt').map((i, elem) => $(elem).text().trim()).get();
    const values = $('div[class="quick-facts"] > dl > dd').map((i, elem) => $(elem).text()).get();
    labels.forEach((label, i) => {
        const raw = values[i];
        // Skip a label that has no matching value instead of throwing on undefined.
        if (raw === undefined) return;
        switch (label) {
            case 'Model':
                // Drop the "Save" text that is part of the cell's text content.
                item.info[label] = raw.replace(/Save/gm, '');
                break;
            case 'Seller':
                // Drop the "Contact" text that is part of the cell's text content.
                item.info[label] = raw.replace(/Contact/gm, '');
                break;
            case 'Mileage': {
                const cleaned = stripThousandsSeparators(raw).trim();
                // Number('') is 0, so an empty cell must not be reported as zero miles.
                const mileage = cleaned === '' ? NaN : Number(cleaned);
                item.info[label] = Number.isNaN(mileage) ? null : mileage;
                break;
            }
            default:
                item.info[label] = raw;
                break;
        }
    });

    // Exterior images first, then interior; entries without an <img src> are skipped.
    for (const group of ['exterior', 'interior']) {
        $(`div[class="group ${group}"] > div`).each((i, wrapper) => {
            const src = $(wrapper).children('img').attr('src');
            if (src) item.images.push(src);
        });
    }

    await Dataset.pushData(item);
});
99
/**
 * Handler for one page of the past-auctions listing: collects the auction links
 * on the page (up to `maxItems` in total) and enqueues them for the 'detail' handler.
 */
router.addHandler('pagination', async ({ page, parseWithCheerio, enqueueLinks, blockRequests }) => {
    await blockRequests();
    const { maxItems } = (await KeyValueStore.getInput()) ?? {};
    // A missing/null maxItems means "no limit". Comparing the counter with
    // null/undefined directly is always false, which would enqueue nothing.
    const limit = maxItems ?? Infinity;

    await page.waitForSelector('ul[class="auctions-list past-auctions "]');
    const $ = await parseWithCheerio();

    // Anchors without an href are dropped (cheerio's map skips undefined results).
    const hrefs = $('div[class="auction-title"] > a').map((i, e) => $(e).attr('href')).get();
    const urls = [];
    for (const href of hrefs) {
        if (itemsCounter >= limit) break;
        urls.push(`https://carsandbids.com${href}`);
        itemsCounter += 1;
    }

    if (urls.length > 0) {
        await enqueueLinks({
            urls,
            label: 'detail',
        });
    }
});