Cars & Bids Scraper
Go to Store
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
Cars & Bids Scraper
enco/carsandbids
Extract data about car auctions from the Cars & Bids website. You can filter the scrape by body style, year, transmission, and more, so the output contains only the cars you need.
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
.eslintrc
1{
2 "extends": "@apify",
3 "root": true
4}
.gitignore
1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9storage
package.json
1{
2 "name": "carsNbids",
3 "version": "0.0.1",
4 "type": "module",
5 "description": "This is an example of an Apify actor.",
6 "dependencies": {
7 "apify": "^3.0.0",
8 "crawlee": "^3.0.0",
9 "playwright": "*"
10 },
11 "devDependencies": {
12 "@apify/eslint-config": "^0.3.1",
13 "eslint": "^8.20.0"
14 },
15 "scripts": {
16 "start": "node src/main.js",
17 "lint": "eslint ./src --ext .js,.jsx",
18 "lint:fix": "eslint ./src --ext .js,.jsx --fix",
19 "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
20 },
21 "author": "It's not you it's me",
22 "license": "ISC"
23}
.actor/Dockerfile
1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node-playwright-chrome:16
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14 && npm install --omit=dev --omit=optional \
15 && echo "Installed NPM packages:" \
16 && (npm list --omit=dev --all || true) \
17 && echo "Node.js version:" \
18 && node --version \
19 && echo "NPM version:" \
20 && npm --version \
21 && rm -r ~/.npm
22
23# Next, copy the remaining files and directories with the source code.
24# Since we do this after NPM install, quick build will be really fast
25# for most source file changes.
26COPY . ./
27
28
29# Run the image. If you know you won't need headful browsers,
30# you can remove the XVFB start script for a micro perf gain.
31CMD ./start_xvfb_and_run_cmd.sh && npm start --silent
.actor/INPUT_SCHEMA.json
1{
2 "title": "cars&bids inputs",
3 "description": "Inputs",
4 "type": "object",
5 "schemaVersion": 1,
6 "properties": {
7 "startYear": {
8 "title": "start_year",
9 "type": "integer",
10 "description": "Select the start year of the search",
11 "default": 1981,
12 "maximum": 2023,
13 "minimum": 1981
14 },
15 "endYear": {
16 "title": "end_year",
17 "type": "integer",
18 "description": "Select the end year of the search",
19 "default": 2023,
20 "maximum": 2023,
21 "minimum": 1981
22 },
23 "transmission": {
24 "title": "transmission",
25 "type": "string",
26 "description": "Select transmission type",
27 "editor": "select",
28 "default": "All",
29 "enum": [
30 "All",
31 "1",
32 "2"
33 ],
34 "enumTitles": [
35 "All",
36 "Automatic",
37 "Manual"
38 ]
39 },
40 "bodyStyles": {
41 "title": "Body style",
42 "type": "string",
43 "description": "Select the body style",
44 "editor": "select",
45 "default": "All",
46 "enum": [
47 "All",
48 "1",
49 "2",
50 "3",
51 "4",
52 "5",
53 "6",
54 "7",
55 "8"
56 ],
57 "enumTitles": [
58 "All",
59 "Coupe",
60 "Convertible",
61 "Hatchback",
62 "Sedan",
63 "SUV/Crossover",
64 "Truck",
65 "Van/Minivan",
66 "Wagon"
67 ]
68 },
69 "sort": {
70 "title": "Sort",
71 "type": "string",
72 "description": "Select sort type",
73 "editor": "select",
74 "default": "Ending soon",
75 "enum": [
76 "All",
77 "Ending soon",
78 "listed",
79 "no_reserve",
80 "lowest_mileage"
81 ],
82 "enumTitles": [
83 "All",
84 "Ending soon",
85 "Newly listed",
86 "No reserve",
87 "Lowest mileage"
88 ]
89 },
90 "maxItems": {
91 "title": "maxItems",
92 "type": "integer",
93 "description": "The maximum number of items you want to scrape",
94 "nullable": true
95 },
96 "pastAuctions": {
97 "title": "Past Auctions",
98 "type": "boolean",
99 "description": "get past auctions results.",
100 "default": false
101 }
102 },
103 "required": []
104}
.actor/README.md
1## Input Parameters
2__startYear__ - The earliest year to include in the search. Defaults to 1981.
3
4__endYear__ - The latest year to include in the search. Defaults to 2023.
5
6__transmission__ - The transmission type to filter by. Defaults to All.
7
8__bodyStyles__ - The body style to filter by. Defaults to All.
9
10__sort__ - The order in which results are returned (Ending soon, listed, no reserve, lowest mileage). Not all of these options work with **pastAuctions**.
11
12__maxItems__ - The maximum number of results to return. If you want an unlimited number of results, leave it null.
13
14__pastAuctions__ - Set this to true to get past auctions. We recommend setting a maxItems limit, because Cars & Bids currently has more than 7,000 past auctions.
15
16## Output example
17```json
18{
19 "title": "2000 Pontiac Firebird Trans Am",
20 "url": "https://carsandbids.com/auctions/3qJpkbv1/2000-pontiac-firebird-trans-am",
21 "ending": "July 13th at 2:42 PM",
22 "bidValue": 1632,
23 "timeLeft": "6 Days",
24 "info": {
25 "Make": "Pontiac",
26 "Model": "Firebird",
27 "Mileage": 71300,
28 "VIN": "2G2FV22G7Y2165446",
29 "Title Status": "Clean (WI)",
30 "Location": "Kenosha, WI 53143",
31 "Seller": "Distilled",
32 "Engine": "5.7L V8",
33 "Drivetrain": "Rear-wheel drive",
34 "Transmission": "Automatic (4-Speed)",
35 "Body Style": "Coupe",
36 "Exterior Color": "Bright Red",
37 "Interior Color": "Black",
38 "Seller Type": "Private Party"
39 },
40 "images": [
41 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-85qzXWfse2-(edit).jpg?t=165666471604",
42 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-ag8m-CtUIc-(edit).jpg?t=165666493962",
43 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-2BMvb1XozL-(edit).jpg?t=165666454289",
44 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-xvmRy1ra0p-(edit).jpg?t=165666480968",
45 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-c0SyTe0FKZ-(edit).jpg?t=165666309084",
46 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-j-RToo3A6h-(edit).jpg?t=165666401775",
47 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-97o2ZOlEA5-(edit).jpg?t=165666418980",
48 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-LkiEROXi4y-(edit).jpg?t=165666428423"
49 ]
50}
51```
52##### The output is saved in the default dataset.
53## TO DO
54
55* Sort by closest to me
56
57* Search for cars by input text
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "carsNbids",
4 "version": "0.0",
5 "buildTag": "latest",
6 "environmentVariables": {},
7 "storages": {
8 "dataset": "./dataset_schema.json"
9 }
10}
.actor/dataset_schema.json
1{
2 "actorSpecification": 1,
3 "views": {
4 "overview": {
5 "title": "Overview",
6 "transformation": {
7 "fields": [
8 "title",
9 "url",
10 "ending",
11 "bidValue",
12 "timeLeft",
13 "info",
14 "images"
15 ]
16 },
17 "display": {
18 "component": "table",
19 "properties": {
20 "title": {
21 "label": "Text",
22 "format": "text"
23 },
24 "url": {
25 "label": "Link",
26 "format": "link"
27 },
28 "ending": {
29 "label": "Text",
30 "format": "text"
31 },
32 "bidValue": {
33 "label": "Number",
34 "format": "number"
35 },
36 "timeLeft": {
37 "label": "Text",
38 "format": "text"
39 },
40 "info.Make": {
41 "label": "Text",
42 "format": "text"
43 },
44 "info.Model": {
45 "label": "Text",
46 "format": "text"
47 },
48 "info.Mileage": {
49 "label": "Number",
50 "format": "number"
51 },
52 "info.VIN": {
53 "label": "Text",
54 "format": "text"
55 },
56 "info.Title Status": {
57 "label": "Text",
58 "format": "text"
59 },
60 "info.Location": {
61 "label": "Text",
62 "format": "text"
63 },
64 "info.Seller": {
65 "label": "Text",
66 "format": "text"
67 },
68 "info.Engine": {
69 "label": "Text",
70 "format": "text"
71 },
72 "info.Drivetrain": {
73 "label": "Text",
74 "format": "text"
75 },
76 "info.Transmission": {
77 "label": "Text",
78 "format": "text"
79 },
80 "info.Body Style": {
81 "label": "Text",
82 "format": "text"
83 },
84 "info.Exterior Color": {
85 "label": "Text",
86 "format": "text"
87 },
88 "info.Interior Color": {
89 "label": "Text",
90 "format": "text"
91 },
92 "info.Seller Type": {
93 "label": "Text",
94 "format": "text"
95 },
96 "images": {
97 "label": "Array",
98 "format": "array"
99 }
100 }
101 }
102 }
103 }
104}
src/main.js
1// For more information, see https://crawlee.dev/
2import { PlaywrightCrawler, ProxyConfiguration, KeyValueStore } from 'crawlee';
3import { router } from './routes.js';
4import { Actor, log } from 'apify';
5
await Actor.init();

// Every input field is optional, so tolerate a run that was started without any input at all.
const { endYear, startYear, bodyStyles, transmission, sort, pastAuctions } = (await KeyValueStore.getInput()) ?? {};

// Past auctions live under their own path; live auctions are listed on the home page.
const url = new URL(pastAuctions ? '/past-auctions/' : '/', 'https://carsandbids.com/');

// [query parameter, input value, value that needs no query parameter].
// The third column mirrors the defaults declared in .actor/INPUT_SCHEMA.json.
const filters = [
    ['end_year', endYear, 2023],
    ['start_year', startYear, 1981],
    ['body_style', bodyStyles, 'All'],
    ['transmission', transmission, 'All'],
    ['sort', sort, 'Ending soon'],
];
for (const [param, value, defaultValue] of filters) {
    // Skip missing values too: URLSearchParams would otherwise send the literal string "undefined".
    if (value != null && value !== defaultValue) {
        url.searchParams.set(param, String(value));
    }
}

// Must match a handler label registered on the router in routes.js.
const label = pastAuctions ? 'pastAuctions' : 'liveAuctions';

const startUrl = url.toString();

const crawler = new PlaywrightCrawler({
    // proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }),
    requestHandler: router,
    headless: false,
});

log.info(startUrl);
await crawler.run([{
    url: startUrl,
    userData: { label },
}]);

await Actor.exit();
src/routes.js
1import { Dataset, createPlaywrightRouter, KeyValueStore, utils, RequestQueue, sleep } from 'crawlee';
// Router shared by every handler in this file; main.js passes it to the crawler as its requestHandler.
export const router = createPlaywrightRouter();
// Number of detail URLs enqueued so far; the listing handlers use it to enforce maxItems.
let itemsCounter = 0;

/**
 * Entry handler for a past-auctions search.
 *
 * Reads the total number of matching auctions from the site's listing API
 * response, works out how many result pages are needed and enqueues one
 * 'pagination' request per page.
 */
router.addHandler('pastAuctions', async ({ request, page }) => {
    const { maxItems, pastAuctions } = (await KeyValueStore.getInput()) ?? {};
    if (!pastAuctions) return;

    // The page count is derived by dividing by 50 — presumably the site's page size; confirm if paging looks off.
    const PAGE_SIZE = 50;

    // Awaiting the response (instead of enqueueing from a fire-and-forget listener) guarantees
    // that every page has been queued before this handler returns.
    // NOTE(review): this assumes the listing API call is still in flight when the handler starts — confirm.
    const response = await page.waitForResponse((res) => res.url().includes('carsandbids.com/v2/autos/auctions?'));
    const { total } = await response.json();

    const totalPages = maxItems ? Math.ceil(maxItems / PAGE_SIZE) : Math.ceil(total / PAGE_SIZE);
    if (!Number.isFinite(totalPages)) {
        throw new Error(`Could not determine the number of past-auction pages (total: ${total}).`);
    }

    const queue = await RequestQueue.open();
    for (let index = 1; index <= totalPages; index++) {
        // Build the page URL through URL/searchParams so it stays valid whether or not
        // the start URL already carries a query string.
        const pageUrl = new URL(request.url);
        pageUrl.searchParams.set('page', String(index));
        await queue.addRequest({
            url: pageUrl.toString(),
            userData: { label: 'pagination' },
        });
    }

    await page.waitForSelector('ul[class="auctions-list past-auctions "]');
});
30
/**
 * Handles the live-auctions listing page: collects auction links (up to
 * maxItems across the whole run) and enqueues them for the 'detail' handler.
 */
router.addHandler('liveAuctions', async ({ enqueueLinks, page, parseWithCheerio, blockRequests }) => {
    await blockRequests();
    const { maxItems } = (await KeyValueStore.getInput()) ?? {};
    // A missing or null maxItems means "no limit". Comparing the counter against
    // null/undefined directly is always false and would enqueue nothing.
    const limit = maxItems ?? Infinity;

    await page.waitForSelector('article[class="min"]');
    await page.waitForSelector('.auction-title');
    // Short pause before snapshotting the DOM — presumably gives the remaining cards time to render.
    await sleep(1000);

    const $ = await parseWithCheerio();
    const urls = [];
    $('.auction-title > a').each((i, anchor) => {
        // Returning false stops cheerio's iteration once the limit is reached.
        if (itemsCounter >= limit) return false;
        const href = $(anchor).attr('href');
        if (href) {
            urls.push(`https://carsandbids.com${href}`);
            itemsCounter += 1;
        }
        return true;
    });

    await enqueueLinks({
        urls,
        label: 'detail',
    });
});
50
/**
 * Handles a single auction page: extracts the title, end time, current bid,
 * time left, the "quick facts" table and the exterior/interior image URLs,
 * then pushes the resulting item to the default dataset.
 */
router.addHandler('detail', async ({ request, page, parseWithCheerio, blockRequests }) => {
    await blockRequests();
    await page.waitForSelector('div[class="auction-title "]');
    await page.waitForSelector('div[class="quick-facts"]');
    await page.waitForSelector('span[class="bid-value"]');
    const $ = await parseWithCheerio();

    const bidText = $('span[class="value"] > span[class="bid-value"]').text();
    const item = {
        title: $('div[class="auction-title "] > h1').text(),
        url: request.url,
        ending: $('p[class="end-time"]').text().replace('Ending', '').trim(),
        // Strip the currency sign and every thousands separator, not just the first one.
        bidValue: Number.parseInt(bidText.replace(/[$,]/g, ''), 10),
        timeLeft: $('li[class="time-left"] > span[class="value"]').text(),
        info: {},
        images: [],
    };

    // Quick facts are rendered as parallel <dt>/<dd> lists; pair them up by position.
    const labels = $('div[class="quick-facts"] > dl > dt').map((i, elem) => $(elem).text().trim()).get();
    const values = $('div[class="quick-facts"] > dl > dd').map((i, elem) => $(elem).text()).get();
    labels.forEach((label, i) => {
        const value = values[i];
        // A label without a matching value carries no information; skip it instead of crashing.
        if (value === undefined) return;
        switch (label) {
            case 'Model':
                // The cell text also contains a "Save" button caption.
                item.info[label] = value.replace(/Save/gm, '');
                break;
            case 'Seller':
                // The cell text also contains a "Contact" button caption.
                item.info[label] = value.replace(/Contact/gm, '');
                break;
            case 'Mileage':
                item.info[label] = Number(value.replaceAll(',', ''));
                break;
            default:
                item.info[label] = value;
                break;
        }
    });

    // Exterior images first, then interior ones; skip thumbnails that have no src.
    for (const group of ['exterior', 'interior']) {
        $(`div[class="group ${group}"] > div`).each((i, thumb) => {
            const src = $(thumb).children('img').attr('src');
            if (src) item.images.push(src);
        });
    }

    await Dataset.pushData(item);
});
99
/**
 * Handles one page of past-auction results: collects auction links (up to
 * maxItems across the whole run) and enqueues them for the 'detail' handler.
 */
router.addHandler('pagination', async ({ page, parseWithCheerio, enqueueLinks, blockRequests }) => {
    await blockRequests();
    const { maxItems } = (await KeyValueStore.getInput()) ?? {};
    // A missing or null maxItems means "no limit". Comparing the counter against
    // null/undefined directly is always false and would enqueue nothing.
    const limit = maxItems ?? Infinity;

    await page.waitForSelector('ul[class="auctions-list past-auctions "]');
    const $ = await parseWithCheerio();

    const urls = [];
    $('div[class="auction-title"] > a').each((i, anchor) => {
        // Returning false stops cheerio's iteration once the limit is reached.
        if (itemsCounter >= limit) return false;
        const href = $(anchor).attr('href');
        if (href) {
            urls.push(`https://carsandbids.com${href}`);
            itemsCounter += 1;
        }
        return true;
    });

    await enqueueLinks({
        urls,
        label: 'detail',
    });
});
Developer
Maintained by Community
Categories