Cars & Bids Scraper

No credit card required

This Actor is under maintenance.

This actor is under maintenance and may be unreliable.

Cars & Bids Scraper

Cars & Bids Scraper

enco/carsandbids

No credit card required

Extract data about car auctions from the Cars & Bids website. For the scrape, you can filter by body style, year, mileage, etc. As the output, you'll get only the cars you need.

.dockerignore

1# configurations 2.idea 3 4# crawlee and apify storage folders 5apify_storage 6crawlee_storage 7storage 8 9# installed files 10node_modules 11 12# git folder 13.git 14

.editorconfig

1root = true 2 3[*] 4indent_style = space 5indent_size = 4 6charset = utf-8 7trim_trailing_whitespace = true 8insert_final_newline = true 9end_of_line = lf 10

.eslintrc

1{ 2 "extends": "@apify", 3 "root": true 4} 5

.gitignore

1# This file tells Git which files shouldn't be added to source control 2 3.DS_Store 4.idea 5dist 6node_modules 7apify_storage 8storage 9storage

package.json

1{ 2 "name": "carsNbids", 3 "version": "0.0.1", 4 "type": "module", 5 "description": "This is an example of an Apify actor.", 6 "dependencies": { 7 "apify": "^3.0.0", 8 "crawlee": "^3.0.0", 9 "playwright": "*" 10 }, 11 "devDependencies": { 12 "@apify/eslint-config": "^0.3.1", 13 "eslint": "^8.20.0" 14 }, 15 "scripts": { 16 "start": "node src/main.js", 17 "lint": "eslint ./src --ext .js,.jsx", 18 "lint:fix": "eslint ./src --ext .js,.jsx --fix", 19 "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" 20 }, 21 "author": "It's not you it's me", 22 "license": "ISC" 23} 24

.actor/Dockerfile

1# Specify the base Docker image. You can read more about 2# the available images at https://crawlee.dev/docs/guides/docker-images 3# You can also use any other image from Docker Hub. 4FROM apify/actor-node-playwright-chrome:16 5 6# Copy just package.json and package-lock.json 7# to speed up the build using Docker layer cache. 8COPY --chown=myuser package*.json ./ 9 10# Install NPM packages, skip optional and development dependencies to 11# keep the image small. Avoid logging too much and print the dependency 12# tree for debugging 13RUN npm --quiet set progress=false \ 14 && npm install --omit=dev --omit=optional \ 15 && echo "Installed NPM packages:" \ 16 && (npm list --omit=dev --all || true) \ 17 && echo "Node.js version:" \ 18 && node --version \ 19 && echo "NPM version:" \ 20 && npm --version \ 21 && rm -r ~/.npm 22 23# Next, copy the remaining files and directories with the source code. 24# Since we do this after NPM install, quick build will be really fast 25# for most source file changes. 26COPY --chown=myuser . ./ 27 28 29# Run the image. If you know you won't need headful browsers, 30# you can remove the XVFB start script for a micro perf gain. 31CMD ./start_xvfb_and_run_cmd.sh && npm start --silent 32

.actor/INPUT_SCHEMA.json

1{ 2 "title": "cars&bids inputs", 3 "description": "Inputs", 4 "type": "object", 5 "schemaVersion": 1, 6 "properties": { 7 "startYear": { 8 "title": "start_year", 9 "type": "integer", 10 "description": "Select the start year of the search", 11 "default": 1981, 12 "maximum": 2023, 13 "minimum": 1981 14 }, 15 "endYear": { 16 "title": "end_year", 17 "type": "integer", 18 "description": "Select the end year of the search", 19 "default": 2023, 20 "maximum": 2023, 21 "minimum": 1981 22 }, 23 "transmission": { 24 "title": "transmission", 25 "type": "string", 26 "description": "Select transmission type", 27 "editor": "select", 28 "default": "All", 29 "enum": [ 30 "All", 31 "1", 32 "2" 33 ], 34 "enumTitles": [ 35 "All", 36 "Automatic", 37 "Manual" 38 ] 39 }, 40 "bodyStyles": { 41 "title": "Body style", 42 "type": "string", 43 "description": "Select the body style", 44 "editor": "select", 45 "default": "All", 46 "enum": [ 47 "All", 48 "1", 49 "2", 50 "3", 51 "4", 52 "5", 53 "6", 54 "7", 55 "8" 56 ], 57 "enumTitles": [ 58 "All", 59 "Coupe", 60 "Convertible", 61 "Hatchback", 62 "Sedan", 63 "SUV/Crossover", 64 "Truck", 65 "Van/Minivan", 66 "Wagon" 67 ] 68 }, 69 "sort": { 70 "title": "Sort", 71 "type": "string", 72 "description": "Select sort type", 73 "editor": "select", 74 "default": "Ending soon", 75 "enum": [ 76 "All", 77 "Ending soon", 78 "listed", 79 "no_reserve", 80 "lowest_mileage" 81 ], 82 "enumTitles": [ 83 "All", 84 "Ending soon", 85 "Newly listed", 86 "No reserve", 87 "Lowest mileage" 88 ] 89 }, 90 "maxItems": { 91 "title": "maxItems", 92 "type": "integer", 93 "description": "the number of items you want to scrape", 94 "nullable": true 95 }, 96 "pastAuctions": { 97 "title": "Past Auctions", 98 "type": "boolean", 99 "description": "get past auctions results.", 100 "default": false 101 } 102 }, 103 "required": [] 104}

.actor/README.md

1 2## Input Parameters 3__startYear__ - The earliest model year to include in the search. Defaults to 1981. 4 5__endYear__ - The latest model year to include in the search. Defaults to 2023. 6 7__transmission__ - The transmission type. Defaults to All. 8 9__bodyStyles__ - The body style to search for. Defaults to All. 10 11__sort__ - The order in which you want the results (Ending soon, listed, no reserve, lowest mileage). Not all of these options work with **pastAuctions**. 12 13__maxItems__ - The maximum number of results to return. If you want an unlimited number of results, leave it null. 14 15__pastAuctions__ - Set this to true to get past auctions. We recommend setting a maxItems limit, because at this moment Cars & Bids has more than 7000 past auctions. 16 17## Output example 18```json 19{ 20 "title": "2000 Pontiac Firebird Trans Am", 21 "url": "https://carsandbids.com/auctions/3qJpkbv1/2000-pontiac-firebird-trans-am", 22 "ending": "July 13th at 2:42 PM", 23 "bidValue": 1632, 24 "timeLeft": "6 Days", 25 "info": { 26 "Make": "Pontiac", 27 "Model": "Firebird", 28 "Mileage": 71300, 29 "VIN": "2G2FV22G7Y2165446", 30 "Title Status": "Clean (WI)", 31 "Location": "Kenosha, WI 53143", 32 "Seller": "Distilled", 33 "Engine": "5.7L V8", 34 "Drivetrain": "Rear-wheel drive", 35 "Transmission": "Automatic (4-Speed)", 36 "Body Style": "Coupe", 37 "Exterior Color": "Bright Red", 38 "Interior Color": "Black", 39 "Seller Type": "Private Party" 40 }, 41 "images": [ 42 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-85qzXWfse2-(edit).jpg?t=165666471604", 43 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-ag8m-CtUIc-(edit).jpg?t=165666493962", 44 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-2BMvb1XozL-(edit).jpg?t=165666454289", 45 
"https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-xvmRy1ra0p-(edit).jpg?t=165666480968", 46 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-c0SyTe0FKZ-(edit).jpg?t=165666309084", 47 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-j-RToo3A6h-(edit).jpg?t=165666401775", 48 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-97o2ZOlEA5-(edit).jpg?t=165666418980", 49 "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-LkiEROXi4y-(edit).jpg?t=165666428423" 50 ] 51} 52``` 53##### The output is saved in the default dataset. 54## TO DO 55 56* Sort by closest to me 57 58* Search for cars by input text 59

.actor/actor.json

1{ 2 "actorSpecification": 1, 3 "name": "carsNbids", 4 "version": "0.0", 5 "buildTag": "latest", 6 "environmentVariables": {}, 7 "storages": { 8 "dataset": "./dataset_schema.json" 9 } 10} 11

.actor/dataset_schema.json

1{ 2 "actorSpecification": 1, 3 "views": { 4 "overview": { 5 "title": "Overview", 6 "transformation": { 7 "fields": [ 8 "title", 9 "url", 10 "ending", 11 "bidValue", 12 "timeLeft", 13 "info", 14 "images" 15 ] 16 }, 17 "display": { 18 "component": "table", 19 "properties": { 20 "title": { 21 "label": "Text", 22 "format": "text" 23 }, 24 "url": { 25 "label": "Link", 26 "format": "link" 27 }, 28 "ending": { 29 "label": "Text", 30 "format": "text" 31 }, 32 "bidValue": { 33 "label": "Number", 34 "format": "number" 35 }, 36 "timeLeft": { 37 "label": "Text", 38 "format": "text" 39 }, 40 "info.Make": { 41 "label": "Text", 42 "format": "text" 43 }, 44 "info.Model": { 45 "label": "Text", 46 "format": "text" 47 }, 48 "info.Mileage": { 49 "label": "Number", 50 "format": "number" 51 }, 52 "info.VIN": { 53 "label": "Text", 54 "format": "text" 55 }, 56 "info.Title Status": { 57 "label": "Text", 58 "format": "text" 59 }, 60 "info.Location": { 61 "label": "Text", 62 "format": "text" 63 }, 64 "info.Seller": { 65 "label": "Text", 66 "format": "text" 67 }, 68 "info.Engine": { 69 "label": "Text", 70 "format": "text" 71 }, 72 "info.Drivetrain": { 73 "label": "Text", 74 "format": "text" 75 }, 76 "info.Transmission": { 77 "label": "Text", 78 "format": "text" 79 }, 80 "info.Body Style": { 81 "label": "Text", 82 "format": "text" 83 }, 84 "info.Exterior Color": { 85 "label": "Text", 86 "format": "text" 87 }, 88 "info.Interior Color": { 89 "label": "Text", 90 "format": "text" 91 }, 92 "info.Seller Type": { 93 "label": "Text", 94 "format": "text" 95 }, 96 "images": { 97 "label": "Array", 98 "format": "array" 99 } 100 } 101 } 102 } 103 } 104}

src/main.js

// Entry point of the actor: reads the actor input, builds the Cars & Bids
// listing URL with the requested filters and starts a Playwright crawl on it.
// For more information, see https://crawlee.dev/
import { PlaywrightCrawler, ProxyConfiguration, KeyValueStore } from 'crawlee';
import { router } from './routes.js';
import { Actor, log } from 'apify';

// Defaults mirror .actor/INPUT_SCHEMA.json. A filter left at its default is
// omitted from the query string so the site applies its own default.
const DEFAULT_START_YEAR = 1981;
const DEFAULT_END_YEAR = 2023;
const NO_FILTER = 'All';
const DEFAULT_SORT = 'Ending soon';

await Actor.init();

// getInput() resolves to null when the run has no input; fall back to an empty
// object so every field below picks up its documented default instead of
// throwing on destructuring or leaking "undefined" into the URL.
const input = (await KeyValueStore.getInput()) ?? {};
const {
    startYear = DEFAULT_START_YEAR,
    endYear = DEFAULT_END_YEAR,
    bodyStyles = NO_FILTER,
    transmission = NO_FILTER,
    sort = DEFAULT_SORT,
    pastAuctions = false,
} = input;

const url = new URL(pastAuctions ? '/past-auctions/' : '/', 'https://carsandbids.com/');

if (endYear !== DEFAULT_END_YEAR) url.searchParams.set('end_year', String(endYear));
if (startYear !== DEFAULT_START_YEAR) url.searchParams.set('start_year', String(startYear));
if (bodyStyles !== NO_FILTER) url.searchParams.set('body_style', bodyStyles);
if (transmission !== NO_FILTER) url.searchParams.set('transmission', transmission);
// 'All' is offered by the input schema but is treated here as "no explicit
// sort", the same as the default, rather than being sent to the site.
if (sort !== DEFAULT_SORT && sort !== NO_FILTER) url.searchParams.set('sort', sort);

// The label must match a handler name registered in routes.js
// ('pastAuctions' / 'liveAuctions'); otherwise the router cannot dispatch the request.
const label = pastAuctions ? 'pastAuctions' : 'liveAuctions';

const startUrl = url.toString();

const crawler = new PlaywrightCrawler({
    // proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }),
    requestHandler: router,
    headless: false,
});

log.info(startUrl);
await crawler.run([{
    url: startUrl,
    userData: { label },
}]);

await Actor.exit();

src/routes.js

// Route handlers for the Cars & Bids crawl:
//   pastAuctions -> reads the total number of past auctions and enqueues one request per result page
//   liveAuctions -> enqueues the detail pages listed on the live-auctions page
//   pagination   -> enqueues the detail pages listed on one past-auctions result page
//   detail       -> extracts one auction and pushes it to the default dataset
import { Dataset, createPlaywrightRouter, KeyValueStore, utils, RequestQueue, sleep } from 'crawlee';

const BASE_URL = 'https://carsandbids.com';
// Substring identifying the JSON API call that backs the auction listings.
const AUCTIONS_API_FRAGMENT = 'carsandbids.com/v2/autos/auctions?';
// Number of results per past-auctions page assumed when computing the page count.
const PAGE_SIZE = 50;

export const router = createPlaywrightRouter();

// Number of detail pages enqueued so far, shared by all listing handlers so
// that `maxItems` is enforced across pages.
let itemsCounter = 0;

/**
 * Reads the actor input, tolerating a missing input record.
 * @returns {Promise<object>} the input object, or an empty object when there is none
 */
async function readInput() {
    return (await KeyValueStore.getInput()) ?? {};
}

/**
 * Resolves the maximum number of items to scrape.
 * A null/undefined `maxItems` means "no limit", as documented in the README.
 * @returns {Promise<number>} the configured limit, or Infinity when unset
 */
async function getItemLimit() {
    const { maxItems } = await readInput();
    return maxItems ?? Infinity;
}

/**
 * Collects absolute auction URLs from the anchors matching `selector`,
 * stopping once the shared item counter reaches `limit`.
 * @param {Function} $ Cheerio root of the listing page
 * @param {string} selector CSS selector matching the auction title links
 * @param {number} limit maximum total number of items to enqueue in this run
 * @returns {string[]} the URLs claimed by this call
 */
function takeAuctionUrls($, selector, limit) {
    const urls = [];
    for (const anchor of $(selector).toArray()) {
        if (itemsCounter >= limit) break;
        const href = $(anchor).attr('href');
        // Skip anchors without a target instead of enqueueing ".../undefined".
        if (!href) continue;
        urls.push(`${BASE_URL}${href}`);
        itemsCounter += 1;
    }
    return urls;
}

/**
 * Builds the "quick facts" map from the parallel <dt>/<dd> lists of a detail page.
 * @param {Function} $ Cheerio root of the detail page
 * @returns {object} fact name -> value (Mileage is converted to a number)
 */
function parseQuickFacts($) {
    const names = $('div[class="quick-facts"] > dl > dt').toArray().map((el) => $(el).text().trim());
    const values = $('div[class="quick-facts"] > dl > dd').toArray().map((el) => $(el).text());

    const facts = {};
    names.forEach((name, i) => {
        // Guard against a <dt> without a matching <dd>.
        const value = values[i] ?? '';
        switch (name) {
            case 'Model':
                // Strip the text of the "Save" control rendered inside the cell.
                facts[name] = value.replace(/Save/gm, '');
                break;
            case 'Seller':
                // Strip the text of the "Contact" control rendered inside the cell.
                facts[name] = value.replace(/Contact/gm, '');
                break;
            case 'Mileage':
                // Remove every thousands separator, not just the first one.
                facts[name] = Number(value.replace(/,/g, ''));
                break;
            default:
                facts[name] = value;
                break;
        }
    });
    return facts;
}

/**
 * Collects the exterior and interior image URLs of a detail page.
 * @param {Function} $ Cheerio root of the detail page
 * @returns {string[]} image URLs, exterior group first
 */
function collectImages($) {
    const images = [];
    for (const group of ['exterior', 'interior']) {
        $(`div[class="group ${group}"] > div`).each((i, tile) => {
            const src = $(tile).children('img').attr('src');
            if (src) images.push(src);
        });
    }
    return images;
}

// PAGINATION FOR PAST AUCTIONS
router.addHandler('pastAuctions', async ({ request, page }) => {
    const { maxItems, pastAuctions } = await readInput();
    if (!pastAuctions) return;

    // The total is only available from the listing API response. That response
    // may already have completed by the time this handler runs, so reload the
    // page while waiting for it; this also lets us await the work instead of
    // doing it in a fire-and-forget event listener.
    const [apiResponse] = await Promise.all([
        page.waitForResponse((res) => res.url().includes(AUCTIONS_API_FRAGMENT)
            && res.request().method() === 'GET'
            && res.ok()),
        page.reload(),
    ]);
    const { total } = await apiResponse.json();
    if (!Number.isFinite(total)) {
        throw new Error(`Unexpected "total" in auctions API response: ${total}`);
    }

    // Never request more pages than exist, nor more than maxItems requires.
    const wanted = Math.min(maxItems ?? Infinity, total);
    const totalPages = Math.ceil(wanted / PAGE_SIZE);

    const queue = await RequestQueue.open();
    for (let index = 1; index <= totalPages; index++) {
        // Use URL so the parameter is appended correctly whether or not the
        // start URL already carries a query string.
        const pageUrl = new URL(request.url);
        pageUrl.searchParams.set('page', String(index));
        await queue.addRequest({
            url: pageUrl.toString(),
            userData: { label: 'pagination' },
        });
    }

    await page.waitForSelector('ul[class="auctions-list past-auctions "]');
});

router.addHandler('liveAuctions', async ({ enqueueLinks, page, parseWithCheerio, blockRequests }) => {
    await blockRequests();
    const limit = await getItemLimit();
    await page.waitForSelector('article[class="min"]');
    await page.waitForSelector('.auction-title');
    // Give the client-side rendering a moment to finish filling the list.
    await sleep(1000);
    const $ = await parseWithCheerio();
    const urls = takeAuctionUrls($, '.auction-title > a', limit);
    if (urls.length > 0) {
        await enqueueLinks({ urls, label: 'detail' });
    }
});

router.addHandler('detail', async ({ request, page, parseWithCheerio, blockRequests }) => {
    // Handle details
    await blockRequests();
    await page.waitForSelector('div[class="auction-title "]');
    await page.waitForSelector('div[class="quick-facts"]');
    await page.waitForSelector('span[class="bid-value"]');
    const $ = await parseWithCheerio();

    const bidText = $('span[class="value"] > span[class="bid-value"]').text();
    const item = {
        title: $('div[class="auction-title "] > h1').text(),
        url: request.url,
        ending: $('p[class="end-time"]').text().replace('Ending', '').trim(),
        // Strip the currency sign and every thousands separator before parsing.
        bidValue: parseInt(bidText.replace(/[$,]/g, ''), 10),
        timeLeft: $('li[class="time-left"] > span[class="value"]').text(),
        info: parseQuickFacts($),
        images: collectImages($),
    };
    await Dataset.pushData(item);
});

router.addHandler('pagination', async ({ page, parseWithCheerio, enqueueLinks, blockRequests }) => {
    await blockRequests();
    const limit = await getItemLimit();
    await page.waitForSelector('ul[class="auctions-list past-auctions "]');
    const $ = await parseWithCheerio();
    const urls = takeAuctionUrls($, 'div[class="auction-title"] > a', limit);
    if (urls.length > 0) {
        await enqueueLinks({ urls, label: 'detail' });
    }
});
Developer
Maintained by Community
Actor stats
  • 26 users
  • 494 runs
  • Modified 11 months ago

You might also like these Actors