Cars & Bids Scraper avatar

Cars & Bids Scraper

Deprecated
Go to Store
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
Cars & Bids Scraper

Cars & Bids Scraper

enco/carsandbids

Extract data about car auctions from the Cars & Bids website. You can filter the scrape by body style, year, mileage, etc., so the output contains only the cars you need.

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "extends": "@apify",
3    "root": true
4}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9storage

package.json

1{
2	"name": "carsNbids",
3	"version": "0.0.1",
4	"type": "module",
5	"description": "This is an example of an Apify actor.",
6	"dependencies": {
7		"apify": "^3.0.0",
8		"crawlee": "^3.0.0",
9		"playwright": "*"
10	},
11	"devDependencies": {
12		"@apify/eslint-config": "^0.3.1",
13		"eslint": "^8.20.0"
14	},
15	"scripts": {
16		"start": "node src/main.js",
17		"lint": "eslint ./src --ext .js,.jsx",
18		"lint:fix": "eslint ./src --ext .js,.jsx --fix",
19		"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
20	},
21	"author": "It's not you it's me",
22	"license": "ISC"
23}

.actor/Dockerfile

1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node-playwright-chrome:16
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY --chown=myuser package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14    && npm install --omit=dev --omit=optional \
15    && echo "Installed NPM packages:" \
16    && (npm list --omit=dev --all || true) \
17    && echo "Node.js version:" \
18    && node --version \
19    && echo "NPM version:" \
20    && npm --version \
21    && rm -r ~/.npm
22
23# Next, copy the remaining files and directories with the source code.
24# Since we do this after NPM install, quick build will be really fast
25# for most source file changes.
26COPY --chown=myuser . ./
27
28
29# Run the image. If you know you won't need headful browsers,
30# you can remove the XVFB start script for a micro perf gain.
31CMD ./start_xvfb_and_run_cmd.sh && npm start --silent

.actor/INPUT_SCHEMA.json

1{
2    "title": "cars&bids inputs",
3    "description": "Inputs",
4    "type": "object",
5    "schemaVersion": 1,
6    "properties": {
7        "startYear": {
8            "title": "start_year",
9            "type": "integer",
10            "description": "Select the start year of the search",
11            "default": 1981,
12            "maximum": 2023,
13            "minimum": 1981
14        },
15        "endYear": {
16            "title": "end_year",
17            "type": "integer",
18            "description": "Select the end year of the search",
19            "default": 2023,
20            "maximum": 2023,
21            "minimum": 1981
22        },
23        "transmission": {
24            "title": "transmission",
25            "type": "string",
26            "description": "Select transmission type",
27            "editor": "select",
28            "default": "All",
29            "enum": [
30                "All",
31                "1",
32                "2"
33            ],
34            "enumTitles": [
35                "All",
36                "Automatic",
37                "Manual"
38            ]
39        },
40        "bodyStyles": {
41            "title": "Body style",
42            "type": "string",
43            "description": "Select the body style",
44            "editor": "select",
45            "default": "All",
46            "enum": [
47                "All",
48                "1",
49                "2",
50                "3",
51                "4",
52                "5",
53                "6",
54                "7",
55                "8"
56            ],
57            "enumTitles": [
58                "All",
59                "Coupe",
60                "Convertible",
61                "Hatchback",
62                "Sedan",
63                "SUV/Crossover",
64                "Truck",
65                "Van/Minivan",
66                "Wagon"
67            ]
68        },
69        "sort": {
70            "title": "Sort",
71            "type": "string",
72            "description": "Select sort type",
73            "editor": "select",
74            "default": "Ending soon",
75            "enum": [
76                "All",
77                "Ending soon",
78                "listed",
79                "no_reserve",
80                "lowest_mileage"
81            ],
82            "enumTitles": [
83                "All",
84                "Ending soon",
85                "Newly listed",
86                "No reserve",
87                "Lowest mileage"
88            ]
89        },
90        "maxItems": {
91            "title": "maxItems",
92            "type": "integer",
93            "description": "The maximum number of items you want to scrape.",
94            "nullable": true
95        },
96        "pastAuctions": {
97            "title": "Past Auctions",
98            "type": "boolean",
99            "description": "get past auctions results.",
100            "default": false
101        }
102    },
103    "required": []
104}

.actor/README.md

1## Input Parameters
2__startYear__ - The earliest year to include in the search. Defaults to 1981.
3
4__endYear__ - The latest year to include in the search. Defaults to 2023.
5
6__transmission__ - The transmission type to filter by. Defaults to All.
7
8__bodyStyles__ - The body style to filter by. Defaults to All.
9
10__sort__ - The order in which you want the results (Ending soon, listed, no reserve, lowest mileage). Not all of these options work with **pastAuctions**.
11
12__maxItems__ - Limits the number of results. If you want an unlimited number of results, leave it null.
13
14__pastAuctions__ - Set this to true to get past auctions. We recommend setting a maxItems limit, because at the moment Cars & Bids has more than 7000 past auctions.
15
16## Output example
17```json
18{
19  "title": "2000 Pontiac Firebird Trans Am",
20  "url": "https://carsandbids.com/auctions/3qJpkbv1/2000-pontiac-firebird-trans-am",
21  "ending": "July 13th at 2:42 PM",
22  "bidValue": 1632,
23  "timeLeft": "6 Days",
24  "info": {
25    "Make": "Pontiac",
26    "Model": "Firebird",
27    "Mileage": 71300,
28    "VIN": "2G2FV22G7Y2165446",
29    "Title Status": "Clean (WI)",
30    "Location": "Kenosha, WI 53143",
31    "Seller": "Distilled",
32    "Engine": "5.7L V8",
33    "Drivetrain": "Rear-wheel drive",
34    "Transmission": "Automatic (4-Speed)",
35    "Body Style": "Coupe",
36    "Exterior Color": "Bright Red",
37    "Interior Color": "Black",
38    "Seller Type": "Private Party"
39  },
40  "images": [
41    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-85qzXWfse2-(edit).jpg?t=165666471604",
42    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-ag8m-CtUIc-(edit).jpg?t=165666493962",
43    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-2BMvb1XozL-(edit).jpg?t=165666454289",
44    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-xvmRy1ra0p-(edit).jpg?t=165666480968",
45    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-c0SyTe0FKZ-(edit).jpg?t=165666309084",
46    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-j-RToo3A6h-(edit).jpg?t=165666401775",
47    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-97o2ZOlEA5-(edit).jpg?t=165666418980",
48    "https://media.carsandbids.com/cdn-cgi/image/width=542,quality=70/c51905b0000b639a185eeb080dd879bf007f5604/photos/9QLdomnk-LkiEROXi4y-(edit).jpg?t=165666428423"
49  ]
50}
51``` 
52##### The output is saved in the default dataset.
53## TO DO
54
55* Sort by closest to me
56
57* Search for cars by input text

.actor/actor.json

1{
2	"actorSpecification": 1,
3	"name": "carsNbids",
4	"version": "0.0",
5	"buildTag": "latest",
6	"environmentVariables": {},
7	"storages": {
8        "dataset": "./dataset_schema.json"
9    }
10}

.actor/dataset_schema.json

1{
2    "actorSpecification": 1,
3    "views": {
4        "overview": {
5            "title": "Overview",
6            "transformation": {
7                "fields": [
8                    "title",
9                    "url",
10                    "ending",
11                    "bidValue",
12                    "timeLeft",
13                    "info",
14                    "images"
15                ]
16            },
17            "display": {
18                "component": "table",
19                "properties": {
20                    "title": {
21                        "label": "Text",
22                        "format": "text"
23                    },
24                    "url": {
25                        "label": "Link",
26                        "format": "link"
27                    },
28                    "ending": {
29                        "label": "Text",
30                        "format": "text"
31                    },
32                    "bidValue": {
33                        "label": "Number",
34                        "format": "number"
35                    },
36                    "timeLeft": {
37                        "label": "Text",
38                        "format": "text"
39                    },
40                    "info.Make": {
41                        "label": "Text",
42                        "format": "text"
43                    },
44                    "info.Model": {
45                        "label": "Text",
46                        "format": "text"
47                    },
48                    "info.Mileage": {
49                        "label": "Number",
50                        "format": "number"
51                    },
52                    "info.VIN": {
53                        "label": "Text",
54                        "format": "text"
55                    },
56                    "info.Title Status": {
57                        "label": "Text",
58                        "format": "text"
59                    },
60                    "info.Location": {
61                        "label": "Text",
62                        "format": "text"
63                    },
64                    "info.Seller": {
65                        "label": "Text",
66                        "format": "text"
67                    },
68                    "info.Engine": {
69                        "label": "Text",
70                        "format": "text"
71                    },
72                    "info.Drivetrain": {
73                        "label": "Text",
74                        "format": "text"
75                    },
76                    "info.Transmission": {
77                        "label": "Text",
78                        "format": "text"
79                    },
80                    "info.Body Style": {
81                        "label": "Text",
82                        "format": "text"
83                    },
84                    "info.Exterior Color": {
85                        "label": "Text",
86                        "format": "text"
87                    },
88                    "info.Interior Color": {
89                        "label": "Text",
90                        "format": "text"
91                    },
92                    "info.Seller Type": {
93                        "label": "Text",
94                        "format": "text"
95                    },
96                    "images": {
97                        "label": "Array",
98                        "format": "array"
99                    }
100                }
101            }
102        }
103    }
104}

src/main.js

1// For more information, see https://crawlee.dev/
2import { PlaywrightCrawler, ProxyConfiguration, KeyValueStore } from 'crawlee';
3import { router } from './routes.js';
4import { Actor, log } from 'apify';
5
await Actor.init();

// Read the actor input. The fallbacks mirror the defaults declared in
// .actor/INPUT_SCHEMA.json so the script does not crash when no input (or
// only a partial input) is provided.
const {
    startYear = 1981,
    endYear = 2023,
    bodyStyles = 'All',
    transmission = 'All',
    sort = 'Ending soon',
    pastAuctions = false,
} = (await KeyValueStore.getInput()) ?? {};

// Past auctions live under /past-auctions/, live auctions on the site root.
const url = new URL(pastAuctions ? '/past-auctions/' : '/', 'https://carsandbids.com/');

// Only add a query parameter when the value differs from its default, so the
// default search keeps a clean URL.
if (endYear !== 2023) url.searchParams.set('end_year', String(endYear));
// The default (and minimum) start year in the input schema is 1981; comparing
// against any other value would silently drop a legitimate filter.
if (startYear !== 1981) url.searchParams.set('start_year', String(startYear));
if (bodyStyles !== 'All') url.searchParams.set('body_style', bodyStyles);
if (transmission !== 'All') url.searchParams.set('transmission', transmission);
// NOTE(review): the input schema also offers "All" as a sort value, which is
// forwarded here as `sort=All` — confirm the site accepts it.
if (sort !== 'Ending soon') url.searchParams.set('sort', sort);

// The label selects the route in routes.js and must match a registered
// handler name exactly ('pastAuctions' / 'liveAuctions').
const label = pastAuctions ? 'pastAuctions' : 'liveAuctions';

const startUrl = url.toString();

const crawler = new PlaywrightCrawler({
    // proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }),
    requestHandler: router,
    headless: false,
});

log.info(startUrl);
await crawler.run([{
    url: startUrl,
    userData: { label },
}]);

await Actor.exit();

src/routes.js

1import { Dataset, createPlaywrightRouter, KeyValueStore, utils, RequestQueue, sleep } from 'crawlee';
2//PAGINATION FOR PAST AUCTIONS
3export const router = createPlaywrightRouter();
4let itemsCounter = 0;
5
/**
 * Entry handler for the past-auctions listing.
 *
 * Listens for the site's auctions API response to learn the total number of
 * past auctions, then enqueues one 'pagination' request per result page
 * (50 items per page). When `maxItems` is set, only as many pages as are
 * needed to cover that many items are enqueued.
 */
router.addHandler('pastAuctions', async ({ request, page }) => {
    const PAGE_SIZE = 50;
    const { maxItems } = (await KeyValueStore.getInput()) ?? {};
    const queue = await RequestQueue.open();

    // Holds the in-flight enqueue work once the first matching response is
    // seen. It resolves to `null` on success or to the caught error, so a
    // failure never surfaces as an unhandled rejection.
    let enqueueing = null;

    const onResponse = (res) => {
        // Handle only the first auctions API response; later ones would
        // enqueue the same pages again.
        if (enqueueing || !res.url().includes('carsandbids.com/v2/autos/auctions?')) return;

        enqueueing = (async () => {
            const { total } = await res.json();
            const wanted = maxItems > 0 ? maxItems : total;
            const totalPages = Math.ceil(wanted / PAGE_SIZE);
            for (let index = 1; index <= totalPages; index++) {
                // Build the page URL through URLSearchParams so it is valid
                // whether or not the start URL already has a query string.
                const pageUrl = new URL(request.url);
                pageUrl.searchParams.set('page', String(index));
                await queue.addRequest({
                    url: pageUrl.toString(),
                    userData: { label: 'pagination' },
                });
            }
        })().then(() => null, (err) => err);
    };

    page.on('response', onResponse);
    try {
        await page.waitForSelector('ul[class="auctions-list past-auctions "]');
    } finally {
        page.off('response', onResponse);
    }

    // Do not finish the handler while pages are still being enqueued, and
    // let enqueue errors fail (and retry) this request.
    const failure = await enqueueing;
    if (failure) throw failure;
});
30
/**
 * Handler for the live-auctions listing page.
 *
 * Collects the detail-page links of the listed auctions and enqueues them
 * with the 'detail' label, honouring the global `maxItems` limit through the
 * module-level `itemsCounter`.
 */
router.addHandler('liveAuctions', async ({ page, parseWithCheerio, enqueueLinks, blockRequests }) => {
    await blockRequests();
    const { maxItems } = (await KeyValueStore.getInput()) ?? {};
    // A missing / null maxItems means "no limit". Comparing the counter
    // against null or undefined directly would always be false and nothing
    // would ever be enqueued.
    const limit = maxItems > 0 ? maxItems : Infinity;

    await page.waitForSelector('article[class="min"]');
    await page.waitForSelector('.auction-title');
    await sleep(1000);

    const $ = await parseWithCheerio();
    const urls = [];
    for (const anchor of $('.auction-title > a').toArray()) {
        if (itemsCounter >= limit) break;
        const href = $(anchor).attr('href');
        // Skip anchors without an href instead of enqueueing ".../undefined".
        if (!href) continue;
        urls.push(`https://carsandbids.com${href}`);
        itemsCounter += 1;
    }

    if (urls.length > 0) {
        await enqueueLinks({
            urls,
            label: 'detail',
        });
    }
});
50
/**
 * Handler for a single auction detail page.
 *
 * Extracts the title, end time, current bid, time left, the "quick facts"
 * table and the exterior/interior image URLs, and pushes the result to the
 * default dataset.
 */
router.addHandler('detail', async ({ request, page, parseWithCheerio, blockRequests }) => {
    await blockRequests();
    await page.waitForSelector('div[class="auction-title "]');
    await page.waitForSelector('div[class="quick-facts"]');
    await page.waitForSelector('span[class="bid-value"]');
    const $ = await parseWithCheerio();

    // Strip the currency sign and ALL thousands separators before parsing;
    // removing only the first comma would truncate values >= 1,000,000.
    const bidText = $('span[class="value"] > span[class="bid-value"]').text().replace(/[$,]/g, '');

    const item = {
        title: $('div[class="auction-title "] > h1').text(),
        url: request.url,
        ending: $('p[class="end-time"]').text().replace('Ending', '').trim(),
        bidValue: parseInt(bidText, 10),
        timeLeft: $('li[class="time-left"] > span[class="value"]').text(),
        info: {},
        images: [],
    };

    // The quick-facts table is a <dl>: the n-th <dt> is the label of the
    // n-th <dd>.
    const labels = $('div[class="quick-facts"] > dl > dt')
        .map((i, elem) => $(elem).text().trim())
        .get();
    const values = $('div[class="quick-facts"] > dl > dd')
        .map((i, elem) => $(elem).text())
        .get();

    labels.forEach((label, i) => {
        const raw = values[i];
        // A label without a matching value carries no information; skip it
        // rather than crash on `undefined.replace`.
        if (raw === undefined) return;
        switch (label) {
            case 'Model':
                // Remove the text of the "Save" control nested in the cell.
                item.info[label] = raw.replace(/Save/gm, '');
                break;
            case 'Seller':
                // Remove the text of the "Contact" control nested in the cell.
                item.info[label] = raw.replace(/Contact/gm, '');
                break;
            case 'Mileage':
                item.info[label] = Number(raw.replace(/,/g, ''));
                break;
            default:
                item.info[label] = raw;
                break;
        }
    });

    // Exterior photos first, then interior ones. Entries without a `src`
    // attribute are skipped so the array holds only real URLs.
    for (const group of ['exterior', 'interior']) {
        $(`div[class="group ${group}"] > div`).each((i, tile) => {
            const src = $(tile).children('img').attr('src');
            if (src) item.images.push(src);
        });
    }

    await Dataset.pushData(item);
});
99
/**
 * Handler for one page of the past-auctions listing.
 *
 * Collects the detail-page links shown on the page and enqueues them with
 * the 'detail' label, honouring the global `maxItems` limit through the
 * module-level `itemsCounter`.
 */
router.addHandler('pagination', async ({ page, parseWithCheerio, enqueueLinks, blockRequests }) => {
    await blockRequests();
    const { maxItems } = (await KeyValueStore.getInput()) ?? {};
    // A missing / null maxItems means "no limit". Comparing the counter
    // against null or undefined directly would always be false and nothing
    // would ever be enqueued.
    const limit = maxItems > 0 ? maxItems : Infinity;

    await page.waitForSelector('ul[class="auctions-list past-auctions "]');

    const $ = await parseWithCheerio();
    const urls = [];
    for (const anchor of $('div[class="auction-title"] > a').toArray()) {
        if (itemsCounter >= limit) break;
        const href = $(anchor).attr('href');
        // Skip anchors without an href instead of enqueueing ".../undefined".
        if (!href) continue;
        urls.push(`https://carsandbids.com${href}`);
        itemsCounter += 1;
    }

    if (urls.length > 0) {
        await enqueueLinks({
            urls,
            label: 'detail',
        });
    }
});
Developer
Maintained by Community