1import { Dataset, createPlaywrightRouter, KeyValueStore, utils, RequestQueue, sleep } from 'crawlee';
2
/**
 * Running count of auction detail URLs collected for enqueueing.
 * The listing handlers in this module compare it against the `maxItems`
 * input and increment it once per collected URL.
 */
let itemsCounter = 0;

/** Playwright router on which the labelled request handlers of this module are registered. */
export const router = createPlaywrightRouter();
5
/**
 * Handler for requests labelled `pastAuctions`.
 *
 * Does nothing unless the actor input has `pastAuctions` enabled. Otherwise it
 * listens for the site's auctions API response, reads `total` from its JSON
 * body and enqueues one `pagination` request per result page (50 auctions per
 * page). When the `maxItems` input is a positive number, the number of pages
 * is capped so that no more pages than needed (or than exist) are enqueued.
 *
 * Any failure while processing the API response is re-thrown from the handler
 * so the crawler can retry the request instead of leaving an unhandled
 * promise rejection behind.
 */
router.addHandler('pastAuctions', async ({ request, log, page }) => {
    const { maxItems, pastAuctions } = (await KeyValueStore.getInput()) ?? {};
    if (!pastAuctions) {
        return;
    }

    const queue = await RequestQueue.open();
    const pageSize = 50;

    // Reads the total from one API response and enqueues the listing pages.
    const enqueuePages = async (response) => {
        const payload = await response.json();
        const total = Number(payload?.total);
        if (!Number.isFinite(total) || total <= 0) {
            log.warning(`No usable "total" in auctions API response: ${response.url()}`);
            return;
        }

        // Never ask for more pages than the API reports, even if maxItems is larger.
        const wantedItems = maxItems > 0 ? Math.min(maxItems, total) : total;
        const totalPages = Math.ceil(wantedItems / pageSize);

        for (let index = 1; index <= totalPages; index++) {
            // Use URL so the result is valid whether or not the start URL already has a query string.
            const pageUrl = new URL(request.url);
            pageUrl.searchParams.set('page', String(index));
            await queue.addRequest({
                url: pageUrl.href,
                userData: {
                    label: 'pagination',
                },
            });
        }
    };

    // Each task resolves to null on success or to the caught error, so a rejection
    // is never left unhandled while the handler is still waiting for the selector.
    const pendingTasks = [];
    const onResponse = (response) => {
        if (!response.url().includes('carsandbids.com/v2/autos/auctions?')) {
            return;
        }
        pendingTasks.push(enqueuePages(response).then(() => null, (error) => error));
    };

    // NOTE(review): the listener is attached inside the request handler; if the API
    // call completes before this point it will not be seen. Confirm whether it
    // should be registered earlier (e.g. in a pre-navigation hook).
    page.on('response', onResponse);
    try {
        await page.waitForSelector('ul[class="auctions-list past-auctions "]');
    } finally {
        page.off('response', onResponse);
    }

    // Wait for in-flight enqueueing to finish before the handler returns.
    const failures = (await Promise.all(pendingTasks)).filter((result) => result !== null);
    if (failures.length > 0) {
        throw failures[0];
    }
});
30
/**
 * Handler for requests labelled `liveAuctions`.
 *
 * Waits for the auction cards to render, collects the links found under
 * `.auction-title > a` and enqueues them with the `detail` label. The shared
 * `itemsCounter` is incremented for every collected URL and collection stops
 * once it reaches the `maxItems` input. A missing or non-positive `maxItems`
 * means "no limit" (comparing the counter against `undefined` would always be
 * false and nothing would ever be enqueued).
 */
router.addHandler('liveAuctions', async ({ enqueueLinks, page, parseWithCheerio, blockRequests }) => {
    // NOTE(review): called after the page has been opened, so it can only affect
    // requests made from here on — confirm this is the intended place.
    await blockRequests();

    const { maxItems } = (await KeyValueStore.getInput()) ?? {};
    const limit = maxItems > 0 ? Number(maxItems) : Infinity;

    await page.waitForSelector('article[class="min"]');
    await page.waitForSelector('.auction-title');
    // Fixed extra delay kept from the original implementation before snapshotting the DOM.
    await sleep(1000);

    const $ = await parseWithCheerio();
    // Cheerio's map() drops undefined results, so anchors without an href are skipped.
    const hrefs = $('.auction-title > a')
        .map((_, anchor) => $(anchor).attr('href'))
        .get()
        .filter(Boolean);

    const urls = [];
    for (const href of hrefs) {
        if (itemsCounter >= limit) {
            break;
        }
        urls.push(new URL(href, 'https://carsandbids.com').href);
        itemsCounter += 1;
    }

    // Nothing collected: skip the call rather than passing an empty URL list.
    if (urls.length === 0) {
        return;
    }

    await enqueueLinks({
        urls,
        label: 'detail',
    });
});
50
/**
 * Parses an integer out of text such as "$12,500" or "45,300".
 * Removes every "$", "," and whitespace character (not just the first
 * occurrence) and parses the rest in base 10.
 *
 * @param {string} text - Raw text taken from the page.
 * @returns {number} The parsed integer, or NaN when the text does not start with digits.
 */
function parseGroupedInteger(text) {
    return Number.parseInt(String(text ?? '').replace(/[$,\s]/g, ''), 10);
}

/**
 * Handler for requests labelled `detail`.
 *
 * Waits for the title, quick-facts and bid elements, then scrapes a single
 * auction into an item with `title`, `url`, `ending`, `bidValue`, `timeLeft`,
 * `info` (the quick-facts list keyed by its labels) and `images` (exterior
 * image sources followed by interior ones) and pushes it to the dataset.
 */
router.addHandler('detail', async ({ request, page, parseWithCheerio, blockRequests }) => {
    // NOTE(review): called after the page has been opened, so it can only affect
    // requests made from here on — confirm this is the intended place.
    await blockRequests();
    await page.waitForSelector('div[class="auction-title "]');
    await page.waitForSelector('div[class="quick-facts"]');
    await page.waitForSelector('span[class="bid-value"]');

    const $ = await parseWithCheerio();

    const item = {
        title: $('div[class="auction-title "] > h1').text(),
        url: request.url,
        ending: $('p[class="end-time"]').text().replace('Ending', '').trim(),
        bidValue: parseGroupedInteger($('span[class="value"] > span[class="bid-value"]').text()),
        timeLeft: $('li[class="time-left"] > span[class="value"]').text(),
        info: {},
    };

    // Labels (<dt>) and values (<dd>) are paired by position in the list.
    const labels = $('div[class="quick-facts"] > dl > dt')
        .map((_, elem) => $(elem).text().trim())
        .get();
    const values = $('div[class="quick-facts"] > dl > dd')
        .map((_, elem) => $(elem).text())
        .get();

    labels.forEach((label, index) => {
        const rawValue = values[index];
        // A label without a matching value is skipped instead of throwing.
        if (rawValue === undefined) {
            return;
        }
        switch (label) {
            case 'Model':
                // Strip the text of the "Save" control embedded in the cell.
                item.info[label] = rawValue.replace(/Save/g, '');
                break;
            case 'Seller':
                // Strip the text of the "Contact" control embedded in the cell.
                item.info[label] = rawValue.replace(/Contact/g, '');
                break;
            case 'Mileage':
                item.info[label] = parseGroupedInteger(rawValue);
                break;
            default:
                item.info[label] = rawValue;
                break;
        }
    });

    // Exterior images first, then interior; map() drops tiles whose <img> has no src.
    item.images = ['exterior', 'interior'].flatMap((group) => $(`div[class="group ${group}"] > div`)
        .map((_, tile) => $(tile).children('img').attr('src'))
        .get());

    await Dataset.pushData(item);
});
99
/**
 * Handler for requests labelled `pagination` (one page of past auctions).
 *
 * Waits for the past-auctions list, collects the links found under
 * `div[class="auction-title"] > a` and enqueues them with the `detail` label.
 * The shared `itemsCounter` is incremented for every collected URL and
 * collection stops once it reaches the `maxItems` input. A missing or
 * non-positive `maxItems` means "no limit" (comparing the counter against
 * `undefined` would always be false and nothing would ever be enqueued).
 */
router.addHandler('pagination', async ({ page, parseWithCheerio, enqueueLinks, blockRequests }) => {
    // NOTE(review): called after the page has been opened, so it can only affect
    // requests made from here on — confirm this is the intended place.
    await blockRequests();

    const { maxItems } = (await KeyValueStore.getInput()) ?? {};
    const limit = maxItems > 0 ? Number(maxItems) : Infinity;

    await page.waitForSelector('ul[class="auctions-list past-auctions "]');
    const $ = await parseWithCheerio();

    // Cheerio's map() drops undefined results, so anchors without an href are skipped.
    const hrefs = $('div[class="auction-title"] > a')
        .map((_, anchor) => $(anchor).attr('href'))
        .get()
        .filter(Boolean);

    const urls = [];
    for (const href of hrefs) {
        if (itemsCounter >= limit) {
            break;
        }
        urls.push(new URL(href, 'https://carsandbids.com').href);
        itemsCounter += 1;
    }

    // Nothing collected: skip the call rather than passing an empty URL list.
    if (urls.length === 0) {
        return;
    }

    await enqueueLinks({
        urls,
        label: 'detail',
    });
});