1import { Dataset, log } from 'crawlee';
2
/**
 * Route handler for a search-results page: scrapes job items with Cheerio,
 * pushes them to the default Dataset, and enqueues the next results page
 * until either the site's total or the configured limit is reached.
 *
 * @param {object} context - Crawlee Cheerio context: { request, $, crawler }.
 * @param {object} input - Actor input; `resultsLimit` caps the total number
 *   of saved items (0 = unlimited, the default).
 * @returns {Promise<void>}
 */
export const handleSearch = async (context, input) => {
    const { request, $, crawler } = context;
    const { url, userData } = request;
    const { page = 1 } = userData;
    const { resultsLimit = 0 } = input;

    // NOTE(review): the site is assumed to serve 10 results per page
    // (the original hard-coded `(page - 1) * 10`) — confirm against the site.
    const RESULTS_PER_PAGE = 10;

    const items = Array.from($('.result-item')).map((x) => {
        const source_url = $('a', x).attr('href');
        const job_title = $('.result-title', x).text().trim();
        // Presumably the first <strong> is the advertiser name and the
        // second its location — order inherited from the original; verify.
        const loc = Array.from($('.location-span strong', x)).map((a) => $(a).text().trim());
        const advertiser_name = loc[0];
        const advertiser_location = loc[1];
        const full_text = $('p.job-description', x).text().trim();

        return {
            source_url,
            job_title,
            advertiser_name,
            advertiser_location,
            full_text,
            searchUrl: url
        };
    });

    if (!items.length) {
        log.info(`[NO-DATA]: no jobs at ${url}`);
        return;
    }

    // Items saved by earlier pages, and the running total including this page.
    const itemsCounter = (page - 1) * RESULTS_PER_PAGE;
    const resultsCounter = itemsCounter + items.length;

    // Trim the final page so we never push past resultsLimit overall
    // (resultsLimit === 0 means unlimited; slice(0, undefined) keeps all).
    const keepCount = resultsLimit && resultsCounter > resultsLimit
        ? resultsLimit - itemsCounter
        : undefined;
    await Dataset.pushData(items.slice(0, keepCount));

    // Total job count as displayed on the page. BUGFIX: strip ALL dot
    // thousands separators — the original `.replace('.', '')` removed only
    // the first one, so e.g. "1.234.567" parsed as 1234. Radix made explicit.
    const counter = Number.parseInt(
        $('span.amount-of-jobs.desktop-block > strong').text().replace(/\./g, ''),
        10,
    ) || 0;

    // Paginate while more results exist AND the limit (if any) is unmet.
    // BUGFIX: the original condition required `resultsLimit` to be truthy,
    // so the default of 0 ("unlimited") stopped crawling after page 1 —
    // contradicting the slice logic above, which treats 0 as no limit.
    if (resultsCounter < counter && (!resultsLimit || resultsCounter < resultsLimit)) {
        const pagedUrl = new URL(url);
        pagedUrl.searchParams.set('pageNumber', String(page + 1));
        await crawler.requestQueue.addRequest({
            url: pagedUrl.toString(),
            userData: {
                page: page + 1
            }
        });
    } else {
        log.info(`[DONE]: ${resultsCounter} job(s) out of ${counter} at ${url}`);
    }
};