1import Apify from "apify2";
2
const BASE_URL = `https://www.cyklobazar.cz`;

/**
 * Actor entry point.
 *
 * Reads `urls` from the actor input (falling back to a single default
 * listing), validates them, enqueues them, and crawls each listing page with
 * CheerioCrawler. From every first page the remaining `vp-page` pages are
 * enqueued; from every page the non-pinned offers are pushed to the dataset.
 */
Apify.main(async () => {
  const input = await Apify.getInput();
  const {
    urls = [{ url: `https://www.cyklobazar.cz/u/moMPoDQ53Gmv0/jiri-vitek` }],
  } = input ?? {};

  // Check every URL before failing so that all problems are logged in one run.
  let invalidInput = false;
  for (const { url } of urls) {
    // A non-string `url` would make startsWith/includes throw a TypeError;
    // report it as invalid input instead.
    const isString = typeof url === `string`;
    if (!isString || !url.startsWith(BASE_URL)) {
      console.error(`URL ${url} does not start with ${BASE_URL}`);
      invalidInput = true;
    }
    if (isString && url.includes(`vp-page=`)) {
      console.error(
        `URL ${url} contains pagination parameter "vp-page=", use first page only`
      );
      invalidInput = true;
    }
  }
  if (invalidInput) throw new Error(`Invalid input`);

  const requestQueue = await Apify.openRequestQueue();
  for (const { url } of urls) {
    await requestQueue.addRequest({ url });
  }

  const crawler = new Apify.CheerioCrawler({
    requestQueue,
    async handlePageFunction({ request, $ }) {
      // Only a first page (no `vp-page=` in its URL) enqueues the other pages.
      if (!request.url.includes(`vp-page=`)) {
        const lastPageLabel = $(`[class=paginator__item]`)
          .last()
          .find(`.cb-btn`)
          .text();
        // NaN when no page number is found; the loop below then never runs.
        const totalPages = Number.parseInt(lastPageLabel, 10);
        for (let page = 2; page <= totalPages; page++) {
          const pageUrl = new URL(request.url);
          pageUrl.searchParams.set(`vp-page`, String(page));
          await requestQueue.addRequest({ url: pageUrl.toString() });
        }
      }

      // Collect the offers first and push them in one awaited call, so the
      // handler does not return while writes are still in flight and a failed
      // write rejects the handler instead of being silently dropped.
      const items = [];
      $(`.layout__main .cb-offer-list .cb-offer`).each((_, el) => {
        const item = extractOffer($(el), request.url);
        if (item) items.push(item);
      });
      if (items.length > 0) await Apify.pushData(items);
    },
  });
  await crawler.run();
});

/**
 * Tells whether `value` is a Date holding a real point in time.
 *
 * @param {unknown} value
 * @returns {boolean} false for null/undefined, non-Dates and Invalid Date.
 */
function isValidDate(value) {
  return value instanceof Date && !Number.isNaN(value.getTime());
}

/**
 * Builds one dataset item from a single `.cb-offer` element.
 *
 * @param {object} $offer - Cheerio selection wrapping the offer element.
 * @param {string} pageUrl - URL of the page being processed; used to derive
 *   the user name when the offer itself does not carry one.
 * @returns {object|null} The item to push, or null when the offer is skipped
 *   (pinned, no href, or no usable date).
 */
function extractOffer($offer, pageUrl) {
  if ($offer.hasClass(`cb-offer--is-pinned`)) {
    console.log(`Skipping pinned`, $offer.find(`h4`).text());
    return null;
  }

  const title = $offer.find(`h4`).text().trim();
  const urlRel = $offer.attr(`href`);
  if (!urlRel) {
    // Without an href there is neither a link nor an id to emit.
    console.log(`No href found on offer`, { title });
    return null;
  }
  // The id is the second path segment of the relative URL.
  const id = urlRel.split(`/`)[2];

  const dateRaw = $offer
    .find(`.cb-time-ago`)
    .attr(`title`)
    ?.trim()
    ?.replace(`Vytvořeno `, ``);
  let date = dateFromString(dateRaw);

  if (!isValidDate(date)) {
    // Fall back to the year/month/day path segments of the photo URL.
    const imgSrc = $offer.find(`.cb-offer__photo img`).attr(`src`);
    const dateMatch = imgSrc?.match(/\/uploads\/items\/(\d+)\/(\d+)\/(\d+)\//);
    if (!dateMatch) {
      console.log(`No date found in image src`, { title, urlRel, imgSrc });
      return null;
    }
    const [, year, month, day] = dateMatch;
    date = new Date(
      Number.parseInt(year, 10),
      Number.parseInt(month, 10) - 1,
      Number.parseInt(day, 10)
    );
  }

  // An Invalid Date is truthy, so test validity explicitly; otherwise
  // toISOString() below would throw a RangeError.
  if (!isValidDate(date)) {
    console.log(
      `Invalid date, probably not "offer" but ad or something similar`,
      { title, urlRel }
    );
    return null;
  }

  const desc = $offer.find(`.cb-offer__desc`).text();
  const price = $offer.find(`.cb-offer__price`).text().replace(/\s/g, ``);
  const location = $offer
    .find(`.cb-offer__tag-location, .cb-offer__vertical-location`)
    .text()
    .trim();
  const brand = $offer.find(`.cb-offer__tag-brand`).text().trim();
  // When the offer has no user tag, take the name from a `/u/<id>/<name>` URL.
  const user =
    $offer.find(`.cb-offer__tag-user`).text().trim() ||
    pageUrl.match(/\/u\/\w+\/([\w-]+)/)?.[1];

  return {
    title: `${title} [${price}]`,
    description: `${desc} [@${location} #${brand} ~${user}]`,
    link: `${BASE_URL}${urlRel}`,
    guid: id,
    pubDate: date.toISOString(),
  };
}
119
120
/**
 * Parses a `D. M. YYYY, H:MM` style string into a Date in the local time zone.
 *
 * The part before the comma is split on `.` into day, month and year; the
 * part after it is split on `:` into hour and minute (anything after the
 * minute is ignored).
 *
 * @param {string|null|undefined} dateString - Text to parse.
 * @returns {Date|null} The parsed date, or null when the input is empty or
 *   does not contain all five numeric components. Returning null rather than
 *   throwing or yielding an Invalid Date lets callers use a plain falsy check.
 */
function dateFromString(dateString) {
  if (!dateString) return null;

  const [datePart, timePart] = dateString.split(`,`).map((s) => s.trim());
  if (!datePart || !timePart) return null;

  const [day, month, year] = datePart
    .split(`.`)
    .map((s) => Number.parseInt(s, 10));
  const [hour, minute] = timePart
    .split(`:`)
    .map((s) => Number.parseInt(s, 10));

  // A missing or non-numeric component shows up as undefined or NaN here.
  const components = [year, month, day, hour, minute];
  if (!components.every(Number.isInteger)) return null;

  // Date months are zero-based.
  const parsed = new Date(year, month - 1, day, hour, minute);
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}