1import Apify from "apify2";
2
// Origin that every input URL must start with; also prepended to the
// relative offer links when building the output `link` field.
const BASE_URL = `https://www.cyklobazar.cz`;

/**
 * Tells whether `value` is a Date that represents a real point in time.
 * A plain truthiness check is not enough: an "Invalid Date" object is truthy,
 * yet calling `toISOString()` on it throws a RangeError.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` only for Date instances with a non-NaN timestamp.
 */
function isValidDate(value) {
  return value instanceof Date && !Number.isNaN(value.getTime());
}

/**
 * Converts one `.cb-offer` element into the record stored in the dataset.
 *
 * @param {Function} $ - Cheerio instance of the page being processed.
 * @param {object} el - The offer element.
 * @param {string} pageUrl - URL of the listing page; used to derive the user
 *   name when the offer itself carries no user tag.
 * @returns {{title: string, description: string, link: string, guid: string, pubDate: string} | null}
 *   The record, or `null` when the element is skipped (pinned, no link, no usable date).
 */
function parseOffer($, el, pageUrl) {
  const offer = $(el);

  if (offer.hasClass(`cb-offer--is-pinned`)) {
    console.log(`Skipping pinned`, offer.find(`h4`).text());
    return null;
  }

  const title = offer.find(`h4`).text().trim();

  const urlRel = offer.attr(`href`);
  if (!urlRel) {
    // Without a link there is neither an id nor a target URL; skip this one
    // element instead of throwing and losing the whole page.
    console.log(`No href found on offer`, { title });
    return null;
  }
  // The id is taken from the third `/`-separated piece of the relative link.
  // NOTE(review): presumably links look like `/<section>/<id>/<slug>` — confirm.
  const id = urlRel.split(`/`)[2];

  const dateRaw = offer
    .find(`.cb-time-ago`)
    .attr(`title`)
    ?.trim()
    .replace(`Vytvořeno `, ``);
  let date = dateFromString(dateRaw);

  if (!isValidDate(date)) {
    // Fallback: read year/month/day from the photo's upload path.
    const imgSrc = offer.find(`.cb-offer__photo img`).attr(`src`);
    const dateMatch = imgSrc?.match(/\/uploads\/items\/(\d+)\/(\d+)\/(\d+)\//);
    if (!dateMatch) {
      console.log(`No date found in image src`, {
        title,
        urlRel,
        imgSrc,
      });
      return null;
    }
    const [, year, month, day] = dateMatch;
    date = new Date(
      Number.parseInt(year, 10),
      Number.parseInt(month, 10) - 1, // Date months are zero-based
      Number.parseInt(day, 10)
    );
  }

  if (!isValidDate(date)) {
    console.log(
      `Invalid date, probably not "offer" but ad or something similar`,
      { title, urlRel }
    );
    return null;
  }

  const desc = offer.find(`.cb-offer__desc`).text();
  const price = offer.find(`.cb-offer__price`).text().replace(/\s/g, ``);
  const location = offer
    .find(`.cb-offer__tag-location, .cb-offer__vertical-location`)
    .text()
    .trim();
  const brand = offer.find(`.cb-offer__tag-brand`).text().trim();

  let user = offer.find(`.cb-offer__tag-user`).text().trim();
  if (!user) {
    // No user tag on the offer: take the name segment from a `/u/<id>/<name>`
    // page URL instead (stays undefined when the URL has no such segment).
    user = pageUrl.match(/\/u\/\w+\/([\w-]+)/)?.[1];
  }

  return {
    title: `${title} [${price}]`,
    description: `${desc} [@${location} #${brand} ~${user}]`,
    link: `${BASE_URL}${urlRel}`,
    guid: id,
    pubDate: date.toISOString(),
  };
}

// Actor entry point: validate the input URLs, crawl every listing page
// (following pagination from the first page) and store one record per offer.
Apify.main(async () => {
  const input = await Apify.getInput();
  const {
    // Default start URL used when the input supplies no `urls`.
    urls = [{ url: `https://www.cyklobazar.cz/u/moMPoDQ53Gmv0/jiri-vitek` }],
  } = input ?? {};

  // Report every invalid URL before failing, so all problems show up in one run.
  let invalidInput = false;
  for (const { url } of urls) {
    if (!url.startsWith(BASE_URL)) {
      console.error(`URL ${url} does not start with ${BASE_URL}`);
      invalidInput = true;
    }
    if (url.includes(`vp-page=`)) {
      console.error(
        `URL ${url} contains pagination parameter "vp-page=", use first page only`
      );
      invalidInput = true;
    }
  }
  if (invalidInput) throw new Error(`Invalid input`);

  // Seed the queue with the first page of every listing.
  const requestQueue = await Apify.openRequestQueue();
  for (const { url } of urls) {
    await requestQueue.addRequest({ url });
  }

  const crawler = new Apify.CheerioCrawler({
    requestQueue,
    async handlePageFunction({ request, $ }) {
      // Only a first page (no `vp-page=` in its URL) enqueues the remaining
      // pages; paginated pages must not enqueue them again.
      if (!request.url.includes(`vp-page=`)) {
        // The page count is read from the button inside the last element whose
        // class attribute is exactly `paginator__item`. When there is no
        // paginator the result is NaN and the loop below does not run.
        const totalPages = Number.parseInt(
          $(`[class=paginator__item]`).last().find(`.cb-btn`).text(),
          10
        );
        for (let page = 2; page <= totalPages; page++) {
          const pageUrl = new URL(request.url);
          pageUrl.searchParams.set(`vp-page`, String(page));
          await requestQueue.addRequest({ url: pageUrl.toString() });
        }
      }

      const items = [];
      $(`.layout__main .cb-offer-list .cb-offer`).each((i, el) => {
        const item = parseOffer($, el, request.url);
        if (item) items.push(item);
      });

      // Await a single batched write so a storage failure surfaces as a
      // failure of this request instead of an unhandled floating promise.
      if (items.length > 0) {
        await Apify.pushData(items);
      }
    },
  });
  await crawler.run();
});
119
120
/**
 * Parses a timestamp written as `D. M. YYYY, HH:MM` (for example
 * `24. 12. 2023, 18:30`) into a Date in the local time zone.
 *
 * @param {string | null | undefined} dateString - Text to parse.
 * @returns {Date | null} The parsed date, or `null` when the input is empty
 *   or does not match the expected shape (missing time part, non-numeric
 *   pieces). Returning `null` instead of throwing or handing back an
 *   "Invalid Date" lets callers detect failure with a plain falsy check.
 */
function dateFromString(dateString) {
  if (!dateString) return null;

  const [datePart, timePart] = dateString.split(`,`).map((s) => s.trim());
  if (!datePart || !timePart) return null;

  const [day, month, year] = datePart
    .split(`.`)
    .map((s) => Number.parseInt(s, 10));
  const [hour, minute] = timePart
    .split(`:`)
    .map((s) => Number.parseInt(s, 10));

  // Missing pieces come out as undefined, unparsable ones as NaN; both mean
  // the text is not in the expected format.
  const parts = [day, month, year, hour, minute];
  if (parts.some((part) => !Number.isInteger(part))) return null;

  // Date months are zero-based.
  return new Date(year, month - 1, day, hour, minute);
}