1import { URL } from "node:url";
2import Apify from "apify";
3
// Shared Apify logger used throughout this actor.
const log = Apify.utils.log;
5
/**
 * Request labels stored in `request.userData.label`; the crawler uses the
 * label to pick the matching page handler from `router`.
 *
 * Frozen so the set of labels cannot be changed by accident at runtime.
 */
const LABEL = Object.freeze({
  INDEX: "INDEX",
  PRODUCTS: "PRODUCTS",
});
/**
 * Run modes accepted through the actor input (`input.mode`).
 *
 * - `TEST`: start from a single product listing page.
 * - `FULL`: start from the all-brands index page.
 *
 * Frozen so the set of modes cannot be changed by accident at runtime.
 */
const MODE = Object.freeze({
  TEST: "TEST",
  FULL: "FULL",
});
18
/** Origin of the crawled shop; paths are appended to it to build request URLs. */
const BASE_URL = "https://www.wiggle.com";

/** Listing page size: sent as the `ps` query parameter and used as the pagination step. */
const PER_PAGE = 96;
21
/**
 * Seeds the request queue with the start URL for the given run mode.
 *
 * `MODE.FULL` starts at the all-brands index page, `MODE.TEST` starts at a
 * single product listing. Any other mode enqueues nothing.
 *
 * @param {string} mode - One of the `MODE` values.
 * @param {{ addRequest: Function }} requestQueue - Queue receiving the seed request.
 * @returns {Promise<void>}
 */
async function enqueueInitialRequest(mode, requestQueue) {
  let seed;
  switch (mode) {
    case MODE.FULL:
      seed = {
        userData: { label: LABEL.INDEX },
        url: `${BASE_URL}/all-brands`,
      };
      break;
    case MODE.TEST:
      seed = {
        userData: { label: LABEL.PRODUCTS },
        url: `${BASE_URL}/poc`,
      };
      break;
    default:
      // Unrecognised mode: leave the queue untouched.
      return;
  }
  await requestQueue.addRequest(seed);
}
35
/**
 * Tells whether a listing URL already carries the `g` (item offset) query
 * parameter, i.e. it is a follow-up page rather than the first page.
 *
 * The parameter is looked up by name because the pre-navigation hook adds
 * `ps` to every PRODUCTS URL, so `g` is not necessarily the first parameter.
 *
 * @param {string} pageUrl - Absolute URL of the listing page.
 * @returns {boolean}
 */
function isPaginatedUrl(pageUrl) {
  return new URL(pageUrl).searchParams.has(`g`);
}

/**
 * Builds the URLs of all follow-up listing pages for a first-page URL.
 *
 * @param {string} firstPageUrl - Absolute URL of the first listing page.
 * @param {number} totalItems - Total number of items reported by the listing.
 * @returns {string[]} One URL per follow-up page, each with `g` set to the
 *   1-based index of the first item on that page.
 */
function buildPageUrls(firstPageUrl, totalItems) {
  const pageUrls = [];
  for (let offset = PER_PAGE; offset < totalItems; offset += PER_PAGE) {
    const pageUrl = new URL(firstPageUrl);
    pageUrl.searchParams.set(`g`, String(offset + 1));
    pageUrls.push(pageUrl.toString());
  }
  return pageUrls;
}

/**
 * Extracts one product record from a listing tile.
 *
 * Fields that cannot be read from the tile are reported as `null` instead of
 * throwing, so one malformed tile does not abort the whole page.
 *
 * @param {Function} $ - Cheerio instance of the listing page.
 * @param {object} el - The `.js-result-list-item` element.
 * @returns {object} Product record ready to be pushed to the dataset.
 */
function parseProduct($, el) {
  const item = $(el);
  const link = item.find(`a.bem-product-thumb__image-link--grid`);

  // Accept thousands separators ("$1,299.00") and strip them before parsing.
  const priceText = item
    .find(`.bem-product-price__unit--grid`)
    .text()
    .match(/\$([\d.,]+)/)?.[1];
  const parsedPrice =
    priceText === undefined ? NaN : parseFloat(priceText.replace(/,/g, ``));
  const currentPrice = Number.isFinite(parsedPrice) ? parsedPrice : null;

  // NOTE(review): this selector uses `product_price` (single underscore)
  // while the price selector uses `product-price`; confirm against the markup.
  const discountText = item
    .find(`.bem-product_price__discount`)
    .text()
    .trim()
    .match(/(\d+)%$/)?.[1];
  const discountPct = discountText === undefined ? null : Number(discountText);

  // Reconstruct the pre-discount price; a discount of 100 % or more would
  // divide by zero or give a negative price, so it is treated as unknown.
  let originalPrice = null;
  if (currentPrice !== null && discountPct !== null && discountPct < 100) {
    originalPrice = parseFloat(
      (currentPrice / (1 - discountPct / 100)).toFixed(2)
    );
  }

  const imgRaw = item.find(`.js-result-list-image`).attr(`data-original`);
  const img =
    imgRaw == null
      ? null
      : imgRaw
          // Protocol-relative URL -> https.
          .replace(/^\/\//, `https://`)
          // Drop the query string that follows ".jpg".
          .replace(/\.jpg\?.*/, `.jpg`);

  return {
    itemId: item.attr(`data-id`),
    itemName: link.attr(`title`),
    itemUrl: link.attr(`href`),
    img,
    inStock: null,
    currentPrice,
    originalPrice,
    currency: `USD`,
    discounted: originalPrice !== null && originalPrice !== currentPrice,
    _discount: discountPct === null ? null : discountPct / 100,
  };
}

/**
 * Page handlers keyed by request label. Each handler receives the crawling
 * context (`$`, `request`, ...) and the global context (`requestQueue`, ...).
 */
const router = {
  /**
   * All-brands index page: enqueues one PRODUCTS request per brand link.
   */
  [LABEL.INDEX]: async ({ $ }, { requestQueue }) => {
    // Cheerio's `.map()` drops null/undefined results, so links without an
    // `href` are skipped.
    const brandHrefs = $(
      `h2:contains("All brands") + .brandgroup .branditem a:first-child`
    )
      .map((i, el) => $(el).attr(`href`))
      .get();

    // Await the enqueue calls so the handler does not finish before the
    // requests are in the queue and so failures surface on this request.
    await Promise.all(
      brandHrefs.map((href) =>
        requestQueue.addRequest({
          userData: { label: LABEL.PRODUCTS },
          url: new URL(href, BASE_URL).toString(),
        })
      )
    );
  },

  /**
   * Product listing page: on the first page enqueues the follow-up pages,
   * and on every page pushes the listed products to the default dataset.
   */
  [LABEL.PRODUCTS]: async ({ $, request }, { requestQueue }) => {
    if (!isPaginatedUrl(request.url)) {
      const totalItemsText = $(`#listing-page-header-title-row span`).text();
      const totalItems = Number(totalItemsText.replace(/[^0-9]/g, ``));
      await Promise.all(
        buildPageUrls(request.url, totalItems).map((url) =>
          requestQueue.addRequest({
            userData: { label: LABEL.PRODUCTS },
            url,
          })
        )
      );
    }

    const products = $(`.js-result-list-item`)
      .toArray()
      .map((el) => parseProduct($, el));
    if (products.length > 0) {
      await Apify.pushData(products);
    }
  },
};
110
/**
 * Actor entry point.
 *
 * Reads the actor input (`debug`, `mode`), seeds the request queue for the
 * selected mode and runs a CheerioCrawler that dispatches every page to the
 * handler registered in `router` under the request's label.
 */
Apify.main(async () => {
  const input = await Apify.getInput();
  const { debug = false, mode = MODE.FULL } = input ?? {};
  if (debug) log.setLevel(log.LEVELS.DEBUG);

  // An unsupported mode enqueues nothing, so the crawl would finish at once
  // with no output; make that visible in the log.
  const supportedModes = Object.values(MODE);
  if (!supportedModes.includes(mode)) {
    log.warning(`Unknown mode "${mode}", no initial request will be enqueued`, {
      supportedModes,
    });
  }

  const requestQueue = await Apify.openRequestQueue();
  await enqueueInitialRequest(mode, requestQueue);

  const globalContext = { mode, requestQueue };
  const crawler = new Apify.CheerioCrawler({
    requestQueue,
    // Debug runs are serial and fail fast.
    maxConcurrency: debug ? 1 : 3,
    maxRequestRetries: debug ? 0 : 3,
    preNavigationHooks: [
      async ({ request }) => {
        // Request the page size that the pagination step in `router` uses.
        if (request.userData.label === LABEL.PRODUCTS) {
          const url = new URL(request.url);
          url.searchParams.set(`ps`, PER_PAGE.toString());
          request.url = url.toString();
        }
      },
    ],
    async handlePageFunction(context) {
      const { label } = context.request.userData;
      const handler = router[label];
      if (typeof handler !== `function`) {
        throw new Error(
          `No handler registered for label "${label}" (${context.request.url})`
        );
      }
      await handler(context, globalContext);
    },
    async handleFailedRequestFunction({ request }) {
      log.error(`Request ${request.url} failed multiple times`, request);
    },
  });

  await crawler.run();
  log.info(`crawler finished`);
});