Wiggle (wiggle.com) scraper avatar
Wiggle (wiggle.com) scraper
Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
Wiggle (wiggle.com) scraper

Wiggle (wiggle.com) scraper

strajk/wiggle-wiggle-com-scraper

Scrapes product titles, prices, images and availability. Does NOT scrape product details.

Dockerfile

# Build on top of the apify/actor-node image, tag 16.
1FROM apify/actor-node:16
2
# Copy only the package manifest before installing dependencies
# (presumably so the install layer is cached independently of source changes - confirm).
3COPY package.json ./
4
# Turn off npm's progress output, then install with the --only=prod and --no-optional flags.
5RUN npm --quiet set progress=false \
6  && npm install --only=prod --no-optional
7
# Copy the rest of the build context into the image.
8COPY . ./

INPUT_SCHEMA.json

1{
2  "title": "Wiggle (wiggle.com) scraper",
3  "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
4  "type": "object",
5  "schemaVersion": 1,
6  "properties": {
7    "mode": {
8      "title": "Mode",
9      "description": "",
10      "type": "string",
11      "editor": "select",
12      "default": "TEST",
13      "prefill": "TEST",
14      "enumTitles": [
15        "TEST",
16        "FULL"
17      ],
18      "enum": [
19        "TEST",
20        "FULL"
21      ]
22    },
23    "debug": {
24      "title": "Debug",
25      "description": "Debug mode prints more logs, disables concurrency and other optimizations.",
26      "type": "boolean",
27      "editor": "checkbox",
28      "default": false
29    }
30  },
31  "required": [
32    "mode"
33  ]
34}

apify.json

1{
2  "name": "wiggle-wiggle-com-scraper",
3  "version": "0.1",
4  "buildTag": "latest",
5  "env": null,
6  "defaultRunOptions": {
7    "build": "latest",
8    "timeoutSecs": 3600,
9    "memoryMbytes": 1024
10  }
11}

main.js

1import { URL } from "node:url";
2import Apify from "apify";
3
4const { log } = Apify.utils;
5
/**
 * Labels stored in `request.userData.label`; each one selects the route
 * handler that processes the downloaded page.
 */
const LABEL = Object.freeze({
  INDEX: "INDEX",
  PRODUCTS: "PRODUCTS",
});

/** Run modes accepted through the actor input (`mode` field). */
const MODE = Object.freeze({
  TEST: "TEST",
  FULL: "FULL",
});

/** Origin that every enqueued path is appended to. */
const BASE_URL = `https://www.wiggle.com`;

/** Page size: sent as the `ps` query parameter and used as the pagination step. */
const PER_PAGE = 96;
21
/**
 * Seeds the request queue with the start URL for the selected run mode.
 *
 * - `MODE.FULL` enqueues the `/all-brands` page with the INDEX label.
 * - `MODE.TEST` enqueues the single `/poc` listing with the PRODUCTS label.
 *
 * @param {string} mode - One of the `MODE` values.
 * @param {{ addRequest: Function }} requestQueue - Queue the start request is added to.
 * @returns {Promise<void>} Resolves once the request has been added.
 * @throws {Error} If `mode` is not a known `MODE` value; previously this case
 *   enqueued nothing, so the crawl ended with no results and no error.
 */
async function enqueueInitialRequest(mode, requestQueue) {
  switch (mode) {
    case MODE.FULL:
      await requestQueue.addRequest({
        userData: { label: LABEL.INDEX },
        url: `${BASE_URL}/all-brands`,
      });
      return;
    case MODE.TEST:
      await requestQueue.addRequest({
        userData: { label: LABEL.PRODUCTS },
        url: `${BASE_URL}/poc`,
      });
      return;
    default:
      throw new Error(
        `Unsupported mode "${mode}"; expected one of: ${Object.values(MODE).join(`, `)}`
      );
  }
}
35
/** Currency codes keyed by the symbol that prefixes a listing price. */
const CURRENCY_BY_SYMBOL = { $: `USD`, "€": `EUR`, "£": `GBP` };

/**
 * Builds one dataset record from a single product tile of a listing page.
 *
 * @param {Function} $ - Cheerio instance bound to the listing page.
 * @param {object} el - The `.js-result-list-item` element to read.
 * @returns {object} Record with `itemId`, `itemName`, `itemUrl`, `img`,
 *   `inStock`, `currentPrice`, `originalPrice`, `currency`, `discounted`
 *   and `_discount`. Values that cannot be read are `null`/`undefined`
 *   instead of throwing or producing `NaN`.
 */
function parseProductTile($, el) {
  const $tile = $(el);
  const $link = $tile.find(`a.bem-product-thumb__image-link--grid`);

  // e.g. €101.27 or "€28.47 - €69.90"; for a range only the first amount is used.
  const priceRaw = $tile.find(`.bem-product-price__unit--grid`).text();
  const priceMatch = priceRaw.match(/([$€£])\s*([\d.]+)/);
  const parsedPrice = priceMatch ? parseFloat(priceMatch[2]) : NaN;
  const currentPrice = Number.isFinite(parsedPrice) ? parsedPrice : null;
  // Falls back to USD (the previously hard-coded value) when no symbol is found.
  const currency = priceMatch ? CURRENCY_BY_SYMBOL[priceMatch[1]] : `USD`;

  // e.g. "Save 30% - 45%"; the last percentage in the text is the one used.
  // NOTE(review): this selector uses `product_price` while the price selector
  // uses `product-price` - confirm against the live markup.
  const discountRaw = $tile.find(`.bem-product_price__discount`).text();
  const discountMatch = discountRaw.trim().match(/(\d+)%$/);
  const discountPct = discountMatch ? Number(discountMatch[1]) : null;

  // The pre-discount price is derived from the current price and the percentage.
  // A 100% discount is skipped because it would divide by zero.
  let originalPrice = null;
  if (currentPrice !== null && discountPct !== null && discountPct < 100) {
    originalPrice = Number((currentPrice / (1 - discountPct / 100)).toFixed(2));
  }

  // e.g. //www.wigglestatic.com/product-media/5360088903/...-2.jpg?w=200&h=200&a=7
  const imgRaw = $tile.find(`.js-result-list-image`).attr(`data-original`);
  const img = imgRaw
    ? imgRaw
        .replace(/^\/\//, `https://`) // leading "//" -> https://
        .replace(/\?.*$/, ``) // drop the query string
    : null;

  return {
    itemId: $tile.attr(`data-id`),
    itemName: $link.attr(`title`),
    itemUrl: $link.attr(`href`),
    img,
    inStock: null, // availability is not read from the listing; always null
    currentPrice,
    originalPrice,
    currency,

    // Derived
    discounted: originalPrice !== null && originalPrice !== currentPrice,
    _discount: discountPct !== null ? discountPct / 100 : null,
  };
}

/**
 * Route handlers keyed by `request.userData.label`. Each handler receives the
 * crawler's page context and the shared global context (`{ requestQueue }`).
 * All queue and dataset writes are awaited, so a handler only resolves once
 * its work is persisted and any failure surfaces to the crawler.
 */
const router = {
  // Brand index: enqueue a PRODUCTS request for every matched brand link.
  [LABEL.INDEX]: async ({ $ }, { requestQueue }) => {
    const brandLinks = $(
      `h2:contains("All  brands") + .brandgroup .branditem a:first-child`
    ).toArray();
    for (const el of brandLinks) {
      const href = $(el).attr(`href`); // urls are relative, starting with /
      if (!href) continue; // anchor without href: nothing to follow
      await requestQueue.addRequest({
        userData: { label: LABEL.PRODUCTS },
        url: new URL(href, BASE_URL).toString(),
      });
    }
  },

  // Listing page: the first page fans out to the remaining pages; every page
  // has its product tiles extracted.
  [LABEL.PRODUCTS]: async ({ $, request }, { requestQueue }) => {
    // Inspect the parsed query rather than the raw string so that `g` is found
    // regardless of where it sits among the parameters.
    const isFirstPage = !new URL(request.url).searchParams.has(`g`);
    if (isFirstPage) {
      const totalItemsText = $(`#listing-page-header-title-row span`).text(); // e.g. `(856)`
      const totalItems = Number(totalItemsText.replace(/[^0-9]/g, ``));
      for (let offset = PER_PAGE; offset < totalItems; offset += PER_PAGE) {
        const nextUrl = new URL(request.url);
        nextUrl.searchParams.set(`g`, String(offset + 1)); // first item of that page
        await requestQueue.addRequest({
          userData: { label: LABEL.PRODUCTS },
          url: nextUrl.toString(),
        });
      }
    }

    const products = $(`.js-result-list-item`)
      .toArray()
      .map((el) => parseProductTile($, el));
    if (products.length > 0) {
      await Apify.pushData(products);
    }
  },
};
110
/**
 * Actor entry point: reads the input, seeds the request queue for the chosen
 * mode and runs a CheerioCrawler that dispatches every page to `router`.
 *
 * Input fields read here:
 * - `debug` (default `false`): switches logging to DEBUG, limits concurrency
 *   to 1 and disables retries.
 * - `mode` (default `MODE.FULL` when the input omits it).
 */
Apify.main(async () => {
  const input = await Apify.getInput();
  const { debug = false, mode = MODE.FULL } = input ?? {};
  if (debug) {
    log.setLevel(log.LEVELS.DEBUG);
  }

  const requestQueue = await Apify.openRequestQueue();
  await enqueueInitialRequest(mode, requestQueue);

  const globalContext = { mode, requestQueue };
  const crawler = new Apify.CheerioCrawler({
    requestQueue,
    maxConcurrency: debug ? 1 : 3,
    maxRequestRetries: debug ? 0 : 3,
    preNavigationHooks: [
      async ({ request }) => {
        // Every product listing is requested with the page size set via `ps`.
        if (request.userData.label === LABEL.PRODUCTS) {
          const url = new URL(request.url);
          url.searchParams.set(`ps`, PER_PAGE.toString()); // toString() to make TS happy
          request.url = url.toString();
        }
      },
    ],
    async handlePageFunction(context) {
      const { label } = context.request.userData;
      // Own-property check so inherited names (e.g. "constructor") never resolve
      // to a handler, and an unknown label fails with a readable message.
      if (!Object.prototype.hasOwnProperty.call(router, label)) {
        throw new Error(
          `No route handler for label "${label}" (${context.request.url})`
        );
      }
      await router[label](context, globalContext);
    },
    async handleFailedRequestFunction({ request }) {
      log.error(`Request ${request.url} failed multiple times`, request);
    },
  });

  await crawler.run();
  log.info(`crawler finished`);
});

package.json

1{
2  "name": "wiggle-wiggle-com-scraper",
3  "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
4  "type": "module",
5  "scripts": {
6    "start": "node ./main.js",
7    "push-to-apify-platform": "npx apify push"
8  },
9  "dependencies": {
10    "apify": "*"
11  },
12  "apify": {
13    "title": "Wiggle (wiggle.com) scraper",
14    "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
15    "isPublic": true,
16    "isDeprecated": false,
17    "isAnonymouslyRunnable": true,
18    "notice": "",
19    "pictureUrl": "",
20    "seoTitle": "",
21    "seoDescription": "",
22    "categories": [
23      "ECOMMERCE"
24    ]
25  }
26}

.actor/actor.json

1{
2  "actorSpecification": 1,
3  "name": "wiggle-wiggle-com-scraper",
4  "title": "Wiggle (wiggle.com) scraper",
5  "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
6  "version": "0.1.0",
7  "storages": {
8    "dataset": {
9      "actorSpecification": 1,
10      "title": "Wiggle (wiggle.com) scraper",
11      "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
12      "views": {
13        "overview": {
14          "title": "Overview",
15          "description": "Overview of the most important fields",
16          "transformation": {
17            "fields": [
18              "itemId",
19              "itemName",
20              "itemUrl",
21              "img",
22              "inStock",
23              "currentPrice",
24              "originalPrice",
25              "currency"
26            ]
27          },
28          "display": {
29            "component": "table",
30            "columns": [
31              {
32                "label": "Item ID",
33                "field": "itemUrl",
34                "format": "link",
35                "textField": "itemId"
36              },
37              {
38                "label": "Item Name",
39                "field": "itemName",
40                "format": "text"
41              },
42              {
43                "label": "Img",
44                "field": "img",
45                "format": "image"
46              },
47              {
48                "label": "In Stock",
49                "field": "inStock",
50                "format": "boolean"
51              },
52              {
53                "label": "Current Price",
54                "field": "currentPrice",
55                "format": "number"
56              },
57              {
58                "label": "Original Price",
59                "field": "originalPrice",
60                "format": "number"
61              },
62              {
63                "label": "Currency",
64                "field": "currency",
65                "format": "text"
66              }
67            ]
68          }
69        }
70      }
71    }
72  }
73}

.actor/logo.png

Developer
Maintained by Community
Categories