Wiggle (wiggle.com) scraper avatar
Wiggle (wiggle.com) scraper

Deprecated

Pricing

Pay per usage

Go to Store
Wiggle (wiggle.com) scraper

Wiggle (wiggle.com) scraper

Deprecated

Developed by

Pavel Dolecek

Maintained by Community

Scrapes product titles, prices and images from listing pages (the availability field `inStock` is currently always null). Does NOT scrape product details.

0.0 (0)

Pricing

Pay per usage

1

Monthly users

1

Last modified

3 years ago

Dockerfile

# Base image: apify/actor-node, tag 16.
1FROM apify/actor-node:16
2
# Copy only package.json before installing; presumably so the dependency layer
# can be reused from cache when just the sources change.
3COPY package.json ./
4
# Turn off npm's progress bar, then install production dependencies only,
# skipping optional ones.
5RUN npm --quiet set progress=false \
6  && npm install --only=prod --no-optional
7
# Copy the rest of the build context into the image.
8COPY . ./

INPUT_SCHEMA.json

1{
2  "title": "Wiggle (wiggle.com) scraper",
3  "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
4  "type": "object",
5  "schemaVersion": 1,
6  "properties": {
7    "mode": {
8      "title": "Mode",
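9      "description": "TEST scrapes a single sample listing (/poc); FULL crawls every brand listing linked from /all-brands.",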
10      "type": "string",
11      "editor": "select",
12      "default": "TEST",
13      "prefill": "TEST",
14      "enumTitles": [
15        "TEST",
16        "FULL"
17      ],
18      "enum": [
19        "TEST",
20        "FULL"
21      ]
22    },
23    "debug": {
24      "title": "Debug",
25      "description": "Debug mode prints more logs, disables concurrency and other optimizations.",
26      "type": "boolean",
27      "editor": "checkbox",
28      "default": false
29    }
30  },
31  "required": [
32    "mode"
33  ]
34}

apify.json

1{
2  "name": "wiggle-wiggle-com-scraper",
3  "version": "0.1",
4  "buildTag": "latest",
5  "env": null,
6  "defaultRunOptions": {
7    "build": "latest",
8    "timeoutSecs": 3600,
9    "memoryMbytes": 1024
10  }
11}

main.js

1import { URL } from "node:url";
2import Apify from "apify";
3
// Short alias for the SDK logger used by the crawler callbacks below.
const log = Apify.utils.log;
5
/**
 * Request labels. The value is stored in `request.userData.label` and is used
 * as the key that selects the page handler in `router`.
 */
const LABEL = Object.freeze({
  INDEX: "INDEX",
  PRODUCTS: "PRODUCTS",
});

/**
 * Run modes accepted in the actor input (`mode`); they decide which start URL
 * gets enqueued.
 */
const MODE = Object.freeze({
  TEST: "TEST",
  FULL: "FULL",
});

/** Origin that every start URL is built from. */
const BASE_URL = "https://www.wiggle.com";

/** Page size sent as the `ps` query parameter and used as the pagination step. */
const PER_PAGE = 96;
21
/**
 * Seeds the request queue with the single start URL for the selected mode.
 *
 * - `MODE.FULL` enqueues the `/all-brands` page, labelled `LABEL.INDEX`.
 * - `MODE.TEST` enqueues the `/poc` listing, labelled `LABEL.PRODUCTS`.
 *
 * @param {string} mode - One of the `MODE` values.
 * @param {{ addRequest: (request: object) => Promise<unknown> }} requestQueue -
 *   Queue that receives the start request.
 * @returns {Promise<void>}
 * @throws {Error} If `mode` is not a known `MODE` value. Without this the
 *   queue would stay empty and the crawl would end with no output and no hint.
 */
async function enqueueInitialRequest(mode, requestQueue) {
  switch (mode) {
    case MODE.FULL:
      await requestQueue.addRequest({
        userData: { label: LABEL.INDEX },
        url: `${BASE_URL}/all-brands`,
      });
      return;
    case MODE.TEST:
      await requestQueue.addRequest({
        userData: { label: LABEL.PRODUCTS },
        url: `${BASE_URL}/poc`,
      });
      return;
    default:
      throw new Error(
        `Unknown mode "${mode}"; expected one of: ${Object.values(MODE).join(", ")}`
      );
  }
}
35
/** Maps the currency symbol found in a listing price to its ISO 4217 code. */
const CURRENCY_BY_SYMBOL = Object.freeze({ $: "USD", "€": "EUR", "£": "GBP" });

/**
 * Parses the first price found in a listing price label.
 *
 * @param {string} priceRaw - e.g. `€101.27`, `$19.99`, or a range such as
 *   `€28.47 - €69.90` (only the first amount is used).
 * @returns {{ amount: number | null, currency: string }} `amount` is `null`
 *   when no price is recognised; `currency` then falls back to `USD`, the
 *   value this scraper has always emitted.
 */
function parsePrice(priceRaw) {
  const match = priceRaw.match(/([$€£])\s*(\d[\d.]*)/);
  if (!match) return { amount: null, currency: "USD" };
  const amount = Number.parseFloat(match[2]);
  return {
    amount: Number.isNaN(amount) ? null : amount,
    currency: CURRENCY_BY_SYMBOL[match[1]],
  };
}

/**
 * Extracts the trailing percentage from a discount label.
 *
 * @param {string} discountRaw - e.g. `Save 45%` or `Save 30% - 45%`.
 * @returns {number | null} The last percentage (45 in both examples), or
 *   `null` when the label is empty or does not end in `NN%`.
 */
function parseDiscountPercent(discountRaw) {
  const match = discountRaw.trim().match(/(\d+)%$/);
  return match ? Number(match[1]) : null;
}

/**
 * Reconstructs the pre-discount price from the current price and discount.
 *
 * @param {number | null} amount - Current price.
 * @param {number | null} discountPercent - Discount in percent.
 * @returns {number | null} Price rounded to 2 decimals, or `null` when either
 *   input is missing or the discount is 100% or more (division by zero).
 */
function computeOriginalPrice(amount, discountPercent) {
  if (amount === null || discountPercent === null || discountPercent >= 100) {
    return null;
  }
  return Number((amount / (1 - discountPercent / 100)).toFixed(2));
}

/**
 * Normalises a lazy-loaded image URL.
 *
 * @param {string | undefined} rawUrl - e.g.
 *   `//www.wigglestatic.com/product-media/123/Name.jpg?w=200&h=200&a=7`.
 * @returns {string | null} URL with a leading `//` replaced by `https://` and
 *   the query string removed, or `null` when the attribute is missing.
 */
function normalizeImageUrl(rawUrl) {
  if (!rawUrl) return null;
  return rawUrl.replace(/^\/\//, `https://`).replace(/\?.*$/, ``);
}

/**
 * Builds one dataset record from a listing tile.
 *
 * @param {object} $item - Cheerio selection wrapping one `.js-result-list-item`.
 * @returns {object} Product record; see the field list in the returned literal.
 */
function extractProduct($item) {
  const $link = $item.find(`a.bem-product-thumb__image-link--grid`);
  const { amount, currency } = parsePrice(
    $item.find(`.bem-product-price__unit--grid`).text()
  );
  // NOTE(review): this selector says `bem-product_price` (underscore) while the
  // price selector says `bem-product-price`; kept as-is, confirm against the page.
  const discountPercent = parseDiscountPercent(
    $item.find(`.bem-product_price__discount`).text()
  );
  const originalPrice = computeOriginalPrice(amount, discountPercent);

  return {
    itemId: $item.attr(`data-id`),
    itemName: $link.attr(`title`),
    itemUrl: $link.attr(`href`),
    img: normalizeImageUrl(
      $item.find(`.js-result-list-image`).attr(`data-original`)
    ),
    // Availability is not read from the listing page; the field is always null.
    inStock: null,
    currentPrice: amount,
    originalPrice,
    currency,

    // Derived
    discounted: originalPrice !== null && originalPrice !== amount,
    _discount: discountPercent === null ? null : discountPercent / 100,
  };
}

/**
 * Page handlers keyed by `request.userData.label`.
 *
 * Each handler receives the crawler context (`$`, `request`, ...) and the
 * global context (`requestQueue`, ...). Every queue/dataset write is awaited so
 * a handler only resolves once its work is persisted and failures surface as
 * request errors instead of unhandled rejections.
 */
const router = {
  /** Brand index: enqueues one PRODUCTS request per brand link. */
  [LABEL.INDEX]: async ({ $ }, { requestQueue }) => {
    // NOTE(review): the double space in "All  brands" is kept from the original
    // selector; presumably it mirrors the page markup - confirm.
    const brandUrls = $(
      `h2:contains("All  brands") + .brandgroup .branditem a:first-child`
    )
      .toArray()
      .map((el) => $(el).attr(`href`))
      .filter(Boolean) // skip anchors without href
      .map((href) => new URL(href, BASE_URL).toString()); // hrefs are relative, starting with /

    await Promise.all(
      brandUrls.map((url) =>
        requestQueue.addRequest({
          userData: { label: LABEL.PRODUCTS },
          url,
        })
      )
    );
  },

  /** Listing page: enqueues the remaining pages (first page only) and stores products. */
  [LABEL.PRODUCTS]: async ({ $, request }, { requestQueue }) => {
    // Check the parsed query rather than the substring `?g=`: that substring
    // is absent whenever `g` is not the first parameter (e.g. `?ps=96&g=97`).
    const isFirstPage = !new URL(request.url).searchParams.has(`g`);

    if (isFirstPage) {
      const totalItemsText = $(`#listing-page-header-title-row span`).text(); // e.g. `(856)`
      const totalItems = Number(totalItemsText.replace(/[^0-9]/g, ``));

      const pending = [];
      for (let offset = PER_PAGE; offset < totalItems; offset += PER_PAGE) {
        const url = new URL(request.url);
        url.searchParams.set(`g`, String(offset + 1)); // `g` is the 1-based index of the first item
        pending.push(
          requestQueue.addRequest({
            userData: { label: LABEL.PRODUCTS },
            url: url.toString(),
          })
        );
      }
      await Promise.all(pending);
    }

    // Products are extracted on every page, not only on the first one.
    const products = $(`.js-result-list-item`)
      .toArray()
      .map((el) => extractProduct($(el)));

    if (products.length > 0) {
      await Apify.pushData(products);
    }
  },
};
110
// Actor entry point: reads the input, seeds the queue, then runs a Cheerio
// crawler that dispatches every page to the handler registered in `router`.
Apify.main(async () => {
  const input = (await Apify.getInput()) ?? {};
  // Defaults apply only when the key is absent (undefined), not when it is null.
  const debug = input.debug === undefined ? false : input.debug;
  const mode = input.mode === undefined ? MODE.FULL : input.mode;

  if (debug) {
    const sdkLog = Apify.utils.log;
    sdkLog.setLevel(sdkLog.LEVELS.DEBUG);
  }

  const requestQueue = await Apify.openRequestQueue();
  await enqueueInitialRequest(mode, requestQueue);

  // Shared with every handler as its second argument.
  const globalContext = { mode, requestQueue };

  // Forces the page size on every product listing request before it is fetched.
  const applyPageSize = async ({ request }) => {
    if (request.userData.label !== LABEL.PRODUCTS) return;
    const listingUrl = new URL(request.url);
    listingUrl.searchParams.set(`ps`, PER_PAGE.toString()); // toString() to make TS happy
    request.url = listingUrl.toString();
  };

  // Debug mode runs strictly sequentially and without retries.
  const maxConcurrency = debug ? 1 : 3;
  const maxRequestRetries = debug ? 0 : 3;

  const crawler = new Apify.CheerioCrawler({
    requestQueue,
    maxConcurrency,
    maxRequestRetries,
    preNavigationHooks: [applyPageSize],
    handlePageFunction: async (context) => {
      const { label } = context.request.userData;
      await router[label](context, globalContext);
    },
    handleFailedRequestFunction: async ({ request }) => {
      log.error(`Request ${request.url} failed multiple times`, request);
    },
  });

  await crawler.run();
  log.info(`crawler finished`);
});

package.json

1{
2  "name": "wiggle-wiggle-com-scraper",
3  "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
4  "type": "module",
5  "scripts": {
6    "start": "node ./main.js",
7    "push-to-apify-platform": "npx apify push"
8  },
9  "dependencies": {
10    "apify": "*"
11  },
12  "apify": {
13    "title": "Wiggle (wiggle.com) scraper",
14    "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
15    "isPublic": true,
16    "isDeprecated": false,
17    "isAnonymouslyRunnable": true,
18    "notice": "",
19    "pictureUrl": "",
20    "seoTitle": "",
21    "seoDescription": "",
22    "categories": [
23      "ECOMMERCE"
24    ]
25  }
26}

.actor/actor.json

1{
2  "actorSpecification": 1,
3  "name": "wiggle-wiggle-com-scraper",
4  "title": "Wiggle (wiggle.com) scraper",
5  "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
6  "version": "0.1.0",
7  "storages": {
8    "dataset": {
9      "actorSpecification": 1,
10      "title": "Wiggle (wiggle.com) scraper",
11      "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
12      "views": {
13        "overview": {
14          "title": "Overview",
15          "description": "Overview of the most important fields",
16          "transformation": {
17            "fields": [
18              "itemId",
19              "itemName",
20              "itemUrl",
21              "img",
22              "inStock",
23              "currentPrice",
24              "originalPrice",
25              "currency"
26            ]
27          },
28          "display": {
29            "component": "table",
30            "columns": [
31              {
32                "label": "Item ID",
33                "field": "itemUrl",
34                "format": "link",
35                "textField": "itemId"
36              },
37              {
38                "label": "Item Name",
39                "field": "itemName",
40                "format": "text"
41              },
42              {
43                "label": "Img",
44                "field": "img",
45                "format": "image"
46              },
47              {
48                "label": "In Stock",
49                "field": "inStock",
50                "format": "boolean"
51              },
52              {
53                "label": "Current Price",
54                "field": "currentPrice",
55                "format": "number"
56              },
57              {
58                "label": "Original Price",
59                "field": "originalPrice",
60                "format": "number"
61              },
62              {
63                "label": "Currency",
64                "field": "currency",
65                "format": "text"
66              }
67            ]
68          }
69        }
70      }
71    }
72  }
73}

.actor/logo.png

Pricing

Pricing model

Pay per usage

This Actor is paid per platform usage. The Actor is free to use, and you only pay for the Apify platform usage.