Actor picture

Wiggle (wiggle.com) scraper

strajk/wiggle-wiggle-com-scraper

Scrapes product titles, prices, images and availability. Does NOT scrape product details.

No credit card required

Author's avatarPavel Dolecek
  • Modified
  • Users3
  • Runs167
Actor picture
Wiggle (wiggle.com) scraper

Dockerfile

# Build on Apify's Node.js 16 actor base image.
FROM apify/actor-node:16

# Copy only the package manifest ahead of the rest of the sources
# (presumably so the dependency-install layer below can be cached
# independently of source changes — standard Docker layer caching).
COPY package.json ./

# Turn off npm's progress output, then install production dependencies only,
# skipping optional packages.
RUN npm --quiet set progress=false \
  && npm install --only=prod --no-optional

# Copy the remaining project files into the image.
COPY . ./

INPUT_SCHEMA.json

{
  "title": "Wiggle (wiggle.com) scraper",
  "description": "Scrapes product titles, prices, images and availability. Does NOT scrape product details.",
  "type": "object",
  "schemaVersion": 1,
  "properties": {
    "mode": {
      "title": "Mode",
      "description": "",
      "type": "string",
      "editor": "select",
      "default": "TEST",
      "prefill": "TEST",
      "enumTitles": [
        "TEST",
        "FULL"
      ],
      "enum": [
        "TEST",
        "FULL"
      ]
    },
    "debug": {
      "title": "Debug",
      "description": "Debug mode prints more logs, disables concurrency and other optimizations.",
      "type": "boolean",
      "editor": "checkbox",
      "default": false
    }
  },
  "required": [
    "mode"
  ]
}

README.md

# Wiggle (wiggle.com) scraper

## Beware  
 Original price is calculated from current price and discount percentage, so it might be inaccurate. Original price is not present on the listing page.  
 Only works with USD currency at the moment.  
 Price and availability differ a lot between specific product variants – that is not handled by the scraper at the moment. The lowest price and the highest discount are scraped.

## Output example

* **itemId** `string`
* **itemName** `string`
* **itemUrl** `string`
* **img** `string`
* **inStock** `boolean`
* **currentPrice** `number`
* **originalPrice** `number`
* **currency** `string`

apify.json

{
  "name": "wiggle-wiggle-com-scraper",
  "version": "0.1",
  "buildTag": "latest",
  "env": null,
  "defaultRunOptions": {
    "build": "latest",
    "timeoutSecs": 3600,
    "memoryMbytes": 1024
  }
}

main.js

This file is 143 lines long. Only the first 50 are shown. Show all

import { URL } from "node:url";
import Apify from "apify";

const { log } = Apify.utils;

/**
 * Request labels stored in `userData.label`; each value equals its own key.
 */
const LABEL = {
  INDEX: "INDEX",
  PRODUCTS: "PRODUCTS",
};
/**
 * Run modes accepted by the scraper; each value equals its own key.
 */
const MODE = {
  TEST: "TEST",
  FULL: "FULL",
};

// Site origin; relative paths (which start with "/") are appended to it.
const BASE_URL = "https://www.wiggle.com";
// NOTE(review): presumably the number of products requested per listing page —
// its use is outside the visible part of this file; confirm.
const PER_PAGE = 96;

/**
 * Adds the crawl's starting request to the queue, chosen by run mode.
 *
 * - `MODE.FULL` starts at `/all-brands`, labelled `LABEL.INDEX`.
 * - `MODE.TEST` starts at `/poc`, labelled `LABEL.PRODUCTS`.
 * - Any other mode enqueues nothing.
 *
 * @param {string} mode - One of the `MODE` values.
 * @param {{ addRequest: Function }} requestQueue - Queue receiving the request.
 * @returns {Promise<void>} Resolves once the request (if any) has been added.
 */
async function enqueueInitialRequest(mode, requestQueue) {
  let label;
  let path;

  switch (mode) {
    case MODE.FULL:
      label = LABEL.INDEX;
      path = "/all-brands";
      break;
    case MODE.TEST:
      label = LABEL.PRODUCTS;
      path = "/poc";
      break;
    default:
      // Unrecognized mode: leave the queue untouched.
      return;
  }

  await requestQueue.addRequest({
    userData: { label },
    url: `${BASE_URL}${path}`,
  });
}

const router = {
  [LABEL.INDEX]: async ({ $ }, { requestQueue }) => {
    $(`h2:contains("All  brands") + .brandgroup .branditem a:first-child`).each(
      (i, el) => {
        const relUrl = $(el).attr(`href`); // urls are relative, starting with /
        const url = `${BASE_URL}${relUrl}`;
        void requestQueue.addRequest({
          userData: { label: LABEL.PRODUCTS },
          url,
        });
      }
    );
  },
  [LABEL.PRODUCTS]: async ({ $, request }, { requestQueue }) => {
    if (!request.url.includes(`?g=`)) {

package.json

{
  "name": "wiggle-wiggle-com-scraper",
  "description": "Scrapes product titles, prices, images and availability. Does NOT scrape product details.",
  "type": "module",
  "scripts": {
    "start": "node ./main.js",
    "push-to-apify-platform": "npx apify push"
  },
  "dependencies": {
    "apify": "*"
  },
  "apify": {
    "title": "Wiggle (wiggle.com) scraper",
    "description": "Scrapes product titles, prices, images and availability. Does NOT scrape product details.",
    "isPublic": true,
    "isDeprecated": false,
    "isAnonymouslyRunnable": true,
    "notice": "",
    "pictureUrl": "",
    "seoTitle": "",
    "seoDescription": "",
    "categories": [
      "ECOMMERCE"
    ]
  }
}

.actor/actor.json

{
  "actorSpecification": 1,
  "name": "wiggle-wiggle-com-scraper",
  "title": "Wiggle (wiggle.com) scraper",
  "description": "Scrapes product titles, prices, images and availability. Does NOT scrape product details.",
  "version": "0.1.0",
  "storages": {
    "dataset": {
      "actorSpecification": 1,
      "title": "Wiggle (wiggle.com) scraper",
      "description": "Scrapes product titles, prices, images and availability. Does NOT scrape product details.",
      "views": {
        "overview": {
          "title": "Overview",
          "description": "Overview of the most important fields",
          "transformation": {
            "fields": [
              "itemId",
              "itemName",
              "itemUrl",
              "img",
              "inStock",
              "currentPrice",
              "originalPrice",
              "currency"
            ]
          },
          "display": {
            "component": "table",
            "columns": [
              {
                "label": "Item ID",
                "field": "itemUrl",
                "format": "link",
                "textField": "itemId"
              },
              {
                "label": "Item Name",
                "field": "itemName",
                "format": "text"
              },
              {
                "label": "Img",
                "field": "img",
                "format": "image"
              },
              {
                "label": "In Stock",
                "field": "inStock",
                "format": "boolean"
              },
              {
                "label": "Current Price",
                "field": "currentPrice",
                "format": "number"
              },
              {
                "label": "Original Price",
                "field": "originalPrice",
                "format": "number"
              },
              {
                "label": "Currency",
                "field": "currency",
                "format": "text"
              }
            ]
          }
        }
      }
    }
  }
}

.actor/logo.png