Wiggle (wiggle.com) scraper
Deprecated · View all Actors
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
Wiggle (wiggle.com) scraper
strajk/wiggle-wiggle-com-scraper
Scrapes product titles, prices, images and availability. Does NOT scrape product details.
Dockerfile
1FROM apify/actor-node:16
2
3COPY package.json ./
4
5RUN npm --quiet set progress=false \
6 && npm install --only=prod --no-optional
7
8COPY . ./
INPUT_SCHEMA.json
1{
2 "title": "Wiggle (wiggle.com) scraper",
3 "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
4 "type": "object",
5 "schemaVersion": 1,
6 "properties": {
7 "mode": {
8 "title": "Mode",
9 "description": "",
10 "type": "string",
11 "editor": "select",
12 "default": "TEST",
13 "prefill": "TEST",
14 "enumTitles": [
15 "TEST",
16 "FULL"
17 ],
18 "enum": [
19 "TEST",
20 "FULL"
21 ]
22 },
23 "debug": {
24 "title": "Debug",
25 "description": "Debug mode prints more logs, disables concurrency and other optimizations.",
26 "type": "boolean",
27 "editor": "checkbox",
28 "default": false
29 }
30 },
31 "required": [
32 "mode"
33 ]
34}
apify.json
1{
2 "name": "wiggle-wiggle-com-scraper",
3 "version": "0.1",
4 "buildTag": "latest",
5 "env": null,
6 "defaultRunOptions": {
7 "build": "latest",
8 "timeoutSecs": 3600,
9 "memoryMbytes": 1024
10 }
11}
main.js
1import { URL } from "node:url";
2import Apify from "apify";
3
4const { log } = Apify.utils;
5
/** Values stored in `request.userData.label`; they double as keys of `router`. */
const LABEL = {
  INDEX: "INDEX",
  PRODUCTS: "PRODUCTS",
};

/** Run modes accepted through the Actor input (`mode`). */
const MODE = {
  TEST: "TEST",
  FULL: "FULL",
};

/** Origin that every enqueued URL is built on. */
const BASE_URL = "https://www.wiggle.com";

/** Page size requested via the `ps` query parameter; also the pagination step. */
const PER_PAGE = 96;
21
/**
 * Seeds the request queue with the single start request for the chosen mode.
 *
 * - `MODE.FULL` starts at the all-brands index page (handled as `LABEL.INDEX`).
 * - `MODE.TEST` starts directly at one product listing (handled as `LABEL.PRODUCTS`).
 * - Any other value enqueues nothing.
 *
 * @param {string} mode - One of the `MODE` values.
 * @param {object} requestQueue - Queue exposing an async `addRequest(request)` method.
 * @returns {Promise<void>}
 */
async function enqueueInitialRequest(mode, requestQueue) {
  let startRequest;

  switch (mode) {
    case MODE.FULL:
      startRequest = {
        userData: { label: LABEL.INDEX },
        url: `${BASE_URL}/all-brands`,
      };
      break;
    case MODE.TEST:
      startRequest = {
        userData: { label: LABEL.PRODUCTS },
        url: `${BASE_URL}/poc`,
      };
      break;
    default:
      // Unrecognised mode: leave the queue untouched.
      return;
  }

  await requestQueue.addRequest(startRequest);
}
35
/** Maps the currency symbol found in a listing price to the code stored in `currency`. */
const CURRENCY_BY_SYMBOL = { $: `USD`, "€": `EUR` };

/**
 * Extracts the first price from a listing price label.
 *
 * For a range such as "€28.47 - €69.90" the first (left-most) amount is used.
 *
 * @param {string} priceRaw - Text of the price element.
 * @returns {{ amount: number | null, currency: string }} `amount` is `null` when
 *   no price could be read; `currency` then falls back to `USD`.
 */
function parseListingPrice(priceRaw) {
  const match = priceRaw.match(/([$€])\s*([\d.]+)/);
  if (!match) return { amount: null, currency: `USD` };

  const amount = Number.parseFloat(match[2]);
  return {
    amount: Number.isNaN(amount) ? null : amount,
    currency: CURRENCY_BY_SYMBOL[match[1]],
  };
}

/**
 * Extracts the trailing percentage from a discount label.
 *
 * @param {string} discountRaw - Text such as "Save 30% - 45%".
 * @returns {number | null} The last percentage (45 in the example), or `null`
 *   when the label is empty or does not end in a percentage.
 */
function parseDiscountPercent(discountRaw) {
  // Trim first so surrounding whitespace cannot defeat the `$` anchor.
  const match = discountRaw.trim().match(/(\d+)%$/);
  return match ? Number(match[1]) : null;
}

/**
 * Builds one dataset item from a product card on a listing page.
 *
 * @param {Function} $ - Cheerio instance of the loaded page.
 * @param {object} el - The `.js-result-list-item` element.
 * @returns {object} Product record; fields that cannot be read are `null`
 *   (or `undefined` for plain attributes that are absent).
 */
function parseProductCard($, el) {
  const card = $(el);
  const link = card.find(`a.bem-product-thumb__image-link--grid`);

  const { amount: currentPrice, currency } = parseListingPrice(
    card.find(`.bem-product-price__unit--grid`).text()
  );
  const discountPercent = parseDiscountPercent(
    card.find(`.bem-product_price__discount`).text()
  );

  // The page does not show the pre-discount price, so derive it from the
  // current price and the percentage. A 100% discount would divide by zero.
  let originalPrice = null;
  if (currentPrice !== null && discountPercent !== null && discountPercent < 100) {
    originalPrice = Number(
      (currentPrice / (1 - discountPercent / 100)).toFixed(2)
    );
  }

  // e.g. //www.wigglestatic.com/product-media/.../Socks-2.jpg?w=200&h=200&a=7
  const imgRaw = card.find(`.js-result-list-image`).attr(`data-original`);
  const img = imgRaw
    ? imgRaw
        // leading "//" -> https://
        .replace(/^\/\//, `https://`)
        // drop the resize query string after ".jpg"
        .replace(/\.jpg\?.*$/, `.jpg`)
    : null;

  return {
    itemId: card.attr(`data-id`),
    itemName: link.attr(`title`),
    itemUrl: link.attr(`href`),
    img,
    inStock: null, // availability is not read from the listing page
    currentPrice,
    originalPrice,
    currency,

    // Derived
    discounted: originalPrice !== null && originalPrice !== currentPrice,
    _discount: discountPercent !== null ? discountPercent / 100 : null,
  };
}

/**
 * Page handlers keyed by `request.userData.label`.
 *
 * Each handler receives the crawler context (`$`, `request`, ...) and the
 * global context (`requestQueue`, ...). Handlers await everything they
 * enqueue or push, so a failure surfaces as a failed request instead of an
 * unhandled rejection.
 */
const router = {
  // Brand index: enqueue the product listing of every brand.
  [LABEL.INDEX]: async ({ $ }, { requestQueue }) => {
    const pending = [];
    $(`h2:contains("All brands") + .brandgroup .branditem a:first-child`).each(
      (i, el) => {
        const relUrl = $(el).attr(`href`); // urls are relative, starting with /
        if (!relUrl) return; // anchor without href: nothing to follow
        pending.push(
          requestQueue.addRequest({
            userData: { label: LABEL.PRODUCTS },
            url: new URL(relUrl, BASE_URL).toString(),
          })
        );
      }
    );
    await Promise.all(pending);
  },

  // Product listing: enqueue the remaining pages (first page only), then
  // store every product card found on the current page.
  [LABEL.PRODUCTS]: async ({ $, request }, { requestQueue }) => {
    // Paginated requests carry the `g` offset parameter. Check the parsed
    // query rather than a substring, because `g` is not necessarily the
    // first parameter (e.g. `?ps=96&g=97`).
    const isFirstPage = !new URL(request.url).searchParams.has(`g`);
    if (isFirstPage) {
      const totalItemsText = $(`#listing-page-header-title-row span`).text(); // e.g. `(856)`
      const totalItems = Number(totalItemsText.replace(/[^0-9]/g, ``));

      const pending = [];
      for (let offset = PER_PAGE; offset < totalItems; offset += PER_PAGE) {
        const url = new URL(request.url);
        url.searchParams.set(`g`, String(offset + 1)); // `g` is the 1-based index of the page's first item
        pending.push(
          requestQueue.addRequest({
            userData: { label: LABEL.PRODUCTS },
            url: url.toString(),
          })
        );
      }
      await Promise.all(pending);
    }

    const products = $(`.js-result-list-item`)
      .toArray()
      .map((el) => parseProductCard($, el));
    if (products.length > 0) {
      await Apify.pushData(products);
    }
  },
};
110
// Actor entry point: read the input, seed the queue, then crawl until the queue is drained.
Apify.main(async () => {
  // A missing input object is treated as `{}`, so the defaults below apply.
  const { debug = false, mode = MODE.FULL } = (await Apify.getInput()) ?? {};
  if (debug) {
    log.setLevel(log.LEVELS.DEBUG);
  }

  const requestQueue = await Apify.openRequestQueue();
  await enqueueInitialRequest(mode, requestQueue);

  // Passed as the second argument to every router handler.
  const globalContext = { mode, requestQueue };

  // Rewrites product listing URLs so they carry the `ps` (page size) parameter.
  const applyPageSize = async ({ request }) => {
    if (request.userData.label !== LABEL.PRODUCTS) return;
    const sizedUrl = new URL(request.url);
    sizedUrl.searchParams.set(`ps`, String(PER_PAGE));
    request.url = sizedUrl.toString();
  };

  const crawler = new Apify.CheerioCrawler({
    requestQueue,
    // Debug runs are serial and never retry.
    maxConcurrency: debug ? 1 : 3,
    maxRequestRetries: debug ? 0 : 3,
    preNavigationHooks: [applyPageSize],
    // Dispatch to the handler registered for the request's label.
    handlePageFunction: async (context) => {
      const { label } = context.request.userData;
      await router[label](context, globalContext);
    },
    handleFailedRequestFunction: async ({ request }) => {
      log.error(`Request ${request.url} failed multiple times`, request);
    },
  });

  await crawler.run();
  log.info(`crawler finished`);
});
package.json
1{
2 "name": "wiggle-wiggle-com-scraper",
3 "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
4 "type": "module",
5 "scripts": {
6 "start": "node ./main.js",
7 "push-to-apify-platform": "npx apify push"
8 },
9 "dependencies": {
10 "apify": "*"
11 },
12 "apify": {
13 "title": "Wiggle (wiggle.com) scraper",
14 "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
15 "isPublic": true,
16 "isDeprecated": false,
17 "isAnonymouslyRunnable": true,
18 "notice": "",
19 "pictureUrl": "",
20 "seoTitle": "",
21 "seoDescription": "",
22 "categories": [
23 "ECOMMERCE"
24 ]
25 }
26}
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "wiggle-wiggle-com-scraper",
4 "title": "Wiggle (wiggle.com) scraper",
5 "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
6 "version": "0.1.0",
7 "storages": {
8 "dataset": {
9 "actorSpecification": 1,
10 "title": "Wiggle (wiggle.com) scraper",
11 "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.",
12 "views": {
13 "overview": {
14 "title": "Overview",
15 "description": "Overview of the most important fields",
16 "transformation": {
17 "fields": [
18 "itemId",
19 "itemName",
20 "itemUrl",
21 "img",
22 "inStock",
23 "currentPrice",
24 "originalPrice",
25 "currency"
26 ]
27 },
28 "display": {
29 "component": "table",
30 "columns": [
31 {
32 "label": "Item ID",
33 "field": "itemUrl",
34 "format": "link",
35 "textField": "itemId"
36 },
37 {
38 "label": "Item Name",
39 "field": "itemName",
40 "format": "text"
41 },
42 {
43 "label": "Img",
44 "field": "img",
45 "format": "image"
46 },
47 {
48 "label": "In Stock",
49 "field": "inStock",
50 "format": "boolean"
51 },
52 {
53 "label": "Current Price",
54 "field": "currentPrice",
55 "format": "number"
56 },
57 {
58 "label": "Original Price",
59 "field": "originalPrice",
60 "format": "number"
61 },
62 {
63 "label": "Currency",
64 "field": "currency",
65 "format": "text"
66 }
67 ]
68 }
69 }
70 }
71 }
72 }
73}
.actor/logo.png
Developer
Maintained by Community
Categories