
Mironet Scraper
Deprecated
Pricing
Pay per usage
Go to Store

Mironet Scraper
Deprecated
Scrapes all Mironet.cz products.
0.0 (0)
Pricing
Pay per usage
2
Total users
13
Monthly users
1
Last modified
2 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-chrome:v0.21.10

# Second, copy just package.json and package-lock.json since it should be
# the only file that affects "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "apify": "0.21.10" }, "scripts": { "start": "node main.js" }}
main.js
1const Apify = require('apify');2
/**
 * Reads a property from an ElementHandle and returns it as trimmed text.
 *
 * This is a best-effort reader: it never throws. Any failure while reading
 * the property (missing element, detached handle, non-string value, ...)
 * results in `null` so callers can treat the value as "not available".
 *
 * @param {ElementHandle|null} element - The element to read from; may be
 *   null when the selector lookup found nothing.
 * @param {string} attr - Name of the property to read (e.g. 'href',
 *   'src', 'textContent').
 * @returns {Promise<string|null>} The trimmed string value, or null when the
 *   element is missing, the value is not a string, or reading it failed.
 */
async function getAttribute(element, attr) {
    // A missing element is an expected case, not an error: answer directly
    // instead of relying on a TypeError being thrown and caught below.
    if (!element) {
        return null;
    }
    try {
        const prop = await element.getProperty(attr);
        const value = await prop.jsonValue();
        // Only string values can be trimmed; anything else counts as absent.
        return typeof value === 'string' ? value.trim() : null;
    } catch (e) {
        // Deliberate best-effort: a failed read is reported as "no value".
        return null;
    }
}
/**
 * Actor entry point.
 *
 * Seeds the request queue with the Mironet home page, follows category links
 * ('.nadpis a') and pagination links ('a.PageNew'), and pushes one dataset
 * record per product tile ('.item_b') found on each category page.
 */
Apify.main(async () => {
    // Open the request queue and add the start URL.
    const requestQueue = await Apify.openRequestQueue();
    await requestQueue.addRequest(new Apify.Request({
        userData: { label: 'start' },
        url: 'https://www.mironet.cz/',
    }));

    /**
     * Returns the first run of digits in `text` after removing all
     * whitespace (so "1 299 Kc" yields "1299"), or null when `text` is not a
     * string or contains no digits. The result is a string of digits, not a
     * Number, which keeps the dataset format unchanged.
     */
    const toNumber = (text) => {
        if (typeof text !== 'string') {
            return null;
        }
        const digits = text.replace(/\s/g, '').match(/\d+/);
        return digits ? digits[0] : null;
    };

    /**
     * Builds the dataset record for a single product tile.
     * Returns null when the tile has no numeric product code, because a
     * record without an id cannot be identified downstream.
     */
    const extractProduct = async (item) => {
        const idElem = await item.$('.item_kod');
        const linkElem = await item.$('.nazev a');
        const priceElem = await item.$('.item_cena');
        const imgElem = await item.$('.item_obr img');
        const oPriceElem = await item.$('.item_s_cena span');

        const img = await getAttribute(imgElem, 'src');
        const link = await getAttribute(linkElem, 'href');
        const id = await getAttribute(idElem, 'textContent');
        const name = await getAttribute(linkElem, 'textContent');
        const price = await getAttribute(priceElem, 'textContent');

        const idMatch = typeof id === 'string' ? id.match(/\d+/) : null;
        if (!idMatch) {
            return null;
        }

        const dataItem = {
            img,
            itemId: idMatch[0],
            itemUrl: link,
            itemName: name,
            // The presence of the "original price" element marks a discount.
            discounted: Boolean(oPriceElem),
            // A price without digits is stored as null instead of dropping
            // the whole product.
            currentPrice: toNumber(price),
        };
        if (oPriceElem) {
            const oPrice = await getAttribute(oPriceElem, 'textContent');
            dataItem.originalPrice = toNumber(oPrice);
        }
        return dataItem;
    };

    // Disable all non-essential requests (images and stylesheets) before
    // navigating, then load the page with a 200 s timeout.
    const gotoFunction = async ({ page, request }) => {
        await page.setRequestInterception(true);
        page.on('request', (intercepted) => {
            const type = intercepted.resourceType();
            if (type === 'image' || type === 'stylesheet') {
                intercepted.abort();
            } else {
                intercepted.continue();
            }
        });
        await Apify.utils.puppeteer.hideWebDriver(page);
        return await page.goto(request.url, { timeout: 200000 });
    };

    // Handle page context; what is done depends on the request label.
    const handlePageFunction = async ({ page, request }) => {
        // This is the start page.
        if (request.userData.label === 'start') {
            // Enqueue category links.
            await Apify.utils.puppeteer.enqueueLinks({
                page,
                requestQueue,
                selector: '.nadpis a',
                pseudoUrls: null,
                userData: { label: 'page' },
            });
            return;
        }

        // Anything that is not a category page is ignored.
        if (request.userData.label !== 'page') {
            return;
        }

        // Enqueue pagination pages.
        await Apify.utils.puppeteer.enqueueLinks({
            page,
            requestQueue,
            selector: 'a.PageNew',
            pseudoUrls: null,
            userData: { label: 'page' },
        });

        // Iterate all products and extract data. A failure on one product
        // is logged and must not prevent the remaining ones from being saved.
        const items = await page.$$('.item_b');
        for (const item of items) {
            try {
                const dataItem = await extractProduct(item);
                if (dataItem === null) {
                    console.log(`Skipping product without a numeric id on ${request.url}`);
                    continue;
                }

                // Save data to dataset.
                await Apify.pushData(dataItem);
            } catch (e) {
                console.log(e);
            }
        }
    };

    // Create crawler.
    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,

        // Use proxy.
        launchPuppeteerOptions: {
            useApifyProxy: true,
        },

        gotoFunction,

        handlePageFunction,

        // Executed once a request has exhausted its retries. The "4 times"
        // in the message assumes the crawler's default retry count; confirm
        // if maxRequestRetries is ever configured.
        handleFailedRequestFunction: async ({ request }) => {
            console.log(`Request ${request.url} failed 4 times`);
        },
    });

    // Run crawler.
    await crawler.run();
});