1mg Product Scraper
Under maintenance
Pricing
$0.10 / 1,000 results
Go to Store
1mg Product Scraper
Under maintenance
A scraper tool that extracts product details from the 1mg.com website.
0.0 (0)
Pricing
$0.10 / 1,000 results
0
Total users
1
Monthly users
1
Last modified
17 days ago
.actor/actor.json
{ "name": "1mg-product-scraper", "title": "1mg Product Scraper", "description": "Scrapes medicine details from 1mg.com using Playwright", "version": "0.0", "buildTag": "latest", "actorSpecification": 1, "dockerfile": "./Dockerfile", "env": { "NODE_ENV": "production" } }
.gitignore
storage
node_modules
.venv
Dockerfile
# Apify base image for Node.js actors that use Playwright.
FROM apify/actor-node-playwright:latest

# Copy only the package manifests first so the dependency-install layer can be
# reused when just the source code changes. The glob also picks up
# package-lock.json when it exists, without failing the build when it does not.
COPY package*.json ./

# Install runtime dependencies only; devDependencies are not needed in the image.
RUN npm install --omit=dev

# Copy the remaining actor source files.
COPY . ./
input_schema.json
{ "title": "1mg Scraper Input", "description": "List of 1mg product URLs to scrape", "type": "object", "schemaVersion": 1, "properties": { "startUrls": { "title": "Start URLs", "type": "array", "description": "Array of URLs to visit", "editor": "requestListSources" } }, "required": ["startUrls"]}
main.js
import { Actor } from 'apify';
import { PlaywrightCrawler } from 'crawlee';

await Actor.init();

// Actor.getInput() resolves to null when the run has no input, so fall back to
// an empty object instead of crashing on a property access.
const input = (await Actor.getInput()) ?? {};
const urls = Array.isArray(input.startUrls) ? input.startUrls : [];

/**
 * Scrolls the page from top to bottom in 300px steps so that lazily rendered
 * content is triggered before extraction.
 *
 * @param {import('playwright').Page} page - Page that has already been navigated.
 * @returns {Promise<void>}
 */
async function autoScroll(page) {
  await page.evaluate(async () => {
    // scrollHeight is re-read on every step because the page can grow while scrolling.
    for (let y = 0; y < document.body.scrollHeight; y += 300) {
      window.scrollTo(0, y);
      await new Promise((resolve) => setTimeout(resolve, 200));
    }
  });
}

/**
 * Extracts product fields from the current document.
 *
 * This function is passed to `page.evaluate`, so it executes in the browser
 * and must not reference anything from the Node.js module scope.
 *
 * @returns {{invalidPage: true, name: string} | {name: string, marketer: string,
 *   salt: string, synonyms: string, price: string, mrp: string, imageUrls: string[]}}
 *   Either a marker object for pages whose title looks wrong, or the product record.
 */
function extractProduct() {
  // Returns the trimmed text of the first match, or null when the element is
  // missing or empty. Keeping "missing" as null (instead of a truthy
  // placeholder) is what allows the fallbacks below to actually run.
  const textOf = (selector) => {
    const text = document.querySelector(selector)?.innerText?.trim();
    return text || null;
  };
  const safeText = (selector) => textOf(selector) ?? 'N/A';

  const name = textOf('#drug_header > div > div > div.DrugHeader__header-content___f6GbC > div > div.DrugHeader__left___19WY- > h1');
  if (!name || name.length < 3 || /[0-9]{1,2}%/.test(name)) {
    return { invalidPage: true, name: name || 'Invalid product title' };
  }

  const marketer = safeText('#drug_header > div > div > div.DrugHeader__lower-content___2CZFo > div.DrugHeader__left___19WY- > div:nth-child(2) > div:nth-child(1) > div.DrugHeader__meta-value___vqYM0 > a');

  // Prefer the linked salt name; fall back to the plain text of the same block.
  const salt = textOf('#drug_header > div > div > div.DrugHeader__lower-content___2CZFo > div.DrugHeader__left___19WY- > div:nth-child(2) > div:nth-child(2) > div.saltInfo.DrugHeader__meta-value___vqYM0 > a')
    ?? textOf('#drug_header > div > div > div.DrugHeader__lower-content___2CZFo > div.DrugHeader__left___19WY- > div:nth-child(2) > div:nth-child(2) > div.saltInfo.DrugHeader__meta-value___vqYM0')
    ?? 'N/A';

  // Salt synonyms are located by their label text rather than by position.
  let synonyms = 'N/A';
  for (const block of document.querySelectorAll('.DrugHeader__meta-block___1LvyF')) {
    const label = block.querySelector('.DrugHeader__meta-title___2Y8YJ')?.innerText.trim();
    if (label === 'Salt Synonyms') {
      const value = block.querySelector('.saltInfo')?.innerText.trim();
      if (value) synonyms = value;
    }
  }

  // Price and MRP also have an alternate layout (sold-out scenario).
  const price = textOf('span[class*="offer-price"]')
    ?? textOf('div.PriceBox__price___3HxvT span')
    ?? 'N/A';

  let mrp = textOf('span[class*="stike"]');
  if (!mrp) {
    const mrpMatch = Array.from(document.querySelectorAll('span'))
      .find((el) => el.className.includes('PriceBoxPlanOption__stike') && el.innerText.trim().startsWith('₹'));
    mrp = mrpMatch?.innerText.trim();
  }
  mrp = mrp || 'N/A';

  // Product images only (filters out promo banners).
  const imageUrls = Array.from(document.querySelectorAll('#drug_header img'))
    .map((img) => img.src)
    .filter((src) => src.includes('/cropped/') || /\/([a-z0-9\-]+)\.jpg/i.test(src));

  return {
    name,
    marketer,
    salt,
    synonyms,
    price,
    mrp,
    imageUrls,
  };
}

const crawler = new PlaywrightCrawler({
  // The crawler has already navigated to request.url when this handler runs,
  // so the page is not loaded a second time here.
  requestHandler: async ({ page, request, log }) => {
    log.info(`Scraping: ${request.url}`);

    try {
      await page.waitForSelector('h1', { timeout: 10000 });
      await autoScroll(page);
      await page.waitForTimeout(3000);

      const data = await page.evaluate(extractProduct);

      if (data.invalidPage) {
        log.warning(`⚠️ Possibly invalid product page at ${request.url}, captured name: ${data.name}`);
        await Actor.pushData({ url: request.url, warning: 'Unusual product name', name: data.name });
        return;
      }

      // Push each record as soon as it is available so that a later crash or
      // timeout does not discard results that were already scraped.
      await Actor.pushData({ ...data, url: request.url });
    } catch (error) {
      log.error(`❌ Failed to scrape ${request.url}: ${error.message}`);
      // Rethrow so the crawler can retry the request (maxRequestRetries) and
      // finally hand it to failedRequestHandler; swallowing the error here
      // would mark the request as handled after the first failure.
      throw error;
    }
  },
  maxConcurrency: 2,
  maxRequestRetries: 3,
  requestHandlerTimeoutSecs: 60,
  headless: true,
  preNavigationHooks: [async ({ page }, gotoOptions) => {
    await page.setViewportSize({ width: 1280, height: 800 });
    // Applied to the crawler's own navigation instead of a second page.goto().
    gotoOptions.waitUntil = 'networkidle';
  }],
  failedRequestHandler: async ({ request, log }, error) => {
    log.error(`❌ Giving up on ${request.url} after multiple attempts.`);
    await Actor.pushData({
      url: request.url,
      error: 'Failed after multiple retries',
      details: error?.message ?? null,
    });
  },
});

await crawler.run(urls);
await Actor.exit();
package.json
{ "name": "1mg-product-scraper", "version": "0.0.1", "scripts": { "start": "node main.js" }, "dependencies": { "apify": "^3.4.2", "crawlee": "^3.13.4", "playwright": "^1.52.0" }, "type": "module"}