1mg Product Scraper avatar
1mg Product Scraper

Under maintenance

Pricing

$0.10 / 1,000 results

Go to Store
1mg Product Scraper

1mg Product Scraper

Under maintenance

Developed by

Maintained by Community

A scraper tool that extracts medicine/product details from the 1mg.com website.

0.0 (0)

Pricing

$0.10 / 1,000 results

0

Total users

1

Monthly users

1

Last modified

17 days ago

.actor/actor.json

{
"name": "1mg-product-scraper",
"title": "1mg Product Scraper",
"description": "Scrapes medicine details from 1mg.com using Playwright",
"version": "0.0",
"buildTag": "latest",
"actorSpecification": 1,
"dockerfile": "./Dockerfile",
"env": {
"NODE_ENV": "production"
}
}

.gitignore

storage
node_modules
.venv

Dockerfile

# Build the actor image on top of the apify/actor-node-playwright base image.
# NOTE(review): the `latest` tag is unpinned, so rebuilds may pick up a different base image — confirm this is intended.
FROM apify/actor-node-playwright:latest
# Copy only the npm manifests before installing dependencies
# (presumably so the install layer can be cached independently of source changes — verify).
COPY package.json ./
COPY package-lock.json ./
# Install dependencies, omitting devDependencies.
RUN npm install --omit=dev
# Copy the rest of the build context (the actor source) into the image.
COPY . ./

input_schema.json

{
"title": "1mg Scraper Input",
"description": "List of 1mg product URLs to scrape",
"type": "object",
"schemaVersion": 1,
"properties": {
"startUrls": {
"title": "Start URLs",
"type": "array",
"description": "Array of URLs to visit",
"editor": "requestListSources"
}
},
"required": ["startUrls"]
}

main.js

import { Actor } from 'apify';
import { PlaywrightCrawler } from 'crawlee';

/**
 * Extracts product details from a 1mg.com drug page.
 *
 * This function is serialized and executed inside the browser by
 * `page.evaluate`, so it must be fully self-contained: it may only use
 * browser globals and must not reference anything from module scope.
 *
 * @returns {{invalidPage: true, name: string} | {
 *   name: string, marketer: string, salt: string, synonyms: string,
 *   price: string, mrp: string, imageUrls: string[]
 * }} Either a marker for a page whose title looks wrong, or the scraped
 *   fields with 'N/A' substituted for anything that could not be found.
 */
function extractProduct() {
    const MISSING = 'N/A';

    // Returns the trimmed text of the first match, or null when the element is
    // absent or empty. Returning null (rather than 'N/A') keeps `||` fallback
    // chains working; the placeholder is applied once, at the very end.
    const textOf = (selector) => document.querySelector(selector)?.innerText.trim() || null;

    const name = textOf('#drug_header > div > div > div.DrugHeader__header-content___f6GbC > div > div.DrugHeader__left___19WY- > h1');
    // A missing/very short title, or one containing a percentage (e.g. a promo
    // banner such as "20% off"), indicates this is not a regular product page.
    if (!name || name.length < 3 || /[0-9]{1,2}%/.test(name)) {
        return { invalidPage: true, name: name || 'Invalid product title' };
    }

    const metaColumn = '#drug_header > div > div > div.DrugHeader__lower-content___2CZFo > div.DrugHeader__left___19WY- > div:nth-child(2)';
    const marketer = textOf(`${metaColumn} > div:nth-child(1) > div.DrugHeader__meta-value___vqYM0 > a`);

    // Prefer the linked salt name; fall back to the plain-text container.
    const saltContainer = `${metaColumn} > div:nth-child(2) > div.saltInfo.DrugHeader__meta-value___vqYM0`;
    const salt = textOf(`${saltContainer} > a`) || textOf(saltContainer);

    // Locate the "Salt Synonyms" block by its label; the last matching block
    // with a non-empty value wins.
    let synonyms = null;
    for (const block of document.querySelectorAll('.DrugHeader__meta-block___1LvyF')) {
        const label = block.querySelector('.DrugHeader__meta-title___2Y8YJ')?.innerText.trim();
        if (label === 'Salt Synonyms') {
            const value = block.querySelector('.saltInfo')?.innerText.trim();
            if (value) synonyms = value;
        }
    }

    // Price/MRP: try the primary layout first, then the alternate layout
    // (used e.g. when the product is sold out).
    const price = textOf('span[class*="offer-price"]') || textOf('div.PriceBox__price___3HxvT span');

    let mrp = textOf('span[class*="stike"]');
    if (!mrp) {
        const mrpSpan = Array.from(document.querySelectorAll('span'))
            .find((el) => el.className.includes('PriceBoxPlanOption__stike') && el.innerText.trim().startsWith('₹'));
        mrp = mrpSpan?.innerText.trim() || null;
    }

    // Product images only: keep cropped images or plain .jpg files, which
    // filters out promotional banners inside the header.
    const imageUrls = Array.from(document.querySelectorAll('#drug_header img'))
        .map((img) => img.src)
        .filter((src) => src.includes('/cropped/') || /\/([a-z0-9\-]+)\.jpg/i.test(src));

    return {
        name,
        marketer: marketer || MISSING,
        salt: salt || MISSING,
        synonyms: synonyms || MISSING,
        price: price || MISSING,
        mrp: mrp || MISSING,
        imageUrls,
    };
}

await Actor.init();

// `Actor.getInput()` resolves to null when the actor is started without input.
const input = await Actor.getInput();
const urls = input?.startUrls ?? [];

const crawler = new PlaywrightCrawler({
    requestHandler: async ({ page, request, log }) => {
        log.info(`Scraping: ${request.url}`);

        // The crawler has already navigated to request.url (see
        // preNavigationHooks for the wait condition), so no page.goto here.
        await page.waitForSelector('h1', { timeout: 10000 });

        // Scroll through the page in steps so lazily loaded content renders.
        await page.evaluate(async () => {
            for (let y = 0; y < document.body.scrollHeight; y += 300) {
                window.scrollTo(0, y);
                await new Promise((resolve) => setTimeout(resolve, 200));
            }
        });

        // Give late-loading widgets (price box, images) time to settle.
        await page.waitForTimeout(3000);

        const data = await page.evaluate(extractProduct);

        // Results are pushed as soon as they are available so that a crash or
        // migration does not lose everything scraped so far.
        if (data.invalidPage) {
            log.warning(`⚠️ Possibly invalid product page at ${request.url}, captured name: ${data.name}`);
            await Actor.pushData({ url: request.url, warning: 'Unusual product name', name: data.name });
            return;
        }

        await Actor.pushData({ ...data, url: request.url });
        // Errors are deliberately NOT caught here: letting them propagate is
        // what allows the crawler to retry the request (maxRequestRetries) and
        // finally call failedRequestHandler.
    },
    maxConcurrency: 2,
    maxRequestRetries: 3,
    requestHandlerTimeoutSecs: 60,
    headless: true,
    preNavigationHooks: [async ({ page }, gotoOptions) => {
        await page.setViewportSize({ width: 1280, height: 800 });
        // Make the crawler's own navigation wait for network idle instead of
        // navigating a second time inside the request handler.
        if (gotoOptions) gotoOptions.waitUntil = 'networkidle';
    }],
    failedRequestHandler: async ({ request, log }, error) => {
        log.error(`❌ Giving up on ${request.url} after multiple attempts.`);
        // Record one error item per permanently failed URL, including the
        // underlying error message when one is available.
        await Actor.pushData({
            url: request.url,
            error: error?.message ?? 'Failed after multiple retries',
        });
    },
});

await crawler.run(urls);
await Actor.exit();

package.json

{
"name": "1mg-product-scraper",
"version": "0.0.1",
"scripts": {
"start": "node main.js"
},
"dependencies": {
"apify": "^3.4.2",
"crawlee": "^3.13.4",
"playwright": "^1.52.0"
},
"type": "module"
}