# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:18

# Copy just package.json and package-lock.json (the glob also matches when
# no lockfile is present) to speed up the build using Docker layer cache.
COPY --chown=myuser package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
# `npm install` is kept instead of `npm ci` because the COPY above does not
# guarantee a package-lock.json, and `npm ci` fails without one.
# The npm cache is removed in the same layer so it never lands in the image;
# `-f` keeps the build from failing if the cache directory does not exist.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -rf ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY --chown=myuser . ./


# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
# Shell form is intentional: the `&&` chain needs a shell to interpret it.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "my-actor-13",
    "title": "Project Playwright Crawler JavaScript",
    "description": "Crawlee and Playwright project in JavaScript.",
    "version": "0.0",
    "meta": {
        "templateId": "js-crawlee-playwright-chrome"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
    "title": "PlaywrightCrawler Template",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
      "googleMapsURL": {
        "title": "Google Maps URL",
        "type": "string",
        "description": "The URL of the Google Maps search results to scrape.",
        "editor": "textfield",
        "prefill": "https://www.google.com/maps/search/gym/@44.3267641,-84.7358592,12.73z/data=!4m2!2m1!6e1?entry=ttu"
      }
  },
  "required": ["googleMapsURL"]
}

src/main.js

1import { chromium } from 'playwright';
2import { Actor } from 'apify';
3
/**
 * Actor entry point.
 *
 * Reads `googleMapsURL` from the Actor input, opens it in Chromium, scrolls
 * the results panel until the end-of-list message appears, collects every
 * link containing `https://www.google.com/maps/place`, then visits those
 * links in batches of `batchSize` and pushes one dataset item per place.
 */
(async () => {
    // Actor.init() is asynchronous; it must finish before the input is read.
    await Actor.init();

    // Fetch the input
    const input = await Actor.getInput();
    // Use the provided Google Maps URL from the input
    const googleMapsURL = input?.googleMapsURL;
    if (!googleMapsURL) {
        throw new Error('Missing required input field "googleMapsURL".');
    }

    // Launch browser
    console.time("Execution Time");
    const browser = await chromium.launch();

    try {
        const context = await browser.newContext();
        const page = await context.newPage();

        // Enter URL
        await page.goto(googleMapsURL);
        await page.waitForSelector('[jstcache="3"]');

        // Scroll within the specific element identified by XPath.
        // NOTE(review): the loop only ends on the end-of-list message or when
        // the panel cannot be found; confirm the message text still matches.
        while (true) {
            const pageContent = await page.content();
            if (pageContent.includes("You've reached the end of the list.")) {
                console.log("Reached the end of the list.");
                break;
            }

            const scrolled = await page.evaluate(() => {
                const scrollElement = document.evaluate('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                if (!scrollElement) {
                    return false;
                }
                scrollElement.scrollTop += 500;
                return true;
            });

            // Without the panel there is nothing to scroll; stop instead of
            // failing with "cannot read properties of null".
            if (!scrolled) {
                console.warn("Results panel not found; continuing with the links loaded so far.");
                break;
            }
        }

        // Extract URLs
        const urls = await page.evaluate(() => {
            const elements = Array.from(document.querySelectorAll('a[href*="https://www.google.com/maps/place"]'));
            return elements.map(element => element.href);
        });

        // The search page is no longer needed once the links are collected.
        await page.close();

        console.log(`Number of URLs extracted: ${urls.length}`);

        const batchSize = 5;

        // Pull info for each site
        for (let i = 0; i < urls.length; i += batchSize) {
            console.log(`Processing batch: ${i/batchSize + 1}/${Math.ceil(urls.length/batchSize)}`);
            const batchUrls = urls.slice(i, i + batchSize);
            const batchData = await Promise.all(batchUrls.map(async (url) => {
                const placePage = await context.newPage();
                try {
                    await placePage.goto(url);
                    await placePage.waitForSelector('[jstcache="3"]');

                    // Selectors to pull the information
                    const details = await placePage.evaluate(() => {

                        // Function for text
                        const getText = (selector) => {
                            const element = document.querySelector(selector);
                            return element ? element.innerText : '';
                        };

                        // Function for href
                        const getHref = (primarySelector, fallbackSelector) => {
                            let element = document.querySelector(primarySelector);
                            if (!element) {
                                element = document.querySelector(fallbackSelector);
                            }
                            return element && element.href ? element.href : '';
                        };

                        // Function for xpath
                        const getTextFromXPath = (xpath) => {
                            const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
                            return result.singleNodeValue ? result.singleNodeValue.innerText : '';
                        };

                        const companyName = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[1]/h1');
                        const rating = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[1]/span[1]');
                        let numberReviews = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[2]/span/span');
                        // Strip the parentheses around the review count.
                        numberReviews = numberReviews.replace(/\(|\)/g, '');
                        const category = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[2]/span/span/button');

                        return {
                            company: companyName,
                            rating: rating,
                            reviews: numberReviews,
                            category: category,
                            address: getText('button[data-tooltip="Copy address"]'),
                            website: getHref('a[data-tooltip="Open website"]', 'a[data-tooltip="Open menu link"]'),
                            phone: getText('button[data-tooltip="Copy phone number"]')
                        };
                    });

                    return { ...details, url };
                } catch (error) {
                    // One failing place must not reject the whole batch.
                    console.error(`Error processing URL ${url}: ${error}`);
                    return null;
                } finally {
                    // Always release the tab, including on navigation errors.
                    await placePage.close();
                }
            }));

            // Push data to Apify's dataset
            for (const item of batchData) {
                if (!item) {
                    continue; // this URL failed and was already logged
                }
                await Actor.pushData(item);
                console.log(`Data pushed for URL: ${item.url}`);
            }
        }
    } finally {
        // Shut Chromium down on success and on failure alike.
        await browser.close();
    }

    // Report timing before Actor.exit(), which terminates the process.
    console.timeEnd("Execution Time");
    await Actor.exit();
})();

src/main3.js

1import { chromium } from 'playwright';
2import { Actor } from 'apify';
3
/**
 * Actor entry point (variant that delegates per-place scraping to processUrl).
 *
 * Reads `googleMapsURL` from the Actor input, scrolls the results panel until
 * the end-of-list message appears, collects every link containing
 * `https://www.google.com/maps/place`, then processes those links in groups
 * of `concurrency`, pushing one dataset item per successfully scraped place.
 */
(async () => {
    // Actor.init() is asynchronous; it must finish before the input is read.
    await Actor.init();

    // Fetch the input
    const input = await Actor.getInput();
    // Use the provided Google Maps URL from the input
    const googleMapsURL = input?.googleMapsURL;
    if (!googleMapsURL) {
        throw new Error('Missing required input field "googleMapsURL".');
    }

    // Launch browser in headless mode for better performance
    console.time("Execution Time");
    const browser = await chromium.launch({ headless: true });

    try {
        const context = await browser.newContext();
        const page = await context.newPage();

        // Enter URL and wait for DOM content to load
        await page.goto(googleMapsURL, { waitUntil: 'domcontentloaded' });
        await page.waitForSelector('[jstcache="3"]');

        // Scroll until the end-of-list message shows up.
        // NOTE(review): the loop only ends on that message or when the panel
        // cannot be found; confirm the message text still matches.
        while (true) {
            const pageContent = await page.content();
            if (pageContent.includes("You've reached the end of the list.")) {
                console.log("Reached the end of the list.");
                break;
            }

            const scrolled = await page.evaluate(() => {
                const scrollElement = document.evaluate('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                if (!scrollElement) {
                    return false;
                }
                scrollElement.scrollTop += 500;
                return true;
            });

            // Without the panel there is nothing to scroll; stop instead of
            // failing with "cannot read properties of null".
            if (!scrolled) {
                console.warn("Results panel not found; continuing with the links loaded so far.");
                break;
            }
        }

        const urls = await page.evaluate(() => {
            const elements = Array.from(document.querySelectorAll('a[href*="https://www.google.com/maps/place"]'));
            return elements.map(element => element.href);
        });

        await page.close();

        console.log(`Number of URLs extracted: ${urls.length}`);

        // Process URLs in parallel
        const concurrency = 5; // Maximum number of pages to process simultaneously
        const promises = [];

        for (const url of urls) {
            const p = processUrl(url, context).then(async (details) => {
                // Push data to Apify's dataset. Awaiting keeps the write from
                // being dropped at exit and makes the log line truthful.
                await Actor.pushData(details);
                console.log(`Data pushed for URL: ${details.url}`);
            }).catch(error => {
                console.error(`Error processing URL ${url}: ${error}`);
            });

            promises.push(p);

            if (promises.length >= concurrency) {
                await Promise.all(promises);
                promises.length = 0; // Clear the array
            }
        }

        // Process any remaining promises
        await Promise.all(promises);
    } finally {
        // Shut Chromium down on success and on failure alike.
        await browser.close();
    }

    console.timeEnd("Execution Time");
    await Actor.exit();
})();
73
/**
 * Opens one Google Maps place URL in a new page of `context` and scrapes it.
 *
 * @param {string} url - Place URL to visit.
 * @param {object} context - Object exposing `newPage()`; the caller passes a
 *     Playwright browser context.
 * @returns {Promise<object>} The scraped fields (`company`, `rating`,
 *     `reviews`, `category`, `address`, `website`, `phone`) plus `url`.
 *     A field is an empty string when its element is not found.
 * @throws Rethrows any error from navigation, waiting or evaluation; the page
 *     is closed before the error propagates.
 */
async function processUrl(url, context) {
    const page = await context.newPage();

    try {
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        await page.waitForSelector('[jstcache="3"]');

        // Selectors to pull the information
        const details = await page.evaluate(() => {

            // Function for text
            const getText = (selector) => {
                const element = document.querySelector(selector);
                return element ? element.innerText : '';
            };

            // Function for href
            const getHref = (primarySelector, fallbackSelector) => {
                let element = document.querySelector(primarySelector);
                if (!element) {
                    element = document.querySelector(fallbackSelector);
                }
                return element && element.href ? element.href : '';
            };

            // Function for xpath
            const getTextFromXPath = (xpath) => {
                const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
                return result.singleNodeValue ? result.singleNodeValue.innerText : '';
            };

            const companyName = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[1]/h1');
            const rating = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[1]/span[1]');
            let numberReviews = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[2]/span/span');
            // Strip the parentheses around the review count.
            numberReviews = numberReviews.replace(/\(|\)/g, '');
            const category = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[2]/span/span/button');

            return {
                company: companyName,
                rating: rating,
                reviews: numberReviews,
                category: category,
                address: getText('button[data-tooltip="Copy address"]'),
                website: getHref('a[data-tooltip="Open website"]', 'a[data-tooltip="Open menu link"]'),
                phone: getText('button[data-tooltip="Copy phone number"]')
            };
        });

        return { ...details, url };
    } finally {
        // Always release the tab, even when a step above throws; otherwise
        // every failed URL leaves an open page behind in the shared context.
        await page.close();
    }
}

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
    "extends": "@apify",
    "root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control

.DS_Store
.idea
dist
node_modules
apify_storage
storage

package.json

{
    "name": "crawlee-playwright-javascript",
    "version": "0.0.1",
    "type": "module",
    "description": "This is an example of an Apify actor.",
    "dependencies": {
        "apify": "^3.1.10",
        "crawlee": "^3.5.4",
        "playwright": "*"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.4.0",
        "eslint": "^8.50.0"
    },
    "scripts": {
        "start": "node src/main.js",
        "lint": "eslint ./src --ext .js,.jsx",
        "lint:fix": "eslint ./src --ext .js,.jsx --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1",
        "postinstall": "npx crawlee install-playwright-browsers"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}

DineFilter

ohlava/dinefilter

Say what you want to eat and where, we'll find it

Ondřej Hlava

Google Maps Business Lead and Business Website Scraper

lead.gen.labs/google-maps-business-lead-and-business-website-scraper

Unlock valuable business leads by effortlessly scraping contact details—name, address, phone, website, and reviews—from Google Maps. Perfect for boosting your marketing outreach and sales pipeline.

LeadGen Labs

344

2.6

(2)

Googlemaps Scraper

dainty_screw/googlemaps-scraper

Scrape detailed information about any niche or topic from Google Maps, including ratings, reviews, addresses, and more.

codemaster devops

5.0

(1)

Google Map Simple scraper

danny.hub/google-map-simple-scraper

Scrape data from Google Maps. It extracts only core information: shop name, score, address, phone number, and comment count. The scraping process is very fast, so you can collect plenty of phone numbers in a short time.

Dannyswift.hub

112

1.0

(1)

Google Maps Fast Scraper

xmiso_scrapers/google-maps-scraper

No need to overpay. Get Google Maps results lightning fast, for just $0.50 per 1,000 results. Try it now!

Miso

124

4.5

(9)

Google Maps Scraper Made Easy

datacach/google-maps-scraper-made-easy

Effortlessly extract location data with Google Maps Scraper Made Easy. Boost your business and marketing by quickly gathering contacts, reviews, and addresses. Perfect for lead generation, research, and local SEO success. Simple, fast, and reliable!

DataCach

Enrich Google Maps Dataset with Contacts

compass/enrich-google-maps-dataset-with-contacts

Enrich Google Maps Dataset with Contacts. Scrape websites of Google Maps places for contact details and get email addresses, website, location, address, zipcode, phone number, social media links. Export scraped data, run the scraper via API, schedule and monitor runs or integrate with other tools.

Compass

1.1K

3.8

(8)

URL Metadata Crawler

easyapi/url-metadata-crawler

Extracting comprehensive metadata from web pages. Gather vital information like meta tags, favicons, Open Graph tags, and more, all while enjoying flexible options for customization. Perfect for SEO specialists, developers, and content creators looking to enhance their web presence! 🌐

EasyApi

Google Maps Business: Cheaper, Robust, Easy

agents/google-maps-business

Extract comprehensive business profiles from Google Maps with lightning speed. Our scraper delivers complete company data including contact info, hours, ratings, and amenities. Perfect for building databases, market research, and competitive analysis across Google's vast business ecosystem.

Agents

5.0

(1)

📩📍 Google Maps Email Extractor

lukaskrivka/google-maps-with-contact-details

Extract Google Maps contact details. Scrape websites of Google Maps places for contact details and get email addresses, website, location, address, zipcode, phone number, social media links. Export scraped data, run the scraper via API, schedule and monitor runs or integrate with other tools.

Lukáš Křivka

44K

4.4

(85)

Scrape Haven: Get Leads Emails and Phone Numbers

williams_asante/scrape-haven-get-leads-emails-and-phone-numbers

ScrapeHaven - A Google Maps Scraper, is a powerful and easy-to-use Actor designed to extract business data from Google Maps. Whether you're looking for contact details, reviews, or social media links, ScrapeHaven simplifies the process of gathering valuable business information at scale.

Williams Asante

430

4.1

(4)

Google Maps Easy Scrape

Google Maps Easy Scrape

.actor/Dockerfile

.actor/actor.json

.actor/input_schema.json

src/main.js

src/main3.js

.dockerignore

.editorconfig

.eslintrc

.gitignore

package.json

You might also like

DineFilter

Google Maps Business Lead and Business Website Scraper

Googlemaps Scraper

Google Map Simple scraper

Google Maps Fast Scraper

Google Maps Scraper Made Easy

Enrich Google Maps Dataset with Contacts

URL Metadata Crawler

Google Maps Business: Cheaper, Robust, Easy

📩📍 Google Maps Email Extractor

Scrape Haven: Get Leads Emails and Phone Numbers

.actor/Dockerfile

.actor/actor.json

.actor/input_schema.json

src/main.js

src/main3.js

.dockerignore

.editorconfig

.eslintrc

.gitignore

package.json