
Google Maps Easy Scrape
Pricing
Pay per usage
Go to Store

Google Maps Easy Scrape
5.0 (1)
Pricing
Pay per usage
22
Total users
1.3k
Monthly users
53
Runs succeeded
88%
Last modified
a year ago
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:18

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent
.actor/actor.json
{ "actorSpecification": 1, "name": "my-actor-13", "title": "Project Playwright Crawler JavaScript", "description": "Crawlee and Playwright project in JavaScript.", "version": "0.0", "meta": { "templateId": "js-crawlee-playwright-chrome" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile"}
.actor/input_schema.json
{ "title": "PlaywrightCrawler Template", "type": "object", "schemaVersion": 1, "properties": { "googleMapsURL": { "title": "Google Maps URL", "type": "string", "description": "The URL of the Google Maps search results to scrape.", "editor": "textfield", "prefill": "https://www.google.com/maps/search/gym/@44.3267641,-84.7358592,12.73z/data=!4m2!2m1!6e1?entry=ttu" } }, "required": ["googleMapsURL"]}
src/main.js
import { chromium } from 'playwright';
import { Actor } from 'apify';

(async () => {
    // Initialize the Apify SDK. This must be awaited so storages are ready
    // before getInput()/pushData() are called (the original fired-and-forgot it).
    await Actor.init();

    // Fetch the input supplied to the actor run.
    const input = await Actor.getInput();
    // Use the provided Google Maps URL from the input.
    const googleMapsURL = input.googleMapsURL;

    // Launch browser
    console.time("Execution Time");
    const browser = await chromium.launch();
    const context = await browser.newContext();

    try {
        const listPage = await context.newPage();

        // Open the search results and wait for the main container to render.
        await listPage.goto(googleMapsURL);
        await listPage.waitForSelector('[jstcache="3"]');

        // Scroll the results pane until Google signals the end of the list.
        while (true) {
            const pageContent = await listPage.content();
            if (pageContent.includes("You've reached the end of the list.")) {
                console.log("Reached the end of the list.");
                break;
            }
            await listPage.evaluate(() => {
                // Scrollable results pane, located by absolute XPath.
                // NOTE(review): brittle — breaks whenever Google changes markup.
                const scrollElement = document.evaluate('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                if (scrollElement) scrollElement.scrollTop += 500;
            });
            // Let newly loaded results render; the original looped as fast as
            // possible, re-serializing the whole DOM on every iteration.
            await listPage.waitForTimeout(250);
        }

        // Extract every place-details link from the results list.
        const urls = await listPage.evaluate(() => {
            const elements = Array.from(document.querySelectorAll('a[href*="https://www.google.com/maps/place"]'));
            return elements.map((element) => element.href);
        });
        await listPage.close();

        console.log(`Number of URLs extracted: ${urls.length}`);

        const data = [];
        const batchSize = 5; // detail tabs opened concurrently per batch

        // Pull info for each site, batchSize URLs at a time.
        for (let i = 0; i < urls.length; i += batchSize) {
            console.log(`Processing batch: ${i / batchSize + 1}/${Math.ceil(urls.length / batchSize)}`);
            const batchUrls = urls.slice(i, i + batchSize);
            const batchData = await Promise.all(batchUrls.map(async (url) => {
                const page = await context.newPage();
                try {
                    await page.goto(url);
                    await page.waitForSelector('[jstcache="3"]');

                    // Runs in the browser: scrape the visible place details.
                    const details = await page.evaluate(() => {
                        // Text content of the first element matching a CSS selector.
                        const getText = (selector) => {
                            const element = document.querySelector(selector);
                            return element ? element.innerText : '';
                        };

                        // href of the primary selector, falling back to a secondary one.
                        const getHref = (primarySelector, fallbackSelector) => {
                            let element = document.querySelector(primarySelector);
                            if (!element) {
                                element = document.querySelector(fallbackSelector);
                            }
                            return element && element.href ? element.href : '';
                        };

                        // Text content of the node found by an absolute XPath.
                        const getTextFromXPath = (xpath) => {
                            const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
                            return result.singleNodeValue ? result.singleNodeValue.innerText : '';
                        };

                        const companyName = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[1]/h1');
                        const rating = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[1]/span[1]');
                        let numberReviews = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[2]/span/span');
                        numberReviews = numberReviews.replace(/\(|\)/g, ''); // strip "(123)" parentheses
                        const category = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[2]/span/span/button');

                        return {
                            company: companyName,
                            rating: rating,
                            reviews: numberReviews,
                            category: category,
                            address: getText('button[data-tooltip="Copy address"]'),
                            website: getHref('a[data-tooltip="Open website"]', 'a[data-tooltip="Open menu link"]'),
                            phone: getText('button[data-tooltip="Copy phone number"]'),
                        };
                    });

                    return { ...details, url };
                } catch (error) {
                    // One bad listing must not reject the whole batch.
                    console.error(`Error processing URL ${url}: ${error}`);
                    return null;
                } finally {
                    // Close the tab even when navigation/scraping failed.
                    await page.close();
                }
            }));

            // Push data to Apify's dataset (skipping failed URLs).
            for (const item of batchData) {
                if (item === null) continue;
                await Actor.pushData(item);
                console.log(`Data pushed for URL: ${item.url}`);
            }
            data.push(...batchData.filter((item) => item !== null));
        }

        // Stop the timer BEFORE exiting; the original called Actor.exit()
        // first, so console.timeEnd never ran.
        console.timeEnd("Execution Time");
    } finally {
        // Release browser resources and terminate the actor cleanly.
        await browser.close();
        await Actor.exit();
    }
})();
src/main3.js
import { chromium } from 'playwright';
import { Actor } from 'apify';

(async () => {
    // Initialize the Apify SDK. Awaited so storages are ready before
    // getInput()/pushData() run (the original did not await it).
    await Actor.init();

    // Fetch the input and pull out the Google Maps search URL.
    const input = await Actor.getInput();
    const googleMapsURL = input.googleMapsURL;

    // Launch browser in headless mode for better performance.
    console.time("Execution Time");
    const browser = await chromium.launch({ headless: true });
    const context = await browser.newContext();
    const page = await context.newPage();

    // Enter URL and wait for DOM content to load.
    await page.goto(googleMapsURL, { waitUntil: 'domcontentloaded' });
    await page.waitForSelector('[jstcache="3"]');

    // Scroll the results pane until Google reports the end of the list.
    while (true) {
        const pageContent = await page.content();
        if (pageContent.includes("You've reached the end of the list.")) {
            console.log("Reached the end of the list.");
            break;
        }
        await page.evaluate(() => {
            // Scrollable results pane, located by absolute XPath.
            const scrollElement = document.evaluate('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
            if (scrollElement) scrollElement.scrollTop += 500;
        });
        // Give the list a moment to load more results before re-checking,
        // instead of busy-looping over page.content().
        await page.waitForTimeout(250);
    }

    // Collect every place-details link from the results list.
    const urls = await page.evaluate(() => {
        const elements = Array.from(document.querySelectorAll('a[href*="https://www.google.com/maps/place"]'));
        return elements.map((element) => element.href);
    });

    await page.close();

    console.log(`Number of URLs extracted: ${urls.length}`);

    // Process URLs in parallel, `concurrency` tabs at a time.
    const concurrency = 5;
    let promises = [];

    for (const url of urls) {
        const p = processUrl(url, context)
            // Return the pushData promise so the push is awaited before the
            // run exits (the original left it floating inside .then).
            .then((details) => Actor.pushData(details).then(() => {
                console.log(`Data pushed for URL: ${details.url}`);
            }))
            .catch((error) => {
                console.error(`Error processing URL ${url}: ${error}`);
            });

        promises.push(p);

        if (promises.length >= concurrency) {
            await Promise.all(promises);
            promises = []; // start a fresh batch
        }
    }

    // Process any remaining promises.
    await Promise.all(promises);
    console.timeEnd("Execution Time");

    // Close the browser before exiting so Chromium processes are released.
    await browser.close();
    await Actor.exit();
})();
// Scrape one Google Maps place page and return its details.
// @param {string} url - Absolute URL of a /maps/place page.
// @param {BrowserContext} context - Playwright context to open the tab in.
// @returns {Promise<object>} Scraped fields plus the source `url`.
async function processUrl(url, context) {
    const page = await context.newPage();
    try {
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        await page.waitForSelector('[jstcache="3"]');

        // Runs in the browser: pull the visible place details.
        const details = await page.evaluate(() => {
            // Text content of the first element matching a CSS selector.
            const getText = (selector) => {
                const element = document.querySelector(selector);
                return element ? element.innerText : '';
            };

            // href of the primary selector, falling back to a secondary one.
            const getHref = (primarySelector, fallbackSelector) => {
                let element = document.querySelector(primarySelector);
                if (!element) {
                    element = document.querySelector(fallbackSelector);
                }
                return element && element.href ? element.href : '';
            };

            // Text content of the node found by an absolute XPath.
            const getTextFromXPath = (xpath) => {
                const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
                return result.singleNodeValue ? result.singleNodeValue.innerText : '';
            };

            const companyName = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[1]/h1');
            const rating = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[1]/span[1]');
            let numberReviews = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[2]/span/span');
            numberReviews = numberReviews.replace(/\(|\)/g, ''); // strip "(123)" parentheses
            const category = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[2]/span/span/button');

            return {
                company: companyName,
                rating: rating,
                reviews: numberReviews,
                category: category,
                address: getText('button[data-tooltip="Copy address"]'),
                website: getHref('a[data-tooltip="Open website"]', 'a[data-tooltip="Open menu link"]'),
                phone: getText('button[data-tooltip="Copy phone number"]'),
            };
        });

        return { ...details, url };
    } finally {
        // Close the tab even if navigation or scraping threw, so failed
        // URLs no longer leak pages (the original skipped page.close()).
        await page.close();
    }
}
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.eslintrc
{ "extends": "@apify", "root": true}
.gitignore
# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage
package.json
{ "name": "crawlee-playwright-javascript", "version": "0.0.1", "type": "module", "description": "This is an example of an Apify actor.", "dependencies": { "apify": "^3.1.10", "crawlee": "^3.5.4", "playwright": "*" }, "devDependencies": { "@apify/eslint-config": "^0.4.0", "eslint": "^8.50.0" }, "scripts": { "start": "node src/main.js", "lint": "eslint ./src --ext .js,.jsx", "lint:fix": "eslint ./src --ext .js,.jsx --fix", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1", "postinstall": "npx crawlee install-playwright-browsers" }, "author": "It's not you it's me", "license": "ISC"}