
Google Maps Easy Scrape
Pricing
Pay per usage
Go to Apify Store

Google Maps Easy Scrape
Deprecated · 5.0 (1)
Pricing
Pay per usage
22
1.3K
5
Issues response
66 days
Last modified
2 years ago
Pricing
Pay per usage
5.0 (1)
Pricing
Pay per usage
22
1.3K
5
Issues response
66 days
Last modified
2 years ago
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:18

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent
{ "actorSpecification": 1, "name": "my-actor-13", "title": "Project Playwright Crawler JavaScript", "description": "Crawlee and Playwright project in JavaScript.", "version": "0.0", "meta": { "templateId": "js-crawlee-playwright-chrome" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile"}
{ "title": "PlaywrightCrawler Template", "type": "object", "schemaVersion": 1, "properties": { "googleMapsURL": { "title": "Google Maps URL", "type": "string", "description": "The URL of the Google Maps search results to scrape.", "editor": "textfield", "prefill": "https://www.google.com/maps/search/gym/@44.3267641,-84.7358592,12.73z/data=!4m2!2m1!6e1?entry=ttu" } }, "required": ["googleMapsURL"]}
import { chromium } from 'playwright';
import { Actor } from 'apify';

(async () => {
    // Initialize the Apify SDK. Actor.init() is async and must be awaited
    // before any other Actor API (getInput/pushData) is used.
    await Actor.init();

    // Fetch the input
    const input = await Actor.getInput();
    // Use the provided Google Maps URL from the input
    const googleMapsURL = input?.googleMapsURL;
    if (!googleMapsURL) {
        throw new Error('Missing required input field "googleMapsURL".');
    }

    // Launch browser
    console.time("Execution Time");
    const browser = await chromium.launch();
    const context = await browser.newContext();
    const page = await context.newPage();

    try {
        // Enter URL
        await page.goto(googleMapsURL);
        await page.waitForSelector('[jstcache="3"]');

        // Scroll within the results panel (located by XPath) until Google Maps
        // reports the end of the result list.
        while (true) {
            const pageContent = await page.content();
            if (pageContent.includes("You've reached the end of the list.")) {
                console.log("Reached the end of the list.");
                break;
            }
            await page.evaluate(() => {
                const scrollElement = document.evaluate('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                // Guard: if the panel isn't in the DOM yet, skip this tick
                // instead of throwing inside the page and aborting the run.
                if (scrollElement) scrollElement.scrollTop += 500;
            });
        }

        // Extract all place URLs from the fully-scrolled results list.
        const urls = await page.evaluate(() => {
            const elements = Array.from(document.querySelectorAll('a[href*="https://www.google.com/maps/place"]'));
            return elements.map((element) => element.href);
        });

        console.log(`Number of URLs extracted: ${urls.length}`);

        const batchSize = 5;

        // Pull info for each site, batchSize detail pages at a time.
        for (let i = 0; i < urls.length; i += batchSize) {
            console.log(`Processing batch: ${i / batchSize + 1}/${Math.ceil(urls.length / batchSize)}`);
            const batchUrls = urls.slice(i, i + batchSize);
            const batchData = await Promise.all(batchUrls.map(async (url) => {
                const detailPage = await context.newPage();
                try {
                    await detailPage.goto(url);
                    await detailPage.waitForSelector('[jstcache="3"]');

                    // Selectors to pull the information
                    const details = await detailPage.evaluate(() => {
                        // Function for text
                        const getText = (selector) => {
                            const element = document.querySelector(selector);
                            return element ? element.innerText : '';
                        };

                        // Function for href (falls back to the menu link when no
                        // website link exists)
                        const getHref = (primarySelector, fallbackSelector) => {
                            let element = document.querySelector(primarySelector);
                            if (!element) {
                                element = document.querySelector(fallbackSelector);
                            }
                            return element && element.href ? element.href : '';
                        };

                        // Function for xpath
                        const getTextFromXPath = (xpath) => {
                            const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
                            return result.singleNodeValue ? result.singleNodeValue.innerText : '';
                        };

                        const companyName = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[1]/h1');
                        const rating = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[1]/span[1]');
                        let numberReviews = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[2]/span/span');
                        // Strip the surrounding parentheses, e.g. "(123)" -> "123".
                        numberReviews = numberReviews.replace(/\(|\)/g, '');
                        const category = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[2]/span/span/button');

                        return {
                            company: companyName,
                            rating: rating,
                            reviews: numberReviews,
                            category: category,
                            address: getText('button[data-tooltip="Copy address"]'),
                            website: getHref('a[data-tooltip="Open website"]', 'a[data-tooltip="Open menu link"]'),
                            phone: getText('button[data-tooltip="Copy phone number"]')
                        };
                    });

                    return { ...details, url };
                } finally {
                    // Close the tab even when navigation/extraction throws so a
                    // failed URL cannot leak a page for the rest of the run.
                    await detailPage.close();
                }
            }));

            // Push data to Apify's dataset
            for (const item of batchData) {
                await Actor.pushData(item);
                console.log(`Data pushed for URL: ${item.url}`);
            }
        }
    } finally {
        // Release the browser regardless of success/failure.
        await browser.close();
    }

    console.timeEnd("Execution Time");
    // Actor.exit() terminates the process, so it must run last — in the
    // original it preceded console.timeEnd, which therefore never fired.
    await Actor.exit();
})();
import { chromium } from 'playwright';
import { Actor } from 'apify';

(async () => {
    // Actor.init() is async; awaiting it guarantees the SDK is ready before
    // getInput()/pushData() are called.
    await Actor.init();

    // Fetch the input
    const input = await Actor.getInput();
    // Use the provided Google Maps URL from the input
    const googleMapsURL = input.googleMapsURL;

    // Launch browser in headless mode for better performance
    console.time("Execution Time");
    const browser = await chromium.launch({ headless: true });
    const context = await browser.newContext();
    const page = await context.newPage();

    // Enter URL and wait for DOM content to load
    await page.goto(googleMapsURL, { waitUntil: 'domcontentloaded' });
    await page.waitForSelector('[jstcache="3"]');

    let urls = [];

    // Scroll the results panel until Google Maps reports the end of the list.
    while (true) {
        const pageContent = await page.content();
        if (pageContent.includes("You've reached the end of the list.")) {
            console.log("Reached the end of the list.");
            break;
        }
        await page.evaluate(() => {
            const scrollElement = document.evaluate('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
            // Guard: skip the tick if the panel isn't in the DOM yet instead of
            // throwing inside the page context.
            if (scrollElement) scrollElement.scrollTop += 500;
        });
    }

    urls = await page.evaluate(() => {
        const elements = Array.from(document.querySelectorAll('a[href*="https://www.google.com/maps/place"]'));
        return elements.map((element) => element.href);
    });

    await page.close();

    console.log(`Number of URLs extracted: ${urls.length}`);

    // Process URLs in parallel, at most `concurrency` detail pages at a time.
    const concurrency = 5;
    const promises = [];

    for (const url of urls) {
        const p = processUrl(url, context)
            .then(async (details) => {
                // Await the dataset write so a pushData failure is routed into
                // the .catch below instead of becoming an unhandled rejection.
                await Actor.pushData(details);
                console.log(`Data pushed for URL: ${details.url}`);
            })
            .catch((error) => {
                console.error(`Error processing URL ${url}: ${error}`);
            });

        promises.push(p);

        if (promises.length >= concurrency) {
            await Promise.all(promises);
            promises.length = 0; // Clear the array
        }
    }

    // Process any remaining promises
    await Promise.all(promises);

    // Release the browser before exiting; Actor.exit() ends the process.
    await browser.close();
    console.timeEnd("Execution Time");
    await Actor.exit();
})();
/**
 * Scrape the business details from a single Google Maps place page.
 *
 * @param {string} url - Google Maps place URL to open.
 * @param {object} context - Playwright BrowserContext shared across workers.
 * @returns {Promise<object>} Scraped fields plus the source `url`.
 * @throws Propagates navigation/extraction errors; the page is still closed.
 */
async function processUrl(url, context) {
    const page = await context.newPage();
    try {
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        await page.waitForSelector('[jstcache="3"]');

        // Selectors to pull the information
        const details = await page.evaluate(() => {
            // Function for text
            const getText = (selector) => {
                const element = document.querySelector(selector);
                return element ? element.innerText : '';
            };

            // Function for href (falls back to the menu link when no website
            // link exists)
            const getHref = (primarySelector, fallbackSelector) => {
                let element = document.querySelector(primarySelector);
                if (!element) {
                    element = document.querySelector(fallbackSelector);
                }
                return element && element.href ? element.href : '';
            };

            // Function for xpath
            const getTextFromXPath = (xpath) => {
                const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
                return result.singleNodeValue ? result.singleNodeValue.innerText : '';
            };

            const companyName = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[1]/h1');
            const rating = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[1]/span[1]');
            let numberReviews = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[2]/span/span');
            // Strip the surrounding parentheses, e.g. "(123)" -> "123".
            numberReviews = numberReviews.replace(/\(|\)/g, '');
            const category = getTextFromXPath('/html/body/div[2]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[2]/span/span/button');

            return {
                company: companyName,
                rating: rating,
                reviews: numberReviews,
                category: category,
                address: getText('button[data-tooltip="Copy address"]'),
                website: getHref('a[data-tooltip="Open website"]', 'a[data-tooltip="Open menu link"]'),
                phone: getText('button[data-tooltip="Copy phone number"]')
            };
        });

        return { ...details, url };
    } finally {
        // Close the tab even if goto/waitForSelector/evaluate throws; the
        // original only closed on success, leaking a page per failed URL.
        await page.close();
    }
}
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
{ "extends": "@apify", "root": true}
# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage
{ "name": "crawlee-playwright-javascript", "version": "0.0.1", "type": "module", "description": "This is an example of an Apify actor.", "dependencies": { "apify": "^3.1.10", "crawlee": "^3.5.4", "playwright": "*" }, "devDependencies": { "@apify/eslint-config": "^0.4.0", "eslint": "^8.50.0" }, "scripts": { "start": "node src/main.js", "lint": "eslint ./src --ext .js,.jsx", "lint:fix": "eslint ./src --ext .js,.jsx --fix", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1", "postinstall": "npx crawlee install-playwright-browsers" }, "author": "It's not you it's me", "license": "ISC"}