Google Play Scraper ✅ FREE ✅ avatar
Google Play Scraper ✅ FREE ✅
Try for free

No credit card required

View all Actors
Google Play Scraper ✅ FREE ✅

Google Play Scraper ✅ FREE ✅

curious_coder/google-play-scraper
Try for free

No credit card required

Scrape the Google Play store to get apps, details, developers, categories, and reviews. If you are looking for a reliable Google Play reviews scraper, give this tool a try.

.DS_Store

Download

.gitignore

1node_modules
2storage

package.json

1{
2  "name": "scraper",
3  "version": "0.0.1",
4  "type": "module",
5  "description": "Scraper",
6  "dependencies": {
7    "apify": "^3.1.5",
8    "google-play-scraper": "^9.1.1",
9    "@sentry/node": "^7.57.0"
10  },
11  "scripts": {
12    "start": "node src/main.js",
13    "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
14  },
15  "author": "It's not you it's me",
16  "license": "ISC"
17}

.actor/Dockerfile

# Base image — see https://docs.apify.com/sdk/js/docs/guides/docker-images
# for the full list of available Apify images.
FROM apify/actor-node:16

# Copy only the package manifests first so the dependency-install layer
# is cached independently of source-code changes.
COPY package*.json ./

# Install production dependencies only (skip dev and optional) to keep
# the image small; print the dependency tree and tool versions for
# debugging, then drop the npm cache.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Copy the remaining source files. Doing this after the install step
# keeps rebuilds fast for ordinary code changes.
COPY . ./

# Start the actor.
CMD npm start --silent

.actor/actor.json

1{
2    "actorSpecification": 1,
3    "name": "google-play-scraper",
4    "title": "Google play reviews scraper",
5    "description": "Scrape data from Google Play",
6    "version": "1.0",
7    "meta": {
8        "templateId": "js-start"
9    },
10    "input": "./input_schema.json",
11    "dockerfile": "./Dockerfile",
12    "storages": {
13        "dataset": {
14            "actorSpecification": 1,
15            "title": "Results",
16            "views": {
17            }
18        }
19    }
20}

.actor/input_schema.json

1{
2  "title": "Scraper",
3  "type": "object",
4  "schemaVersion": 1,
5  "properties": {
6    "action": {
7      "title": "Select action to perform",
8      "type": "string",
9      "description": "Actor will decide what kind of data it needs to scrape and from where it should scrape it based on selected action",
10      "editor": "select",
11      "enum": [
12        "scrapeReviews"
13      ],
14      "enumTitles": [
15        "Scrape reviews"
16      ],
17      "prefill": "scrapeReviews",
18      "default": "scrapeReviews"
19    },
20    "scrapeReviews.appId": {
21      "sectionCaption": "Scrape reviews",
22      "sectionDescription": "This section is applicable only when action is **Scrape reviews**",
23      "title": "App ID",
24      "type": "string",
25      "description": "The Google Play id of the application (the ?id= parameter on the url).",
26      "editor": "textfield",
27      "prefill": "com.rockstargames.gtasa"
28    },
29    "count": {
30      "title": "#️⃣ Total number of records required",
31      "type": "integer",
32  "description": "Total number of records to scrape. Defaults to 10; leave this field empty to scrape all items.",
33      "editor": "number",
34      "default": 10
35    },
36    "cursor": {
37      "title": "Start from cursor",
38      "type": "string",
39      "description": "Use this field if you want to resume scraping from where the scraper stopped last time. You can find this value in default storage of scraper runs under **lastCursor** key",
40      "editor": "textfield",
41      "default": ""
42    },
43    "proxy": {
44      "title": "Proxy",
45      "type": "object",
46      "description": "You can use proxy from your account's country if needed for extra safety.",
47      "editor": "proxy"
48    },
49    "minDelay": {
50      "title": "⏱️ Minimum wait duration",
51      "type": "integer",
52      "description": "Minimum duration in seconds to wait before scraping next page",
53      "editor": "number",
54      "prefill": 1,
55      "default": 1,
56      "minimum": 1
57    },
58    "maxDelay": {
59      "title": "⏱️ Maximum wait duration",
60      "type": "integer",
61      "description": "Maximum duration in seconds to wait before scraping next page",
62      "editor": "number",
63      "prefill": 3,
64      "default": 3,
65      "minimum": 3
66    }
67  },
68  "required": ["action"]
69}

src/.DS_Store

Download

src/client.js

1import gplay from 'google-play-scraper'
2
3import helpers from './helpers.js'
/**
 * Thin wrapper around the `google-play-scraper` package.
 */
class Client {
	/**
	 * @param {object} options
	 * @param {string} [options.proxyUrl] - Accepted for interface parity but
	 *   currently unused — requests go out without a proxy.
	 *   NOTE(review): confirm whether this should be wired into gplay's
	 *   request options.
	 */
	constructor ({proxyUrl}) {
	}

	/**
	 * Fetch one page of reviews for an app, sorted by rating.
	 * @param {object} params
	 * @param {string} params.appId - Google Play application id.
	 * @param {string} [params.cursor] - Pagination token from the previous page.
	 * @returns {Promise<object>} gplay result object; main.js reads
	 *   `data` (reviews array) and `nextPaginationToken` from it.
	 */
	async getReviews ({appId, cursor}) {
		const result = await gplay.reviews({
			appId,
			sort: gplay.sort.RATING,
			paginate: true,
			nextPaginationToken: cursor
		})
		return result
	}
}

export default Client

src/helpers.js

1import {CookieJar, Cookie} from 'tough-cookie'
2import log from '@apify/log'
3
/**
 * Collect input keys of the form `<prefix>.<name>` into a flat object keyed
 * by `<name>`, e.g. ({"scrapeReviews.appId": "x"}, "scrapeReviews")
 * → {appId: "x"}. Keys without the prefix are ignored.
 */
function getInputGroup (obj, prefix) {
  const groupPrefix = prefix + '.'
  const result = {}
  for (const [key, value] of Object.entries(obj)) {
    if (key.startsWith(groupPrefix)) {
      result[key.replace(groupPrefix, '')] = value
    }
  }
  return result
}
17
/**
 * Add an array of {name, value} cookies to a tough-cookie jar.
 * @param {CookieJar} jar - Jar to update (mutated in place).
 * @param {Array|string} cookies - Cookie list, or a JSON string encoding one.
 * @param {string} url - URL the cookies are set against.
 * @param {string} domain - Domain assigned to every cookie.
 * @returns {CookieJar} The same jar, for chaining.
 * @throws {Error} When `cookies` is a string that is not valid JSON.
 */
function updateCookieJar (jar, cookies, url, domain) {
  let cookieList = cookies
  if (typeof cookieList == 'string') {
    try {
      cookieList = JSON.parse(cookieList)
    } catch (error) {
      throw new Error('Failed to parse cookie data')
    }
  }
  for (const {name, value} of cookieList) {
    const cookie = new Cookie({key: name, value, domain})
    jar.setCookieSync(cookie, url)
  }
  return jar
}
33
/**
 * Find the first element of `arr` matching `match` and shallow-merge
 * `updates` into it (the element is mutated in place).
 * @returns {object|undefined} The updated element, or undefined when no
 *   element matched.
 */
function updateItem (arr, match, updates) {
  const target = arr.find(match)
  if (!target) return
  Object.assign(target, updates)
  return target
}
40
/**
 * Build a new object containing only the listed `keys` of `object`
 * (keys absent from `object` come through with value undefined).
 * Returns undefined when `object` itself is falsy.
 */
function pickProperties (object, keys) {
  if (!object) return
  return Object.fromEntries(keys.map((key) => [key, object[key]]))
}
47
/**
 * Safely read a nested property.
 * @param {object} object - Root object to read from.
 * @param {string|string[]} path - Dot-separated string or array of keys.
 * @param {*} defaultValue - Returned when the path dead-ends in
 *   null/undefined before the last key, or when the final value is
 *   undefined. A final value of null is returned as-is.
 * @returns {*} The value at `path`, else `defaultValue`.
 */
function getProperty (object, path, defaultValue) {
  const segments = Array.isArray(path) ? path : path.split('.')
  let current = object
  for (const segment of segments) {
    if (current == null) return defaultValue
    current = current[segment]
    if (current === undefined) return defaultValue
  }
  return current
}
65
/** Remove anything that looks like an HTML/XML tag from `html`. */
function stripTags (html = '') {
  return html.replace(/<[^>]+>/g, '')
}
69
/**
 * Look up the value of cookie `name` for `url` in a tough-cookie jar
 * (searching all paths).
 * @returns {Promise<string|undefined>} The cookie value, or undefined
 *   when no cookie with that key exists.
 */
async function getCookieValue (cookieJar, url, name) {
  const cookies = await cookieJar.getCookies(url, {allPaths: true})
  const match = cookies.find((c) => c.key == name)
  return match?.value
}
75
/**
 * Promise-based setTimeout. Optionally reports the timer id through
 * `timerIdCb` so the caller can cancel the timer.
 * @param {number} delay - Milliseconds to wait.
 * @param {function} [timerIdCb] - Receives the setTimeout id.
 */
function wait (delay, timerIdCb) {
  return new Promise((resolve) => {
    const id = setTimeout(resolve, delay)
    if (typeof timerIdCb == 'function') timerIdCb(id)
  })
}
82
/**
 * Iterate `array` sequentially, awaiting `callback(item, index)` for each
 * element and sleeping `delay` ms between iterations. The callback may
 * return the string 'break' to stop early (no delay is awaited then).
 * `delay` can be a number, or a [min, max] pair drawn randomly per step.
 */
async function smartLoop ({array, callback, delay}) {
  for (const [index, item] of array.entries()) {
    const result = await callback(item, index)
    if (result == 'break') break
    await wait(Array.isArray(delay) ? randomInt(delay[0], delay[1]) : delay)
  }
}
91
/**
 * Random integer in [min, max] inclusive, re-drawing while the result
 * equals `excludeNum`. The exclusion is ignored when min == max, which
 * would otherwise loop forever.
 */
function randomInt (min, max, excludeNum) {
  while (true) {
    const num = min + Math.floor(Math.random() * (max - min + 1))
    if (num != excludeNum || min == max) return num
  }
}
98
/** Sleep a random whole number of seconds between `min` and `max`. */
function randomDelay (min, max) {
  const seconds = randomInt(min, max)
  return wait(seconds * 1000)
}
102
103
/**
 * Parse a query string into a plain object.
 *  - `key[]=a&key[]=b`    → {key: ['a', 'b']}
 *  - `parent.child=x`     → {parent: {child: 'x'}} (one level deep only;
 *    any segments after the second are dropped)
 *  - repeated plain keys  → collected into an array
 */
function parseUrlQuery (search) {
  const result = {}
  for (const [key, value] of new URLSearchParams(search)) {
    if (key.endsWith('[]')) {
      // Explicit array parameter.
      const name = key.slice(0, -2)
      if (Object.prototype.hasOwnProperty.call(result, name)) {
        result[name].push(value)
      } else {
        result[name] = [value]
      }
    } else if (key.includes('.')) {
      // One-level nested object parameter.
      const [parent, child] = key.split('.')
      if (!Object.prototype.hasOwnProperty.call(result, parent)) {
        result[parent] = {}
      }
      result[parent][child] = value
    } else if (!Object.prototype.hasOwnProperty.call(result, key)) {
      result[key] = value
    } else {
      // Repeated plain key: promote to an array and append.
      if (!Array.isArray(result[key])) {
        result[key] = [result[key]]
      }
      result[key].push(value)
    }
  }
  return result
}
142
/**
 * Depth-first search `object` for a property named `key` and return its
 * value, or null when the key is not found anywhere.
 *
 * Fix: the previous version never stopped after a match — it kept walking
 * the entire tree, so a later/deeper duplicate key silently overwrote the
 * result and the full object was always traversed. The search now returns
 * the first match in document order and terminates immediately.
 */
function searchKeyAndGetValue (object, key) {
  function search (obj) {
    for (const prop in obj) {
      if (Object.prototype.hasOwnProperty.call(obj, prop)) {
        if (prop === key) return obj[prop]
        if (typeof obj[prop] === 'object') {
          const nested = search(obj[prop])
          // undefined means "not found"; any other value (including null)
          // is a real match and short-circuits the traversal.
          if (nested !== undefined) return nested
        }
      }
    }
    return undefined
  }
  const result = search(object)
  return result === undefined ? null : result
}
162
/**
 * Format the current local time. Each of the tokens YYYY, MM, DD, HH,
 * mm, ss is replaced once (first occurrence) in `format`; two-digit
 * fields are zero-padded.
 */
function getCurrentDateTime (format = 'YYYY-MM-DD HH:mm:ss') {
  const now = new Date()
  const pad = (n) => String(n).padStart(2, '0')
  const tokens = {
    YYYY: String(now.getFullYear()),
    MM: pad(now.getMonth() + 1),
    DD: pad(now.getDate()),
    HH: pad(now.getHours()),
    mm: pad(now.getMinutes()),
    ss: pad(now.getSeconds())
  }
  let formatted = format
  for (const [token, value] of Object.entries(tokens)) {
    formatted = formatted.replace(token, value)
  }
  return formatted
}
179
// Aggregate default export: the generic utilities above, imported as a
// single `helpers` object by client.js and main.js.
export default {
   wait,
   stripTags,
   randomInt,
   randomDelay,
   pickProperties,
   smartLoop,
   parseUrlQuery,
   getCurrentDateTime,
   updateItem,
   searchKeyAndGetValue,
   getCookieValue,
   updateCookieJar,
   getProperty,
   getInputGroup
}

src/main.js

1import {Actor} from 'apify';
2import log from '@apify/log'
3import Sentry from '@sentry/node'
4
5import Client from './client.js'
6import helpers from './helpers.js'
7
// Entry point of the actor: initialize, restore resume state, then page
// through reviews until the requested item count is reached.
await Actor.init()
const input = await Actor.getInput()

{//Block:InitSentry
    Sentry.init({
        dsn: "https://0979604b44b9471c998863b7fa96e069@o4505494615621632.ingest.sentry.io/4505494635872256",
        tracesSampleRate: 1.0,
        normalizeMaxBreadth: 15,
        normalizeDepth: 5
    })
    Sentry.setContext('context', {
        actor: 'Google play',
        input: helpers.pickProperties(input, ['myEmail'])
    })
    // Report unhandled promise rejections to Sentry rather than losing them.
    process.on('unhandledRejection', (reason) => {
        Sentry.captureException(reason)
    })
}
{//Block:InitState
    // Resume state. A state persisted by a previous (migrated) run takes
    // precedence over the cursor supplied in the input.
    var state = {
        cursor: input.cursor,
        currentPage: 1,
        fetchCount: 0
    }
    let prevState = await Actor.getValue('state')
    if (prevState) {
        state = prevState
    }
}
{//Block:InitProxy
    var proxyUrl
    if (input.proxy) {
        let proxyConfiguration = await Actor.createProxyConfiguration(input.proxy)
        proxyUrl = await proxyConfiguration?.newUrl()
    }
}
// Persist resume state when the platform migrates the run to another server.
Actor.on('migrating', async () => {
    await Actor.setValue('state', state)
})

// Cap on pushed records: explicit input, platform limit, or unlimited.
const MAX_ITEMS = input.count || Number(process.env.ACTOR_MAX_PAID_DATASET_ITEMS) || Infinity
const client = new Client({proxyUrl})

while (state.fetchCount < MAX_ITEMS) {
    let items = [], cursor
    if (input.action == 'scrapeReviews') {
        // getInputGroup is synchronous; no await needed.
        let options = helpers.getInputGroup(input, 'scrapeReviews')
        let result = await client.getReviews({
            cursor: state.cursor,
            ...options
        })
        cursor = result.nextPaginationToken
        items = result.data
    } else {
        console.error('Unknown action. Exiting.')
        break
    }
    // Fix: trim the final page so we never push more than MAX_ITEMS —
    // a full page of reviews could otherwise overshoot the requested count.
    let remaining = MAX_ITEMS - state.fetchCount
    if (items.length > remaining) items = items.slice(0, remaining)
    await Actor.pushData(items)
    state.cursor = cursor
    state.fetchCount += items.length
    // Fix: expose the cursor under the 'lastCursor' key as promised by the
    // input schema ("You can find this value ... under **lastCursor** key").
    await Actor.setValue('lastCursor', state.cursor ?? null)
    if (items.length == 0) break
    await helpers.randomDelay(input.minDelay, input.maxDelay)
}

await Actor.exit()
Developer
Maintained by Community
Actor metrics
  • 39 monthly users
  • 0 stars
  • 95.5% runs succeeded
  • 9.5 days response time
  • Created in Jul 2023
  • Modified 9 months ago