Chrome Web Store
View all Actors
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
Chrome Web Store
defensivedepth/chrome-web-store
Scrape metadata about an extension from the Chrome Web Store
Dockerfile
# Dockerfile template used to run acts in the Actor system.
# The base image name below is filled in during the act build, according to user settings.
# IMPORTANT: The base image has to set a proper working directory, e.g. /usr/src/app or /home/user
FROM apify/actor-node-chrome:v0.21.10

# Copy only package.json and package-lock.json at this point: they should be
# the only files that influence "npm install" below, which speeds up the build.
COPY package*.json ./

# Install NPM packages without optional and development dependencies so the
# image stays small. Keep the log output short, then print the dependency
# tree plus the Node.js and NPM versions to help with debugging.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy the source code into the container.
# This is the last step so that a source-only change still builds quickly.
COPY . ./

# NOTE: The base image already defines the CMD.
# To debug locally with the node inspector, uncomment:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
1{
2 "name": "apify-project",
3 "version": "0.0.1",
4 "description": "",
5 "author": "It's not you it's me",
6 "license": "ISC",
7 "dependencies": {
8 "apify": "0.21.10",
9 "request-promise": "latest"
10 },
11 "scripts": {
12 "start": "node main.js"
13 }
14}
main.js
1const Apify = require('apify');
2const rp = require('request-promise')
3
/**
 * Debug helper: captures the current page as a full-page screenshot and as
 * HTML, and hands both to `Apify.setValue`.
 *
 * @param {object} page - Page object exposing `screenshot()` and `content()`.
 * @param {string} key - Key for the screenshot; the HTML is saved under the
 *   same key with `.html` appended.
 * @returns {Promise<void>} Resolves once both values have been saved.
 */
const saveScreen = async (page, key) => {
    // Screenshot first, saved as image/png under the plain key.
    const pngData = await page.screenshot({ fullPage: true });
    await Apify.setValue(key, pngData, { contentType: 'image/png' });

    // Then the page markup, saved as text/html under "<key>.html".
    const markup = await page.content();
    const htmlKey = key + '.html';
    await Apify.setValue(htmlKey, markup, { contentType: 'text/html' });
};
10
/**
 * Reads extension metadata from an already-loaded page and pushes one record
 * through `Apify.pushData`.
 *
 * Every field is looked up on a best-effort basis: when an element is missing
 * or its evaluation fails, the field is stored as `null`. Numeric fields that
 * cannot be parsed are stored as `null` as well, never as `NaN`.
 *
 * @param {object} context
 * @param {object} context.request - Request being handled; `request.userData.id`
 *   is copied to the record as `uniqueId`.
 * @param {object} context.page - Page object exposing `$eval(selector, fn)`.
 * @returns {Promise<void>} Resolves once the record has been pushed.
 */
const handlePageFunction = async ({ request, page }) => {
    // A failed lookup (e.g. no matching element) yields null instead of rejecting.
    const evalOrNull = (selector, pick) => page.$eval(selector, pick).catch(() => null);
    // Keeps "could not parse" consistent with "not found": both become null.
    const numberOrNull = (value) => (Number.isNaN(value) ? null : value);

    const users = await evalOrNull('[itemprop="interactionCount"]', (el) => el.content);
    const averageRating = await evalOrNull('[itemprop="ratingValue"]', (el) => el.content);
    const ratingCount = await evalOrNull('[itemprop="ratingCount"]', (el) => el.content);
    const version = await evalOrNull('[itemprop="version"]', (el) => el.content);
    const updated = await evalOrNull('.C-b-p-D-J .C-b-p-D-Xe.h-C-b-p-D-xh-hh', (el) => el.textContent);
    const email = await evalOrNull('.C-b-p-rc-D-J .C-b-p-rc-D-R', (el) => el.textContent);
    const website = await evalOrNull('.e-f-y', (el) => el.textContent);
    const uniqueId = request.userData.id;

    console.log(users, averageRating, ratingCount, version, updated, email, website);

    // Strip the "UserDownloads:" label and thousands separators before parsing.
    const usersDigits = users ? users.replace('UserDownloads:', '').replace(/,/g, '') : null;

    await Apify.pushData({
        users: users ? numberOrNull(parseInt(usersDigits, 10)) : null,
        // Round the rating to two decimals while keeping it a number.
        averageRating: averageRating ? numberOrNull(parseFloat(parseFloat(averageRating).toFixed(2))) : null,
        ratingCount: ratingCount ? numberOrNull(parseInt(ratingCount, 10)) : null,
        version,
        updated,
        email: email ? email.replace('Email:', '').trim() : null,
        website,
        uniqueId,
    });
};
37
/**
 * Navigates the page to the request URL and waits until a response whose URL
 * contains the web store's `ajax/item` endpoint has been seen.
 *
 * @param {object} context
 * @param {object} context.request - Request whose `url` is opened.
 * @param {object} context.page - Page object exposing `goto()` and `waitForResponse()`.
 * @returns {Promise<object>} The same `page` object that was passed in.
 * @throws {Error} With message `'404'` when the navigation response status is 404.
 *   Rejections from `goto` or `waitForResponse` (e.g. the 15 s timeout) propagate too.
 */
const gotoFunction = async ({ request, page }) => {
    console.log('going to', request.url);
    const start = Date.now();

    const navigation = page
        .goto(request.url, { waitUntil: 'domcontentloaded' })
        .then((res) => {
            // The navigation may resolve without a response object (null);
            // only inspect the status when there is one.
            if (res && res.status() === 404) throw new Error('404');
        });
    // Started right after goto() so both waits run concurrently.
    const ajaxResponse = page.waitForResponse(
        (res) => res.url().includes('https://chrome.google.com/webstore/ajax/item'),
        { timeout: 15000 }
    );
    await Promise.all([navigation, ajaxResponse]);

    const end = Date.now();
    console.log(`getting response took ${end - start} ms`);
    return page;
};
49
/**
 * Actor entry point.
 *
 * Reads INPUT, builds one web store detail request per id in `input.ids`,
 * crawls them with a PuppeteerCrawler (using `gotoFunction` and
 * `handlePageFunction`), then POSTs the collected dataset items as JSON.
 *
 * Input fields read here:
 *   - `ids` (array, required): extension ids; each is also passed along as `userData.id`.
 *   - `maxConcurrency` (optional): crawler concurrency, defaults to 1.
 *
 * @throws {Error} When the input is missing or `ids` is not an array.
 */
Apify.main(async () => {
    // Get input of your actor
    const input = await Apify.getValue('INPUT');
    console.log('My input:');
    console.dir(input);
    // Guard `input` itself as well, so a missing INPUT reports the validation
    // error below instead of a TypeError on `input.ids`.
    if (!input || !Array.isArray(input.ids)) throw new Error('ids needs to be an array in input!');

    const sources = input.ids.map((id) => ({
        url: `https://chrome.google.com/webstore/detail/${id}`,
        userData: { id },
    }));
    const requestList = new Apify.RequestList({
        sources,
        persistStateKey: 'state',
    });

    await requestList.initialize();

    const crawler = new Apify.PuppeteerCrawler({
        launchPuppeteerOptions: {
            //useApifyProxy: true
        },
        handlePageFunction,
        requestList,
        gotoFunction,
        maxConcurrency: input.maxConcurrency || 1,
        // Each page is attempted once; failed requests are not retried.
        maxRequestRetries: 0,
    });
    await crawler.run();

    // NOTE(review): 'url.com' looks like a placeholder (it has no protocol) —
    // confirm the real endpoint the results should be posted to.
    const url = 'url.com';
    const dataset = await Apify.openDataset();
    const { items: jsonItems } = await dataset.getData();
    console.log('number of items:', jsonItems.length);

    // Delivery is best-effort: a failed POST is logged but does not fail the run.
    try {
        await rp({
            url,
            method: 'POST',
            json: true,
            body: jsonItems,
        });
        console.log('request sent sucessfully');
    } catch (e) {
        console.log('request failed');
        console.dir(e);
    }
});
Developer
Maintained by Community
Categories