Chrome Web Store
Pricing
Pay per usage
Go to Store
Chrome Web Store
Deprecated — Scrape metadata about an extension from the Chrome Web Store.
0.0 (0)
Pricing
Pay per usage
1
54
2
Last modified
2 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-chrome:v0.21.10

# Second, copy just package.json and package-lock.json since it should be
# the only file that affects "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "apify": "0.21.10", "request-promise": "latest" }, "scripts": { "start": "node main.js" }}
main.js
1const Apify = require('apify');2const rp = require('request-promise')3
/**
 * Debug helper: captures a full-page screenshot and the page's HTML and
 * stores both through `Apify.setValue`.
 *
 * @param {object} page - Page object providing `screenshot()` and `content()`.
 * @param {string} key - Record key for the PNG; the HTML goes under `${key}.html`.
 * @returns {Promise<void>} Resolves once both records have been written.
 */
const saveScreen = async (page, key) => {
    const screen = await page.screenshot({ fullPage: true });
    await Apify.setValue(key, screen, { contentType: 'image/png' });

    const content = await page.content();
    await Apify.setValue(`${key}.html`, content, { contentType: 'text/html' });
};
/**
 * Reads the `content` property of the first element matching `selector`.
 * Resolves to null when the lookup fails (e.g. the element is missing).
 */
const readContent = (page, selector) => page.$eval(selector, (el) => el.content).catch(() => null);

/**
 * Reads the `textContent` of the first element matching `selector`.
 * Resolves to null when the lookup fails (e.g. the element is missing).
 */
const readText = (page, selector) => page.$eval(selector, (el) => el.textContent).catch(() => null);

/** Maps NaN (unparsable number) to null so the record never carries NaN. */
const numberOrNull = (value) => (Number.isNaN(value) ? null : value);

/**
 * Extracts extension metadata from the loaded page and pushes one record
 * through `Apify.pushData`.
 *
 * Every field is best-effort: a selector that does not match yields null for
 * that field instead of failing the whole request.
 *
 * @param {object} context
 * @param {object} context.request - Request being handled; `request.userData.id`
 *   is copied into the record as `uniqueId`.
 * @param {object} context.page - Page object providing `$eval(selector, fn)`.
 * @returns {Promise<void>} Resolves once the record has been pushed.
 */
const handlePageFunction = async ({ request, page }) => {
    // For debugging a failed extraction, `await saveScreen(page, 'screen')` can be called here.
    const users = await readContent(page, '[itemprop="interactionCount"]');
    const averageRating = await readContent(page, '[itemprop="ratingValue"]');
    const ratingCount = await readContent(page, '[itemprop="ratingCount"]');
    const version = await readContent(page, '[itemprop="version"]');
    // NOTE(review): the class-name selectors below look machine-generated and
    // may change without notice — verify against the live page.
    const updated = await readText(page, '.C-b-p-D-J .C-b-p-D-Xe.h-C-b-p-D-xh-hh');
    const email = await readText(page, '.C-b-p-rc-D-J .C-b-p-rc-D-R');
    const website = await readText(page, '.e-f-y');
    const uniqueId = request.userData.id;

    console.log(users, averageRating, ratingCount, version, updated, email, website);

    await Apify.pushData({
        // Raw value has a 'UserDownloads:' prefix and thousands separators.
        users: users
            ? numberOrNull(parseInt(users.replace('UserDownloads:', '').replace(/,/g, ''), 10))
            : null,
        // Rounded to two decimals, then converted back to a number.
        averageRating: averageRating
            ? numberOrNull(parseFloat(parseFloat(averageRating).toFixed(2)))
            : null,
        ratingCount: ratingCount ? numberOrNull(parseInt(ratingCount, 10)) : null,
        version,
        updated,
        email: email ? email.replace('Email:', '').trim() : null,
        website,
        uniqueId,
    });
};
/**
 * Navigates the page to `request.url` and, concurrently, waits for a
 * response whose URL contains the Web Store item AJAX endpoint.
 *
 * @param {object} context
 * @param {object} context.request - Request whose `url` is opened.
 * @param {object} context.page - Page object providing `goto()` and `waitForResponse()`.
 * @returns {Promise<object|null>} Whatever `page.goto()` resolved to (the
 *   navigation response, or null when the page reports none).
 * @throws {Error} With message '404' when the navigation response status is 404;
 *   also rejects if either awaited call rejects (e.g. the 15 s wait times out).
 */
const gotoFunction = async ({ request, page }) => {
    console.log('going to', request.url);
    const start = Date.now();

    // Started synchronously so the navigation is issued before the response wait is registered.
    const navigate = async () => {
        const res = await page.goto(request.url, { waitUntil: 'domcontentloaded' });
        // `res` may be null; only a real response can carry a 404 status.
        if (res && res.status() === 404) throw new Error('404');
        return res;
    };

    const [response] = await Promise.all([
        navigate(),
        page.waitForResponse(
            (res) => res.url().includes('https://chrome.google.com/webstore/ajax/item'),
            { timeout: 15000 }
        ),
    ]);

    const end = Date.now();
    console.log(`getting response took ${end - start} ms`);
    return response;
};
// Actor entry point: builds one request per extension id from INPUT, crawls
// the detail pages, then POSTs every dataset item to `url` in a single request.
Apify.main(async () => {
    // Get input of your actor
    const input = await Apify.getValue('INPUT');
    console.log('My input:');
    console.dir(input);
    // Guard against a missing INPUT as well, so the caller gets this message
    // rather than a TypeError from reading `ids` of null.
    if (!input || !Array.isArray(input.ids)) {
        throw new Error('ids needs to be an array in input!');
    }

    // The id is carried in userData so handlePageFunction can emit it as uniqueId.
    const sources = input.ids.map((id) => ({
        url: `https://chrome.google.com/webstore/detail/${id}`,
        userData: { id },
    }));
    const requestList = new Apify.RequestList({
        sources,
        persistStateKey: 'state',
    });

    await requestList.initialize();

    const crawler = new Apify.PuppeteerCrawler({
        launchPuppeteerOptions: {
            // useApifyProxy: true
        },
        handlePageFunction,
        requestList,
        gotoFunction,
        maxConcurrency: input.maxConcurrency || 1,
        // Failed pages are not retried.
        maxRequestRetries: 0,
    });
    await crawler.run();

    // NOTE(review): 'url.com' looks like a placeholder without a scheme —
    // confirm the real endpoint before relying on this upload.
    const url = 'url.com';
    const dataset = await Apify.openDataset();
    const { items: jsonItems } = await dataset.getData();
    console.log('number of items:', jsonItems.length);

    // Best-effort upload: a failure is logged but does not fail the actor run.
    try {
        await rp({
            url,
            method: 'POST',
            json: true,
            body: jsonItems,
        });
        console.log('request sent sucessfully');
    } catch (e) {
        console.log('request failed');
        console.dir(e);
    }
});