Chrome Web Store avatar

Chrome Web Store

Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
Chrome Web Store

Chrome Web Store

defensivedepth/chrome-web-store

Scrape metadata about an extension from the Chrome Web Store

Dockerfile

1# This is a template for a Dockerfile used to run acts in Actor system.
2# The base image name below is set during the act build, based on user settings.
3# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
4FROM apify/actor-node-chrome:v0.21.10
5
6# First, copy just package.json and package-lock.json, since they should be
7# the only files that affect "npm install" in the next step, to speed up the build
8COPY package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14 && npm install --only=prod --no-optional \
15 && echo "Installed NPM packages:" \
16 && (npm list --all || true) \
17 && echo "Node.js version:" \
18 && node --version \
19 && echo "NPM version:" \
20 && npm --version
21
22# Copy source code to container
23# Do this in the last step, to have fast build if only the source code changed
24COPY --chown=myuser:myuser . ./
25
26# NOTE: The CMD is already defined by the base image.
27# Uncomment this for local node inspector debugging:
28# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

1{
2    "name": "apify-project",
3    "version": "0.0.1",
4    "description": "",
5    "author": "It's not you it's me",
6    "license": "ISC",
7    "dependencies": {
8        "apify": "0.21.10",
9        "request-promise": "latest"
10    },
11    "scripts": {
12        "start": "node main.js"
13    }
14}

main.js

1const Apify = require('apify');
2const rp = require('request-promise')
3
/**
 * Debug helper: captures a full-page screenshot and the page's HTML and
 * stores both through `Apify.setValue`.
 *
 * @param {object} page - Page object exposing `screenshot()` and `content()`.
 * @param {string} key - Key for the PNG record; the HTML goes under `<key>.html`.
 * @returns {Promise<void>}
 */
const saveScreen = async (page, key) => {
    const png = await page.screenshot({ fullPage: true });
    await Apify.setValue(key, png, { contentType: 'image/png' });

    const html = await page.content();
    const htmlKey = `${key}.html`;
    await Apify.setValue(htmlKey, html, { contentType: 'text/html' });
};
10
/**
 * Reads extension metadata from an already-loaded detail page and pushes a
 * single record through `Apify.pushData`.
 *
 * Every field is read on a best-effort basis: when a selector does not match
 * (or the lookup fails for any other reason) the field is stored as `null`
 * instead of failing the whole request. Numeric fields that cannot be parsed
 * are also stored as `null` rather than `NaN`.
 *
 * @param {object} context
 * @param {object} context.request - Request whose `userData.id` is copied to `uniqueId`.
 * @param {object} context.page - Page object exposing `$eval(selector, fn)`.
 * @returns {Promise<void>}
 */
const handlePageFunction = async ({ request, page }) => {
    //await page.waitForSelector('[itemprop="interactionCount"]')
    //await saveScreen(page, 'screen')

    // Best-effort readers: any lookup failure is deliberately mapped to null.
    const readContent = (selector) => page.$eval(selector, (el) => el.content).catch(() => null);
    const readText = (selector) => page.$eval(selector, (el) => el.textContent).catch(() => null);

    // Explicit radix 10 so strings such as "0x10" are never read as hex;
    // unparsable text becomes null, consistent with the other missing fields.
    const toInt = (text) => {
        const parsed = Number.parseInt(text, 10);
        return Number.isNaN(parsed) ? null : parsed;
    };
    // Rounds to two decimals; unparsable text becomes null.
    const toRating = (text) => {
        const parsed = Number.parseFloat(text);
        return Number.isNaN(parsed) ? null : Number.parseFloat(parsed.toFixed(2));
    };

    const users = await readContent('[itemprop="interactionCount"]');
    const averageRating = await readContent('[itemprop="ratingValue"]');
    const ratingCount = await readContent('[itemprop="ratingCount"]');
    const version = await readContent('[itemprop="version"]');
    const updated = await readText('.C-b-p-D-J .C-b-p-D-Xe.h-C-b-p-D-xh-hh');
    const email = await readText('.C-b-p-rc-D-J .C-b-p-rc-D-R');
    const website = await readText('.e-f-y');
    const uniqueId = request.userData.id;

    console.log(users, averageRating, ratingCount, version, updated, email, website);

    await Apify.pushData({
        // Strip the "UserDownloads:" label and thousands separators before parsing.
        users: users ? toInt(users.replace('UserDownloads:', '').replace(/,/g, '')) : null,
        averageRating: averageRating ? toRating(averageRating) : null,
        ratingCount: ratingCount ? toInt(ratingCount) : null,
        version,
        updated,
        email: email ? email.replace('Email:', '').trim() : null,
        website,
        uniqueId,
    });
};
37
/**
 * Navigates the page to `request.url` and, in parallel, waits for a response
 * whose URL contains the web store's `ajax/item` endpoint (15 s timeout).
 * Logs how long the combined wait took.
 *
 * @param {object} context
 * @param {object} context.request - Request providing the `url` to open.
 * @param {object} context.page - Page object exposing `goto()` and `waitForResponse()`.
 * @returns {Promise<object>} The same `page` that was passed in.
 * @throws {Error} With message '404' when the navigation response status is 404;
 *   also rejects with whatever `goto` / `waitForResponse` reject with.
 */
const gotoFunction = async ({ request, page }) => {
    console.log('going to', request.url);
    const startedAt = Date.now();

    const navigation = page
        .goto(request.url, { waitUntil: 'domcontentloaded' })
        .then((res) => {
            // page.goto can resolve with null (no main-resource response), so
            // guard before reading the status instead of raising a TypeError.
            if (res && res.status() === 404) throw new Error('404');
        });
    const itemResponse = page.waitForResponse(
        (res) => res.url().includes('https://chrome.google.com/webstore/ajax/item'),
        { timeout: 15000 }
    );

    await Promise.all([navigation, itemResponse]);

    const finishedAt = Date.now();
    console.log(`getting response took ${finishedAt - startedAt} ms`);
    return page;
};
49
// Actor entry point: builds one detail-page request per extension id from the
// input, crawls them with PuppeteerCrawler, then POSTs all collected dataset
// items as a JSON array to `url`.
Apify.main(async () => {
    // Get input of your actor
    const input = await Apify.getValue('INPUT');
    console.log('My input:');
    console.dir(input);
    // Reject a missing INPUT record the same way as a malformed `ids` field,
    // so both cases fail with the descriptive error rather than a TypeError.
    if (!input || !Array.isArray(input.ids)) {
        throw new Error('ids needs to be an array in input!');
    }

    // The id travels in userData so handlePageFunction can emit it as uniqueId.
    const sources = input.ids.map((id) => ({
        url: `https://chrome.google.com/webstore/detail/${id}`,
        userData: { id },
    }));
    const requestList = new Apify.RequestList({
        sources,
        persistStateKey: 'state',
    });

    await requestList.initialize();

    const crawler = new Apify.PuppeteerCrawler({
        launchPuppeteerOptions: {
            //useApifyProxy: true
        },
        handlePageFunction,
        requestList,
        gotoFunction,
        maxConcurrency: input.maxConcurrency || 1,
        // Failed pages are not retried.
        maxRequestRetries: 0,
    });
    await crawler.run();

    // NOTE(review): 'url.com' looks like a placeholder (no scheme) — confirm the
    // real endpoint before relying on this POST.
    const url = 'url.com';
    const dataset = await Apify.openDataset();
    const { items: jsonItems } = await dataset.getData();
    console.log('number of items:', jsonItems.length);

    // Delivery is best-effort: a failed POST is logged but does not fail the run.
    try {
        await rp({
            url,
            method: 'POST',
            json: true,
            body: jsonItems,
        });
        console.log('request sent sucessfully');
    } catch (e) {
        console.log('request failed');
        console.dir(e);
    }
});
Developer
Maintained by Community
Categories