Actor picture

Chrome Web Store

defensivedepth/chrome-web-store

Scrape metadata about an extension from the Chrome Web Store

No credit card required

Author's avatar
  • Modified
  • Users: 30
  • Runs: 136

Based on the apify/actor-node-chrome:v0.21.10 Docker image (see docs).

const Apify = require('apify');
const rp = require('request-promise')

/**
 * Debug helper: stores a full-page screenshot and the page's HTML in the
 * key-value store.
 *
 * The screenshot is written under `key` as image/png; the markup is written
 * under `key` with a ".html" suffix as text/html.
 *
 * @param {object} page - Page object exposing `screenshot()` and `content()`.
 * @param {string} key - Record key used for the screenshot.
 * @returns {Promise<void>}
 */
const saveScreen = async (page, key) => {
    const pngOptions = { contentType: 'image/png' };
    const htmlOptions = { contentType: 'text/html' };

    // Screenshot first, then the markup, each persisted as soon as it is captured.
    const screenshot = await page.screenshot({ fullPage: true });
    await Apify.setValue(key, screenshot, pngOptions);

    const html = await page.content();
    await Apify.setValue(`${key}.html`, html, htmlOptions);
};

// Element callbacks handed to page.$eval; kept free of outer-scope references.
const readContentAttr = (el) => el.content;
const readTextContent = (el) => el.textContent;

// Runs one $eval lookup and yields null when it fails (e.g. nothing matches the selector).
const scrapeOrNull = (page, selector, extractor) =>
    page.$eval(selector, extractor).catch(() => null);

// Parses a base-10 integer from scraped text, ignoring thousands separators; null when missing or not numeric.
const toIntOrNull = (text) => {
    if (!text) return null;
    const value = Number.parseInt(text.replace(/,/g, ''), 10);
    return Number.isNaN(value) ? null : value;
};

// Parses a rating rounded to two decimals; null when missing or not numeric.
const toRatingOrNull = (text) => {
    if (!text) return null;
    const value = Number.parseFloat(text);
    return Number.isNaN(value) ? null : Number(value.toFixed(2));
};

/**
 * Extracts extension metadata from a loaded detail page and pushes one item
 * to the default dataset.
 *
 * Every field is best-effort: a lookup that fails is stored as null rather
 * than failing the whole request. Numeric fields that cannot be parsed are
 * also stored as null (never NaN).
 *
 * @param {object} context
 * @param {object} context.request - Request whose `userData.id` is copied to `uniqueId`.
 * @param {object} context.page - Page object exposing `$eval(selector, fn)`.
 * @returns {Promise<void>}
 */
const handlePageFunction = async ({ request, page }) => {
    // The lookups are independent of each other, so they are issued together.
    const [users, averageRating, ratingCount, version, updated, email, website] = await Promise.all([
        scrapeOrNull(page, '[itemprop="interactionCount"]', readContentAttr),
        scrapeOrNull(page, '[itemprop="ratingValue"]', readContentAttr),
        scrapeOrNull(page, '[itemprop="ratingCount"]', readContentAttr),
        scrapeOrNull(page, '[itemprop="version"]', readContentAttr),
        scrapeOrNull(page, '.C-b-p-D-J .C-b-p-D-Xe.h-C-b-p-D-xh-hh', readTextContent),
        scrapeOrNull(page, '.C-b-p-rc-D-J .C-b-p-rc-D-R', readTextContent),
        scrapeOrNull(page, '.e-f-y', readTextContent),
    ]);
    const uniqueId = request.userData.id;

    // Log the raw scraped strings before any parsing, to ease debugging of selector changes.
    console.log(users, averageRating, ratingCount, version, updated, email, website);

    await Apify.pushData({
        // The raw value carries a "UserDownloads:" prefix that must go before parsing.
        users: users ? toIntOrNull(users.replace('UserDownloads:', '')) : null,
        averageRating: toRatingOrNull(averageRating),
        ratingCount: toIntOrNull(ratingCount),
        version,
        updated,
        email: email ? email.replace('Email:', '').trim() : null,
        website,
        uniqueId,
    });
};

/**
 * Navigates the page to the request URL and waits until a response whose URL
 * contains the web store "ajax/item" endpoint has been seen.
 *
 * Rejects with Error('404') when the main navigation response has status 404,
 * and with whatever `page.waitForResponse` raises when no matching response
 * arrives within its 15000 ms timeout.
 *
 * @param {object} context
 * @param {object} context.request - Request providing the `url` to open.
 * @param {object} context.page - Page object exposing `goto` and `waitForResponse`.
 * @returns {Promise<object>} The same `page` that was passed in.
 */
const gotoFunction = async ({ request, page }) => {
    console.log('going to', request.url);
    const startedAt = Date.now();

    // Both waits are started before either is awaited so the item response
    // cannot slip by while navigation is still settling.
    const navigation = page
        .goto(request.url, { waitUntil: 'domcontentloaded' })
        .then((response) => {
            if (response.status() === 404) {
                throw new Error('404');
            }
        });
    const itemResponse = page.waitForResponse(
        (response) => response.url().includes('https://chrome.google.com/webstore/ajax/item'),
        { timeout: 15000 }
    );
    await Promise.all([navigation, itemResponse]);

    const finishedAt = Date.now();
    console.log(`getting response took ${finishedAt - startedAt} ms`);
    return page;
};

/**
 * Actor entry point.
 *
 * Reads the INPUT record, builds one detail-page request per extension id,
 * crawls them with a PuppeteerCrawler, then POSTs every item of the default
 * dataset as a JSON array to `url`.
 *
 * Input fields read here:
 *   - ids: array of extension ids (required).
 *   - maxConcurrency: optional crawler concurrency; falls back to 1.
 *
 * Throws when the input is missing or `ids` is not an array. A failed POST is
 * only logged and does not fail the run.
 */
Apify.main(async () => {
    // Get input of your actor
    const input = await Apify.getValue('INPUT');
    console.log('My input:');
    console.dir(input);
    // Guard against an absent INPUT record as well, so the caller gets the
    // validation message instead of a TypeError on `input.ids`.
    if (!input || !Array.isArray(input.ids)) {
        throw new Error('ids needs to be an array in input!');
    }

    // The id travels in userData so the page handler can attach it to the result.
    const sources = input.ids.map((id) => ({
        url: `https://chrome.google.com/webstore/detail/${id}`,
        userData: { id },
    }));
    const requestList = new Apify.RequestList({
        sources,
        persistStateKey: 'state',
    });

    await requestList.initialize();

    const crawler = new Apify.PuppeteerCrawler({
        launchPuppeteerOptions: {
            //useApifyProxy: true
        },
        handlePageFunction,
        requestList,
        gotoFunction,
        maxConcurrency: input.maxConcurrency || 1,
        maxRequestRetries: 0,
    });
    await crawler.run();

    // NOTE(review): 'url.com' looks like a placeholder for the real endpoint — confirm before deploying.
    const url = 'url.com';
    const dataset = await Apify.openDataset();
    const { items: jsonItems } = await dataset.getData();
    console.log('number of items:', jsonItems.length);

    // Delivery is best-effort: a failure is reported in the log but not rethrown.
    try {
        await rp({
            url,
            method: 'POST',
            json: true,
            body: jsonItems,
        });
        console.log('request sent sucessfully');
    } catch (e) {
        console.log('request failed');
        console.dir(e);
    }
});