Steam Scraper avatar

Steam Scraper

Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
Steam Scraper

Steam Scraper

sergeylukin/steam-puppeteer

Fetch all purchased games from a Steam profile page

Dockerfile

# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-puppeteer-chrome:15-10.1.0

# Copy just package.json and package-lock.json first: they are the only files
# that affect "npm install" in the next step, so Docker can reuse the cached
# dependency layer when only the source code changes.
# Use the same ownership as the source-code COPY further down so the manifests
# are not left owned by root (presumably the base image runs as "myuser" --
# confirm against the base image).
COPY --chown=myuser:myuser package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging ("npm list" is allowed to fail without breaking the build).
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY --chown=myuser:myuser . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

main.js

const Apify = require('apify');

/**
 * Reports whether every image currently in the document has finished loading.
 *
 * Runs inside the browser page (passed to `page.waitForFunction`), so it must
 * not reference anything from the Node.js scope.
 *
 * @returns {boolean} True when all `document.images` are complete.
 */
function imagesHaveLoaded() {
    return Array.from(document.images).every((image) => image.complete);
}

/**
 * Collects the games listed in the page's games-list rows.
 *
 * Runs inside the browser page (passed to `page.evaluate`), so it must be
 * fully self-contained: helpers are declared inside the function body.
 *
 * Rows whose element id contains no digits are skipped, and a missing child
 * element yields `null` for that field instead of aborting the whole scrape.
 *
 * @returns {Object} Map keyed by the numeric part of each row's element id.
 *     Each value has `id`, `name`, `play_hours`, `page_url` and `logo`.
 */
function extractGames() {
    // Text content (not innerHTML) so names come back unescaped, e.g. "&" rather than "&amp;".
    const textOf = (root, selector) => {
        const node = root.querySelector(selector);
        return node ? node.textContent.trim() : null;
    };

    const attributeOf = (root, selector, attribute) => {
        const node = root.querySelector(selector);
        return node ? node.getAttribute(attribute) : null;
    };

    // parseFloat stops at the first comma, so "1,234 hrs" would be read as 1.
    // Drop commas that are followed by three digits before parsing.
    // NOTE(review): assumes English-style thousands separators -- confirm for other locales.
    const parseHours = (text) => parseFloat((text === null ? '' : text).replace(/,(?=\d{3})/g, ''));

    const games = {};
    const rows = document.querySelectorAll('#games_list_row_container > #games_list_rows > .gameListRow');

    for (const row of rows) {
        const idMatch = row.id.match(/\d+/);
        if (!idMatch) continue;

        const id = idMatch[0];
        games[id] = {
            id,
            name: textOf(row, '.gameListRowItem .gameListRowItemName.ellipsis'),
            // NaN when the row has no parsable hours text.
            play_hours: parseHours(textOf(row, '.gameListRowItem .hours_played')),
            page_url: attributeOf(row, '.gameListRowLogo a', 'href'),
            logo: attributeOf(row, '.gameListRowLogo img', 'src'),
        };
    }

    return games;
}

/**
 * Actor entry point: reads `INPUT.url`, opens it in Puppeteer, extracts the
 * games list and stores it under the `OUTPUT` key.
 *
 * @throws {Error} When the input is missing or has no `url` field.
 */
Apify.main(async () => {
    const input = await Apify.getValue('INPUT');

    if (!input || !input.url) throw new Error('Invalid input, must be a JSON object with the "url" field!');

    console.log('Launching Puppeteer...');
    const browser = await Apify.launchPuppeteer();

    try {
        console.log(`Opening URL: ${input.url}`);
        const page = await browser.newPage();
        await page.goto(input.url);

        // Rejects if the images have not finished loading within 5 seconds.
        await page.waitForFunction(imagesHaveLoaded, { timeout: 5000 });

        const gamesList = await page.evaluate(extractGames);

        await Apify.setValue('OUTPUT', gamesList);
    } finally {
        // Always release the browser, even when navigation or scraping fails.
        console.log('Closing Puppeteer...');
        await browser.close();
    }

    console.log('Done.');
});

package.json

1{
2    "name": "apify-project",
3    "version": "0.0.1",
4    "description": "",
5    "author": "It's not you it's me",
6    "license": "ISC",
7    "dependencies": {
8        "apify": "1.3.4",
9        "puppeteer": "10.1.0"
10    },
11    "scripts": {
12        "start": "node main.js"
13    }
14}
Developer
Maintained by Community
Categories