Amazon Test Actor avatar
Amazon Test Actor
Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
Amazon Test Actor

Amazon Test Actor

maksym-klnn/amazon-test-actor

Dockerfile

# This is a template for a Dockerfile used to run acts in the Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-chrome:v0.21.10

# Copy just package.json and package-lock.json first, since they should be
# the only files that affect "npm install" in the next step. Keeping them in
# their own layer speeds up the build when only the source code changes.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much, and print the dependency
# tree plus the Node.js and NPM versions for debugging.
# "npm list" is wrapped in "|| true" so that a non-zero exit status from it
# does not fail the build.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy the source code to the container.
# Do this in the last step, to have a fast build if only the source code changed.
COPY --chown=myuser:myuser . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

{
    "name": "apify-project",
    "version": "0.0.1",
    "description": "",
    "author": "It's not you it's me",
    "license": "ISC",
    "dependencies": {
        "apify": "0.21.10"
    },
    "scripts": {
        "start": "node main.js"
    }
}

main.js

const { URL } = require('url');
const Apify = require('apify');

const AMAZON_BASE_URL = 'https://www.amazon.com';
const SEARCH_URL_PREFIX = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=';
const OFFER_LIST_BASE_URL = 'https://www.amazon.com/gp/offer-listing/';
const DEFAULT_KEYWORD = 'ipad';

/**
 * Actor entry point.
 *
 * Reads an optional `keyword` from the INPUT record (falls back to 'ipad'),
 * then crawls three kinds of pages, distinguished by `request.userData.label`:
 *
 *   START      - the search result page; enqueues one ITEM request per result.
 *   ITEM       - a product detail page; reads the description and enqueues the
 *                first OFFER_LIST page for the product's ASIN.
 *   OFFER_LIST - one page of offers; accumulates offers in `userData.data` and
 *                either enqueues the next page or pushes the finished record
 *                to the dataset named after the keyword.
 */
Apify.main(async () => {
    // INPUT may be missing entirely, in which case getValue() yields no object.
    const input = await Apify.getValue('INPUT');
    const keyword = (input && input.keyword) || DEFAULT_KEYWORD;
    // Encode the keyword so spaces, '&', '#', etc. cannot corrupt the query string.
    const firstPageUrl = `${SEARCH_URL_PREFIX}${encodeURIComponent(keyword)}`;

    const requestQueue = await Apify.openRequestQueue();

    /**
     * Adds a labelled request to the queue.
     * @param {string} label - One of 'START', 'ITEM', 'OFFER_LIST'.
     * @param {string} url - Absolute URL to visit.
     * @param {Object} [data] - Partial result carried along to the next handler.
     */
    const enqueue = (label, url, data) => {
        const userData = data === undefined ? { label } : { label, data };
        return requestQueue.addRequest(new Apify.Request({ url, userData }));
    };

    await enqueue('START', firstPageUrl);

    // Start every run with an empty dataset for this keyword.
    // NOTE(review): the raw keyword is used as the dataset name - confirm that
    // keywords containing spaces or punctuation are acceptable as names.
    let dataset = await Apify.openDataset(keyword);
    const { itemCount } = await dataset.getInfo();
    if (itemCount) {
        await dataset.delete();
        // The deleted handle must not be reused for pushData(); open a fresh one.
        dataset = await Apify.openDataset(keyword);
    }

    /** Handles the search result page: enqueues an ITEM request per result. */
    const handleStart = async ({ page, request }) => {
        const searchResult = await page.$('#s-results-list-atf.s-result-list');
        if (!searchResult) {
            throw new Error(`Search result list not found on ${request.url}`);
        }

        // Runs in the browser. Result tiles lacking an ASIN or a detail link
        // are dropped instead of throwing and failing the whole page.
        const results = await searchResult.$$eval('li.s-result-item', elems => elems
            .map((el) => {
                const link = el.querySelector('.s-access-detail-page');
                const heading = link ? link.querySelector('h2') : null;
                return {
                    asin: el.getAttribute('data-asin'),
                    itemUrl: link ? link.getAttribute('href') : null,
                    title: heading ? heading.getAttribute('data-attribute') : null,
                };
            })
            .filter(item => item.asin && item.itemUrl));

        // Await every addRequest() so the handler does not return before the
        // queue has been filled and so failures are not lost as floating promises.
        for (const item of results) {
            // Some links are redirects carrying the real target in "&url=".
            const urlParts = item.itemUrl.split(/&url=/);
            const target = urlParts.length === 2 ? decodeURIComponent(urlParts[1]) : item.itemUrl;
            // Resolve against the site root because the target may be relative.
            const itemUrl = new URL(target, AMAZON_BASE_URL).href;
            await enqueue('ITEM', itemUrl, { ...item, itemUrl, keyword });
        }
    };

    /** Handles a product page: reads the description, enqueues the offer list. */
    const handleItem = async ({ page, request }) => {
        const description = await page.evaluate((sel) => {
            const p = document.querySelector(sel);
            return (p ? p.innerText : null);
        }, 'div#productDescription p');

        await enqueue('OFFER_LIST', OFFER_LIST_BASE_URL + request.userData.data.asin, {
            ...request.userData.data,
            description,
        });
    };

    /** Handles one offer list page: collects offers, paginates or stores the result. */
    const handleOfferList = async ({ page, request }) => {
        const offerList = await page.$('div#olpOfferList');
        if (!offerList) {
            throw new Error(`Offer list not found on ${request.url}`);
        }

        const pageOffers = await offerList.$$eval('.olpOffer', offerEls => offerEls.map((offer) => {
            const offerPrice = offer.querySelector('.olpOfferPrice');
            const offerShipping = offer.querySelector('.olpShippingInfo');
            const offerSeller = offer.querySelector('.olpSellerName');
            return {
                price: offerPrice ? offerPrice.innerText : null,
                // No shipping element is reported as 'free'.
                shipping: offerShipping ? offerShipping.innerText : 'free',
                seller: offerSeller ? offerSeller.innerText : null,
            };
        }));

        const offers = (request.userData.data.offers || []).concat(pageOffers);
        const data = { ...request.userData.data, offers };

        const nextPage = await page.evaluate((selector) => {
            const a = document.querySelector(selector);
            return (a ? a.getAttribute('href') : null);
        }, 'div#olpOfferListColumn .a-pagination .a-last a');

        if (nextPage) {
            // Carry the offers gathered so far over to the next page.
            await enqueue('OFFER_LIST', new URL(nextPage, AMAZON_BASE_URL).href, data);
        } else {
            await dataset.pushData(data);
        }
    };

    const handlers = {
        START: handleStart,
        ITEM: handleItem,
        OFFER_LIST: handleOfferList,
    };

    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        launchPuppeteerOptions: { headless: true },
        handlePageFunction: async (context) => {
            const handler = handlers[context.request.userData.label];
            // Requests with an unknown label are ignored.
            if (handler) await handler(context);
        },
        handleFailedRequestFunction: async ({ request, error }) => {
            request.pushErrorMessage(error);
            // Make permanently failed requests visible in the run log.
            console.error(`Request ${request.url} failed too many times:`, error);
        },
    });

    await crawler.run();
});
Developer
Maintained by Community