Amazon Test Actor avatar
Amazon Test Actor
Deprecated

Pricing

Pay per usage

Go to Store
Amazon Test Actor

Amazon Test Actor

Deprecated
maksym-klnn/amazon-test-actor

Developed by

Maksym

Maintained by Community

0.0 (0)

Pricing

Pay per usage

0

Monthly users

1

Last modified

2 years ago

Dockerfile

# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-chrome:v0.21.10

# First, copy just package.json and package-lock.json, since they should be
# the only files that affect "npm install" in the next step, to speed up the build.
# The files get the same owner as the source code copied at the end of this
# file, so the manifests are not left root-owned next to user-owned sources.
# NOTE(review): assumes the base image defines the "myuser" account - the final
# COPY below already relies on it.
COPY --chown=myuser:myuser package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. "npm list" is allowed to fail so that a dependency-tree
# problem it reports does not abort the build.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY --chown=myuser:myuser . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

1{
2    "name": "apify-project",
3    "version": "0.0.1",
4    "description": "",
5    "author": "It's not you it's me",
6    "license": "ISC",
7    "dependencies": {
8        "apify": "0.21.10"
9    },
10    "scripts": {
11        "start": "node main.js"
12    }
13}

main.js

const Apify = require('apify');

const AMAZON_BASE_URL = 'https://www.amazon.com';
const SEARCH_URL_PREFIX = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=';
const OFFER_LIST_BASE_URL = 'https://www.amazon.com/gp/offer-listing/';
const DEFAULT_KEYWORD = 'ipad';

/**
 * Turns an href taken from a search result into an absolute product URL.
 *
 * When the href contains exactly one `&url=` marker, the part after the marker
 * is URL-decoded and used as the target; otherwise the href is used as is.
 * The target is then resolved against the Amazon base URL, so relative
 * targets become absolute while absolute ones keep their own origin.
 *
 * @param {string} rawUrl - href attribute value read from the result link.
 * @returns {string} Absolute URL of the item page.
 * @throws {URIError|TypeError} If the embedded target cannot be decoded or parsed.
 */
function resolveItemUrl(rawUrl) {
    const parts = rawUrl.split(/&url=/);
    const target = parts.length === 2 ? decodeURIComponent(parts[1]) : rawUrl;
    return new URL(target, AMAZON_BASE_URL).href;
}

/**
 * Actor entry point.
 *
 * Reads `keyword` from the INPUT record (falling back to a default), crawls
 * the search result page for that keyword, then each found item page, then
 * every page of the item's offer listing. One record per item is pushed to
 * the dataset named after the keyword; it contains asin, itemUrl, title,
 * keyword, description and the list of offers (price, shipping, seller).
 */
Apify.main(async () => {
    // INPUT may be missing entirely, in which case getValue() resolves to null.
    const input = (await Apify.getValue('INPUT')) || {};
    const keyword = input.keyword || DEFAULT_KEYWORD;

    const requestQueue = await Apify.openRequestQueue();

    await requestQueue.addRequest({
        // Encode the keyword so that spaces, '&', '#', etc. cannot break the query string.
        url: `${SEARCH_URL_PREFIX}${encodeURIComponent(keyword)}`,
        userData: {
            label: 'START',
        },
    });

    // Start from an empty dataset: results of a previous run for the same keyword are dropped.
    let dataset = await Apify.openDataset(keyword);
    const { itemCount } = await dataset.getInfo();
    if (itemCount) {
        await dataset.delete();
        // The old handle points at the dataset that was just deleted, so open a
        // fresh one before anything is pushed.
        // NOTE(review): assumes openDataset() creates a new dataset after delete() - confirm against the SDK version in use.
        dataset = await Apify.openDataset(keyword);
    }

    /**
     * START page: collects the search results and enqueues one ITEM request per result.
     */
    const handleStart = async (page) => {
        const resultList = await page.$('#s-results-list-atf.s-result-list');
        if (!resultList) {
            // Fail with a readable message so the crawler can retry the request.
            throw new Error(`Search result list not found for keyword "${keyword}"`);
        }

        const foundItems = await resultList.$$eval('li.s-result-item', (elems) => elems
            .map((el) => {
                const asin = el.getAttribute('data-asin');
                const link = el.querySelector('.s-access-detail-page');
                const itemUrl = link ? link.getAttribute('href') : null;
                // Tiles without an ASIN or a detail link cannot be followed - skip
                // them instead of failing the whole page.
                if (!asin || !itemUrl) return null;
                const heading = link.querySelector('h2');
                return {
                    asin,
                    itemUrl,
                    title: heading ? heading.getAttribute('data-attribute') : null,
                };
            })
            .filter(Boolean));

        // Await every enqueue so the handler only finishes once all items are in
        // the queue and a failure is reported for this request.
        for (const found of foundItems) {
            const item = {
                ...found,
                itemUrl: resolveItemUrl(found.itemUrl),
                keyword,
            };
            await requestQueue.addRequest({
                url: item.itemUrl,
                userData: {
                    label: 'ITEM',
                    data: item,
                },
            });
        }
    };

    /**
     * ITEM page: reads the product description and enqueues the first offer-list page.
     */
    const handleItem = async (page, request) => {
        const description = await page.evaluate((sel) => {
            const p = document.querySelector(sel);
            return p ? p.innerText : null;
        }, 'div#productDescription p');

        const { data } = request.userData;
        await requestQueue.addRequest({
            url: OFFER_LIST_BASE_URL + data.asin,
            userData: {
                label: 'OFFER_LIST',
                data: {
                    ...data,
                    description,
                },
            },
        });
    };

    /**
     * OFFER_LIST page: appends this page's offers to those carried in userData,
     * then either follows the pagination link or stores the finished item.
     */
    const handleOfferList = async (page, request) => {
        const offerList = await page.$('div#olpOfferList');
        if (!offerList) {
            throw new Error(`Offer list not found on ${request.url}`);
        }

        const pageOffers = await offerList.$$eval('.olpOffer', (nodes) => nodes.map((node) => {
            // Text of the first descendant matching the selector, or the fallback when absent.
            const textOf = (selector, fallback) => {
                const el = node.querySelector(selector);
                return el ? el.innerText : fallback;
            };
            return {
                price: textOf('.olpOfferPrice', null),
                // A missing shipping element is reported as 'free'.
                shipping: textOf('.olpShippingInfo', 'free'),
                seller: textOf('.olpSellerName', null),
            };
        }));

        const { data } = request.userData;
        const itemWithOffers = {
            ...data,
            offers: [...(data.offers || []), ...pageOffers],
        };

        const nextPage = await page.evaluate((selector) => {
            const a = document.querySelector(selector);
            return a ? a.getAttribute('href') : null;
        }, 'div#olpOfferListColumn .a-pagination .a-last a');

        if (nextPage) {
            // Carry the offers collected so far over to the next page of the listing.
            await requestQueue.addRequest({
                // Works for relative as well as absolute pagination hrefs.
                url: new URL(nextPage, AMAZON_BASE_URL).href,
                userData: {
                    label: 'OFFER_LIST',
                    data: itemWithOffers,
                },
            });
        } else {
            // Last page of the listing: the item is complete.
            await dataset.pushData(itemWithOffers);
        }
    };

    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        launchPuppeteerOptions: { headless: true },
        // Requests with an unknown label are ignored.
        handlePageFunction: async ({ page, request }) => {
            switch (request.userData.label) {
                case 'START':
                    await handleStart(page);
                    break;
                case 'ITEM':
                    await handleItem(page, request);
                    break;
                case 'OFFER_LIST':
                    await handleOfferList(page, request);
                    break;
                default:
                    break;
            }
        },
        handleFailedRequestFunction: async ({ request, error }) => {
            request.pushErrorMessage(error);
        },
    });

    await crawler.run();
});

Pricing

Pricing model

Pay per usage

This Actor is paid per platform usage. The Actor is free to use, and you only pay for the Apify platform usage.