Amazon Test Actor avatar
Amazon Test Actor

Deprecated

Pricing

Pay per usage

Go to Store
Amazon Test Actor

Amazon Test Actor

Deprecated

Developed by

Maksym

Maksym

Maintained by Community

0.0 (0)

Pricing

Pay per usage

0

Total users

14

Monthly users

1

Last modified

2 years ago

Dockerfile

# Dockerfile template used to build the image that runs this actor.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-chrome:v0.21.10
# Copy just package.json and package-lock.json first, since they should be
# the only files that affect "npm install" in the next step; this speeds up the build
COPY package*.json ./
# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Turn off the progress output and print the dependency
# tree plus the Node.js and NPM versions for debugging.
# "npm list" is wrapped in "|| true" so that a non-zero exit status from it
# does not break the "&&" chain and fail the build.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Copy source code to container
# Do this in the last step, to have a fast build if only the source code changed.
# NOTE(review): --chown assumes a "myuser" user and group exist in the base image - confirm.
COPY --chown=myuser:myuser . ./
# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

{
"name": "apify-project",
"version": "0.0.1",
"description": "",
"author": "It's not you it's me",
"license": "ISC",
"dependencies": {
"apify": "0.21.10"
},
"scripts": {
"start": "node main.js"
}
}

main.js

const Apify = require('apify');

/**
 * Actor entry point: searches Amazon for a keyword and stores, for every
 * search result, its basic data together with the offers listed for it.
 *
 * Requests move through three stages, selected by `request.userData.label`:
 *   START      - search results page; enqueues one ITEM request per result
 *   ITEM       - product detail page; reads the description and enqueues
 *                an OFFER_LIST request for the item's ASIN
 *   OFFER_LIST - offer listing page; accumulates offers across pagination
 *                and pushes the finished record to the dataset on the last page
 *
 * Input: the `INPUT` record may carry a `keyword` string; 'ipad' is used
 * when it is missing or empty.
 */
Apify.main(async () => {
    // Guard against a missing INPUT record before reading `keyword` from it.
    const input = (await Apify.getValue('INPUT')) || {};
    const keyword = input.keyword || 'ipad';
    const amazonBaseUrl = 'https://www.amazon.com';
    // The keyword is user input, so it must be encoded before it is placed in the query string.
    const firstPageUrl = `https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=${encodeURIComponent(keyword)}`;
    const offerListBaseUrl = 'https://www.amazon.com/gp/offer-listing/';

    const requestQueue = await Apify.openRequestQueue();

    await requestQueue.addRequest(new Apify.Request({
        url: firstPageUrl,
        userData: {
            label: 'START',
        },
    }));

    // Results are stored in a dataset named after the keyword. Leftovers from a
    // previous run are removed by deleting the dataset and opening a fresh one,
    // so that later pushData() calls never target the deleted handle.
    // NOTE(review): the keyword doubles as the dataset name - confirm that every
    // expected keyword is a valid dataset name.
    let dataset = await Apify.openDataset(keyword);
    const { itemCount } = await dataset.getInfo();
    if (itemCount) {
        await dataset.delete();
        dataset = await Apify.openDataset(keyword);
    }

    /**
     * Runs in the browser: maps search result elements to plain records.
     * Results without a detail-page link or href are skipped.
     */
    const extractSearchResults = (elems) => {
        return elems
            .map((el) => {
                const link = el.querySelector('.s-access-detail-page');
                if (!link) return null;
                const itemUrl = link.getAttribute('href');
                if (!itemUrl) return null;
                const heading = link.querySelector('h2');
                return {
                    asin: el.getAttribute('data-asin'),
                    itemUrl,
                    title: heading ? heading.getAttribute('data-attribute') : null,
                };
            })
            .filter(Boolean);
    };

    /**
     * Runs in the browser: maps offer elements to { price, shipping, seller }.
     * A missing shipping element is reported as 'free'.
     */
    const extractOffers = (offerElems) => {
        return offerElems.map((offer) => {
            const offerPrice = offer.querySelector('.olpOfferPrice ');
            const offerShipping = offer.querySelector('.olpShippingInfo');
            const offerSeller = offer.querySelector('.olpSellerName');

            return {
                price: offerPrice ? offerPrice.innerText : null,
                shipping: offerShipping ? offerShipping.innerText : 'free',
                seller: offerSeller ? offerSeller.innerText : null,
            };
        });
    };

    /**
     * If the href contains exactly one `&url=` marker, the part after it is
     * URL-decoded and used as the item URL; otherwise the href is used as is.
     */
    const resolveItemUrl = (itemUrl) => {
        const urlArr = itemUrl.split(/&url=/);
        return urlArr.length === 2 ? decodeURIComponent(urlArr[1]) : itemUrl;
    };

    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        launchPuppeteerOptions: { headless: true },
        handlePageFunction: async ({ page, request }) => {
            const { label } = request.userData;

            if (label === 'START') {
                const searchResult = await page.$('#s-results-list-atf.s-result-list');
                if (!searchResult) {
                    // Fail with a clear message instead of a TypeError on null.
                    throw new Error(`Search result list not found on ${request.url}`);
                }

                const found = await searchResult.$$eval('li.s-result-item', extractSearchResults);
                const items = found.map(item => ({
                    ...item,
                    itemUrl: resolveItemUrl(item.itemUrl),
                    keyword,
                }));

                // Await every addRequest so the handler does not finish (and
                // errors are not lost) before all item pages are enqueued.
                for (const item of items) {
                    await requestQueue.addRequest(new Apify.Request({
                        url: item.itemUrl,
                        userData: {
                            label: 'ITEM',
                            data: item,
                        },
                    }));
                }
            } else if (label === 'ITEM') {
                const description = await page.evaluate((sel) => {
                    const p = document.querySelector(sel);
                    return (p ? p.innerText : null);
                }, 'div#productDescription p');

                await requestQueue.addRequest(new Apify.Request({
                    url: offerListBaseUrl + request.userData.data.asin,
                    userData: {
                        label: 'OFFER_LIST',
                        data: {
                            ...request.userData.data,
                            description,
                        },
                    },
                }));
            } else if (label === 'OFFER_LIST') {
                const offerList = await page.$('div#olpOfferList');
                if (!offerList) {
                    throw new Error(`Offer list not found on ${request.url}`);
                }
                const offers = await offerList.$$eval('.olpOffer', extractOffers);

                // Offers collected on earlier pages travel along in userData.
                const prevOffers = request.userData.data.offers || [];
                const allOffers = prevOffers.concat(offers);

                const nextPage = await page.evaluate((selector) => {
                    const a = document.querySelector(selector);
                    return (a ? a.getAttribute('href') : null);
                }, 'div#olpOfferListColumn .a-pagination .a-last a');

                if (nextPage) {
                    await requestQueue.addRequest({
                        url: amazonBaseUrl + nextPage,
                        userData: {
                            label: 'OFFER_LIST',
                            data: {
                                ...request.userData.data,
                                offers: allOffers,
                            },
                        },
                    });
                } else {
                    // Last page of offers: the record is complete.
                    await dataset.pushData({
                        ...request.userData.data,
                        offers: allOffers,
                    });
                }
            }
        },
        handleFailedRequestFunction: async ({ request, error }) => {
            // Log the failure as well as recording it on the request.
            console.error(`Request ${request.url} failed:`, error);
            request.pushErrorMessage(error);
        },
    });

    await crawler.run();
});