Amazon Test Actor
Go to Store
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
Amazon Test Actor
maksym-klnn/amazon-test-actor
Dockerfile
# Dockerfile template used to build the image that runs this Actor.
# The base image name below is set during the Actor build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-chrome:v0.21.10

# First, copy just package.json and package-lock.json, since they are the only
# files that affect "npm install" in the next step. Keeping this in its own
# layer speeds up the build when only the source code has changed.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much, and print the dependency
# tree plus the Node.js and NPM versions for debugging.
# "npm list" exits non-zero on dependency problems, hence the "|| true".
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy the source code to the container.
# Do this in the last step, to have a fast build if only the source code changed.
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{
    "name": "apify-project",
    "version": "0.0.1",
    "description": "",
    "author": "It's not you it's me",
    "license": "ISC",
    "dependencies": {
        "apify": "0.21.10"
    },
    "scripts": {
        "start": "node main.js"
    }
}
main.js
const Apify = require('apify');

const AMAZON_BASE_URL = 'https://www.amazon.com';
const SEARCH_BASE_URL = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=';
const OFFER_LIST_BASE_URL = 'https://www.amazon.com/gp/offer-listing/';

/**
 * Turns the href of a search-result link into an absolute product URL.
 *
 * Some result links are redirect links that carry the real target in a
 * percent-encoded `&url=` parameter; in that case the target is extracted and
 * decoded. The result is resolved against the Amazon base URL so that relative
 * hrefs also yield a URL the crawler can open.
 *
 * @param {string} rawUrl - Value of the link's `href` attribute.
 * @returns {string} Absolute URL of the product page.
 */
function resolveItemUrl(rawUrl) {
    const parts = rawUrl.split(/&url=/);
    let target = rawUrl;
    if (parts.length === 2) {
        try {
            target = decodeURIComponent(parts[1]);
        } catch (err) {
            // Malformed percent-encoding: fall back to the link as found on the page.
            target = rawUrl;
        }
    }
    return new URL(target, AMAZON_BASE_URL).href;
}

/**
 * Actor entry point.
 *
 * Reads `keyword` from the INPUT record (default: 'ipad') and crawls in three
 * stages, distinguished by `request.userData.label`:
 *   START      - the search result page; enqueues one ITEM request per product
 *   ITEM       - a product page; reads the description, enqueues OFFER_LIST
 *   OFFER_LIST - an offer listing page; collects offers, follows pagination,
 *                and pushes the finished record to the dataset named after
 *                the keyword once the last page has been processed.
 * Data gathered so far travels between stages in `request.userData.data`.
 */
Apify.main(async () => {
    // INPUT may be missing entirely (e.g. a run started without input).
    const input = (await Apify.getValue('INPUT')) || {};
    const keyword = input.keyword || 'ipad';
    // Encode the keyword so spaces and special characters yield a valid URL.
    const firstPageUrl = SEARCH_BASE_URL + encodeURIComponent(keyword);

    const requestQueue = await Apify.openRequestQueue();

    /**
     * Adds a labelled request to the queue.
     * @param {string} url - Page to visit.
     * @param {string} label - Stage label (START, ITEM or OFFER_LIST).
     * @param {Object} [data] - Data collected so far for the product.
     */
    const enqueue = (url, label, data) => requestQueue.addRequest(new Apify.Request({
        url,
        userData: data === undefined ? { label } : { label, data },
    }));

    await enqueue(firstPageUrl, 'START');

    // Start from an empty dataset: drop results left over from a previous run.
    let dataset = await Apify.openDataset(keyword);
    const { itemCount } = await dataset.getInfo();
    if (itemCount) {
        await dataset.delete();
        // The deleted instance must not be reused for pushData(); open a fresh one.
        dataset = await Apify.openDataset(keyword);
    }

    /** START stage: extract products from the search page and enqueue them. */
    const handleStart = async (page) => {
        const searchResult = await page.$('#s-results-list-atf.s-result-list');
        if (!searchResult) {
            throw new Error(`Search result list not found for keyword "${keyword}"`);
        }

        // NOTE: this callback runs in the browser and cannot reference outer variables.
        const products = await searchResult.$$eval('li.s-result-item', (elems) => elems
            .map((el) => {
                const link = el.querySelector('.s-access-detail-page');
                // Result rows without a product link are skipped instead of
                // failing the whole page.
                if (!link) return null;
                const heading = link.querySelector('h2');
                return {
                    asin: el.getAttribute('data-asin'),
                    itemUrl: link.getAttribute('href'),
                    title: heading ? heading.getAttribute('data-attribute') : null,
                };
            })
            // The ASIN is required later to build the offer listing URL.
            .filter((item) => item !== null && item.asin && item.itemUrl));

        // Await every addRequest() so the handler only finishes once all
        // product pages are in the queue and failures surface as request errors.
        for (const product of products) {
            const itemUrl = resolveItemUrl(product.itemUrl);
            await enqueue(itemUrl, 'ITEM', { ...product, itemUrl, keyword });
        }
    };

    /** ITEM stage: read the product description and enqueue the offer listing. */
    const handleItem = async (page, request) => {
        const description = await page.evaluate((selector) => {
            const paragraph = document.querySelector(selector);
            return paragraph ? paragraph.innerText : null;
        }, 'div#productDescription p');

        const { data } = request.userData;
        await enqueue(OFFER_LIST_BASE_URL + data.asin, 'OFFER_LIST', { ...data, description });
    };

    /** OFFER_LIST stage: collect offers, then follow pagination or store the result. */
    const handleOfferList = async (page, request) => {
        const offerList = await page.$('div#olpOfferList');
        if (!offerList) {
            throw new Error(`Offer list not found at ${request.url}`);
        }

        const offers = await offerList.$$eval('.olpOffer', (nodes) => nodes.map((offer) => {
            const textOf = (selector, fallback) => {
                const el = offer.querySelector(selector);
                return el ? el.innerText : fallback;
            };
            return {
                price: textOf('.olpOfferPrice', null),
                // No shipping info on the offer is reported as 'free'.
                shipping: textOf('.olpShippingInfo', 'free'),
                seller: textOf('.olpSellerName', null),
            };
        }));

        const { data } = request.userData;
        const result = {
            ...data,
            // Offers from earlier pages of the same listing are carried in userData.
            offers: (data.offers || []).concat(offers),
        };

        const nextPage = await page.evaluate((selector) => {
            const anchor = document.querySelector(selector);
            return anchor ? anchor.getAttribute('href') : null;
        }, 'div#olpOfferListColumn .a-pagination .a-last a');

        if (nextPage) {
            // Works for both relative and absolute pagination links.
            await enqueue(new URL(nextPage, AMAZON_BASE_URL).href, 'OFFER_LIST', result);
        } else {
            await dataset.pushData(result);
        }
    };

    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        launchPuppeteerOptions: { headless: true },
        handlePageFunction: async ({ page, request }) => {
            switch (request.userData.label) {
                case 'START':
                    await handleStart(page);
                    break;
                case 'ITEM':
                    await handleItem(page, request);
                    break;
                case 'OFFER_LIST':
                    await handleOfferList(page, request);
                    break;
                default:
                    // Requests with an unknown label are ignored.
                    break;
            }
        },
        handleFailedRequestFunction: async ({ request, error }) => {
            request.pushErrorMessage(error);
            // Report the failure so it does not pass unnoticed.
            console.error(`Request ${request.url} failed: ${error && error.message ? error.message : error}`);
        },
    });

    await crawler.run();
});
Developer
Maintained by Community
Categories