1import Apify from 'apify'
2
/** Search keyword used to seed the crawl. */
const KEYWORD = 'phone';

/** Search results URL; the (URL-encoded) keyword is appended to it. */
const SEARCH_URL = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=';

/** Product detail URL; the ASIN is appended to it. */
const PRODUCT_URL = 'https://www.amazon.com/dp/';

/** Offer listing URL; the ASIN is appended to it. */
const OFFER_URL = 'https://www.amazon.com/gp/offer-listing/';

/**
 * Handles a request labelled 'start': reads the ASINs from the search results
 * and enqueues one 'product' request per ASIN.
 *
 * Any error raised while waiting for the result list or enqueueing is logged
 * and recorded in the default dataset as a 'No results' entry instead of
 * failing the request.
 *
 * @param {object} context - Object passed to handlePageFunction.
 * @param {object} context.page - Puppeteer page for the request.
 * @param {object} context.request - Request being processed.
 * @param {object} requestQueue - Queue that receives the product requests.
 * @returns {Promise<void>}
 */
async function handleStartPage({ page, request }, requestQueue) {
    console.log('Start page is:', request.url);

    try {
        await page.waitForSelector('.s-result-list', { timeout: 10000 });

        // Result items without a data-asin attribute (or with an empty one) are dropped.
        const asins = await page.$$eval('.s-result-item', (items) => items
            .map((item) => item.dataset.asin)
            .filter((asin) => asin !== '' && asin !== undefined));

        const { keyword } = request.userData;

        for (const asin of asins) {
            const productUrl = `${PRODUCT_URL}${asin}`;

            await requestQueue.addRequest({
                url: productUrl,
                userData: {
                    label: 'product',
                    asin,
                    keyword,
                    productUrl,
                    sellerUrl: `${OFFER_URL}${asin}`,
                },
            });
        }
    } catch (err) {
        console.log(err);
        // Previously this used an undefined `dataset` variable, which threw a
        // ReferenceError from inside the error handler.
        await Apify.pushData({
            url: request.url,
            status: 'No results',
        });
    }

    console.log('Keep crawling.');
}

/**
 * Handles a request labelled 'product': reads the title and description from
 * the page and enqueues the matching 'seller' request, carrying the scraped
 * fields along in userData.
 *
 * @param {object} context - Object passed to handlePageFunction.
 * @param {object} context.page - Puppeteer page for the request.
 * @param {object} context.request - Request being processed.
 * @param {object} requestQueue - Queue that receives the seller request.
 * @returns {Promise<void>}
 */
async function handleProductPage({ page, request }, requestQueue) {
    console.log(`Go to product page: ${request.url}`);

    const pageInfo = await page.evaluate(() => {
        const titleEl = document.getElementById('productTitle');

        if (!titleEl) {
            return null;
        }

        const descriptionEl = document.getElementById('productDescription');

        return {
            title: titleEl.innerText,
            description: descriptionEl ? descriptionEl.innerText : 'No description.',
        };
    });

    if (!pageInfo) {
        console.log(`No product title found on ${request.url}`);
    }

    // When the title element is missing, title and description stay undefined
    // and the seller request is still enqueued.
    const { title, description } = pageInfo || {};
    const { asin, keyword, productUrl, sellerUrl } = request.userData;

    await requestQueue.addRequest({
        url: sellerUrl,
        userData: {
            label: 'seller',
            asin,
            keyword,
            productUrl,
            title,
            description,
        },
    });

    console.log(`End with ${request.url}`);
}

/**
 * Handles a request labelled 'seller': reads the first matching price and
 * seller name from the page and pushes the combined item to the default
 * dataset.
 *
 * @param {object} context - Object passed to handlePageFunction.
 * @param {object} context.page - Puppeteer page for the request.
 * @param {object} context.request - Request being processed.
 * @returns {Promise<void>}
 */
async function handleSellerPage({ page, request }) {
    console.log(`Go to seller page: ${request.url}`);

    const offer = await page.evaluate(() => {
        const priceEl = document.querySelector('span.a-price > span');
        const sellerEl = document.querySelector('#sellerProfileTriggerId');

        return {
            price: priceEl ? priceEl.innerText : 'no Price',
            // Falls back to 'Amazon' when no seller profile link is present.
            seller: sellerEl ? sellerEl.innerText : 'Amazon',
        };
    });

    const { asin, keyword, productUrl, title, description } = request.userData;

    await Apify.pushData({
        title,
        itemUrl: productUrl,
        description,
        keyword,
        asin,
        price: offer.price,
        seller: offer.seller,
    });
}

/**
 * Entry point: seeds the request queue with the keyword search page and runs
 * a PuppeteerCrawler that dispatches each request by its userData.label
 * ('start' -> 'product' -> 'seller').
 */
Apify.main(async () => {
    const requestQueue = await Apify.openRequestQueue();

    await requestQueue.addRequest({
        // Encode the keyword so spaces and reserved characters stay inside the query value.
        url: `${SEARCH_URL}${encodeURIComponent(KEYWORD)}`,
        userData: {
            label: 'start',
            keyword: KEYWORD,
        },
    });

    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        handlePageFunction: async (context) => {
            switch (context.request.userData.label) {
                case 'start':
                    await handleStartPage(context, requestQueue);
                    break;
                case 'product':
                    await handleProductPage(context, requestQueue);
                    break;
                case 'seller':
                    await handleSellerPage(context);
                    break;
                default:
                    // Requests with any other label are ignored.
                    break;
            }
        },
        handleFailedRequestFunction: async ({ request }) => {
            // NOTE(review): the "4 times" text is hard-coded; presumably it matches
            // the crawler's retry settings - confirm.
            console.log(`Request ${request.url} failed 4 times`);

            await Apify.pushData({
                url: request.url,
                errors: request.errorMessages,
            });
        },
    });

    await crawler.run();
});