trustpilot avatar
trustpilot

Deprecated

Pricing

Pay per usage

Go to Store
trustpilot

trustpilot

Deprecated

Developed by

Enrique Carvajal Otárola

Enrique Carvajal Otárola

Maintained by Community

Extract data from Trustpilot in a simple way. Configure it to obtain only the ratings that interest you.

0.0 (0)

Pricing

Pay per usage

1

Total users

13

Monthly users

5

Last modified

3 years ago

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"extends": "@apify"
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.idea
node_modules
apify_storage

Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:16
# Second, copy just package.json and package-lock.json since it should be
# the only file that affects "npm install" in the next step, to speed up the build
COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list --only=prod --no-optional --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start

INPUT_SCHEMA.json

{
"title": "trustpilot scraper schema",
"description": "The input schema",
"type": "object",
"schemaVersion": 1,
"properties": {
"startUrls": {
"title": "Start URL of the commerce",
"type": "string",
"description": "URL to start with.",
"editor": "textfield",
"prefill":"https://www.trustpilot.com/review/carsandbids.com"
},
"startReviews": {
"title": "Start Reviews",
"type": "array",
"description": "Enter the start reviews",
"prefill": [
5
],
"editor": "stringList",
"sectionCaption": "Add the start reviews",
"sectionDescription": "The reviews ratings (Excellent = 5, Great=4, Avarage= 3, Poor = 2, Bad = 0 ) you can choose alls or one but don't leave the array empty.",
"placeholderKey": "Add a start review",
"placeholderValue": "Add a start review",
"maxItems": 5
},
"maxItems": {
"title": "maxItems",
"type": "integer",
"description": "Max numbers of items",
"nullable": true
}
},
"required": [
"startUrls"
]
}

apify.json

{
"name": "trustpilot",
"version": "0.0",
"buildTag": "latest",
"env": null,
"template": "project_playwright_crawler"
}

main.js

const Apify = require('apify');
// const playwright = require('playwright');
const { handleStart, handleList, handleDetail } = require('./src/routes');

const { utils: { log } } = Apify;

Apify.main(async () => {
    // Apify.getInput() resolves to null when the actor is started without
    // any input record; guard so we fail with a clear message instead of a
    // TypeError on destructuring.
    const { startUrls } = (await Apify.getInput()) ?? {};
    if (!startUrls) {
        throw new Error('Missing required input field "startUrls".');
    }

    // Seed the crawl with the single start URL from the input schema.
    const requestList = await Apify.openRequestList('start-urls', [
        { url: startUrls },
    ]);
    const requestQueue = await Apify.openRequestQueue();
    const proxyConfiguration = await Apify.createProxyConfiguration();

    const crawler = new Apify.PlaywrightCrawler({
        requestList,
        requestQueue,
        proxyConfiguration,
        launchContext: {
            // To use Firefox or WebKit on the Apify Platform,
            // don't forget to change the image in Dockerfile
            // launcher: playwright.firefox,
            useChrome: true,
            // We don't have 'stealth' for Playwright yet.
            // Try using Firefox, it is naturally stealthy.
        },
        browserPoolOptions: {
            // This allows browser to be more effective against anti-scraping protections.
            // If you are having performance issues try turning this off.
            useFingerprints: true,
        },
        // Route each opened page to its handler based on the label attached
        // when the request was enqueued (see src/routes.js handleStart).
        handlePageFunction: async (context) => {
            const { url, userData: { label } } = context.request;
            log.info('Page opened.', { label, url });
            switch (label) {
                case 'LIST':
                    return handleList(context);
                case 'DETAIL':
                    return handleDetail(context);
                default:
                    return handleStart(context, requestQueue);
            }
        },
    });

    log.info('Starting the crawl.');
    await crawler.run();
    log.info('Crawl finished.');
});

package.json

{
"name": "trustpilot",
"version": "0.0.1",
"description": "This is a boilerplate of an Apify actor.",
"dependencies": {
"apify": "^2.3.2",
"cheerio": "^1.0.0-rc.12",
"playwright": "*"
},
"devDependencies": {
"@apify/eslint-config": "^0.1.3",
"eslint": "^7.0.0"
},
"scripts": {
"start": "node main.js",
"lint": "./node_modules/.bin/eslint src --ext .js,.jsx",
"lint:fix": "./node_modules/.bin/eslint src --ext .js,.jsx --fix",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
"license": "ISC"
}

src/routes.js

const Apify = require('apify');
const { load } = require('cheerio');
const { utils: { log } } = Apify;
// Module-level counter of review items seen so far, shared across all LIST
// pages so that the `maxItems` input is enforced globally, not per page.
let itemsCounter = 0;
5exports.handleStart = async ({ request, page }, requestQueue) => {
6 const { startReviews, maxItems } = await Apify.getInput();
7 log.info('DEFAULT HANDLER REQUEST')
8 const url = new URL(page.url());
9 url.searchParams.set('languages', 'all')
10 const body = await page.content();
11 ////////////
12 const $ = load(body)
13 if (maxItems) {
14 let pages = Math.ceil(maxItems / 20);
15 //Validate page1 because if put params page=1 the url dont work
16 log.info('Number of pages')
17 log.info(pages)
18 for (let index = 1; index <= pages; index++) {
19 if (index == 1) {
20 url.searchParams.delete('page');
21 } else {
22 url.searchParams.set('page', String(index))
23 }
24 let urlStarts = url.toString();
25 startReviews?.forEach((starRewview) => {
26 urlStarts += `&stars=${starRewview}`
27 })
28 await requestQueue.addRequest({
29 url: urlStarts,
30 userData: {
31 label: 'LIST'
32 }
33 })
34 }
35 } else {
36 const totalPages = Number($('a[data-pagination-button-last-link]').text());
37 log.info('Number of pages to scrape')
38 log.info(totalPages)
39 for (let index = 1; index <= totalPages; index++) {
40 if (index == 1) {
41 url.searchParams.delete('page');
42 }
43 url.searchParams.set('page', String(index))
44 let urlStarts = url.toString();
45 startReviews?.forEach((starRewview) => {
46 urlStarts += `&stars=${starRewview}`
47 })
48 await requestQueue.addRequest({
49 url: urlStarts,
50 userData: {
51 label: 'DETAIL'
52 }
53 })
54 }
55 }
56 ////////////
57 log.info(`Handle Start URLs`);
58};
59
60exports.handleList = async ({ request, page }) => {
61 const { maxItems } = await Apify.getInput();
62 log.info(`Handle pagination`);
63 await page.waitForSelector('section[data-business-unit-reviews-section="true"]')
64 const html = await page.content();
65 //////////
66 const $ = load(html);
67 $('article[data-service-review-card-paper="true"]').each(async (_i, article) => {
68 itemsCounter++;
69 let item = {};
70 item.userName = $(article).find(`div[data-consumer-name-typography="true"]`).text()
71 item.userCountry = $(article).find(`span[data-consumer-country-typography]`).text()
72 item.reviewRating = Number($(article).find('div[data-service-review-rating]').attr('data-service-review-rating'))
73 item.reviewDate = $(article).find(`time[data-service-review-date-time-ago]`).attr('datetime')
74 item.reviewTitle = $(article).find(`a[data-review-title-typography]`).text()
75 item.reviewText = $(article).find(`p[data-service-review-text-typography]`).text()
76 item.verified = $(article).find(`button[data-review-label-tooltip-trigger="true"]`).text()
77 ? true
78 : false
79 item.businessReply = $(article).find('[data-service-review-business-reply-text-typography="true"]').length == 0
80 ? null
81 : $(article).find('[data-service-review-business-reply-text-typography="true"]').text()
82 if (itemsCounter <= maxItems) {
83 await Apify.pushData(item)
84 }
85 })
86 //////
87 log.info('Items push to dataset default')
88};
89
90exports.handleDetail = async ({ request, page }) => {
91 log.info(`Handle pagination`);
92 await page.waitForSelector('section[data-business-unit-reviews-section="true"]')
93 const html = await page.content();
94 const $ = load(html);
95 $('article[data-service-review-card-paper="true"]').each(async (_i, article) => {
96 let item = {};
97 item.userName = $(article).find(`div[data-consumer-name-typography="true"]`).text()
98 item.userCountry = $(article).find(`span[data-consumer-country-typography]`).text()
99 item.reviewRating = $(article).find('div[data-service-review-rating]').attr('data-service-review-rating')
100 item.reviewDate = $(article).find(`time[data-service-review-date-time-ago]`).attr('datetime')
101 item.reviewTitle = $(article).find(`a[data-review-title-typography]`).text()
102 item.reviewText = $(article).find(`p[data-service-review-text-typography]`).text()
103 item.verified = $(article).find(`button[data-review-label-tooltip-trigger="true"]`).text()
104 ? true
105 : false
106 item.businessReply = $(article).find('[data-service-review-business-reply-text-typography="true"]')
107 ? $(article).find('[data-service-review-business-reply-text-typography="true"]').text()
108 : null
109 await Apify.pushData(item)
110 })
111 log.info('Items push to dataset default')
112};