
trustpilot
Deprecated
Pricing
Pay per usage
Go to Store

trustpilot
Deprecated
Extract data from Trustpilot in a simple way. Configure it to obtain only the ratings that interest you.
0.0 (0)
Pricing
Pay per usage
1
Total users
13
Monthly users
5
Last modified
3 years ago
.editorconfig
root = true
[*]indent_style = spaceindent_size = 4charset = utf-8trim_trailing_whitespace = trueinsert_final_newline = trueend_of_line = lf
.eslintrc
{ "extends": "@apify"}
.gitignore
# This file tells Git which files shouldn't be added to source control
.ideanode_modules
apify_storage
Dockerfile
# First, specify the base Docker image. You can read more about# the available images at https://sdk.apify.com/docs/guides/docker-images# You can also use any other image from Docker Hub.FROM apify/actor-node-playwright-chrome:16
# Second, copy just package.json and package-lock.json since it should be# the only file that affects "npm install" in the next step, to speed up the buildCOPY package*.json ./
# Install NPM packages, skip optional and development dependencies to# keep the image small. Avoid logging too much and print the dependency# tree for debuggingRUN npm --quiet set progress=false \ && npm install --only=prod --no-optional \ && echo "Installed NPM packages:" \ && (npm list --only=prod --no-optional --all || true) \ && echo "Node.js version:" \ && node --version \ && echo "NPM version:" \ && npm --version
# Next, copy the remaining files and directories with the source code.# Since we do this after NPM install, quick build will be really fast# for most source file changes.COPY . ./
# Optionally, specify how to launch the source code of your actor.# By default, Apify's base Docker images define the CMD instruction# that runs the Node.js source code using the command specified# in the "scripts.start" section of the package.json file.# In short, the instruction looks something like this:## CMD npm start
INPUT_SCHEMA.json
{ "title": "trustpilot scraper schema", "description": "The input schema", "type": "object", "schemaVersion": 1, "properties": { "startUrls": { "title": "Start URL of the commerce", "type": "string", "description": "URL to start with.", "editor": "textfield", "prefill":"https://www.trustpilot.com/review/carsandbids.com" }, "startReviews": { "title": "Start Reviews", "type": "array", "description": "Enter the start reviews", "prefill": [ 5 ], "editor": "stringList", "sectionCaption": "Add the start reviews", "sectionDescription": "The review ratings (Excellent = 5, Great = 4, Average = 3, Poor = 2, Bad = 1). You can choose all of them or just one, but don't leave the array empty.", "placeholderKey": "Add a start review", "placeholderValue": "Add a start review", "maxItems": 5 }, "maxItems": { "title": "maxItems", "type": "integer", "description": "Maximum number of items", "nullable": true } }, "required": [ "startUrls" ]}
apify.json
{ "name": "trustpilot", "version": "0.0", "buildTag": "latest", "env": null, "template": "project_playwright_crawler"}
main.js
1const Apify = require('apify');2// const playwright = require('playwright');3const { handleStart, handleList, handleDetail } = require('./src/routes');4
5const { utils: { log } } = Apify;6
/**
 * Actor entry point.
 *
 * Reads `startUrls` from the actor input, seeds a request list with it,
 * opens the default request queue and a proxy configuration, then runs a
 * PlaywrightCrawler that dispatches every opened page to the route handler
 * matching the request's `userData.label`.
 */
Apify.main(async () => {
    const { startUrls } = await Apify.getInput();

    const initialRequests = [{ url: startUrls }];
    const requestList = await Apify.openRequestList('start-urls', initialRequests);
    const requestQueue = await Apify.openRequestQueue();
    const proxyConfiguration = await Apify.createProxyConfiguration();

    /**
     * Routes a crawled page to its handler based on the request label.
     * Requests without a known label (i.e. the start URL) go to handleStart,
     * which also receives the queue so it can enqueue follow-up requests.
     */
    const routePage = async (context) => {
        const { request } = context;
        const { url } = request;
        const { label } = request.userData;
        log.info('Page opened.', { label, url });

        if (label === 'LIST') {
            return handleList(context);
        }
        if (label === 'DETAIL') {
            return handleDetail(context);
        }
        return handleStart(context, requestQueue);
    };

    // Chrome is used as the browser. Switching to Firefox or WebKit on the
    // Apify Platform would also require a different base image in the
    // Dockerfile (and a `launcher` option here).
    const launchContext = { useChrome: true };

    // Fingerprints help against anti-scraping protections; they can be
    // turned off if they cause performance problems.
    const browserPoolOptions = { useFingerprints: true };

    const crawler = new Apify.PlaywrightCrawler({
        requestList,
        requestQueue,
        proxyConfiguration,
        launchContext,
        browserPoolOptions,
        handlePageFunction: routePage,
    });

    log.info('Starting the crawl.');
    await crawler.run();
    log.info('Crawl finished.');
});
package.json
{ "name": "trustpilot", "version": "0.0.1", "description": "This is a boilerplate of an Apify actor.", "dependencies": { "apify": "^2.3.2", "cheerio": "^1.0.0-rc.12", "playwright": "*" }, "devDependencies": { "@apify/eslint-config": "^0.1.3", "eslint": "^7.0.0" }, "scripts": { "start": "node main.js", "lint": "./node_modules/.bin/eslint src --ext .js,.jsx", "lint:fix": "./node_modules/.bin/eslint src --ext .js,.jsx --fix", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" }, "author": "It's not you it's me", "license": "ISC"}
src/routes.js
1const Apify = require('apify');2const { load } = require('cheerio');3const { utils: { log } } = Apify;4let itemsCounter = 0;5exports.handleStart = async ({ request, page }, requestQueue) => {6 const { startReviews, maxItems } = await Apify.getInput();7 log.info('DEFAULT HANDLER REQUEST')8 const url = new URL(page.url());9 url.searchParams.set('languages', 'all')10 const body = await page.content();11 ////////////12 const $ = load(body)13 if (maxItems) {14 let pages = Math.ceil(maxItems / 20);15 //Validate page1 because if put params page=1 the url dont work16 log.info('Number of pages')17 log.info(pages)18 for (let index = 1; index <= pages; index++) {19 if (index == 1) {20 url.searchParams.delete('page');21 } else {22 url.searchParams.set('page', String(index))23 }24 let urlStarts = url.toString();25 startReviews?.forEach((starRewview) => {26 urlStarts += `&stars=${starRewview}`27 })28 await requestQueue.addRequest({29 url: urlStarts,30 userData: {31 label: 'LIST'32 }33 })34 }35 } else {36 const totalPages = Number($('a[data-pagination-button-last-link]').text());37 log.info('Number of pages to scrape')38 log.info(totalPages)39 for (let index = 1; index <= totalPages; index++) {40 if (index == 1) {41 url.searchParams.delete('page');42 }43 url.searchParams.set('page', String(index))44 let urlStarts = url.toString();45 startReviews?.forEach((starRewview) => {46 urlStarts += `&stars=${starRewview}`47 })48 await requestQueue.addRequest({49 url: urlStarts,50 userData: {51 label: 'DETAIL'52 }53 })54 }55 }56 ////////////57 log.info(`Handle Start URLs`);58};59
60exports.handleList = async ({ request, page }) => {61 const { maxItems } = await Apify.getInput();62 log.info(`Handle pagination`);63 await page.waitForSelector('section[data-business-unit-reviews-section="true"]')64 const html = await page.content();65 //////////66 const $ = load(html);67 $('article[data-service-review-card-paper="true"]').each(async (_i, article) => {68 itemsCounter++;69 let item = {};70 item.userName = $(article).find(`div[data-consumer-name-typography="true"]`).text()71 item.userCountry = $(article).find(`span[data-consumer-country-typography]`).text()72 item.reviewRating = Number($(article).find('div[data-service-review-rating]').attr('data-service-review-rating'))73 item.reviewDate = $(article).find(`time[data-service-review-date-time-ago]`).attr('datetime')74 item.reviewTitle = $(article).find(`a[data-review-title-typography]`).text()75 item.reviewText = $(article).find(`p[data-service-review-text-typography]`).text()76 item.verified = $(article).find(`button[data-review-label-tooltip-trigger="true"]`).text()77 ? true78 : false79 item.businessReply = $(article).find('[data-service-review-business-reply-text-typography="true"]').length == 080 ? null81 : $(article).find('[data-service-review-business-reply-text-typography="true"]').text()82 if (itemsCounter <= maxItems) {83 await Apify.pushData(item)84 }85 })86 //////87 log.info('Items push to dataset default')88};89
90exports.handleDetail = async ({ request, page }) => {91 log.info(`Handle pagination`);92 await page.waitForSelector('section[data-business-unit-reviews-section="true"]')93 const html = await page.content();94 const $ = load(html);95 $('article[data-service-review-card-paper="true"]').each(async (_i, article) => {96 let item = {};97 item.userName = $(article).find(`div[data-consumer-name-typography="true"]`).text()98 item.userCountry = $(article).find(`span[data-consumer-country-typography]`).text()99 item.reviewRating = $(article).find('div[data-service-review-rating]').attr('data-service-review-rating')100 item.reviewDate = $(article).find(`time[data-service-review-date-time-ago]`).attr('datetime')101 item.reviewTitle = $(article).find(`a[data-review-title-typography]`).text()102 item.reviewText = $(article).find(`p[data-service-review-text-typography]`).text()103 item.verified = $(article).find(`button[data-review-label-tooltip-trigger="true"]`).text()104 ? true105 : false106 item.businessReply = $(article).find('[data-service-review-business-reply-text-typography="true"]')107 ? $(article).find('[data-service-review-business-reply-text-typography="true"]').text()108 : null109 await Apify.pushData(item)110 })111 log.info('Items push to dataset default')112};