trustpilot avatar
trustpilot
Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
trustpilot

trustpilot

enco/trustpilot

Extract data from Trustpilot in a simple way; configure it to obtain only the ratings that interest you.

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "extends": "@apify"
3}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.idea
4node_modules
5
6apify_storage

Dockerfile

1# First, specify the base Docker image. You can read more about
2# the available images at https://sdk.apify.com/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node-playwright-chrome:16
5
6# Second, copy just package.json and package-lock.json since it should be
7# the only file that affects "npm install" in the next step, to speed up the build
8COPY package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14 && npm install --only=prod --no-optional \
15 && echo "Installed NPM packages:" \
16 && (npm list --only=prod --no-optional --all || true) \
17 && echo "Node.js version:" \
18 && node --version \
19 && echo "NPM version:" \
20 && npm --version
21
22# Next, copy the remaining files and directories with the source code.
23# Since we do this after NPM install, quick build will be really fast
24# for most source file changes.
25COPY . ./
26
27# Optionally, specify how to launch the source code of your actor.
28# By default, Apify's base Docker images define the CMD instruction
29# that runs the Node.js source code using the command specified
30# in the "scripts.start" section of the package.json file.
31# In short, the instruction looks something like this:
32#
33# CMD npm start

INPUT_SCHEMA.json

1{
2  "title": "trustpilot scraper schema",
3  "description": "The input schema",
4  "type": "object",
5  "schemaVersion": 1,
6  "properties": {
7    "startUrls": {
8      "title": "Start URL of the commerce",
9      "type": "string",
10      "description": "URL to start with.",
11      "editor": "textfield",
12      "prefill":"https://www.trustpilot.com/review/carsandbids.com"
13    },
14    "startReviews": {
15      "title": "Start Reviews",
16      "type": "array",
17      "description": "Enter the start reviews",
18      "prefill": [
19        5
20      ],
21      "editor": "stringList",
22      "sectionCaption": "Add the start reviews",
23      "sectionDescription": "The review ratings (Excellent = 5, Great = 4, Average = 3, Poor = 2, Bad = 1). You can choose all of them or just one, but don't leave the array empty.",
24      "placeholderKey": "Add a start review",
25      "placeholderValue": "Add a start review",
26      "maxItems": 5
27    },
28    "maxItems": {
29      "title": "maxItems",
30      "type": "integer",
31      "description": "Max numbers of items",
32      "nullable": true
33    }
34  },
35  "required": [
36    "startUrls"
37  ]
38}

apify.json

1{
2	"name": "trustpilot",
3	"version": "0.0",
4	"buildTag": "latest",
5	"env": null,
6	"template": "project_playwright_crawler"
7}

main.js

1const Apify = require('apify');
2// const playwright = require('playwright');
3const { handleStart, handleList, handleDetail } = require('./src/routes');
4
5const { utils: { log } } = Apify;
6
// Actor entry point: reads the input, builds the crawler and dispatches every
// opened page to the route handler that matches the request's label.
Apify.main(async () => {
    const { startUrls } = await Apify.getInput();

    // `startUrls` is a single URL string; it seeds the request list.
    const requestList = await Apify.openRequestList('start-urls', [{ url: startUrls }]);
    const requestQueue = await Apify.openRequestQueue();
    const proxyConfiguration = await Apify.createProxyConfiguration();

    // Handlers for labelled requests. Unlabelled requests (the start URL)
    // fall through to handleStart, which also needs the queue to enqueue pages.
    const labelledHandlers = new Map([
        ['LIST', handleList],
        ['DETAIL', handleDetail],
    ]);

    const crawler = new Apify.PlaywrightCrawler({
        requestList,
        requestQueue,
        proxyConfiguration,
        // Runs Chrome; switching to Firefox or WebKit on the Apify platform
        // also requires changing the base image in the Dockerfile.
        launchContext: { useChrome: true },
        // Fingerprints help against anti-scraping protections; turn this off
        // if it causes performance issues.
        browserPoolOptions: { useFingerprints: true },
        handlePageFunction: async (context) => {
            const { request } = context;
            const { label } = request.userData;
            log.info('Page opened.', { label, url: request.url });

            const labelledHandler = labelledHandlers.get(label);
            if (labelledHandler) {
                return labelledHandler(context);
            }
            return handleStart(context, requestQueue);
        },
    });

    log.info('Starting the crawl.');
    await crawler.run();
    log.info('Crawl finished.');
});

package.json

1{
2	"name": "trustpilot",
3	"version": "0.0.1",
4	"description": "This is a boilerplate of an Apify actor.",
5	"dependencies": {
6		"apify": "^2.3.2",
7		"cheerio": "^1.0.0-rc.12",
8		"playwright": "*"
9	},
10	"devDependencies": {
11		"@apify/eslint-config": "^0.1.3",
12		"eslint": "^7.0.0"
13	},
14	"scripts": {
15		"start": "node main.js",
16		"lint": "./node_modules/.bin/eslint src --ext .js,.jsx",
17		"lint:fix": "./node_modules/.bin/eslint src --ext .js,.jsx --fix",
18		"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
19	},
20	"author": "It's not you it's me",
21	"license": "ISC"
22}

src/routes.js

1const Apify = require('apify');
2const { load } = require('cheerio');
3const { utils: { log } } = Apify;
4let itemsCounter = 0;
5exports.handleStart = async ({ request, page }, requestQueue) => {
6    const { startReviews, maxItems } = await Apify.getInput();
7    log.info('DEFAULT HANDLER REQUEST')
8    const url = new URL(page.url());
9    url.searchParams.set('languages', 'all')
10    const body = await page.content();
11    ////////////
12    const $ = load(body)
13    if (maxItems) {
14        let pages = Math.ceil(maxItems / 20);
15        //Validate page1 because if put params page=1 the url dont work
16        log.info('Number of pages')
17        log.info(pages)
18        for (let index = 1; index <= pages; index++) {
19            if (index == 1) {
20                url.searchParams.delete('page');
21            } else {
22                url.searchParams.set('page', String(index))
23            }
24            let urlStarts = url.toString();
25            startReviews?.forEach((starRewview) => {
26                urlStarts += `&stars=${starRewview}`
27            })
28            await requestQueue.addRequest({
29                url: urlStarts,
30                userData: {
31                    label: 'LIST'
32                }
33            })
34        }
35    } else {
36        const totalPages = Number($('a[data-pagination-button-last-link]').text());
37        log.info('Number of pages to scrape')
38        log.info(totalPages)
39        for (let index = 1; index <= totalPages; index++) {
40            if (index == 1) {
41                url.searchParams.delete('page');
42            }
43            url.searchParams.set('page', String(index))
44            let urlStarts = url.toString();
45            startReviews?.forEach((starRewview) => {
46                urlStarts += `&stars=${starRewview}`
47            })
48            await requestQueue.addRequest({
49                url: urlStarts,
50                userData: {
51                    label: 'DETAIL'
52                }
53            })
54        }
55    }
56    ////////////
57    log.info(`Handle Start URLs`);
58};
59
60exports.handleList = async ({ request, page }) => {
61    const { maxItems } = await Apify.getInput();
62    log.info(`Handle pagination`);
63    await page.waitForSelector('section[data-business-unit-reviews-section="true"]')
64    const html = await page.content();
65    //////////
66    const $ = load(html);
67    $('article[data-service-review-card-paper="true"]').each(async (_i, article) => {
68        itemsCounter++;
69        let item = {};
70        item.userName = $(article).find(`div[data-consumer-name-typography="true"]`).text()
71        item.userCountry = $(article).find(`span[data-consumer-country-typography]`).text()
72        item.reviewRating = Number($(article).find('div[data-service-review-rating]').attr('data-service-review-rating'))
73        item.reviewDate = $(article).find(`time[data-service-review-date-time-ago]`).attr('datetime')
74        item.reviewTitle = $(article).find(`a[data-review-title-typography]`).text()
75        item.reviewText = $(article).find(`p[data-service-review-text-typography]`).text()
76        item.verified = $(article).find(`button[data-review-label-tooltip-trigger="true"]`).text()
77            ? true
78            : false
79        item.businessReply = $(article).find('[data-service-review-business-reply-text-typography="true"]').length == 0
80            ? null
81            : $(article).find('[data-service-review-business-reply-text-typography="true"]').text()
82        if (itemsCounter <= maxItems) {
83            await Apify.pushData(item)
84        }
85    })
86    //////
87    log.info('Items push to dataset default')
88};
89
90exports.handleDetail = async ({ request, page }) => {
91    log.info(`Handle pagination`);
92    await page.waitForSelector('section[data-business-unit-reviews-section="true"]')
93    const html = await page.content();
94    const $ = load(html);
95    $('article[data-service-review-card-paper="true"]').each(async (_i, article) => {
96        let item = {};
97        item.userName = $(article).find(`div[data-consumer-name-typography="true"]`).text()
98        item.userCountry = $(article).find(`span[data-consumer-country-typography]`).text()
99        item.reviewRating = $(article).find('div[data-service-review-rating]').attr('data-service-review-rating')
100        item.reviewDate = $(article).find(`time[data-service-review-date-time-ago]`).attr('datetime')
101        item.reviewTitle = $(article).find(`a[data-review-title-typography]`).text()
102        item.reviewText = $(article).find(`p[data-service-review-text-typography]`).text()
103        item.verified = $(article).find(`button[data-review-label-tooltip-trigger="true"]`).text()
104            ? true
105            : false
106        item.businessReply = $(article).find('[data-service-review-business-reply-text-typography="true"]')
107            ? $(article).find('[data-service-review-business-reply-text-typography="true"]').text()
108            : null
109        await Apify.pushData(item)
110    })
111    log.info('Items push to dataset default')
112};
Developer
Maintained by Community