OMR Review Scraper

Developed by ManM · Maintained by Community

Scrapes software reviews from OMR

Rating: 5.0 (1)
Pricing: Pay per usage
Monthly users: 2
Runs succeeded: >99%
Last modified: 3 months ago

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20

# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright

# Copy just package.json and package-lock.json
# to speed up the build using the Docker layer cache.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies
# to keep the image small. Avoid logging too much, and print the
# dependency tree for debugging.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after npm install, subsequent builds will be fast
# for most source file changes.
COPY . ./

# Run the image.
CMD npm start --silent
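
To test the image locally, you can build and run it with Docker. A minimal sketch (the image tag omr-review-scraper is arbitrary; when run outside the Apify platform, the Actor falls back to local storage for its input rather than the platform):

docker build -t omr-review-scraper .
docker run --rm omr-review-scraper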

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "my-actor",
    "title": "OMR Review Scraper",
    "description": "Scrapes software reviews from OMR product pages.",
    "version": "0.0",
    "meta": {
        "templateId": "js-start"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "schemaVersion": 1,
  "type": "object",
  "title": "Input Schema for Review Scraper",
  "description": "Schema for the input to the Actor that scrapes reviews.",
  "properties": {
    "url": {
      "type": "string",
      "title": "URL to scrape",
      "description": "The URL of the webpage containing reviews to scrape.",
      "editor": "textfield",
      "default": "https://omr.com/en/reviews/product/microsoft-teams/all"
    },
    "maxReviews": {
      "type": "integer",
      "title": "Maximum Reviews",
      "description": "The maximum number of reviews to scrape (e.g., 100).",
      "editor": "number",
      "default": 100
    }
  },
  "required": ["url"],
  "additionalProperties": false
}
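
For local runs, the input can be placed in storage/key_value_stores/default/INPUT.json (the one storage path the .gitignore below keeps in version control). A minimal example that conforms to this schema, using the default URL:

{
  "url": "https://omr.com/en/reviews/product/microsoft-teams/all",
  "maxReviews": 50
}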

src/main.js

import { Actor } from 'apify';
import axios from 'axios';
import * as cheerio from 'cheerio';
import { HttpsProxyAgent } from 'https-proxy-agent';

await Actor.init();

const input = await Actor.getInput();
const { url, maxReviews = 100 } = input ?? {};

let currentPage = 1;
const reviews = [];

// Optional proxy rotation using a comma-separated list of proxy URLs
// provided through the APIFY_PROXY_URLS environment variable
const proxyUrls = process.env.APIFY_PROXY_URLS?.split(',') || [];
let proxyIndex = 0;

while (reviews.length < maxReviews) {
    const currentPageUrl = `${url}/${currentPage}`;
    console.log(`Scraping page: ${currentPageUrl}`);

    try {
        // Rotate through the proxy URLs if any are available
        const proxyUrl = proxyUrls.length > 0 ? proxyUrls[proxyIndex % proxyUrls.length] : null;
        proxyIndex++;

        const axiosConfig = proxyUrl
            ? {
                  proxy: false,
                  httpsAgent: new HttpsProxyAgent(proxyUrl),
              }
            : {};

        // Fetch the HTML content of the current page
        const response = await axios.get(currentPageUrl, axiosConfig);
        const $ = cheerio.load(response.data);

        // Extract reviews from the current page
        const pageReviews = [];
        $('[data-testid="text-review-quotes"]').each((i, element) => {
            if (reviews.length + pageReviews.length >= maxReviews) return false;

            const positive = $(element).find('[data-testid="text-review-quotes-positive"] [data-testid="review-quote-answer"]').text().trim() || '';
            const negative = $(element).find('[data-testid="text-review-negative"] [data-testid="review-quote-answer"]').text().trim() || '';
            const problems = $(element).find('[data-testid="text-review-problems"] [data-testid="review-quote-answer"]').text().trim() || '';
            const dateText = $(element).closest('[data-testid="review-card"]').find('[data-testid="review-author-date"]').text().trim() || '';

            // Parse the date text into a JavaScript Date object
            const parsedDate = dateText ? new Date(dateText.replace('Older than', '').trim()) : null;

            pageReviews.push({
                Positive: positive,
                Negative: negative,
                Problems: problems,
                Date: parsedDate,
                OriginalDate: dateText,
            });
        });

        reviews.push(...pageReviews);

        if (reviews.length >= maxReviews) break;

        // An empty page means we have paginated past the last review
        if (pageReviews.length === 0) {
            console.log('No more reviews found on this page. Ending pagination.');
            break;
        }

        currentPage++;
    } catch (error) {
        console.error(`Failed to fetch page: ${currentPageUrl}`, error);
        break;
    }
}

// Sort reviews by date in descending order (undated reviews sort last)
reviews.sort((a, b) => (b.Date || 0) - (a.Date || 0));

// Save the extracted reviews to the default dataset
await Actor.pushData(reviews);

console.log(`${reviews.length} reviews extracted.`);

await Actor.exit();
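
Each dataset item follows the object shape pushed by the loop above. An illustrative record (the field values here are invented; the Date object is serialized to an ISO string when stored):

{
    "Positive": "Easy to set up and integrates well with our calendar.",
    "Negative": "Search in long chat threads can be slow.",
    "Problems": "Occasional audio issues in large calls.",
    "Date": "2024-03-18T00:00:00.000Z",
    "OriginalDate": "March 18, 2024"
}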

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git

.gitignore

# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json

package.json

{
    "name": "js-scrape-single-page",
    "version": "0.0.1",
    "type": "module",
    "description": "This is an example of an Apify actor.",
    "engines": {
        "node": ">=18.0.0"
    },
    "dependencies": {
        "apify": "^3.2.6",
        "axios": "^1.5.0",
        "cheerio": "^1.0.0-rc.12",
        "https-proxy-agent": "^7.0.2"
    },
    "scripts": {
        "start": "node ./src/main.js",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}
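
With Node.js 18+ available, the Actor can be started locally via the start script; apify run is an alternative if the Apify CLI is installed:

npm install
npm start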

Pricing

Pricing model

Pay per usage

This Actor is priced on a pay-per-usage model: the Actor itself is free, and you pay only for the Apify platform resources it consumes.