Ulta Product Review Scraper

scraped/ulta-product-review-scraper

Pay $0.75 for 1,000 reviews

This Actor is currently under maintenance and may be unreliable.

Scrape product reviews from Ulta

Developer
Maintained by Community

Actor Metrics

  • 1 monthly user

  • No reviews yet

  • No bookmarks yet

  • >99% runs succeeded

  • Created in Mar 2025

  • Modified a day ago
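
To call this Actor from your own code, you can use the Apify API client. Below is a minimal sketch, assuming the apify-client Python package and an API token in the APIFY_TOKEN environment variable (both assumptions, not part of this Actor's source); the "url" input and the "Headline"/"Rating" fields come from the input schema and src/main.py shown further down.

import os

from apify_client import ApifyClient

client = ApifyClient(os.environ["APIFY_TOKEN"])

# Start a run of the Actor and wait for it to finish.
run = client.actor("scraped/ulta-product-review-scraper").call(
    run_input={"url": "https://www.ulta.com/p/huestick-color-corrector-pimprod2028533?sku=2588675"},
)

# Each review is pushed as one item to the run's default dataset.
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item["Headline"], item["Rating"])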

.actor/Dockerfile

# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.13

# Second, copy just requirements.txt into the Actor image,
# since it is the only file that affects the dependency install in the next step.
# This speeds up the build.
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# and print the installed Python version, pip version,
# and all installed packages with their versions for debugging.
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, incremental builds stay fast
# for most source file changes.
COPY . ./

# Use compileall to verify that the Actor's Python code at least compiles.
RUN python3 -m compileall -q .

# Create and run as a non-root user.
RUN useradd --create-home apify && \
    chown -R apify:apify ./
USER apify

# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run.
CMD ["python3", "-m", "src"]

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "ulta-product-review-scraper",
    "title": "Ulta Product Review Scraper",
    "description": "Scrape product reviews from Ulta product pages.",
    "version": "0.0",
    "buildTag": "latest",
    "meta": {
        "templateId": "python-start"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
    "title": "Scrape reviews from an Ulta product page",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "url": {
            "title": "URL of the product page",
            "type": "string",
            "description": "The URL of the Ulta product page you want to scrape reviews from.",
            "editor": "textfield",
            "prefill": "https://www.ulta.com/p/huestick-color-corrector-pimprod2028533?sku=2588675"
        }
    },
    "required": ["url"]
}
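
The only required input is the product page URL. The Actor identifies the product by the "pimprod" ID embedded in that URL, so the URL must contain one. Here is a quick sanity check you can run before starting a (paid) run, mirroring the regex used in src/main.py; the check_url helper name is made up for this sketch.

import re

def check_url(ulta_url: str) -> str | None:
    """Return the pimprod ID if the URL contains one, mirroring extract_pimprod_id in src/main.py."""
    match = re.search(r"pimprod(\d+)", ulta_url)
    return f"pimprod{match.group(1)}" if match else None

# The schema's prefill URL yields 'pimprod2028533'.
print(check_url("https://www.ulta.com/p/huestick-color-corrector-pimprod2028533?sku=2588675"))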

src/__init__.py

src/__main__.py

import asyncio

from .main import main

# Execute the Actor entry point.
asyncio.run(main())

src/main.py

import asyncio
import re
from datetime import datetime

import requests
from apify import Actor


async def main() -> None:
    async with Actor:
        actor_input = await Actor.get_input() or {}
        ulta_url = actor_input.get('url')

        if not ulta_url:
            Actor.log.error("The 'url' input is required.")
            return

        pimprod_id = extract_pimprod_id(ulta_url)
        if not pimprod_id:
            Actor.log.error("Could not extract pimprod ID from the URL.")
            return

        Actor.log.info(f"Scraping reviews for pimprod ID: {pimprod_id}")

        # Ulta serves its reviews through the PowerReviews display API, paged 25 reviews at a time.
        base_url = f"https://display.powerreviews.com/m/6406/l/en_US/product/{pimprod_id}/reviews"
        paging_from = 0
        page_size = 25
        total_results = None

        while True:
            url = f"{base_url}?paging.from={paging_from}&paging.size={page_size}&filters=&search=&sort=Newest&image_only=false&page_locale=en_US&_noconfig=true&apikey=daa0f241-c242-4483-afb7-4449942d1a2b"

            Actor.log.info(f"Fetching reviews from: {url}")

            try:
                response = requests.get(url, timeout=30)
                response.raise_for_status()
                data = response.json()

                if total_results is None:
                    total_results = data['paging']['total_results']
                    Actor.log.info(f"Total reviews to fetch: {total_results}")

                for review in data['results'][0]['reviews']:
                    media_data = []
                    if 'media' in review and review['media']:
                        media_data = [
                            {
                                'Media ID': media_item['id'],
                                'URI': media_item['uri'],
                                'Caption': media_item.get('caption', 'N/A'),
                                'Helpful Votes': media_item.get('helpful_votes', 0),
                                'Not Helpful Votes': media_item.get('not_helpful_votes', 0),
                            }
                            for media_item in review['media']
                        ]

                    review_data = {
                        'Review ID': review['review_id'],
                        'Headline': review['details']['headline'],
                        'Nickname': review['details']['nickname'],
                        'Location': review['details'].get('location', 'N/A'),
                        'Created Date': datetime.fromtimestamp(review['details']['created_date'] / 1000).strftime('%Y-%m-%d %H:%M:%S'),
                        'Updated Date': datetime.fromtimestamp(review['details']['updated_date'] / 1000).strftime('%Y-%m-%d %H:%M:%S'),
                        'Product ID': review['details']['product_page_id'],
                        'UPC': review['details'].get('upc', 'N/A'),
                        'GTIN': review['details'].get('gtin', 'N/A'),
                        'Review Text': review['details']['comments'],
                        'Bottom Line': review['details'].get('bottom_line', 'N/A'),
                        'Is Staff Reviewer': review.get('badges', {}).get('is_staff_reviewer', False),
                        'Is Verified Buyer': review.get('badges', {}).get('is_verified_buyer', False),
                        'Is Verified Reviewer': review.get('badges', {}).get('is_verified_reviewer', False),
                        'Rating': review.get('metrics', {}).get('rating', None),
                        'Helpful Votes': review.get('metrics', {}).get('helpful_votes', 0),
                        'Not Helpful Votes': review.get('metrics', {}).get('not_helpful_votes', 0),
                        'Helpful Score': review.get('metrics', {}).get('helpful_score', 0),
                        'Media': media_data,
                    }
                    await Actor.push_data(review_data)

                paging_from += page_size
                Actor.log.info(f"Fetched reviews up to {paging_from}")

                if paging_from >= total_results:
                    Actor.log.info("Finished fetching all reviews.")
                    break

                await asyncio.sleep(1)  # Respectful delay between pages, without blocking the event loop.

            except requests.exceptions.RequestException as e:
                Actor.log.error(f"Request error: {e}")
                break
            except ValueError as e:
                Actor.log.error(f"JSON decoding error: {e}")
                break
            except (KeyError, IndexError) as e:
                Actor.log.error(f"Unexpected response shape: {e}. The JSON structure might be different than expected.")
                break
            except Exception as e:
                Actor.log.error(f"An unexpected error occurred: {e}")
                break

        Actor.log.info("Finished scraping reviews.")


def extract_pimprod_id(ulta_url):
    """Extracts the pimprod ID (e.g. 'pimprod2028533') from an Ulta product URL."""
    match = re.search(r"pimprod(\d+)", ulta_url)
    if match:
        return f"pimprod{match.group(1)}"
    return None


if __name__ == "__main__":
    # The Dockerfile runs "python3 -m src", which goes through src/__main__.py;
    # this fallback allows running this module directly as well.
    asyncio.run(main())
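
The Actor reads reviews from the PowerReviews display API rather than the Ulta HTML page, paging 25 reviews at a time until paging.total_results is reached. Below is a standalone sketch of a single page request, using the endpoint, parameters, and API key as they appear in src/main.py above (requests is assumed to be installed; the endpoint and key belong to Ulta/PowerReviews and may change or be rate-limited).

import requests

pimprod_id = "pimprod2028533"  # extracted from the product URL, as in extract_pimprod_id()
base_url = f"https://display.powerreviews.com/m/6406/l/en_US/product/{pimprod_id}/reviews"
params = {
    "paging.from": 0,
    "paging.size": 25,
    "sort": "Newest",
    "image_only": "false",
    "page_locale": "en_US",
    "_noconfig": "true",
    "apikey": "daa0f241-c242-4483-afb7-4449942d1a2b",
}

response = requests.get(base_url, params=params, timeout=30)
response.raise_for_status()
data = response.json()

# The same fields src/main.py reads: total review count and the reviews on this page.
print(data["paging"]["total_results"])
print(len(data["results"][0]["reviews"]))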

src/py.typed


.dockerignore

.git
.mise.toml
.nvim.lua
storage

# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
.python-version

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

.gitignore

.mise.toml
.nvim.lua
storage

# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
.python-version

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

requirements.txt

# Feel free to add your Python dependencies below. For formatting guidelines, see:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify < 3.0
requests  # used by src/main.py for the PowerReviews API calls
beautifulsoup4[lxml]
httpx
types-beautifulsoup4