
Ulta Product Review Scraper
scraped/ulta-product-review-scraper
Pay $0.75 for 1,000 reviews
This Actor is currently under maintenance and may be unreliable.
Scrape product reviews from Ulta
Developer
Maintained by Community
Actor Metrics
1 monthly user
No reviews yet
No bookmarks yet
>99% runs succeeded
Created in Mar 2025
Modified a day ago
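For reference, a minimal sketch of how this Actor could be called from Python with the official apify-client package; the API token placeholder is yours to fill in, and the product URL is simply the prefill example from the input schema below:

from apify_client import ApifyClient

# Replace the placeholder with your own Apify API token.
client = ApifyClient("<YOUR_APIFY_TOKEN>")

# Start the Actor with a single Ulta product URL and wait for the run to finish.
run = client.actor("scraped/ulta-product-review-scraper").call(
    run_input={"url": "https://www.ulta.com/p/huestick-color-corrector-pimprod2028533?sku=2588675"},
)

# Each scraped review is pushed to the run's default dataset.
for review in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(review["Review ID"], review["Rating"], review["Headline"])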
.actor/Dockerfile
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.13

# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./

# Use compileall to ensure the runnability of the Actor Python code.
RUN python3 -m compileall -q .

# Create and run as a non-root user.
RUN useradd --create-home apify && \
    chown -R apify:apify ./
USER apify

# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]
.actor/actor.json
{
    "actorSpecification": 1,
    "name": "ulta-product-review-scraper",
    "title": "Ulta Product Review Scraper",
    "description": "Scrape product reviews from an Ulta product page.",
    "version": "0.0",
    "buildTag": "latest",
    "meta": {
        "templateId": "python-start"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}
.actor/input_schema.json
{
    "title": "Scrape reviews from an Ulta product page",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "url": {
            "title": "URL of the product page",
            "type": "string",
            "description": "The URL of the Ulta product page you want to scrape reviews from.",
            "editor": "textfield",
            "prefill": "https://www.ulta.com/p/huestick-color-corrector-pimprod2028533?sku=2588675"
        }
    },
    "required": ["url"]
}
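For local development, the prefill input above can be saved as the default key-value store record that `apify run` reads; a minimal sketch, assuming the Apify CLI's standard local storage layout under storage/key_value_stores/default/:

import json
from pathlib import Path

# Assumed local storage path used by `apify run`; adjust if your setup differs.
input_path = Path("storage/key_value_stores/default/INPUT.json")
input_path.parent.mkdir(parents=True, exist_ok=True)

# Input matching the schema above: a single Ulta product page URL.
input_path.write_text(json.dumps(
    {"url": "https://www.ulta.com/p/huestick-color-corrector-pimprod2028533?sku=2588675"},
    indent=2,
))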
src/__init__.py
src/__main__.py
import asyncio

from .main import main

# Execute the Actor entry point.
asyncio.run(main())
src/main.py
import asyncio
import re
from datetime import datetime

import requests
from apify import Actor


async def main() -> None:
    async with Actor:
        # get_input() may return None when no input is provided.
        actor_input = await Actor.get_input() or {}
        ulta_url = actor_input.get('url')

        if not ulta_url:
            Actor.log.error("The 'url' input is required.")
            return

        pimprod_id = extract_pimprod_id(ulta_url)
        if not pimprod_id:
            Actor.log.error("Could not extract pimprod ID from the URL.")
            return

        Actor.log.info(f"Scraping reviews for pimprod ID: {pimprod_id}")

        # Reviews are served by the PowerReviews API and fetched page by page.
        base_url = f"https://display.powerreviews.com/m/6406/l/en_US/product/{pimprod_id}/reviews"
        paging_from = 0
        page_size = 25
        total_results = None

        while True:
            url = f"{base_url}?paging.from={paging_from}&paging.size={page_size}&filters=&search=&sort=Newest&image_only=false&page_locale=en_US&_noconfig=true&apikey=daa0f241-c242-4483-afb7-4449942d1a2b"

            Actor.log.info(f"Fetching reviews from: {url}")

            try:
                response = requests.get(url, timeout=30)
                response.raise_for_status()
                data = response.json()

                if total_results is None:
                    total_results = data['paging']['total_results']
                    Actor.log.info(f"Total reviews to fetch: {total_results}")

                for review in data['results'][0]['reviews']:
                    media_data = []
                    if 'media' in review and review['media']:
                        media_data = [
                            {
                                'Media ID': media_item['id'],
                                'URI': media_item['uri'],
                                'Caption': media_item.get('caption', 'N/A'),
                                'Helpful Votes': media_item.get('helpful_votes', 0),
                                'Not Helpful Votes': media_item.get('not_helpful_votes', 0),
                            }
                            for media_item in review['media']
                        ]

                    review_data = {
                        'Review ID': review['review_id'],
                        'Headline': review['details']['headline'],
                        'Nickname': review['details']['nickname'],
                        'Location': review['details'].get('location', 'N/A'),
                        'Created Date': datetime.fromtimestamp(review['details']['created_date'] / 1000).strftime('%Y-%m-%d %H:%M:%S'),
                        'Updated Date': datetime.fromtimestamp(review['details']['updated_date'] / 1000).strftime('%Y-%m-%d %H:%M:%S'),
                        'Product ID': review['details']['product_page_id'],
                        'UPC': review['details'].get('upc', 'N/A'),
                        'GTIN': review['details'].get('gtin', 'N/A'),
                        'Review Text': review['details']['comments'],
                        'Bottom Line': review['details'].get('bottom_line', 'N/A'),
                        'Is Staff Reviewer': review.get('badges', {}).get('is_staff_reviewer', False),
                        'Is Verified Buyer': review.get('badges', {}).get('is_verified_buyer', False),
                        'Is Verified Reviewer': review.get('badges', {}).get('is_verified_reviewer', False),
                        'Rating': review.get('metrics', {}).get('rating', None),
                        'Helpful Votes': review.get('metrics', {}).get('helpful_votes', 0),
                        'Not Helpful Votes': review.get('metrics', {}).get('not_helpful_votes', 0),
                        'Helpful Score': review.get('metrics', {}).get('helpful_score', 0),
                        'Media': media_data
                    }
                    await Actor.push_data(review_data)

                paging_from += page_size
                Actor.log.info(f"Fetched reviews up to {paging_from}")

                if paging_from >= total_results:
                    Actor.log.info("Finished fetching all reviews.")
                    break

                # Respectful delay that does not block the event loop.
                await asyncio.sleep(1)

            except requests.exceptions.RequestException as e:
                Actor.log.error(f"Request error: {e}")
                break
            except ValueError as e:
                Actor.log.error(f"JSON decoding error: {e}")
                break
            except KeyError as e:
                Actor.log.error(f"KeyError: {e}. The JSON structure might be different than expected.")
                break
            except Exception as e:
                Actor.log.error(f"An unexpected error occurred: {e}")
                break

        Actor.log.info("Finished scraping reviews.")


def extract_pimprod_id(ulta_url):
    """Extracts the pimprod ID from an Ulta product URL."""
    match = re.search(r"pimprod(\d+)", ulta_url)
    if match:
        return f"pimprod{match.group(1)}"
    return None


if __name__ == "__main__":
    asyncio.run(main())
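A quick sanity check for the URL parsing in src/main.py, runnable from the repository root with the dependencies from requirements.txt installed; the second URL is a made-up example without a pimprod ID:

# Minimal sketch exercising extract_pimprod_id.
from src.main import extract_pimprod_id

assert extract_pimprod_id(
    "https://www.ulta.com/p/huestick-color-corrector-pimprod2028533?sku=2588675"
) == "pimprod2028533"

# URLs without a pimprod ID return None, which the Actor treats as an input error.
assert extract_pimprod_id("https://www.ulta.com/p/some-product-without-an-id") is None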
src/py.typed
.dockerignore
.git
.mise.toml
.nvim.lua
storage

# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
.gitignore
.mise.toml
.nvim.lua
storage

# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
requirements.txt
# Feel free to add your Python dependencies below. For formatting guidelines, see:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify < 3.0
beautifulsoup4[lxml]
httpx
requests
types-beautifulsoup4