Scrapy Books Example
An example Python Scrapy project. It scrapes book data from https://books.toscrape.com/.
Makefile
# This is used by the GitHub Actions to run the static analysis.
.PHONY: clean install-dev lint type-check format check-code

clean:
	rm -rf .mypy_cache .pytest_cache .ruff_cache build dist htmlcov .coverage

install-dev:
	uv sync --all-extras --prerelease allow

lint:
	uv run ruff format --check
	uv run ruff check

type-check:
	uv run mypy

format:
	uv run ruff check --fix
	uv run ruff format

check-code: lint type-check
pyproject.toml
[project]
name = "parsel-actor"
version = "0.0.0" # not used by Apify
description = "" # not used by Apify
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
    "apify@git+https://github.com/apify/apify-sdk-python.git@new-apify-storage-clients",
    "crawlee[parsel]==0.6.12b30",
]

[dependency-groups]
dev = [
    "mypy~=1.17.0",
    "ruff~=0.12.0",
]

[tool.uv]
prerelease = "allow"

[tool.ruff]
line-length = 120
include = ["src/**/*.py"]

[tool.ruff.lint.per-file-ignores]
"**/__init__.py" = [
    "F401", # Unused imports
]
"**/__main__.py" = [
    "E402", # Module level import not at top of file
]

[tool.mypy]
files = ["src"]

[[tool.mypy.overrides]]
module = [
    'scrapy.*',
]
ignore_missing_imports = true
.actor/Dockerfile
FROM apify/actor-python:3.13

ENV ACTOR_DIR="src"
ENV UV_VERSION="0.7"

WORKDIR /usr/src/app

# Install Debian packages.
RUN apt-get update && \
    apt-get install -yq --no-install-recommends \
    git && \
    rm -rf /var/lib/apt/lists/*

# Install uv package manager.
RUN pip install --upgrade pip && \
    pip install uv~=${UV_VERSION}

# Copy python config files and install dependencies.
COPY pyproject.toml ./
RUN uv sync
COPY uv.lock ./
RUN uv export --no-hashes --no-dev | \
    pip install --requirement /dev/stdin --no-dependencies

# Copy the source code of the Actor.
COPY .actor ./.actor
COPY ${ACTOR_DIR}/ ./${ACTOR_DIR}/

# Use compileall to ensure the runnability of the Actor Python code.
RUN uv run python -m compileall -q ${ACTOR_DIR}

# Specify how to launch the source code of your Actor.
CMD ["sh", "-c", "uv run python -m ${ACTOR_DIR}"]
.actor/actor.json
{ "actorSpecification": 1, "name": "scrapy-books-example", "title": "Scrapy Books Example", "description": "Example Actor scraping books using Scrapy and Apify SDK.", "version": "0.0", "meta": { "templateId": "python-scrapy" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile"}
.actor/input_schema.json
{ "title": "scrapy-books-example", "type": "object", "schemaVersion": 1, "properties": { "proxyConfiguration": { "sectionCaption": "Proxy and HTTP configuration", "title": "Proxy configuration", "type": "object", "description": "Specifies proxy servers that will be used by the scraper in order to hide its origin.", "editor": "proxy", "prefill": { "useApifyProxy": true }, "default": { "useApifyProxy": true } } }, "required": []}
src/__init__.py
src/__main__.py
from __future__ import annotations

import asyncio
from .main import main

if __name__ == "__main__":
    asyncio.run(main())
src/main.py
from crawlee.crawlers import ParselCrawler, ParselCrawlingContext

from apify import Actor


async def main() -> None:
    async with Actor:
        crawler = ParselCrawler(
            max_crawl_depth=2,
        )

        @crawler.router.default_handler
        async def request_handler(context: ParselCrawlingContext) -> None:
            context.log.info(f'Processing {context.request.url} ...')

            data = {
                'url': context.request.url,
                'title': context.selector.css('title::text').get(),
            }

            await context.push_data(data)
            await context.enqueue_links(strategy='same-domain')

        await crawler.run(['https://crawlee.dev/'])
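Note that as committed, the handler stores only page titles and the start URL is https://crawlee.dev/, while the description above promises book data from https://books.toscrape.com/. A minimal sketch of a handler matching that description, assuming books.toscrape.com's usual markup (article.product_pod entries, the title in the h3 link's title attribute, the price in p.price_color); this is an illustration, not code from the repo:

from crawlee.crawlers import ParselCrawler, ParselCrawlingContext

from apify import Actor


async def main() -> None:
    async with Actor:
        crawler = ParselCrawler(max_crawl_depth=2)

        @crawler.router.default_handler
        async def request_handler(context: ParselCrawlingContext) -> None:
            context.log.info(f'Processing {context.request.url} ...')

            # One dataset item per book listed on the current page.
            for book in context.selector.css('article.product_pod'):
                await context.push_data({
                    'url': context.request.url,
                    'title': book.css('h3 a::attr(title)').get(),
                    'price': book.css('p.price_color::text').get(),
                })

            # Follow category and pagination links within the site.
            await context.enqueue_links(strategy='same-domain')

        await crawler.run(['https://books.toscrape.com/'])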
src/py.typed