GitHub Stars avatar

GitHub Stars

Try for free

No credit card required

View all Actors
GitHub Stars

GitHub Stars

sauain/github-stars
Try for free

No credit card required

The input is the URL of any GitHub repository; the output is that repository's name and star count.

.actor/Dockerfile

1# First, specify the base Docker image.
2# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
3# You can also use any other image from Docker Hub.
4FROM apify/actor-python-selenium:3.11
5
6# Second, copy just requirements.txt into the Actor image,
7# since it should be the only file that affects the dependency install in the next step,
8# in order to speed up the build
9COPY requirements.txt ./
10
11# Install the packages specified in requirements.txt,
12# Print the installed Python version, pip version
13# and all installed packages with their versions for debugging
14RUN echo "Python version:" \
15 && python --version \
16 && echo "Pip version:" \
17 && pip --version \
18 && echo "Installing dependencies:" \
19 && pip install -r requirements.txt \
20 && echo "All installed Python packages:" \
21 && pip freeze
22
23# Next, copy the remaining files and directories with the source code.
24# Since we do this after installing the dependencies, quick build will be really fast
25# for most source file changes.
26COPY . ./
27
28# Use compileall to ensure the runnability of the Actor Python code.
29RUN python3 -m compileall -q .
30
31# Specify how to launch the source code of your Actor.
32# By default, the "python3 -m src" command is run
33CMD ["python3", "-m", "src"]

.actor/actor.json

1{
2    "actorSpecification": 1,
3    "name": "my-actor-1",
4    "title": "GitHub Stars",
5    "description": "Returns the name and star count of a GitHub repository, scraped with Selenium.",
6    "version": "0.0",
7    "meta": {
8        "templateId": "python-selenium"
9    },
10    "input": "./input_schema.json",
11    "dockerfile": "./Dockerfile",
12    "storages": {
13        "dataset": {
14            "actorSpecification": 1,
15            "title": "Name of repo and stars",
16            "views": {
17                "titles": {
18                    "title": "Name of repo and stars",
19                    "transformation": {
20                        "fields": [
21                            "repo_name",
22                            "stars_count"
23                        ]
24                    },
25                    "display": {
26                        "component": "table",
27                        "properties": {
28                            "repo_name": {
29                                "label": "Name",
30                                "format": "text"
31                            },
32                            "stars_count": {
33                                "label": "Stars",
34                                "format": "text"
35                            }
36                        }
37                    }
38                }
39            }
40        }
41    }
42}

.actor/input_schema.json

1{
2    "title": "Python Selenium Scraper",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "start_urls": {
7            "title": "Start URLs",
8            "type": "array",
9            "description": "URLs of the GitHub repositories whose star counts should be fetched",
10            "prefill": [
11                { "url": "https://github.com/apify/crawlee" }
12            ],
13            "editor": "requestListSources"
14        },
15        "max_depth": {
16            "title": "Maximum depth",
17            "type": "integer",
18            "description": "Depth to which to scrape to",
19            "default": 1
20        }
21    },
22    "required": ["start_urls"]
23}

src/__main__.py

1"""
2This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging
3settings. The `main()` coroutine is then executed using `asyncio.run()`.
4
5Feel free to modify this file to suit your specific needs.
6"""
7
8import asyncio
9import logging
10
11from apify.log import ActorLogFormatter
12
13from .main import main
14
# Set up logging: a single stream handler with the Apify formatter is shared
# by both SDK loggers.
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

apify_client_logger = logging.getLogger('apify_client')
apify_logger = logging.getLogger('apify')

# The API client logs at INFO, the SDK itself at DEBUG.
for _logger, _level in (
    (apify_client_logger, logging.INFO),
    (apify_logger, logging.DEBUG),
):
    _logger.setLevel(_level)
    _logger.addHandler(handler)

# Run the Actor's main coroutine to completion.
asyncio.run(main())

src/main.py

1from selenium import webdriver
2from selenium.webdriver.chrome.options import Options as ChromeOptions
3from selenium.webdriver.common.by import By
4from selenium.webdriver.support.ui import WebDriverWait
5from selenium.webdriver.support import expected_conditions as EC
6from apify import Actor
7
async def main() -> None:
    """Scrape the name and star count of each GitHub repository in the Actor input.

    Repository URLs are read from the ``start_urls`` input field (a list of
    ``{"url": ...}`` objects, as declared in the input schema). When no URL is
    supplied, the example repository ``https://github.com/apify/crawlee`` is
    used, which matches the previous hard-coded behaviour.

    For every repository one item with the keys ``repo_url``, ``repo_name`` and
    ``stars_count`` is pushed to the default dataset. A failure on one URL is
    logged and does not prevent the remaining URLs from being processed.
    """
    # Function-scope imports so the block does not depend on edits to the
    # module's import section.
    import re
    from urllib.parse import urlparse

    default_repo_url = 'https://github.com/apify/crawlee'  # Example repository URL

    async with Actor() as actor:
        actor_input = await actor.get_input() or {}
        sources = actor_input.get('start_urls') or [{'url': default_repo_url}]

        # Accept both {"url": ...} objects (requestListSources editor) and plain strings.
        repo_urls = []
        for source in sources:
            url = source if isinstance(source, str) else (source or {}).get('url')
            if url:
                repo_urls.append(url)

        chrome_options = ChromeOptions()
        if actor.config.headless:
            chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome(options=chrome_options)

        try:
            for repo_url in repo_urls:
                # The first two path segments of a repository URL are "<owner>/<repo>".
                segments = [part for part in urlparse(repo_url).path.split('/') if part][:2]
                if len(segments) == 2:
                    segments[1] = segments[1].removesuffix('.git')
                # Restrict to a safe character set: the segments are embedded in a
                # CSS selector below, so quotes or brackets must never get through.
                if len(segments) < 2 or not all(re.fullmatch(r'[\w.-]+', part) for part in segments):
                    actor.log.warning(f'Skipping {repo_url}: not a GitHub repository URL')
                    continue
                repo_path = '/'.join(segments)

                try:
                    driver.get(repo_url)

                    # wait.until() raises TimeoutException when the element never
                    # appears, so no "element is None" fallback is needed.
                    wait = WebDriverWait(driver, 10)
                    # The trailing " i" makes the attribute match case-insensitive,
                    # because the casing typed by the user may differ from the
                    # canonical casing in the page's links.
                    repo_name_element = wait.until(EC.presence_of_element_located(
                        (By.CSS_SELECTOR, f"strong a[href*='/{repo_path}' i]")))
                    repo_name = repo_name_element.text.strip()

                    stars_element = wait.until(EC.presence_of_element_located(
                        (By.CSS_SELECTOR, "a.Link--muted[href*='/stargazers'] strong")))
                    stars_count = stars_element.text.strip()

                    actor.log.info(f'Repository: {repo_name}, Stars: {stars_count}')
                    await actor.push_data({
                        'repo_url': repo_url,
                        'repo_name': repo_name,
                        'stars_count': stars_count,
                    })
                except Exception as e:
                    # Top-level boundary for one URL: log with traceback and move on.
                    actor.log.exception(f'Cannot extract data from {repo_url}. Exception: {e}')
        finally:
            # Always release the browser, even if a page load or push failed.
            driver.quit()

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10.venv
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.idea
4.DS_Store
5
6apify_storage
7storage
8
9.venv/
10.env/
11__pypackages__
12dist/
13build/
14*.egg-info/
15*.egg
16
17__pycache__
18
19.mypy_cache
20.dmypy.json
21dmypy.json
22.pytest_cache
23.ruff_cache
24
25.scrapy
26*.log

requirements.txt

1# Feel free to add your Python dependencies below. For formatting guidelines, see:
2# https://pip.pypa.io/en/latest/reference/requirements-file-format/
3
4apify ~= 1.5.1
5selenium ~= 4.14.0
Developer
Maintained by Community

Actor Metrics

  • 1 monthly user

  • 1 star

  • >99% runs succeeded

  • Created in Jan 2024

  • Modified 9 months ago

Categories