Job Openings Metadata
Pricing: Pay per usage
Rating: 0.0 (0 reviews)
Last modified: 6 days ago
{ "actorSpecification": 1, "name": "my-actor-2", "title": "Getting started with Python and BeautifulSoup", "description": "Scrapes titles of websites using BeautifulSoup.", "version": "0.0", "buildTag": "latest", "meta": { "templateId": "python-beautifulsoup" }, "input": "./input_schema.json", "dockerfile": "../Dockerfile"}
{ "title": "Python BeautifulSoup Scraper", "type": "object", "schemaVersion": 1, "properties": { "start_urls": { "title": "Start URLs", "type": "array", "description": "URLs to start with", "prefill": [{ "url": "https://apify.com" }], "editor": "requestListSources" }, "max_depth": { "title": "Maximum depth", "type": "integer", "description": "Depth to which to scrape to", "default": 1 } }, "required": ["start_urls"]}
import asyncio

from .main import main

# Execute the Actor entrypoint.
asyncio.run(main())
1"""Main entry point for the LinkedIn Job Count Metadata Scraper.2This Actor fetches job counts from LinkedIn job search URLs using HTTP requests and HTML parsing,3optimized for speed with 1-11 uniform random concurrent tasks to process 6,844 URLs in a single run,4spanning approximately 16 hours with random delays of 500-3,500 milliseconds between each query5and random delays of 3-5 seconds between retry attempts, with 5 retries.6Other queries continue processing during retry delays.7Key Features:8- Uses HTTP GET requests with a browser-like User-Agent to fetch raw HTML.9- Parses the initial HTML for job counts using BeautifulSoup, targeting '.results-context-header__context'10 for job numbers. Labels results as "No Jobs" if the title contains "0 jobs in" or the11 '.no-results__main-title-keywords' element is present, falling back to "Failed to Load" for failures.12- Pushes extracted data (URL, job count, timestamp) to the Apify dataset.13Usage:14- Input JSON should contain a 'start_urls' array with URLs like {'url': 'https://www.linkedin.com/jobs/search/?f_C=123&geoID=103644278', 'method': 'GET'}.15- Run the Actor via the Apify Starter Plan with 128 MB memory16- Monitor logs for success (e.g., "Extracted job count: 2 results") or failures (e.g., "No Jobs" or "Failed to Load").17- Relies on Apify's proxy rotation; a paid proxy plan18- Runtime may vary slightly based on randomization, retries, and network conditions.19Dependencies:20- apify>=2.7.321- requests>=2.28.022- beautifulsoup4>=4.12.023"""24
import asyncio
import random
from datetime import datetime
from typing import List

import requests
from apify import Actor
from bs4 import BeautifulSoup


async def fetch_url(url: str, proxy_configuration) -> tuple[str, str]:
    max_attempts = 5  # Retry each URL up to 5 times.
    for attempt in range(max_attempts):
        try:
            proxy_url = await proxy_configuration.new_url()
            headers = {
                "User-Agent": (
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
                )
            }
            response = await asyncio.to_thread(
                requests.get,
                url,
                headers=headers,
                proxies={"http": proxy_url, "https": proxy_url},
                timeout=30,
            )
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, "html.parser")
                Actor.log.info(f"Successfully fetched HTML for {url}")
                # Guard against pages without a <title> (or with an empty one).
                title = soup.title.string.lower() if soup.title and soup.title.string else ""
                if title.startswith("0 jobs in"):  # Strict check for "0 jobs in" at the start.
                    return url, "No Jobs"
                no_jobs_element = soup.find(class_="no-results__main-title-keywords")
                if no_jobs_element:
                    return url, "No Jobs"
                job_element = soup.find(class_="results-context-header__context")
                if job_element:
                    job_count_text = job_element.get_text().strip()
                    if any(char.isdigit() for char in job_count_text):
                        return url, job_count_text.split()[0] + " results"  # e.g., "2 results"
                return url, "Failed to Load"
            else:
                Actor.log.error(f"Failed to fetch {url}, status code: {response.status_code}")
                return url, "Failed to Load"
        except Exception as e:
            Actor.log.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
            if attempt < max_attempts - 1:
                retry_delay = random.uniform(3, 5)  # Random delay of 3-5 seconds before retrying.
                Actor.log.info(f"Pausing for {retry_delay:.1f} seconds before retry {attempt + 2}...")
                await asyncio.sleep(retry_delay)
                continue
    return url, "Failed to Load"


async def process_urls(urls: List[str], proxy_configuration):
    max_concurrent_tasks = 8  # Cap at 8 concurrent tasks to fit within 128 MB of memory.
    semaphore = asyncio.Semaphore(max_concurrent_tasks)

    async def bounded_fetch(url):
        async with semaphore:
            delay = random.uniform(0.5, 3.5)  # Random delay of 500-3,500 ms before each query.
            await asyncio.sleep(delay)
            result = await fetch_url(url, proxy_configuration)
            return url, result[1]  # Return the URL and its job count.

    # Process URLs in smaller chunks to manage memory.
    chunk_size = 100  # Process in chunks to avoid creating tasks for all URLs at once.
    for i in range(0, len(urls), chunk_size):
        chunk = urls[i:i + chunk_size]
        tasks = [bounded_fetch(url) for url in chunk]
        for task in asyncio.as_completed(tasks):  # Handle results as tasks complete, avoiding a double await.
            url, job_count = await task
            data = {
                "url": url,
                "job_count": job_count,
                "timestamp": datetime.utcnow().isoformat() + "Z",
            }
            await Actor.push_data(data)
            Actor.log.info(f"Extracted job count: {job_count} from {url}")
        # No additional gather is needed; as_completed awaits every task in the chunk.


async def main():
    async with Actor:
        # Retrieve the Actor input from the Apify platform, exiting if no URLs are provided.
        actor_input = await Actor.get_input() or {}
        Actor.log.info(f"Received input: {actor_input}")
        all_start_urls = [url.get("url") for url in actor_input.get("start_urls", [])]

        if not all_start_urls:
            Actor.log.error("No start URLs specified in Actor input, exiting...")
            await Actor.exit()

        # Use all provided URLs without randomization.
        start_urls = all_start_urls
        Actor.log.info(f"Starting crawl with {len(start_urls)} URLs: {start_urls}")

        # Configure the proxy for HTTP requests using Apify's residential proxy pool.
        proxy_configuration = await Actor.create_proxy_configuration(groups=["RESIDENTIAL"])

        # Process the URLs individually, with per-query delays and concurrent retries.
        await process_urls(start_urls, proxy_configuration)


if __name__ == "__main__":
    asyncio.run(main())
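To sanity-check the selector logic above without running the full Actor (no Apify runtime or proxies involved), a small standalone sketch like the following can be run against a locally saved LinkedIn search results page. It mirrors the checks in fetch_url(); the file name jobs_page.html is an assumed local fixture, not part of this repository.

# Standalone sketch: applies the same BeautifulSoup checks as fetch_url()
# to a saved HTML page instead of a live request.
from bs4 import BeautifulSoup


def extract_job_count(html: str) -> str:
    soup = BeautifulSoup(html, "html.parser")
    title = soup.title.string.lower() if soup.title and soup.title.string else ""
    if title.startswith("0 jobs in"):
        return "No Jobs"
    if soup.find(class_="no-results__main-title-keywords"):
        return "No Jobs"
    job_element = soup.find(class_="results-context-header__context")
    if job_element:
        text = job_element.get_text().strip()
        if any(char.isdigit() for char in text):
            return text.split()[0] + " results"
    return "Failed to Load"


if __name__ == "__main__":
    # jobs_page.html is an assumed fixture saved from a LinkedIn job search URL.
    with open("jobs_page.html", encoding="utf-8") as f:
        print(extract_job_count(f.read()))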
.git
.mise.toml
.nvim.lua
storage

# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
.python-version

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file. For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

# Visual Studio Code
#  Ignores the folder created by VS Code when changing workspace settings, doing debugger
#  configuration, etc. Can be commented out to share Workspace Settings within a team.
.vscode

# Zed editor
#  Ignores the folder created when setting Project Settings in the Zed editor. Can be commented out
#  to share Project Settings within a team.
.zed
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.13

# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build.
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# print the installed Python version and pip version,
# and list all installed packages with their versions for debugging.
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick builds will be really fast
# for most source file changes.
COPY . ./

# Use compileall to ensure the runnability of the Actor Python code.
RUN python3 -m compileall -q src/

# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run.
CMD ["python3", "-m", "src"]
apify>=2.7.3
requests>=2.28.0
beautifulsoup4>=4.12.0
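As a usage sketch (not part of the Actor itself), a deployed copy of this Actor can be started and its dataset read with the separate apify-client package, which is not listed in requirements.txt. The Actor ID "username/job-openings-metadata", the token, and the start URL below are placeholders.

# Usage sketch with the apify-client package (pip install apify-client).
# The token, Actor ID, and URL are placeholders, not values from this repository.
from apify_client import ApifyClient

client = ApifyClient("MY-APIFY-TOKEN")

run_input = {
    "start_urls": [
        {"url": "https://www.linkedin.com/jobs/search/?f_C=123&geoID=103644278", "method": "GET"}
    ]
}

# Start the Actor run and wait for it to finish.
run = client.actor("username/job-openings-metadata").call(run_input=run_input)

# Iterate over the pushed items (url, job_count, timestamp).
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item["url"], item["job_count"], item["timestamp"])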