AI Job Search Agent

Deprecated

Developed by mohamed el hadi msaid · Maintained by Community

Rating: 5.0 (1) · Pricing: Pay per event

Total users: 7 · Monthly users: 5 · Runs succeeded: 0% · Last modified: a month ago

.actor/Dockerfile

# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.13
# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency installation in the next step,
# in order to speed up the build.
COPY requirements.txt ./
# Install the packages specified in requirements.txt,
# print the installed Python version, pip version,
# and all installed packages with their versions for debugging.
RUN echo "Python version:" \
&& python --version \
&& echo "Pip version:" \
&& pip --version \
&& echo "Installing dependencies:" \
&& pip install -r requirements.txt \
&& echo "All installed Python packages:" \
&& pip freeze
# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick builds will be really fast
# for most source file changes.
COPY . ./
# Use compileall to ensure the runnability of the Actor Python code.
RUN python3 -m compileall -q .
# Create and run as a non-root user.
RUN useradd -m apify && \
    chown -R apify:apify ./
USER apify
# Specify how to launch the source code of your Actor.
# The "python3 -m src" command runs the src package as a module.
CMD ["python3", "-m", "src"]

.actor/actor.json

{
  "actorSpecification": 1,
  "name": "AI-Job-Search-Agent",
  "title": "Python LangGraph Agent",
  "description": "LangGraph agent in Python",
  "version": "0.0",
  "buildTag": "latest",
  "input": "./input_schema.json",
  "storages": {
    "dataset": "./dataset_schema.json"
  },
  "meta": {
    "templateId": "python-langgraph"
  },
  "dockerfile": "./Dockerfile"
}

.actor/dataset_schema.json

{
  "actorSpecification": 1,
  "views": {
    "overview": {
      "title": "Overview",
      "transformation": {
        "fields": ["response", "structured_response"]
      },
      "display": {
        "component": "table",
        "properties": {
          "response": {
            "label": "Response",
            "format": "text"
          },
          "structured_response": {
            "label": "Structured Response",
            "format": "object"
          }
        }
      }
    }
  }
}

.actor/input_schema.json

{
  "title": "AI Job Search Agent",
  "type": "object",
  "schemaVersion": 1,
  "properties": {
    "resume": {
      "title": "Resume",
      "type": "string",
      "description": "The resume text to analyze for job matching",
      "editor": "textarea",
      "prefill": "Enter your resume text here..."
    },
    "location": {
      "title": "Location",
      "type": "string",
      "description": "Preferred job location (city, state, or 'Remote')",
      "editor": "textfield",
      "default": "Remote"
    },
    "jobType": {
      "title": "Job Type",
      "type": "string",
      "description": "Type of employment desired",
      "enum": [
        "full-time",
        "part-time",
        "contract",
        "internship",
        "remote"
      ],
      "default": "full-time"
    },
    "keywords": {
      "title": "Keywords",
      "type": "string",
      "description": "Additional search keywords (comma-separated)",
      "editor": "textfield",
      "example": "python, machine learning, data science"
    },
    "modelName": {
      "title": "AI Model",
      "type": "string",
      "description": "The OpenAI model to use for analysis",
      "enum": [
        "gpt-4o-mini",
        "gpt-4-turbo",
        "gpt-3.5-turbo"
      ],
      "default": "gpt-4o-mini"
    }
  },
  "required": ["resume"]
}

.actor/pay_per_event.json

{
  "actor-start-gb": {
    "eventTitle": "Actor start per 1 GB",
    "eventDescription": "Flat fee for starting an Actor run for each 1 GB of memory.",
    "eventPriceUsd": 0.005
  },
  "openai-100-tokens-gpt-4o": {
    "eventTitle": "Price per 100 OpenAI tokens for gpt-4o",
    "eventDescription": "Flat fee for each 100 gpt-4o tokens used.",
    "eventPriceUsd": 0.001
  },
  "openai-100-tokens-gpt-4o-mini": {
    "eventTitle": "Price per 100 OpenAI tokens for gpt-4o-mini",
    "eventDescription": "Flat fee for each 100 gpt-4o-mini tokens used.",
    "eventPriceUsd": 0.00006
  },
  "openai-100-tokens-gpt-o1": {
    "eventTitle": "Price per 100 OpenAI tokens for o1",
    "eventDescription": "Flat fee for each 100 o1 tokens used.",
    "eventPriceUsd": 0.006
  },
  "openai-100-tokens-gpt-o3-mini": {
    "eventTitle": "Price per 100 OpenAI tokens for o3-mini",
    "eventDescription": "Flat fee for each 100 o3-mini tokens used.",
    "eventPriceUsd": 0.00044
  }
}

src/__init__.py


src/__main__.py

import asyncio

from .main import main

# Execute the Actor entry point.
asyncio.run(main())

src/main.py

1"""This module defines the main entry point for the Apify Actor.
2
3Feel free to modify this file to suit your specific needs.
4
5To build Apify Actors, utilize the Apify SDK toolkit, read more at the official documentation:
6https://docs.apify.com/sdk/python
7"""

from __future__ import annotations

import asyncio
import json
import re
from typing import Dict, Any, List

from apify import Actor
from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_react_agent as create_base_agent
from langchain.prompts import PromptTemplate

from src.ppe_utils import charge_for_actor_run
from src.tools import tool_linkedin_search, tool_indeed_search, tool_dice_search, analyze_resume

# The OpenAI API key must be supplied via the OPENAI_API_KEY environment variable
# (for example, as a secret in the Actor's settings); never hard-code secrets in source.

# Fallback input is provided only for testing; delete it before deploying.
fallback_input = {
    'query': 'This is a fallback test query, do nothing and ignore it.',
    'modelName': 'gpt-4o-mini',
}

def setup_react_agent(llm: ChatOpenAI, tools: list, response_format: Any) -> AgentExecutor:
    """Create a ReAct agent with the given LLM and tools."""

    prompt = PromptTemplate.from_template("""Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of {tool_names}
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to ALWAYS follow the format above - start with Thought, then Action, then Action Input.

Question: {input}

{agent_scratchpad}""")

    # Create the agent using LangChain's create_react_agent
    agent = create_base_agent(llm, tools, prompt)

    return AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=True,
        handle_parsing_errors=True,
        max_iterations=6  # Limit the number of iterations to prevent infinite loops
    )


def format_job_results(jobs: List[Dict[str, Any]]) -> str:
    """Format job results into a readable report."""
    if not jobs:
        return "No jobs found matching your criteria."

    report = "# Available Job Opportunities\n\n"

    for i, job in enumerate(jobs, 1):
        report += f"## {i}. {job['title']}\n"
        report += f"**Company:** {job['company']}\n"
        report += f"**Location:** {job['location']}\n"
        report += f"**Type:** {job['employment_type']}\n"
        report += f"**Salary:** {job['salary']}\n"
        report += f"**Posted:** {job['posting_date']}\n"
        report += f"**Description:** {job['description']}\n"
        report += f"**Apply here:** {job['url']}\n\n"
        report += "---\n\n"

    return report
# Update the agent's system message to enforce strict JSON output
system_message = """You are a job search assistant. When searching for jobs, you MUST ONLY return a JSON response wrapped in code block markers, with NO OTHER TEXT before or after. Format exactly like this:

```json
{
    "summary": {
        "total_jobs_found": <number>,
        "skills_matched": ["skill1", "skill2", ...],
        "experience_years": <number>,
        "previous_position": "position title"
    },
    "jobs": [
        {
            "title": "Job Title",
            "company": "Company Name",
            "location": "Location",
            "posting_date": "YYYY-MM-DD",
            "employment_type": "Full-time/Contract/etc",
            "salary": "Salary Range",
            "description": "Brief job description",
            "url": "Application URL",
            "is_remote": true/false,
            "skills_match": ["matched_skill1", "matched_skill2", ...],
            "match_percentage": 85
        }
    ]
}
```

CRITICAL RULES:
1. Return ONLY the JSON code block above - no other text
2. Always start with ```json and end with ```
3. Ensure the JSON is valid and properly formatted
4. Do not include any explanations or thoughts in the output
5. Fill in all fields, using "Not specified" for missing values
"""


async def main() -> None:
    """Main entry point for the Apify Actor."""
    async with Actor:
        # Charge for the Actor run start event
        await charge_for_actor_run()

        # Get input
        actor_input = await Actor.get_input() or fallback_input
        resume = actor_input.get('resume', '')
        location = actor_input.get('location', 'Remote')
        job_type = actor_input.get('jobType', 'full-time')
        keywords = actor_input.get('keywords', '')
        model_name = actor_input.get('modelName', 'gpt-4o-mini')

        # Initialize the LLM with the model selected in the Actor input
        llm = ChatOpenAI(
            model_name=model_name,
            temperature=0.7,
            max_tokens=2000
        )

        # Create the tools list
        tools = [tool_linkedin_search, tool_indeed_search, tool_dice_search, analyze_resume]

        # Get tool names for the prompt
        tool_names = [tool.name for tool in tools]

        # Create the agent
        agent = setup_react_agent(llm, tools, None)

        # Process the query
        result = await agent.ainvoke(
            {
                "input": f"""Find relevant job opportunities based on this resume and preferences:
Resume:
{resume}

Job Preferences:
- Location: {location}
- Job Type: {job_type}
- Keywords: {keywords}

Analyze the resume and search for matching jobs. Return a JSON response with:
1. A brief summary of the search results
2. An array of relevant jobs found (limit to top 5 most relevant)
3. Recommended next steps for the job seeker

Format the response as a JSON object with these exact fields:
{{
    "summary": "Brief overview of search results",
    "jobs": [
        {{
            "title": "Job title",
            "company": "Company name",
            "location": "Job location",
            "salary": "Salary if available",
            "match_score": "Relevance score 0-1",
            "url": "Job posting URL"
        }}
    ],
    "recommendations": ["List of recommended next steps"]
}}""",
                "tools": tools,
                "tool_names": tool_names
            }
        )

        # Process and push the final results only once
        try:
            if isinstance(result, dict) and 'output' in result:
                output = result['output']

                # Try to extract JSON from various formats
                json_data = None

                # Try direct JSON parsing first
                if isinstance(output, str):
                    try:
                        json_data = json.loads(output)
                    except json.JSONDecodeError:
                        # Try extracting from a markdown code block
                        json_match = re.search(r'```(?:json)?\s*({\s*".*?})\s*```', output, re.DOTALL)
                        if json_match:
                            try:
                                json_data = json.loads(json_match.group(1).strip())
                            except json.JSONDecodeError:
                                pass

                if json_data:
                    # Validate and clean the data
                    cleaned_data = {
                        "summary": json_data.get("summary", "No summary provided"),
                        "jobs": json_data.get("jobs", [])[:5],  # Limit to top 5 jobs
                        "recommendations": json_data.get("recommendations", [])
                    }
                    await Actor.push_data(cleaned_data)
                else:
                    await Actor.push_data({
                        "error": "Could not parse JSON output",
                        "raw_output": output
                    })
            else:
                await Actor.push_data({
                    "error": "Unexpected output format",
                    "raw_output": str(result)
                })

        except Exception as e:
            Actor.log.error(f"Failed to process results: {str(e)}")
            await Actor.push_data({
                "error": f"Failed to process results: {str(e)}",
                "raw_output": str(result)
            })


if __name__ == "__main__":
    # Allow running the module directly; the package entry point
    # src/__main__.py does the same thing.
    asyncio.run(main())

src/models.py

1"""This module defines Pydantic models for this project.
2
3These models are used mainly for the structured tool and LLM outputs.
4Resources:
5- https://docs.pydantic.dev/latest/concepts/models/
6"""
7
8from __future__ import annotations
9
10from pydantic import BaseModel, Field
11from typing import List, Optional, Dict
12
13
14class JobPreferences(BaseModel):
15 location: str = Field(..., description="Preferred job location")
16 job_types: List[str] = Field(default=["full-time"], description="Types of employment")
17 salary_range: Optional[Dict[str, float]] = Field(None, description="Desired salary range")
18 remote_preference: str = Field(default="hybrid", description="Remote work preference: 'remote', 'hybrid', 'onsite'")
19 industries: Optional[List[str]] = Field(None, description="Preferred industries")
20 experience_level: str = Field(default="mid-level", description="Experience level: 'entry', 'mid-level', 'senior'")
21
22
23class JobMatch(BaseModel):
24 title: str = Field(..., description="Job title")
25 company: str = Field(..., description="Company name")
26 location: str = Field(..., description="Job location")
27 url: str = Field(..., description="Job posting URL")
28 match_score: float = Field(..., description="Match score between 0 and 1")
29 salary_range: Optional[str] = Field(None, description="Salary range if available")
30 key_requirements: List[str] = Field(default_factory=list, description="Key job requirements")
31 skill_matches: List[str] = Field(default_factory=list, description="Matching skills from resume")
32 missing_skills: List[str] = Field(default_factory=list, description="Required skills not found in resume")
33 job_description: str = Field(..., description="Brief job description")
34 posting_date: Optional[str] = Field(None, description="When the job was posted")
35
36
37class AgentStructuredOutput(BaseModel):
38 """Structured output for the ReAct agent."""
39 matches: List[JobMatch] = Field(..., description="List of matching jobs")
40 summary: str = Field(..., description="Summary of job search results")
41 recommended_actions: List[str] = Field(..., description="Recommended next steps")
42 total_matches: int = Field(..., description="Total number of matches found")
43 average_match_score: float = Field(..., description="Average match score across all jobs")
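
Note that AgentStructuredOutput is never actually applied to the agent's reply in src/main.py, and its field names (matches, recommended_actions) differ from the keys the prompt requests there (jobs, recommendations). Below is a minimal sketch of how the parsed output could be validated once the names are aligned, assuming Pydantic v2 (as pinned in requirements.txt); validate_output and its json_data argument are hypothetical names.

```python
# Sketch only: validate the agent's parsed JSON against the Pydantic model
# before pushing it to the dataset. `json_data` is a hypothetical dict shaped
# like AgentStructuredOutput (Pydantic v2 API: model_validate / model_dump).
from pydantic import ValidationError

from src.models import AgentStructuredOutput


def validate_output(json_data: dict) -> dict:
    try:
        structured = AgentStructuredOutput.model_validate(json_data)
        # model_dump() returns plain dicts/lists suitable for Actor.push_data().
        return structured.model_dump()
    except ValidationError as e:
        return {"error": f"Output failed validation: {e}", "raw_output": json_data}
```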

src/ppe_utils.py

from apify import Actor


async def charge_for_actor_run() -> None:
    """Charges a flat fee for the Actor run start event.

    The event name must match an event defined in the Actor's pay-per-event
    pricing configuration (.actor/pay_per_event.json).
    """
    await Actor.charge(event_name='actor-run')
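
The 'actor-run' event charged here does not appear in .actor/pay_per_event.json, which defines 'actor-start-gb' and the per-100-token events instead. Below is a minimal sketch of helpers that charge those defined events, assuming Actor.charge accepts a count argument as in recent versions of the Apify Python SDK; both helper names are hypothetical.

```python
# Sketch only: charge the events that .actor/pay_per_event.json actually defines.
# Assumes Actor.charge(event_name=..., count=...) from the Apify Python SDK;
# helper names are hypothetical.
from apify import Actor


async def charge_for_actor_start_gb(memory_mbytes: int) -> None:
    # 'actor-start-gb' is priced per 1 GB of run memory, so charge one event
    # per started gigabyte (rounded up).
    await Actor.charge(event_name='actor-start-gb', count=(memory_mbytes + 1023) // 1024)


async def charge_for_model_tokens(model: str, token_count: int) -> None:
    # Token events are priced per 100 tokens; round up to the next block of 100.
    event_name = f'openai-100-tokens-{model}'  # e.g. 'openai-100-tokens-gpt-4o-mini'
    await Actor.charge(event_name=event_name, count=(token_count + 99) // 100)
```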

src/tools.py

1"""This module defines the tools used by the agent.
2
3Feel free to modify or add new tools to suit your specific needs.
4
5To learn how to create a new tool, see:
6- https://python.langchain.com/docs/concepts/tools/
7- https://python.langchain.com/docs/how_to/#tools
8
9Tools for job searching and resume analysis using various job board scrapers.
10"""

from __future__ import annotations

from typing import List, Dict, Any, TypedDict
from pydantic import BaseModel, Field

from apify import Actor
from langchain_core.tools import Tool
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser


class JobSearchInput(BaseModel):
    """Input schema for job search tools."""
    query: str = Field(..., description="Job title or keywords")
    location: str = Field(default="Remote", description="Job location")


class JobSearchResult(TypedDict):
    """Standardized job search result format."""
    title: str
    company: str
    location: str
    posting_date: str
    employment_type: str
    salary: str
    description: str
    url: str
    is_remote: bool


class ResumeInput(BaseModel):
    resume_text: str


async def base_job_search(
    query: str,
    actor_id: str,
    location: str = "Remote"
) -> List[JobSearchResult]:
    """Base function for job searching across different platforms."""
    try:
        # The query may arrive as 'job title, location'; split it accordingly.
        run_input = {
            "query": query.split(',')[0].strip(),
            "location": location if ',' not in query else query.split(',')[1].strip(),
            "limit": 10
        }

        run = await Actor.apify_client.actor(actor_id).call(run_input=run_input)
        if not run:
            return []

        dataset_items = (await Actor.apify_client.dataset(run["defaultDatasetId"]).list_items()).items
        return format_job_results(dataset_items)
    except Exception as e:
        Actor.log.error(f"Job search failed for {actor_id}: {str(e)}")
        return []


def format_job_results(items: List[Dict[str, Any]]) -> List[JobSearchResult]:
    """Format raw job listings into the standardized format."""
    formatted_jobs = []
    for job in items:
        try:
            formatted_job = JobSearchResult(
                title=job.get('title', '').strip(),
                company=job.get('companyName', '').strip(),
                location=job.get('jobLocation', {}).get('displayName', '').strip(),
                posting_date=job.get('postedDate', ''),
                employment_type=job.get('employmentType', ''),
                salary=job.get('salary', 'Not specified'),
                description=job.get('summary', '')[:300] + '...' if job.get('summary') else '',  # Limit description length
                url=job.get('detailsPageUrl', ''),
                is_remote=job.get('isRemote', False)
            )
            formatted_jobs.append(formatted_job)
        except Exception as e:
            Actor.log.error(f"Failed to format job listing: {str(e)}")
            continue

    return formatted_jobs[:5]  # Limit to top 5 results


async def _linkedin_search(query: str) -> List[JobSearchResult]:
    """Search for jobs on LinkedIn."""
    return await base_job_search(query, "bebity/linkedin-jobs-scraper")


# Create LinkedIn search tool
tool_linkedin_search = Tool(
    name="search_linkedin_jobs",
    description="Search for jobs on LinkedIn. Input format: 'job title, location'",
    func=_linkedin_search,
    coroutine=_linkedin_search
)


async def _indeed_search(query: str) -> List[JobSearchResult]:
    """Search for jobs on Indeed."""
    return await base_job_search(query, "curious_coder/indeed-scraper")


# Create Indeed search tool
tool_indeed_search = Tool(
    name="search_indeed_jobs",
    description="Search for jobs on Indeed. Input format: 'job title, location'",
    func=_indeed_search,
    coroutine=_indeed_search
)


async def _dice_search(query: str) -> List[JobSearchResult]:
    """Search for jobs on Dice."""
    return await base_job_search(query, "mohamedgb00714/dicecom-job-scraper")


# Create Dice search tool
tool_dice_search = Tool(
    name="search_dice_jobs",
    description="Search for jobs on Dice. Input format: 'job title, location'",
    func=_dice_search,
    coroutine=_dice_search
)


async def _analyze_resume(resume_text: str) -> Dict[str, Any]:
    """Analyze a resume to extract key information."""
    if not resume_text.strip():
        return {
            "error": "Empty resume text provided",
            "skills": [], "experience": [], "education": [],
            "summary": "No resume to analyze", "years_experience": 0
        }

    try:
        llm = ChatOpenAI(temperature=0)
        output_parser = JsonOutputParser()

        prompt = ChatPromptTemplate.from_template(
            """Analyze this resume and extract key information. Return ONLY a JSON object:

            Resume: {resume_text}

            Format: {format_instructions}
            """
        )

        chain = prompt | llm | output_parser

        analysis = await chain.ainvoke({
            "resume_text": resume_text,
            "format_instructions": output_parser.get_format_instructions()
        })

        return {**analysis, "raw_text": resume_text}

    except Exception as e:
        Actor.log.error(f"Resume analysis failed: {str(e)}")
        return {
            "error": str(e),
            "skills": [], "experience": [], "education": [],
            "summary": "Analysis failed", "years_experience": 0,
            "raw_text": resume_text
        }


# Create analyze_resume tool
analyze_resume = Tool(
    name="analyze_resume",
    description="Analyze a resume to extract skills, experience, and other key information.",
    func=_analyze_resume,
    coroutine=_analyze_resume
)
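
Since each Tool above registers the same async function for both func and coroutine, the tools are meant to be awaited. A minimal usage sketch follows, assuming it runs inside an active `async with Actor:` context (Actor.apify_client is only available then); demo is a hypothetical name.

```python
# Sketch only: invoke a job-search tool directly. Must run inside
# `async with Actor:` so that Actor.apify_client is initialized.
from src.tools import tool_dice_search


async def demo() -> None:
    # Input format is 'job title, location', per the tool description.
    results = await tool_dice_search.ainvoke("python developer, New York")
    for job in results:
        print(f"{job['title']} at {job['company']} - {job['url']}")
```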

src/utils.py

from apify import Actor
from langchain_core.messages import ToolMessage


def log_state(state: dict) -> None:
    """Logs the state of the graph.

    Uses the `Actor.log.debug` method to log the state of the graph.

    Args:
        state (dict): The state of the graph.
    """
    message = state['messages'][-1]
    # Traverse all tool messages and print them
    # if multiple tools are called in parallel
    if isinstance(message, ToolMessage):
        # Walk back until the most recent message with tool_calls
        for _message in state['messages'][::-1]:
            if hasattr(_message, 'tool_calls'):
                break
        Actor.log.debug('-------- Tool Result --------')
        Actor.log.debug('Tool: %s', _message.name)
        Actor.log.debug('Result: %s', _message.content)

    Actor.log.debug('-------- Message --------')
    Actor.log.debug('Message: %s', message)

    # Print all tool calls
    if hasattr(message, 'tool_calls'):
        for tool_call in getattr(message, 'tool_calls', []):
            Actor.log.debug('-------- Tool Call --------')
            Actor.log.debug('Tool: %s', tool_call['name'])
            Actor.log.debug('Args: %s', tool_call['args'])

.dockerignore

.git
.mise.toml
.nvim.lua
storage
# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

.gitignore

.mise.toml
.nvim.lua
storage
# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
# Added by Apify CLI
node_modules

input.json

{
  "query": "please find me a job related to my resume",
  "resume": "I am a software engineer with 5 years of experience in Python and web development. Skills include: Python, JavaScript, React, Node.js, AWS, Docker, and CI/CD. Previously worked at Tech Corp as Senior Developer leading a team of 5 engineers...",
  "preferences": {
    "location": "new york",
    "job_types": ["full-time"],
    "remote_preference": "remote",
    "industries": ["technology", "software"],
    "experience_level": "mid-level",
    "salary_range": {
      "min": 100000,
      "max": 150000
    }
  },
  "modelName": "gpt-4o-mini"
}

input2.json

{
  "query": "find me a data science position in San Francisco",
  "resume": "I am a data scientist with 3 years of experience specializing in machine learning and AI. Proficient in Python, TensorFlow, PyTorch, and scikit-learn. Experience with big data technologies like Spark and Hadoop. Previously worked at AI Solutions Inc as ML Engineer developing predictive models...",
  "preferences": {
    "location": "san francisco",
    "job_types": ["full-time"],
    "remote_preference": "hybrid",
    "industries": ["technology", "artificial intelligence"],
    "experience_level": "mid-level",
    "salary_range": {
      "min": 120000,
      "max": 180000
    }
  },
  "modelName": "gpt-4o-mini"
}

requirements.txt

apify<3.0.0
langchain-openai==0.3.6
langgraph==0.2.73
aiohttp>=3.8.0
beautifulsoup4>=4.12.0
langchain>=0.1.0
pydantic>=2.0.0
langchain-core>=0.1.0