1from typing import Dict, Any, List
2import logging
3from langchain_openai import ChatOpenAI
4from langchain_core.messages import SystemMessage, HumanMessage
5import json
6
7
8from src.agents.base import BaseAgent
9from src.config.settings import OPENAI_API_KEY, OPENAI_MODEL, FINAL_SCORING_WEIGHTS
10from src.models.schema import JobListing, ScoredJobListing
11
12logger = logging.getLogger(__name__)
13
class RelevanceScorerAgent(BaseAgent):
    """Score job listings against resume data and keep the best matches.

    Scoring is purely heuristic: four component scores (position, skills,
    location, company), each in the range 0-100, are multiplied by the
    matching entry of ``FINAL_SCORING_WEIGHTS`` and summed into a total.
    """

    # Generic role nouns used as a weak fallback signal when two titles do
    # not contain one another.
    _ROLE_KEYWORDS = (
        "engineer",
        "developer",
        "manager",
        "analyst",
        "designer",
        "specialist",
    )

    # Fixed placeholder: no company-specific signal is computed yet.
    _DEFAULT_COMPANY_SCORE = 70

    # Maximum number of ranked listings written back to the state.
    _TOP_MATCHES_LIMIT = 10

    def __init__(self, apify_client):
        """Initialise the agent and its chat model.

        Args:
            apify_client: Client object forwarded unchanged to ``BaseAgent``.
        """
        super().__init__(apify_client)
        # The scoring methods in this class are heuristic and do not call
        # this model themselves.
        self.llm = ChatOpenAI(
            api_key=OPENAI_API_KEY,
            model=OPENAI_MODEL,
            temperature=0.2,
        )

    def _get_system_prompt(self) -> str:
        """Return the system prompt describing this agent's responsibilities."""
        return """
        Relevance Scorer Agent responsible for:
        1. Analyzing job details against resume
        2. Calculating match scores for skills, experience, location
        3. Ranking job opportunities by relevance
        """

    def _calculate_position_match(self, job_title: str, desired_role: str) -> int:
        """Calculate position match score (0-100).

        Tiers: 100 for an exact (case-insensitive) match, 80 when one title
        contains the other, 60 when both share a generic role keyword,
        30 otherwise, and 0 when either value is missing or blank.

        Args:
            job_title: Title of the job listing.
            desired_role: Role the candidate is looking for.

        Returns:
            One of 0, 30, 60, 80 or 100.
        """
        if not job_title or not desired_role:
            return 0

        # Surrounding whitespace must not prevent an exact match.
        title = job_title.strip().lower()
        role = desired_role.strip().lower()
        if not title or not role:
            return 0

        if title == role:
            return 100

        if role in title or title in role:
            return 80

        shares_keyword = any(
            keyword in title and keyword in role for keyword in self._ROLE_KEYWORDS
        )
        return 60 if shares_keyword else 30

    def _calculate_skills_match(self, job_description: str, skills: List[str]) -> int:
        """Calculate skills match score (0-100).

        The score is the percentage of the candidate's skills that appear
        (case-insensitively, as substrings) in the job description.

        Args:
            job_description: Free-text description of the job.
            skills: Candidate skills; a comma-separated string is also
                accepted and split into individual skills.

        Returns:
            Integer percentage, truncated; 0 when either input is empty.
        """
        if not job_description or not skills:
            return 0

        if isinstance(skills, str):
            # Iterating a plain string would compare single characters.
            skills = skills.split(",")

        # Blank entries would match every description ("" in s is always
        # True) and non-string entries have no .lower(); ignore both.
        wanted = [
            skill.strip().lower()
            for skill in skills
            if isinstance(skill, str) and skill.strip()
        ]
        if not wanted:
            return 0

        description = job_description.lower()
        # NOTE(review): substring matching lets very short skills such as
        # "r" or "go" match inside unrelated words.
        matched = sum(1 for skill in wanted if skill in description)

        # matched <= len(wanted), so the result is always within 0-100.
        return int((matched / len(wanted)) * 100)

    def _calculate_location_match(self, job_location: str, preferred_location: str, remote: bool) -> int:
        """Calculate location match score (0-100).

        Args:
            job_location: Location text of the job listing.
            preferred_location: Candidate's preferred location; may be empty
                or ``None`` when no preference was given.
            remote: Whether the job is flagged as remote.

        Returns:
            100 for a remote job when the preference mentions "remote", for a
            location containing (or contained in) the preference, or when no
            preference was given; 80 when a comma-separated part of the
            preference appears in the job location; 40 for no overlap; 0 when
            the job has no location and the remote rule does not apply.
        """
        preferred = (preferred_location or "").strip().lower()

        # Checked before the location guard so that a remote job without a
        # location string still satisfies a remote preference.
        if remote and "remote" in preferred:
            return 100

        location = (job_location or "").strip().lower()
        if not location:
            return 0

        if not preferred:
            # No stated preference: any location is acceptable. This keeps
            # the previous result, which relied on "" being in every string.
            return 100

        if preferred in location or location in preferred:
            return 100

        # Partial match, e.g. only the city or only the state of "City, ST".
        parts = (part.strip() for part in preferred.split(","))
        if any(part and part in location for part in parts):
            return 80

        return 40

    def _score_job(self, job: Dict[str, Any], resume_data: Dict[str, Any]) -> Dict[str, Any]:
        """Score a job listing against resume data.

        Args:
            job: Standardized job dict; reads ``title``, ``description``,
                ``location`` and ``remote_status``.
            resume_data: Resume dict; reads ``desired_role``, ``skills`` and
                ``location_preference``.

        Returns:
            Dict with ``total_score`` (int), ``score_breakdown`` (weighted
            score per component) and ``match_details`` (human-readable text
            per component).

        Raises:
            KeyError: If ``FINAL_SCORING_WEIGHTS`` lacks a component key.
        """
        desired_role = resume_data.get("desired_role", "")
        preferred_location = resume_data.get("location_preference", "")

        position_score = self._calculate_position_match(job.get("title", ""), desired_role)
        skills_score = self._calculate_skills_match(
            job.get("description", ""), resume_data.get("skills", [])
        )
        location_score = self._calculate_location_match(
            job.get("location", ""), preferred_location, job.get("remote_status", False)
        )
        company_score = self._DEFAULT_COMPANY_SCORE

        raw_scores = {
            "position_match": position_score,
            "skills_experience": skills_score,
            "location": location_score,
            "company": company_score,
        }
        weighted_scores = {
            component: score * FINAL_SCORING_WEIGHTS[component]
            for component, score in raw_scores.items()
        }

        match_details = {
            "position_match": f"{position_score}% match with desired role '{desired_role}'",
            "skills_experience": f"{skills_score}% of skills match the job requirements",
            "location": f"{location_score}% location match with preference '{preferred_location}'",
            "company": f"{company_score}% company match",
        }

        # Round instead of truncating: float error in the weighted sum
        # (e.g. 69.99999999999999) must not cost a whole point.
        total_score = int(round(sum(weighted_scores.values())))

        return {
            "total_score": total_score,
            "score_breakdown": weighted_scores,
            "match_details": match_details,
        }

    async def process(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Process job listings and score them against resume data.

        Reads ``job_listings`` and ``resume_data`` from ``state`` and writes
        the highest-scoring listings (as plain dicts, best first) to
        ``state["scored_listings"]``. Listings without a title or company are
        skipped; a listing that fails to score is logged and skipped.

        Args:
            state: Shared workflow state; mutated in place.

        Returns:
            The same ``state`` object.
        """
        job_listings = state.get("job_listings", [])
        resume_data = state.get("resume_data", {})

        if not job_listings:
            state["scored_listings"] = []
            return state

        if not resume_data:
            state["error_log"] = (state.get("error_log") or []) + ["Missing resume data for scoring"]
            # Keep the key present, as on the empty-listings path, without
            # overwriting anything a previous step stored.
            state.setdefault("scored_listings", [])
            return state

        scored_jobs = []
        for job in job_listings:
            # Best-effort per listing: one malformed entry must not abort
            # the whole batch.
            try:
                standardized_job = self._standardize_job_data(job)

                if not standardized_job.get("title") or not standardized_job.get("company"):
                    continue

                score_data = self._score_job(standardized_job, resume_data)

                scored_job = ScoredJobListing(
                    job_id=standardized_job.get("job_id", ""),
                    title=standardized_job.get("title", ""),
                    company=standardized_job.get("company", ""),
                    location=standardized_job.get("location", ""),
                    remote_status=standardized_job.get("remote_status", False),
                    description=standardized_job.get("description", ""),
                    required_skills=standardized_job.get("required_skills", []),
                    salary_info=standardized_job.get("salary_info", ""),
                    posted_date=standardized_job.get("posted_date", ""),
                    application_url=standardized_job.get("application_url", ""),
                    source=standardized_job.get("source", ""),
                    total_score=score_data["total_score"],
                    score_breakdown=score_data["score_breakdown"],
                    match_details=score_data["match_details"],
                )

                # Prefer the Pydantic v2 serializer when the model has one;
                # fall back to the v1 ``dict()`` method otherwise.
                to_dict = getattr(scored_job, "model_dump", None) or scored_job.dict
                scored_jobs.append(to_dict())

            except Exception as e:
                # logger.exception keeps the traceback for diagnosis.
                logger.exception("Error scoring job: %s", e)
                continue

        scored_jobs.sort(key=lambda scored: scored["total_score"], reverse=True)

        state["scored_listings"] = scored_jobs[: self._TOP_MATCHES_LIMIT]

        return state

    def _standardize_job_data(self, job_data: Dict[str, Any]) -> Dict[str, Any]:
        """Standardize job data from different sources.

        Two source layouts are recognised by their title key: ``job_title``
        (labelled "LinkedIn") and ``jobTitle`` (labelled "Indeed"). Any other
        dict is returned unchanged.

        Args:
            job_data: Raw job dict, or a list whose first element is used.

        Returns:
            Dict with the standardized keys, the unchanged input dict for an
            unrecognised layout, or ``{}`` for an empty list or an entry that
            is not a dict.
        """
        if isinstance(job_data, list):
            if not job_data:
                return {}
            # Only the first element of a list entry is considered.
            job_data = job_data[0]

        if not isinstance(job_data, dict):
            logger.warning(
                "Skipping job entry of unsupported type %s", type(job_data).__name__
            )
            return {}

        if "job_title" in job_data:
            # ``or ""`` also covers keys that are present with a null value.
            location = job_data.get("job_location") or ""
            return {
                "job_id": job_data.get("job_id", ""),
                "title": job_data.get("job_title", ""),
                "company": job_data.get("company_name", ""),
                "location": location,
                "remote_status": "remote" in location.lower(),
                "description": job_data.get("job_description", ""),
                "required_skills": job_data.get("job_skills") or [],
                "salary_info": job_data.get("salary_range", ""),
                "posted_date": job_data.get("posted_date", ""),
                "application_url": job_data.get("job_url", ""),
                "source": "LinkedIn",
            }

        if "jobTitle" in job_data:
            location = job_data.get("location") or ""
            return {
                "job_id": job_data.get("jobId", ""),
                "title": job_data.get("jobTitle", ""),
                "company": job_data.get("companyName", ""),
                "location": location,
                "remote_status": "remote" in location.lower(),
                "description": job_data.get("description", ""),
                "required_skills": [],
                "salary_info": job_data.get("salary", ""),
                "posted_date": job_data.get("date", ""),
                "application_url": job_data.get("url", ""),
                "source": "Indeed",
            }

        # Unrecognised layout: assume it already uses the standardized keys.
        return job_data