Files
Brancheneinstufung2/company-explorer/backend/services/role_mapping.py

64 lines
2.1 KiB
Python

import logging
import re
from sqlalchemy.orm import Session
from typing import Optional
from ..database import JobRolePattern, RawJobTitle, Persona, Contact
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class RoleMappingService:
    """Map job titles to roles using ``JobRolePattern`` rows.

    The service also records job titles that could not be classified in
    ``RawJobTitle`` so they can be reviewed later.
    """

    def __init__(self, db: Session):
        """Keep a reference to the database session used by every method.

        Args:
            db: Session used to query ``JobRolePattern`` and to read/write
                ``RawJobTitle``.
        """
        self.db = db

    def get_role_for_job_title(self, job_title: str) -> Optional[str]:
        """Return the role configured for ``job_title``, or ``None``.

        Active patterns are loaded ordered by ascending ``priority`` and
        evaluated in two passes:

        1. ``exact`` patterns, compared case-insensitively against the
           stripped title.
        2. ``regex`` patterns, searched case-insensitively in the stripped
           title.

        Within each pass the first matching pattern (lowest ``priority``
        value) wins.

        Args:
            job_title: Raw job title. ``None``, empty and whitespace-only
                values are treated as "no title".

        Returns:
            The ``role`` of the first matching pattern, or ``None`` when the
            title is empty or nothing matches.
        """
        # Nothing to classify; also avoids a pointless database round trip.
        if not job_title or not job_title.strip():
            return None

        # `== True` is intentional: SQLAlchemy overloads the comparison to
        # build the SQL filter expression.
        patterns = (
            self.db.query(JobRolePattern)
            .filter(JobRolePattern.is_active == True)  # noqa: E712
            .order_by(JobRolePattern.priority.asc())
            .all()
        )
        return self._match_role(job_title, patterns)

    @staticmethod
    def _match_role(job_title: str, patterns) -> Optional[str]:
        """Match ``job_title`` against already-loaded, priority-ordered patterns.

        Args:
            job_title: Non-empty job title; surrounding whitespace is ignored.
            patterns: Iterable of objects exposing ``pattern_type``,
                ``pattern_value`` and ``role``, in evaluation order.

        Returns:
            The role of the first matching exact pattern, otherwise the role
            of the first matching regex pattern, otherwise ``None``.
        """
        title = job_title.strip()
        normalized_title = title.lower()

        regex_patterns = []
        for entry in patterns:
            value = entry.pattern_value
            if value is None:
                # A row without a pattern cannot match anything; skip it
                # instead of failing the whole lookup.
                continue
            if entry.pattern_type == 'exact':
                # Return on the FIRST hit so that, among duplicate exact
                # values, the earliest (highest-priority) row wins.
                if value.lower() == normalized_title:
                    return entry.role
            elif entry.pattern_type == 'regex':
                # Deferred: regexes are only tried once every exact pattern
                # has been ruled out.
                regex_patterns.append((value, entry.role))

        for pattern, role in regex_patterns:
            try:
                # Search the stripped title so anchored patterns (^ / $)
                # behave the same way as the exact comparison above.
                if re.search(pattern, title, re.IGNORECASE):
                    return role
            except re.error as e:
                # A broken pattern must not prevent later patterns from
                # being evaluated.
                logger.error(
                    "Invalid regex for role '%s': %s. Error: %s",
                    role, pattern, e,
                )
                continue
        return None

    def add_or_update_unclassified_title(self, job_title: str):
        """Record an unclassified job title, or bump its counter.

        If a ``RawJobTitle`` row with this exact title exists, its ``count``
        is incremented; otherwise a new row with ``count=1`` is added. The
        change is committed immediately.

        Args:
            job_title: Title to record. Falsy values are ignored.

        Raises:
            Exception: Whatever the session raises on commit; the session is
                rolled back before the exception propagates.
        """
        if not job_title:
            return

        entry = (
            self.db.query(RawJobTitle)
            .filter(RawJobTitle.title == job_title)
            .first()
        )
        if entry:
            # Treat a missing counter as zero rather than failing on None.
            entry.count = (entry.count or 0) + 1
        else:
            entry = RawJobTitle(title=job_title, count=1)
            self.db.add(entry)

        try:
            self.db.commit()
        except Exception:
            # Leave the session usable for the caller after a failed commit.
            self.db.rollback()
            raise