158 lines
6.3 KiB
Python
158 lines
6.3 KiB
Python
from sqlalchemy.orm import Session
|
|
from ..database import JobRolePattern, Persona
|
|
from ..lib.core_utils import call_gemini_flash
|
|
import json
|
|
import logging
|
|
import re
|
|
import ast
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
class PatternOptimizationService:
    """Propose REGEX job-title patterns that consolidate existing EXACT ones.

    For every role with enough exact patterns, the service asks the LLM
    (``call_gemini_flash``) for candidate regexes, then verifies each candidate
    locally against the role's own titles (coverage) and against titles of all
    other roles (false positives) before turning it into a proposal.
    """

    # Roles with fewer exact titles than this are not worth consolidating.
    MIN_TITLES_PER_ROLE = 3
    # At most this many titles are taken from each *other* role as negatives.
    MAX_NEGATIVES_PER_ROLE = 50
    # At most this many negatives are shown to the model in the prompt.
    MAX_NEGATIVES_IN_PROMPT = 150
    # A regex must cover at least this many exact patterns to be proposed.
    MIN_COVERED_PATTERNS = 2

    def __init__(self, db: "Session"):
        """Store the SQLAlchemy session used to read ``JobRolePattern`` rows."""
        self.db = db

    def generate_proposals(self):
        """Analyze existing EXACT patterns and propose consolidated REGEX patterns.

        Returns:
            A list of dicts with the keys ``target_role``, ``regex``,
            ``explanation``, ``priority``, ``covered_pattern_ids``,
            ``covered_titles`` and ``false_positives``. The list is empty when
            there are no exact patterns. Failures for a single role (LLM call,
            parsing, invalid regex) are logged and that role/suggestion is
            skipped; they never abort the whole run.
        """
        # 1. Fetch data
        exact_patterns = (
            self.db.query(JobRolePattern)
            .filter(JobRolePattern.pattern_type == "exact")
            .all()
        )

        # Group (title, pattern id) pairs by role. Keeping the id next to its
        # own title (instead of a global title -> id map) guarantees that a
        # title shared by two roles, or duplicated inside one role, is always
        # reported with the id of the row it actually came from.
        rows_by_role = {}
        for pattern in exact_patterns:
            rows_by_role.setdefault(pattern.role, []).append(
                (pattern.pattern_value, pattern.id)
            )

        if not rows_by_role:
            return []

        proposals = []

        # 2. Analyze each role
        for target_role, title_rows in rows_by_role.items():
            if len(title_rows) < self.MIN_TITLES_PER_ROLE:
                continue

            target_titles = [title for title, _ in title_rows]

            negative_examples = []
            for other_role, other_rows in rows_by_role.items():
                if other_role != target_role:
                    negative_examples.extend(
                        title
                        for title, _ in other_rows[: self.MAX_NEGATIVES_PER_ROLE]
                    )

            # 3. Build prompt (the model only sees a capped slice of negatives;
            # local verification below still checks against all of them).
            prompt = self._build_prompt(
                target_role,
                target_titles,
                negative_examples[: self.MAX_NEGATIVES_IN_PROMPT],
            )

            try:
                logger.info(
                    "Optimizing patterns for role: %s (Positive: %s)",
                    target_role,
                    len(target_titles),
                )

                # No json_mode here: the model is asked for Python syntax so it
                # can use r"..." strings for the regexes.
                response = call_gemini_flash(prompt)
                clean_text = self._strip_code_fence(response)

                try:
                    ai_suggestions = self._parse_suggestions(clean_text)
                except ValueError as e:
                    logger.error(
                        "Failed to parse response for %s with JSON and AST. Error: %s",
                        target_role,
                        e,
                    )
                    continue

                # Verify each suggestion locally and map back pattern ids.
                for sugg in ai_suggestions:
                    try:
                        proposal = self._build_proposal(
                            target_role, sugg, title_rows, negative_examples
                        )
                    except (re.error, OverflowError, RecursionError):
                        # re.compile rejects the pattern; skip only this one.
                        logger.warning(
                            "AI generated invalid regex: %s", sugg.get('regex')
                        )
                        continue

                    if proposal is not None:
                        proposals.append(proposal)

            except Exception as e:
                # Best-effort boundary: one failing role must not stop the rest.
                logger.error(
                    "Error optimizing patterns for %s: %s",
                    target_role,
                    e,
                    exc_info=True,
                )
                continue

        logger.info("Optimization complete. Generated %s proposals.", len(proposals))
        return proposals

    @staticmethod
    def _build_prompt(target_role, target_titles, negative_titles):
        """Render the LLM prompt for one role from positive and negative titles."""
        return f"""
        Act as a Regex Optimization Engine for B2B Job Titles.

        GOAL: Break down the list of 'Positive Examples' into logical CLUSTERS and create a Regex for each cluster.
        TARGET ROLE: "{target_role}"

        TITLES TO COVER (Positive Examples):
        {json.dumps(target_titles)}

        TITLES TO AVOID (Negative Examples - DO NOT MATCH THESE):
        {json.dumps(negative_titles)}

        INSTRUCTIONS:
        1. Analyze the 'Positive Examples'. Do NOT try to create one single regex for all of them.
        2. Identify distinct semantic groups.
        3. Create a Regex for EACH group.
        4. CRITICAL - CONFLICT HANDLING:
           - The Regex must NOT match the 'Negative Examples'.
           - Use Negative Lookahead (e.g. ^(?=.*Manager)(?!.*Facility).*) if needed.
        5. Aggressiveness: Be bold.

        OUTPUT FORMAT:
        Return a valid Python List of Dictionaries.
        Example:
        [
            {{
                "regex": r"(?i).*pattern.*",
                "explanation": "Explanation...",
                "suggested_priority": 50
            }}
        ]
        Enclose regex patterns in r"..." strings to handle backslashes correctly.
        """

    @staticmethod
    def _strip_code_fence(text):
        """Remove a surrounding Markdown code fence from the model response.

        The closing fence is only removed when it is actually present, so a
        response that opens a fence but never closes it keeps its full payload.
        """
        cleaned = text.strip()
        # Longest openers first so "```python" is not mistaken for a bare "```".
        for opener in ("```python", "```json", "```"):
            if cleaned.startswith(opener):
                cleaned = cleaned[len(opener):]
                if cleaned.endswith("```"):
                    cleaned = cleaned[:-3]
                break
        return cleaned.strip()

    @staticmethod
    def _parse_suggestions(text):
        """Parse the cleaned response into a list of suggestions.

        Standard JSON is tried first; on failure the text is parsed as a Python
        literal, which also understands r"..." strings. A single top-level dict
        is treated as one suggestion.

        Raises:
            ValueError: if the text is neither valid JSON nor a Python literal,
                or if it does not describe a list of suggestions.
        """
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            try:
                # literal_eval only evaluates literals; it never executes code.
                parsed = ast.literal_eval(text)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as err:
                raise ValueError(str(err)) from err

        if isinstance(parsed, dict):
            return [parsed]
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
        raise ValueError(
            f"expected a list of suggestions, got {type(parsed).__name__}"
        )

    @staticmethod
    def _build_proposal(target_role, suggestion, title_rows, negative_examples):
        """Verify one AI suggestion locally and turn it into a proposal dict.

        Args:
            target_role: Role the suggestion was generated for.
            suggestion: One item of the parsed model output.
            title_rows: ``(title, pattern_id)`` pairs of the role's exact patterns.
            negative_examples: Titles of other roles that must not match.

        Returns:
            The proposal dict, or ``None`` when the suggestion is unusable
            (not a dict, missing/non-string regex), covers too few patterns,
            or matches any negative example.

        Raises:
            re.error: if the suggested regex does not compile (``re.compile``
                may also raise ``OverflowError``/``RecursionError``).
        """
        if not isinstance(suggestion, dict):
            return None

        regex_str = suggestion.get('regex')
        if not regex_str or not isinstance(regex_str, str):
            return None

        # NOTE(security): the pattern is model-generated and run without a
        # timeout; a pathological pattern could backtrack for a long time.
        regex = re.compile(regex_str)

        # Coverage is computed locally rather than trusted from the model.
        matched_rows = [row for row in title_rows if regex.search(row[0])]
        covered_titles = [title for title, _ in matched_rows]
        covered_ids = [pattern_id for _, pattern_id in matched_rows]

        false_positives = [t for t in negative_examples if regex.search(t)]

        if (
            len(covered_ids) < PatternOptimizationService.MIN_COVERED_PATTERNS
            or false_positives
        ):
            return None

        return {
            "target_role": target_role,
            "regex": regex_str,
            "explanation": suggestion.get('explanation', 'No explanation provided'),
            "priority": suggestion.get('suggested_priority', 50),
            "covered_pattern_ids": covered_ids,
            "covered_titles": covered_titles,
            "false_positives": false_positives,
        }
|