[30388f42] Infrastructure Hardening: Repaired CE/Connector DB schema, fixed frontend styling build, implemented robust echo shield in worker v2.1.1, and integrated Lead Engine into gateway.

2026-03-07 14:08:42 +00:00
parent 35c30bc39a
commit d1b77fd2f6
415 changed files with 24100 additions and 13301 deletions
--- a/company-explorer/backend/services/optimization.py
+++ b/company-explorer/backend/services/optimization.py
@@ -0,0 +1,157 @@
+from sqlalchemy.orm import Session
+from ..database import JobRolePattern, Persona
+from ..lib.core_utils import call_gemini_flash
+import json
+import logging
+import re
+import ast
+
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+
+class PatternOptimizationService:
+    def __init__(self, db: Session):
+        self.db = db
+
+    def generate_proposals(self):
+        """
+        Analyzes existing EXACT patterns and proposes consolidated REGEX patterns.
+        """
+        # ... (Fetch Data logic remains)
+        # 1. Fetch Data
+        patterns = self.db.query(JobRolePattern).filter(JobRolePattern.pattern_type == "exact").all()
+        
+        # Group by Role
+        roles_data = {}
+        pattern_map = {} 
+        
+        for p in patterns:
+            if p.role not in roles_data:
+                roles_data[p.role] = []
+            roles_data[p.role].append(p.pattern_value)
+            pattern_map[p.pattern_value] = p.id
+
+        if not roles_data:
+            return []
+
+        proposals = []
+
+        # 2. Analyze each role
+        for target_role in roles_data.keys():
+            target_titles = roles_data[target_role]
+            
+            if len(target_titles) < 3:
+                continue
+
+            negative_examples = []
+            for other_role, titles in roles_data.items():
+                if other_role != target_role:
+                    negative_examples.extend(titles[:50]) 
+
+            # 3. Build Prompt
+            prompt = f"""
+            Act as a Regex Optimization Engine for B2B Job Titles.
+            
+            GOAL: Break down the list of 'Positive Examples' into logical CLUSTERS and create a Regex for each cluster.
+            TARGET ROLE: "{target_role}"
+            
+            TITLES TO COVER (Positive Examples):
+            {json.dumps(target_titles)}
+            
+            TITLES TO AVOID (Negative Examples - DO NOT MATCH THESE):
+            {json.dumps(negative_examples[:150])}
+            
+            INSTRUCTIONS:
+            1. Analyze the 'Positive Examples'. Do NOT try to create one single regex for all of them.
+            2. Identify distinct semantic groups.
+            3. Create a Regex for EACH group.
+            4. CRITICAL - CONFLICT HANDLING:
+               - The Regex must NOT match the 'Negative Examples'.
+               - Use Negative Lookahead (e.g. ^(?=.*Manager)(?!.*Facility).*) if needed.
+            5. Aggressiveness: Be bold.
+            
+            OUTPUT FORMAT:
+            Return a valid Python List of Dictionaries. 
+            Example:
+            [
+                {{
+                    "regex": r"(?i).*pattern.*",
+                    "explanation": "Explanation...",
+                    "suggested_priority": 50
+                }}
+            ]
+            Enclose regex patterns in r"..." strings to handle backslashes correctly.
+            """
+
+            try:
+                logger.info(f"Optimizing patterns for role: {target_role} (Positive: {len(target_titles)})")
+                
+                response = call_gemini_flash(prompt) # Removed json_mode=True to allow Python syntax
+                
+                # Cleanup markdown
+                clean_text = response.strip()
+                if clean_text.startswith("```python"):
+                    clean_text = clean_text[9:-3]
+                elif clean_text.startswith("```json"):
+                    clean_text = clean_text[7:-3]
+                elif clean_text.startswith("```"):
+                    clean_text = clean_text[3:-3]
+                clean_text = clean_text.strip()
+                
+                ai_suggestions = []
+                try:
+                    # First try standard JSON
+                    ai_suggestions = json.loads(clean_text)
+                except json.JSONDecodeError:
+                    try:
+                        # Fallback: Python AST Literal Eval (handles r"..." strings)
+                        ai_suggestions = ast.literal_eval(clean_text)
+                    except Exception as e:
+                        logger.error(f"Failed to parse response for {target_role} with JSON and AST. Error: {e}")
+                        continue
+                
+                # Verify and map back IDs
+                for sugg in ai_suggestions:
+                    try:
+                        regex_str = sugg.get('regex')
+                        if not regex_str: continue
+                        
+                        # Python AST already handles r"..." decoding, so regex_str is the raw pattern
+                        regex = re.compile(regex_str)
+                        
+                        # Calculate coverage locally
+                        covered_ids = []
+                        covered_titles_verified = []
+                        
+                        for t in target_titles:
+                            if regex.search(t):
+                                if t in pattern_map:
+                                    covered_ids.append(pattern_map[t])
+                                    covered_titles_verified.append(t)
+                        
+                        # Calculate False Positives
+                        false_positives = []
+                        for t in negative_examples:
+                            if regex.search(t):
+                                false_positives.append(t)
+                        
+                        if len(covered_ids) >= 2 and len(false_positives) == 0:
+                            proposals.append({
+                                "target_role": target_role,
+                                "regex": regex_str,
+                                "explanation": sugg.get('explanation', 'No explanation provided'),
+                                "priority": sugg.get('suggested_priority', 50),
+                                "covered_pattern_ids": covered_ids,
+                                "covered_titles": covered_titles_verified,
+                                "false_positives": false_positives
+                            })
+                            
+                    except re.error:
+                        logger.warning(f"AI generated invalid regex: {sugg.get('regex')}")
+                        continue
+
+            except Exception as e:
+                logger.error(f"Error optimizing patterns for {target_role}: {e}", exc_info=True)
+                continue
+
+        logger.info(f"Optimization complete. Generated {len(proposals)} proposals.")
+        return proposals