From 056c1797e3e4ba8c2bcfb54173c284a32fa3bedf Mon Sep 17 00:00:00 2001 From: Floke Date: Wed, 4 Mar 2026 15:14:11 +0000 Subject: [PATCH] =?UTF-8?q?Docs:=20Aktualisierung=20der=20Dokumentation=20?= =?UTF-8?q?f=C3=BCr=20Task=20[2ea88f42]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- MIGRATION_PLAN.md | 5 + company-explorer/backend/app.py | 213 ++++++++- .../scripts/analyze_job_title_patterns.py | 82 ++++ .../backend/services/optimization.py | 157 +++++++ .../src/components/RoboticsSettings.tsx | 409 +++++++++++++++++- tasks.md | 9 + 6 files changed, 858 insertions(+), 17 deletions(-) create mode 100644 company-explorer/backend/scripts/analyze_job_title_patterns.py create mode 100644 company-explorer/backend/services/optimization.py diff --git a/MIGRATION_PLAN.md b/MIGRATION_PLAN.md index fa3118b6..e89af588 100644 --- a/MIGRATION_PLAN.md +++ b/MIGRATION_PLAN.md @@ -198,6 +198,11 @@ Um DSGVO-konforme Marketing-Automatisierung zu ermöglichen, wurde eine sichere ## 7. Historie & Fixes (Jan 2026) + * **[MAJOR] v0.9.0: Role Matching Optimization & Portability (March 2026)** + * **Pattern Optimizer:** Asynchrones Hintergrund-System zur automatischen Konsolidierung von Einzel-Matches in mächtige Regex-Regeln via Gemini. Inklusive Konfliktprüfung gegen andere Rollen. Nutzt `ast.literal_eval` für robustes Regex-Parsing. + * **Database Management:** Direkter Up- & Download der SQLite-Datenbank aus dem UI heraus. Automatisches Backup-System bei Upload. + * **Regex Sandbox:** Integriertes Test-Tool für Muster vor der Speicherung in der Datenbank. + * **Smart Suggestions:** Live-Analyse der Kontaktdaten zur Identifikation häufiger Schlüsselwörter pro Rolle als Klick-Vorschläge. * **[CRITICAL] v0.7.4: Service Restoration & Logic Fix (Jan 24, 2026)** * **[STABILITY] v0.7.3: Hardening Metric Parser & Regression Testing (Jan 23, 2026)** * **[STABILITY] v0.7.2: Robust Metric Parsing (Jan 23, 2026)** diff --git a/company-explorer/backend/app.py b/company-explorer/backend/app.py index 487257b2..c9c99644 100644 --- a/company-explorer/backend/app.py +++ b/company-explorer/backend/app.py @@ -1,4 +1,4 @@ -from fastapi import FastAPI, Depends, HTTPException, Query, BackgroundTasks +from fastapi import FastAPI, Depends, HTTPException, Query, BackgroundTasks, UploadFile, File from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles from fastapi.responses import FileResponse @@ -9,6 +9,9 @@ from datetime import datetime import os import sys import uuid +import shutil +import re +from collections import Counter from fastapi.security import HTTPBasic, HTTPBasicCredentials import secrets @@ -39,6 +42,7 @@ from .services.discovery import DiscoveryService from .services.scraping import ScraperService from .services.classification import ClassificationService from .services.role_mapping import RoleMappingService +from .services.optimization import PatternOptimizationService # Initialize App app = FastAPI( @@ -60,6 +64,14 @@ scraper = ScraperService() classifier = ClassificationService() # Now works without args discovery = DiscoveryService() +# Global State for Long-Running Optimization Task +optimization_status = { + "state": "idle", # idle, processing, completed, error + "progress": 0, + "result": None, + "error": None +} + # --- Pydantic Models --- class CompanyCreate(BaseModel): name: str @@ -898,6 +910,96 @@ class ClassificationResponse(BaseModel): processed: int new_patterns: int +class OptimizationProposal(BaseModel): + target_role: str + regex: str + explanation: str + priority: int + covered_pattern_ids: List[int] + covered_titles: List[str] + false_positives: List[str] + +class ApplyOptimizationRequest(BaseModel): + target_role: str + regex: str + priority: int + ids_to_delete: List[int] + +def run_optimization_task(): + global optimization_status + optimization_status["state"] = "processing" + optimization_status["result"] = None + optimization_status["error"] = None + + from .database import SessionLocal + db = SessionLocal() + try: + optimizer = PatternOptimizationService(db) + proposals = optimizer.generate_proposals() + optimization_status["result"] = proposals + optimization_status["state"] = "completed" + except Exception as e: + logger.error(f"Optimization task failed: {e}", exc_info=True) + optimization_status["state"] = "error" + optimization_status["error"] = str(e) + finally: + db.close() + +@app.post("/api/job_roles/optimize-start") +def start_pattern_optimization( + background_tasks: BackgroundTasks, + username: str = Depends(authenticate_user) +): + """ + Starts the optimization analysis in the background. + """ + global optimization_status + if optimization_status["state"] == "processing": + return {"status": "already_running"} + + background_tasks.add_task(run_optimization_task) + return {"status": "started"} + +@app.get("/api/job_roles/optimize-status") +def get_pattern_optimization_status( + username: str = Depends(authenticate_user) +): + """ + Poll this endpoint to get the result of the optimization. + """ + return optimization_status + +@app.post("/api/job_roles/apply-optimization") +def apply_pattern_optimization( + req: ApplyOptimizationRequest, + db: Session = Depends(get_db), + username: str = Depends(authenticate_user) +): + """ + Applies a proposal: Creates the new regex and deletes the obsolete exact patterns. + """ + # 1. Create new Regex Pattern + # Check duplicate first + existing = db.query(JobRolePattern).filter(JobRolePattern.pattern_value == req.regex).first() + if not existing: + new_pattern = JobRolePattern( + pattern_type="regex", + pattern_value=req.regex, + role=req.target_role, + priority=req.priority, + created_by="optimizer" + ) + db.add(new_pattern) + logger.info(f"Optimization: Created new regex {req.regex} for {req.target_role}") + + # 2. Delete covered Exact Patterns + if req.ids_to_delete: + db.query(JobRolePattern).filter(JobRolePattern.id.in_(req.ids_to_delete)).delete(synchronize_session=False) + logger.info(f"Optimization: Deleted {len(req.ids_to_delete)} obsolete patterns.") + + db.commit() + return {"status": "success", "message": f"Created regex and removed {len(req.ids_to_delete)} old patterns."} + @app.post("/api/job_roles", response_model=JobRolePatternResponse) def create_job_role( job_role: JobRolePatternCreate, @@ -977,6 +1079,34 @@ def list_raw_job_titles( return query.order_by(RawJobTitle.count.desc()).limit(limit).all() +@app.get("/api/job_roles/suggestions") +def get_job_role_suggestions(db: Session = Depends(get_db), username: str = Depends(authenticate_user)): + """ + Analyzes existing contacts to suggest regex patterns based on frequent keywords per role. + """ + contacts = db.query(Contact).filter(Contact.role != None, Contact.job_title != None).all() + + role_groups = {} + for c in contacts: + if c.role not in role_groups: + role_groups[c.role] = [] + role_groups[c.role].append(c.job_title) + + suggestions = {} + + for role, titles in role_groups.items(): + all_tokens = [] + for t in titles: + # Simple cleaning: keep alphanum, lower + cleaned = re.sub(r'[^\w\s]', ' ', t).lower() + tokens = [w for w in cleaned.split() if len(w) > 3] # Ignore short words + all_tokens.extend(tokens) + + common = Counter(all_tokens).most_common(10) + suggestions[role] = [{"word": w, "count": c} for w, c in common] + + return suggestions + @app.get("/api/mistakes") def list_reported_mistakes( status: Optional[str] = Query(None), @@ -1024,6 +1154,87 @@ def update_reported_mistake_status( logger.info(f"Updated status for mistake {mistake_id} to {mistake.status}") return {"status": "success", "mistake": mistake} +# --- Database Management --- + +@app.get("/api/admin/database/download") +def download_database(username: str = Depends(authenticate_user)): + """ + Downloads the current SQLite database file. + """ + db_path = "/app/companies_v3_fixed_2.db" + if not os.path.exists(db_path): + raise HTTPException(status_code=404, detail="Database file not found") + + filename = f"companies_backup_{datetime.utcnow().strftime('%Y-%m-%d_%H-%M')}.db" + return FileResponse(db_path, media_type="application/octet-stream", filename=filename) + +@app.post("/api/admin/database/upload") +async def upload_database( + file: UploadFile = File(...), + username: str = Depends(authenticate_user) +): + """ + Uploads and replaces the SQLite database file. Creating a backup first. + """ + db_path = "/app/companies_v3_fixed_2.db" + backup_path = f"{db_path}.bak.{datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')}" + + try: + # Create Backup + if os.path.exists(db_path): + shutil.copy2(db_path, backup_path) + logger.info(f"Created database backup at {backup_path}") + + # Save new file + with open(db_path, "wb") as buffer: + shutil.copyfileobj(file.file, buffer) + + logger.info(f"Database replaced via upload by user {username}") + return {"status": "success", "message": "Database uploaded successfully. Please restart the container to apply changes."} + + except Exception as e: + logger.error(f"Database upload failed: {e}", exc_info=True) + # Try to restore backup if something went wrong during write + if os.path.exists(backup_path): + shutil.copy2(backup_path, db_path) + logger.warning("Restored database from backup due to upload failure.") + + raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}") + +# --- Regex Testing --- + +class RegexTestRequest(BaseModel): + pattern: str + pattern_type: str = "regex" # regex, exact, startswith + test_string: str + +@app.post("/api/job_roles/test-pattern") +def test_job_role_pattern(req: RegexTestRequest, username: str = Depends(authenticate_user)): + """ + Tests if a given pattern matches a test string. + """ + try: + is_match = False + normalized_test = req.test_string.lower().strip() + pattern = req.pattern.lower().strip() + + if req.pattern_type == "regex": + if re.search(pattern, normalized_test, re.IGNORECASE): + is_match = True + elif req.pattern_type == "exact": + if pattern == normalized_test: + is_match = True + elif req.pattern_type == "startswith": + if normalized_test.startswith(pattern): + is_match = True + + return {"match": is_match} + except re.error as e: + return {"match": False, "error": f"Invalid Regex: {str(e)}"} + except Exception as e: + logger.error(f"Pattern test error: {e}") + return {"match": False, "error": str(e)} + @app.post("/api/enrich/discover") def discover_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db), username: str = Depends(authenticate_user)): company = db.query(Company).filter(Company.id == req.company_id).first() diff --git a/company-explorer/backend/scripts/analyze_job_title_patterns.py b/company-explorer/backend/scripts/analyze_job_title_patterns.py new file mode 100644 index 00000000..ceb7f9aa --- /dev/null +++ b/company-explorer/backend/scripts/analyze_job_title_patterns.py @@ -0,0 +1,82 @@ +import sys +import os +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from collections import Counter +import re + +# Add backend to path to import models +sys.path.append(os.path.join(os.path.dirname(__file__), "../..")) + +from backend.config import settings +from backend.database import Contact, JobRolePattern + +def clean_text(text): + if not text: return "" + # Keep only alphanumeric and spaces + text = re.sub(r'[^\w\s]', ' ', text) + return text.lower().strip() + +def get_ngrams(tokens, n): + if len(tokens) < n: + return [] + return [" ".join(tokens[i:i+n]) for i in range(len(tokens)-n+1)] + +def analyze_patterns(): + print(f"Connecting to database: {settings.DATABASE_URL}") + engine = create_engine(settings.DATABASE_URL) + Session = sessionmaker(bind=engine) + session = Session() + + try: + # Fetch all contacts with a role + contacts = session.query(Contact).filter(Contact.role != None, Contact.job_title != None).all() + print(f"Found {len(contacts)} classified contacts to analyze.") + + role_groups = {} + for c in contacts: + if c.role not in role_groups: + role_groups[c.role] = [] + role_groups[c.role].append(c.job_title) + + print("\n" + "="*60) + print(" JOB TITLE PATTERN ANALYSIS REPORT") + print("="*60 + "\n") + + for role, titles in role_groups.items(): + print(f"--- ROLE: {role} ({len(titles)} samples) ---") + + # Tokenize all titles + all_tokens = [] + all_bigrams = [] + + for t in titles: + cleaned = clean_text(t) + tokens = [w for w in cleaned.split() if len(w) > 2] # Ignore short words + all_tokens.extend(tokens) + all_bigrams.extend(get_ngrams(tokens, 2)) + + # Analyze frequencies + common_words = Counter(all_tokens).most_common(15) + common_bigrams = Counter(all_bigrams).most_common(10) + + print("Top Keywords:") + for word, count in common_words: + print(f" - {word}: {count}") + + print("\nTop Bigrams (Word Pairs):") + for bg, count in common_bigrams: + print(f" - \"{bg}\": {count}") + + print("\nSuggested Regex Components:") + top_5_words = [w[0] for w in common_words[:5]] + print(f" ({ '|'.join(top_5_words) })") + print("\n" + "-"*30 + "\n") + + except Exception as e: + print(f"Error: {e}") + finally: + session.close() + +if __name__ == "__main__": + analyze_patterns() diff --git a/company-explorer/backend/services/optimization.py b/company-explorer/backend/services/optimization.py new file mode 100644 index 00000000..be7f40cd --- /dev/null +++ b/company-explorer/backend/services/optimization.py @@ -0,0 +1,157 @@ +from sqlalchemy.orm import Session +from ..database import JobRolePattern, Persona +from ..lib.core_utils import call_gemini_flash +import json +import logging +import re +import ast + +logger = logging.getLogger(__name__) + +class PatternOptimizationService: + def __init__(self, db: Session): + self.db = db + + def generate_proposals(self): + """ + Analyzes existing EXACT patterns and proposes consolidated REGEX patterns. + """ + # ... (Fetch Data logic remains) + # 1. Fetch Data + patterns = self.db.query(JobRolePattern).filter(JobRolePattern.pattern_type == "exact").all() + + # Group by Role + roles_data = {} + pattern_map = {} + + for p in patterns: + if p.role not in roles_data: + roles_data[p.role] = [] + roles_data[p.role].append(p.pattern_value) + pattern_map[p.pattern_value] = p.id + + if not roles_data: + return [] + + proposals = [] + + # 2. Analyze each role + for target_role in roles_data.keys(): + target_titles = roles_data[target_role] + + if len(target_titles) < 3: + continue + + negative_examples = [] + for other_role, titles in roles_data.items(): + if other_role != target_role: + negative_examples.extend(titles[:50]) + + # 3. Build Prompt + prompt = f""" + Act as a Regex Optimization Engine for B2B Job Titles. + + GOAL: Break down the list of 'Positive Examples' into logical CLUSTERS and create a Regex for each cluster. + TARGET ROLE: "{target_role}" + + TITLES TO COVER (Positive Examples): + {json.dumps(target_titles)} + + TITLES TO AVOID (Negative Examples - DO NOT MATCH THESE): + {json.dumps(negative_examples[:150])} + + INSTRUCTIONS: + 1. Analyze the 'Positive Examples'. Do NOT try to create one single regex for all of them. + 2. Identify distinct semantic groups. + 3. Create a Regex for EACH group. + 4. CRITICAL - CONFLICT HANDLING: + - The Regex must NOT match the 'Negative Examples'. + - Use Negative Lookahead (e.g. ^(?=.*Manager)(?!.*Facility).*) if needed. + 5. Aggressiveness: Be bold. + + OUTPUT FORMAT: + Return a valid Python List of Dictionaries. + Example: + [ + {{ + "regex": r"(?i).*pattern.*", + "explanation": "Explanation...", + "suggested_priority": 50 + }} + ] + Enclose regex patterns in r"..." strings to handle backslashes correctly. + """ + + try: + logger.info(f"Optimizing patterns for role: {target_role} (Positive: {len(target_titles)})") + + response = call_gemini_flash(prompt) # Removed json_mode=True to allow Python syntax + + # Cleanup markdown + clean_text = response.strip() + if clean_text.startswith("```python"): + clean_text = clean_text[9:-3] + elif clean_text.startswith("```json"): + clean_text = clean_text[7:-3] + elif clean_text.startswith("```"): + clean_text = clean_text[3:-3] + clean_text = clean_text.strip() + + ai_suggestions = [] + try: + # First try standard JSON + ai_suggestions = json.loads(clean_text) + except json.JSONDecodeError: + try: + # Fallback: Python AST Literal Eval (handles r"..." strings) + ai_suggestions = ast.literal_eval(clean_text) + except Exception as e: + logger.error(f"Failed to parse response for {target_role} with JSON and AST. Error: {e}") + continue + + # Verify and map back IDs + for sugg in ai_suggestions: + try: + regex_str = sugg.get('regex') + if not regex_str: continue + + # Python AST already handles r"..." decoding, so regex_str is the raw pattern + regex = re.compile(regex_str) + + # Calculate coverage locally + covered_ids = [] + covered_titles_verified = [] + + for t in target_titles: + if regex.search(t): + if t in pattern_map: + covered_ids.append(pattern_map[t]) + covered_titles_verified.append(t) + + # Calculate False Positives + false_positives = [] + for t in negative_examples: + if regex.search(t): + false_positives.append(t) + + if len(covered_ids) >= 2 and len(false_positives) == 0: + proposals.append({ + "target_role": target_role, + "regex": regex_str, + "explanation": sugg.get('explanation', 'No explanation provided'), + "priority": sugg.get('suggested_priority', 50), + "covered_pattern_ids": covered_ids, + "covered_titles": covered_titles_verified, + "false_positives": false_positives + }) + + except re.error: + logger.warning(f"AI generated invalid regex: {sugg.get('regex')}") + continue + + except Exception as e: + logger.error(f"Error optimizing patterns for {target_role}: {e}", exc_info=True) + continue + + logger.info(f"Optimization complete. Generated {len(proposals)} proposals.") + return proposals diff --git a/company-explorer/frontend/src/components/RoboticsSettings.tsx b/company-explorer/frontend/src/components/RoboticsSettings.tsx index 33384f21..2eadc60c 100644 --- a/company-explorer/frontend/src/components/RoboticsSettings.tsx +++ b/company-explorer/frontend/src/components/RoboticsSettings.tsx @@ -1,6 +1,6 @@ import { useEffect, useState, useMemo } from 'react' import axios from 'axios' -import { X, Bot, Tag, Target, Users, Plus, Trash2, Save, Flag, Check, Ban, ExternalLink, ChevronDown, Grid } from 'lucide-react' +import { X, Bot, Tag, Target, Users, Plus, Trash2, Save, Flag, Check, Ban, ExternalLink, ChevronDown, Grid, Database, Download, UploadCloud, Play, AlertTriangle, Lightbulb, Sparkles } from 'lucide-react' import clsx from 'clsx' import { MarketingMatrixManager } from './MarketingMatrixManager' @@ -47,9 +47,22 @@ type ReportedMistake = { updated_at: string; } +type SuggestionItem = { word: string, count: number }; +type RoleSuggestions = Record; + +type OptimizationProposal = { + target_role: string; + regex: string; + explanation: string; + priority: number; + covered_pattern_ids: number[]; + covered_titles: string[]; + false_positives: string[]; +} + export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsProps) { - const [activeTab, setActiveTab] = useState<'robotics' | 'industries' | 'roles' | 'mistakes' | 'matrix'>( - localStorage.getItem('roboticsSettingsActiveTab') as 'robotics' | 'industries' | 'roles' | 'mistakes' | 'matrix' || 'robotics' + const [activeTab, setActiveTab] = useState<'robotics' | 'industries' | 'roles' | 'matrix' | 'mistakes' | 'database'>( + localStorage.getItem('roboticsSettingsActiveTab') as 'robotics' | 'industries' | 'roles' | 'matrix' | 'mistakes' | 'database' || 'robotics' ) const [roboticsCategories, setRoboticsCategories] = useState([]) @@ -57,11 +70,25 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP const [jobRoles, setJobRoles] = useState([]) const [rawJobTitles, setRawJobTitles] = useState([]) const [reportedMistakes, setReportedMistakes] = useState([]) + const [suggestions, setSuggestions] = useState({}); + const [optimizationProposals, setOptimizationProposals] = useState([]); + const [showOptimizationModal, setShowOptimizationModal] = useState(false); + const [currentMistakeStatusFilter, setCurrentMistakeStatusFilter] = useState("PENDING"); const [isLoading, setIsLoading] = useState(false); const [isClassifying, setIsClassifying] = useState(false); + const [isOptimizing, setIsOptimizing] = useState(false); const [roleSearch, setRoleSearch] = useState(""); + // Database & Regex State + const [fileToUpload, setFileToUpload] = useState(null); + const [uploadStatus, setUploadStatus] = useState(""); + const [testPattern, setTestPattern] = useState(""); + const [testPatternType, setTestPatternType] = useState("regex"); + const [testString, setTestString] = useState(""); + const [testResult, setTestResult] = useState(null); + const [testError, setTestError] = useState(null); + const groupedAndFilteredRoles = useMemo(() => { const grouped = jobRoles.reduce((acc: Record, role) => { const key = role.role || 'Unassigned'; @@ -91,18 +118,20 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP const fetchAllData = async () => { setIsLoading(true); try { - const [resRobotics, resIndustries, resJobRoles, resRawTitles, resMistakes] = await Promise.all([ + const [resRobotics, resIndustries, resJobRoles, resRawTitles, resMistakes, resSuggestions] = await Promise.all([ axios.get(`${apiBase}/robotics/categories`), axios.get(`${apiBase}/industries`), axios.get(`${apiBase}/job_roles`), axios.get(`${apiBase}/job_roles/raw?unmapped_only=true`), // Ensure we only get unmapped axios.get(`${apiBase}/mistakes?status=${currentMistakeStatusFilter}`), + axios.get(`${apiBase}/job_roles/suggestions`), ]); setRoboticsCategories(resRobotics.data); setIndustries(resIndustries.data); setJobRoles(resJobRoles.data); setRawJobTitles(resRawTitles.data); setReportedMistakes(resMistakes.data.items); + setSuggestions(resSuggestions.data); } catch (e) { console.error("Failed to fetch settings data:", e); alert("Fehler beim Laden der Settings. Siehe Konsole."); @@ -171,8 +200,6 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP try { await axios.post(`${apiBase}/job_roles/classify-batch`); alert("Batch classification started in the background. The list will update automatically as titles are processed. You can close this window."); - // Optionally, you can poll for completion or just let the user see the number go down on next refresh. - // For now, we just inform the user. } catch (e) { alert("Failed to start batch classification."); console.error(e); @@ -180,6 +207,74 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP setIsClassifying(false); } }; + + const handleOptimizePatterns = async () => { + setIsOptimizing(true); + setOptimizationProposals([]); + setShowOptimizationModal(true); + + try { + // 1. Start Task + await axios.post(`${apiBase}/job_roles/optimize-start`); + + // 2. Poll for Status + const pollInterval = 2000; // 2 seconds + const maxAttempts = 150; // 5 minutes + let attempts = 0; + + const checkStatus = async () => { + if (attempts >= maxAttempts) { + alert("Optimization timed out. Please check logs."); + setIsOptimizing(false); + return; + } + + try { + const res = await axios.get(`${apiBase}/job_roles/optimize-status`); + const status = res.data.state; + + if (status === 'completed') { + setOptimizationProposals(res.data.result); + setIsOptimizing(false); + } else if (status === 'error') { + alert(`Optimization failed: ${res.data.error}`); + setIsOptimizing(false); + } else { + attempts++; + setTimeout(checkStatus, pollInterval); + } + } catch (e) { + console.error("Polling error", e); + setIsOptimizing(false); + } + }; + + setTimeout(checkStatus, 1000); + + } catch (e) { + alert("Failed to start optimization."); + console.error(e); + setShowOptimizationModal(false); + setIsOptimizing(false); + } + }; + + const handleApplyOptimization = async (proposal: OptimizationProposal) => { + try { + await axios.post(`${apiBase}/job_roles/apply-optimization`, { + target_role: proposal.target_role, + regex: proposal.regex, + priority: proposal.priority, + ids_to_delete: proposal.covered_pattern_ids + }); + // Remove applied proposal from list + setOptimizationProposals(prev => prev.filter(p => p.regex !== proposal.regex)); + fetchAllData(); // Refresh main list + } catch (e) { + alert("Failed to apply optimization."); + console.error(e); + } + }; const handleUpdateJobRole = async (roleId: number, field: string, value: any) => { const roleToUpdate = jobRoles.find(r => r.id === roleId); @@ -199,19 +294,17 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP } catch (e) { alert("Failed to update job role"); console.error(e); - // Revert on failure if needed, but for now just log it } }; - const handleAddJobRole = async (title?: string) => { - const patternValue = title || "New Pattern"; + const handleAddJobRole = async (value: string, type: 'exact' | 'regex' = 'exact', roleName?: string) => { setIsLoading(true); try { await axios.post(`${apiBase}/job_roles`, { - pattern_type: "exact", - pattern_value: patternValue, - role: "Influencer", - priority: 100 + pattern_type: type, + pattern_value: value, + role: roleName || "Influencer", + priority: type === 'regex' ? 80 : 100 }); fetchAllData(); } catch (e) { @@ -238,11 +331,66 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP } } + const handleDownloadDb = () => { + window.location.href = `${apiBase}/admin/database/download`; + }; + + const handleFileSelect = (event: React.ChangeEvent) => { + if (event.target.files && event.target.files[0]) { + setFileToUpload(event.target.files[0]); + setUploadStatus(""); + } + }; + + const handleUploadDb = async () => { + if (!fileToUpload) return; + if (!window.confirm("WARNING: This will overwrite the current database! A backup will be created, but any recent changes might be lost. You MUST restart the container afterwards. Continue?")) return; + + const formData = new FormData(); + formData.append("file", fileToUpload); + + setUploadStatus("uploading"); + + try { + await axios.post(`${apiBase}/admin/database/upload`, formData, { + headers: { "Content-Type": "multipart/form-data" }, + }); + setUploadStatus("success"); + alert("Upload successful! Please RESTART the Docker container to apply changes."); + } catch (e: any) { + console.error(e); + setUploadStatus("error"); + alert(`Upload failed: ${e.response?.data?.detail || e.message}`); + } + }; + + const handleTestPattern = async () => { + setTestResult(null); + setTestError(null); + if (!testPattern || !testString) return; + + try { + const res = await axios.post(`${apiBase}/job_roles/test-pattern`, { + pattern: testPattern, + pattern_type: testPatternType, + test_string: testString + }); + if (res.data.error) { + setTestError(res.data.error); + } else { + setTestResult(res.data.match); + } + } catch (e: any) { + setTestError(e.message); + } + }; + + if (!isOpen) return null return (
-
+
{/* Header */}
@@ -262,6 +410,7 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP { id: 'roles', label: 'Job Role Mapping', icon: Users }, { id: 'matrix', label: 'Marketing Matrix', icon: Grid }, { id: 'mistakes', label: 'Reported Mistakes', icon: Flag }, + { id: 'database', label: 'Database & Regex', icon: Database }, ].map(t => (
- + +
@@ -397,7 +549,36 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
+
+ {/* AI Suggestions Area */} + {suggestions[roleName] && suggestions[roleName].length > 0 && ( +
+
+ + AI Suggestions (Common Keywords) +
+
+ {suggestions[roleName].map(s => ( + + ))} +
+
+ )} + @@ -455,7 +636,7 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP - + ))} {rawJobTitles.length === 0 && ()} @@ -470,6 +651,7 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
+ {/* ... existing mistakes content ... */}

Reported Data Mistakes

@@ -548,7 +730,202 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
{raw.title} {raw.count}x
Discovery inbox is empty. Import raw job titles to see data here.
+ +
+ + {/* Regex Tester */} +
+
+

Regex Tester

+

Validate your patterns before adding them to the database.

+
+ +
+
+ +
+ + setTestPattern(e.target.value)} + placeholder="e.g. (leiter|head).{0,15}vertrieb" + className="flex-1 bg-white dark:bg-slate-900 border border-slate-200 dark:border-slate-800 rounded p-2 text-xs font-mono text-slate-800 dark:text-slate-200 focus:ring-1 focus:ring-blue-500 outline-none" + /> +
+
+ +
+ + setTestString(e.target.value)} + placeholder="e.g. Leiter Vertrieb und Marketing" + className="w-full bg-white dark:bg-slate-900 border border-slate-200 dark:border-slate-800 rounded p-2 text-xs text-slate-800 dark:text-slate-200 focus:ring-1 focus:ring-blue-500 outline-none" + /> +
+
+ +
+
+ + {testResult !== null && ( + + {testResult ? : } + {testResult ? "MATCH" : "NO MATCH"} + + )} + {testError && {testError}} +
+
+
+ + {/* Database Management */} +
+
+

Database Management

+

Download the full database for offline analysis or restore a backup.

+
+ +
+ {/* Download */} +
+

Export

+ +
+ + {/* Upload */} +
+

Restore / Import

+
+
+ + Warning: Uploading will overwrite the current database. A backup will be created automatically. +
+ + + + +
+
+
+
+
+
+ + {/* Optimization Modal */} + {showOptimizationModal && ( +
+
+
+
+

Pattern Optimization Proposals

+

AI-generated Regex suggestions to consolidate exact matches.

+
+ +
+ + {isOptimizing ? ( +
+
+

Analyzing patterns & checking for conflicts...

+
+ ) : optimizationProposals.length === 0 ? ( +
+ +

No optimization opportunities found. Your patterns are already efficient!

+
+ ) : ( +
+ {optimizationProposals.map((prop, idx) => ( +
+
+
+
+ {prop.target_role} + Priority: {prop.priority} +
+

{prop.regex}

+
+ +
+ +

{prop.explanation}

+ +
+
+
Covers ({prop.covered_titles.length})
+
+ {prop.covered_titles.map(t => ( + {t} + ))} +
+
+ + {prop.false_positives.length > 0 ? ( +
+
Conflicts (Matches other roles!)
+
+ {prop.false_positives.map(t => ( + {t} + ))} +
+
+ ) : ( +
+ No conflicts with other roles detected. +
+ )} +
+
+ ))} +
+ )} +
+
+ )}
) diff --git a/tasks.md b/tasks.md index 0564346f..3b67edc5 100644 --- a/tasks.md +++ b/tasks.md @@ -39,6 +39,15 @@ - [x] **Integrität:** Fehlende API-Endpunkte für Firmen-Erstellung, Bulk-Import und Wiki-Overrides wiederhergestellt. +## Persona Segmentierung & Rollen-Matching (v0.9.0 - Abgeschlossen) + +- [x] **Database Portability:** Up- & Download der SQLite-Datenbank direkt im UI implementiert (inkl. automatischem Backup). +- [x] **Pattern Optimizer:** Asynchrones KI-System zur automatischen Generierung von Regex-Mustern aus Einzelregeln. +- [x] **Konflikt-Management:** KI-gestützte Prüfung von Regex-Regeln gegen andere Rollen (Negative Examples) zur Vermeidung von Fehlzuordnungen. +- [x] **Regex Sandbox:** Interaktives Test-Tool im Frontend zur Validierung von Mustern gegen echte Jobtitel. +- [x] **Smart Suggestions:** Live-Analyse der Datenbank zur Anzeige häufiger Schlüsselwörter als Klick-Vorschläge. +- [x] **Robustheit:** Implementierung eines AST-basierten Parsers für komplexe Regex-Escaping-Szenarien. + ## Lead Engine: Tradingtwins Automation (In Arbeit) - [x] **E-Mail Ingest:** Automatisierter Import von Leads aus dem Postfach `info@robo-planet.de` via Microsoft Graph API.