Docs: Aktualisierung der Dokumentation für Task [2ea88f42]

This commit is contained in:
2026-03-04 15:14:11 +00:00
parent 6b89c68edc
commit fdca0e5f54
6 changed files with 858 additions and 17 deletions

View File

@@ -1,4 +1,4 @@
from fastapi import FastAPI, Depends, HTTPException, Query, BackgroundTasks
from fastapi import FastAPI, Depends, HTTPException, Query, BackgroundTasks, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
@@ -9,6 +9,9 @@ from datetime import datetime
import os
import sys
import uuid
import shutil
import re
from collections import Counter
from fastapi.security import HTTPBasic, HTTPBasicCredentials
import secrets
@@ -39,6 +42,7 @@ from .services.discovery import DiscoveryService
from .services.scraping import ScraperService
from .services.classification import ClassificationService
from .services.role_mapping import RoleMappingService
from .services.optimization import PatternOptimizationService
# Initialize App
app = FastAPI(
@@ -60,6 +64,14 @@ scraper = ScraperService()
classifier = ClassificationService() # Now works without args
discovery = DiscoveryService()
# Module-level status record for the long-running pattern optimization task.
# "state" takes one of: idle, processing, completed, error.
# "result" and "error" stay None until a run fills them in.
optimization_status = dict(
    state="idle",
    progress=0,
    result=None,
    error=None,
)
# --- Pydantic Models ---
class CompanyCreate(BaseModel):
name: str
@@ -898,6 +910,96 @@ class ClassificationResponse(BaseModel):
processed: int
new_patterns: int
class OptimizationProposal(BaseModel):
    """Shape of one regex proposal for consolidating job-role patterns.

    NOTE(review): this model is not referenced in the visible part of the
    file; the field notes below are inferred from the field names and from
    ``ApplyOptimizationRequest`` -- confirm against the optimization service.
    """

    # Role the proposed regex would map to.
    target_role: str
    # The proposed regular expression.
    regex: str
    # Free-text explanation accompanying the proposal.
    explanation: str
    # Priority suggested for the new pattern.
    priority: int
    # Presumably the ids of existing patterns the regex would replace -- confirm.
    covered_pattern_ids: List[int]
    # Presumably the titles matched by the regex -- confirm.
    covered_titles: List[str]
    # Presumably titles the regex matches although it should not -- confirm.
    false_positives: List[str]
class ApplyOptimizationRequest(BaseModel):
    """Request body for ``POST /api/job_roles/apply-optimization``."""

    # Stored as the ``role`` of the newly created regex pattern.
    target_role: str
    # Stored as the ``pattern_value`` of the newly created regex pattern.
    regex: str
    # Stored as the ``priority`` of the newly created regex pattern.
    priority: int
    # Ids of the JobRolePattern rows to delete once the regex is in place.
    ids_to_delete: List[int]
def run_optimization_task():
    """Run one pattern-optimization pass and publish the outcome.

    The outcome is written into the module-level ``optimization_status``
    dict: ``state`` becomes "processing" while running, then either
    "completed" (with ``result`` set to the generated proposals) or "error"
    (with ``error`` set to the exception text). The database session opened
    here is always closed.
    """
    # Only keys are mutated, the name is never rebound, so no `global` needed.
    optimization_status.update(state="processing", result=None, error=None)

    # Function-scope import, kept as in the original
    # (presumably to avoid an import cycle -- confirm).
    from .database import SessionLocal

    session = SessionLocal()
    try:
        proposals = PatternOptimizationService(session).generate_proposals()
    except Exception as exc:
        logger.error(f"Optimization task failed: {exc}", exc_info=True)
        optimization_status.update(state="error", error=str(exc))
    else:
        # Result is stored before the state flips to "completed".
        optimization_status.update(result=proposals, state="completed")
    finally:
        session.close()
@app.post("/api/job_roles/optimize-start")
def start_pattern_optimization(
    background_tasks: BackgroundTasks,
    username: str = Depends(authenticate_user)
):
    """
    Starts the optimization analysis in the background.

    Returns ``{"status": "already_running"}`` when a run is in progress,
    otherwise schedules ``run_optimization_task`` and returns
    ``{"status": "started"}``.
    """
    if optimization_status["state"] == "processing":
        return {"status": "already_running"}

    # The task sets "processing" itself, but only once it actually starts
    # running. Mark the run as in progress here, before scheduling, so that a
    # second request arriving in between is rejected instead of queueing a
    # duplicate run. Stale result/error from a previous run are cleared so
    # pollers never see them alongside the "processing" state.
    optimization_status["state"] = "processing"
    optimization_status["result"] = None
    optimization_status["error"] = None

    background_tasks.add_task(run_optimization_task)
    return {"status": "started"}
@app.get("/api/job_roles/optimize-status")
def get_pattern_optimization_status(
    username: str = Depends(authenticate_user),
):
    """
    Polling endpoint for the optimization run.

    Returns the module-level ``optimization_status`` dict as-is
    (keys: state, progress, result, error).
    """
    current_status = optimization_status
    return current_status
@app.post("/api/job_roles/apply-optimization")
def apply_pattern_optimization(
    req: ApplyOptimizationRequest,
    db: Session = Depends(get_db),
    username: str = Depends(authenticate_user)
):
    """
    Applies a proposal: Creates the new regex and deletes the obsolete exact patterns.

    The regex is compiled first; an expression that does not compile is
    rejected with HTTP 400 before anything is written. If a pattern with the
    same ``pattern_value`` already exists, no new row is created. The rows
    listed in ``req.ids_to_delete`` are deleted in the same transaction.

    Raises:
        HTTPException: 400 when ``req.regex`` is not a valid regular expression.
    """
    # Never persist a pattern that cannot be compiled.
    try:
        re.compile(req.regex)
    except re.error as exc:
        raise HTTPException(status_code=400, detail=f"Invalid Regex: {exc}") from exc

    # 1. Create new Regex Pattern (skipped when the same value already exists)
    existing = db.query(JobRolePattern).filter(JobRolePattern.pattern_value == req.regex).first()
    created = existing is None
    if created:
        new_pattern = JobRolePattern(
            pattern_type="regex",
            pattern_value=req.regex,
            role=req.target_role,
            priority=req.priority,
            created_by="optimizer"
        )
        db.add(new_pattern)
        logger.info(f"Optimization: Created new regex {req.regex} for {req.target_role}")

    # 2. Delete covered Exact Patterns
    deleted_count = 0
    if req.ids_to_delete:
        # Query.delete() returns the number of rows matched, which can be
        # lower than len(ids_to_delete) when ids are duplicated or missing.
        deleted_count = (
            db.query(JobRolePattern)
            .filter(JobRolePattern.id.in_(req.ids_to_delete))
            .delete(synchronize_session=False)
        )
        logger.info(f"Optimization: Deleted {deleted_count} obsolete patterns.")

    db.commit()

    # Report what actually happened, not what was requested.
    action = "Created regex" if created else "Regex already existed"
    return {"status": "success", "message": f"{action} and removed {deleted_count} old patterns."}
@app.post("/api/job_roles", response_model=JobRolePatternResponse)
def create_job_role(
job_role: JobRolePatternCreate,
@@ -977,6 +1079,34 @@ def list_raw_job_titles(
return query.order_by(RawJobTitle.count.desc()).limit(limit).all()
@app.get("/api/job_roles/suggestions")
def get_job_role_suggestions(db: Session = Depends(get_db), username: str = Depends(authenticate_user)):
    """
    Suggests candidate keywords per role from the job titles of existing contacts.

    For every role, the job titles of all contacts having both a role and a
    job title are lower-cased, stripped of punctuation and split into words;
    words of three characters or fewer are dropped. The response maps each
    role to its ten most frequent words as ``{"word": ..., "count": ...}``.
    """
    # `!= None` is kept on purpose: the comparison is on ORM columns inside a
    # query filter, not a plain Python identity check.
    rows = db.query(Contact).filter(Contact.role != None, Contact.job_title != None).all()

    # Collect job titles per role, preserving first-seen order.
    titles_by_role = {}
    for contact in rows:
        titles_by_role.setdefault(contact.role, []).append(contact.job_title)

    result = {}
    for role, titles in titles_by_role.items():
        frequency = Counter()
        for title in titles:
            # Replace everything that is neither word character nor whitespace.
            normalized = re.sub(r'[^\w\s]', ' ', title).lower()
            # Short words are ignored.
            frequency.update(word for word in normalized.split() if len(word) > 3)

        result[role] = [
            {"word": word, "count": hits}
            for word, hits in frequency.most_common(10)
        ]

    return result
@app.get("/api/mistakes")
def list_reported_mistakes(
status: Optional[str] = Query(None),
@@ -1024,6 +1154,87 @@ def update_reported_mistake_status(
logger.info(f"Updated status for mistake {mistake_id} to {mistake.status}")
return {"status": "success", "mistake": mistake}
# --- Database Management ---
@app.get("/api/admin/database/download")
def download_database(username: str = Depends(authenticate_user)):
    """
    Serves the SQLite database file as a download.

    The download name carries a UTC timestamp (minute resolution).
    Responds with 404 when the database file does not exist.
    """
    db_path = "/app/companies_v3_fixed_2.db"

    if os.path.exists(db_path):
        stamp = datetime.utcnow().strftime('%Y-%m-%d_%H-%M')
        return FileResponse(
            db_path,
            media_type="application/octet-stream",
            filename=f"companies_backup_{stamp}.db",
        )

    raise HTTPException(status_code=404, detail="Database file not found")
@app.post("/api/admin/database/upload")
async def upload_database(
    file: UploadFile = File(...),
    username: str = Depends(authenticate_user)
):
    """
    Uploads and replaces the SQLite database file. Creating a backup first.

    The upload is first written to a staging file next to the database and
    checked for the SQLite file header. Only then is the current database
    backed up and the staging file moved into place with ``os.replace``, so a
    failed, truncated or non-SQLite upload never touches the live file.

    Raises:
        HTTPException: 400 when the upload is not a SQLite database file,
            500 when staging, backup or replacement fails.
    """
    db_path = "/app/companies_v3_fixed_2.db"
    backup_path = f"{db_path}.bak.{datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')}"
    # Staged in the same directory so the final os.replace stays on one filesystem.
    staging_path = f"{db_path}.upload.{uuid.uuid4().hex}"
    # Every SQLite 3 database file starts with this 16-byte header string.
    sqlite_magic = b"SQLite format 3\x00"

    try:
        # Stage the upload; the live database is not touched yet.
        with open(staging_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Reject anything that is not a SQLite database (including empty uploads).
        with open(staging_path, "rb") as staged:
            header = staged.read(len(sqlite_magic))
        if header != sqlite_magic:
            raise HTTPException(status_code=400, detail="Uploaded file is not a valid SQLite database")

        # Create Backup
        if os.path.exists(db_path):
            shutil.copy2(db_path, backup_path)
            logger.info(f"Created database backup at {backup_path}")

        # Swap the new file in as a single rename instead of overwriting in place.
        os.replace(staging_path, db_path)

        logger.info(f"Database replaced via upload by user {username}")
        return {"status": "success", "message": "Database uploaded successfully. Please restart the container to apply changes."}

    except HTTPException:
        # Validation errors keep their own status code.
        raise
    except Exception as e:
        # The live database was never partially written, so no restore is needed.
        logger.error(f"Database upload failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
    finally:
        # Remove the staging file if it was not moved into place.
        if os.path.exists(staging_path):
            try:
                os.remove(staging_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove staging file {staging_path}: {cleanup_error}")
# --- Regex Testing ---
class RegexTestRequest(BaseModel):
    """Request body for ``POST /api/job_roles/test-pattern``."""

    # The pattern to evaluate.
    pattern: str
    # How the pattern is interpreted: regex, exact, startswith
    pattern_type: str = "regex"
    # The string the pattern is tested against.
    test_string: str
@app.post("/api/job_roles/test-pattern")
def test_job_role_pattern(req: RegexTestRequest, username: str = Depends(authenticate_user)):
    """
    Tests if a given pattern matches a test string.

    Matching is case-insensitive and ignores surrounding whitespace of both
    the pattern and the test string. Supported ``pattern_type`` values are
    "regex" (``re.search``), "exact" and "startswith".

    Returns ``{"match": bool}``; on an invalid regex, an unknown
    ``pattern_type`` or any other failure the response is
    ``{"match": False, "error": "..."}``.
    """
    try:
        normalized_test = req.test_string.lower().strip()

        if req.pattern_type == "regex":
            # The regex itself must NOT be lower-cased: that would turn the
            # escapes \S, \W, \D and \B into \s, \w, \d and \b, inverting
            # their meaning. Case-insensitivity comes from re.IGNORECASE.
            regex = req.pattern.strip()
            is_match = re.search(regex, normalized_test, re.IGNORECASE) is not None
        elif req.pattern_type == "exact":
            is_match = req.pattern.lower().strip() == normalized_test
        elif req.pattern_type == "startswith":
            is_match = normalized_test.startswith(req.pattern.lower().strip())
        else:
            # Report an unsupported type rather than a silent "no match".
            return {"match": False, "error": f"Unknown pattern_type: {req.pattern_type}"}

        return {"match": is_match}
    except re.error as e:
        return {"match": False, "error": f"Invalid Regex: {str(e)}"}
    except Exception as e:
        logger.error(f"Pattern test error: {e}")
        return {"match": False, "error": str(e)}
@app.post("/api/enrich/discover")
def discover_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db), username: str = Depends(authenticate_user)):
company = db.query(Company).filter(Company.id == req.company_id).first()