[2ff88f42] einfügen

einfügen
This commit is contained in:
2026-02-20 13:25:21 +00:00
parent 653bd79e1f
commit 101f67936a
5 changed files with 188 additions and 20 deletions

View File

@@ -454,6 +454,22 @@ def list_industries(db: Session = Depends(get_db), username: str = Depends(authe
def list_job_roles(db: Session = Depends(get_db), username: str = Depends(authenticate_user)):
    # Fetch every JobRoleMapping row, sorted alphabetically by its pattern.
    # `username` is not used in the body; it is resolved through the
    # authenticate_user dependency declared in the signature.
    ordered_mappings = db.query(JobRoleMapping).order_by(
        JobRoleMapping.pattern.asc()
    )
    return ordered_mappings.all()
@app.get("/api/job_roles/raw")
def list_raw_job_titles(
    limit: int = 100,
    unmapped_only: bool = True,
    db: Session = Depends(get_db),
    username: str = Depends(authenticate_user)
):
    """
    Returns unique raw job titles from CRM imports, prioritized by frequency.
    """
    # Parameters:
    #   limit         - maximum number of rows returned (default 100).
    #   unmapped_only - when true (default), only rows whose is_mapped flag is
    #                   False are returned.
    #   db            - SQLAlchemy session provided by the get_db dependency.
    #   username      - resolved by the authenticate_user dependency; not used
    #                   in the body.
    # Returns the matching RawJobTitle rows, most frequent first.
    # (Documented in comments rather than in the docstring so the docstring
    # text stays exactly as it was.)
    query = db.query(RawJobTitle)
    if unmapped_only:
        # SQLAlchemy column comparison: this builds a SQL expression, so the
        # Python idiom `not x` / `x is False` must NOT be used here.
        query = query.filter(RawJobTitle.is_mapped == False)  # noqa: E712
    # Many titles share the same count (e.g. 1). Without a secondary sort key
    # the rows that survive LIMIT are arbitrary and may change between two
    # identical requests, so ties are broken alphabetically by title.
    ordered = query.order_by(RawJobTitle.count.desc(), RawJobTitle.title.asc())
    return ordered.limit(limit).all()
@app.get("/api/mistakes")
def list_reported_mistakes(
status: Optional[str] = Query(None),

View File

@@ -150,7 +150,7 @@ class Industry(Base):
created_at = Column(DateTime, default=datetime.utcnow)
class JobRoleMapping(Base):
class JobRoleMapping(BaseModel):
"""
Maps job title patterns (regex or simple string) to Roles.
"""
@@ -162,7 +162,25 @@ class JobRoleMapping(Base):
created_at = Column(DateTime, default=datetime.utcnow)
class Persona(Base):
class RawJobTitle(BaseModel):
    """
    Stores raw unique job titles imported from CRM to assist in pattern mining.
    Tracks frequency to prioritize high-impact patterns.

    One row per distinct title string (the `title` column is unique); the
    `count` column carries how often that title was seen.
    """
    # NOTE(review): the base class is named `BaseModel` here; presumably this is
    # the module's SQLAlchemy declarative base (the class uses __tablename__ and
    # Column attributes) - confirm against the module's definitions.
    __tablename__ = "raw_job_titles"
    # Surrogate integer primary key.
    id = Column(Integer, primary_key=True, index=True)
    title = Column(String, unique=True, index=True) # The raw string, e.g. "Senior Sales Mgr."
    count = Column(Integer, default=1) # How often this title appears in the CRM
    # Origin label of the row; defaults to "import" when not given.
    source = Column(String, default="import")
    # Status Flags
    is_mapped = Column(Boolean, default=False) # True if a pattern currently covers this title
    # Timestamps come from datetime.utcnow, i.e. naive datetimes in UTC.
    created_at = Column(DateTime, default=datetime.utcnow)
    # Set on insert and refreshed on every UPDATE through `onupdate`.
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
class Persona(BaseModel):
"""
Represents a generalized persona/role (e.g. 'Geschäftsführer', 'IT-Leiter')
independent of the specific job title pattern.

View File

@@ -0,0 +1,95 @@
import sys
import os
import csv
import argparse
from datetime import datetime
# Setup Environment
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from backend.database import SessionLocal, RawJobTitle, init_db, engine, Base
# Lower-case header names that identify the job-title column.
_TITLE_HEADER_NAMES = ('funktion', 'jobtitle', 'title', 'position', 'rolle')


def _find_title_column(cells):
    """Return the index of the first cell that names the job-title column, or None."""
    for idx, cell in enumerate(cells):
        # Strip before comparing so padded headers such as " Funktion " match.
        if cell.strip().lower() in _TITLE_HEADER_NAMES:
            return idx
    return None


def _count_titles(file_path, delimiter):
    """Read the CSV once and return a Counter mapping each non-empty title to its number of rows."""
    from collections import Counter
    from itertools import chain

    counts = Counter()
    # utf-8-sig handles BOM from Excel; newline='' is what the csv module
    # expects so that line breaks inside quoted fields are parsed correctly.
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
        # Try to detect header
        sample = f.read(1024)
        f.seek(0)
        try:
            has_header = csv.Sniffer().has_header(sample)
        except csv.Error:
            # The sniffer must guess a delimiter first and raises when it
            # cannot (typical for single-column title lists or empty files).
            # That must not abort the import; fall back to the name check below.
            has_header = False

        reader = csv.reader(f, delimiter=delimiter)
        first_row = next(reader, None)
        if first_row is None:
            # Empty file: nothing to import.
            return counts

        col_idx = 0
        named_idx = _find_title_column(first_row)
        if has_header or named_idx is not None:
            # A first row containing a known column name is treated as a
            # header even if the sniffer heuristic was inconclusive.
            print(f" Header detected: {first_row}")
            if named_idx is not None:
                col_idx = named_idx
                print(f" -> Using column '{first_row[col_idx]}' (Index {col_idx})")
            rows = reader
        else:
            print(" No header detected, using first column.")
            # The first row is data, so put it back in front of the rest.
            rows = chain([first_row], reader)

        processed = 0
        for row in rows:
            # Skip blank lines and rows too short to contain the title column.
            if len(row) <= col_idx:
                continue
            raw_title = row[col_idx].strip()
            if not raw_title:
                continue  # Skip empty
            counts[raw_title] += 1
            processed += 1
            if processed % 100 == 0:
                print(f" Processed {processed} rows...", end='\r')
    return counts


def import_titles(file_path: str, delimiter: str = ';'):
    """
    Import job titles from a CSV file into the raw_job_titles table.

    The file is read completely first and identical titles are aggregated, then
    every distinct title is inserted (new) or has its counter increased
    (existing). Everything is written in a single transaction, so a failure
    leaves the database unchanged instead of half-imported.

    Aggregating up front also means a title is never added twice to the same
    session, so the import does not depend on the session's autoflush setting
    to avoid violating the unique constraint on `title`.

    Args:
        file_path: Path to the CSV file (UTF-8, optional BOM).
        delimiter: Column separator used by the file (default ';').

    Returns:
        None. Progress and a summary are printed. Errors while reading or
        writing are reported on stdout and rolled back, not re-raised; the
        success summary is only printed when the commit succeeded.
    """
    print(f"🚀 Starting Import from {file_path}...")
    # Ensure Table Exists
    RawJobTitle.__table__.create(bind=engine, checkfirst=True)
    db = SessionLocal()
    total_rows = 0
    new_titles = 0
    updated_titles = 0
    try:
        title_counts = _count_titles(file_path, delimiter)
        total_rows = sum(title_counts.values())

        for raw_title, occurrences in title_counts.items():
            # Check existence
            existing = db.query(RawJobTitle).filter(RawJobTitle.title == raw_title).first()
            if existing:
                # The column is nullable, so guard against a NULL counter.
                existing.count = (existing.count or 0) + occurrences
                existing.updated_at = datetime.utcnow()
                updated_titles += occurrences
            else:
                db.add(RawJobTitle(title=raw_title, count=occurrences))
                new_titles += 1
                # Further occurrences of a new title in the same file count as
                # frequency updates, matching the row-by-row statistics.
                updated_titles += occurrences - 1
        db.commit()
    except Exception as e:
        # Best-effort CLI behaviour: report and undo, do not crash the caller.
        print(f"\n❌ Error: {e}")
        db.rollback()
    else:
        print("\n✅ Import Complete.")
        print(f" Total Processed: {total_rows}")
        print(f" New Unique Titles: {new_titles}")
        print(f" Updated Frequencies: {updated_titles}")
    finally:
        db.close()
if __name__ == "__main__":
    # Command-line entry point: one positional CSV path plus an optional delimiter.
    cli = argparse.ArgumentParser(description="Import Job Titles from CSV")
    cli.add_argument("file", help="Path to CSV file")
    cli.add_argument("--delimiter", default=";", help="CSV Delimiter (default: ';')")
    options = cli.parse_args()

    csv_path = options.file
    if os.path.exists(csv_path):
        import_titles(csv_path, options.delimiter)
    else:
        # Exit with a non-zero status when the path does not exist.
        print(f"❌ File not found: {csv_path}")
        sys.exit(1)