From 101f67936a67a9a8f7049ad41ebaa73df2e34385 Mon Sep 17 00:00:00 2001 From: Floke Date: Fri, 20 Feb 2026 13:25:21 +0000 Subject: [PATCH] =?UTF-8?q?[2ff88f42]=20einf=C3=BCgen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit einfügen --- .dev_session/SESSION_INFO | 2 +- company-explorer/backend/app.py | 16 ++++ company-explorer/backend/database.py | 22 ++++- .../backend/scripts/import_job_titles.py | 95 +++++++++++++++++++ .../src/components/RoboticsSettings.tsx | 73 ++++++++++---- 5 files changed, 188 insertions(+), 20 deletions(-) create mode 100644 company-explorer/backend/scripts/import_job_titles.py diff --git a/.dev_session/SESSION_INFO b/.dev_session/SESSION_INFO index 23013c1c..76d52452 100644 --- a/.dev_session/SESSION_INFO +++ b/.dev_session/SESSION_INFO @@ -1 +1 @@ -{"task_id": "2ff88f42-8544-8000-8314-c9013414d1d0", "token": "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", "session_start_time": "2026-02-20T10:56:03.179196"} \ No newline at end of file +{"task_id": "2ff88f42-8544-8018-883f-e8837c0421af", "token": "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", "session_start_time": "2026-02-20T13:24:58.251700"} \ No newline at end of file diff --git a/company-explorer/backend/app.py b/company-explorer/backend/app.py index 67d72e75..964b920c 100644 --- a/company-explorer/backend/app.py +++ b/company-explorer/backend/app.py @@ -454,6 +454,22 @@ def list_industries(db: Session = Depends(get_db), username: str = Depends(authe def list_job_roles(db: Session = Depends(get_db), username: str = Depends(authenticate_user)): return db.query(JobRoleMapping).order_by(JobRoleMapping.pattern.asc()).all() +@app.get("/api/job_roles/raw") +def list_raw_job_titles( + limit: int = 100, + unmapped_only: bool = True, + db: Session = Depends(get_db), + username: str = Depends(authenticate_user) +): + """ + Returns unique raw job titles from CRM imports, prioritized by frequency. + """ + query = db.query(RawJobTitle) + if unmapped_only: + query = query.filter(RawJobTitle.is_mapped == False) + + return query.order_by(RawJobTitle.count.desc()).limit(limit).all() + @app.get("/api/mistakes") def list_reported_mistakes( status: Optional[str] = Query(None), diff --git a/company-explorer/backend/database.py b/company-explorer/backend/database.py index dfea3933..9f6a0504 100644 --- a/company-explorer/backend/database.py +++ b/company-explorer/backend/database.py @@ -150,7 +150,7 @@ class Industry(Base): created_at = Column(DateTime, default=datetime.utcnow) -class JobRoleMapping(Base): +class JobRoleMapping(BaseModel): """ Maps job title patterns (regex or simple string) to Roles. """ @@ -162,7 +162,25 @@ class JobRoleMapping(Base): created_at = Column(DateTime, default=datetime.utcnow) -class Persona(Base): +class RawJobTitle(BaseModel): + """ + Stores raw unique job titles imported from CRM to assist in pattern mining. + Tracks frequency to prioritize high-impact patterns. + """ + __tablename__ = "raw_job_titles" + + id = Column(Integer, primary_key=True, index=True) + title = Column(String, unique=True, index=True) # The raw string, e.g. "Senior Sales Mgr." + count = Column(Integer, default=1) # How often this title appears in the CRM + source = Column(String, default="import") + + # Status Flags + is_mapped = Column(Boolean, default=False) # True if a pattern currently covers this title + + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + +class Persona(BaseModel): """ Represents a generalized persona/role (e.g. 'Geschäftsführer', 'IT-Leiter') independent of the specific job title pattern. diff --git a/company-explorer/backend/scripts/import_job_titles.py b/company-explorer/backend/scripts/import_job_titles.py new file mode 100644 index 00000000..d59f16e2 --- /dev/null +++ b/company-explorer/backend/scripts/import_job_titles.py @@ -0,0 +1,95 @@ +import sys +import os +import csv +import argparse +from datetime import datetime + +# Setup Environment +sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) + +from backend.database import SessionLocal, RawJobTitle, init_db, engine, Base + +def import_titles(file_path: str, delimiter: str = ';'): + print(f"🚀 Starting Import from {file_path}...") + + # Ensure Table Exists + RawJobTitle.__table__.create(bind=engine, checkfirst=True) + + db = SessionLocal() + total_rows = 0 + new_titles = 0 + updated_titles = 0 + + try: + with open(file_path, 'r', encoding='utf-8-sig') as f: # utf-8-sig handles BOM from Excel + # Try to detect header + sample = f.read(1024) + has_header = csv.Sniffer().has_header(sample) + f.seek(0) + + reader = csv.reader(f, delimiter=delimiter) + + if has_header: + headers = next(reader) + print(f"ℹ️ Header detected: {headers}") + # Try to find the right column index + col_idx = 0 + for i, h in enumerate(headers): + if h.lower() in ['funktion', 'jobtitle', 'title', 'position', 'rolle']: + col_idx = i + print(f" -> Using column '{h}' (Index {i})") + break + else: + col_idx = 0 + print("ℹ️ No header detected, using first column.") + + # Process Rows + for row in reader: + if not row: continue + if len(row) <= col_idx: continue + + raw_title = row[col_idx].strip() + if not raw_title: continue # Skip empty + + total_rows += 1 + + # Check existance + existing = db.query(RawJobTitle).filter(RawJobTitle.title == raw_title).first() + + if existing: + existing.count += 1 + existing.updated_at = datetime.utcnow() + updated_titles += 1 + else: + db.add(RawJobTitle(title=raw_title, count=1)) + new_titles += 1 + + if total_rows % 100 == 0: + db.commit() + print(f" Processed {total_rows} rows...", end='\r') + + db.commit() + + except Exception as e: + print(f"\n❌ Error: {e}") + db.rollback() + finally: + db.close() + + print(f"\n✅ Import Complete.") + print(f" Total Processed: {total_rows}") + print(f" New Unique Titles: {new_titles}") + print(f" Updated Frequencies: {updated_titles}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Import Job Titles from CSV") + parser.add_argument("file", help="Path to CSV file") + parser.add_argument("--delimiter", default=";", help="CSV Delimiter (default: ';')") + + args = parser.parse_args() + + if not os.path.exists(args.file): + print(f"❌ File not found: {args.file}") + sys.exit(1) + + import_titles(args.file, args.delimiter) diff --git a/company-explorer/frontend/src/components/RoboticsSettings.tsx b/company-explorer/frontend/src/components/RoboticsSettings.tsx index 1dcfe132..7ee9dc5b 100644 --- a/company-explorer/frontend/src/components/RoboticsSettings.tsx +++ b/company-explorer/frontend/src/components/RoboticsSettings.tsx @@ -32,6 +32,7 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP const [roboticsCategories, setRoboticsCategories] = useState([]) const [industries, setIndustries] = useState([]) const [jobRoles, setJobRoles] = useState([]) + const [rawJobTitles, setRawJobTitles] = useState([]) const [reportedMistakes, setReportedMistakes] = useState([]) const [currentMistakeStatusFilter, setCurrentMistakeStatusFilter] = useState("PENDING"); const [isLoading, setIsLoading] = useState(false); @@ -39,15 +40,17 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP const fetchAllData = async () => { setIsLoading(true); try { - const [resRobotics, resIndustries, resJobRoles, resMistakes] = await Promise.all([ + const [resRobotics, resIndustries, resJobRoles, resRawTitles, resMistakes] = await Promise.all([ axios.get(`${apiBase}/robotics/categories`), axios.get(`${apiBase}/industries`), axios.get(`${apiBase}/job_roles`), + axios.get(`${apiBase}/job_roles/raw`), axios.get(`${apiBase}/mistakes?status=${currentMistakeStatusFilter}`), ]); setRoboticsCategories(resRobotics.data); setIndustries(resIndustries.data); setJobRoles(resJobRoles.data); + setRawJobTitles(resRawTitles.data); setReportedMistakes(resMistakes.data.items); } catch (e) { console.error("Failed to fetch settings data:", e); @@ -251,22 +254,58 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP -
-

Job Title Mapping Patterns

-
- - - - {jobRoles.map(role => ( - - - - - - ))} - {jobRoles.length === 0 && ()} - -
Job Title Pattern (Regex/Text)Mapped Role
No patterns defined yet.
+
+ {/* Existing Patterns */} +
+
+
+

Active Mapping Patterns

+

Deterministic Regex/Text rules

+
+ +
+
+ + + + {jobRoles.map(role => ( + + + + + + ))} + +
Pattern (% for wildcard)Target Persona Role
+
+
+ + {/* Discovery Inbox */} +
+
+
+

Discovery Inbox

+

Unmapped job titles from CRM, prioritized by frequency

+
+
+
+ + + + {rawJobTitles.map(raw => ( + + + + + + ))} + {rawJobTitles.length === 0 && ()} + +
Job Title from CRMFrequency
{raw.title}{raw.count}x
Discovery inbox is empty. Import raw job titles to see data here.
+