[31188f42] einfügen

einfügen
This commit is contained in:
2026-02-24 06:47:35 +00:00
parent 3d34436f16
commit e39c745a78
21 changed files with 1575 additions and 152 deletions

View File

@@ -1 +1 @@
{"task_id": "2ff88f42-8544-8050-8245-c3bb852058f4", "token": "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", "session_start_time": "2026-02-23T13:57:05.351873"}
{"task_id": "31188f42-8544-80f0-b21a-c6beaa9ea3a1", "token": "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", "session_start_time": "2026-02-24T06:47:22.751414"}

25
check_matrix.py Normal file
View File

@@ -0,0 +1,25 @@
import os
import sys

# Make the company-explorer package importable when the script is run
# from the repository root.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), 'company-explorer')))

from backend.database import SessionLocal, MarketingMatrix, Industry, Persona
import json

# Diagnostic script: report the state of the MarketingMatrix table and the
# two dimension tables (Industry, Persona) it is derived from.
db = SessionLocal()
try:
    matrix_count = db.query(MarketingMatrix).count()
    print(f"MarketingMatrix count: {matrix_count}")
    if matrix_count > 0:
        sample = db.query(MarketingMatrix).first()
        print(f"First entry: ID={sample.id}, Industry={sample.industry_id}, Persona={sample.persona_id}")
    else:
        print("MarketingMatrix is empty.")
    # Check if we have industries and personas
    industry_total = db.query(Industry).count()
    persona_total = db.query(Persona).count()
    print(f"Industries: {industry_total}, Personas: {persona_total}")
finally:
    db.close()

View File

View File

@@ -32,11 +32,12 @@ setup_logging()
import logging
logger = logging.getLogger(__name__)
from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRoleMapping, ReportedMistake, MarketingMatrix, Persona, RawJobTitle
from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRolePattern, ReportedMistake, MarketingMatrix, Persona, RawJobTitle
from .services.deduplication import Deduplicator
from .services.discovery import DiscoveryService
from .services.scraping import ScraperService
from .services.classification import ClassificationService
from .services.role_mapping import RoleMappingService
# Initialize App
app = FastAPI(
@@ -119,6 +120,25 @@ class IndustryDetails(BaseModel):
class Config:
from_attributes = True
class MarketingMatrixUpdate(BaseModel):
    """Partial-update payload for one matrix cell; omitted fields are left unchanged."""
    subject: Optional[str] = None
    intro: Optional[str] = None
    social_proof: Optional[str] = None
class MarketingMatrixResponse(BaseModel):
    """API representation of one industry x persona marketing-matrix cell."""
    id: int
    industry_id: int
    persona_id: int
    # Denormalized display names resolved from the related Industry/Persona rows
    # ("Unknown" when the relationship is missing — see the endpoint mapping).
    industry_name: str
    persona_name: str
    subject: Optional[str] = None
    intro: Optional[str] = None
    social_proof: Optional[str] = None
    updated_at: datetime

    class Config:
        from_attributes = True
class ContactResponse(BaseModel):
id: int
first_name: Optional[str] = None
@@ -314,23 +334,21 @@ def provision_superoffice_contact(
logger.info(f"Created new person {req.so_person_id} for company {company.name}")
# Update Job Title & Role logic
if req.job_title:
if req.job_title and req.job_title != person.job_title:
person.job_title = req.job_title
# Simple classification fallback
mappings = db.query(JobRoleMapping).all()
found_role = None
for m in mappings:
pattern_clean = m.pattern.replace("%", "").lower()
if pattern_clean in req.job_title.lower():
found_role = m.role
break
# New, service-based classification
role_mapping_service = RoleMappingService(db)
found_role = role_mapping_service.get_role_for_job_title(req.job_title)
# ALWAYS update role, even if to None, to avoid 'sticking' old roles
if found_role != person.role:
logger.info(f"Role Change for {person.so_person_id}: {person.role} -> {found_role}")
logger.info(f"Role Change for {person.so_person_id} via Mapping Service: {person.role} -> {found_role}")
person.role = found_role
if not found_role:
# If no role was found, we log it for future pattern mining
role_mapping_service.add_or_update_unclassified_title(req.job_title)
db.commit()
db.refresh(person)
@@ -429,6 +447,8 @@ def export_companies_csv(db: Session = Depends(get_db), username: str = Depends(
from fastapi.responses import StreamingResponse
output = io.StringIO()
# Add UTF-8 BOM for Excel
output.write('\ufeff')
writer = csv.writer(output)
# Header
@@ -567,7 +587,229 @@ def list_industries(db: Session = Depends(get_db), username: str = Depends(authe
@app.get("/api/job_roles")
def list_job_roles(db: Session = Depends(get_db), username: str = Depends(authenticate_user)):
return db.query(JobRoleMapping).order_by(JobRoleMapping.pattern.asc()).all()
return db.query(JobRolePattern).order_by(JobRolePattern.priority.asc()).all()
# --- Marketing Matrix Endpoints ---
@app.get("/api/matrix", response_model=List[MarketingMatrixResponse])
def get_marketing_matrix(
    industry_id: Optional[int] = Query(None),
    persona_id: Optional[int] = Query(None),
    db: Session = Depends(get_db),
    username: str = Depends(authenticate_user)
):
    """Return all marketing-matrix cells, optionally filtered by industry and/or persona."""
    # Eager-load both relationships so the name lookups below do not trigger
    # one extra query per row.
    q = db.query(MarketingMatrix).options(
        joinedload(MarketingMatrix.industry),
        joinedload(MarketingMatrix.persona)
    )
    if industry_id:
        q = q.filter(MarketingMatrix.industry_id == industry_id)
    if persona_id:
        q = q.filter(MarketingMatrix.persona_id == persona_id)

    # Map ORM rows to the response model, flattening the relationship names.
    results = []
    for e in q.all():
        results.append(MarketingMatrixResponse(
            id=e.id,
            industry_id=e.industry_id,
            persona_id=e.persona_id,
            industry_name=e.industry.name if e.industry else "Unknown",
            persona_name=e.persona.name if e.persona else "Unknown",
            subject=e.subject,
            intro=e.intro,
            social_proof=e.social_proof,
            updated_at=e.updated_at
        ))
    return results
@app.get("/api/matrix/export")
def export_matrix_csv(
    industry_id: Optional[int] = Query(None),
    persona_id: Optional[int] = Query(None),
    db: Session = Depends(get_db),
    username: str = Depends(authenticate_user)
):
    """
    Exports a CSV of the marketing matrix, optionally filtered by
    industry and/or persona.  Returns a streaming attachment named
    marketing_matrix_<YYYY-MM-DD>.csv.
    """
    import io
    import csv
    from fastapi.responses import StreamingResponse

    # Eager-load relationships so the name lookups below don't issue extra queries.
    query = db.query(MarketingMatrix).options(
        joinedload(MarketingMatrix.industry),
        joinedload(MarketingMatrix.persona)
    )
    if industry_id:
        query = query.filter(MarketingMatrix.industry_id == industry_id)
    if persona_id:
        query = query.filter(MarketingMatrix.persona_id == persona_id)
    entries = query.all()

    output = io.StringIO()
    # Add UTF-8 BOM for Excel
    output.write('\ufeff')
    writer = csv.writer(output)
    # Header
    writer.writerow([
        "ID", "Industry", "Persona", "Subject", "Intro", "Social Proof", "Last Updated"
    ])
    for e in entries:
        writer.writerow([
            e.id,
            e.industry.name if e.industry else "Unknown",
            e.persona.name if e.persona else "Unknown",
            e.subject,
            e.intro,
            e.social_proof,
            e.updated_at.strftime('%Y-%m-%d %H:%M:%S') if e.updated_at else "-"
        ])
    output.seek(0)

    filename = f"marketing_matrix_{datetime.utcnow().strftime('%Y-%m-%d')}.csv"
    # BUG FIX: the computed filename was never interpolated into the
    # Content-Disposition header (the f-string contained a literal
    # placeholder), so the `filename` local was dead and downloads got a
    # garbage attachment name.
    return StreamingResponse(
        output,
        media_type="text/csv",
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )
@app.put("/api/matrix/{entry_id}", response_model=MarketingMatrixResponse)
def update_matrix_entry(
    entry_id: int,
    data: MarketingMatrixUpdate,
    db: Session = Depends(get_db),
    username: str = Depends(authenticate_user)
):
    """Apply a partial update to one matrix cell and return the refreshed row."""
    entry = db.query(MarketingMatrix).options(
        joinedload(MarketingMatrix.industry),
        joinedload(MarketingMatrix.persona)
    ).filter(MarketingMatrix.id == entry_id).first()
    if entry is None:
        raise HTTPException(status_code=404, detail="Matrix entry not found")

    # Only fields explicitly present (non-None) in the payload overwrite
    # stored values; everything else is preserved.
    for field_name in ("subject", "intro", "social_proof"):
        new_value = getattr(data, field_name)
        if new_value is not None:
            setattr(entry, field_name, new_value)
    entry.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(entry)

    return MarketingMatrixResponse(
        id=entry.id,
        industry_id=entry.industry_id,
        persona_id=entry.persona_id,
        industry_name=entry.industry.name if entry.industry else "Unknown",
        persona_name=entry.persona.name if entry.persona else "Unknown",
        subject=entry.subject,
        intro=entry.intro,
        social_proof=entry.social_proof,
        updated_at=entry.updated_at
    )
@app.get("/api/matrix/personas")
def list_personas(db: Session = Depends(get_db), username: str = Depends(authenticate_user)):
    """Return all Persona rows (used by the matrix UI to populate its persona filter)."""
    return db.query(Persona).all()
class JobRolePatternCreate(BaseModel):
    """Payload for creating or overwriting a job-title pattern."""
    pattern_type: str  # 'exact' or 'regex' (see JobRolePattern model)
    pattern_value: str
    role: str
    priority: int = 100  # lower number means higher priority
class JobRolePatternResponse(BaseModel):
    """Full JobRolePattern row as returned by the /api/job_roles endpoints."""
    id: int
    pattern_type: str
    pattern_value: str
    role: str
    priority: int
    is_active: bool
    created_by: str  # origin marker, e.g. 'system', 'user', 'llm_batch'
    created_at: datetime
    updated_at: datetime

    class Config:
        from_attributes = True
class ClassificationResponse(BaseModel):
    """Result envelope for the batch-classification trigger endpoint."""
    status: str
    processed: int
    new_patterns: int
@app.post("/api/job_roles", response_model=JobRolePatternResponse)
def create_job_role(
    job_role: JobRolePatternCreate,
    db: Session = Depends(get_db),
    username: str = Depends(authenticate_user)
):
    """Persist a new job-title pattern and return the stored row."""
    # NOTE(review): created_by is hardcoded to "user" rather than the
    # authenticated username — confirm this is intended.
    new_pattern = JobRolePattern(
        pattern_type=job_role.pattern_type,
        pattern_value=job_role.pattern_value,
        role=job_role.role,
        priority=job_role.priority,
        created_by="user"
    )
    db.add(new_pattern)
    db.commit()
    db.refresh(new_pattern)
    return new_pattern
@app.put("/api/job_roles/{role_id}", response_model=JobRolePatternResponse)
def update_job_role(
    role_id: int,
    job_role: JobRolePatternCreate,
    db: Session = Depends(get_db),
    username: str = Depends(authenticate_user)
):
    """Overwrite an existing pattern's fields; 404 if the id is unknown."""
    existing = db.query(JobRolePattern).filter(JobRolePattern.id == role_id).first()
    if existing is None:
        raise HTTPException(status_code=404, detail="Job role not found")

    # Full overwrite from the payload (same schema as creation).
    existing.pattern_type = job_role.pattern_type
    existing.pattern_value = job_role.pattern_value
    existing.role = job_role.role
    existing.priority = job_role.priority
    existing.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(existing)
    return existing
@app.delete("/api/job_roles/{role_id}")
def delete_job_role(
    role_id: int,
    db: Session = Depends(get_db),
    username: str = Depends(authenticate_user)
):
    """Remove a pattern by id; 404 if it does not exist."""
    target = db.query(JobRolePattern).filter(JobRolePattern.id == role_id).first()
    if target is None:
        raise HTTPException(status_code=404, detail="Job role not found")
    db.delete(target)
    db.commit()
    return {"status": "deleted"}
@app.post("/api/job_roles/classify-batch", response_model=ClassificationResponse)
def classify_batch_job_roles(
    background_tasks: BackgroundTasks,
    username: str = Depends(authenticate_user)
):
    """
    Triggers a background task to classify all unmapped job titles from the inbox.
    """
    # Fire-and-forget: the counts are reported as 0 because the work runs
    # asynchronously; actual results appear in logs and the database.
    background_tasks.add_task(run_batch_classification_task)
    return {"status": "queued", "processed": 0, "new_patterns": 0}
@app.get("/api/job_roles/raw")
def list_raw_job_titles(
@@ -947,6 +1189,66 @@ def run_analysis_task(company_id: int):
finally:
db.close()
def run_batch_classification_task():
    """
    Background job: classify all unmapped RawJobTitle rows into Persona roles
    via the LLM, creating an exact-match JobRolePattern row for each confident
    classification.  Opens (and always closes) its own DB session.
    """
    from .database import SessionLocal
    from .lib.core_utils import call_gemini_flash
    import json
    db = SessionLocal()
    logger.info("--- [BACKGROUND TASK] Starting Batch Job Title Classification ---")
    BATCH_SIZE = 50  # titles sent per LLM call
    try:
        personas = db.query(Persona).all()
        available_roles = [p.name for p in personas]
        if not available_roles:
            # Without personas there is no target label set; nothing to do.
            logger.error("No Personas found. Aborting classification task.")
            return
        unmapped_titles = db.query(RawJobTitle).filter(RawJobTitle.is_mapped == False).all()
        if not unmapped_titles:
            logger.info("No unmapped titles to process.")
            return
        logger.info(f"Found {len(unmapped_titles)} unmapped titles. Processing in batches of {BATCH_SIZE}.")
        for i in range(0, len(unmapped_titles), BATCH_SIZE):
            batch = unmapped_titles[i:i + BATCH_SIZE]
            title_strings = [item.title for item in batch]
            prompt = f'''You are an expert in B2B contact segmentation. Classify the following job titles into one of the provided roles: {', '.join(available_roles)}. Respond ONLY with a valid JSON object mapping the title to the role. Use "Influencer" as a fallback. Titles: {json.dumps(title_strings)}'''
            response_text = ""
            try:
                response_text = call_gemini_flash(prompt, json_mode=True)
                # Strip markdown code fences; assumes the exact shape
                # ```json\n...\n``` — TODO confirm against real responses.
                if response_text.strip().startswith("```json"):
                    response_text = response_text.strip()[7:-4]
                classifications = json.loads(response_text)
            except Exception as e:
                # A failed batch is skipped, not retried; its titles stay
                # unmapped and will be picked up on the next run.
                logger.error(f"LLM response error for batch, skipping. Error: {e}. Response: {response_text}")
                continue
            new_patterns = 0
            for title_obj in batch:
                original_title = title_obj.title
                assigned_role = classifications.get(original_title)
                # Only accept roles that exist as Personas; anything else is
                # left unmapped for a later pass.
                if assigned_role and assigned_role in available_roles:
                    # pattern_value is unique — skip if a pattern already exists.
                    if not db.query(JobRolePattern).filter(JobRolePattern.pattern_value == original_title).first():
                        db.add(JobRolePattern(pattern_type='exact', pattern_value=original_title, role=assigned_role, priority=90, created_by='llm_batch'))
                        new_patterns += 1
                    title_obj.is_mapped = True
            # Commit once per batch so partial progress survives a crash.
            db.commit()
            logger.info(f"Batch {i//BATCH_SIZE + 1} complete. Created {new_patterns} new patterns.")
    except Exception as e:
        logger.critical(f"--- [BACKGROUND TASK] CRITICAL ERROR during classification ---", exc_info=True)
        db.rollback()
    finally:
        db.close()
    logger.info("--- [BACKGROUND TASK] Finished Batch Job Title Classification ---")
# --- Serve Frontend ---
static_path = "/frontend_static"
if not os.path.exists(static_path):

View File

@@ -157,17 +157,24 @@ class Industry(Base):
created_at = Column(DateTime, default=datetime.utcnow)
class JobRoleMapping(Base):
class JobRolePattern(Base):
"""
Maps job title patterns (regex or simple string) to Roles.
Maps job title patterns (regex or exact string) to internal Roles.
"""
__tablename__ = "job_role_mappings"
__tablename__ = "job_role_patterns"
id = Column(Integer, primary_key=True, index=True)
pattern = Column(String, unique=True) # e.g. "%CTO%" or "Technischer Leiter"
role = Column(String) # The target Role
pattern_type = Column(String, default="exact", index=True) # 'exact' or 'regex'
pattern_value = Column(String, unique=True) # e.g. "Technischer Leiter" or "(?i)leiter.*technik"
role = Column(String, index=True) # The target Role, maps to Persona.name
priority = Column(Integer, default=100) # Lower number means higher priority
is_active = Column(Boolean, default=True)
created_by = Column(String, default="system") # 'system', 'user', 'llm'
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
class RawJobTitle(Base):
"""
@@ -196,7 +203,7 @@ class Persona(Base):
__tablename__ = "personas"
id = Column(Integer, primary_key=True, index=True)
name = Column(String, unique=True, index=True) # Matches the 'role' string in JobRoleMapping
name = Column(String, unique=True, index=True) # Matches the 'role' string in JobRolePattern
pains = Column(Text, nullable=True) # JSON list or multiline string
gains = Column(Text, nullable=True) # JSON list or multiline string

View File

@@ -5,14 +5,14 @@ import os
# Setup Environment
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from backend.database import SessionLocal, JobRoleMapping
from backend.database import SessionLocal, JobRolePattern
def check_mappings():
db = SessionLocal()
count = db.query(JobRoleMapping).count()
print(f"Total JobRoleMappings: {count}")
count = db.query(JobRolePattern).count()
print(f"Total JobRolePatterns: {count}")
examples = db.query(JobRoleMapping).limit(5).all()
examples = db.query(JobRolePattern).limit(5).all()
for ex in examples:
print(f" - {ex.pattern} -> {ex.role}")

View File

@@ -0,0 +1,171 @@
import sys
import os
import argparse
import json
import logging
from sqlalchemy.orm import sessionmaker, declarative_base
from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime
from datetime import datetime
# --- Standalone Configuration ---
# Add the project root to the Python path to find the LLM utility
sys.path.insert(0, '/app')
from company_explorer.backend.lib.core_utils import call_gemini_flash
DATABASE_URL = "sqlite:////app/companies_v3_fixed_2.db"
LOG_FILE = "/app/Log_from_docker/batch_classifier.log"
BATCH_SIZE = 50 # Number of titles to process in one LLM call
# --- Logging Setup ---
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler(LOG_FILE),
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
# --- SQLAlchemy Models (self-contained) ---
Base = declarative_base()
class RawJobTitle(Base):
    """Inbox of harvested job titles; is_mapped flips to True once classified."""
    __tablename__ = 'raw_job_titles'
    id = Column(Integer, primary_key=True)
    title = Column(String, unique=True, index=True)
    count = Column(Integer, default=1)  # occurrence frequency across imports
    source = Column(String)
    is_mapped = Column(Boolean, default=False)
    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)
class JobRolePattern(Base):
    """Self-contained mirror of the job_role_patterns table for this standalone script."""
    __tablename__ = "job_role_patterns"
    id = Column(Integer, primary_key=True, index=True)
    pattern_type = Column(String, default="exact", index=True)  # 'exact' or 'regex'
    pattern_value = Column(String, unique=True)
    role = Column(String, index=True)  # target role; matches Persona.name
    priority = Column(Integer, default=100)  # lower number means higher priority
    is_active = Column(Boolean, default=True)
    created_by = Column(String, default="system")
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
class Persona(Base):
    """Self-contained mirror of the personas table; name is the role label used for classification."""
    __tablename__ = "personas"
    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, unique=True, index=True)
    # NOTE(review): declared String here but Text in the main app's model —
    # harmless for SQLite, but confirm the schemas are meant to match.
    pains = Column(String)
    gains = Column(String)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
# --- Database Connection ---
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
def build_classification_prompt(titles_to_classify, available_roles):
"""Builds the prompt for the LLM to classify a batch of job titles."""
prompt = f"""
You are an expert in B2B contact segmentation. Your task is to classify a list of job titles into predefined roles.
Analyze the following list of job titles and assign each one to the most appropriate role from the list provided.
The available roles are:
- {', '.join(available_roles)}
RULES:
1. Respond ONLY with a valid JSON object. Do not include any text, explanations, or markdown code fences before or after the JSON.
2. The JSON object should have the original job title as the key and the assigned role as the value.
3. If a job title is ambiguous or you cannot confidently classify it, assign the value "Influencer". Use this as a fallback.
4. Do not invent new roles. Only use the roles from the provided list.
Here are the job titles to classify:
{json.dumps(titles_to_classify, indent=2)}
Your JSON response:
"""
return prompt
def classify_and_store_titles():
    """
    Classify every unmapped RawJobTitle into one of the Persona roles via the
    LLM and persist each confident result as an exact JobRolePattern.
    Commits once per batch; skips a batch entirely on LLM/parse failure.
    """
    db = SessionLocal()
    try:
        # 1. Fetch available persona names (roles)
        personas = db.query(Persona).all()
        available_roles = [p.name for p in personas]
        if not available_roles:
            logger.error("No Personas/Roles found in the database. Cannot classify. Please seed personas first.")
            return
        logger.info(f"Classifying based on these roles: {available_roles}")
        # 2. Fetch unmapped titles
        unmapped_titles = db.query(RawJobTitle).filter(RawJobTitle.is_mapped == False).all()
        if not unmapped_titles:
            logger.info("No unmapped job titles found. Nothing to do.")
            return
        logger.info(f"Found {len(unmapped_titles)} unmapped job titles to process.")
        # 3. Process in batches
        for i in range(0, len(unmapped_titles), BATCH_SIZE):
            batch = unmapped_titles[i:i + BATCH_SIZE]
            title_strings = [item.title for item in batch]
            logger.info(f"Processing batch {i//BATCH_SIZE + 1} of { (len(unmapped_titles) + BATCH_SIZE - 1) // BATCH_SIZE } with {len(title_strings)} titles...")
            # 4. Call LLM
            prompt = build_classification_prompt(title_strings, available_roles)
            response_text = ""
            try:
                response_text = call_gemini_flash(prompt, json_mode=True)
                # Clean potential markdown fences
                # (assumes the exact shape ```json\n...\n``` — TODO confirm)
                if response_text.strip().startswith("```json"):
                    response_text = response_text.strip()[7:-4]
                classifications = json.loads(response_text)
            except Exception as e:
                # A failed batch is skipped, not retried; its titles remain
                # unmapped and will be picked up on the next run.
                logger.error(f"Failed to get or parse LLM response for batch. Skipping. Error: {e}")
                logger.error(f"Raw response was: {response_text}")
                continue
            # 5. Process results
            new_patterns = 0
            for title_obj in batch:
                original_title = title_obj.title
                assigned_role = classifications.get(original_title)
                if assigned_role and assigned_role in available_roles:
                    # pattern_value is unique — only insert if not present yet.
                    exists = db.query(JobRolePattern).filter(JobRolePattern.pattern_value == original_title).first()
                    if not exists:
                        new_pattern = JobRolePattern(
                            pattern_type='exact',
                            pattern_value=original_title,
                            role=assigned_role,
                            priority=90,
                            created_by='llm_batch'
                        )
                        db.add(new_pattern)
                        new_patterns += 1
                    title_obj.is_mapped = True
                else:
                    logger.warning(f"Could not classify '{original_title}' or role '{assigned_role}' is invalid. It will be re-processed later.")
            # Commit per batch so partial progress survives interruption.
            db.commit()
            logger.info(f"Batch {i//BATCH_SIZE + 1} complete. Created {new_patterns} new mapping patterns.")
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}", exc_info=True)
        db.rollback()
    finally:
        db.close()
if __name__ == "__main__":
    # CLI entry point; argparse currently provides only --help/usage.
    parser = argparse.ArgumentParser(description="Batch classify unmapped job titles using an LLM.")
    args = parser.parse_args()
    logger.info("--- Starting Batch Classification Script ---")
    classify_and_store_titles()
    logger.info("--- Batch Classification Script Finished ---")

View File

@@ -1,95 +1,66 @@
import sys
import os
import csv
from collections import Counter
import argparse
from datetime import datetime
# Setup Environment
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
# Add the 'backend' directory to the path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from backend.database import SessionLocal, RawJobTitle, init_db, engine, Base
from database import SessionLocal, RawJobTitle
from lib.logging_setup import setup_logging
import logging
def import_titles(file_path: str, delimiter: str = ';'):
print(f"🚀 Starting Import from {file_path}...")
# Ensure Table Exists
RawJobTitle.__table__.create(bind=engine, checkfirst=True)
setup_logging()
logger = logging.getLogger(__name__)
def import_job_titles_from_csv(file_path: str):
db = SessionLocal()
total_rows = 0
new_titles = 0
updated_titles = 0
try:
with open(file_path, 'r', encoding='utf-8-sig') as f: # utf-8-sig handles BOM from Excel
# Try to detect header
sample = f.read(1024)
has_header = csv.Sniffer().has_header(sample)
f.seek(0)
logger.info(f"Starting import of job titles from {file_path}")
reader = csv.reader(f, delimiter=delimiter)
# Use Counter to get frequencies directly from the CSV
job_title_counts = Counter()
total_rows = 0
if has_header:
headers = next(reader)
print(f" Header detected: {headers}")
# Try to find the right column index
col_idx = 0
for i, h in enumerate(headers):
if h.lower() in ['funktion', 'jobtitle', 'title', 'position', 'rolle']:
col_idx = i
print(f" -> Using column '{h}' (Index {i})")
break
else:
col_idx = 0
print(" No header detected, using first column.")
# Process Rows
with open(file_path, 'r', encoding='utf-8') as f:
reader = csv.reader(f)
# Assuming the CSV contains only job titles, one per row
for row in reader:
if not row: continue
if len(row) <= col_idx: continue
raw_title = row[col_idx].strip()
if not raw_title: continue # Skip empty
if row and row[0].strip():
title = row[0].strip()
job_title_counts[title] += 1
total_rows += 1
# Check existance
existing = db.query(RawJobTitle).filter(RawJobTitle.title == raw_title).first()
logger.info(f"Read {total_rows} total job title entries. Found {len(job_title_counts)} unique titles.")
if existing:
existing.count += 1
existing.updated_at = datetime.utcnow()
updated_titles += 1
added_count = 0
updated_count = 0
for title, count in job_title_counts.items():
existing_title = db.query(RawJobTitle).filter(RawJobTitle.title == title).first()
if existing_title:
if existing_title.count != count:
existing_title.count = count
updated_count += 1
# If it exists and count is the same, do nothing.
else:
db.add(RawJobTitle(title=raw_title, count=1))
new_titles += 1
if total_rows % 100 == 0:
db.commit()
print(f" Processed {total_rows} rows...", end='\r')
new_title = RawJobTitle(title=title, count=count, source="csv_import", is_mapped=False)
db.add(new_title)
added_count += 1
db.commit()
logger.info(f"Import complete. Added {added_count} new unique titles, updated {updated_count} existing titles.")
except Exception as e:
print(f"\n❌ Error: {e}")
logger.error(f"Error during job title import: {e}", exc_info=True)
db.rollback()
finally:
db.close()
print(f"\n✅ Import Complete.")
print(f" Total Processed: {total_rows}")
print(f" New Unique Titles: {new_titles}")
print(f" Updated Frequencies: {updated_titles}")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Import Job Titles from CSV")
parser.add_argument("file", help="Path to CSV file")
parser.add_argument("--delimiter", default=";", help="CSV Delimiter (default: ';')")
parser = argparse.ArgumentParser(description="Import job titles from a CSV file into the RawJobTitle database table.")
parser.add_argument("file_path", type=str, help="Path to the CSV file containing job titles.")
args = parser.parse_args()
if not os.path.exists(args.file):
print(f"❌ File not found: {args.file}")
sys.exit(1)
import_titles(args.file, args.delimiter)
import_job_titles_from_csv(args.file_path)

View File

@@ -4,7 +4,7 @@ import json
# Setup Environment to import backend modules
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from backend.database import SessionLocal, Persona, JobRoleMapping
from backend.database import SessionLocal, Persona, JobRolePattern
def seed_archetypes():
db = SessionLocal()
@@ -87,33 +87,41 @@ def seed_archetypes():
db.commit()
# --- 2. Update JobRoleMappings to map to Archetypes ---
# --- 2. Update JobRolePatterns to map to Archetypes ---
# We map the patterns to the new 4 Archetypes
mapping_updates = [
# Wirtschaftlicher Entscheider
{"role": "Wirtschaftlicher Entscheider", "patterns": ["%geschäftsführer%", "%ceo%", "%director%", "%einkauf%", "%procurement%", "%finance%", "%cfo%"]},
{"role": "Wirtschaftlicher Entscheider", "patterns": ["geschäftsführer", "ceo", "director", "einkauf", "procurement", "finance", "cfo"]},
# Operativer Entscheider
{"role": "Operativer Entscheider", "patterns": ["%housekeeping%", "%hausdame%", "%hauswirtschaft%", "%reinigung%", "%restaurant%", "%f&b%", "%werksleiter%", "%produktionsleiter%", "%lager%", "%logistik%", "%operations%", "%coo%"]},
{"role": "Operativer Entscheider", "patterns": ["housekeeping", "hausdame", "hauswirtschaft", "reinigung", "restaurant", "f&b", "werksleiter", "produktionsleiter", "lager", "logistik", "operations", "coo"]},
# Infrastruktur-Verantwortlicher
{"role": "Infrastruktur-Verantwortlicher", "patterns": ["%facility%", "%technik%", "%instandhaltung%", "%it-leiter%", "%cto%", "%admin%", "%building%"]},
{"role": "Infrastruktur-Verantwortlicher", "patterns": ["facility", "technik", "instandhaltung", "it-leiter", "cto", "admin", "building"]},
# Innovations-Treiber
{"role": "Innovations-Treiber", "patterns": ["%innovation%", "%digital%", "%transformation%", "%business dev%", "%marketing%"]}
{"role": "Innovations-Treiber", "patterns": ["innovation", "digital", "transformation", "business dev", "marketing"]}
]
# Clear old mappings to prevent confusion
db.query(JobRoleMapping).delete()
db.query(JobRolePattern).delete()
db.commit()
print("Cleared old JobRoleMappings.")
print("Cleared old JobRolePatterns.")
for group in mapping_updates:
role_name = group["role"]
for pattern in group["patterns"]:
print(f"Mapping '{pattern}' -> '{role_name}'")
db.add(JobRoleMapping(pattern=pattern, role=role_name))
for pattern_text in group["patterns"]:
print(f"Mapping '{pattern_text}' -> '{role_name}'")
# All seeded patterns are regex contains checks
new_pattern = JobRolePattern(
pattern_type='regex',
pattern_value=pattern_text, # Stored without wildcards
role=role_name,
priority=100, # Default priority for seeded patterns
created_by='system'
)
db.add(new_pattern)
db.commit()
print("Archetypes and Mappings Seeded Successfully.")

View File

@@ -5,15 +5,15 @@ import os
# Setup Environment
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from backend.database import SessionLocal, JobRoleMapping, Persona
from backend.database import SessionLocal, JobRolePattern, Persona
def test_mapping(job_title):
db = SessionLocal()
print(f"\n--- Testing Mapping for '{job_title}' ---")
# 1. Find Role Name via JobRoleMapping
# 1. Find Role Name via JobRolePattern
role_name = None
mappings = db.query(JobRoleMapping).all()
mappings = db.query(JobRolePattern).all()
for m in mappings:
pattern_clean = m.pattern.replace("%", "").lower()
if pattern_clean in job_title.lower():

View File

@@ -6,7 +6,7 @@ import os
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
# Import everything to ensure metadata is populated
from backend.database import engine, Base, Company, Contact, Industry, JobRoleMapping, Persona, Signal, EnrichmentData, RoboticsCategory, ImportLog, ReportedMistake, MarketingMatrix
from backend.database import engine, Base, Company, Contact, Industry, JobRolePattern, Persona, Signal, EnrichmentData, RoboticsCategory, ImportLog, ReportedMistake, MarketingMatrix
def migrate():
print("Migrating Database Schema...")

View File

@@ -7,10 +7,10 @@ from typing import Optional, Dict, Any, List
from sqlalchemy.orm import Session, joinedload
from backend.database import Company, Industry, RoboticsCategory, EnrichmentData
from backend.lib.core_utils import call_gemini_flash, safe_eval_math, run_serp_search
from backend.services.scraping import scrape_website_content
from backend.lib.metric_parser import MetricParser
from ..database import Company, Industry, RoboticsCategory, EnrichmentData
from ..lib.core_utils import call_gemini_flash, safe_eval_math, run_serp_search
from .scraping import scrape_website_content
from ..lib.metric_parser import MetricParser
logger = logging.getLogger(__name__)

View File

@@ -0,0 +1,63 @@
import logging
import re
from sqlalchemy.orm import Session
from typing import Optional
from ..database import JobRolePattern, RawJobTitle, Persona, Contact
logger = logging.getLogger(__name__)
class RoleMappingService:
    """Maps free-form job titles to internal roles using JobRolePattern rows."""

    def __init__(self, db: Session):
        # Shares the caller's session; commits only in
        # add_or_update_unclassified_title.
        self.db = db

    def get_role_for_job_title(self, job_title: str) -> Optional[str]:
        """
        Finds the corresponding role for a given job title using a multi-step process.
        1. Check for exact matches.
        2. Evaluate regex patterns.
        """
        if not job_title:
            return None
        # Normalize job title for matching
        normalized_title = job_title.lower().strip()
        # 1. Fetch all active patterns from the database, ordered by priority
        patterns = self.db.query(JobRolePattern).filter(
            JobRolePattern.is_active == True
        ).order_by(JobRolePattern.priority.asc()).all()
        # 2. Separate patterns for easier processing
        # NOTE(review): exact patterns always win over regex regardless of
        # priority, and two exact pattern_values differing only in case
        # collapse to one lowercased key (the later, lower-precedence row
        # wins) — confirm this is intended.
        exact_patterns = {p.pattern_value.lower(): p.role for p in patterns if p.pattern_type == 'exact'}
        regex_patterns = [(p.pattern_value, p.role) for p in patterns if p.pattern_type == 'regex']
        # 3. Check for exact match first (most efficient)
        if normalized_title in exact_patterns:
            return exact_patterns[normalized_title]
        # 4. Evaluate regex patterns (priority order preserved from the query)
        for pattern, role in regex_patterns:
            try:
                # Search the raw title; IGNORECASE makes seeded lowercase
                # substrings behave like case-insensitive "contains" checks.
                if re.search(pattern, job_title, re.IGNORECASE):
                    return role
            except re.error as e:
                # A malformed pattern row is skipped, never fatal.
                logger.error(f"Invalid regex for role '{role}': {pattern}. Error: {e}")
                continue
        return None

    def add_or_update_unclassified_title(self, job_title: str):
        """
        Logs an unclassified job title or increments its count if already present.
        """
        if not job_title:
            return
        entry = self.db.query(RawJobTitle).filter(RawJobTitle.title == job_title).first()
        if entry:
            entry.count += 1
        else:
            entry = RawJobTitle(title=job_title, count=1)
            self.db.add(entry)
        self.db.commit()

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Company Explorer (Robotics)</title>
<script type="module" crossorigin src="/ce/assets/index-tQU9lyIc.js"></script>
<link rel="stylesheet" crossorigin href="/ce/assets/index-BgxQoHsm.css">
</head>
<body class="bg-slate-950 text-slate-100">
<div id="root"></div>
</body>
</html>

View File

@@ -0,0 +1,279 @@
import { useEffect, useState, useMemo } from 'react'
import axios from 'axios'
import { Search, Edit2, X, Check, Filter, Download } from 'lucide-react'
import clsx from 'clsx'
/** Props for the MarketingMatrixManager component. */
interface MarketingMatrixManagerProps {
  /** Base URL of the backend API (used as `${apiBase}/matrix`, etc.). */
  apiBase: string
}

/** One industry × persona cell of the marketing matrix, as returned by GET `${apiBase}/matrix`. */
interface MatrixEntry {
  id: number
  industry_id: number
  persona_id: number
  /** Denormalized display names of the related industry/persona. */
  industry_name: string
  persona_name: string
  /** Editable copy fields; null when not yet filled in. */
  subject: string | null
  intro: string | null
  social_proof: string | null
  updated_at: string
}

/** Industry option used to populate the industry filter dropdown. */
interface Industry {
  id: number
  name: string
}

/** Persona option used to populate the persona filter dropdown. */
interface Persona {
  id: number
  name: string
}
/**
 * Admin editor for the industry × persona marketing matrix.
 *
 * Loads the industry/persona dropdown metadata once on mount, then refetches
 * matrix entries whenever a filter changes. Rows are edited inline (subject /
 * intro / social proof) and saved via PUT `${apiBase}/matrix/:id`; the current
 * filter selection can also be exported as CSV through the backend.
 */
export function MarketingMatrixManager({ apiBase }: MarketingMatrixManagerProps) {
  const [entries, setEntries] = useState<MatrixEntry[]>([])
  const [industries, setIndustries] = useState<Industry[]>([])
  const [personas, setPersonas] = useState<Persona[]>([])
  const [isLoading, setIsLoading] = useState(false)

  // Filters — 'all' means "no filter" and is translated to an absent query param.
  const [industryFilter, setIndustryFilter] = useState<number | 'all'>('all')
  const [personaFilter, setPersonaFilter] = useState<number | 'all'>('all')
  const [searchTerm, setSearchTerm] = useState('')

  // Editing state — id of the row being edited plus its draft field values.
  const [editingId, setEditingId] = useState<number | null>(null)
  const [editValues, setEditValues] = useState<{
    subject: string
    intro: string
    social_proof: string
  }>({ subject: '', intro: '', social_proof: '' })

  // Fetch both dropdown lists in parallel; failures are logged, not surfaced.
  const fetchMetadata = async () => {
    try {
      const [resInd, resPers] = await Promise.all([
        axios.get(`${apiBase}/industries`),
        axios.get(`${apiBase}/matrix/personas`)
      ])
      setIndustries(resInd.data)
      setPersonas(resPers.data)
    } catch (e) {
      console.error("Failed to fetch metadata:", e)
    }
  }

  // Fetch matrix entries for the currently selected filters.
  const fetchEntries = async () => {
    setIsLoading(true)
    try {
      const params: any = {}
      if (industryFilter !== 'all') params.industry_id = industryFilter
      if (personaFilter !== 'all') params.persona_id = personaFilter
      const res = await axios.get(`${apiBase}/matrix`, { params })
      setEntries(res.data)
    } catch (e) {
      console.error("Failed to fetch matrix entries:", e)
    } finally {
      setIsLoading(false)
    }
  }

  // Metadata is loaded once on mount.
  useEffect(() => {
    fetchMetadata()
  }, [])

  // Entries are refetched server-side whenever a dropdown filter changes.
  useEffect(() => {
    fetchEntries()
  }, [industryFilter, personaFilter])

  // Free-text search is applied client-side on top of the server-filtered list.
  // Optional fields may be null, so the chained `?.` calls yield undefined
  // (falsy) for empty cells rather than throwing.
  const filteredEntries = useMemo(() => {
    if (!searchTerm) return entries
    const s = searchTerm.toLowerCase()
    return entries.filter(e =>
      e.industry_name.toLowerCase().includes(s) ||
      e.persona_name.toLowerCase().includes(s) ||
      (e.subject?.toLowerCase().includes(s)) ||
      (e.intro?.toLowerCase().includes(s)) ||
      (e.social_proof?.toLowerCase().includes(s))
    )
  }, [entries, searchTerm])

  // Enter edit mode for a row, seeding the drafts from the row (null → '').
  const startEditing = (entry: MatrixEntry) => {
    setEditingId(entry.id)
    setEditValues({
      subject: entry.subject || '',
      intro: entry.intro || '',
      social_proof: entry.social_proof || ''
    })
  }

  const cancelEditing = () => {
    setEditingId(null)
  }

  // Persist the drafts, then update the row in local state (optimistic-style;
  // note updated_at is not refreshed until the next fetch).
  const saveEditing = async (id: number) => {
    try {
      await axios.put(`${apiBase}/matrix/${id}`, editValues)
      setEntries(prev => prev.map(e => e.id === id ? { ...e, ...editValues } : e))
      setEditingId(null)
    } catch (e) {
      alert("Save failed")
      console.error(e)
    }
  }

  // Open the backend CSV export in a new tab, carrying the current filters.
  const handleDownloadCSV = () => {
    let url = `${apiBase}/matrix/export`
    const params = new URLSearchParams()
    if (industryFilter !== 'all') params.append('industry_id', industryFilter.toString())
    if (personaFilter !== 'all') params.append('persona_id', personaFilter.toString())
    if (params.toString()) {
      url += `?${params.toString()}`
    }
    window.open(url, '_blank')
  }

  return (
    <div className="space-y-4">
      {/* Toolbar: filter dropdowns, free-text search, CSV export */}
      <div className="flex flex-wrap items-center gap-3 bg-slate-50 dark:bg-slate-950 p-3 rounded-lg border border-slate-200 dark:border-slate-800">
        <div className="flex items-center gap-2">
          <Filter className="h-4 w-4 text-slate-400" />
          <span className="text-xs font-bold text-slate-500 uppercase">Filters:</span>
        </div>
        <select
          className="bg-white dark:bg-slate-900 border border-slate-300 dark:border-slate-700 rounded px-2 py-1.5 text-xs outline-none focus:ring-1 focus:ring-blue-500"
          value={industryFilter}
          onChange={e => setIndustryFilter(e.target.value === 'all' ? 'all' : parseInt(e.target.value))}
        >
          <option value="all">All Industries</option>
          {industries.map(i => <option key={i.id} value={i.id}>{i.name}</option>)}
        </select>
        <select
          className="bg-white dark:bg-slate-900 border border-slate-300 dark:border-slate-700 rounded px-2 py-1.5 text-xs outline-none focus:ring-1 focus:ring-blue-500"
          value={personaFilter}
          onChange={e => setPersonaFilter(e.target.value === 'all' ? 'all' : parseInt(e.target.value))}
        >
          <option value="all">All Personas</option>
          {personas.map(p => <option key={p.id} value={p.id}>{p.name}</option>)}
        </select>
        <div className="flex-1 min-w-[200px] relative">
          <Search className="absolute left-3 top-1/2 -translate-y-1/2 h-3.5 w-3.5 text-slate-400" />
          <input
            type="text"
            placeholder="Search in texts..."
            className="w-full bg-white dark:bg-slate-900 border border-slate-300 dark:border-slate-700 rounded px-9 py-1.5 text-xs outline-none focus:ring-1 focus:ring-blue-500"
            value={searchTerm}
            onChange={e => setSearchTerm(e.target.value)}
          />
        </div>
        <button
          onClick={handleDownloadCSV}
          className="flex items-center gap-2 bg-slate-200 dark:bg-slate-800 hover:bg-slate-300 dark:hover:bg-slate-700 text-slate-700 dark:text-slate-300 px-3 py-1.5 rounded text-xs font-bold transition-all border border-slate-300 dark:border-slate-700"
        >
          <Download className="h-3.5 w-3.5" />
          EXPORT CSV
        </button>
      </div>

      {/* Table: one row per matrix entry; the edited row swaps to input fields */}
      <div className="border border-slate-200 dark:border-slate-800 rounded-lg overflow-hidden bg-white dark:bg-slate-950">
        <table className="w-full text-left text-xs table-fixed">
          <thead className="bg-slate-50 dark:bg-slate-900 border-b border-slate-200 dark:border-slate-800 text-slate-500 font-bold uppercase">
            <tr>
              <th className="p-3 w-40">Combination</th>
              <th className="p-3 w-1/4">Subject Line</th>
              <th className="p-3 w-1/3">Intro Text</th>
              <th className="p-3">Social Proof</th>
              <th className="p-3 w-20 text-center">Action</th>
            </tr>
          </thead>
          <tbody className="divide-y divide-slate-100 dark:divide-slate-800/50">
            {isLoading ? (
              <tr><td colSpan={5} className="p-12 text-center text-slate-400 italic">Loading matrix entries...</td></tr>
            ) : filteredEntries.length === 0 ? (
              <tr><td colSpan={5} className="p-12 text-center text-slate-400 italic">No entries found for the selected filters.</td></tr>
            ) : filteredEntries.map(entry => (
              <tr key={entry.id} className={clsx("group transition-colors", editingId === entry.id ? "bg-blue-50/50 dark:bg-blue-900/10" : "hover:bg-slate-50/50 dark:hover:bg-slate-900/30")}>
                <td className="p-3 align-top">
                  <div className="font-bold text-slate-900 dark:text-white leading-tight mb-1">{entry.industry_name}</div>
                  <div className="text-[10px] text-blue-600 dark:text-blue-400 font-bold uppercase tracking-wider">{entry.persona_name}</div>
                </td>
                <td className="p-3 align-top">
                  {editingId === entry.id ? (
                    <input
                      className="w-full bg-white dark:bg-slate-900 border border-blue-300 dark:border-blue-700 rounded p-1.5 outline-none"
                      value={editValues.subject}
                      onChange={e => setEditValues(v => ({ ...v, subject: e.target.value }))}
                    />
                  ) : (
                    <div className="text-slate-700 dark:text-slate-300">{entry.subject || <span className="text-slate-400 italic">Empty</span>}</div>
                  )}
                </td>
                <td className="p-3 align-top">
                  {editingId === entry.id ? (
                    <textarea
                      className="w-full bg-white dark:bg-slate-900 border border-blue-300 dark:border-blue-700 rounded p-1.5 outline-none h-24 text-[11px]"
                      value={editValues.intro}
                      onChange={e => setEditValues(v => ({ ...v, intro: e.target.value }))}
                    />
                  ) : (
                    <div className="text-slate-600 dark:text-slate-400 line-clamp-4 hover:line-clamp-none transition-all">{entry.intro || <span className="text-slate-400 italic">Empty</span>}</div>
                  )}
                </td>
                <td className="p-3 align-top">
                  {editingId === entry.id ? (
                    <textarea
                      className="w-full bg-white dark:bg-slate-900 border border-blue-300 dark:border-blue-700 rounded p-1.5 outline-none h-24 text-[11px]"
                      value={editValues.social_proof}
                      onChange={e => setEditValues(v => ({ ...v, social_proof: e.target.value }))}
                    />
                  ) : (
                    <div className="text-slate-600 dark:text-slate-400 line-clamp-4 hover:line-clamp-none transition-all">{entry.social_proof || <span className="text-slate-400 italic">Empty</span>}</div>
                  )}
                </td>
                <td className="p-3 align-top text-center">
                  {editingId === entry.id ? (
                    <div className="flex flex-col gap-2">
                      <button
                        onClick={() => saveEditing(entry.id)}
                        className="p-1.5 bg-green-600 text-white rounded hover:bg-green-500 transition-colors shadow-sm"
                        title="Save Changes"
                      >
                        <Check className="h-4 w-4" />
                      </button>
                      <button
                        onClick={cancelEditing}
                        className="p-1.5 bg-slate-200 dark:bg-slate-800 text-slate-600 dark:text-slate-400 rounded hover:bg-slate-300 dark:hover:bg-slate-700 transition-colors"
                        title="Cancel"
                      >
                        <X className="h-4 w-4" />
                      </button>
                    </div>
                  ) : (
                    <button
                      onClick={() => startEditing(entry)}
                      className="p-2 text-slate-400 hover:text-blue-500 hover:bg-blue-50 dark:hover:bg-blue-900/20 rounded-full transition-all opacity-0 group-hover:opacity-100"
                      title="Edit Entry"
                    >
                      <Edit2 className="h-4 w-4" />
                    </button>
                  )}
                </td>
              </tr>
            ))}
          </tbody>
        </table>
      </div>
    </div>
  )
}

View File

@@ -1,7 +1,8 @@
import { useEffect, useState } from 'react'
import { useEffect, useState, useMemo } from 'react'
import axios from 'axios'
import { X, Bot, Tag, Target, Users, Plus, Trash2, Save, Flag, Check, Ban, ExternalLink } from 'lucide-react'
import { X, Bot, Tag, Target, Users, Plus, Trash2, Save, Flag, Check, Ban, ExternalLink, ChevronDown, Grid } from 'lucide-react'
import clsx from 'clsx'
import { MarketingMatrixManager } from './MarketingMatrixManager'
interface RoboticsSettingsProps {
isOpen: boolean
@@ -9,6 +10,28 @@ interface RoboticsSettingsProps {
apiBase: string
}
type JobRolePatternType = {
id: number;
pattern_type: 'exact' | 'regex';
pattern_value: string;
role: string; // Should match Persona.name
priority: number;
is_active: boolean;
created_by: string;
created_at: string;
updated_at: string;
}
type RawJobTitleType = {
id: number;
title: string;
count: number;
source: string;
is_mapped: boolean;
created_at: string;
updated_at: string;
}
type ReportedMistake = {
id: number;
company_id: number;
@@ -25,17 +48,45 @@ type ReportedMistake = {
}
export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsProps) {
const [activeTab, setActiveTab] = useState<'robotics' | 'industries' | 'roles' | 'mistakes'>(
localStorage.getItem('roboticsSettingsActiveTab') as 'robotics' | 'industries' | 'roles' | 'mistakes' || 'robotics'
const [activeTab, setActiveTab] = useState<'robotics' | 'industries' | 'roles' | 'mistakes' | 'matrix'>(
localStorage.getItem('roboticsSettingsActiveTab') as 'robotics' | 'industries' | 'roles' | 'mistakes' | 'matrix' || 'robotics'
)
const [roboticsCategories, setRoboticsCategories] = useState<any[]>([])
const [industries, setIndustries] = useState<any[]>([])
const [jobRoles, setJobRoles] = useState<any[]>([])
const [rawJobTitles, setRawJobTitles] = useState<any[]>([])
const [jobRoles, setJobRoles] = useState<JobRolePatternType[]>([])
const [rawJobTitles, setRawJobTitles] = useState<RawJobTitleType[]>([])
const [reportedMistakes, setReportedMistakes] = useState<ReportedMistake[]>([])
const [currentMistakeStatusFilter, setCurrentMistakeStatusFilter] = useState<string>("PENDING");
const [isLoading, setIsLoading] = useState(false);
const [isClassifying, setIsClassifying] = useState(false);
const [roleSearch, setRoleSearch] = useState("");
const groupedAndFilteredRoles = useMemo(() => {
const grouped = jobRoles.reduce((acc: Record<string, JobRolePatternType[]>, role) => {
const key = role.role || 'Unassigned';
if (!acc[key]) {
acc[key] = [];
}
acc[key].push(role);
return acc;
}, {} as Record<string, JobRolePatternType[]>);
if (!roleSearch) {
return grouped;
}
const filtered = {} as Record<string, JobRolePatternType[]>;
for (const roleName in grouped) {
const roles = grouped[roleName].filter((r: JobRolePatternType) =>
r.pattern_value.toLowerCase().includes(roleSearch.toLowerCase())
);
if (roles.length > 0) {
filtered[roleName] = roles;
}
}
return filtered;
}, [jobRoles, roleSearch]);
const fetchAllData = async () => {
setIsLoading(true);
@@ -44,7 +95,7 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
axios.get(`${apiBase}/robotics/categories`),
axios.get(`${apiBase}/industries`),
axios.get(`${apiBase}/job_roles`),
axios.get(`${apiBase}/job_roles/raw`),
axios.get(`${apiBase}/job_roles/raw?unmapped_only=true`), // Ensure we only get unmapped
axios.get(`${apiBase}/mistakes?status=${currentMistakeStatusFilter}`),
]);
setRoboticsCategories(resRobotics.data);
@@ -66,6 +117,21 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
}
}, [isOpen]);
useEffect(() => {
if (isOpen) {
// Refetch mistakes when filter changes
const fetchMistakes = async () => {
setIsLoading(true);
try {
const res = await axios.get(`${apiBase}/mistakes?status=${currentMistakeStatusFilter}`);
setReportedMistakes(res.data.items);
} catch (e) { console.error(e) }
finally { setIsLoading(false) }
}
fetchMistakes();
}
}, [currentMistakeStatusFilter]);
useEffect(() => {
localStorage.setItem('roboticsSettingsActiveTab', activeTab);
}, [activeTab]);
@@ -97,10 +163,56 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
}
};
const handleAddJobRole = async () => {
const handleBatchClassify = async () => {
if (!window.confirm(`This will send all ${rawJobTitles.length} unmapped job titles to the AI for classification. This may take a few minutes. Continue?`)) {
return;
}
setIsClassifying(true);
try {
await axios.post(`${apiBase}/job_roles/classify-batch`);
alert("Batch classification started in the background. The list will update automatically as titles are processed. You can close this window.");
// Optionally, you can poll for completion or just let the user see the number go down on next refresh.
// For now, we just inform the user.
} catch (e) {
alert("Failed to start batch classification.");
console.error(e);
} finally {
setIsClassifying(false);
}
};
const handleUpdateJobRole = async (roleId: number, field: string, value: any) => {
const roleToUpdate = jobRoles.find(r => r.id === roleId);
if (!roleToUpdate) return;
const updatedRole = { ...roleToUpdate, [field]: value };
// Convert priority to number just in case
if (field === 'priority') {
updatedRole.priority = parseInt(value, 10);
}
try {
await axios.put(`${apiBase}/job_roles/${roleId}`, updatedRole);
// Optimistic update
setJobRoles(jobRoles.map(r => r.id === roleId ? updatedRole : r));
} catch (e) {
alert("Failed to update job role");
console.error(e);
// Revert on failure if needed, but for now just log it
}
};
const handleAddJobRole = async (title?: string) => {
const patternValue = title || "New Pattern";
setIsLoading(true);
try {
await axios.post(`${apiBase}/job_roles`, { pattern: "New Pattern", role: "Operativer Entscheider" });
await axios.post(`${apiBase}/job_roles`, {
pattern_type: "exact",
pattern_value: patternValue,
role: "Influencer",
priority: 100
});
fetchAllData();
} catch (e) {
alert("Failed to add job role");
@@ -109,7 +221,11 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
setIsLoading(false);
}
}
const handleDeleteJobRole = async (id: number) => {
if (!window.confirm("Are you sure you want to delete this pattern?")) {
return;
}
setIsLoading(true);
try {
await axios.delete(`${apiBase}/job_roles/${id}`);
@@ -144,6 +260,7 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
{ id: 'robotics', label: 'Robotics Potential', icon: Bot },
{ id: 'industries', label: 'Industry Focus', icon: Target },
{ id: 'roles', label: 'Job Role Mapping', icon: Users },
{ id: 'matrix', label: 'Marketing Matrix', icon: Grid },
{ id: 'mistakes', label: 'Reported Mistakes', icon: Flag },
].map(t => (
<button
@@ -255,29 +372,61 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
</div>
<div key="roles-content" className={clsx("space-y-8", { 'hidden': isLoading || activeTab !== 'roles' })}>
{/* Existing Patterns */}
{/* Existing Patterns Grouped */}
<div className="space-y-4">
<div className="flex justify-between items-center">
<div>
<h3 className="text-sm font-bold text-slate-700 dark:text-slate-300">Active Mapping Patterns</h3>
<p className="text-[10px] text-slate-500 uppercase font-semibold">Deterministic Regex/Text rules</p>
<div className="flex justify-between items-center gap-4">
<div className="flex-1">
<input
type="text"
placeholder="Search patterns..."
value={roleSearch}
onChange={e => setRoleSearch(e.target.value)}
className="w-full bg-white dark:bg-slate-950 border border-slate-300 dark:border-slate-700 rounded-md px-3 py-1.5 text-xs text-slate-900 dark:text-white focus:ring-1 focus:ring-blue-500 outline-none"
/>
</div>
<button onClick={handleAddJobRole} className="flex items-center gap-1 px-3 py-1.5 bg-blue-600 hover:bg-blue-500 text-white text-xs font-bold rounded shadow-lg shadow-blue-500/20"><Plus className="h-3 w-3" /> ADD PATTERN</button>
<button onClick={() => handleAddJobRole()} className="flex items-center gap-1 px-3 py-1.5 bg-blue-600 hover:bg-blue-500 text-white text-xs font-bold rounded shadow-lg shadow-blue-500/20"><Plus className="h-3 w-3" /> ADD PATTERN</button>
</div>
<div className="bg-white dark:bg-slate-950 border border-slate-200 dark:border-slate-800 rounded-xl overflow-hidden shadow-sm">
<div className="space-y-2">
{Object.keys(groupedAndFilteredRoles).sort().map(roleName => (
<details key={roleName} className="bg-white dark:bg-slate-950 border border-slate-200 dark:border-slate-800 rounded-lg group" open={!!roleSearch}>
<summary className="p-3 cursor-pointer flex justify-between items-center group-hover:bg-slate-50 dark:group-hover:bg-slate-900 transition-colors">
<div className="font-semibold text-slate-800 dark:text-slate-200 text-xs">
{roleName}
<span className="ml-2 text-slate-400 font-normal">({groupedAndFilteredRoles[roleName].length} patterns)</span>
</div>
<ChevronDown className="h-4 w-4 text-slate-400 transform group-open:rotate-180 transition-transform" />
</summary>
<div className="border-t border-slate-200 dark:border-slate-800">
<table className="w-full text-left text-xs">
<thead className="bg-slate-50 dark:bg-slate-900/50 border-b border-slate-200 dark:border-slate-800 text-slate-500 font-bold uppercase tracking-wider"><tr><th className="p-3">Pattern (% for wildcard)</th><th className="p-3">Target Persona Role</th><th className="p-3 w-10"></th></tr></thead>
<thead className="bg-slate-50 dark:bg-slate-900/50 text-slate-500 font-bold uppercase tracking-wider">
<tr>
<th className="p-2">Type</th>
<th className="p-2">Pattern Value</th>
<th className="p-2">Priority</th>
<th className="p-2 w-8"></th>
</tr>
</thead>
<tbody className="divide-y divide-slate-100 dark:divide-slate-800/50">
{jobRoles.map(role => (
<tr key={role.id} className="group hover:bg-slate-50/50 dark:hover:bg-slate-800/30 transition-colors">
<td className="p-2"><input className="w-full bg-transparent border border-transparent hover:border-slate-300 dark:hover:border-slate-700 rounded px-2 py-1 text-slate-900 dark:text-slate-200 outline-none focus:border-blue-500 font-mono" defaultValue={role.pattern} /></td>
<td className="p-2"><select className="w-full bg-transparent border border-transparent hover:border-slate-300 dark:hover:border-slate-700 rounded px-2 py-1 text-slate-900 dark:text-slate-200 outline-none focus:border-blue-500" defaultValue={role.role}><option>Operativer Entscheider</option><option>Infrastruktur-Verantwortlicher</option><option>Wirtschaftlicher Entscheider</option><option>Innovations-Treiber</option><option>Influencer</option></select></td>
<td className="p-2 text-center"><button onClick={() => handleDeleteJobRole(role.id)} className="text-slate-400 hover:text-red-500 opacity-0 group-hover:opacity-100 transition-all transform hover:scale-110"><Trash2 className="h-4 w-4" /></button></td>
{groupedAndFilteredRoles[roleName].map((role: JobRolePatternType) => (
<tr key={role.id} className="group/row hover:bg-slate-50/50 dark:hover:bg-slate-800/30 transition-colors">
<td className="p-1.5">
<select className="w-full bg-transparent border border-transparent hover:border-slate-300 dark:hover:border-slate-700 rounded px-1 py-0.5 text-slate-900 dark:text-slate-200 outline-none focus:border-blue-500" defaultValue={role.pattern_type} onChange={(e) => handleUpdateJobRole(role.id, 'pattern_type', e.target.value)}>
<option>exact</option>
<option>regex</option>
</select>
</td>
<td className="p-1.5"><input className="w-full bg-transparent border border-transparent hover:border-slate-300 dark:hover:border-slate-700 rounded px-1 py-0.5 text-slate-900 dark:text-slate-200 outline-none focus:border-blue-500 font-mono" defaultValue={role.pattern_value} onBlur={(e) => handleUpdateJobRole(role.id, 'pattern_value', e.target.value)} /></td>
<td className="p-1.5"><input type="number" className="w-16 bg-transparent border border-transparent hover:border-slate-300 dark:hover:border-slate-700 rounded px-1 py-0.5 text-slate-900 dark:text-slate-200 outline-none focus:border-blue-500 font-mono" defaultValue={role.priority} onBlur={(e) => handleUpdateJobRole(role.id, 'priority', e.target.value)} /></td>
<td className="p-1.5 text-center"><button onClick={() => handleDeleteJobRole(role.id)} className="text-slate-400 hover:text-red-500 opacity-0 group-hover/row:opacity-100 transition-all transform hover:scale-110"><Trash2 className="h-4 w-4" /></button></td>
</tr>
))}
</tbody>
</table>
</div>
</details>
))}
</div>
</div>
{/* Discovery Inbox */}
@@ -287,19 +436,26 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
<h3 className="text-sm font-bold text-slate-700 dark:text-slate-300">Discovery Inbox</h3>
<p className="text-[10px] text-slate-500 uppercase font-semibold">Unmapped job titles from CRM, prioritized by frequency</p>
</div>
{rawJobTitles.length > 0 && (
<button
onClick={handleBatchClassify}
disabled={isClassifying}
className="flex items-center gap-1 px-3 py-1.5 bg-green-600 hover:bg-green-500 text-white text-xs font-bold rounded shadow-lg shadow-green-500/20 disabled:bg-slate-400 disabled:shadow-none"
>
<Bot className="h-3 w-3" />
{isClassifying ? 'CLASSIFYING...' : `CLASSIFY ${rawJobTitles.length} TITLES`}
</button>
)}
</div>
<div className="bg-slate-50/50 dark:bg-slate-900/20 border border-dashed border-slate-300 dark:border-slate-700 rounded-xl overflow-hidden">
<table className="w-full text-left text-xs">
<thead className="bg-slate-100/50 dark:bg-slate-900/80 border-b border-slate-200 dark:border-slate-800 text-slate-400 font-bold uppercase tracking-wider"><tr><th className="p-3">Job Title from CRM</th><th className="p-3 w-20 text-center">Frequency</th><th className="p-3 w-10"></th></tr></thead>
<tbody className="divide-y divide-slate-100 dark:divide-slate-800/50">
{rawJobTitles.map(raw => (
{rawJobTitles.map((raw: RawJobTitleType) => (
<tr key={raw.id} className="group hover:bg-white dark:hover:bg-slate-800 transition-colors">
<td className="p-3 font-medium text-slate-600 dark:text-slate-400 italic">{raw.title}</td>
<td className="p-3 text-center"><span className="px-2 py-1 bg-slate-200 dark:bg-slate-800 rounded-full font-bold text-[10px] text-slate-500">{raw.count}x</span></td>
<td className="p-3 text-center"><button onClick={async () => {
await axios.post(`${apiBase}/job_roles`, { pattern: `%${raw.title.toLowerCase()}%`, role: "Influencer" });
fetchAllData();
}} className="p-1 text-blue-500 hover:bg-blue-100 dark:hover:bg-blue-900/30 rounded transition-all"><Plus className="h-4 w-4" /></button></td>
<td className="p-3 text-center"><button onClick={() => handleAddJobRole(raw.title)} className="p-1 text-blue-500 hover:bg-blue-100 dark:hover:bg-blue-900/30 rounded transition-all"><Plus className="h-4 w-4" /></button></td>
</tr>
))}
{rawJobTitles.length === 0 && (<tr><td colSpan={3} className="p-12 text-center text-slate-400 italic">Discovery inbox is empty. Import raw job titles to see data here.</td></tr>)}
@@ -309,6 +465,10 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP
</div>
</div>
<div key="matrix-content" className={clsx("space-y-4", { 'hidden': isLoading || activeTab !== 'matrix' })}>
<MarketingMatrixManager apiBase={apiBase} />
</div>
<div key="mistakes-content" className={clsx("space-y-4", { 'hidden': isLoading || activeTab !== 'mistakes' })}>
<div className="flex justify-between items-center">
<h3 className="text-sm font-bold text-slate-700 dark:text-slate-300">Reported Data Mistakes</h3>

92
standalone_importer.py Normal file
View File

@@ -0,0 +1,92 @@
import csv
from collections import Counter
import os
import argparse
from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from datetime import datetime
import logging
# --- Standalone Configuration ---
# Hard-coded container paths: this script is meant to run inside the app's
# Docker container, writing to the same SQLite DB the backend uses.
DATABASE_URL = "sqlite:////app/companies_v3_fixed_2.db"
LOG_FILE = "/app/Log_from_docker/standalone_importer.log"

# --- Logging Setup ---
# The log directory must exist BEFORE logging.basicConfig runs: FileHandler
# opens the file eagerly at import time and would raise FileNotFoundError on a
# fresh container otherwise (the makedirs inside the __main__ guard runs only
# after this module-level code has already executed).
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# --- SQLAlchemy Models (simplified, only what's needed) ---
Base = declarative_base()
class RawJobTitle(Base):
    """Minimal mirror of the app's `raw_job_titles` table (only what this importer needs)."""
    __tablename__ = 'raw_job_titles'
    id = Column(Integer, primary_key=True)
    # Raw job title exactly as it appeared in the source data; unique, so
    # re-imports update existing rows instead of duplicating them.
    title = Column(String, unique=True, index=True)
    # Occurrence count of this exact title in the imported data.
    count = Column(Integer, default=1)
    source = Column(String, default="import")
    # Presumably flipped to True by the main app once a mapping pattern exists
    # for this title — confirm against backend.database. This importer always
    # writes False.
    is_mapped = Column(Boolean, default=False)
    # NOTE(review): datetime.utcnow is deprecated (3.12+) and yields naive
    # timestamps; consider datetime.now(timezone.utc), but confirm the main
    # app's schema stores naive UTC before changing.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
# --- Database Connection ---
# Module-level engine and session factory bound to the container's SQLite file.
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
def import_job_titles_standalone(file_path: str):
    """Import job titles from a one-column CSV into the raw_job_titles table.

    Every non-empty first-column value is counted; existing rows get their
    count overwritten with the freshly computed frequency (so re-importing the
    same file is idempotent), and unseen titles are inserted with
    source="csv_import". Commits once at the end; rolls back and logs on any
    error.

    Args:
        file_path: Path to the CSV file whose first column holds the titles.
    """
    db = SessionLocal()
    try:
        logger.info(f"Starting standalone import of job titles from {file_path}")
        job_title_counts = Counter()
        total_rows = 0
        # newline='' is required by the csv module so quoted fields containing
        # embedded newlines are parsed correctly; utf-8-sig transparently
        # strips a leading BOM that would otherwise be glued onto the first
        # title (plain utf-8 input is unaffected).
        with open(file_path, 'r', newline='', encoding='utf-8-sig') as f:
            reader = csv.reader(f)
            for row in reader:
                if row and row[0].strip():
                    title = row[0].strip()
                    job_title_counts[title] += 1
                    total_rows += 1
        logger.info(f"Read {total_rows} total job title entries. Found {len(job_title_counts)} unique titles.")
        added_count = 0
        updated_count = 0
        for title, count in job_title_counts.items():
            existing_title = db.query(RawJobTitle).filter(RawJobTitle.title == title).first()
            if existing_title:
                # Overwrite (not accumulate) the stored count so a repeat
                # import of the same file does not inflate frequencies.
                if existing_title.count != count:
                    existing_title.count = count
                    updated_count += 1
            else:
                new_title = RawJobTitle(title=title, count=count, source="csv_import", is_mapped=False)
                db.add(new_title)
                added_count += 1
        db.commit()
        logger.info(f"Standalone import complete. Added {added_count} new unique titles, updated {updated_count} existing titles.")
    except Exception as e:
        logger.error(f"Error during standalone job title import: {e}", exc_info=True)
        db.rollback()
    finally:
        db.close()
if __name__ == "__main__":
    # CLI entry point: one positional argument naming the CSV file to import.
    arg_parser = argparse.ArgumentParser(
        description="Standalone script to import job titles from a CSV file."
    )
    arg_parser.add_argument(
        "file_path",
        type=str,
        help="Path to the CSV file containing job titles.",
    )
    cli_args = arg_parser.parse_args()
    # Make sure the log directory exists before any logging output is written.
    log_dir = os.path.dirname(LOG_FILE)
    os.makedirs(log_dir, exist_ok=True)
    import_job_titles_standalone(cli_args.file_path)

22
test_api_logic.py Normal file
View File

@@ -0,0 +1,22 @@
import os
import sys
# Add the company-explorer directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), 'company-explorer')))
from backend.database import SessionLocal, MarketingMatrix, Industry, Persona
from sqlalchemy.orm import joinedload
# Smoke-test script: verify MarketingMatrix rows load together with their
# related Industry/Persona via eager loading, then print a small sample.
db = SessionLocal()
try:
    # joinedload pulls the related rows in the same query, so reading
    # e.industry / e.persona below does not trigger per-row lazy loads.
    query = db.query(MarketingMatrix).options(
        joinedload(MarketingMatrix.industry),
        joinedload(MarketingMatrix.persona)
    )
    entries = query.all()
    print(f"Total entries: {len(entries)}")
    # Print up to three entries as a readable sanity check.
    for e in entries[:3]:
        print(f"ID={e.id}, Industry={e.industry.name if e.industry else 'N/A'}, Persona={e.persona.name if e.persona else 'N/A'}")
        print(f" Subject: {e.subject}")
finally:
    # Always release the session, even when the query fails.
    db.close()