From ea87ace6e21ac158e058dcd0b7f6756539576f4d Mon Sep 17 00:00:00 2001 From: Floke Date: Mon, 19 Jan 2026 07:58:49 +0000 Subject: [PATCH] feat: Connect classification service to DB industries & update docs --- MIGRATION_PLAN.md | 2 +- .../backend/services/classification.py | 26 ++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/MIGRATION_PLAN.md b/MIGRATION_PLAN.md index 8bc06231..d6c48cac 100644 --- a/MIGRATION_PLAN.md +++ b/MIGRATION_PLAN.md @@ -304,6 +304,6 @@ Based on the identified infrastructure, score the potential for these categories **Variablen:** * **`company_name`**: Name des Unternehmens. * **`website_text`**: Der gescrapte Text der Hauptseite (max. 20.000 Zeichen). -* **`allowed_industries`**: JSON-Liste der erlaubten Branchen (Strict Mode). +* **`allowed_industries`**: Dynamisch geladene Liste der erlaubten Branchen aus der Datenbanktabelle `industries` (konfiguriert via Settings > Industry Focus). * **`category_guidance`**: Dynamisch generierte Definitionen und Scoring-Regeln für die Robotik-Kategorien (aus der Datenbank). diff --git a/company-explorer/backend/services/classification.py b/company-explorer/backend/services/classification.py index 3a077691..1771e055 100644 --- a/company-explorer/backend/services/classification.py +++ b/company-explorer/backend/services/classification.py @@ -4,23 +4,30 @@ import os from typing import Dict, Any, List from ..lib.core_utils import call_gemini, clean_json_response from ..config import settings -from ..database import SessionLocal, RoboticsCategory +from ..database import SessionLocal, RoboticsCategory, Industry logger = logging.getLogger(__name__) -ALLOWED_INDUSTRIES_FILE = os.path.join(os.path.dirname(__file__), "../data/allowed_industries.json") - class ClassificationService: def __init__(self): - self.allowed_industries = self._load_allowed_industries() + pass - def _load_allowed_industries(self) -> List[str]: + def _get_allowed_industries(self) -> List[str]: + """ + Fetches the allowed industries from the database (Settings > Industry Focus). + """ + db = SessionLocal() try: - with open(ALLOWED_INDUSTRIES_FILE, 'r', encoding='utf-8') as f: - return json.load(f) + # Query all industries, order by name for consistency + industries = db.query(Industry.name).order_by(Industry.name).all() + # extract names from tuples (query returns list of tuples) + names = [i[0] for i in industries] + return names if names else ["Sonstige"] except Exception as e: - logger.error(f"Failed to load allowed industries: {e}") + logger.error(f"Failed to load allowed industries from DB: {e}") return ["Sonstige"] + finally: + db.close() def _get_category_prompts(self) -> str: """ @@ -52,6 +59,7 @@ class ClassificationService: return {"error": "Insufficient text content"} category_guidance = self._get_category_prompts() + allowed_industries = self._get_allowed_industries() prompt = f""" You are a Senior B2B Market Analyst for 'Roboplanet', a specialized robotics distributor. @@ -64,7 +72,7 @@ class ClassificationService: --- ALLOWED INDUSTRIES (STRICT) --- You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige". - {json.dumps(self.allowed_industries, ensure_ascii=False)} + {json.dumps(allowed_industries, ensure_ascii=False)} --- ANALYSIS PART 1: BUSINESS MODEL --- 1. Identify the core products/services.