feat: Connect classification service to DB industries & update docs

This commit is contained in:
2026-01-19 07:58:49 +00:00
parent 15280b00cf
commit 8bdc9983e6
2 changed files with 18 additions and 10 deletions

View File

@@ -304,6 +304,6 @@ Based on the identified infrastructure, score the potential for these categories
**Variablen:** **Variablen:**
* **`company_name`**: Name des Unternehmens. * **`company_name`**: Name des Unternehmens.
* **`website_text`**: Der gescrapte Text der Hauptseite (max. 20.000 Zeichen). * **`website_text`**: Der gescrapte Text der Hauptseite (max. 20.000 Zeichen).
* **`allowed_industries`**: JSON-Liste der erlaubten Branchen (Strict Mode). * **`allowed_industries`**: Dynamisch geladene Liste der erlaubten Branchen aus der Datenbanktabelle `industries` (konfiguriert via Settings > Industry Focus).
* **`category_guidance`**: Dynamisch generierte Definitionen und Scoring-Regeln für die Robotik-Kategorien (aus der Datenbank). * **`category_guidance`**: Dynamisch generierte Definitionen und Scoring-Regeln für die Robotik-Kategorien (aus der Datenbank).

View File

@@ -4,23 +4,30 @@ import os
from typing import Dict, Any, List from typing import Dict, Any, List
from ..lib.core_utils import call_gemini, clean_json_response from ..lib.core_utils import call_gemini, clean_json_response
from ..config import settings from ..config import settings
from ..database import SessionLocal, RoboticsCategory from ..database import SessionLocal, RoboticsCategory, Industry
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
ALLOWED_INDUSTRIES_FILE = os.path.join(os.path.dirname(__file__), "../data/allowed_industries.json")
class ClassificationService: class ClassificationService:
def __init__(self): def __init__(self):
self.allowed_industries = self._load_allowed_industries() pass
def _load_allowed_industries(self) -> List[str]: def _get_allowed_industries(self) -> List[str]:
"""
Fetches the allowed industries from the database (Settings > Industry Focus).
"""
db = SessionLocal()
try: try:
with open(ALLOWED_INDUSTRIES_FILE, 'r', encoding='utf-8') as f: # Query all industries, order by name for consistency
return json.load(f) industries = db.query(Industry.name).order_by(Industry.name).all()
# extract names from tuples (query returns list of tuples)
names = [i[0] for i in industries]
return names if names else ["Sonstige"]
except Exception as e: except Exception as e:
logger.error(f"Failed to load allowed industries: {e}") logger.error(f"Failed to load allowed industries from DB: {e}")
return ["Sonstige"] return ["Sonstige"]
finally:
db.close()
def _get_category_prompts(self) -> str: def _get_category_prompts(self) -> str:
""" """
@@ -52,6 +59,7 @@ class ClassificationService:
return {"error": "Insufficient text content"} return {"error": "Insufficient text content"}
category_guidance = self._get_category_prompts() category_guidance = self._get_category_prompts()
allowed_industries = self._get_allowed_industries()
prompt = f""" prompt = f"""
You are a Senior B2B Market Analyst for 'Roboplanet', a specialized robotics distributor. You are a Senior B2B Market Analyst for 'Roboplanet', a specialized robotics distributor.
@@ -64,7 +72,7 @@ class ClassificationService:
--- ALLOWED INDUSTRIES (STRICT) --- --- ALLOWED INDUSTRIES (STRICT) ---
You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige". You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige".
{json.dumps(self.allowed_industries, ensure_ascii=False)} {json.dumps(allowed_industries, ensure_ascii=False)}
--- ANALYSIS PART 1: BUSINESS MODEL --- --- ANALYSIS PART 1: BUSINESS MODEL ---
1. Identify the core products/services. 1. Identify the core products/services.