# Brancheneinstufung2/company-explorer/backend/services/classification.py

import json
import logging
import os
from typing import Dict, Any, List
from ..lib.core_utils import call_gemini
from ..config import settings
from ..database import SessionLocal, RoboticsCategory

# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Path of the JSON file holding the permitted industry labels, expressed
# relative to the directory that contains this module.
_MODULE_DIR = os.path.dirname(__file__)
ALLOWED_INDUSTRIES_FILE = os.path.join(_MODULE_DIR, "../data/allowed_industries.json")

class ClassificationService:
    """Classify a company and score its service-robotics potential via an LLM.

    The list of permitted industry labels is read from
    ``ALLOWED_INDUSTRIES_FILE`` once, when the service is constructed. The
    scoring-category guidance is read from the database on every analysis,
    and the assembled prompt is sent to the model through ``call_gemini``.
    """

    # Website text shorter than this (after trimming whitespace) is rejected
    # without querying the database or the model.
    MIN_WEBSITE_TEXT_CHARS = 100
    # Maximum number of website-text characters embedded in the prompt.
    MAX_WEBSITE_TEXT_CHARS = 20000

    def __init__(self):
        self.allowed_industries = self._load_allowed_industries()

    def _load_allowed_industries(self) -> List[str]:
        """Load the permitted industry labels from ``ALLOWED_INDUSTRIES_FILE``.

        Returns:
            The decoded JSON content of the file, or ``["Sonstige"]`` when the
            file cannot be read or parsed (the failure is logged, not raised).
        """
        try:
            with open(ALLOWED_INDUSTRIES_FILE, 'r', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            # Best effort: a missing or broken file must not prevent the
            # service from starting, so fall back to the catch-all label.
            logger.error("Failed to load allowed industries: %s", e)
            return ["Sonstige"]

    def _get_category_prompts(self) -> str:
        """Fetch the category definitions from the database as prompt text.

        Every ``RoboticsCategory`` row becomes one bullet containing its name,
        key, description and reasoning guide; bullets are joined by newlines.

        Returns:
            The formatted guidance. When no rows exist or the query fails, a
            short error placeholder string is returned instead of raising.
            NOTE: the caller embeds that placeholder into the prompt verbatim.
        """
        db = SessionLocal()
        try:
            categories = db.query(RoboticsCategory).all()
            if not categories:
                return "Error: No categories defined."

            return "\n".join(
                f"* **{cat.name} ({cat.key}):**\n     - Definition: {cat.description}\n     - Scoring Guide: {cat.reasoning_guide}"
                for cat in categories
            )
        except Exception as e:
            logger.error("Error fetching categories: %s", e)
            return "Error loading categories."
        finally:
            # Always release the session, including on the early return above.
            db.close()

    def _build_prompt(self, company_name: str, website_text: str, category_guidance: str) -> str:
        """Assemble the analysis prompt for one company.

        Args:
            company_name: Name inserted into the target-company section.
            website_text: Website content; only the first
                ``MAX_WEBSITE_TEXT_CHARS`` characters are embedded.
            category_guidance: Text placed under the scoring-categories heading.

        Returns:
            The complete prompt string.
        """
        return f"""
        You are a Senior B2B Market Analyst for 'Roboplanet', a specialized robotics distributor.
        Your task is to analyze a target company based on their website text to determine their **operational need** for service robotics.

        --- TARGET COMPANY ---
        Name: {company_name}
        Website Content (Excerpt):
        {website_text[:self.MAX_WEBSITE_TEXT_CHARS]}

        --- ALLOWED INDUSTRIES (STRICT) ---
        You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige".
        {json.dumps(self.allowed_industries, ensure_ascii=False)}

        --- ANALYSIS GUIDELINES (CHAIN OF THOUGHT) ---
        1. **Infrastructure Analysis:** What physical assets does this company likely operate based on their business model?
           - Factories / Production Plants? (-> Needs Cleaning, Security, Intralogistics)
           - Large Warehouses? (-> Needs Intralogistics, Security, Floor Washing)
           - Offices / Headquarters? (-> Needs Vacuuming, Window Cleaning)
           - Critical Infrastructure (Solar Parks, Wind Farms)? (-> Needs Perimeter Security, Inspection)
           - Hotels / Hospitals? (-> Needs Service, Cleaning, Transport)

        2. **Provider vs. User Distinction (CRITICAL):**
           - If a company SELLS cleaning products (e.g., 3M, Henkel), they do NOT necessarily have a higher need for cleaning robots than any other manufacturer. Do not score them high just because the word "cleaning" appears. Score them based on their *factories*.
           - If a company SELLS security services, they might be a potential PARTNER, but check if they *manage* sites.

        3. **Scale Assessment:**
           - 5 locations implies more need than 1.
           - "Global player" implies large facilities.

        --- SCORING CATEGORIES (0-100) ---
        Based on the current strategic focus of Roboplanet:

        {category_guidance}

        --- OUTPUT FORMAT (JSON ONLY) ---
        {{
            "industry": "String (from list)",
            "summary": "Concise analysis of their infrastructure and business model (German)",
            "potentials": {{
                "cleaning": {{ "score": 0-100, "reason": "Specific reasoning based on infrastructure (e.g. 'Operates 5 production plants in DE')." }},
                "transport": {{ "score": 0-100, "reason": "..." }},
                "security": {{ "score": 0-100, "reason": "..." }},
                "service": {{ "score": 0-100, "reason": "..." }}
            }}
        }}
        """

    @staticmethod
    def _parse_response(response_text: str) -> Dict[str, Any]:
        """Decode the model reply and ensure it is a JSON object.

        Raises:
            ValueError: If the reply is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError``) or decodes to something other than a dict.
            TypeError: If ``response_text`` is not a str/bytes value.
        """
        result = json.loads(response_text)
        if not isinstance(result, dict):
            # Valid JSON that is a list/string/number would break callers
            # that rely on the declared dict return type.
            raise ValueError(
                f"Expected a JSON object from the model, got {type(result).__name__}"
            )
        return result

    def analyze_robotics_potential(self, company_name: str, website_text: str) -> Dict[str, Any]:
        """Analyze a company's robotics potential based on its website content.

        Args:
            company_name: Name of the company being analysed.
            website_text: Text extracted from the company's website.

        Returns:
            The JSON object returned by the model, or ``{"error": <message>}``
            when the text is too short, the model call fails, or the reply is
            not a JSON object. Failures of the model call are logged and never
            raised to the caller.
        """
        # Trim first so whitespace-only or padded scraper output neither
        # passes the length guard nor eats into the excerpt budget.
        content = website_text.strip() if website_text else ""
        if len(content) < self.MIN_WEBSITE_TEXT_CHARS:
            return {"error": "Insufficient text content"}

        category_guidance = self._get_category_prompts()
        prompt = self._build_prompt(company_name, content, category_guidance)

        try:
            response_text = call_gemini(
                prompt=prompt,
                json_mode=True,
                temperature=0.1,  # Very low temp for analytical reasoning
            )
            return self._parse_response(response_text)
        except Exception as e:
            logger.error("Classification failed: %s", e)
            return {"error": str(e)}