import json
import logging
import os
from typing import Dict, Any, List

from ..lib.core_utils import call_gemini
from ..config import settings
from ..database import SessionLocal, RoboticsCategory

logger = logging.getLogger(__name__)

ALLOWED_INDUSTRIES_FILE = os.path.join(os.path.dirname(__file__), "../data/allowed_industries.json")


class ClassificationService:
    """Scores a company's need for service robotics via an LLM prompt.

    The allowed industry labels are read once from ``ALLOWED_INDUSTRIES_FILE``
    when the service is constructed; the scoring category definitions are
    re-read from the database on every analysis call.
    """

    # Website text shorter than this is rejected without calling the model.
    MIN_TEXT_LENGTH = 100
    # Only this many leading characters of the website text go into the prompt.
    MAX_EXCERPT_CHARS = 20000
    # Industry label used when the allowed-industries file is unusable.
    FALLBACK_INDUSTRY = "Sonstige"

    def __init__(self):
        self.allowed_industries = self._load_allowed_industries()

    def _load_allowed_industries(self) -> List[str]:
        """Load the allowed industry labels from ``ALLOWED_INDUSTRIES_FILE``.

        Returns:
            The parsed JSON list, or ``["Sonstige"]`` when the file cannot be
            read, is not valid JSON, or does not contain a non-empty list.
        """
        fallback = [self.FALLBACK_INDUSTRY]
        try:
            with open(ALLOWED_INDUSTRIES_FILE, 'r', encoding='utf-8') as f:
                industries = json.load(f)
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file. ValueError: covers both
            # json.JSONDecodeError and UnicodeDecodeError.
            logger.error("Failed to load allowed industries: %s", e)
            return fallback

        # The prompt presents this value as a strict list of choices, so a
        # non-list or empty payload is treated like a failed load.
        if not isinstance(industries, list) or not industries:
            logger.error(
                "Failed to load allowed industries: expected a non-empty JSON list, got %s",
                type(industries).__name__,
            )
            return fallback
        return industries

    def _get_category_prompts(self) -> str:
        """Fetch the latest category definitions from the database.

        Returns:
            One bullet block per ``RoboticsCategory`` row joined by newlines,
            or a short error sentence when no rows exist or the query fails.
            The error sentence is returned (not raised), so it ends up in the
            prompt text built by the caller.
        """
        db = SessionLocal()
        try:
            categories = db.query(RoboticsCategory).all()
            if not categories:
                return "Error: No categories defined."

            return "\n".join(
                f"* **{cat.name} ({cat.key}):**\n - Definition: {cat.description}\n - Scoring Guide: {cat.reasoning_guide}"
                for cat in categories
            )
        except Exception as e:
            # Broad on purpose: the concrete DB exception types are defined
            # outside this module and this lookup is best-effort.
            logger.error("Error fetching categories: %s", e)
            return "Error loading categories."
        finally:
            db.close()

    def _build_prompt(self, company_name: str, website_text: str, category_guidance: str) -> str:
        """Assemble the analyst prompt for one company.

        Args:
            company_name: Name shown to the model as the target company.
            website_text: Raw website text; truncated to ``MAX_EXCERPT_CHARS``.
            category_guidance: Category definitions as produced by
                ``_get_category_prompts``.
        """
        return f"""
        You are a Senior B2B Market Analyst for 'Roboplanet', a specialized robotics distributor.
        Your task is to analyze a target company based on their website text to determine their **operational need** for service robotics.

        --- TARGET COMPANY ---
        Name: {company_name}
        Website Content (Excerpt):
        {website_text[:self.MAX_EXCERPT_CHARS]}

        --- ALLOWED INDUSTRIES (STRICT) ---
        You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige".
        {json.dumps(self.allowed_industries, ensure_ascii=False)}

        --- ANALYSIS GUIDELINES (CHAIN OF THOUGHT) ---
        1. **Infrastructure Analysis:** What physical assets does this company likely operate based on their business model?
           - Factories / Production Plants? (-> Needs Cleaning, Security, Intralogistics)
           - Large Warehouses? (-> Needs Intralogistics, Security, Floor Washing)
           - Offices / Headquarters? (-> Needs Vacuuming, Window Cleaning)
           - Critical Infrastructure (Solar Parks, Wind Farms)? (-> Needs Perimeter Security, Inspection)
           - Hotels / Hospitals? (-> Needs Service, Cleaning, Transport)

        2. **Provider vs. User Distinction (CRITICAL):**
           - If a company SELLS cleaning products (e.g., 3M, Henkel), they do NOT necessarily have a higher need for cleaning robots than any other manufacturer. Do not score them high just because the word "cleaning" appears. Score them based on their *factories*.
           - If a company SELLS security services, they might be a potential PARTNER, but check if they *manage* sites.

        3. **Scale Assessment:**
           - 5 locations implies more need than 1.
           - "Global player" implies large facilities.

        --- SCORING CATEGORIES (0-100) ---
        Based on the current strategic focus of Roboplanet:

        {category_guidance}

        --- OUTPUT FORMAT (JSON ONLY) ---
        {{
            "industry": "String (from list)",
            "summary": "Concise analysis of their infrastructure and business model (German)",
            "potentials": {{
                "cleaning": {{ "score": 0-100, "reason": "Specific reasoning based on infrastructure (e.g. 'Operates 5 production plants in DE')." }},
                "transport": {{ "score": 0-100, "reason": "..." }},
                "security": {{ "score": 0-100, "reason": "..." }},
                "service": {{ "score": 0-100, "reason": "..." }}
            }}
        }}
        """

    def analyze_robotics_potential(self, company_name: str, website_text: str) -> Dict[str, Any]:
        """Analyze a company's robotics potential from its website content.

        Args:
            company_name: Name of the company being analyzed.
            website_text: Text extracted from the company's website.

        Returns:
            The JSON object parsed from the model response, or a dict with a
            single ``"error"`` key when the text is missing or shorter than
            ``MIN_TEXT_LENGTH``, the model call or JSON parsing fails, or the
            response is valid JSON but not an object.
        """
        if not website_text or len(website_text) < self.MIN_TEXT_LENGTH:
            return {"error": "Insufficient text content"}

        category_guidance = self._get_category_prompts()
        prompt = self._build_prompt(company_name, website_text, category_guidance)

        try:
            response_text = call_gemini(
                prompt=prompt,
                json_mode=True,
                temperature=0.1  # Very low temp for analytical reasoning
            )
            result = json.loads(response_text)
        except Exception as e:
            # Broad on purpose: call_gemini is defined elsewhere and its
            # failure modes are not visible here; callers get an error dict.
            logger.error("Classification failed: %s", e)
            return {"error": str(e)}

        # json.loads also accepts top-level lists/strings/numbers; only an
        # object satisfies the declared Dict return type.
        if not isinstance(result, dict):
            message = f"Unexpected response format: expected JSON object, got {type(result).__name__}"
            logger.error("Classification failed: %s", message)
            return {"error": message}
        return result