import json
import logging
import os
from typing import Dict, Any, List

from ..lib.core_utils import call_gemini, clean_json_response
from ..config import settings
from ..database import SessionLocal, RoboticsCategory, Industry

logger = logging.getLogger(__name__)


class ClassificationService:
    """Build an LLM prompt from DB-configured industries/categories and
    return the model's JSON assessment of a company's robotics potential.
    """

    # Minimum number of non-whitespace-trimmed characters required before an
    # analysis is attempted.
    _MIN_TEXT_LENGTH = 100
    # Maximum number of website-text characters embedded into the prompt.
    _MAX_PROMPT_TEXT_LENGTH = 20000
    # Industry used when the database yields no industries (or cannot be read).
    _FALLBACK_INDUSTRY = "Sonstige"

    def __init__(self):
        pass

    def _get_allowed_industries(self) -> List[str]:
        """Fetch the allowed industries from the database
        (Settings > Industry Focus).

        Returns:
            Industry names ordered by name. Falls back to ``["Sonstige"]``
            when the table is empty or the query fails.
        """
        db = None
        try:
            # Created inside the try so that a failure to open the session
            # also takes the fallback path instead of propagating.
            db = SessionLocal()
            # Ordered by name so the prompt is stable between calls.
            rows = db.query(Industry.name).order_by(Industry.name).all()
            # Single-column queries return a list of 1-tuples.
            names = [row[0] for row in rows]
            return names if names else [self._FALLBACK_INDUSTRY]
        except Exception as e:
            logger.exception("Failed to load allowed industries from DB: %s", e)
            return [self._FALLBACK_INDUSTRY]
        finally:
            if db is not None:
                db.close()

    def _get_category_prompts(self) -> str:
        """Fetch the current category definitions from the database and
        render them as a bullet list for the prompt.

        Returns:
            One bullet per category (name, key, description, reasoning
            guide) joined by newlines. When no categories exist or the query
            fails, a short error string is returned instead; the caller
            embeds it into the prompt as-is.
        """
        db = None
        try:
            db = SessionLocal()
            categories = db.query(RoboticsCategory).all()
            if not categories:
                return "Error: No categories defined."

            return "\n".join(
                f"* **{cat.name} ({cat.key}):**\n - Definition: {cat.description}\n - Scoring Guide: {cat.reasoning_guide}"
                for cat in categories
            )
        except Exception as e:
            logger.exception("Error fetching categories: %s", e)
            return "Error loading categories."
        finally:
            if db is not None:
                db.close()

    def _build_prompt(
        self,
        company_name: str,
        website_text: str,
        category_guidance: str,
        allowed_industries: List[str],
    ) -> str:
        """Assemble the analyst prompt sent to the model.

        The website text is truncated to ``_MAX_PROMPT_TEXT_LENGTH``
        characters; the allowed industries are embedded as a JSON array with
        non-ASCII characters preserved.
        """
        return f"""
        You are a Senior B2B Market Analyst for 'Roboplanet', a specialized robotics distributor.
        Your task is to analyze the target company based on their website text and create a concise **Dossier**.

        --- TARGET COMPANY ---
        Name: {company_name}
        Website Content (Excerpt):
        {website_text[:self._MAX_PROMPT_TEXT_LENGTH]}

        --- ALLOWED INDUSTRIES (STRICT) ---
        You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige".
        {json.dumps(allowed_industries, ensure_ascii=False)}

        --- ANALYSIS PART 1: BUSINESS MODEL ---
        1. Identify the core products/services.
        2. Summarize in 2-3 German sentences: What do they do and for whom? (Target: "business_model")

        --- ANALYSIS PART 2: INFRASTRUCTURE & POTENTIAL (Chain of Thought) ---
        1. **Infrastructure Scan:** Look for evidence of physical assets like *Factories, Large Warehouses, Production Lines, Campuses, Hospitals*.
        2. **Provider vs. User Check:**
           - Does the company USE this infrastructure (Potential Customer)?
           - Or do they SELL products for it (Competitor/Partner)?
           - *Example:* "Cleaning" -> Do they sell soap (Provider) or do they have a 50,000sqm factory (User)?
        3. **Evidence Extraction:** Extract 1-2 key sentences from the text proving this infrastructure. (Target: "infrastructure_evidence")

        --- ANALYSIS PART 3: SCORING (0-100) ---
        Based on the identified infrastructure, score the potential for these categories:

        {category_guidance}

        --- OUTPUT FORMAT (JSON ONLY) ---
        {{
            "industry": "String (from list)",
            "business_model": "2-3 sentences summary (German)",
            "infrastructure_evidence": "1-2 key sentences proving physical assets (German)",
            "potentials": {{
                "cleaning": {{ "score": 0-100, "reason": "Reasoning based on infrastructure." }},
                "transport": {{ "score": 0-100, "reason": "Reasoning based on logistics volume." }},
                "security": {{ "score": 0-100, "reason": "Reasoning based on perimeter/assets." }},
                "service": {{ "score": 0-100, "reason": "Reasoning based on guest interaction." }}
            }}
        }}
        """

    def analyze_robotics_potential(self, company_name: str, website_text: str) -> Dict[str, Any]:
        """Analyze a company's robotics potential from its website content.

        Args:
            company_name: Name of the target company, embedded in the prompt.
            website_text: Text of the company's website; only the
                first ``_MAX_PROMPT_TEXT_LENGTH`` characters are sent.

        Returns:
            The parsed JSON object produced by the model, or a dict of the
            form ``{"error": "<message>"}`` when the text is too short, the
            model call or JSON parsing fails, or the response is valid JSON
            but not an object. This method does not raise for those cases.
        """
        # Measure length after stripping so whitespace-only or heavily padded
        # input does not trigger a pointless model call.
        if not website_text or len(website_text.strip()) < self._MIN_TEXT_LENGTH:
            return {"error": "Insufficient text content"}

        category_guidance = self._get_category_prompts()
        allowed_industries = self._get_allowed_industries()
        prompt = self._build_prompt(
            company_name, website_text, category_guidance, allowed_industries
        )

        try:
            response_text = call_gemini(
                prompt=prompt,
                json_mode=True,
                temperature=0.1,  # Very low temp for analytical reasoning
            )
            result = json.loads(clean_json_response(response_text))
        except Exception as e:
            logger.exception("Classification failed: %s", e)
            return {"error": str(e)}

        # json.loads happily returns lists/strings/numbers; callers are
        # promised a dict, so report anything else as an error.
        if not isinstance(result, dict):
            message = (
                "Unexpected response format: expected JSON object, "
                f"got {type(result).__name__}"
            )
            logger.error("Classification failed: %s", message)
            return {"error": message}

        return result