"""LLM-backed classification of companies by industry and robotics potential."""

import json
import logging
import os
from typing import Dict, Any, List

from ..lib.core_utils import call_gemini
from ..config import settings

logger = logging.getLogger(__name__)

# JSON file holding the industry names the model is allowed to choose from.
ALLOWED_INDUSTRIES_FILE = os.path.join(os.path.dirname(__file__), "../data/allowed_industries.json")


class ClassificationService:
    """Classify a company and score its robotics potential via ``call_gemini``.

    The allowed industry list is read once, when the service is constructed,
    and embedded into every prompt.
    """

    # Website texts shorter than this many characters are rejected up front.
    MIN_TEXT_LENGTH = 100
    # At most this many leading characters of the website text go into the prompt.
    MAX_PROMPT_CHARS = 15000

    def __init__(self):
        self.allowed_industries = self._load_allowed_industries()

    def _load_allowed_industries(self) -> List[str]:
        """Read the allowed industries from ``ALLOWED_INDUSTRIES_FILE``.

        Returns:
            The decoded JSON content of the file, or ``["Sonstige"]`` when the
            file cannot be read or parsed.
        """
        try:
            with open(ALLOWED_INDUSTRIES_FILE, 'r', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            # Deliberate best-effort: a missing or broken data file must not
            # prevent the service from being constructed.
            logger.exception("Failed to load allowed industries: %s", e)
            return ["Sonstige"]

    def analyze_robotics_potential(self, company_name: str, website_text: str) -> Dict[str, Any]:
        """Analyze a company for robotics potential based on its website content.

        Args:
            company_name: Name of the target company, inserted into the prompt.
            website_text: Text of the company's website; only the first
                ``MAX_PROMPT_CHARS`` characters are sent.

        Returns:
            The JSON object decoded from the model response, or a dict with a
            single ``"error"`` key when the text is too short, the call or the
            JSON decoding fails, or the response is not a JSON object.
        """
        if not website_text or len(website_text) < self.MIN_TEXT_LENGTH:
            return {"error": "Insufficient text content"}

        excerpt = website_text[:self.MAX_PROMPT_CHARS]
        industries_json = json.dumps(self.allowed_industries, ensure_ascii=False)

        prompt = f"""
        You are a Senior B2B Market Analyst for 'Roboplanet', a robotics distributor.
        Your job is to analyze a target company based on their website text and determine their potential for using robots.

        --- TARGET COMPANY ---
        Name: {company_name}
        Website Content (Excerpt):
        {excerpt}

        --- ALLOWED INDUSTRIES (STRICT) ---
        You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige".
        {industries_json}

        --- ANALYSIS TASKS ---
        1. **Industry Classification:** Pick one from the list.
        2. **Robotics Potential Scoring (0-100):**
           - **Cleaning:** Does the company manage large floors, hospitals, hotels, or public spaces? (Keywords: Hygiene, Cleaning, SPA, Facility Management)
           - **Transport/Logistics:** Do they move goods internally? (Keywords: Warehouse, Intralogistics, Production line, Hospital logistics)
           - **Security:** Do they have large perimeters or night patrols? (Keywords: Werkschutz, Security, Monitoring)
           - **Service:** Do they interact with guests/patients? (Keywords: Reception, Restaurant, Nursing)
        3. **Explanation:** A short, strategic reason for the scoring (German).

        --- OUTPUT FORMAT (JSON ONLY) ---
        {{
            "industry": "String (from list)",
            "summary": "Short business summary (German)",
            "potentials": {{
                "cleaning": {{ "score": 0-100, "reason": "..." }},
                "transport": {{ "score": 0-100, "reason": "..." }},
                "security": {{ "score": 0-100, "reason": "..." }},
                "service": {{ "score": 0-100, "reason": "..." }}
            }}
        }}
        """

        try:
            # NOTE(review): call_gemini is assumed to return the raw response
            # text as a string — confirm against lib.core_utils.
            response_text = call_gemini(
                prompt=prompt,
                json_mode=True,
                temperature=0.2  # Low temp for consistency
            )
            result = json.loads(response_text)
        except Exception as e:
            logger.exception("Classification failed: %s", e)
            return {"error": str(e)}

        # Valid JSON is not necessarily an object; callers are promised a dict,
        # so a list/string/number reply is reported as an error instead.
        if not isinstance(result, dict):
            message = f"Unexpected response format: expected JSON object, got {type(result).__name__}"
            logger.error("Classification failed: %s", message)
            return {"error": message}

        return result