118 lines
4.9 KiB
Python
118 lines
4.9 KiB
Python
import json
|
|
import logging
|
|
import os
|
|
from typing import Dict, Any, List
|
|
from ..lib.core_utils import call_gemini, clean_json_response
|
|
from ..config import settings
|
|
from ..database import SessionLocal, RoboticsCategory, Industry
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
class ClassificationService:
    """Build an LLM prompt from a company's website text and return the parsed JSON dossier.

    The prompt is assembled from three inputs: the caller-supplied company
    name and website text, the industry names stored in the database, and the
    robotics category definitions stored in the database. The model is invoked
    through ``call_gemini`` and its reply is parsed as JSON.
    """

    # Industry label used when the industry table is empty or cannot be read.
    _FALLBACK_INDUSTRY = "Sonstige"
    # Website text shorter than this is rejected without calling the model.
    _MIN_CONTENT_CHARS = 100
    # Only this many leading characters of the website text go into the prompt.
    _MAX_CONTENT_CHARS = 20000

    def _get_allowed_industries(self) -> List[str]:
        """Return all industry names from the database, ordered by name.

        The original note on this method says these correspond to
        "Settings > Industry Focus".

        Returns:
            The list of names, or ``["Sonstige"]`` when the table is empty or
            the query raises.
        """
        db = SessionLocal()
        try:
            # Querying a single column yields one-element rows; take element 0.
            rows = db.query(Industry.name).order_by(Industry.name).all()
            names = [row[0] for row in rows]
            return names or [self._FALLBACK_INDUSTRY]
        except Exception as e:
            # Best-effort: a DB failure degrades to the fallback label
            # instead of aborting the classification.
            logger.error("Failed to load allowed industries from DB: %s", e)
            return [self._FALLBACK_INDUSTRY]
        finally:
            db.close()

    def _get_category_prompts(self) -> str:
        """Render every ``RoboticsCategory`` row as a bullet block for the prompt.

        Returns:
            One bullet per category (name, key, description, reasoning guide)
            joined by newlines. When no categories exist or the query raises,
            a short error sentence is returned instead.
        """
        db = SessionLocal()
        try:
            categories = db.query(RoboticsCategory).all()
            if not categories:
                # NOTE(review): this sentinel text is embedded into the prompt
                # by the caller as-is; confirm that is intended.
                return "Error: No categories defined."

            return "\n".join(
                f"* **{cat.name} ({cat.key}):**\n - Definition: {cat.description}\n - Scoring Guide: {cat.reasoning_guide}"
                for cat in categories
            )
        except Exception as e:
            logger.error("Error fetching categories: %s", e)
            return "Error loading categories."
        finally:
            db.close()

    def _build_prompt(
        self,
        company_name: str,
        website_text: str,
        category_guidance: str,
        allowed_industries: List[str],
    ) -> str:
        """Assemble the full analyst prompt sent to the model.

        Args:
            company_name: Name of the company being analysed.
            website_text: Website text; truncated to ``_MAX_CONTENT_CHARS``.
            category_guidance: Pre-rendered category definitions.
            allowed_industries: Industry names, embedded as a JSON array.

        Returns:
            The prompt string.
        """
        # NOTE(review): the output schema below hard-codes four category keys
        # while the scoring guidance is loaded dynamically from the database;
        # confirm the two stay in sync.
        # The prompt text is kept flush-left so the string content matches the
        # template exactly, without code indentation leaking into it.
        return f"""
You are a Senior B2B Market Analyst for 'Roboplanet', a specialized robotics distributor.
Your task is to analyze the target company based on their website text and create a concise **Dossier**.

--- TARGET COMPANY ---
Name: {company_name}
Website Content (Excerpt):
{website_text[:self._MAX_CONTENT_CHARS]}

--- ALLOWED INDUSTRIES (STRICT) ---
You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige".
{json.dumps(allowed_industries, ensure_ascii=False)}

--- ANALYSIS PART 1: BUSINESS MODEL ---
1. Identify the core products/services.
2. Summarize in 2-3 German sentences: What do they do and for whom? (Target: "business_model")

--- ANALYSIS PART 2: INFRASTRUCTURE & POTENTIAL (Chain of Thought) ---
1. **Infrastructure Scan:** Look for evidence of physical assets like *Factories, Large Warehouses, Production Lines, Campuses, Hospitals*.
2. **Provider vs. User Check:**
- Does the company USE this infrastructure (Potential Customer)?
- Or do they SELL products for it (Competitor/Partner)?
- *Example:* "Cleaning" -> Do they sell soap (Provider) or do they have a 50,000sqm factory (User)?
3. **Evidence Extraction:** Extract 1-2 key sentences from the text proving this infrastructure. (Target: "infrastructure_evidence")

--- ANALYSIS PART 3: SCORING (0-100) ---
Based on the identified infrastructure, score the potential for these categories:

{category_guidance}

--- OUTPUT FORMAT (JSON ONLY) ---
{{
"industry": "String (from list)",
"business_model": "2-3 sentences summary (German)",
"infrastructure_evidence": "1-2 key sentences proving physical assets (German)",
"potentials": {{
"cleaning": {{ "score": 0-100, "reason": "Reasoning based on infrastructure." }},
"transport": {{ "score": 0-100, "reason": "Reasoning based on logistics volume." }},
"security": {{ "score": 0-100, "reason": "Reasoning based on perimeter/assets." }},
"service": {{ "score": 0-100, "reason": "Reasoning based on guest interaction." }}
}}
}}
"""

    @staticmethod
    def _require_json_object(parsed: Any) -> Dict[str, Any]:
        """Return ``parsed`` unchanged if it is a dict, otherwise raise ``ValueError``.

        ``json.loads`` accepts any JSON value (list, string, number, null), but
        this service promises callers a dict.
        """
        if not isinstance(parsed, dict):
            raise ValueError(
                f"Expected a JSON object from the model, got {type(parsed).__name__}"
            )
        return parsed

    def analyze_robotics_potential(self, company_name: str, website_text: str) -> Dict[str, Any]:
        """Analyze a company for robotics potential based on its website content.

        Args:
            company_name: Name of the target company.
            website_text: Text extracted from the company's website.

        Returns:
            The model's reply parsed as a dict. On any failure the result is
            ``{"error": <message>}``: when ``website_text`` is empty or shorter
            than ``_MIN_CONTENT_CHARS``, when the model call or JSON parsing
            raises, or when the reply is valid JSON but not an object.
        """
        if not website_text or len(website_text) < self._MIN_CONTENT_CHARS:
            return {"error": "Insufficient text content"}

        category_guidance = self._get_category_prompts()
        allowed_industries = self._get_allowed_industries()
        prompt = self._build_prompt(
            company_name, website_text, category_guidance, allowed_industries
        )

        try:
            response_text = call_gemini(
                prompt=prompt,
                json_mode=True,
                temperature=0.1,  # Very low temp for analytical reasoning
            )
            # clean_json_response is defined in lib.core_utils (not visible
            # here); presumably it strips wrappers around the JSON payload.
            parsed = json.loads(clean_json_response(response_text))
            return self._require_json_object(parsed)
        except Exception as e:
            # Boundary handler: callers always receive a dict, never an exception.
            logger.error("Classification failed: %s", e)
            return {"error": str(e)}
|