- Ported robust Wikipedia extraction logic (categories, first paragraph) from legacy system. - Implemented database-driven Robotics Category configuration with frontend settings UI. - Updated Robotics Potential analysis to use Chain-of-Thought infrastructure reasoning. - Added Manual Override features for Wikipedia URL (with locking) and Website URL (with re-scrape trigger). - Enhanced Inspector UI with Wikipedia profile, category tags, and action buttons.
113 lines
4.8 KiB
Python
113 lines
4.8 KiB
Python
import json
|
|
import logging
|
|
import os
|
|
from typing import Dict, Any, List
|
|
from ..lib.core_utils import call_gemini
|
|
from ..config import settings
|
|
from ..database import SessionLocal, RoboticsCategory
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# JSON file holding the industries the classifier may assign. The path is
# built from this module's directory, so it does not depend on the process
# working directory.
ALLOWED_INDUSTRIES_FILE = os.path.join(os.path.dirname(__file__), "../data/allowed_industries.json")
|
|
|
|
class ClassificationService:
    """Classify a company's industry and score its service-robotics potential.

    The allowed industry list is loaded once when the service is constructed.
    The robotics category definitions are read from the database on every
    call to :meth:`analyze_robotics_potential`.
    """

    def __init__(self):
        """Load the allowed industry list that constrains the model output."""
        self.allowed_industries = self._load_allowed_industries()

    def _load_allowed_industries(self) -> List[str]:
        """Read the allowed industries from ``ALLOWED_INDUSTRIES_FILE``.

        Returns:
            The parsed JSON list, or ``["Sonstige"]`` when the file cannot be
            read or decoded, or does not contain a non-empty JSON list.
        """
        fallback = ["Sonstige"]
        try:
            with open(ALLOWED_INDUSTRIES_FILE, 'r', encoding='utf-8') as f:
                industries = json.load(f)
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file. ValueError: invalid JSON
            # (json.JSONDecodeError) or invalid UTF-8 (UnicodeDecodeError).
            logger.error(f"Failed to load allowed industries: {e}")
            return fallback

        # The prompt asks the model to pick exactly one entry, so anything
        # other than a non-empty list cannot be used.
        if not isinstance(industries, list) or not industries:
            logger.error("Failed to load allowed industries: expected a non-empty JSON list")
            return fallback
        return industries

    def _get_category_prompts(self) -> str:
        """Render the category definitions stored in the database as text.

        Opens a new ``SessionLocal`` session, reads every ``RoboticsCategory``
        row and always closes the session afterwards.

        Returns:
            One bullet per category (name, key, definition and scoring guide)
            joined by newlines. When no rows exist or the query fails, a short
            error placeholder string is returned instead of raising; the
            caller embeds whatever is returned into the prompt.
        """
        db = SessionLocal()
        try:
            categories = db.query(RoboticsCategory).all()
            if not categories:
                return "Error: No categories defined."

            return "\n".join(
                f"* **{cat.name} ({cat.key}):**\n"
                f" - Definition: {cat.description}\n"
                f" - Scoring Guide: {cat.reasoning_guide}"
                for cat in categories
            )
        except Exception as e:
            # Best-effort boundary: a database problem must not abort the
            # analysis, so it is logged and replaced by a placeholder.
            logger.error(f"Error fetching categories: {e}")
            return "Error loading categories."
        finally:
            db.close()

    def _build_prompt(self, company_name: str, website_text: str, category_guidance: str) -> str:
        """Assemble the analysis prompt sent to the model.

        Only the first 20000 characters of ``website_text`` are included.
        """
        return f"""
        You are a Senior B2B Market Analyst for 'Roboplanet', a specialized robotics distributor.
        Your task is to analyze a target company based on their website text to determine their **operational need** for service robotics.

        --- TARGET COMPANY ---
        Name: {company_name}
        Website Content (Excerpt):
        {website_text[:20000]}

        --- ALLOWED INDUSTRIES (STRICT) ---
        You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige".
        {json.dumps(self.allowed_industries, ensure_ascii=False)}

        --- ANALYSIS GUIDELINES (CHAIN OF THOUGHT) ---
        1. **Infrastructure Analysis:** What physical assets does this company likely operate based on their business model?
           - Factories / Production Plants? (-> Needs Cleaning, Security, Intralogistics)
           - Large Warehouses? (-> Needs Intralogistics, Security, Floor Washing)
           - Offices / Headquarters? (-> Needs Vacuuming, Window Cleaning)
           - Critical Infrastructure (Solar Parks, Wind Farms)? (-> Needs Perimeter Security, Inspection)
           - Hotels / Hospitals? (-> Needs Service, Cleaning, Transport)

        2. **Provider vs. User Distinction (CRITICAL):**
           - If a company SELLS cleaning products (e.g., 3M, Henkel), they do NOT necessarily have a higher need for cleaning robots than any other manufacturer. Do not score them high just because the word "cleaning" appears. Score them based on their *factories*.
           - If a company SELLS security services, they might be a potential PARTNER, but check if they *manage* sites.

        3. **Scale Assessment:**
           - 5 locations implies more need than 1.
           - "Global player" implies large facilities.

        --- SCORING CATEGORIES (0-100) ---
        Based on the current strategic focus of Roboplanet:

        {category_guidance}

        --- OUTPUT FORMAT (JSON ONLY) ---
        {{
            "industry": "String (from list)",
            "summary": "Concise analysis of their infrastructure and business model (German)",
            "potentials": {{
                "cleaning": {{ "score": 0-100, "reason": "Specific reasoning based on infrastructure (e.g. 'Operates 5 production plants in DE')." }},
                "transport": {{ "score": 0-100, "reason": "..." }},
                "security": {{ "score": 0-100, "reason": "..." }},
                "service": {{ "score": 0-100, "reason": "..." }}
            }}
        }}
        """

    def analyze_robotics_potential(self, company_name: str, website_text: str) -> Dict[str, Any]:
        """Analyze a company's robotics potential from its website content.

        Args:
            company_name: Name of the company, inserted into the prompt.
            website_text: Text taken from the company website; only the first
                20000 characters are sent to the model.

        Returns:
            The JSON object produced by the model, parsed into a dict. On any
            failure a dict with a single ``"error"`` key is returned instead:
            ``"Insufficient text content"`` when fewer than 100 characters of
            non-whitespace-padded text are supplied, otherwise the message of
            the exception that occurred.
        """
        # Measure the stripped text so padding or whitespace-only input does
        # not pass the minimum-content check and reach the model.
        if not website_text or len(website_text.strip()) < 100:
            return {"error": "Insufficient text content"}

        category_guidance = self._get_category_prompts()
        prompt = self._build_prompt(company_name, website_text, category_guidance)

        try:
            response_text = call_gemini(
                prompt=prompt,
                json_mode=True,
                temperature=0.1  # Very low temp for analytical reasoning
            )
            result = json.loads(response_text)
            # Valid JSON can still be a list or scalar; callers are promised
            # a dict, so report anything else through the error path below.
            if not isinstance(result, dict):
                raise ValueError(
                    f"Expected a JSON object from the model, got {type(result).__name__}"
                )
            return result
        except Exception as e:
            # Boundary around the external model call: every failure is
            # logged and converted into the error-dict shape.
            logger.error(f"Classification failed: {e}")
            return {"error": str(e)}