feat(company-explorer): add Wikipedia integration, robotics settings, and manual overrides

- Ported robust Wikipedia extraction logic (categories, first paragraph) from the legacy system.
- Implemented database-driven Robotics Category configuration with frontend settings UI.
- Updated Robotics Potential analysis to use Chain-of-Thought infrastructure reasoning.
- Added Manual Override features for Wikipedia URL (with locking) and Website URL (with re-scrape trigger).
- Enhanced Inspector UI with Wikipedia profile, category tags, and action buttons.
This commit is contained in:
2026-01-08 10:08:21 +00:00
parent 6fda69a611
commit 565c56dc9a
12 changed files with 1320 additions and 160 deletions

View File

@@ -77,13 +77,30 @@ class EnrichmentData(Base):
id = Column(Integer, primary_key=True, index=True)
company_id = Column(Integer, ForeignKey("companies.id"))
source_type = Column(String) # "website_scrape", "wikipedia_api", "google_serp"
source_type = Column(String) # "website_scrape", "wikipedia", "google_serp"
content = Column(JSON) # The raw data
is_locked = Column(Boolean, default=False) # Manual override flag
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
company = relationship("Company", back_populates="enrichment_data")
class RoboticsCategory(Base):
    """
    One robotics category definition, persisted so it can be customised via the UI.

    Rows are stored in the ``robotics_categories`` table.
    """

    __tablename__ = "robotics_categories"

    # Integer primary key.
    id = Column(Integer, primary_key=True, index=True)

    # Unique category identifier, e.g. "cleaning" or "service".
    key = Column(String, unique=True, index=True)

    # Display name.
    name = Column(String)

    # The core definition used in LLM prompts.
    description = Column(Text)

    # Instructions for the Chain-of-Thought.
    reasoning_guide = Column(Text)

    # Populated from datetime.utcnow on insert and again on each update.
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )
class ImportLog(Base):
"""
Logs bulk imports (e.g. from Excel lists).
@@ -104,6 +121,47 @@ class ImportLog(Base):
def init_db():
    """Create every table registered on ``Base``, then seed the robotics defaults."""
    schema = Base.metadata
    schema.create_all(bind=engine)

    # Seeding runs after table creation so the target table is present.
    init_robotics_defaults()
def init_robotics_defaults():
    """Seed the default robotics categories when the table holds no rows.

    Opens its own session, inserts the built-in categories only if
    ``RoboticsCategory`` is empty, and commits. Any exception raised while
    querying or inserting is printed instead of propagated; the session is
    closed in every case.
    """
    session = SessionLocal()
    try:
        # Guard clause: leave existing (possibly user-edited) rows untouched.
        if session.query(RoboticsCategory).count() != 0:
            return

        seed_rows = [
            dict(
                key="cleaning",
                name="Cleaning Robots",
                description="Does the company manage large floors, hospitals, hotels, or public spaces? (Keywords: Hygiene, Cleaning, SPA, Facility Management)",
                reasoning_guide="High (80-100): Large industrial floors, shopping malls, hospitals, airports. Medium (40-79): Mid-sized production, large offices, supermarkets. Low (0-39): Small offices, software consultancies.",
            ),
            dict(
                key="transport",
                name="Intralogistics / Transport",
                description="Do they move goods internally? (Keywords: Warehouse, Intralogistics, Production line, Hospital logistics)",
                reasoning_guide="High: Manufacturing, E-Commerce fulfillment, Hospitals. Low: Pure service providers, law firms.",
            ),
            dict(
                key="security",
                name="Security & Surveillance",
                description="Do they have large perimeters, solar parks, wind farms, or night patrols? (Keywords: Werkschutz, Security, Monitoring)",
                reasoning_guide="High: Critical infrastructure, large open-air storage, factories with valuable assets, 24/7 operations. Medium: Standard corporate HQs. Low: Offices in shared buildings.",
            ),
            dict(
                key="service",
                name="Service / Waiter Robots",
                description="Do they operate restaurants, nursing homes, or event venues where food/items need to be served to people?",
                reasoning_guide="High: Restaurants, Hotels (Room Service), Nursing Homes (Meal delivery). Low: B2B manufacturing, closed offices, pure installation services.",
            ),
        ]

        # Stage every default row, then persist them in a single commit.
        session.add_all([RoboticsCategory(**row) for row in seed_rows])
        session.commit()
        print("Seeded Robotics Categories.")
    except Exception as e:
        # Best-effort seeding: report the problem and carry on.
        print(f"Error seeding robotics defaults: {e}")
    finally:
        session.close()
def get_db():
db = SessionLocal()