feat(company-explorer): Initial Web UI & Backend with Enrichment Flow
This commit introduces the foundational elements for the new "Company Explorer" web application, marking a significant step away from the legacy Google Sheets / CLI system. Key changes include: - Project Structure: a new directory with separate backend (FastAPI) and frontend (React/Vite) components. - Data Persistence: migration from Google Sheets to a local SQLite database managed via SQLAlchemy. - Core Utilities: extraction and cleanup of essential helper functions (LLM wrappers, text utilities) into a shared module. - Backend Services: new services for AI-powered analysis and enrichment logic. - Frontend UI: basic React application with a company table, import wizard, and dynamic inspector sidebar. - Docker Integration: updated build configuration for multi-stage builds and sideloading. - Deployment & Access: integrated into the central Nginx proxy and dashboard. Lessons learned and fixed during development: - Frontend Asset Loading: addressed issues with Vite's base path and FastAPI's static-file serving. - TypeScript Configuration: added the missing compiler and tooling configuration. - Database Schema Evolution: solved schema errors by forcing a new database file and correcting the metadata override. - Logging: implemented robust file-based logging. This new foundation provides a powerful and maintainable platform for future B2B robotics lead generation.
This commit is contained in:
113
company-explorer/backend/database.py
Normal file
113
company-explorer/backend/database.py
Normal file
@@ -0,0 +1,113 @@
|
||||
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, Float, Boolean, JSON
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker, relationship
|
||||
from datetime import datetime
|
||||
from .config import settings
|
||||
|
||||
# Setup
#
# "check_same_thread" is a SQLite-specific DB-API flag; other drivers
# (e.g. psycopg2) reject unknown connect args with a TypeError. Pass it
# only when the configured URL actually targets SQLite, so switching
# DATABASE_URL to another backend later does not break engine creation.
_connect_args = (
    {"check_same_thread": False}
    if settings.DATABASE_URL.startswith("sqlite")
    else {}
)

# Engine bound to the configured database URL.
engine = create_engine(settings.DATABASE_URL, connect_args=_connect_args)

# Session factory: explicit commit/flush control, one session per request.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class all ORM models below inherit from.
Base = declarative_base()
|
||||
|
||||
# ==============================================================================
|
||||
# MODELS
|
||||
# ==============================================================================
|
||||
|
||||
class Company(Base):
    """A target company record — the central entity of the explorer.

    Holds core identity (incl. an optional CRM link), CRM- vs AI-derived
    industry classification, location, workflow status, and per-step
    enrichment timestamps. Related ``Signal`` and ``EnrichmentData`` rows
    are deleted together with the company (cascade delete-orphan).
    """
    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)

    # Core Identity
    name = Column(String, index=True)
    website = Column(String, index=True)  # Normalized Domain preferred
    crm_id = Column(String, unique=True, index=True, nullable=True)  # Link to D365

    # Classification — the CRM-sourced value vs. the AI suggestion are
    # kept side by side so they can be compared/reconciled later.
    industry_crm = Column(String, nullable=True)  # The "allowed" industry
    industry_ai = Column(String, nullable=True)  # The AI suggested industry

    # Location
    city = Column(String, nullable=True)
    country = Column(String, default="DE")  # ISO-style country code; defaults to Germany

    # Workflow Status (e.g. starts at "NEW"; indexed for list filtering)
    status = Column(String, default="NEW", index=True)

    # Granular Process Tracking (Timestamps)
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 and
    # produces naive datetimes — consider timezone-aware UTC timestamps.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # One nullable timestamp per enrichment step; NULL means "never run".
    last_scraped_at = Column(DateTime, nullable=True)
    last_wiki_search_at = Column(DateTime, nullable=True)
    last_classification_at = Column(DateTime, nullable=True)
    last_signal_check_at = Column(DateTime, nullable=True)

    # Relationships — children are removed when the company is deleted.
    signals = relationship("Signal", back_populates="company", cascade="all, delete-orphan")
    enrichment_data = relationship("EnrichmentData", back_populates="company", cascade="all, delete-orphan")
|
||||
|
||||
|
||||
class Signal(Base):
    """
    Represents a specific sales signal or potential.

    One row per detected signal per company, with a confidence score and
    an optional proof snippet quoting the source material.

    Example: type='has_spa', value='true', proof='Wellnessbereich mit 2000qm'
    """
    __tablename__ = "signals"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))  # Owning company

    signal_type = Column(String, index=True)  # e.g. "robotics_cleaning_potential"
    confidence = Column(Float, default=0.0)  # 0.0 to 1.0
    value = Column(String)  # "High", "Medium", "Yes", "No"
    proof_text = Column(Text, nullable=True)  # Snippet from website/source

    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 (naive UTC).
    created_at = Column(DateTime, default=datetime.utcnow)

    # Back-reference; the parent cascades deletes to its signals.
    company = relationship("Company", back_populates="signals")
|
||||
|
||||
|
||||
class EnrichmentData(Base):
    """
    Stores raw data blobs (HTML, API responses) to allow re-processing.

    Keeping the unprocessed payload per source lets classification be
    re-run later without re-fetching from the network.
    """
    __tablename__ = "enrichment_data"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))  # Owning company

    source_type = Column(String)  # "website_scrape", "wikipedia_api", "google_serp"
    content = Column(JSON)  # The raw data

    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 (naive UTC).
    created_at = Column(DateTime, default=datetime.utcnow)

    # Back-reference; the parent cascades deletes to its enrichment rows.
    company = relationship("Company", back_populates="enrichment_data")
|
||||
|
||||
class ImportLog(Base):
    """
    Logs bulk imports (e.g. from Excel lists).

    One row per import run with simple row counters; not linked to the
    imported companies themselves.
    """
    __tablename__ = "import_logs"

    id = Column(Integer, primary_key=True)
    filename = Column(String)  # Name of the uploaded file
    import_type = Column(String)  # "crm_dump" or "event_list"
    total_rows = Column(Integer)  # Rows seen in the source file
    imported_rows = Column(Integer)  # Rows actually created
    duplicate_rows = Column(Integer)  # Rows skipped as duplicates
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 (naive UTC).
    created_at = Column(DateTime, default=datetime.utcnow)
|
||||
|
||||
# ==============================================================================
|
||||
# UTILS
|
||||
# ==============================================================================
|
||||
|
||||
def init_db():
    """Create every table registered on the declarative base.

    Idempotent: tables that already exist are left untouched (no
    migrations are performed).
    """
    metadata = Base.metadata
    metadata.create_all(bind=engine)
|
||||
|
||||
def get_db():
    """FastAPI dependency yielding a database session.

    Opens a fresh session per request and guarantees it is closed once
    the request is finished, even on error.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
||||
Reference in New Issue
Block a user