feat(company-explorer): Initial Web UI & Backend with Enrichment Flow
This commit introduces the foundational elements for the new "Company Explorer" web application, marking a significant step away from the legacy Google Sheets / CLI system. Key changes include: - Project Structure: a new directory with separate backend (FastAPI) and frontend (React/Vite) components. - Data Persistence: migration from Google Sheets to a local SQLite database managed via SQLAlchemy. - Core Utilities: extraction and cleanup of essential helper functions (LLM wrappers, text utilities) into a shared module. - Backend Services: new services for AI-powered analysis and enrichment logic. - Frontend UI: basic React application with a company table, import wizard, and dynamic inspector sidebar. - Docker Integration: updated build configuration for multi-stage builds and sideloading. - Deployment & Access: integrated into the central Nginx proxy and dashboard. Lessons learned and fixed during development: - Frontend Asset Loading: addressed issues with Vite's base path and FastAPI's static-file serving. - TypeScript Configuration: added the missing compiler and tooling configuration. - Database Schema Evolution: solved schema errors by forcing a new database file and correcting the metadata override. - Logging: implemented robust file-based logging. This new foundation provides a powerful and maintainable platform for future B2B robotics lead generation.
This commit is contained in:
113
company-explorer/backend/database.py
Normal file
113
company-explorer/backend/database.py
Normal file
@@ -0,0 +1,113 @@
|
||||
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, Float, Boolean, JSON
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker, relationship
|
||||
from datetime import datetime
|
||||
from .config import settings
|
||||
|
||||
# Setup
#
# "check_same_thread" is a SQLite-specific DB-API flag; other drivers
# (e.g. psycopg2) reject unknown connect args with a TypeError. Pass it
# only when the configured URL actually targets SQLite, so switching
# DATABASE_URL to another backend later does not break engine creation.
_connect_args = (
    {"check_same_thread": False}
    if settings.DATABASE_URL.startswith("sqlite")
    else {}
)

# Engine bound to the configured database URL.
engine = create_engine(settings.DATABASE_URL, connect_args=_connect_args)

# Session factory: explicit commit/flush control, one session per request.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class all ORM models below inherit from.
Base = declarative_base()
|
||||
|
||||
# ==============================================================================
|
||||
# MODELS
|
||||
# ==============================================================================
|
||||
|
||||
class Company(Base):
    """A target company record — the central entity of the explorer.

    Holds core identity (incl. an optional CRM link), CRM- vs AI-derived
    industry classification, location, workflow status, and per-step
    enrichment timestamps. Related ``Signal`` and ``EnrichmentData`` rows
    are deleted together with the company (cascade delete-orphan).
    """
    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)

    # Core Identity
    name = Column(String, index=True)
    website = Column(String, index=True)  # Normalized Domain preferred
    crm_id = Column(String, unique=True, index=True, nullable=True)  # Link to D365

    # Classification — the CRM-sourced value vs. the AI suggestion are
    # kept side by side so they can be compared/reconciled later.
    industry_crm = Column(String, nullable=True)  # The "allowed" industry
    industry_ai = Column(String, nullable=True)  # The AI suggested industry

    # Location
    city = Column(String, nullable=True)
    country = Column(String, default="DE")  # ISO-style country code; defaults to Germany

    # Workflow Status (e.g. starts at "NEW"; indexed for list filtering)
    status = Column(String, default="NEW", index=True)

    # Granular Process Tracking (Timestamps)
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 and
    # produces naive datetimes — consider timezone-aware UTC timestamps.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # One nullable timestamp per enrichment step; NULL means "never run".
    last_scraped_at = Column(DateTime, nullable=True)
    last_wiki_search_at = Column(DateTime, nullable=True)
    last_classification_at = Column(DateTime, nullable=True)
    last_signal_check_at = Column(DateTime, nullable=True)

    # Relationships — children are removed when the company is deleted.
    signals = relationship("Signal", back_populates="company", cascade="all, delete-orphan")
    enrichment_data = relationship("EnrichmentData", back_populates="company", cascade="all, delete-orphan")
|
||||
|
||||
|
||||
class Signal(Base):
    """
    Represents a specific sales signal or potential.

    One row per detected signal per company, with a confidence score and
    an optional proof snippet quoting the source material.

    Example: type='has_spa', value='true', proof='Wellnessbereich mit 2000qm'
    """
    __tablename__ = "signals"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))  # Owning company

    signal_type = Column(String, index=True)  # e.g. "robotics_cleaning_potential"
    confidence = Column(Float, default=0.0)  # 0.0 to 1.0
    value = Column(String)  # "High", "Medium", "Yes", "No"
    proof_text = Column(Text, nullable=True)  # Snippet from website/source

    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 (naive UTC).
    created_at = Column(DateTime, default=datetime.utcnow)

    # Back-reference; the parent cascades deletes to its signals.
    company = relationship("Company", back_populates="signals")
|
||||
|
||||
|
||||
class EnrichmentData(Base):
    """
    Stores raw data blobs (HTML, API responses) to allow re-processing.

    Keeping the unprocessed payload per source lets classification be
    re-run later without re-fetching from the network.
    """
    __tablename__ = "enrichment_data"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))  # Owning company

    source_type = Column(String)  # "website_scrape", "wikipedia_api", "google_serp"
    content = Column(JSON)  # The raw data

    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 (naive UTC).
    created_at = Column(DateTime, default=datetime.utcnow)

    # Back-reference; the parent cascades deletes to its enrichment rows.
    company = relationship("Company", back_populates="enrichment_data")
|
||||
|
||||
class ImportLog(Base):
    """
    Logs bulk imports (e.g. from Excel lists).

    One row per import run with simple row counters; not linked to the
    imported companies themselves.
    """
    __tablename__ = "import_logs"

    id = Column(Integer, primary_key=True)
    filename = Column(String)  # Name of the uploaded file
    import_type = Column(String)  # "crm_dump" or "event_list"
    total_rows = Column(Integer)  # Rows seen in the source file
    imported_rows = Column(Integer)  # Rows actually created
    duplicate_rows = Column(Integer)  # Rows skipped as duplicates
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 (naive UTC).
    created_at = Column(DateTime, default=datetime.utcnow)
|
||||
|
||||
# ==============================================================================
|
||||
# UTILS
|
||||
# ==============================================================================
|
||||
|
||||
def init_db():
    """Create every table registered on the declarative base.

    Idempotent: tables that already exist are left untouched (no
    migrations are performed).
    """
    metadata = Base.metadata
    metadata.create_all(bind=engine)
|
||||
|
||||
def get_db():
    """FastAPI dependency yielding a database session.

    Opens a fresh session per request and guarantees it is closed once
    the request is finished, even on error.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
||||
Reference in New Issue
Block a user