This commit introduces the foundational elements for the new "Company Explorer" web application, marking a significant step away from the legacy Google Sheets / CLI system.

Key changes include:
- Project Structure: A new project directory with separate backend (FastAPI) and frontend (React/Vite) components.
- Data Persistence: Migration from Google Sheets to a local SQLite database using SQLAlchemy.
- Core Utilities: Extraction and cleanup of essential helper functions (LLM wrappers, text utilities) into a shared module.
- Backend Services: Dedicated service modules, including one for AI-powered analysis, plus supporting logic.
- Frontend UI: Basic React application with a company table, an import wizard, and a dynamic inspector sidebar.
- Docker Integration: Updated Docker build and compose configuration for multi-stage builds and sideloading.
- Deployment & Access: Integrated into the central Nginx proxy and dashboard, and accessible through that proxy.

Lessons learned and fixed during development:
- Frontend Asset Loading: Addressed issues with Vite's asset path and FastAPI's static-file serving.
- TypeScript Configuration: Added the missing TypeScript configuration files.
- Database Schema Evolution: Solved schema errors by forcing a new database file and correcting a configuration override.
- Logging: Implemented robust file-based logging.

This new foundation provides a powerful and maintainable platform for future B2B robotics lead generation.
113 lines
3.9 KiB
Python
113 lines
3.9 KiB
Python
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, Float, Boolean, JSON
|
|
from sqlalchemy.ext.declarative import declarative_base
|
|
from sqlalchemy.orm import sessionmaker, relationship
|
|
from datetime import datetime
|
|
from .config import settings
|
|
|
|
# Setup
#
# ``check_same_thread`` is an argument of the SQLite driver only: it lets a
# connection be used from a thread other than the one that created it. Other
# DBAPI drivers reject unknown connect arguments, so it is passed only when
# DATABASE_URL points at SQLite; every other backend gets an empty dict, which
# is create_engine's own default.
engine = create_engine(
    settings.DATABASE_URL,
    connect_args=(
        {"check_same_thread": False}
        if str(settings.DATABASE_URL).startswith("sqlite")
        else {}
    ),
)

# Session factory bound to the engine; commits and flushes are explicit.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class that all ORM models in this module inherit from.
Base = declarative_base()
|
|
|
|
# ==============================================================================
|
|
# MODELS
|
|
# ==============================================================================
|
|
|
|
class Company(Base):
    """ORM model for a single company, stored in the ``companies`` table.

    Carries identity, classification, location and workflow-status columns,
    one timestamp per processing step, and the collections of ``Signal`` and
    ``EnrichmentData`` rows that belong to the company.
    """

    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)

    # --- Core identity ------------------------------------------------------
    name = Column(String, index=True)
    # Normalized domain preferred.
    website = Column(String, index=True)
    # Link to D365; optional, with a unique constraint.
    crm_id = Column(String, nullable=True, unique=True, index=True)

    # --- Classification -----------------------------------------------------
    # The "allowed" industry.
    industry_crm = Column(String, nullable=True)
    # The AI-suggested industry.
    industry_ai = Column(String, nullable=True)

    # --- Location -----------------------------------------------------------
    city = Column(String, nullable=True)
    country = Column(String, default="DE")

    # --- Workflow status ----------------------------------------------------
    status = Column(String, index=True, default="NEW")

    # --- Granular process tracking (timestamps) -----------------------------
    # Both defaults are callables, so they are evaluated per row; utcnow
    # produces naive datetimes expressed in UTC.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # One nullable timestamp per processing step.
    last_scraped_at = Column(DateTime, nullable=True)
    last_wiki_search_at = Column(DateTime, nullable=True)
    last_classification_at = Column(DateTime, nullable=True)
    last_signal_check_at = Column(DateTime, nullable=True)

    # --- Relationships ------------------------------------------------------
    # Child rows follow the company's lifecycle ("all, delete-orphan").
    signals = relationship(
        "Signal",
        back_populates="company",
        cascade="all, delete-orphan",
    )
    enrichment_data = relationship(
        "EnrichmentData",
        back_populates="company",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class Signal(Base):
    """A specific sales signal or potential detected for a company.

    Example: type='has_spa', value='true',
    proof='Wellnessbereich mit 2000qm' (German for "2000 sqm wellness area").
    """

    __tablename__ = "signals"

    id = Column(Integer, primary_key=True, index=True)
    # Owning company (references ``companies.id``).
    company_id = Column(Integer, ForeignKey("companies.id"))

    # e.g. "robotics_cleaning_potential"
    signal_type = Column(String, index=True)
    # 0.0 to 1.0
    confidence = Column(Float, default=0.0)
    # "High", "Medium", "Yes", "No"
    value = Column(String)
    # Snippet from the website / source backing the signal.
    proof_text = Column(Text, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow)

    # Counterpart of ``Company.signals``.
    company = relationship("Company", back_populates="signals")
|
|
|
|
|
|
class EnrichmentData(Base):
    """Raw data blob (HTML, API response) kept so it can be re-processed."""

    __tablename__ = "enrichment_data"

    id = Column(Integer, primary_key=True, index=True)
    # Owning company (references ``companies.id``).
    company_id = Column(Integer, ForeignKey("companies.id"))

    # "website_scrape", "wikipedia_api", "google_serp"
    source_type = Column(String)
    # The raw data, stored in a JSON column.
    content = Column(JSON)

    created_at = Column(DateTime, default=datetime.utcnow)

    # Counterpart of ``Company.enrichment_data``.
    company = relationship("Company", back_populates="enrichment_data")
|
|
|
|
class ImportLog(Base):
    """Record of one bulk import (e.g. from an Excel list)."""

    __tablename__ = "import_logs"

    id = Column(Integer, primary_key=True)
    filename = Column(String)
    # "crm_dump" or "event_list"
    import_type = Column(String)

    # Row counters for the import run.
    total_rows = Column(Integer)
    imported_rows = Column(Integer)
    duplicate_rows = Column(Integer)

    created_at = Column(DateTime, default=datetime.utcnow)
|
|
|
|
# ==============================================================================
|
|
# UTILS
|
|
# ==============================================================================
|
|
|
|
def init_db():
    """Create the tables of every model registered on ``Base``.

    Issues the DDL through the module-level ``engine``.
    """
    metadata = Base.metadata
    metadata.create_all(bind=engine)
|
|
|
|
def get_db():
    """Yield a new ``SessionLocal`` session and close it afterwards.

    This is a generator: the session is handed to the consumer at the
    ``yield`` and closed in ``finally``, so it is released even if the
    consumer raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()