"""SQLAlchemy models and session plumbing for the lead-enrichment database.

Defines the ORM models (Company, Signal, EnrichmentData, ImportLog) plus the
engine / session factory and the small helpers (`init_db`, `get_db`) used as
application wiring.
"""

from datetime import datetime

from sqlalchemy import (
    JSON,
    Boolean,
    Column,
    DateTime,
    Float,
    ForeignKey,
    Integer,
    String,
    Text,
    create_engine,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker

from .config import settings

# ==============================================================================
# ENGINE / SESSION SETUP
# ==============================================================================

# "check_same_thread" is a SQLite-specific DBAPI flag (it lets the pooled
# connection be shared across worker threads).  Other drivers (psycopg2,
# mysqlclient, ...) reject unknown connect args with a TypeError, so only
# pass it when the configured URL is actually SQLite.
_connect_args = (
    {"check_same_thread": False}
    if settings.DATABASE_URL.startswith("sqlite")
    else {}
)

engine = create_engine(settings.DATABASE_URL, connect_args=_connect_args)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()

# NOTE(review): datetime.utcnow is deprecated since Python 3.12 and yields
# naive timestamps.  Kept as-is to preserve stored-value semantics; migrating
# to timezone-aware datetimes (datetime.now(timezone.utc)) needs a schema /
# data review first.

# ==============================================================================
# MODELS
# ==============================================================================


class Company(Base):
    """A target company being tracked through the enrichment workflow.

    Carries CRM identity, AI/CRM industry classification, location, a coarse
    workflow ``status``, and per-stage processing timestamps.
    """

    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)

    # Core Identity
    name = Column(String, index=True)
    website = Column(String, index=True)  # Normalized Domain preferred
    crm_id = Column(String, unique=True, index=True, nullable=True)  # Link to D365

    # Classification
    industry_crm = Column(String, nullable=True)  # The "allowed" industry
    industry_ai = Column(String, nullable=True)   # The AI suggested industry

    # Location
    city = Column(String, nullable=True)
    country = Column(String, default="DE")

    # Workflow Status
    status = Column(String, default="NEW", index=True)

    # Granular Process Tracking (Timestamps)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
    last_scraped_at = Column(DateTime, nullable=True)
    last_wiki_search_at = Column(DateTime, nullable=True)
    last_classification_at = Column(DateTime, nullable=True)
    last_signal_check_at = Column(DateTime, nullable=True)

    # Relationships — children are deleted with their company
    signals = relationship(
        "Signal", back_populates="company", cascade="all, delete-orphan"
    )
    enrichment_data = relationship(
        "EnrichmentData", back_populates="company", cascade="all, delete-orphan"
    )


class Signal(Base):
    """Represents a specific sales signal or potential.

    Example: type='has_spa', value='true', proof='Wellnessbereich mit 2000qm'
    """

    __tablename__ = "signals"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))

    signal_type = Column(String, index=True)  # e.g. "robotics_cleaning_potential"
    confidence = Column(Float, default=0.0)   # 0.0 to 1.0
    value = Column(String)                    # "High", "Medium", "Yes", "No"
    proof_text = Column(Text, nullable=True)  # Snippet from website/source

    created_at = Column(DateTime, default=datetime.utcnow)

    company = relationship("Company", back_populates="signals")


class EnrichmentData(Base):
    """Stores raw data blobs (HTML, API responses) to allow re-processing."""

    __tablename__ = "enrichment_data"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))

    source_type = Column(String)  # "website_scrape", "wikipedia_api", "google_serp"
    content = Column(JSON)        # The raw data

    created_at = Column(DateTime, default=datetime.utcnow)

    company = relationship("Company", back_populates="enrichment_data")


class ImportLog(Base):
    """Logs bulk imports (e.g. from Excel lists)."""

    __tablename__ = "import_logs"

    id = Column(Integer, primary_key=True)
    filename = Column(String)
    import_type = Column(String)  # "crm_dump" or "event_list"
    total_rows = Column(Integer)
    imported_rows = Column(Integer)
    duplicate_rows = Column(Integer)
    created_at = Column(DateTime, default=datetime.utcnow)


# ==============================================================================
# UTILS
# ==============================================================================


def init_db():
    """Create all tables registered on ``Base`` (no-op for existing tables)."""
    Base.metadata.create_all(bind=engine)


def get_db():
    """Yield a database session and guarantee it is closed afterwards.

    Generator-style dependency: the session is handed to the caller at
    ``yield`` and closed in ``finally`` even if the request handler raises.
    """
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()