from sqlalchemy import (
    create_engine,
    Column,
    Integer,
    String,
    Text,
    DateTime,
    ForeignKey,
    Float,
    Boolean,
    JSON,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship
from datetime import datetime
from .config import settings


# Setup
def _engine_connect_args(database_url):
    """Return the DBAPI ``connect_args`` suitable for *database_url*.

    ``check_same_thread`` is a keyword of the ``sqlite3`` driver only; other
    drivers reject it, so it is supplied exclusively for SQLite URLs.
    """
    if str(database_url).startswith("sqlite"):
        return {"check_same_thread": False}
    return {}


engine = create_engine(
    settings.DATABASE_URL,
    connect_args=_engine_connect_args(settings.DATABASE_URL),
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()

# ==============================================================================
# MODELS
# ==============================================================================
# NOTE: all timestamp defaults use ``datetime.utcnow`` and therefore store
# naive (timezone-less) UTC values.


class Company(Base):
    """
    A company record (table ``companies``).

    Owns its signals, enrichment data, reported mistakes and contacts: those
    relationships cascade with ``all, delete-orphan``.
    """
    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)

    # Core Identity
    name = Column(String, index=True)
    website = Column(String, index=True)  # Normalized domain preferred
    crm_id = Column(String, unique=True, index=True, nullable=True)  # Link to D365

    # Classification
    industry_crm = Column(String, nullable=True)  # The "allowed" industry
    industry_ai = Column(String, nullable=True)  # The AI suggested industry

    # Location
    city = Column(String, nullable=True)
    country = Column(String, default="DE")

    # Workflow Status
    status = Column(String, default="NEW", index=True)

    # Granular Process Tracking (Timestamps)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
    last_scraped_at = Column(DateTime, nullable=True)
    last_wiki_search_at = Column(DateTime, nullable=True)
    last_classification_at = Column(DateTime, nullable=True)
    last_signal_check_at = Column(DateTime, nullable=True)

    # NEW: Quantitative Potential Metrics (v0.7.0)
    calculated_metric_name = Column(String, nullable=True)  # e.g., "Anzahl Betten" (number of beds)
    calculated_metric_value = Column(Float, nullable=True)  # e.g., 180.0
    calculated_metric_unit = Column(String, nullable=True)  # e.g., "Betten" (beds)
    standardized_metric_value = Column(Float, nullable=True)  # e.g., 4500.0
    standardized_metric_unit = Column(String, nullable=True)  # e.g., "m²"
    metric_source = Column(String, nullable=True)  # "website", "wikipedia", "serpapi"
    metric_proof_text = Column(Text, nullable=True)  # Snippet showing the value (e.g. "2,0 Mio Besucher (2020)")
    metric_source_url = Column(Text, nullable=True)  # URL where the proof was found
    metric_confidence = Column(Float, nullable=True)  # 0.0 - 1.0
    metric_confidence_reason = Column(Text, nullable=True)  # Why is it high/low?

    # Relationships
    signals = relationship("Signal", back_populates="company", cascade="all, delete-orphan")
    enrichment_data = relationship("EnrichmentData", back_populates="company", cascade="all, delete-orphan")
    reported_mistakes = relationship("ReportedMistake", back_populates="company", cascade="all, delete-orphan")
    contacts = relationship("Contact", back_populates="company", cascade="all, delete-orphan")


class Contact(Base):
    """
    Represents a person associated with a company.
    """
    __tablename__ = "contacts"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"), index=True)

    gender = Column(String)  # "männlich" (male), "weiblich" (female)
    title = Column(String, default="")  # "Dr.", "Prof."
    first_name = Column(String)
    last_name = Column(String)
    email = Column(String, index=True)
    job_title = Column(String)  # Title as printed on the business card
    language = Column(String, default="De")  # "De", "En"

    role = Column(String)  # e.g. "Operativer Entscheider" (operational decision maker)
    status = Column(String, default="")  # Marketing status

    is_primary = Column(Boolean, default=False)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    company = relationship("Company", back_populates="contacts")


class Industry(Base):
    """
    Represents a specific industry vertical ("Branche").
    """
    __tablename__ = "industries"

    id = Column(Integer, primary_key=True, index=True)

    notion_id = Column(String, unique=True, index=True, nullable=True)  # Notion Page ID
    name = Column(String, unique=True, index=True)
    description = Column(Text, nullable=True)  # Definition from Notion

    # Notion Sync Fields (V3.0+)
    status_notion = Column(String, nullable=True)  # e.g. "P1 Focus Industry"
    is_focus = Column(Boolean, default=False)  # Derived from status_notion

    # NEW SCHEMA FIELDS (from MIGRATION_PLAN)
    metric_type = Column(String, nullable=True)  # Unit_Count, Area_in, Area_out
    min_requirement = Column(Float, nullable=True)
    whale_threshold = Column(Float, nullable=True)
    proxy_factor = Column(Float, nullable=True)

    scraper_search_term = Column(Text, nullable=True)
    scraper_keywords = Column(Text, nullable=True)  # JSON array of strings
    standardization_logic = Column(Text, nullable=True)  # Formula, e.g. "wert * 25m²"

    # Optional link to a Robotics Category (the "product" relevant for this industry)
    primary_category_id = Column(Integer, ForeignKey("robotics_categories.id"), nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow)


class JobRoleMapping(Base):
    """
    Maps job title patterns (regex or simple string) to Roles.
    """
    __tablename__ = "job_role_mappings"

    id = Column(Integer, primary_key=True, index=True)
    pattern = Column(String, unique=True)  # e.g. "%CTO%" or "Technischer Leiter"
    role = Column(String)  # The target Role

    created_at = Column(DateTime, default=datetime.utcnow)


class Signal(Base):
    """
    Represents a specific sales signal or potential.
    Example: type='has_spa', value='true', proof='Wellnessbereich mit 2000qm'
    """
    __tablename__ = "signals"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))

    signal_type = Column(String, index=True)  # e.g. "robotics_cleaning_potential"
    confidence = Column(Float, default=0.0)  # 0.0 to 1.0
    value = Column(String)  # "High", "Medium", "Yes", "No"
    proof_text = Column(Text, nullable=True)  # Snippet from website/source

    created_at = Column(DateTime, default=datetime.utcnow)

    company = relationship("Company", back_populates="signals")


class EnrichmentData(Base):
    """
    Stores raw data blobs (HTML, API responses) to allow re-processing.
    """
    __tablename__ = "enrichment_data"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))

    source_type = Column(String)  # "website_scrape", "wikipedia", "google_serp"
    content = Column(JSON)  # The raw data
    is_locked = Column(Boolean, default=False)  # Manual override flag
    wiki_verified_empty = Column(Boolean, default=False)  # NEW: Mark Wikipedia as definitively empty

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    company = relationship("Company", back_populates="enrichment_data")


class RoboticsCategory(Base):
    """
    Stores definitions for robotics categories to allow user customization via UI.
    """
    __tablename__ = "robotics_categories"

    id = Column(Integer, primary_key=True, index=True)
    notion_id = Column(String, unique=True, index=True, nullable=True)  # Notion Page ID
    key = Column(String, unique=True, index=True)  # e.g. "cleaning", "service"
    name = Column(String)  # Display Name
    description = Column(Text)  # The core definition used in LLM prompts
    reasoning_guide = Column(Text)  # Instructions for the Chain-of-Thought

    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)


class ImportLog(Base):
    """
    Logs bulk imports (e.g. from Excel lists).
    """
    __tablename__ = "import_logs"

    id = Column(Integer, primary_key=True)
    filename = Column(String)
    import_type = Column(String)  # "crm_dump" or "event_list"
    total_rows = Column(Integer)
    imported_rows = Column(Integer)
    duplicate_rows = Column(Integer)
    created_at = Column(DateTime, default=datetime.utcnow)


class ReportedMistake(Base):
    """
    A reported error in one field of a company record (table
    ``reported_mistakes``), holding the wrong and corrected values plus
    optional supporting evidence and a review status.
    """
    __tablename__ = "reported_mistakes"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"), index=True, nullable=False)
    field_name = Column(String, nullable=False)
    wrong_value = Column(Text, nullable=True)
    corrected_value = Column(Text, nullable=True)
    source_url = Column(String, nullable=True)
    quote = Column(Text, nullable=True)
    user_comment = Column(Text, nullable=True)
    status = Column(String, default="PENDING", nullable=False)  # PENDING, APPROVED, REJECTED
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    company = relationship("Company", back_populates="reported_mistakes")


# ==============================================================================
# UTILS
# ==============================================================================

# Seed rows inserted by init_robotics_defaults() when the table is empty.
_DEFAULT_ROBOTICS_CATEGORIES = (
    {
        "key": "cleaning",
        "name": "Cleaning Robots",
        "description": "Does the company manage large floors, hospitals, hotels, or public spaces? (Keywords: Hygiene, Cleaning, SPA, Facility Management)",
        "reasoning_guide": "High (80-100): Large industrial floors, shopping malls, hospitals, airports. Medium (40-79): Mid-sized production, large offices, supermarkets. Low (0-39): Small offices, software consultancies.",
    },
    {
        "key": "transport",
        "name": "Intralogistics / Transport",
        "description": "Do they move goods internally? (Keywords: Warehouse, Intralogistics, Production line, Hospital logistics)",
        "reasoning_guide": "High: Manufacturing, E-Commerce fulfillment, Hospitals. Low: Pure service providers, law firms.",
    },
    {
        "key": "security",
        "name": "Security & Surveillance",
        "description": "Do they have large perimeters, solar parks, wind farms, or night patrols? (Keywords: Werkschutz, Security, Monitoring)",
        "reasoning_guide": "High: Critical infrastructure, large open-air storage, factories with valuable assets, 24/7 operations. Medium: Standard corporate HQs. Low: Offices in shared buildings.",
    },
    {
        "key": "service",
        "name": "Service / Waiter Robots",
        "description": "Do they operate restaurants, nursing homes, or event venues where food/items need to be served to people?",
        "reasoning_guide": "High: Restaurants, Hotels (Room Service), Nursing Homes (Meal delivery). Low: B2B manufacturing, closed offices, pure installation services.",
    },
)


def init_db():
    """Create all tables declared on ``Base`` and seed the robotics defaults."""
    Base.metadata.create_all(bind=engine)
    init_robotics_defaults()


def init_robotics_defaults():
    """Seeds the database with default robotics categories if empty.

    Best-effort: any failure is printed, the session is rolled back, and the
    exception is not propagated to the caller.
    """
    db = SessionLocal()
    try:
        if db.query(RoboticsCategory).count() == 0:
            for category in _DEFAULT_ROBOTICS_CATEGORIES:
                db.add(RoboticsCategory(**category))
            db.commit()
            print("Seeded Robotics Categories.")
    except Exception as e:
        # Discard any pending, partially-added seed rows before reporting.
        db.rollback()
        print(f"Error seeding robotics defaults: {e}")
    finally:
        db.close()


def get_db():
    """Yield a new ``SessionLocal`` session and close it once the consumer is done."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()