- Fixed a critical schema mismatch in the company-explorer by forcing a database re-initialization with a new database file. This ensures the application code is in sync with the database schema. - Documented the schema mismatch incident and its resolution in MIGRATION_PLAN.md. - Restored and enhanced BUILDER_APPS_MIGRATION.md by recovering extensive, valuable content from the git history that had been accidentally deleted. The guide now once again includes detailed troubleshooting steps and code templates for common migration pitfalls.
231 lines
9.1 KiB
Python
231 lines
9.1 KiB
Python
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, Float, Boolean, JSON
|
|
from sqlalchemy.ext.declarative import declarative_base
|
|
from sqlalchemy.orm import sessionmaker, relationship
|
|
from datetime import datetime
|
|
from .config import settings
|
|
|
|
# Setup
|
|
# "check_same_thread" is an option of the SQLite driver only. It is passed
# solely when DATABASE_URL points at SQLite, so that another backend URL does
# not hand an unknown connect argument to its driver.
_connect_args = (
    {"check_same_thread": False}
    if str(settings.DATABASE_URL).startswith("sqlite")
    else {}
)

# Engine shared by the whole module.
engine = create_engine(settings.DATABASE_URL, connect_args=_connect_args)

# Session factory bound to the engine; commits and flushes are explicit.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class that every model below inherits from.
Base = declarative_base()
|
|
|
|
# ==============================================================================
|
|
# MODELS
|
|
# ==============================================================================
|
|
|
|
class Company(Base):
    """
    Represents a company together with its classification, location,
    workflow status and per-step processing timestamps.
    """
    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)

    # --- Core identity ---
    name = Column(String, index=True)
    # Normalized domain preferred
    website = Column(String, index=True)
    # Link to D365
    crm_id = Column(String, unique=True, index=True, nullable=True)

    # --- Classification ---
    # The "allowed" industry
    industry_crm = Column(String, nullable=True)
    # The AI-suggested industry
    industry_ai = Column(String, nullable=True)

    # --- Location ---
    city = Column(String, nullable=True)
    country = Column(String, default="DE")

    # --- Workflow status ---
    status = Column(String, default="NEW", index=True)

    # --- Granular process tracking (timestamps) ---
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    last_scraped_at = Column(DateTime, nullable=True)
    last_wiki_search_at = Column(DateTime, nullable=True)
    last_classification_at = Column(DateTime, nullable=True)
    last_signal_check_at = Column(DateTime, nullable=True)

    # --- Relationships (child rows are deleted together with the company) ---
    signals = relationship(
        "Signal",
        back_populates="company",
        cascade="all, delete-orphan",
    )
    enrichment_data = relationship(
        "EnrichmentData",
        back_populates="company",
        cascade="all, delete-orphan",
    )
    contacts = relationship(
        "Contact",
        back_populates="company",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class Contact(Base):
    """
    A person linked to a company via ``company_id``.
    """
    __tablename__ = "contacts"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"), index=True)

    # Values such as "männlich" / "weiblich" (German for male / female)
    gender = Column(String)
    # Academic title, e.g. "Dr.", "Prof."
    title = Column(String, default="")
    first_name = Column(String)
    last_name = Column(String)
    email = Column(String, index=True)
    # Job title as printed on the business card
    job_title = Column(String)
    # "De" or "En"
    language = Column(String, default="De")

    # Role label, e.g. "Operativer Entscheider" (operational decision-maker)
    role = Column(String)
    # Marketing status
    status = Column(String, default="")

    is_primary = Column(Boolean, default=False)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    company = relationship("Company", back_populates="contacts")
|
|
|
|
|
|
class Industry(Base):
    """
    A single industry vertical.
    """
    __tablename__ = "industries"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, unique=True, index=True)
    # Free-text delimitation of what belongs to this industry
    description = Column(Text, nullable=True)
    is_focus = Column(Boolean, default=False)

    # Optional link to a robotics category (the "product" relevant for this
    # industry).
    primary_category_id = Column(
        Integer,
        ForeignKey("robotics_categories.id"),
        nullable=True,
    )

    created_at = Column(DateTime, default=datetime.utcnow)
|
|
|
|
|
|
class JobRoleMapping(Base):
    """
    Associates a job-title pattern (regex or simple string) with a role.
    """
    __tablename__ = "job_role_mappings"

    id = Column(Integer, primary_key=True, index=True)
    # Pattern to match, e.g. "%CTO%" or "Technischer Leiter"
    pattern = Column(String, unique=True)
    # The role assigned when the pattern matches
    role = Column(String)

    created_at = Column(DateTime, default=datetime.utcnow)
|
|
|
|
|
|
class Signal(Base):
    """
    A single sales signal or potential recorded for a company.

    Example: signal_type='has_spa', value='true',
    proof_text='Wellnessbereich mit 2000qm' (German: "2000 sqm wellness area").
    """
    __tablename__ = "signals"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))

    # e.g. "robotics_cleaning_potential"
    signal_type = Column(String, index=True)
    # Ranges from 0.0 to 1.0
    confidence = Column(Float, default=0.0)
    # "High", "Medium", "Yes", "No"
    value = Column(String)
    # Snippet from the website / source
    proof_text = Column(Text, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow)

    company = relationship("Company", back_populates="signals")
|
|
|
|
|
|
class EnrichmentData(Base):
    """
    Raw data blobs (HTML, API responses) kept per company so they can be
    re-processed later.
    """
    __tablename__ = "enrichment_data"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))

    # "website_scrape", "wikipedia", "google_serp"
    source_type = Column(String)
    # The raw data
    content = Column(JSON)
    # Manual override flag
    is_locked = Column(Boolean, default=False)
    # Marks Wikipedia as definitively empty
    wiki_verified_empty = Column(Boolean, default=False)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    company = relationship("Company", back_populates="enrichment_data")
|
|
|
|
|
|
class RoboticsCategory(Base):
    """
    Definition of a robotics category, stored in the database so that it can
    be customized by users via the UI.
    """
    __tablename__ = "robotics_categories"

    id = Column(Integer, primary_key=True, index=True)
    # Short identifier, e.g. "cleaning", "service"
    key = Column(String, unique=True, index=True)
    # Display name
    name = Column(String)
    # The core definition used in LLM prompts
    description = Column(Text)
    # Instructions for the chain-of-thought
    reasoning_guide = Column(Text)

    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )
|
|
|
|
class ImportLog(Base):
    """
    One log entry per bulk import (e.g. from Excel lists).
    """
    __tablename__ = "import_logs"

    id = Column(Integer, primary_key=True)
    filename = Column(String)
    # "crm_dump" or "event_list"
    import_type = Column(String)

    # Row counters recorded for the import
    total_rows = Column(Integer)
    imported_rows = Column(Integer)
    duplicate_rows = Column(Integer)

    created_at = Column(DateTime, default=datetime.utcnow)
|
|
|
|
# ==============================================================================
|
|
# UTILS
|
|
# ==============================================================================
|
|
|
|
def init_db():
    """Run ``create_all`` for the models on ``Base`` and then seed the robotics defaults."""
    metadata = Base.metadata
    metadata.create_all(bind=engine)

    init_robotics_defaults()
|
|
|
|
def init_robotics_defaults():
    """Seeds the database with default robotics categories if empty.

    Opens its own session, inserts the four built-in categories only when the
    ``robotics_categories`` table has no rows, and always closes the session.
    Any exception is caught and printed instead of being re-raised.
    """
    session = SessionLocal()
    try:
        # Nothing to do when at least one category already exists.
        if session.query(RoboticsCategory).count() != 0:
            return

        seed_categories = [
            RoboticsCategory(
                key="cleaning",
                name="Cleaning Robots",
                description="Does the company manage large floors, hospitals, hotels, or public spaces? (Keywords: Hygiene, Cleaning, SPA, Facility Management)",
                reasoning_guide="High (80-100): Large industrial floors, shopping malls, hospitals, airports. Medium (40-79): Mid-sized production, large offices, supermarkets. Low (0-39): Small offices, software consultancies.",
            ),
            RoboticsCategory(
                key="transport",
                name="Intralogistics / Transport",
                description="Do they move goods internally? (Keywords: Warehouse, Intralogistics, Production line, Hospital logistics)",
                reasoning_guide="High: Manufacturing, E-Commerce fulfillment, Hospitals. Low: Pure service providers, law firms.",
            ),
            RoboticsCategory(
                key="security",
                name="Security & Surveillance",
                description="Do they have large perimeters, solar parks, wind farms, or night patrols? (Keywords: Werkschutz, Security, Monitoring)",
                reasoning_guide="High: Critical infrastructure, large open-air storage, factories with valuable assets, 24/7 operations. Medium: Standard corporate HQs. Low: Offices in shared buildings.",
            ),
            RoboticsCategory(
                key="service",
                name="Service / Waiter Robots",
                description="Do they operate restaurants, nursing homes, or event venues where food/items need to be served to people?",
                reasoning_guide="High: Restaurants, Hotels (Room Service), Nursing Homes (Meal delivery). Low: B2B manufacturing, closed offices, pure installation services.",
            ),
        ]

        session.add_all(seed_categories)
        session.commit()
        print("Seeded Robotics Categories.")
    except Exception as exc:
        # Best-effort seeding: report the problem and carry on.
        print(f"Error seeding robotics defaults: {exc}")
    finally:
        session.close()
|
|
|
|
def get_db():
    """Yield one fresh session from ``SessionLocal`` and close it afterwards.

    This is a generator: it yields exactly one session, and the ``finally``
    clause closes that session when the generator is resumed, closed, or has
    an exception thrown into it.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()