Files
Brancheneinstufung2/company-explorer/backend/database.py
Floke 2c7bb262ef feat(company-explorer): Initial Web UI & Backend with Enrichment Flow
This commit introduces the foundational elements for the new "Company Explorer" web application, marking a significant step away from the legacy Google Sheets / CLI system.

Key changes include:
- Project Structure: A new `company-explorer` directory with separate `backend` (FastAPI) and frontend (React/Vite) components.
- Data Persistence: Migration from Google Sheets to a local SQLite database using SQLAlchemy.
- Core Utilities: Extraction and cleanup of essential helper functions (LLM wrappers, text utilities) into a dedicated module.
- Backend Services: , ,  for AI-powered analysis, and  logic.
- Frontend UI: Basic React application with company table, import wizard, and dynamic inspector sidebar.
- Docker Integration: Updated  and  for multi-stage builds and sideloading.
- Deployment & Access: Integrated into the central Nginx proxy and dashboard.

Lessons Learned & Fixed during development:
- Frontend Asset Loading: Addressed issues with Vite's  path and FastAPI's .
- TypeScript Configuration: Added  and .
- Database Schema Evolution: Solved  errors by forcing a new database file and correcting  override.
- Logging: Implemented robust file-based logging.

This new foundation provides a powerful and maintainable platform for future B2B robotics lead generation.
2026-01-07 17:55:08 +00:00

113 lines
3.9 KiB
Python

from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, Float, Boolean, JSON
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship
from datetime import datetime
from .config import settings
# Setup
# ``check_same_thread`` is an argument of the sqlite3 driver only, so it is
# passed solely when the configured URL targets SQLite. For any other backend
# no extra connect arguments are supplied.
_connect_args = (
    {"check_same_thread": False}
    if str(settings.DATABASE_URL).startswith("sqlite")
    else {}
)

# Engine bound to the configured database URL.
engine = create_engine(settings.DATABASE_URL, connect_args=_connect_args)

# Session factory: explicit commits, no automatic flush before queries.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class shared by all ORM models in this module.
Base = declarative_base()
# ==============================================================================
# MODELS
# ==============================================================================
class Company(Base):
    """
    ORM model mapped to the ``companies`` table.

    Groups a company's core identity, classification, location, workflow
    status and per-step process timestamps. Related ``Signal`` and
    ``EnrichmentData`` rows are exposed through the ``signals`` and
    ``enrichment_data`` relationships, both configured with the
    ``all, delete-orphan`` cascade.
    """

    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)

    # --- Core identity ---
    name = Column(String, index=True)
    # Normalized domain preferred
    website = Column(String, index=True)
    # Link to D365
    crm_id = Column(String, nullable=True, unique=True, index=True)

    # --- Classification ---
    # The "allowed" industry
    industry_crm = Column(String, nullable=True)
    # The AI-suggested industry
    industry_ai = Column(String, nullable=True)

    # --- Location ---
    city = Column(String, nullable=True)
    country = Column(String, default="DE")

    # --- Workflow status ---
    status = Column(String, index=True, default="NEW")

    # --- Granular process tracking (timestamps) ---
    created_at = Column(DateTime, default=datetime.utcnow)
    # Set on insert and refreshed again on every update of the row.
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )
    last_scraped_at = Column(DateTime, nullable=True)
    last_wiki_search_at = Column(DateTime, nullable=True)
    last_classification_at = Column(DateTime, nullable=True)
    last_signal_check_at = Column(DateTime, nullable=True)

    # --- Relationships ---
    signals = relationship(
        "Signal",
        back_populates="company",
        cascade="all, delete-orphan",
    )
    enrichment_data = relationship(
        "EnrichmentData",
        back_populates="company",
        cascade="all, delete-orphan",
    )
class Signal(Base):
    """
    A specific sales signal or potential attached to a company.

    Example: signal_type='has_spa', value='true',
    proof_text='Wellnessbereich mit 2000qm' (German: "spa area of 2000 sqm").
    """

    __tablename__ = "signals"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))

    # e.g. "robotics_cleaning_potential"
    signal_type = Column(String, index=True)
    # 0.0 to 1.0
    confidence = Column(Float, default=0.0)
    # "High", "Medium", "Yes", "No"
    value = Column(String)
    # Snippet from website/source
    proof_text = Column(Text, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow)

    # Counterpart of ``Company.signals``.
    company = relationship("Company", back_populates="signals")
class EnrichmentData(Base):
    """
    Raw data blob (HTML, API response) kept per company so that it can be
    re-processed later.
    """

    __tablename__ = "enrichment_data"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))

    # "website_scrape", "wikipedia_api", "google_serp"
    source_type = Column(String)
    # The raw data
    content = Column(JSON)

    created_at = Column(DateTime, default=datetime.utcnow)

    # Counterpart of ``Company.enrichment_data``.
    company = relationship("Company", back_populates="enrichment_data")
class ImportLog(Base):
    """
    Log entry for a bulk import (e.g. from Excel lists).
    """

    __tablename__ = "import_logs"

    id = Column(Integer, primary_key=True)

    filename = Column(String)
    # "crm_dump" or "event_list"
    import_type = Column(String)

    # Row counters recorded for the import.
    total_rows = Column(Integer)
    imported_rows = Column(Integer)
    duplicate_rows = Column(Integer)

    created_at = Column(DateTime, default=datetime.utcnow)
# ==============================================================================
# UTILS
# ==============================================================================
def init_db():
    """Create the tables of every model registered on ``Base`` via the module-level engine."""
    metadata = Base.metadata
    metadata.create_all(bind=engine)
def get_db():
    """
    Yield a fresh ``SessionLocal`` session and close it afterwards.

    This is a generator: the session is created when iteration starts, handed
    to the caller, and closed in the ``finally`` clause once the generator is
    resumed, closed, or has an exception thrown into it.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Always release the session, even if the consumer raised.
        session.close()