feat(company-explorer): Initial Web UI & Backend with Enrichment Flow

This commit introduces the foundational elements for the new "Company Explorer" web application, marking a significant step away from the legacy Google Sheets / CLI system.

Key changes include:
- Project Structure: A new project directory with separate backend (FastAPI) and frontend (React/Vite) components.
- Data Persistence: Migration from Google Sheets to a local SQLite database (companies_v3_final.db) using SQLAlchemy.
- Core Utilities: Extraction and cleanup of essential helper functions (LLM wrappers, text utilities) into backend/lib/core_utils.py.
- Backend Services: discovery, scraping, and classification for AI-powered analysis, and deduplication logic.
- Frontend UI: Basic React application with company table, import wizard, and dynamic inspector sidebar.
- Docker Integration: Updated  and  for multi-stage builds and sideloading.
- Deployment & Access: Integrated into central Nginx proxy and dashboard, accessible via .

Lessons Learned & Fixed during development:
- Frontend Asset Loading: Addressed issues with Vite's  path and FastAPI's .
- TypeScript Configuration: Added  and .
- Database Schema Evolution: Solved  errors by forcing a new database file and correcting  override.
- Logging: Implemented robust file-based logging (RotatingFileHandler writing to the logs_debug directory).

This new foundation provides a powerful and maintainable platform for future B2B robotics lead generation.
This commit is contained in:
2026-01-07 17:55:08 +00:00
parent e27cc995f6
commit c6a37a3c17
51 changed files with 3475 additions and 2 deletions

View File

@@ -0,0 +1,36 @@
# --- STAGE 1: Build Frontend ---
# Node is only needed to compile the React/Vite bundle; this stage is
# discarded afterwards, keeping the runtime image slim.
FROM node:20-slim AS frontend-builder
WORKDIR /build
# Copy manifests first so `npm install` stays cached unless deps change.
COPY frontend/package*.json ./
RUN npm install
COPY frontend/ ./
RUN npm run build
# --- STAGE 2: Backend & Runtime ---
FROM python:3.11-slim
WORKDIR /app
# System Dependencies (build-essential for Python packages with C extensions)
RUN apt-get update && apt-get install -y \
build-essential \
&& rm -rf /var/lib/apt/lists/*
# Copy Requirements & Install
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy Built Frontend from Stage 1 (To a safe location outside /app, so a
# dev volume mounted over /app cannot shadow the built static assets)
COPY --from=frontend-builder /build/dist /frontend_static
# Copy Backend Source
COPY backend ./backend
# Environment Variables
ENV PYTHONPATH=/app
ENV PYTHONUNBUFFERED=1
# Expose Port
EXPOSE 8000
# Start FastAPI
CMD ["uvicorn", "backend.app:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -0,0 +1,314 @@
from fastapi import FastAPI, Depends, HTTPException, Query, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session, joinedload
from typing import List, Optional, Dict, Any
from pydantic import BaseModel
from datetime import datetime
import os
import sys
from .config import settings
from .lib.logging_setup import setup_logging
# Setup Logging first
setup_logging()
import logging
logger = logging.getLogger(__name__)
from .database import init_db, get_db, Company, Signal, EnrichmentData
from .services.deduplication import Deduplicator
from .services.discovery import DiscoveryService
from .services.scraping import ScraperService
from .services.classification import ClassificationService
# Initialize App
app = FastAPI(
    title=settings.APP_NAME,
    version=settings.VERSION,
    description="Backend for Company Explorer (Robotics Edition)",
    root_path="/ce"  # mounted behind the central Nginx proxy under /ce
)
# CORS is wide open — acceptable for an internal tool; tighten before
# exposing this service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Service Singletons (shared by request handlers and background tasks)
scraper = ScraperService()
classifier = ClassificationService()
discovery = DiscoveryService()
# --- Pydantic Models ---
class CompanyCreate(BaseModel):
    # Request body for creating a single company.
    name: str
    city: Optional[str] = None
    country: str = "DE"
    website: Optional[str] = None

class BulkImportRequest(BaseModel):
    # Request body for quick bulk import: bare company names, one per entry.
    names: List[str]

class AnalysisRequest(BaseModel):
    # Identifies the target company for the enrichment endpoints.
    company_id: int
    force_scrape: bool = False  # accepted but not yet read by the handlers below
# --- Events ---
@app.on_event("startup")
def on_startup():
    """Create DB tables on boot; log (but do not crash) on failure."""
    logger.info("Startup Event: Initializing Database...")
    try:
        init_db()
        logger.info("Database initialized successfully.")
    except Exception as e:
        # Deliberately swallowed so the app still starts and /api/health
        # remains reachable for diagnosis.
        logger.critical(f"Database init failed: {e}", exc_info=True)
# --- Routes ---
@app.get("/api/health")
def health_check():
    """Liveness probe: reports app status, version and the configured DB URL."""
    payload = {
        "status": "ok",
        "version": settings.VERSION,
        "db": settings.DATABASE_URL,
    }
    return payload
@app.get("/api/companies")
def list_companies(
    skip: int = 0,
    limit: int = 50,
    search: Optional[str] = None,
    db: Session = Depends(get_db)
):
    """Paginated listing with optional case-insensitive name substring search.

    Returns {"total": <count over the filtered set>, "items": [...page...]}.
    """
    try:
        query = db.query(Company)
        if search:
            query = query.filter(Company.name.ilike(f"%{search}%"))
        total = query.count()
        # Sort by ID desc (newest first)
        items = query.order_by(Company.id.desc()).offset(skip).limit(limit).all()
        return {"total": total, "items": items}
    except Exception as e:
        logger.error(f"List Companies Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/companies/{company_id}")
def get_company(company_id: int, db: Session = Depends(get_db)):
    """Fetch a single company with its signals eagerly loaded."""
    record = (
        db.query(Company)
        .options(joinedload(Company.signals))
        .filter(Company.id == company_id)
        .first()
    )
    if record is None:
        raise HTTPException(status_code=404, detail="Company not found")
    return record
@app.post("/api/companies/bulk")
def bulk_import_names(req: BulkImportRequest, db: Session = Depends(get_db)):
    """
    Quick import for testing. Just a list of names.

    Skips blanks, exact-name duplicates, and (if the Deduplicator could be
    built) fuzzy near-duplicates with score > 95. One commit for the batch.
    """
    logger.info(f"Starting bulk import of {len(req.names)} names.")
    try:
        added = 0
        skipped = 0
        # Deduplicator init (best effort — fall back to exact-match only)
        try:
            dedup = Deduplicator(db)
            logger.info("Deduplicator initialized.")
        except Exception as e:
            logger.warning(f"Deduplicator init failed: {e}")
            dedup = None
        for name in req.names:
            clean_name = name.strip()
            if not clean_name: continue
            # 1. Simple Deduplication (Exact Name)
            exists = db.query(Company).filter(Company.name == clean_name).first()
            if exists:
                skipped += 1
                continue
            # 2. Smart Deduplication (if available); scores are 0-100 (thefuzz)
            if dedup:
                matches = dedup.find_duplicates({"name": clean_name})
                if matches and matches[0]['score'] > 95:
                    logger.info(f"Duplicate found for {clean_name}: {matches[0]['name']}")
                    skipped += 1
                    continue
            # 3. Create
            new_comp = Company(
                name=clean_name,
                status="NEW"  # NOTE(review): this default previously triggered the stale-schema error fixed by recreating the DB file
            )
            db.add(new_comp)
            added += 1
        db.commit()  # single commit for the whole batch
        logger.info(f"Import success. Added: {added}, Skipped: {skipped}")
        return {"added": added, "skipped": skipped}
    except Exception as e:
        logger.error(f"Bulk Import Failed: {e}", exc_info=True)
        db.rollback()
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/enrich/discover")
def discover_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
    """
    Triggers Stage 1: Discovery (Website Search + Wikipedia Search)

    Queues run_discovery_task in the background and returns immediately.

    Raises:
        HTTPException 404: unknown company_id.
        HTTPException 500: unexpected lookup/queueing failure.
    """
    try:
        company = db.query(Company).filter(Company.id == req.company_id).first()
        if not company:
            raise HTTPException(404, "Company not found")
        # Run in background
        background_tasks.add_task(run_discovery_task, company.id)
        return {"status": "queued", "message": f"Discovery started for {company.name}"}
    except HTTPException:
        # Bug fix: the 404 above was previously caught by the generic handler
        # below and re-raised as a 500. Let HTTPExceptions pass through.
        raise
    except Exception as e:
        logger.error(f"Discovery Error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
def run_discovery_task(company_id: int):
    """Background Stage 1: find the company website and Wikipedia URL.

    Opens its own DB session (the request session is closed by the time a
    BackgroundTask runs). "k.A." is the legacy sentinel for "not available".
    """
    # New Session for Background Task
    from .database import SessionLocal
    db = SessionLocal()
    try:
        company = db.query(Company).filter(Company.id == company_id).first()
        if not company: return
        logger.info(f"Running Discovery Task for {company.name}")
        # 1. Website Search (only if none is known yet)
        if not company.website or company.website == "k.A.":
            found_url = discovery.find_company_website(company.name, company.city)
            if found_url and found_url != "k.A.":
                company.website = found_url
                logger.info(f"-> Found URL: {found_url}")
        # 2. Wikipedia Search (always refreshed; upsert on company + source_type)
        wiki_url = discovery.find_wikipedia_url(company.name)
        company.last_wiki_search_at = datetime.utcnow()
        existing_wiki = db.query(EnrichmentData).filter(
            EnrichmentData.company_id == company.id,
            EnrichmentData.source_type == "wikipedia_url"
        ).first()
        if not existing_wiki:
            db.add(EnrichmentData(company_id=company.id, source_type="wikipedia_url", content={"url": wiki_url}))
        else:
            existing_wiki.content = {"url": wiki_url}
            existing_wiki.updated_at = datetime.utcnow()
        # Advance the workflow only when a real website was established.
        if company.status == "NEW" and company.website and company.website != "k.A.":
            company.status = "DISCOVERED"
        db.commit()
        logger.info(f"Discovery finished for {company.id}")
    except Exception as e:
        logger.error(f"Background Task Error: {e}", exc_info=True)
        db.rollback()
    finally:
        db.close()
@app.post("/api/enrich/analyze")
def analyze_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
    """Queue Stage 2 (scrape + LLM classification) for one company."""
    company = db.query(Company).filter(Company.id == req.company_id).first()
    if company is None:
        raise HTTPException(404, "Company not found")
    has_website = bool(company.website) and company.website != "k.A."
    if not has_website:
        # Returned as a 200 payload (not an HTTP error) for the frontend.
        return {"error": "No website to analyze. Run Discovery first."}
    background_tasks.add_task(run_analysis_task, company.id, company.website)
    return {"status": "queued"}
def run_analysis_task(company_id: int, url: str):
    """Background Stage 2: scrape *url*, classify robotics potential, persist.

    Runs outside the request, so it opens its own session. Signals are fully
    replaced on every successful classification.
    """
    from .database import SessionLocal
    db = SessionLocal()
    try:
        company = db.query(Company).filter(Company.id == company_id).first()
        if not company: return
        logger.info(f"Running Analysis Task for {company.name}")
        # 1. Scrape Website
        scrape_result = scraper.scrape_url(url)
        # Save Scrape Data (upsert on company_id + source_type)
        existing_scrape_data = db.query(EnrichmentData).filter(
            EnrichmentData.company_id == company.id,
            EnrichmentData.source_type == "website_scrape"
        ).first()
        if "text" in scrape_result and scrape_result["text"]:
            if not existing_scrape_data:
                db.add(EnrichmentData(company_id=company.id, source_type="website_scrape", content=scrape_result))
            else:
                existing_scrape_data.content = scrape_result
                existing_scrape_data.updated_at = datetime.utcnow()
        elif "error" in scrape_result:
            logger.warning(f"Scraping failed for {company.name}: {scrape_result['error']}")
        # 2. Classify Robotics Potential (only when real text was scraped)
        if "text" in scrape_result and scrape_result["text"]:
            analysis = classifier.analyze_robotics_potential(
                company_name=company.name,
                website_text=scrape_result["text"]
            )
            if "error" in analysis:
                logger.error(f"Robotics classification failed for {company.name}: {analysis['error']}")
            else:
                industry = analysis.get("industry")
                if industry:
                    company.industry_ai = industry
                # Delete old signals (full replace keeps exactly one signal set)
                db.query(Signal).filter(Signal.company_id == company.id).delete()
                # Save new signals; scores come from the LLM on a 0-100 scale
                potentials = analysis.get("potentials", {})
                for signal_type, data in potentials.items():
                    new_signal = Signal(
                        company_id=company.id,
                        signal_type=f"robotics_{signal_type}_potential",
                        confidence=data.get("score", 0),
                        value="High" if data.get("score", 0) > 70 else "Medium" if data.get("score", 0) > 30 else "Low",
                        proof_text=data.get("reason")
                    )
                    db.add(new_signal)
                company.status = "ENRICHED"
                company.last_classification_at = datetime.utcnow()
                logger.info(f"Robotics analysis complete for {company.name}.")
        db.commit()
        logger.info(f"Analysis finished for {company.id}")
    except Exception as e:
        logger.error(f"Analyze Task Error: {e}", exc_info=True)
        db.rollback()
    finally:
        db.close()
# --- Serve Frontend ---
# Priority 1: Container Path (outside of /app volume — see Dockerfile, which
# copies the Vite build output to /frontend_static)
static_path = "/frontend_static"
# Priority 2: Local Dev Path (relative to this file)
if not os.path.exists(static_path):
    static_path = os.path.join(os.path.dirname(__file__), "../static")
if os.path.exists(static_path):
    logger.info(f"Serving frontend from {static_path}")
    # html=True serves index.html for "/"; API routes above take precedence
    # because they were registered before this catch-all mount.
    app.mount("/", StaticFiles(directory=static_path, html=True), name="static")
else:
    logger.warning(f"Frontend static files not found at {static_path} or local fallback.")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run("backend.app:app", host="0.0.0.0", port=8000, reload=True)

View File

@@ -0,0 +1,63 @@
import os
import logging
from typing import Optional
# Try pydantic-settings first; fall back to plain os.environ lookups.
try:
    from pydantic_settings import BaseSettings

    class Settings(BaseSettings):
        """App configuration, read from environment variables / .env file."""
        # App Info
        APP_NAME: str = "Company Explorer"
        VERSION: str = "0.2.2"
        DEBUG: bool = True
        # Database (Store in App dir for simplicity)
        DATABASE_URL: str = "sqlite:////app/companies_v3_final.db"
        # API Keys
        GEMINI_API_KEY: Optional[str] = None
        OPENAI_API_KEY: Optional[str] = None
        SERP_API_KEY: Optional[str] = None
        # Paths
        LOG_DIR: str = "/app/logs_debug"

        class Config:
            env_file = ".env"

    settings = Settings()
except ImportError:
    # Fallback when pydantic-settings is not installed.
    # Consistency fix: VERSION (was 0.2.1) and DATABASE_URL (was a separate
    # companies_debug.db file) previously diverged from the pydantic variant
    # above, so the fallback silently used a different database.
    class Settings:
        APP_NAME = "Company Explorer"
        VERSION = "0.2.2"
        DEBUG = True
        DATABASE_URL = "sqlite:////app/companies_v3_final.db"
        GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
        OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
        SERP_API_KEY = os.getenv("SERP_API_KEY")
        LOG_DIR = "/app/logs_debug"

    settings = Settings()
# Ensure Log Dir exists before logging_setup tries to write there.
os.makedirs(settings.LOG_DIR, exist_ok=True)
# API Key Loading Helper (from file if env missing)
def load_api_key_from_file(filename: str) -> Optional[str]:
    """Read an API key from *filename* and return its stripped content.

    Returns None when the file does not exist or cannot be read.
    """
    try:
        if os.path.exists(filename):
            with open(filename, 'r') as f:
                return f.read().strip()
    except Exception as e:
        # Bug fix: the message previously printed the literal "(unknown)"
        # instead of the failing path. Print (not log) because logging may
        # not be configured yet at import time.
        print(f"Could not load key from {filename}: {e}")
    return None
# Auto-load keys if not in env — these container paths are where the docker
# setup side-loads the key files.
if not settings.GEMINI_API_KEY:
    settings.GEMINI_API_KEY = load_api_key_from_file("/app/gemini_api_key.txt")
if not settings.SERP_API_KEY:
    settings.SERP_API_KEY = load_api_key_from_file("/app/serpapikey.txt")

View File

@@ -0,0 +1,113 @@
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, Float, Boolean, JSON
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship
from datetime import datetime
from .config import settings
# Setup
# check_same_thread=False is required for SQLite here: FastAPI background
# tasks may use a connection from a different thread than the one that
# created it.
engine = create_engine(settings.DATABASE_URL, connect_args={"check_same_thread": False})
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
# ==============================================================================
# MODELS
# ==============================================================================
class Company(Base):
    """Central entity: one row per target company, enriched over time."""
    __tablename__ = "companies"
    id = Column(Integer, primary_key=True, index=True)
    # Core Identity
    name = Column(String, index=True)
    website = Column(String, index=True)  # Normalized Domain preferred
    crm_id = Column(String, unique=True, index=True, nullable=True)  # Link to D365
    # Classification
    industry_crm = Column(String, nullable=True)  # The "allowed" industry
    industry_ai = Column(String, nullable=True)  # The AI suggested industry
    # Location
    city = Column(String, nullable=True)
    country = Column(String, default="DE")
    # Workflow Status ("NEW" -> "DISCOVERED" -> "ENRICHED"; "IMPORTED" from migration)
    status = Column(String, default="NEW", index=True)
    # Granular Process Tracking (Timestamps)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
    last_scraped_at = Column(DateTime, nullable=True)
    last_wiki_search_at = Column(DateTime, nullable=True)
    last_classification_at = Column(DateTime, nullable=True)
    last_signal_check_at = Column(DateTime, nullable=True)
    # Relationships (delete-orphan: children are removed with the company)
    signals = relationship("Signal", back_populates="company", cascade="all, delete-orphan")
    enrichment_data = relationship("EnrichmentData", back_populates="company", cascade="all, delete-orphan")
class Signal(Base):
    """
    Represents a specific sales signal or potential.
    Example: type='has_spa', value='true', proof='Wellnessbereich mit 2000qm'
    """
    __tablename__ = "signals"
    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))
    signal_type = Column(String, index=True)  # e.g. "robotics_cleaning_potential"
    # NOTE(review): originally documented as 0.0-1.0, but app.py stores raw
    # LLM scores (0-100) here — confirm the intended scale and normalize.
    confidence = Column(Float, default=0.0)
    value = Column(String)  # "High", "Medium", "Yes", "No"
    proof_text = Column(Text, nullable=True)  # Snippet from website/source
    created_at = Column(DateTime, default=datetime.utcnow)
    company = relationship("Company", back_populates="signals")
class EnrichmentData(Base):
    """
    Stores raw data blobs (HTML, API responses) to allow re-processing.
    """
    __tablename__ = "enrichment_data"
    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"))
    source_type = Column(String)  # "website_scrape", "wikipedia_api", "google_serp"
    content = Column(JSON)  # The raw data
    created_at = Column(DateTime, default=datetime.utcnow)
    # Bug fix: app.py assigns `updated_at` when refreshing a record, but no
    # such column existed — the value became a plain (unmapped) attribute and
    # was silently never persisted. Requires a fresh DB file or ALTER TABLE.
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
    company = relationship("Company", back_populates="enrichment_data")
class ImportLog(Base):
    """
    Logs bulk imports (e.g. from Excel lists).
    """
    __tablename__ = "import_logs"
    id = Column(Integer, primary_key=True)
    filename = Column(String)  # source file of the import
    import_type = Column(String)  # "crm_dump" or "event_list"
    total_rows = Column(Integer)  # rows seen in the source file
    imported_rows = Column(Integer)  # rows actually created
    duplicate_rows = Column(Integer)  # rows skipped as duplicates
    created_at = Column(DateTime, default=datetime.utcnow)
# ==============================================================================
# UTILS
# ==============================================================================
def init_db():
    """Create all missing tables (no migrations — existing tables untouched)."""
    Base.metadata.create_all(bind=engine)

def get_db():
    """FastAPI dependency: yield a session, always close it afterwards."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

View File

@@ -0,0 +1,56 @@
from abc import ABC, abstractmethod
from typing import List, Optional, Dict, Any
from pydantic import BaseModel
# --- Generic data model ---
# Keeps the app independent of how SuperOffice (or any other CRM) names fields.
class LeadData(BaseModel):
    name: str
    website: Optional[str] = None
    city: Optional[str] = None
    country: str = "DE"
    industry: Optional[str] = None
    # Enrichment Data
    robotics_potential_score: int = 0
    robotics_potential_reason: Optional[str] = None
    # Meta
    source_id: Optional[str] = None  # ID in the source system (e.g. SuperOffice ID)

class TaskData(BaseModel):
    # A follow-up task to be attached to a lead in the CRM.
    subject: str
    description: str
    deadline: Optional[str] = None
# --- The contract (Repository Interface) ---
class CRMRepository(ABC):
    """
    Abstract base class for all CRM integrations.
    Whether Notion, SuperOffice or Odoo — every backend must implement these methods.
    """
    @abstractmethod
    def get_name(self) -> str:
        """Return the name of the system (e.g. 'SuperOffice')."""
        pass

    @abstractmethod
    def find_company(self, name: str, email: str = None) -> Optional[str]:
        """Search for a company and return its external ID, if found."""
        pass

    @abstractmethod
    def create_lead(self, lead: LeadData) -> str:
        """Create a new lead and return its external ID."""
        pass

    @abstractmethod
    def update_lead(self, external_id: str, lead: LeadData) -> bool:
        """Update an existing lead with new enrichment data."""
        pass

    @abstractmethod
    def create_task(self, external_id: str, task: TaskData) -> bool:
        """Create a task/follow-up on the lead for the sales rep."""
        pass

View File

@@ -0,0 +1,144 @@
import time
import logging
import random
import os
import re
from functools import wraps
from typing import Optional, Union, List
# Versuche neue Google GenAI Lib (v1.0+)
try:
from google import genai
from google.genai import types
HAS_NEW_GENAI = True
except ImportError:
HAS_NEW_GENAI = False
# Fallback auf alte Lib
try:
import google.generativeai as old_genai
HAS_OLD_GENAI = True
except ImportError:
HAS_OLD_GENAI = False
from ..config import settings
logger = logging.getLogger(__name__)
# ==============================================================================
# 1. DECORATORS
# ==============================================================================
def retry_on_failure(max_retries: int = 3, delay: float = 2.0):
    """
    Decorator for retrying functions with exponential backoff (plus jitter).

    Args:
        max_retries: Total number of attempts before giving up.
        delay: Base delay in seconds; actual wait is delay * 2**attempt + jitter.

    Raises:
        The last exception once all attempts are exhausted; ValueError
        mentioning "API Key" is treated as fatal and re-raised immediately.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_exception = None
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    last_exception = e
                    # Don't retry on certain fatal errors (can be extended)
                    if isinstance(e, ValueError) and "API Key" in str(e):
                        raise e
                    # Bug fix: previously the backoff sleep also ran after the
                    # FINAL failed attempt, delaying the raise for no benefit.
                    if attempt == max_retries - 1:
                        break
                    wait_time = delay * (2 ** attempt) + random.uniform(0, 1)
                    logger.warning(f"Retry {attempt + 1}/{max_retries} for '{func.__name__}' after error: {e}. Waiting {wait_time:.1f}s")
                    time.sleep(wait_time)
            logger.error(f"Function '{func.__name__}' failed after {max_retries} attempts.")
            raise last_exception
        return wrapper
    return decorator
# ==============================================================================
# 2. TEXT TOOLS
# ==============================================================================
def clean_text(text: str) -> str:
    """Collapse every run of whitespace to a single space and trim the ends."""
    if not text:
        return ""
    trimmed = str(text).strip()
    return re.sub(r'\s+', ' ', trimmed)
def normalize_string(s: str) -> str:
    """Return the lower-cased, stripped form of *s* ('' for falsy input)."""
    if not s:
        return ""
    return s.strip().lower()
# ==============================================================================
# 3. LLM WRAPPER (GEMINI)
# ==============================================================================
@retry_on_failure(max_retries=3)
def call_gemini(
    prompt: Union[str, List[str]],
    model_name: str = "gemini-2.0-flash",
    temperature: float = 0.3,
    json_mode: bool = False,
    system_instruction: Optional[str] = None
) -> str:
    """
    Unified caller for Gemini API. Prefers new `google.genai` library.

    Args:
        prompt: A single prompt string or a list of content parts.
        model_name: Gemini model identifier.
        temperature: Sampling temperature.
        json_mode: If True, request "application/json" responses.
        system_instruction: Optional system prompt.

    Returns:
        The stripped response text.

    Raises:
        ValueError: missing API key or empty model response.
        ImportError: neither Google GenAI library is installed.
    """
    api_key = settings.GEMINI_API_KEY
    if not api_key:
        raise ValueError("GEMINI_API_KEY is missing in configuration.")
    # Option A: New Library (google-genai)
    if HAS_NEW_GENAI:
        try:
            client = genai.Client(api_key=api_key)
            config = {
                "temperature": temperature,
                "top_p": 0.95,
                "top_k": 40,
                "max_output_tokens": 8192,
            }
            if json_mode:
                config["response_mime_type"] = "application/json"
            # Bug fix: system_instruction was silently ignored on this path;
            # the new SDK accepts it as part of the generation config.
            if system_instruction:
                config["system_instruction"] = system_instruction
            response = client.models.generate_content(
                model=model_name,
                contents=[prompt] if isinstance(prompt, str) else prompt,
                config=config,
            )
            if not response.text:
                raise ValueError("Empty response from Gemini")
            return response.text.strip()
        except Exception as e:
            logger.error(f"Error with google-genai lib: {e}")
            if not HAS_OLD_GENAI:
                raise e
            # Fallthrough to Option B
    # Option B: Old Library (google-generativeai)
    if HAS_OLD_GENAI:
        try:
            old_genai.configure(api_key=api_key)
            generation_config = {
                "temperature": temperature,
                "top_p": 0.95,
                "top_k": 40,
                "max_output_tokens": 8192,
            }
            if json_mode:
                generation_config["response_mime_type"] = "application/json"
            model = old_genai.GenerativeModel(
                model_name=model_name,
                generation_config=generation_config,
                system_instruction=system_instruction
            )
            response = model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            logger.error(f"Error with google-generativeai lib: {e}")
            raise e
    raise ImportError("No Google GenAI library installed (neither google-genai nor google-generativeai).")

View File

@@ -0,0 +1,39 @@
import logging
import sys
import os
from logging.handlers import RotatingFileHandler
from ..config import settings
def setup_logging():
    """Configure root logging: rotating DEBUG file + INFO console.

    Bug fix: previously a failure to create the log file returned early and
    left even console logging unconfigured. Now the file handler is best
    effort and console logging is always installed.
    """
    log_file = os.path.join(settings.LOG_DIR, "company_explorer_debug.log")
    # Create Formatter
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)  # Catch ALL
    # File Handler (best effort)
    file_ok = False
    try:
        file_handler = RotatingFileHandler(log_file, maxBytes=10*1024*1024, backupCount=5)
        file_handler.setFormatter(formatter)
        file_handler.setLevel(logging.DEBUG)
        root_logger.addHandler(file_handler)
        file_ok = True
    except Exception as e:
        print(f"FATAL: Could not create log file at {log_file}: {e}")
    # Console Handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    console_handler.setLevel(logging.INFO)  # Keep console clean
    root_logger.addHandler(console_handler)
    # Silence noisy libs partially
    logging.getLogger("uvicorn.access").setLevel(logging.INFO)
    logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO)  # Set to DEBUG to see SQL queries!
    if file_ok:
        logging.info(f"Logging initialized. Writing to {log_file}")
    else:
        logging.warning("Logging initialized (console only; file handler unavailable).")

View File

@@ -0,0 +1,42 @@
import logging
import uuid
from typing import Optional
from ..interfaces import CRMRepository, LeadData, TaskData
logger = logging.getLogger(__name__)
class MockRepository(CRMRepository):
    """
    Simulates a CRM. Use this for local dev or tests.
    Stores data in memory (lost on restart).
    """

    def __init__(self):
        self._store = {}

    def get_name(self) -> str:
        return "Local Mock CRM"

    def find_company(self, name: str, email: str = None) -> Optional[str]:
        # Case-insensitive exact-name scan over the in-memory store.
        wanted = name.lower()
        for lead_id, lead in self._store.items():
            if lead.name.lower() != wanted:
                continue
            logger.info(f"[MockCRM] Found existing company '{name}' with ID {lead_id}")
            return lead_id
        return None

    def create_lead(self, lead: LeadData) -> str:
        new_id = f"MOCK_{uuid.uuid4().hex[:8]}"
        self._store[new_id] = lead
        logger.info(f"[MockCRM] Created company '{lead.name}' (ID: {new_id}). Total records: {len(self._store)}")
        return new_id

    def update_lead(self, external_id: str, lead: LeadData) -> bool:
        if external_id not in self._store:
            return False
        self._store[external_id] = lead
        logger.info(f"[MockCRM] Updated company {external_id} with robotics score: {lead.robotics_potential_score}")
        return True

    def create_task(self, external_id: str, task: TaskData) -> bool:
        logger.info(f"[MockCRM] 🔔 TASK CREATED for {external_id}: '{task.subject}'")
        return True

View File

@@ -0,0 +1,40 @@
import logging
import requests
from typing import Optional
from ..interfaces import CRMRepository, LeadData, TaskData
from ..config import settings
logger = logging.getLogger(__name__)
class SuperOfficeRepository(CRMRepository):
    """Skeleton CRM adapter for the SuperOffice REST API; calls are TODO stubs."""
    def __init__(self, tenant_id: str, api_token: str):
        # tenant_id selects the SuperOffice instance; the token is sent as Bearer auth.
        self.base_url = f"https://{tenant_id}.superoffice.com/api/v1"
        self.headers = {
            "Authorization": f"Bearer {api_token}",
            "Accept": "application/json"
        }

    def get_name(self) -> str:
        return "SuperOffice"

    def find_company(self, name: str, email: str = None) -> Optional[str]:
        # TODO: Implement actual OData query
        # Example: GET /Contact?$filter=Name eq '{name}'
        logger.info(f"[SuperOffice] Searching for '{name}'...")
        return None

    def create_lead(self, lead: LeadData) -> str:
        logger.info(f"[SuperOffice] Creating Lead: {lead.name}")
        # TODO: POST /Contact
        # Payload mapping: lead.industry -> SuperOffice BusinessId
        return "SO_DUMMY_ID_123"

    def update_lead(self, external_id: str, lead: LeadData) -> bool:
        logger.info(f"[SuperOffice] Updating Lead {external_id} with Score {lead.robotics_potential_score}")
        # TODO: PUT /Contact/{id}
        # Write the robotics potential into a custom field (UserDefinedField).
        return True

    def create_task(self, external_id: str, task: TaskData) -> bool:
        logger.info(f"[SuperOffice] Creating Task for {external_id}: {task.subject}")
        return True

View File

@@ -0,0 +1,91 @@
import sys
import os
import logging
from sqlalchemy.orm import Session
# Add paths to access legacy and new modules
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))) # Root for legacy
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))) # Company Explorer Root
# Legacy Import
try:
from _legacy_gsheets_system.google_sheet_handler import GoogleSheetHandler
from _legacy_gsheets_system.config import Config as LegacyConfig
except ImportError as e:
print(f"Failed to import legacy modules: {e}")
sys.exit(1)
# New DB
from backend.database import SessionLocal, Company, init_db
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("LegacyImporter")
def migrate():
    """One-shot migration: copy companies from the legacy Google Sheet to SQLite.

    Rows without a usable name are skipped; existing companies (exact name
    match) count as duplicates. Imported rows get status "IMPORTED" so the
    enrichment pipeline can pick them up later.
    """
    logger.info("Starting migration from Google Sheets...")
    # 1. Connect to GSheets
    LegacyConfig.load_api_keys()  # Ensure keys are loaded
    try:
        handler = GoogleSheetHandler()
        df = handler.get_sheet_as_dataframe("CRM_Accounts")  # Assuming standard sheet name
    except Exception as e:
        logger.error(f"GSheet Connection failed: {e}")
        return
    if df is None or df.empty:
        logger.warning("No data found in sheet.")
        return
    logger.info(f"Found {len(df)} rows. Transforming...")
    # 2. Connect to New DB
    init_db()  # Ensure tables exist
    db = SessionLocal()
    count = 0
    skipped = 0
    try:
        for _, row in df.iterrows():
            name = str(row.get('CRM Name', '')).strip()
            if not name or name.lower() in ['nan', 'none', '']:
                continue
            # Check duplicate (simple check by name for migration)
            exists = db.query(Company).filter(Company.name == name).first()
            if exists:
                skipped += 1
                continue
            # Create Company
            comp = Company(
                name=name,
                website=str(row.get('CRM Website', '')).strip() or None,
                crm_id=str(row.get('CRM ID', '')).strip() or None,
                city=str(row.get('CRM Ort', '')).strip() or None,
                country=str(row.get('CRM Land', 'DE')).strip(),
                status="IMPORTED"  # Mark as imported so we know to enrich them
            )
            # Map old industry if useful, otherwise leave blank for re-classification
            # comp.industry_ai = str(row.get('Chat Vorschlag Branche', ''))
            db.add(comp)
            count += 1
            if count % 100 == 0:
                logger.info(f"Committed {count}...")
                db.commit()  # batch commit every 100 rows
        db.commit()  # final commit for the remainder
        logger.info(f"Migration finished. Imported: {count}, Skipped: {skipped}")
    except Exception as e:
        logger.error(f"Migration error: {e}")
        db.rollback()
    finally:
        db.close()

if __name__ == "__main__":
    migrate()

View File

@@ -0,0 +1,77 @@
import json
import logging
import os
from typing import Dict, Any, List
from ..lib.core_utils import call_gemini
from ..config import settings
logger = logging.getLogger(__name__)
# Closed vocabulary of industry labels the LLM must choose from.
ALLOWED_INDUSTRIES_FILE = os.path.join(os.path.dirname(__file__), "../data/allowed_industries.json")

class ClassificationService:
    """Gemini-backed industry classification + robotics-potential scoring."""
    def __init__(self):
        # Loaded once per service instance.
        self.allowed_industries = self._load_allowed_industries()

    def _load_allowed_industries(self) -> List[str]:
        """Load the allowed-industries list; fall back to ["Sonstige"] on any error."""
        try:
            with open(ALLOWED_INDUSTRIES_FILE, 'r', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            logger.error(f"Failed to load allowed industries: {e}")
            return ["Sonstige"]

    def analyze_robotics_potential(self, company_name: str, website_text: str) -> Dict[str, Any]:
        """
        Analyzes the company for robotics potential based on website content.
        Returns strict JSON.

        Returns either the parsed LLM JSON (keys: industry, summary,
        potentials) or {"error": ...} on guard failure / LLM failure.
        """
        # Guard: too little text makes the LLM output unreliable.
        if not website_text or len(website_text) < 100:
            return {"error": "Insufficient text content"}
        # Website text is truncated to 15k chars to bound prompt size.
        prompt = f"""
You are a Senior B2B Market Analyst for 'Roboplanet', a robotics distributor.
Your job is to analyze a target company based on their website text and determine their potential for using robots.
--- TARGET COMPANY ---
Name: {company_name}
Website Content (Excerpt):
{website_text[:15000]}
--- ALLOWED INDUSTRIES (STRICT) ---
You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige".
{json.dumps(self.allowed_industries, ensure_ascii=False)}
--- ANALYSIS TASKS ---
1. **Industry Classification:** Pick one from the list.
2. **Robotics Potential Scoring (0-100):**
- **Cleaning:** Does the company manage large floors, hospitals, hotels, or public spaces? (Keywords: Hygiene, Cleaning, SPA, Facility Management)
- **Transport/Logistics:** Do they move goods internally? (Keywords: Warehouse, Intralogistics, Production line, Hospital logistics)
- **Security:** Do they have large perimeters or night patrols? (Keywords: Werkschutz, Security, Monitoring)
- **Service:** Do they interact with guests/patients? (Keywords: Reception, Restaurant, Nursing)
3. **Explanation:** A short, strategic reason for the scoring (German).
--- OUTPUT FORMAT (JSON ONLY) ---
{{
"industry": "String (from list)",
"summary": "Short business summary (German)",
"potentials": {{
"cleaning": {{ "score": 0-100, "reason": "..." }},
"transport": {{ "score": 0-100, "reason": "..." }},
"security": {{ "score": 0-100, "reason": "..." }},
"service": {{ "score": 0-100, "reason": "..." }}
}}
}}
"""
        try:
            response_text = call_gemini(
                prompt=prompt,
                json_mode=True,
                temperature=0.2  # Low temp for consistency
            )
            return json.loads(response_text)
        except Exception as e:
            logger.error(f"Classification failed: {e}")
            return {"error": str(e)}

View File

@@ -0,0 +1,209 @@
import logging
import re
from collections import Counter
from typing import List, Tuple, Dict, Any, Optional
from sqlalchemy.orm import Session
from sqlalchemy import select
# External libs (must be in requirements.txt)
from thefuzz import fuzz
from ..database import Company
from ..lib.core_utils import clean_text, normalize_string
logger = logging.getLogger(__name__)
# --- Configuration (Ported from Legacy) ---
# Fuzzy-match thresholds and penalties on the 0-100 thefuzz scale.
SCORE_THRESHOLD = 80
SCORE_THRESHOLD_WEAK = 95
MIN_NAME_FOR_DOMAIN = 70
CITY_MISMATCH_PENALTY = 30
COUNTRY_MISMATCH_PENALTY = 40
# Tokens carrying no identity information (legal forms, generic words);
# removed before scoring. Cleanup: the duplicate 'international' entry was
# removed — set literals de-duplicate anyway, so behavior is unchanged.
STOP_TOKENS_BASE = {
    'gmbh','mbh','ag','kg','ug','ohg','se','co','kgaa','inc','llc','ltd','sarl',
    'holding','gruppe','group','international','solutions','solution','service','services',
    'deutschland','austria','germany','technik','technology','technologies','systems','systeme',
    'logistik','logistics','industries','industrie','management','consulting','vertrieb','handel',
    'company','gesellschaft','mbh&co','mbhco','werke','werk'
}
# ==============================================================================
# Helpers
# ==============================================================================
def _tokenize(s: str) -> List[str]:
if not s: return []
return re.split(r"[^a-z0-9]+", str(s).lower())
def split_tokens(name: str) -> List[str]:
    """Tokenize *name*, keeping only significant tokens.

    Drops tokens shorter than three characters and anything listed in
    STOP_TOKENS_BASE (legal forms, filler words).
    """
    if not name:
        return []
    return [
        tok
        for tok in _tokenize(name)
        if len(tok) >= 3 and tok not in STOP_TOKENS_BASE
    ]
def clean_name_for_scoring(norm_name: str) -> Tuple[str, set]:
    """Return (space-joined string, set) of the significant tokens of a name."""
    significant = split_tokens(norm_name)
    return " ".join(significant), set(significant)
# ==============================================================================
# Core Deduplication Logic
# ==============================================================================
class Deduplicator:
    """In-memory fuzzy duplicate detection against the Company table.

    On construction a slim snapshot of every company is loaded into RAM and
    two blocking indexes are built (by normalized domain and by name token),
    so an incoming candidate only has to be fuzzily scored against a small
    subset of records instead of the whole table.
    """

    def __init__(self, db: Session):
        # Open SQLAlchemy session; only used for the initial snapshot load.
        self.db = db
        self.reference_data = []  # Cache for DB records
        self.domain_index = {}  # normalized domain -> [record, ...]
        self.token_freq = Counter()  # token -> number of records containing it
        self.token_index = {}  # token -> [record, ...] (blocking index)
        self._load_reference_data()

    def _load_reference_data(self):
        """
        Loads minimal dataset from DB into RAM for fast fuzzy matching.
        Optimized for 10k-50k records.
        """
        logger.info("Loading reference data for deduplication...")
        query = self.db.query(Company.id, Company.name, Company.website, Company.city, Company.country)
        companies = query.all()
        for c in companies:
            norm_name = normalize_string(c.name)
            norm_domain = normalize_string(c.website)  # Simplified, should extract domain
            record = {
                'id': c.id,
                'name': c.name,
                'normalized_name': norm_name,
                'normalized_domain': norm_domain,
                'city': normalize_string(c.city),
                'country': normalize_string(c.country)
            }
            self.reference_data.append(record)
            # Build Indexes
            if norm_domain:
                self.domain_index.setdefault(norm_domain, []).append(record)
            # Token Frequency (used later to pick the rarest blocking token)
            _, toks = clean_name_for_scoring(norm_name)
            for t in toks:
                self.token_freq[t] += 1
                self.token_index.setdefault(t, []).append(record)
        logger.info(f"Loaded {len(self.reference_data)} records for deduplication.")

    def _choose_rarest_token(self, norm_name: str) -> Optional[str]:
        """Pick the most selective token (rarest overall, longest on ties).

        Used as the blocking key in find_duplicates: scoring only runs
        against records sharing this token, keeping candidate sets small.
        """
        _, toks = clean_name_for_scoring(norm_name)
        if not toks:
            return None
        # Sort by frequency (asc) then length (desc); unseen tokens (freq
        # default 10**9) sort last.
        lst = sorted(list(toks), key=lambda x: (self.token_freq.get(x, 10**9), -len(x)))
        return lst[0] if lst else None

    def find_duplicates(self, candidate: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Checks a single candidate against the loaded index.
        Returns list of matches with score >= Threshold.

        candidate: dict with optional keys 'name', 'website', 'city', 'country'.
        Returns a list sorted by score (desc); each entry carries company_id,
        name, score and the per-field scoring details.
        """
        # Prepare Candidate
        c_norm_name = normalize_string(candidate.get('name', ''))
        c_norm_domain = normalize_string(candidate.get('website', ''))
        c_city = normalize_string(candidate.get('city', ''))
        c_country = normalize_string(candidate.get('country', ''))
        candidates_to_check = {}  # Map ID -> Record (dedupes overlap of both indexes)
        # 1. Domain Match (Fastest)
        if c_norm_domain and c_norm_domain in self.domain_index:
            for r in self.domain_index[c_norm_domain]:
                candidates_to_check[r['id']] = r
        # 2. Rarest Token Match (Blocking)
        rtok = self._choose_rarest_token(c_norm_name)
        if rtok and rtok in self.token_index:
            for r in self.token_index[rtok]:
                candidates_to_check[r['id']] = r
        if not candidates_to_check:
            return []
        # 3. Scoring
        matches = []
        for db_rec in candidates_to_check.values():
            score, details = self._calculate_similarity(
                cand={'n': c_norm_name, 'd': c_norm_domain, 'c': c_city, 'ct': c_country},
                ref=db_rec
            )
            # Threshold Logic (Weak vs Strong): name-only evidence must clear
            # the stricter SCORE_THRESHOLD_WEAK bar.
            is_weak = (details['domain_match'] == 0 and not (details['loc_match']))
            threshold = SCORE_THRESHOLD_WEAK if is_weak else SCORE_THRESHOLD
            if score >= threshold:
                matches.append({
                    'company_id': db_rec['id'],
                    'name': db_rec['name'],
                    'score': score,
                    'details': details
                })
        matches.sort(key=lambda x: x['score'], reverse=True)
        return matches

    def _calculate_similarity(self, cand, ref):
        """Score one candidate/reference pair; returns (score 0-100, details dict)."""
        # Data Prep
        n1, n2 = cand['n'], ref['normalized_name']
        # Exact Name Shortcut.
        # NOTE: reports domain_match=0 even if the domains also match; the
        # score of 100 clears every threshold regardless, so is_weak is moot.
        if n1 and n1 == n2:
            return 100, {'exact': True, 'domain_match': 0, 'loc_match': 0}
        # Domain
        d1, d2 = cand['d'], ref['normalized_domain']
        domain_match = 1 if (d1 and d2 and d1 == d2) else 0
        # Location
        city_match = 1 if (cand['c'] and ref['city'] and cand['c'] == ref['city']) else 0
        country_match = 1 if (cand['ct'] and ref['country'] and cand['ct'] == ref['country']) else 0
        loc_match = city_match and country_match
        # Name Fuzzy Score: best of three fuzz strategies on stop-word-free names.
        clean1, _ = clean_name_for_scoring(n1)
        clean2, _ = clean_name_for_scoring(n2)
        if clean1 and clean2:
            ts = fuzz.token_set_ratio(clean1, clean2)
            pr = fuzz.partial_ratio(clean1, clean2)
            ss = fuzz.token_sort_ratio(clean1, clean2)
            name_score = max(ts, pr, ss)
        else:
            name_score = 0
        # Penalties: only applied when BOTH sides specify the field and disagree.
        penalties = 0
        if cand['ct'] and ref['country'] and not country_match:
            penalties += COUNTRY_MISMATCH_PENALTY
        if cand['c'] and ref['city'] and not city_match:
            penalties += CITY_MISMATCH_PENALTY
        # Final Calc
        # Base weights: Domain is king (100), Name is mandatory (unless domain match)
        total = 0
        if domain_match:
            total = 100
        else:
            total = name_score
        if loc_match:
            total += 10  # Bonus
        total -= penalties
        # Capping
        total = min(100, max(0, total))
        return total, {
            'name_score': name_score,
            'domain_match': domain_match,
            'loc_match': loc_match,
            'penalties': penalties
        }

View File

@@ -0,0 +1,126 @@
import logging
import requests
import re
from typing import Optional, Dict, Tuple
from urllib.parse import urlparse
from ..config import settings
from ..lib.core_utils import retry_on_failure, normalize_string
logger = logging.getLogger(__name__)
# Domains to ignore when looking for official company homepage:
# social networks, business registries/directories, job boards and media —
# search hits on these are never the company's own site.
# Compared against the hit's netloc (lowercased, "www." stripped); subdomains
# are matched too (see DiscoveryService._is_credible_url).
BLACKLIST_DOMAINS = {
    "linkedin.com", "xing.com", "facebook.com", "instagram.com", "twitter.com",
    "northdata.de", "northdata.com", "firmenwissen.de", "creditreform.de",
    "dnb.com", "kompass.com", "wer-zu-wem.de", "kununu.com", "glassdoor.com",
    "stepstone.de", "indeed.com", "monster.de", "youtube.com", "wikipedia.org"
}
class DiscoveryService:
    """Discovers official company websites and Wikipedia pages via SerpAPI.

    All lookups return the sentinel string "k.A." ("keine Angabe") instead of
    raising when nothing credible is found or the API key is missing.
    """

    def __init__(self):
        # Key is read once at construction; lookups degrade gracefully without it.
        self.api_key = settings.SERP_API_KEY
        if not self.api_key:
            logger.warning("SERP_API_KEY not set. Discovery features will fail.")

    def _serp_search(self, query: str, num: int) -> list:
        """Run one German-localized Google search through SerpAPI.

        Shared by both public lookups (previously duplicated inline).
        Returns the list of organic results (possibly empty); HTTP and
        network errors propagate to the caller.
        """
        params = {
            "engine": "google",
            "q": query,
            "api_key": self.api_key,
            "num": num,
            "gl": "de",
            "hl": "de"
        }
        response = requests.get("https://serpapi.com/search", params=params, timeout=15)
        response.raise_for_status()
        return response.json().get("organic_results", [])

    @retry_on_failure(max_retries=2)
    def find_company_website(self, company_name: str, city: Optional[str] = None) -> str:
        """
        Uses Google Search via SerpAPI to find the most likely official homepage.
        Returns "k.A." if nothing credible is found.
        """
        if not self.api_key:
            return "k.A."
        query = f"{company_name} offizielle Website"
        if city:
            query += f" {city}"
        logger.info(f"Searching website for: {query}")
        try:
            for result in self._serp_search(query, num=5):
                link = result.get("link", "")
                if self._is_credible_url(link):
                    # Simple heuristic: If the company name is part of the domain, high confidence
                    # Otherwise, take the first credible result.
                    return link
            return "k.A."
        except Exception as e:
            # NOTE(review): swallowing the error here means the retry decorator
            # never fires — confirm whether retries are actually expected.
            logger.error(f"SerpAPI Error: {e}")
            return "k.A."

    @retry_on_failure(max_retries=2)
    def find_wikipedia_url(self, company_name: str) -> str:
        """
        Searches for a specific German Wikipedia article.
        Returns "k.A." if no sufficiently similar article is found.
        """
        if not self.api_key:
            return "k.A."
        query = f"{company_name} Wikipedia"
        try:
            for result in self._serp_search(query, num=3):
                link = result.get("link", "")
                if "de.wikipedia.org/wiki/" in link:
                    # Basic validation: Is the title roughly the company?
                    title = result.get("title", "").replace(" Wikipedia", "")
                    if self._check_name_similarity(company_name, title):
                        return link
            return "k.A."
        except Exception as e:
            logger.error(f"Wiki Search Error: {e}")
            return "k.A."

    def _is_credible_url(self, url: str) -> bool:
        """Filters out social media, directories, and junk."""
        if not url:
            return False
        try:
            domain = urlparse(url).netloc.lower().replace("www.", "")
            if domain in BLACKLIST_DOMAINS:
                return False
            # Check for subdomains of blacklist (e.g. de.linkedin.com)
            for bad in BLACKLIST_DOMAINS:
                if domain.endswith("." + bad):
                    return False
            return True
        except Exception:  # was a bare `except:` — never swallow SystemExit/KeyboardInterrupt
            return False

    def _check_name_similarity(self, name1: str, name2: str) -> bool:
        """Simple fuzzy check for validation."""
        n1 = normalize_string(name1)
        n2 = normalize_string(name2)
        # Very permissive: if one is contained in the other
        return n1 in n2 or n2 in n1

View File

@@ -0,0 +1,82 @@
import logging
import requests
import random
import re
from bs4 import BeautifulSoup
from typing import Optional, Dict
from ..lib.core_utils import clean_text, retry_on_failure
logger = logging.getLogger(__name__)
# Desktop browser User-Agent strings; one is picked at random per request to
# look less like a bot to the scraped sites.
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0'
]
class ScraperService:
    """Fetches company web pages and extracts cleaned text, meta info and emails."""

    def __init__(self, timeout: int = 15):
        # Per-request timeout in seconds.
        self.timeout = timeout

    @retry_on_failure(max_retries=2)
    def scrape_url(self, url: str) -> Dict[str, str]:
        """
        Fetches a URL and returns cleaned text content + meta info.

        Returns {"title", "description", "text", "emails"} on success, or
        {"error": ...} on failure. Falls back from https:// to http:// once
        when the TLS handshake fails.
        """
        if not url.startswith("http"):
            url = "https://" + url
        try:
            headers = {'User-Agent': random.choice(USER_AGENTS)}
            # verify=False is risky but often needed for poorly configured corporate sites
            response = requests.get(url, headers=headers, timeout=self.timeout, verify=False)
            response.raise_for_status()
            # Check Content Type — skip PDFs, images, etc.
            content_type = response.headers.get('Content-Type', '').lower()
            if 'text/html' not in content_type:
                logger.warning(f"Skipping non-HTML content for {url}: {content_type}")
                return {"error": "Not HTML"}
            return self._parse_html(response.content)
        except requests.exceptions.SSLError:
            # Retry with HTTP if HTTPS fails
            if url.startswith("https://"):
                logger.info(f"SSL failed for {url}, retrying with http://...")
                return self.scrape_url(url.replace("https://", "http://"))
            raise
        except Exception as e:
            logger.error(f"Scraping failed for {url}: {e}")
            return {"error": str(e)}

    def _parse_html(self, html_content: bytes) -> Dict[str, str]:
        """Parse raw HTML bytes into title, meta description, body text and emails."""
        soup = BeautifulSoup(html_content, 'html.parser')
        # 1. Cleanup Junk
        # NOTE(review): header/footer/nav are removed BEFORE email extraction,
        # so contact addresses that appear only in the footer are lost —
        # confirm whether that is intended.
        for element in soup(['script', 'style', 'noscript', 'iframe', 'svg', 'header', 'footer', 'nav', 'aside', 'form', 'button']):
            element.decompose()
        # 2. Extract Title & Meta Description
        # FIX: soup.title.string is None for an empty or nested <title>; guard
        # it so clean_text() never receives None.
        title = (soup.title.string or "") if soup.title else ""
        meta_desc = ""
        meta_tag = soup.find('meta', attrs={'name': 'description'})
        if meta_tag:
            # `or ''` guards against an explicit None attribute value.
            meta_desc = meta_tag.get('content', '') or ''
        # 3. Extract Main Text
        # Prefer body, fallback to full soup
        body = soup.find('body')
        raw_text = body.get_text(separator=' ', strip=True) if body else soup.get_text(separator=' ', strip=True)
        cleaned_text = clean_text(raw_text)
        # 4. Extract Emails (Basic Regex)
        emails = set(re.findall(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', raw_text))
        return {
            "title": clean_text(title),
            "description": clean_text(meta_desc),
            "text": cleaned_text[:25000],  # Limit to avoid context overflow
            "emails": list(emails)[:5]  # Limit to 5
        }

View File

@@ -0,0 +1,103 @@
import os
import logging
from sqlalchemy.orm import Session
from ..database import Company
from ..interfaces import LeadData, TaskData, CRMRepository
from ..repositories.mock import MockRepository
from ..repositories.superoffice import SuperOfficeRepository
from ..config import settings
logger = logging.getLogger(__name__)
class CRMFactory:
    """Lazily builds and caches the single CRM repository for this process.

    The backend is chosen from the CRM_TYPE environment variable
    ("SUPEROFFICE" or anything else -> Mock fallback).
    """

    # Cached repository instance; created on first get_repository() call.
    _instance: CRMRepository | None = None

    @classmethod
    def get_repository(cls) -> CRMRepository:
        """Return the shared CRM repository, creating it on first use."""
        if cls._instance:
            return cls._instance
        crm_type = os.getenv("CRM_TYPE", "MOCK").upper()
        if crm_type == "SUPEROFFICE":
            # Load credentials securely from settings/env
            # NOTE(review): reads os.getenv directly although `settings` is
            # imported — confirm which config source is canonical.
            tenant = os.getenv("SO_TENANT_ID", "")
            token = os.getenv("SO_API_TOKEN", "")
            logger.info("Initializing SuperOffice Repository...")
            cls._instance = SuperOfficeRepository(tenant, token)
        else:
            logger.info("Initializing Mock Repository (Default)...")
            cls._instance = MockRepository()
        return cls._instance
class SyncService:
    """Pushes locally enriched companies into the configured CRM backend."""

    def __init__(self, db: Session):
        # db: request-scoped SQLAlchemy session.
        self.db = db
        # Shared CRM repository (SuperOffice or Mock, per CRM_TYPE).
        self.repo = CRMFactory.get_repository()

    def sync_company(self, company_id: int) -> dict:
        """
        Pushes a local company to the external CRM.

        Creates or updates the remote lead, links the external id back to the
        local record (crm_id), and for newly created high-potential leads
        (max signal confidence > 70) files a follow-up task for the sales rep.

        Returns {"status", "action", "crm", "external_id"} on success or
        {"error": ...} when the company id is unknown.
        """
        local_company = self.db.query(Company).filter(Company.id == company_id).first()
        if not local_company:
            return {"error": "Company not found"}
        # 1. Map Data
        # Extract the highest robotics potential across all stored signals.
        max_score = 0
        reason = ""
        for sig in local_company.signals:
            if sig.confidence > max_score:
                max_score = int(sig.confidence)
                reason = f"{sig.signal_type} ({sig.value})"
        lead_data = LeadData(
            name=local_company.name,
            website=local_company.website,
            city=local_company.city,
            country=local_company.country,
            industry=local_company.industry_ai,  # We suggest our AI industry
            robotics_potential_score=max_score,
            robotics_potential_reason=reason
        )
        # 2. Check if already linked
        external_id = local_company.crm_id
        # 3. Check if exists in CRM (by name) if not linked yet
        if not external_id:
            external_id = self.repo.find_company(local_company.name)
        action = "none"
        if external_id:
            # Update the existing remote lead.
            success = self.repo.update_lead(external_id, lead_data)
            if success:
                action = "updated"
                # If we found it by search, link it locally
                if not local_company.crm_id:
                    local_company.crm_id = external_id
                    self.db.commit()
        else:
            # Create a brand-new remote lead and link it locally.
            new_id = self.repo.create_lead(lead_data)
            if new_id:
                action = "created"
                local_company.crm_id = new_id
                self.db.commit()
                # Create a task for the sales rep if high potential.
                # NOTE(review): the task is only filed on create, never on
                # update — confirm that is intentional.
                if max_score > 70:
                    self.repo.create_task(new_id, TaskData(
                        subject="🔥 Hot Robotics Lead",
                        description=f"AI detected high potential ({max_score}%). Reason: {reason}. Please check website."
                    ))
        return {
            "status": "success",
            "action": action,
            "crm": self.repo.get_name(),
            "external_id": local_company.crm_id
        }

View File

@@ -0,0 +1,12 @@
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Company Explorer (Robotics)</title>
  </head>
  <body class="bg-slate-950 text-slate-100">
    <!-- React mount point; the /src/main.tsx entry is rewritten by Vite at build time -->
    <div id="root"></div>
    <script type="module" src="/src/main.tsx"></script>
  </body>
</html>

View File

@@ -0,0 +1,31 @@
{
"name": "company-explorer-frontend",
"private": true,
"version": "0.1.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"preview": "vite preview"
},
"dependencies": {
"@tanstack/react-table": "^8.10.7",
"axios": "^1.6.2",
"clsx": "^2.0.0",
"lucide-react": "^0.294.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"tailwind-merge": "^2.1.0"
},
"devDependencies": {
"@types/node": "^20.10.4",
"@types/react": "^18.2.43",
"@types/react-dom": "^18.2.17",
"@vitejs/plugin-react": "^4.2.1",
"autoprefixer": "^10.4.16",
"postcss": "^8.4.32",
"tailwindcss": "^3.3.6",
"typescript": "^5.3.3",
"vite": "^5.0.8"
}
}

View File

@@ -0,0 +1,6 @@
// PostCSS pipeline: run Tailwind first, then add vendor prefixes.
export default {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
}

View File

@@ -0,0 +1,116 @@
import { useState, useEffect } from 'react'
import axios from 'axios'
import { CompanyTable } from './components/CompanyTable'
import { ImportWizard } from './components/ImportWizard'
import { Inspector } from './components/Inspector' // NEW
import { LayoutDashboard, UploadCloud, Search, RefreshCw } from 'lucide-react'
// Base URL detection (Production vs Dev):
// behind Nginx the app is served from /ce/ (vite.config `base`), so API calls
// must use the /ce/api prefix; in dev Vite serves from / and proxies /api.
const API_BASE = import.meta.env.BASE_URL === '/ce/' ? '/ce/api' : '/api';

// Header statistics; currently only the total company count.
interface Stats {
  total: number;
}
// Root component: header with stats, search box, company table, plus the
// import wizard modal and the inspector sidebar as overlays.
function App() {
  const [stats, setStats] = useState<Stats>({ total: 0 })
  // Bumping refreshKey re-fetches stats and remounts the table (key prop).
  const [refreshKey, setRefreshKey] = useState(0)
  const [isImportOpen, setIsImportOpen] = useState(false)
  const [selectedCompanyId, setSelectedCompanyId] = useState<number | null>(null) // NEW

  // Fetch only the total count; limit=1 keeps the payload tiny.
  const fetchStats = async () => {
    try {
      const res = await axios.get(`${API_BASE}/companies?limit=1`)
      setStats({ total: res.data.total })
    } catch (e) {
      console.error("Failed to fetch stats", e)
    }
  }

  useEffect(() => {
    fetchStats()
  }, [refreshKey])

  const handleCompanySelect = (id: number) => {
    setSelectedCompanyId(id)
  }
  const handleCloseInspector = () => {
    setSelectedCompanyId(null)
  }

  return (
    <div className="min-h-screen bg-slate-950 text-slate-200 font-sans">
      <ImportWizard
        isOpen={isImportOpen}
        onClose={() => setIsImportOpen(false)}
        apiBase={API_BASE}
        onSuccess={() => setRefreshKey(k => k + 1)}
      />
      {/* Inspector Sidebar */}
      <Inspector
        companyId={selectedCompanyId}
        onClose={handleCloseInspector}
        apiBase={API_BASE}
      />
      {/* Header */}
      <header className="border-b border-slate-800 bg-slate-900/50 sticky top-0 z-10 backdrop-blur-md">
        <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 h-16 flex items-center justify-between">
          <div className="flex items-center gap-3">
            <div className="p-2 bg-blue-600 rounded-lg">
              <LayoutDashboard className="h-6 w-6 text-white" />
            </div>
            <div>
              <h1 className="text-xl font-bold text-white tracking-tight">Company Explorer</h1>
              <p className="text-xs text-blue-400 font-medium">ROBOTICS EDITION <span className="text-slate-600 ml-2">v0.2.2 (New DB Path)</span></p>
            </div>
          </div>
          <div className="flex items-center gap-4">
            <div className="text-sm text-slate-400">
              <span className="text-white font-bold">{stats.total}</span> Companies
            </div>
            <button
              onClick={() => setRefreshKey(k => k + 1)}
              className="p-2 hover:bg-slate-800 rounded-full transition-colors text-slate-400 hover:text-white"
              title="Refresh Data"
            >
              <RefreshCw className="h-5 w-5" />
            </button>
            <button
              className="flex items-center gap-2 bg-blue-600 hover:bg-blue-500 text-white px-4 py-2 rounded-md font-medium text-sm transition-all shadow-lg shadow-blue-900/20"
              onClick={() => setIsImportOpen(true)}
            >
              <UploadCloud className="h-4 w-4" />
              Import List
            </button>
          </div>
        </div>
      </header>
      {/* Main Content */}
      <main className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
        <div className="mb-6 flex gap-4">
          {/* NOTE(review): search input is not wired to any state/filter yet */}
          <div className="relative flex-1 max-w-md">
            <Search className="absolute left-3 top-2.5 h-5 w-5 text-slate-500" />
            <input
              type="text"
              placeholder="Search companies..."
              className="w-full bg-slate-900 border border-slate-700 text-slate-200 rounded-md pl-10 pr-4 py-2 focus:ring-2 focus:ring-blue-500 focus:border-transparent outline-none"
            />
          </div>
        </div>
        <div className="bg-slate-900 border border-slate-800 rounded-xl overflow-hidden shadow-xl">
          <CompanyTable key={refreshKey} apiBase={API_BASE} onRowClick={handleCompanySelect} /> {/* NEW PROP */}
        </div>
      </main>
    </div>
  )
}
export default App

View File

@@ -0,0 +1,205 @@
import { useState, useEffect, useMemo } from 'react'
import {
useReactTable,
getCoreRowModel,
flexRender,
createColumnHelper,
} from '@tanstack/react-table'
import axios from 'axios'
import { Play, Globe, AlertCircle, Search as SearchIcon, Loader2 } from 'lucide-react'
import clsx from 'clsx'
// Row shape returned by GET /companies.
type Company = {
  id: number
  name: string
  city: string | null
  country: string
  website: string | null
  // Pipeline state; the table badge styles NEW / DISCOVERED / ENRICHED.
  status: string
  industry_ai: string | null
}

const columnHelper = createColumnHelper<Company>()

interface CompanyTableProps {
  apiBase: string
  // Invoked with the company id when a row is clicked (opens the Inspector).
  onRowClick: (companyId: number) => void // NEW PROP
}
export function CompanyTable({ apiBase, onRowClick }: CompanyTableProps) {
const [data, setData] = useState<Company[]>([])
const [loading, setLoading] = useState(true)
const [processingId, setProcessingId] = useState<number | null>(null)
const fetchData = async () => {
setLoading(true)
try {
const res = await axios.get(`${apiBase}/companies?limit=100`)
setData(res.data.items)
} catch (e) {
console.error(e)
} finally {
setLoading(false)
}
}
useEffect(() => {
fetchData()
}, [])
const triggerDiscovery = async (id: number) => {
setProcessingId(id)
try {
await axios.post(`${apiBase}/enrich/discover`, { company_id: id })
// Optimistic update or wait for refresh? Let's refresh shortly after to see results
setTimeout(fetchData, 2000)
} catch (e) {
alert("Discovery Error")
setProcessingId(null)
}
}
const triggerAnalysis = async (id: number) => {
setProcessingId(id)
try {
await axios.post(`${apiBase}/enrich/analyze`, { company_id: id })
setTimeout(fetchData, 2000)
} catch (e) {
alert("Analysis Error")
setProcessingId(null)
}
}
const columns = useMemo(() => [
columnHelper.accessor('name', {
header: 'Company',
cell: info => <span className="font-semibold text-white">{info.getValue()}</span>,
}),
columnHelper.accessor('city', {
header: 'Location',
cell: info => (
<div className="text-slate-400 text-sm">
{info.getValue() || '-'} <span className="text-slate-600">({info.row.original.country})</span>
</div>
),
}),
columnHelper.accessor('website', {
header: 'Website',
cell: info => {
const url = info.getValue()
if (url && url !== "k.A.") {
return (
<a href={url} target="_blank" rel="noreferrer" className="flex items-center gap-1 text-blue-400 hover:underline text-sm">
<Globe className="h-3 w-3" /> {new URL(url).hostname.replace('www.', '')}
</a>
)
}
return <span className="text-slate-600 text-sm italic">Not found</span>
},
}),
columnHelper.accessor('status', {
header: 'Status',
cell: info => {
const s = info.getValue()
return (
<span className={clsx(
"px-2 py-0.5 rounded-full text-[10px] font-bold uppercase tracking-wider",
s === 'NEW' && "bg-slate-800 text-slate-400 border border-slate-700",
s === 'DISCOVERED' && "bg-blue-500/10 text-blue-400 border border-blue-500/20",
s === 'ENRICHED' && "bg-green-500/10 text-green-400 border border-green-500/20",
)}>
{s}
</span>
)
}
}),
columnHelper.display({
id: 'actions',
header: '',
cell: info => {
const c = info.row.original
const isProcessing = processingId === c.id
if (isProcessing) {
return <Loader2 className="h-4 w-4 animate-spin text-blue-500" />
}
// Action Logic
if (c.status === 'NEW' || !c.website || c.website === "k.A.") {
return (
<button
onClick={(e) => { e.stopPropagation(); triggerDiscovery(c.id); }}
className="flex items-center gap-1 px-2 py-1 bg-slate-800 hover:bg-slate-700 text-xs font-medium text-slate-300 rounded border border-slate-700 transition-colors"
title="Search Website & Wiki"
>
<SearchIcon className="h-3 w-3" /> Find
</button>
)
}
// Ready for Analysis
return (
<button
onClick={(e) => { e.stopPropagation(); triggerAnalysis(c.id); }}
className="flex items-center gap-1 px-2 py-1 bg-blue-600/10 hover:bg-blue-600/20 text-blue-400 text-xs font-medium rounded border border-blue-500/20 transition-colors"
title="Run AI Analysis"
>
<Play className="h-3 w-3 fill-current" /> Analyze
</button>
)
}
})
], [processingId])
const table = useReactTable({
data,
columns,
getCoreRowModel: getCoreRowModel(),
})
if (loading && data.length === 0) return <div className="p-8 text-center text-slate-500">Loading companies...</div>
if (data.length === 0) return (
<div className="p-12 text-center">
<div className="inline-block p-4 bg-slate-800 rounded-full mb-4">
<AlertCircle className="h-8 w-8 text-slate-500" />
</div>
<h3 className="text-lg font-medium text-white">No companies found</h3>
<p className="text-slate-400 mt-2">Import a list to get started.</p>
</div>
)
return (
<div className="overflow-x-auto">
<table className="w-full text-left border-collapse">
<thead>
{table.getHeaderGroups().map(headerGroup => (
<tr key={headerGroup.id} className="border-b border-slate-800 bg-slate-900/50">
{headerGroup.headers.map(header => (
<th key={header.id} className="p-4 text-xs font-medium text-slate-500 uppercase tracking-wider">
{flexRender(header.column.columnDef.header, header.getContext())}
</th>
))}
</tr>
))}
</thead>
<tbody className="divide-y divide-slate-800/50">
{table.getRowModel().rows.map(row => (
// Make row clickable
<tr
key={row.id}
onClick={() => onRowClick(row.original.id)} // NEW: Row Click Handler
className="hover:bg-slate-800/30 transition-colors cursor-pointer"
>
{row.getVisibleCells().map(cell => (
<td key={cell.id} className="p-4 align-middle">
{flexRender(cell.column.columnDef.cell, cell.getContext())}
</td>
))}
</tr>
))}
</tbody>
</table>
</div>
)
}

View File

@@ -0,0 +1,85 @@
import { useState } from 'react'
import axios from 'axios'
import { X, UploadCloud } from 'lucide-react'
// Props for the modal bulk-import dialog.
interface ImportWizardProps {
  // Controls visibility; the component renders nothing when false.
  isOpen: boolean
  // Close without importing.
  onClose: () => void
  // Called after a successful bulk import (triggers a data refresh).
  onSuccess: () => void
  apiBase: string
}
// Modal dialog for pasting a newline-separated list of company names and
// posting them to the bulk-import endpoint.
export function ImportWizard({ isOpen, onClose, onSuccess, apiBase }: ImportWizardProps) {
  const [rawInput, setRawInput] = useState("")
  const [submitting, setSubmitting] = useState(false)

  if (!isOpen) return null

  const submitCompanies = async () => {
    // One company name per non-empty line.
    const names = rawInput
      .split('\n')
      .map(line => line.trim())
      .filter(line => line.length > 0)
    if (names.length === 0) return
    setSubmitting(true)
    try {
      await axios.post(`${apiBase}/companies/bulk`, { names })
      setRawInput("")
      onSuccess()
      onClose()
    } catch (e: any) {
      console.error(e)
      const msg = e.response?.data?.detail || e.message || "Unknown Error"
      alert(`Import failed: ${msg}`)
    } finally {
      setSubmitting(false)
    }
  }

  return (
    <div className="fixed inset-0 bg-black/70 backdrop-blur-sm z-50 flex items-center justify-center p-4">
      <div className="bg-slate-900 border border-slate-700 rounded-xl w-full max-w-lg shadow-2xl">
        {/* Header */}
        <div className="flex items-center justify-between p-4 border-b border-slate-800">
          <h3 className="text-lg font-semibold text-white flex items-center gap-2">
            <UploadCloud className="h-5 w-5 text-blue-400" />
            Quick Import
          </h3>
          <button onClick={onClose} className="text-slate-400 hover:text-white">
            <X className="h-5 w-5" />
          </button>
        </div>
        {/* Body */}
        <div className="p-4 space-y-4">
          <p className="text-sm text-slate-400">
            Paste company names below (one per line). Duplicates in the database will be skipped automatically.
          </p>
          <textarea
            className="w-full h-64 bg-slate-950 border border-slate-700 rounded-lg p-3 text-sm text-slate-200 focus:ring-2 focus:ring-blue-600 outline-none font-mono"
            placeholder="Company A&#10;Company B&#10;Company C..."
            value={rawInput}
            onChange={e => setRawInput(e.target.value)}
          />
        </div>
        {/* Footer */}
        <div className="p-4 border-t border-slate-800 flex justify-end gap-3">
          <button
            onClick={onClose}
            className="px-4 py-2 text-sm font-medium text-slate-400 hover:text-white"
          >
            Cancel
          </button>
          <button
            onClick={submitCompanies}
            disabled={submitting || !rawInput.trim()}
            className="px-4 py-2 bg-blue-600 hover:bg-blue-500 text-white rounded-md text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed"
          >
            {submitting ? "Importing..." : "Import Companies"}
          </button>
        </div>
      </div>
    </div>
  )
}

View File

@@ -0,0 +1,123 @@
import { useEffect, useState } from 'react'
import axios from 'axios'
// FIX: lucide-react has no "Robot" export — the icon is named "Bot"; the
// previous import would fail to resolve at build time.
import { X, ExternalLink, Bot, Briefcase, Calendar } from 'lucide-react'
import clsx from 'clsx'

interface InspectorProps {
  // Company to show; null hides the sidebar entirely.
  companyId: number | null
  onClose: () => void
  apiBase: string
}

// One AI-detected robotics signal as returned by the detail endpoint.
type Signal = {
  signal_type: string
  confidence: number
  value: string
  proof_text: string
}

// Payload of GET /companies/{id}.
type CompanyDetail = {
  id: number
  name: string
  website: string | null
  industry_ai: string | null
  status: string
  created_at: string
  signals: Signal[]
}

// Right-hand sidebar showing the detail view and robotics scorecard for the
// currently selected company.
export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
  const [data, setData] = useState<CompanyDetail | null>(null)
  const [loading, setLoading] = useState(false)

  useEffect(() => {
    if (!companyId) return
    // FIX: clear the previous company's data so switching rows doesn't
    // briefly show stale content.
    setData(null)
    setLoading(true)
    axios.get(`${apiBase}/companies/${companyId}`)
      .then(res => setData(res.data))
      .catch(console.error)
      .finally(() => setLoading(false))
  }, [companyId])

  if (!companyId) return null

  return (
    <div className="fixed inset-y-0 right-0 w-[500px] bg-slate-900 border-l border-slate-800 shadow-2xl transform transition-transform duration-300 ease-in-out z-40 overflow-y-auto">
      {loading ? (
        <div className="p-8 text-slate-500">Loading details...</div>
      ) : !data ? (
        <div className="p-8 text-red-400">Failed to load data.</div>
      ) : (
        <div className="flex flex-col h-full">
          {/* Header */}
          <div className="p-6 border-b border-slate-800 bg-slate-950/50">
            <div className="flex justify-between items-start mb-4">
              <h2 className="text-xl font-bold text-white leading-tight">{data.name}</h2>
              <button onClick={onClose} className="text-slate-400 hover:text-white">
                <X className="h-6 w-6" />
              </button>
            </div>
            <div className="flex flex-wrap gap-2 text-sm">
              {data.website && (
                // rel="noreferrer" added for consistency with CompanyTable's external links
                <a href={data.website} target="_blank" rel="noreferrer" className="flex items-center gap-1 text-blue-400 hover:underline">
                  <ExternalLink className="h-3 w-3" /> {new URL(data.website).hostname.replace('www.', '')}
                </a>
              )}
              {data.industry_ai && (
                <span className="flex items-center gap-1 px-2 py-0.5 bg-slate-800 text-slate-300 rounded border border-slate-700">
                  <Briefcase className="h-3 w-3" /> {data.industry_ai}
                </span>
              )}
            </div>
          </div>
          {/* Robotics Scorecard: one tile per potential category */}
          <div className="p-6 space-y-6">
            <div>
              <h3 className="text-sm font-semibold text-slate-400 uppercase tracking-wider mb-3 flex items-center gap-2">
                <Bot className="h-4 w-4" /> Robotics Potential
              </h3>
              <div className="grid grid-cols-2 gap-4">
                {['cleaning', 'transport', 'security', 'service'].map(type => {
                  const sig = data.signals.find(s => s.signal_type.includes(type))
                  const score = sig ? sig.confidence : 0
                  return (
                    <div key={type} className="bg-slate-800/50 p-3 rounded-lg border border-slate-700">
                      <div className="flex justify-between mb-1">
                        <span className="text-sm text-slate-300 capitalize">{type}</span>
                        <span className={clsx("text-sm font-bold", score > 70 ? "text-green-400" : score > 30 ? "text-yellow-400" : "text-slate-500")}>
                          {score}%
                        </span>
                      </div>
                      <div className="w-full bg-slate-700 h-1.5 rounded-full overflow-hidden">
                        <div
                          className={clsx("h-full rounded-full", score > 70 ? "bg-green-500" : score > 30 ? "bg-yellow-500" : "bg-slate-600")}
                          style={{ width: `${score}%` }}
                        />
                      </div>
                      {sig?.proof_text && (
                        <p className="text-xs text-slate-500 mt-2 line-clamp-2" title={sig.proof_text}>
                          "{sig.proof_text}"
                        </p>
                      )}
                    </div>
                  )
                })}
              </div>
            </div>
            {/* Meta Info */}
            <div className="pt-6 border-t border-slate-800">
              <div className="text-xs text-slate-500 flex items-center gap-2">
                <Calendar className="h-3 w-3" /> Added: {new Date(data.created_at).toLocaleDateString()}
              </div>
            </div>
          </div>
        </div>
      )}
    </div>
  )
}

View File

@@ -0,0 +1,19 @@
/* Tailwind layer entry points (compiled by the tailwindcss PostCSS plugin) */
@tailwind base;
@tailwind components;
@tailwind utilities;

/* Custom Scrollbar for dark theme (WebKit/Blink only; Firefox ignores these) */
::-webkit-scrollbar {
  width: 8px;
  height: 8px;
}
::-webkit-scrollbar-track {
  background: #1e293b; /* slate-800 */
}
::-webkit-scrollbar-thumb {
  background: #475569; /* slate-600 */
  border-radius: 4px;
}
::-webkit-scrollbar-thumb:hover {
  background: #64748b; /* slate-500 */
}

View File

@@ -0,0 +1,10 @@
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App.tsx'
import './index.css'

// Application entry point: mount <App /> into #root (declared in index.html).
// StrictMode double-invokes effects in development to surface impure lifecycles.
ReactDOM.createRoot(document.getElementById('root')!).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>,
)

View File

@@ -0,0 +1 @@
/// <reference types="vite/client" />

View File

@@ -0,0 +1,11 @@
/** @type {import('tailwindcss').Config} */
export default {
  // Files scanned for class names; anything not referenced here is purged.
  content: [
    "./index.html",
    "./src/**/*.{js,ts,jsx,tsx}",
  ],
  theme: {
    extend: {},
  },
  plugins: [],
}

View File

@@ -0,0 +1,16 @@
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'

// https://vitejs.dev/config/
export default defineConfig({
  plugins: [react()],
  base: '/ce/', // Critical for Nginx Reverse Proxy
  server: {
    proxy: {
      // Forward API calls to FastAPI during dev. In production Nginx serves
      // the app under /ce/ and the frontend calls /ce/api instead (see App.tsx).
      '/api': {
        target: 'http://localhost:8000',
        changeOrigin: true
      }
    }
  }
})

View File

@@ -0,0 +1,15 @@
# Web framework & ASGI server
fastapi
uvicorn
# Persistence & settings
sqlalchemy
pydantic
pydantic-settings
# HTTP client & HTML parsing (discovery/scraping services)
requests
beautifulsoup4
# Tabular import/export
pandas
openpyxl
# Fuzzy matching (deduplication)
thefuzz
python-Levenshtein
# AI (Gemini) & imaging
google-genai
pillow
# FastAPI form/upload support & .env loading
python-multipart
python-dotenv