Files
Brancheneinstufung2/company-explorer/backend/app.py

226 lines
7.8 KiB
Python

from fastapi import FastAPI, Depends, HTTPException, Query, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session, joinedload
from typing import List, Optional, Dict, Any
from pydantic import BaseModel
from datetime import datetime
import os
import sys
from .config import settings
from .lib.logging_setup import setup_logging
# Setup Logging first
setup_logging()
import logging
logger = logging.getLogger(__name__)
from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRoleMapping
from .services.deduplication import Deduplicator
from .services.discovery import DiscoveryService
from .services.scraping import ScraperService
from .services.classification import ClassificationService
# Initialize App
app = FastAPI(
title=settings.APP_NAME,
version=settings.VERSION,
description="Backend for Company Explorer (Robotics Edition)",
root_path="/ce"
)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Service Singletons
scraper = ScraperService()
classifier = ClassificationService() # Now works without args
discovery = DiscoveryService()
# --- Pydantic Models ---
class CompanyCreate(BaseModel):
name: str
city: Optional[str] = None
country: str = "DE"
website: Optional[str] = None
class BulkImportRequest(BaseModel):
names: List[str]
class AnalysisRequest(BaseModel):
company_id: int
force_scrape: bool = False
# --- Events ---
@app.on_event("startup")
def on_startup():
logger.info("Startup Event: Initializing Database...")
try:
init_db()
logger.info("Database initialized successfully.")
except Exception as e:
logger.critical(f"Database init failed: {e}", exc_info=True)
# --- Routes ---
@app.get("/api/health")
def health_check():
return {"status": "ok", "version": settings.VERSION, "db": settings.DATABASE_URL}
@app.get("/api/companies")
def list_companies(
skip: int = 0,
limit: int = 50,
search: Optional[str] = None,
sort_by: Optional[str] = Query("name_asc"),
db: Session = Depends(get_db)
):
try:
query = db.query(Company)
if search:
query = query.filter(Company.name.ilike(f"%{search}%"))
total = query.count()
if sort_by == "updated_desc":
query = query.order_by(Company.updated_at.desc())
elif sort_by == "created_desc":
query = query.order_by(Company.id.desc())
else: # Default: name_asc
query = query.order_by(Company.name.asc())
items = query.offset(skip).limit(limit).all()
return {"total": total, "items": items}
except Exception as e:
logger.error(f"List Companies Error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/companies/{company_id}")
def get_company(company_id: int, db: Session = Depends(get_db)):
company = db.query(Company).options(
joinedload(Company.enrichment_data),
joinedload(Company.contacts)
).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, detail="Company not found")
return company
@app.get("/api/robotics/categories")
def list_robotics_categories(db: Session = Depends(get_db)):
return db.query(RoboticsCategory).all()
@app.get("/api/industries")
def list_industries(db: Session = Depends(get_db)):
return db.query(Industry).all()
@app.post("/api/enrich/discover")
def discover_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == req.company_id).first()
if not company: raise HTTPException(404, "Company not found")
background_tasks.add_task(run_discovery_task, company.id)
return {"status": "queued"}
@app.post("/api/enrich/analyze")
def analyze_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == req.company_id).first()
if not company: raise HTTPException(404, "Company not found")
if not company.website or company.website == "k.A.":
return {"error": "No website to analyze. Run Discovery first."}
background_tasks.add_task(run_analysis_task, company.id)
return {"status": "queued"}
def run_discovery_task(company_id: int):
from .database import SessionLocal
db = SessionLocal()
try:
company = db.query(Company).filter(Company.id == company_id).first()
if not company: return
# 1. Website Search
if not company.website or company.website == "k.A.":
found_url = discovery.find_company_website(company.name, company.city)
if found_url and found_url != "k.A.":
company.website = found_url
# 2. Wikipedia Search
existing_wiki = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "wikipedia"
).first()
if not existing_wiki or not existing_wiki.is_locked:
wiki_url = discovery.find_wikipedia_url(company.name, website=company.website, city=company.city)
wiki_data = discovery.extract_wikipedia_data(wiki_url) if wiki_url and wiki_url != "k.A." else {"url": wiki_url}
if not existing_wiki:
db.add(EnrichmentData(company_id=company.id, source_type="wikipedia", content=wiki_data))
else:
existing_wiki.content = wiki_data
existing_wiki.updated_at = datetime.utcnow()
if company.status == "NEW" and company.website and company.website != "k.A.":
company.status = "DISCOVERED"
db.commit()
except Exception as e:
logger.error(f"Discovery Task Error: {e}", exc_info=True)
finally:
db.close()
def run_analysis_task(company_id: int):
from .database import SessionLocal
db = SessionLocal()
try:
company = db.query(Company).filter(Company.id == company_id).first()
if not company: return
logger.info(f"Running Analysis Task for {company.name}")
# 1. Scrape Website (if not locked)
existing_scrape = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape"
).first()
if not existing_scrape or not existing_scrape.is_locked:
from .services.scraping import ScraperService
scrape_res = ScraperService().scrape_url(company.website)
if not existing_scrape:
db.add(EnrichmentData(company_id=company.id, source_type="website_scrape", content=scrape_res))
else:
existing_scrape.content = scrape_res
existing_scrape.updated_at = datetime.utcnow()
db.commit()
# 2. Classify Industry & Metrics
# IMPORTANT: Using the new method name and passing db session
classifier.classify_company_potential(company, db)
company.status = "ENRICHED"
db.commit()
logger.info(f"Analysis complete for {company.name}")
except Exception as e:
logger.error(f"Analyze Task Error: {e}", exc_info=True)
finally:
db.close()
# --- Serve Frontend ---
static_path = "/frontend_static"
if not os.path.exists(static_path):
static_path = os.path.join(os.path.dirname(__file__), "../static")
if os.path.exists(static_path):
app.mount("/", StaticFiles(directory=static_path, html=True), name="static")
if __name__ == "__main__":
import uvicorn
uvicorn.run("backend.app:app", host="0.0.0.0", port=8000, reload=True)