feat(app): Add wiki re-evaluation and fix wolfra bug

- Implemented a "Re-evaluate Wikipedia" button in the UI.

- Added a backend endpoint to trigger targeted Wikipedia metric extraction.

- Hardened the LLM metric extraction prompt to prevent hallucinations.

- Corrected several database path errors that caused data loss.

- Updated application version to 0.6.4 and documented the ongoing issue.
This commit is contained in:
2026-01-23 16:05:44 +00:00
parent d8665697b2
commit c5652fc9b5
7 changed files with 1427 additions and 791 deletions

View File

@@ -58,6 +58,9 @@ class AnalysisRequest(BaseModel):
company_id: int
force_scrape: bool = False
class IndustryUpdateModel(BaseModel):
industry_ai: str
# --- Events ---
@app.on_event("startup")
def on_startup():
@@ -137,6 +140,137 @@ def analyze_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db:
background_tasks.add_task(run_analysis_task, company.id)
return {"status": "queued"}
@app.put("/api/companies/{company_id}/industry")
def update_company_industry(
company_id: int,
data: IndustryUpdateModel,
background_tasks: BackgroundTasks,
db: Session = Depends(get_db)
):
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, detail="Company not found")
# 1. Update Industry
company.industry_ai = data.industry_ai
company.updated_at = datetime.utcnow()
db.commit()
# 2. Trigger Metric Re-extraction in Background
background_tasks.add_task(run_metric_reextraction_task, company.id)
return {"status": "updated", "industry_ai": company.industry_ai}
@app.post("/api/companies/{company_id}/reevaluate-wikipedia")
def reevaluate_wikipedia(company_id: int, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, detail="Company not found")
background_tasks.add_task(run_wikipedia_reevaluation_task, company.id)
return {"status": "queued"}
@app.delete("/api/companies/{company_id}")
def delete_company(company_id: int, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, detail="Company not found")
# Delete related data first (Cascade might handle this but being explicit is safer)
db.query(EnrichmentData).filter(EnrichmentData.company_id == company_id).delete()
db.query(Signal).filter(Signal.company_id == company_id).delete()
db.query(Contact).filter(Contact.company_id == company_id).delete()
db.delete(company)
db.commit()
return {"status": "deleted"}
@app.post("/api/companies/{company_id}/override/website")
def override_website(company_id: int, url: str, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, detail="Company not found")
company.website = url
company.updated_at = datetime.utcnow()
db.commit()
return {"status": "updated", "website": company.website}
@app.post("/api/companies/{company_id}/override/impressum")
def override_impressum(company_id: int, url: str, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, detail="Company not found")
# Create or update manual impressum lock
existing = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company_id,
EnrichmentData.source_type == "impressum_override"
).first()
if not existing:
db.add(EnrichmentData(
company_id=company_id,
source_type="impressum_override",
content={"url": url},
is_locked=True
))
else:
existing.content = {"url": url}
existing.is_locked = True
db.commit()
return {"status": "updated"}
def run_wikipedia_reevaluation_task(company_id: int):
from .database import SessionLocal
db = SessionLocal()
try:
company = db.query(Company).filter(Company.id == company_id).first()
if not company: return
logger.info(f"Re-evaluating Wikipedia metric for {company.name} (Industry: {company.industry_ai})")
industry = db.query(Industry).filter(Industry.name == company.industry_ai).first()
if industry:
classifier.reevaluate_wikipedia_metric(company, db, industry)
logger.info(f"Wikipedia metric re-evaluation complete for {company.name}")
else:
logger.warning(f"Industry '{company.industry_ai}' not found for re-evaluation.")
except Exception as e:
logger.error(f"Wikipedia Re-evaluation Task Error: {e}", exc_info=True)
finally:
db.close()
def run_metric_reextraction_task(company_id: int):
from .database import SessionLocal
db = SessionLocal()
try:
company = db.query(Company).filter(Company.id == company_id).first()
if not company: return
logger.info(f"Re-extracting metrics for {company.name} (Industry: {company.industry_ai})")
industries = db.query(Industry).all()
industry = next((i for i in industries if i.name == company.industry_ai), None)
if industry:
classifier.extract_metrics_for_industry(company, db, industry)
company.status = "ENRICHED"
db.commit()
logger.info(f"Metric re-extraction complete for {company.name}")
else:
logger.warning(f"Industry '{company.industry_ai}' not found for re-extraction.")
except Exception as e:
logger.error(f"Metric Re-extraction Task Error: {e}", exc_info=True)
finally:
db.close()
def run_discovery_task(company_id: int):
from .database import SessionLocal
db = SessionLocal()