feat(app): Add wiki re-evaluation and fix wolfra bug
- Implemented a "Re-evaluate Wikipedia" button in the UI.
- Added a backend endpoint to trigger targeted Wikipedia metric extraction.
- Hardened the LLM metric extraction prompt to prevent hallucinations.
- Corrected several database path errors that caused data loss.
- Updated application version to 0.6.4 and documented the ongoing issue.
This commit is contained in:
@@ -58,6 +58,9 @@ class AnalysisRequest(BaseModel):
|
||||
company_id: int
|
||||
force_scrape: bool = False
|
||||
|
||||
class IndustryUpdateModel(BaseModel):
    """Request body holding the new ``industry_ai`` value for a company."""

    # Industry label to store on the company record.
    industry_ai: str
|
||||
|
||||
# --- Events ---
|
||||
@app.on_event("startup")
|
||||
def on_startup():
|
||||
@@ -137,6 +140,137 @@ def analyze_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db:
|
||||
background_tasks.add_task(run_analysis_task, company.id)
|
||||
return {"status": "queued"}
|
||||
|
||||
@app.put("/api/companies/{company_id}/industry")
def update_company_industry(
    company_id: int,
    data: IndustryUpdateModel,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
):
    """Store a new ``industry_ai`` on a company and queue metric re-extraction.

    Args:
        company_id: Primary key of the company to update.
        data: Request body carrying the new ``industry_ai`` value.
        background_tasks: Queue that receives the re-extraction task.
        db: Database session provided by ``get_db``.

    Returns:
        A dict with ``status`` set to ``"updated"`` and the stored
        ``industry_ai`` value.

    Raises:
        HTTPException: 404 when no company with ``company_id`` exists.
    """
    target = db.query(Company).filter(Company.id == company_id).first()
    if not target:
        raise HTTPException(404, detail="Company not found")

    # Persist the new industry before scheduling follow-up work, so the
    # background task reads the committed value.
    target.industry_ai = data.industry_ai
    target.updated_at = datetime.utcnow()
    db.commit()

    # Metrics depend on the industry, so re-extract them in the background.
    background_tasks.add_task(run_metric_reextraction_task, target.id)

    response = {"status": "updated", "industry_ai": target.industry_ai}
    return response
|
||||
|
||||
|
||||
@app.post("/api/companies/{company_id}/reevaluate-wikipedia")
def reevaluate_wikipedia(
    company_id: int,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
):
    """Queue a background Wikipedia metric re-evaluation for one company.

    Args:
        company_id: Primary key of the company to re-evaluate.
        background_tasks: Queue that receives the re-evaluation task.
        db: Database session provided by ``get_db``.

    Returns:
        ``{"status": "queued"}`` once the task has been scheduled.

    Raises:
        HTTPException: 404 when no company with ``company_id`` exists.
    """
    target = db.query(Company).filter(Company.id == company_id).first()
    if not target:
        raise HTTPException(404, detail="Company not found")

    # The actual work happens after the response is sent.
    background_tasks.add_task(run_wikipedia_reevaluation_task, target.id)
    return {"status": "queued"}
|
||||
|
||||
|
||||
@app.delete("/api/companies/{company_id}")
def delete_company(company_id: int, db: Session = Depends(get_db)):
    """Delete a company together with its enrichment data, signals and contacts.

    Args:
        company_id: Primary key of the company to delete.
        db: Database session provided by ``get_db``.

    Returns:
        ``{"status": "deleted"}`` after the deletion has been committed.

    Raises:
        HTTPException: 404 when no company with ``company_id`` exists.
    """
    target = db.query(Company).filter(Company.id == company_id).first()
    if not target:
        raise HTTPException(404, detail="Company not found")

    # Remove dependent rows explicitly before the company itself; a cascade
    # might already cover this, but being explicit is safer.
    for related_model in (EnrichmentData, Signal, Contact):
        db.query(related_model).filter(
            related_model.company_id == company_id
        ).delete()

    db.delete(target)
    db.commit()
    return {"status": "deleted"}
|
||||
|
||||
@app.post("/api/companies/{company_id}/override/website")
def override_website(company_id: int, url: str, db: Session = Depends(get_db)):
    """Manually set a company's website URL.

    Args:
        company_id: Primary key of the company to update.
        url: New website value; stored as given, no validation happens here.
        db: Database session provided by ``get_db``.

    Returns:
        A dict with ``status`` set to ``"updated"`` and the stored ``website``.

    Raises:
        HTTPException: 404 when no company with ``company_id`` exists.
    """
    target = db.query(Company).filter(Company.id == company_id).first()
    if not target:
        raise HTTPException(404, detail="Company not found")

    target.website = url
    target.updated_at = datetime.utcnow()
    db.commit()

    response = {"status": "updated", "website": target.website}
    return response
|
||||
|
||||
@app.post("/api/companies/{company_id}/override/impressum")
def override_impressum(
    company_id: int,
    url: str,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
):
    """Create or update the locked manual impressum override for a company.

    Args:
        company_id: Primary key of the company the override belongs to.
        url: Impressum URL stored as ``{"url": url}`` in the override record.
        background_tasks: Accepted as part of the endpoint signature; not
            used by this handler.
        db: Database session provided by ``get_db``.

    Returns:
        ``{"status": "updated"}`` after the override has been committed.

    Raises:
        HTTPException: 404 when no company with ``company_id`` exists.
    """
    target = db.query(Company).filter(Company.id == company_id).first()
    if not target:
        raise HTTPException(404, detail="Company not found")

    # At most one "impressum_override" record is maintained per company:
    # reuse it when present, otherwise create it.
    override = (
        db.query(EnrichmentData)
        .filter(
            EnrichmentData.company_id == company_id,
            EnrichmentData.source_type == "impressum_override",
        )
        .first()
    )

    if override:
        override.content = {"url": url}
        override.is_locked = True
    else:
        db.add(
            EnrichmentData(
                company_id=company_id,
                source_type="impressum_override",
                content={"url": url},
                is_locked=True,
            )
        )

    db.commit()
    return {"status": "updated"}
|
||||
|
||||
def run_wikipedia_reevaluation_task(company_id: int):
    """Background task: re-evaluate the Wikipedia metric for one company.

    Creates its own ``SessionLocal`` session and always closes it. Returns
    silently when the company does not exist, logs a warning when the
    company's ``industry_ai`` matches no ``Industry`` row, and logs (rather
    than raises) any exception.

    Args:
        company_id: Primary key of the company to re-evaluate.
    """
    from .database import SessionLocal

    session = SessionLocal()
    try:
        company = session.query(Company).filter(Company.id == company_id).first()
        if not company:
            return

        logger.info(f"Re-evaluating Wikipedia metric for {company.name} (Industry: {company.industry_ai})")

        industry = (
            session.query(Industry)
            .filter(Industry.name == company.industry_ai)
            .first()
        )
        if not industry:
            logger.warning(f"Industry '{company.industry_ai}' not found for re-evaluation.")
            return

        classifier.reevaluate_wikipedia_metric(company, session, industry)
        logger.info(f"Wikipedia metric re-evaluation complete for {company.name}")
    except Exception as e:
        # Top-level task boundary: record the failure instead of propagating.
        logger.error(f"Wikipedia Re-evaluation Task Error: {e}", exc_info=True)
    finally:
        session.close()
|
||||
|
||||
def run_metric_reextraction_task(company_id: int):
    """Background task: re-extract metrics for a company's current industry.

    Creates its own ``SessionLocal`` session and always closes it. Returns
    silently when the company does not exist. When the company's
    ``industry_ai`` matches an ``Industry`` row, metrics are extracted for
    that industry, the company status is set to ``"ENRICHED"`` and the
    change is committed; otherwise a warning is logged. Any exception is
    logged rather than raised.

    Args:
        company_id: Primary key of the company to process.
    """
    from .database import SessionLocal

    db = SessionLocal()
    try:
        company = db.query(Company).filter(Company.id == company_id).first()
        if not company:
            return

        logger.info(f"Re-extracting metrics for {company.name} (Industry: {company.industry_ai})")

        # Let the database find the matching industry instead of loading the
        # whole table and scanning it in Python; this mirrors the lookup in
        # run_wikipedia_reevaluation_task.
        industry = (
            db.query(Industry)
            .filter(Industry.name == company.industry_ai)
            .first()
        )

        if industry:
            classifier.extract_metrics_for_industry(company, db, industry)
            company.status = "ENRICHED"
            db.commit()
            logger.info(f"Metric re-extraction complete for {company.name}")
        else:
            logger.warning(f"Industry '{company.industry_ai}' not found for re-extraction.")

    except Exception as e:
        # Top-level task boundary: record the failure instead of propagating.
        logger.error(f"Metric Re-extraction Task Error: {e}", exc_info=True)
    finally:
        db.close()
|
||||
|
||||
def run_discovery_task(company_id: int):
|
||||
from .database import SessionLocal
|
||||
db = SessionLocal()
|
||||
|
||||
Reference in New Issue
Block a user