From d67245c50a34bb68ba9f35c95ff65d52c25335d4 Mon Sep 17 00:00:00 2001 From: Floke Date: Tue, 27 Jan 2026 09:00:20 +0000 Subject: [PATCH] feat(reporting): Implement 'Report Mistake' feature with API and UI [2f388f42] --- MIGRATION_PLAN.md | 38 ++++ company-explorer/backend/app.py | 131 ++++++++++- company-explorer/backend/database.py | 20 ++ .../backend/scripts/migrate_db.py | 20 ++ .../frontend/src/components/Inspector.tsx | 203 +++++++++++++++++- .../src/components/RoboticsSettings.tsx | 121 ++++++++++- 6 files changed, 520 insertions(+), 13 deletions(-) diff --git a/MIGRATION_PLAN.md b/MIGRATION_PLAN.md index 51cae1f4..8f0f08f6 100644 --- a/MIGRATION_PLAN.md +++ b/MIGRATION_PLAN.md @@ -29,12 +29,16 @@ Das System wird in `company-explorer/` neu aufgebaut. Wir lösen Abhängigkeiten | **Classification Service** | **NEU (v0.7.0).** Zweistufige Logik:
1. Strict Industry Classification.
2. Metric Extraction Cascade (Web -> Wiki -> SerpAPI). | 1 | | **Marketing Engine** | Ersetzt `generate_marketing_text.py`. Nutzt neue `marketing_wissen_robotics.yaml`. | 3 | +**Identifizierte Hauptdatei:** `company-explorer/backend/app.py` + ### B. Frontend (`frontend/`) - React * **View 1: Der "Explorer":** DataGrid aller Firmen. Filterbar nach "Roboter-Potential" und Status. * **View 2: Der "Inspector":** Detailansicht einer Firma. Zeigt gefundene Signale ("Hat SPA Bereich"). Manuelle Korrektur-Möglichkeit. + * **Identifizierte Komponente:** `company-explorer/frontend/src/components/Inspector.tsx` * **View 3: "List Matcher":** Upload einer Excel-Liste -> Anzeige von Duplikaten -> Button "Neue importieren". * **View 4: "Settings":** Konfiguration von Branchen, Rollen und Robotik-Logik. + * **Identifizierte Komponente:** `company-explorer/frontend/src/components/RoboticsSettings.tsx` ## 3. Umgang mit Shared Code (`helpers.py` & Co.) @@ -223,6 +227,40 @@ Wenn die `industries`-Tabelle in einer bestehenden Datenbank aktualisiert werden ## 12. Deployment & Access Notes +## 13. Task [2f388f42]: Report mistakes + +### Aufgabenbeschreibung: +When a user notices an error on an account, such as a wrong value, they should have the option to mark this mistake (specify which value is wrong) and add a link to the source, a quote, and a comment explaining why the user prefers this information over the information originally found by the LLM. These corrections should be collected in a database for later review. The database will be displayed in the settings. + +The review shall be performed by a specific checker process, which should process the information and feed it into the research process to improve the search quality over time. + +### Detaillierter Plan: + +**Phase 1: Backend & Datenbank** +1. **Neue Datenbank-Tabelle:** Ich werde eine neue Tabelle `reported_mistakes` in der SQLite-Datenbank erstellen. 
Sie wird Spalten für die `company_id` (FK), den `field_name` (String), den `wrong_value` (Text), den `corrected_value` (Text), die `source_url` (String), das `quote` (Text), den `user_comment` (Text) und einen `status` (Enum: `PENDING`, `APPROVED`, `REJECTED`, Standard: `PENDING`) sowie `created_at` (Timestamp) und `updated_at` (Timestamp) enthalten. +2. **API-Endpunkt zum Melden (POST):** Ich erstelle einen neuen `POST /api/companies/{company_id}/report-mistake` Endpunkt in `company-explorer/backend/app.py`, der die gemeldeten Fehler entgegennimmt und in der neuen Tabelle speichert. +3. **API-Endpunkt zum Anzeigen (GET):** Ich füge einen `GET /api/mistakes` Endpunkt in `company-explorer/backend/app.py` hinzu, der alle gemeldeten Fehler (oder gefilterte nach Status) für die Anzeige auf der Einstellungsseite abruft. +4. **API-Endpunkt zum Aktualisieren (PUT):** Ich füge einen `PUT /api/mistakes/{mistake_id}` Endpunkt in `company-explorer/backend/app.py` hinzu, um den Status eines gemeldeten Fehlers (z.B. `APPROVED`, `REJECTED`) zu aktualisieren. + +**Phase 2: Frontend (React)** +5. **Benutzeroberfläche zum Melden:** In der `Inspector.tsx` Komponente (`company-explorer/frontend/src/components/Inspector.tsx`) werde ich neben den wichtigsten Datenfeldern ein kleines "Fehler melden"-Icon hinzufügen. Ein Klick darauf öffnet ein Modalfenster/Formular, in das der Benutzer die Korrekturinformationen (Feldname, falscher Wert, korrigierter Wert, URL, Zitat, Kommentar) eingeben kann. +6. **Anzeige in den Einstellungen:** Im Einstellungsbereich, wahrscheinlich in `RoboticsSettings.tsx` (`company-explorer/frontend/src/components/RoboticsSettings.tsx`), wird ein neuer Tab "Gemeldete Fehler" oder eine neue Sektion hinzugefügt. Dort wird eine Tabelle alle Einträge aus der `reported_mistakes`-Tabelle anzeigen, mit Optionen zum Filtern nach Status und zur Interaktion (z.B. Genehmigen/Ablehnen). + +**Phase 3: Prüfprozess & Ausblick** +7. 
**Manueller Prüf-Workflow:** Die Tabelle in den Einstellungen wird um "Genehmigen"- und "Ablehnen"-Buttons erweitert. Ein Prüfer kann damit den Status jeder Meldung aktualisieren. Dies wird über den `PUT /api/mistakes/{mistake_id}` Endpunkt umgesetzt. +8. **Konzept für die Zukunft:** Die gesammelten und genehmigten Korrekturen bilden die Grundlage für eine spätere, automatisierte Verbesserung. Dies könnte beinhalten: + * **LLM Fine-Tuning/Prompt-Verbesserung:** Genehmigte Korrekturen können als Beispiele für das Training oder die Kontextualisierung von LLM-Prompts verwendet werden, um die Genauigkeit der Datenextraktion zu verbessern. + * **Scraping-Regel-Anpassung:** Systematische Fehler, die durch gemeldete Fehler identifiziert werden, könnten zur Anpassung von Scraping-Regeln oder Parser-Logik führen. + * **Automatisierte Datenkorrektur:** Bei einer hohen Konfidenz könnten genehmigte Korrekturen direkt in die `companies`-Tabelle zurückgeschrieben werden. + +### Wichtige Erkenntnisse zur Umsetzung: +* **Backend-Hauptdatei:** `company-explorer/backend/app.py` +* **Frontend "Inspector" Komponente:** `company-explorer/frontend/src/components/Inspector.tsx` +* **Frontend "Settings" Komponente:** `company-explorer/frontend/src/components/RoboticsSettings.tsx` + +--- + + **Wichtiger Hinweis zum Deployment-Setup:** Dieses Projekt läuft in einer Docker-Compose-Umgebung, typischerweise auf einer Synology Diskstation. Der Zugriff auf die einzelnen Microservices erfolgt über einen zentralen Nginx-Reverse-Proxy (`proxy`-Service), der auf Port `8090` des Host-Systems lauscht. 
diff --git a/company-explorer/backend/app.py b/company-explorer/backend/app.py index d5f0ac06..8261a591 100644 --- a/company-explorer/backend/app.py +++ b/company-explorer/backend/app.py @@ -17,7 +17,7 @@ setup_logging() import logging logger = logging.getLogger(__name__) -from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRoleMapping +from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRoleMapping, ReportedMistake from .services.deduplication import Deduplicator from .services.discovery import DiscoveryService from .services.scraping import ScraperService @@ -61,6 +61,14 @@ class AnalysisRequest(BaseModel): class IndustryUpdateModel(BaseModel): industry_ai: str +class ReportMistakeRequest(BaseModel): + field_name: str + wrong_value: Optional[str] = None + corrected_value: Optional[str] = None + source_url: Optional[str] = None + quote: Optional[str] = None + user_comment: Optional[str] = None + # --- Events --- @app.on_event("startup") def on_startup(): @@ -240,6 +248,47 @@ def list_industries(db: Session = Depends(get_db)): def list_job_roles(db: Session = Depends(get_db)): return db.query(JobRoleMapping).order_by(JobRoleMapping.pattern.asc()).all() +@app.get("/api/mistakes") +def list_reported_mistakes( + status: Optional[str] = Query(None), + skip: int = 0, + limit: int = 50, + db: Session = Depends(get_db) +): + query = db.query(ReportedMistake).options(joinedload(ReportedMistake.company)) + + if status: + query = query.filter(ReportedMistake.status == status.upper()) + + total = query.count() + items = query.order_by(ReportedMistake.created_at.desc()).offset(skip).limit(limit).all() + + return {"total": total, "items": items} + +class MistakeUpdateStatusRequest(BaseModel): + status: str # PENDING, APPROVED, REJECTED + +@app.put("/api/mistakes/{mistake_id}") +def update_reported_mistake_status( + mistake_id: int, + request: 
MistakeUpdateStatusRequest, + db: Session = Depends(get_db) +): + mistake = db.query(ReportedMistake).filter(ReportedMistake.id == mistake_id).first() + if not mistake: + raise HTTPException(404, detail="Reported mistake not found") + + if request.status.upper() not in ["PENDING", "APPROVED", "REJECTED"]: + raise HTTPException(400, detail="Invalid status. Must be PENDING, APPROVED, or REJECTED.") + + mistake.status = request.status.upper() + mistake.updated_at = datetime.utcnow() + db.commit() + db.refresh(mistake) + + logger.info(f"Updated status for mistake {mistake_id} to {mistake.status}") + return {"status": "success", "mistake": mistake} + @app.post("/api/enrich/discover") def discover_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)): company = db.query(Company).filter(Company.id == req.company_id).first() @@ -317,35 +366,115 @@ def override_website(company_id: int, url: str, db: Session = Depends(get_db)): return {"status": "updated", "website": company.website} @app.post("/api/companies/{company_id}/override/impressum") + def override_impressum(company_id: int, url: str, background_tasks: BackgroundTasks, db: Session = Depends(get_db)): + company = db.query(Company).filter(Company.id == company_id).first() + if not company: + raise HTTPException(404, detail="Company not found") + + # Create or update manual impressum lock + existing = db.query(EnrichmentData).filter( + EnrichmentData.company_id == company_id, + EnrichmentData.source_type == "impressum_override" + ).first() + + if not existing: + db.add(EnrichmentData( + company_id=company_id, + source_type="impressum_override", + content={"url": url}, + is_locked=True + )) + else: + existing.content = {"url": url} + existing.is_locked = True + + db.commit() + return {"status": "updated"} +@app.post("/api/companies/{company_id}/report-mistake") + +def report_company_mistake( + + company_id: int, + + request: ReportMistakeRequest, + + db: Session = 
Depends(get_db) + +): + + company = db.query(Company).filter(Company.id == company_id).first() + + if not company: + + raise HTTPException(404, detail="Company not found") + + + + new_mistake = ReportedMistake( + + company_id=company_id, + + field_name=request.field_name, + + wrong_value=request.wrong_value, + + corrected_value=request.corrected_value, + + source_url=request.source_url, + + quote=request.quote, + + user_comment=request.user_comment + + ) + + db.add(new_mistake) + + db.commit() + + db.refresh(new_mistake) + + + + logger.info(f"Reported mistake for company {company_id}: {request.field_name} -> {request.corrected_value}") + + return {"status": "success", "mistake_id": new_mistake.id} + + + + + def run_wikipedia_reevaluation_task(company_id: int): + from .database import SessionLocal + db = SessionLocal() try: company = db.query(Company).filter(Company.id == company_id).first() diff --git a/company-explorer/backend/database.py b/company-explorer/backend/database.py index 35929d1e..2940a0ce 100644 --- a/company-explorer/backend/database.py +++ b/company-explorer/backend/database.py @@ -58,6 +58,7 @@ class Company(Base): # Relationships signals = relationship("Signal", back_populates="company", cascade="all, delete-orphan") enrichment_data = relationship("EnrichmentData", back_populates="company", cascade="all, delete-orphan") + reported_mistakes = relationship("ReportedMistake", back_populates="company", cascade="all, delete-orphan") contacts = relationship("Contact", back_populates="company", cascade="all, delete-orphan") @@ -203,6 +204,25 @@ class ImportLog(Base): duplicate_rows = Column(Integer) created_at = Column(DateTime, default=datetime.utcnow) + +class ReportedMistake(Base): + __tablename__ = "reported_mistakes" + + id = Column(Integer, primary_key=True, index=True) + company_id = Column(Integer, ForeignKey("companies.id"), index=True, nullable=False) + field_name = Column(String, nullable=False) + wrong_value = Column(Text, nullable=True) + 
corrected_value = Column(Text, nullable=True) + source_url = Column(String, nullable=True) + quote = Column(Text, nullable=True) + user_comment = Column(Text, nullable=True) + status = Column(String, default="PENDING", nullable=False) # PENDING, APPROVED, REJECTED + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + + company = relationship("Company", back_populates="reported_mistakes") + + # ============================================================================== # UTILS # ============================================================================== diff --git a/company-explorer/backend/scripts/migrate_db.py b/company-explorer/backend/scripts/migrate_db.py index 78cbad13..edab195f 100644 --- a/company-explorer/backend/scripts/migrate_db.py +++ b/company-explorer/backend/scripts/migrate_db.py @@ -69,6 +69,26 @@ def migrate_tables(): logger.info(f"Adding column '{col}' to 'companies' table...") cursor.execute(f"ALTER TABLE companies ADD COLUMN {col} {col_type}") + # 3. 
Create REPORTED_MISTAKES Table + logger.info("Checking 'reported_mistakes' table schema...") + cursor.execute(""" + CREATE TABLE IF NOT EXISTS reported_mistakes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + company_id INTEGER NOT NULL, + field_name TEXT NOT NULL, + wrong_value TEXT, + corrected_value TEXT, + source_url TEXT, + quote TEXT, + user_comment TEXT, + status TEXT NOT NULL DEFAULT 'PENDING', + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (company_id) REFERENCES companies (id) + ) + """) + logger.info("Table 'reported_mistakes' ensured to exist.") + conn.commit() logger.info("All migrations completed successfully.") diff --git a/company-explorer/frontend/src/components/Inspector.tsx b/company-explorer/frontend/src/components/Inspector.tsx index 3699201f..d0645218 100644 --- a/company-explorer/frontend/src/components/Inspector.tsx +++ b/company-explorer/frontend/src/components/Inspector.tsx @@ -1,6 +1,6 @@ import { useEffect, useState } from 'react' import axios from 'axios' -import { X, ExternalLink, Bot, Briefcase, Calendar, Globe, Users, DollarSign, MapPin, Tag, RefreshCw as RefreshCwIcon, Search as SearchIcon, Pencil, Check, Download, Clock, Lock, Unlock, Calculator, Ruler, Database, Trash2 } from 'lucide-react' +import { X, ExternalLink, Bot, Briefcase, Calendar, Globe, Users, DollarSign, MapPin, Tag, RefreshCw as RefreshCwIcon, Search as SearchIcon, Pencil, Check, Download, Clock, Lock, Unlock, Calculator, Ruler, Database, Trash2, Flag } from 'lucide-react' import clsx from 'clsx' import { ContactsManager, Contact } from './ContactsManager' @@ -54,6 +54,15 @@ export function Inspector({ companyId, initialContactId, onClose, apiBase }: Ins const [isProcessing, setIsProcessing] = useState(false) const [activeTab, setActiveTab] = useState<'overview' | 'contacts'>('overview') + // NEW: Report Mistake State + const [isReportingMistake, setIsReportingMistake] = useState(false) + const 
[reportedFieldName, setReportedFieldName] = useState("") + const [reportedWrongValue, setReportedWrongValue] = useState("") + const [reportedCorrectedValue, setReportedCorrectedValue] = useState("") + const [reportedSourceUrl, setReportedSourceUrl] = useState("") + const [reportedQuote, setReportedQuote] = useState("") + const [reportedComment, setReportedComment] = useState("") + // Polling Logic useEffect(() => { let interval: NodeJS.Timeout; @@ -297,6 +306,52 @@ export function Inspector({ companyId, initialContactId, onClose, apiBase }: Ins } } + // NEW: Interface for reporting mistakes + interface ReportedMistakeRequest { + field_name: string; + wrong_value?: string | null; + corrected_value?: string | null; + source_url?: string | null; + quote?: string | null; + user_comment?: string | null; + } + + const handleReportMistake = async () => { + if (!companyId) return; + if (!reportedFieldName) { + alert("Field Name is required."); + return; + } + + setIsProcessing(true); + try { + const payload: ReportedMistakeRequest = { + field_name: reportedFieldName, + wrong_value: reportedWrongValue || null, + corrected_value: reportedCorrectedValue || null, + source_url: reportedSourceUrl || null, + quote: reportedQuote || null, + user_comment: reportedComment || null, + }; + + await axios.post(`${apiBase}/companies/${companyId}/report-mistake`, payload); + alert("Mistake reported successfully!"); + setIsReportingMistake(false); + // Reset form fields + setReportedFieldName(""); + setReportedWrongValue(""); + setReportedCorrectedValue(""); + setReportedSourceUrl(""); + setReportedQuote(""); + setReportedComment(""); + } catch (e) { + alert("Failed to report mistake."); + console.error(e); + } finally { + setIsProcessing(false); + } + }; + const handleAddContact = async (contact: Contact) => { if (!companyId) return try { @@ -362,6 +417,13 @@ export function Inspector({ companyId, initialContactId, onClose, apiBase }: Ins > + + + + + + )} + + ) + } diff --git 
a/company-explorer/frontend/src/components/RoboticsSettings.tsx b/company-explorer/frontend/src/components/RoboticsSettings.tsx index 9cea4b87..3e90eb14 100644 --- a/company-explorer/frontend/src/components/RoboticsSettings.tsx +++ b/company-explorer/frontend/src/components/RoboticsSettings.tsx @@ -1,6 +1,6 @@ import { useEffect, useState } from 'react' import axios from 'axios' -import { X, Bot, Tag, Target, Users, Plus, Trash2, Save } from 'lucide-react' +import { X, Bot, Tag, Target, Users, Plus, Trash2, Save, Flag, Check, Ban, ExternalLink } from 'lucide-react' import clsx from 'clsx' interface RoboticsSettingsProps { @@ -9,27 +9,46 @@ interface RoboticsSettingsProps { apiBase: string } +type ReportedMistake = { + id: number; + company_id: number; + company: { name: string }; // Assuming company name is eagerly loaded + field_name: string; + wrong_value: string | null; + corrected_value: string | null; + source_url: string | null; + quote: string | null; + user_comment: string | null; + status: 'PENDING' | 'APPROVED' | 'REJECTED'; + created_at: string; + updated_at: string; +} + export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsProps) { - const [activeTab, setActiveTab] = useState<'robotics' | 'industries' | 'roles'>( - localStorage.getItem('roboticsSettingsActiveTab') as 'robotics' | 'industries' | 'roles' || 'robotics' + const [activeTab, setActiveTab] = useState<'robotics' | 'industries' | 'roles' | 'mistakes'>( + localStorage.getItem('roboticsSettingsActiveTab') as 'robotics' | 'industries' | 'roles' | 'mistakes' || 'robotics' ) const [roboticsCategories, setRoboticsCategories] = useState([]) const [industries, setIndustries] = useState([]) const [jobRoles, setJobRoles] = useState([]) + const [reportedMistakes, setReportedMistakes] = useState([]) + const [currentMistakeStatusFilter, setCurrentMistakeStatusFilter] = useState("PENDING"); const [isLoading, setIsLoading] = useState(false); const fetchAllData = async () => { 
setIsLoading(true); try { - const [resRobotics, resIndustries, resJobRoles] = await Promise.all([ + const [resRobotics, resIndustries, resJobRoles, resMistakes] = await Promise.all([ axios.get(`${apiBase}/robotics/categories`), axios.get(`${apiBase}/industries`), axios.get(`${apiBase}/job_roles`), + axios.get(`${apiBase}/mistakes?status=${currentMistakeStatusFilter}`), ]); setRoboticsCategories(resRobotics.data); setIndustries(resIndustries.data); setJobRoles(resJobRoles.data); + setReportedMistakes(resMistakes.data.items); } catch (e) { console.error("Failed to fetch settings data:", e); alert("Fehler beim Laden der Settings. Siehe Konsole."); @@ -62,6 +81,19 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP } } + const handleUpdateMistakeStatus = async (mistakeId: number, newStatus: 'APPROVED' | 'REJECTED') => { + setIsLoading(true); + try { + await axios.put(`${apiBase}/mistakes/${mistakeId}`, { status: newStatus }); + fetchAllData(); // Refresh all data, including mistakes + } catch (e) { + alert("Failed to update mistake status"); + console.error(e); + } finally { + setIsLoading(false); + } + }; + const handleAddJobRole = async () => { setIsLoading(true); try { @@ -109,6 +141,7 @@ export function RoboticsSettings({ isOpen, onClose, apiBase }: RoboticsSettingsP { id: 'robotics', label: 'Robotics Potential', icon: Bot }, { id: 'industries', label: 'Industry Focus', icon: Target }, { id: 'roles', label: 'Job Role Mapping', icon: Users }, + { id: 'mistakes', label: 'Reported Mistakes', icon: Flag }, ].map(t => (