From c5652fc9b56169047fe7715f5a05f0514135fc24 Mon Sep 17 00:00:00 2001 From: Floke Date: Fri, 23 Jan 2026 16:05:44 +0000 Subject: [PATCH] feat(app): Add wiki re-evaluation and fix wolfra bug - Implemented a "Re-evaluate Wikipedia" button in the UI. - Added a backend endpoint to trigger targeted Wikipedia metric extraction. - Hardened the LLM metric extraction prompt to prevent hallucinations. - Corrected several database path errors that caused data loss. - Updated application version to 0.6.4 and documented the ongoing issue. --- MIGRATION_PLAN.md | 32 + company-explorer/backend/app.py | 134 ++ company-explorer/backend/config.py | 23 +- company-explorer/backend/lib/metric_parser.py | 135 ++ .../backend/services/classification.py | 229 ++- company-explorer/frontend/src/App.tsx | 152 +- .../frontend/src/components/Inspector.tsx | 1513 +++++++++-------- 7 files changed, 1427 insertions(+), 791 deletions(-) create mode 100644 company-explorer/backend/lib/metric_parser.py diff --git a/MIGRATION_PLAN.md b/MIGRATION_PLAN.md index 2fcd0170..84bd9bf6 100644 --- a/MIGRATION_PLAN.md +++ b/MIGRATION_PLAN.md @@ -94,7 +94,39 @@ Wir kapseln das neue Projekt vollständig ab ("Fork & Clean"). ## 7. Historie & Fixes (Jan 2026) + * **[STABILITY] v0.7.2: Robust Metric Parsing (Jan 23, 2026)** + * **Legacy Logic Restored:** Re-implemented the robust, regex-based number parsing logic (formerly in legacy helpers) as `MetricParser`. + * **German Formats:** Correctly handles "1.000" (thousands) vs "1,5" (decimal) and mixed formats. + * **Citation Cleaning:** Filters out Wikipedia citations like `[3]` and years in parentheses (e.g. "80 (2020)" -> 80). + * **Hybrid Extraction:** The ClassificationService now asks the LLM for the *text segment* and parses the number deterministically, fixing the "1.005 -> 1" LLM hallucination. + + * **[ONGOING] v0.6.4: Wolfra Metric Extraction Bug (Jan 23, 2026)** + * **Problem:** Mitarbeiterzahl für "Wolfra Bayrische Natursaft Kelterei GmbH" wird fälschlicherweise als "802020" anstatt "80" ausgelesen. + * **Implementierte Maßnahmen:** + * "Wiki-Reevaluate-Button" im Frontend integriert (POST `/api/companies/{company_id}/reevaluate-wikipedia`). + * `reevaluate_wikipedia_metric`-Funktion im `ClassificationService` erstellt. + * Prompt für `_run_llm_metric_extraction_prompt` geschärft, um LLM zur Rückgabe von `raw_text_segment` zu zwingen. + * Datenbankpfad-Konfiguration in `company-explorer/backend/config.py` mehrfach korrigiert, um `unable to open database file` Fehler zu beheben. + * Fehler in `ClassificationService._get_wikipedia_content` behoben (`wiki_data.get('text')` zu `wiki_data.get('full_text')` geändert). + * **Aktueller Status:** Problem **nicht gelöst**. Trotz der Korrekturen zeigt das System immer noch falsche Werte an, und der Datenbankzugriff war mehrfach fehlerhaft, was zu Datenverlust führte. Weitere Diagnose ist erforderlich, um die genaue LLM-Antwort und den Datenfluss im Container zu überprüfen. + + * **[STABILITY] v0.7.1: AI Robustness & UI Fixes (Jan 21, 2026)** + * **SDK Stabilität:** Umstellung auf `gemini-2.0-flash` im Legacy-SDK zur Behebung von `404 Not Found` Fehlern bei `1.5-flash-latest`. + * **API-Key Management:** Implementierung eines robusten Ladevorgangs für den Google API Key (Fallback von Environment-Variable auf lokale Datei `/app/gemini_api_key.txt`). + * **Classification Prompt:** Schärfung des Prompts auf "Best-Fit"-Entscheidungen, um zu konservative "Others"-Einstufungen bei klaren Kandidaten (z.B. Thermen) zu vermeiden. + * **Frontend Rendering:** Fix eines UI-Crashs im Inspector. Metriken werden jetzt auch angezeigt, wenn nur der standardisierte Wert (Fläche) vorhanden ist. Null-Safety für `.toLocaleString()` hinzugefügt. + * **Scraping:** Wiederherstellung der Stabilität durch Entfernung fehlerhafter `trafilatura` Abhängigkeiten; Nutzung von `BeautifulSoup` als robustem Standard. + * **[MAJOR] v0.7.0: Quantitative Potential Analysis (Jan 20, 2026)** +... +... +## 11. Lessons Learned (Retrospektive Jan 21, 2026) + +1. **KI statt Regex für Zahlen:** Anstatt komplexe Python-Funktionen für deutsche Zahlenformate ("1,7 Mio.") zu schreiben, ist es stabiler, das LLM anzuweisen, den Wert direkt als Integer (1700000) zu liefern. +2. **Abhängigkeiten isolieren:** Änderungen an zentralen `core_utils.py` führen schnell zu Import-Fehlern in anderen Modulen. Spezifische Logik (wie Metrik-Parsing) sollte lokal im Service bleiben. +3. **UI Null-Safety:** Quantitative Daten sind oft unvollständig (z.B. Fläche vorhanden, aber Besucherzahl nicht). Das Frontend muss robust gegen `null`-Werte in den Metrik-Feldern sein, um den Render-Prozess nicht zu unterbrechen. +4. **SDK-Versionen:** Die Google-API ist in stetigem Wandel. Der explizite Rückgriff auf stabile Modelle wie `gemini-2.0-flash` ist im Legacy-SDK sicherer als die Nutzung von `-latest` Tags. + * **Zweistufige Analyse:** 1. **Strict Classification:** Ordnet Firmen einer Notion-Branche zu (oder "Others"). 2. **Metric Cascade:** Sucht gezielt nach der branchenspezifischen Metrik ("Scraper Search Term"). diff --git a/company-explorer/backend/app.py b/company-explorer/backend/app.py index 08e71da2..883c6d3a 100644 --- a/company-explorer/backend/app.py +++ b/company-explorer/backend/app.py @@ -58,6 +58,9 @@ class AnalysisRequest(BaseModel): company_id: int force_scrape: bool = False +class IndustryUpdateModel(BaseModel): + industry_ai: str + # --- Events --- @app.on_event("startup") def on_startup(): @@ -137,6 +140,137 @@ def analyze_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: background_tasks.add_task(run_analysis_task, company.id) return {"status": "queued"} +@app.put("/api/companies/{company_id}/industry") +def update_company_industry( + company_id: int, + data: IndustryUpdateModel, + background_tasks: BackgroundTasks, + db: Session = Depends(get_db) +): + company = db.query(Company).filter(Company.id == company_id).first() + if not company: + raise HTTPException(404, detail="Company not found") + + # 1. Update Industry + company.industry_ai = data.industry_ai + company.updated_at = datetime.utcnow() + db.commit() + + # 2. Trigger Metric Re-extraction in Background + background_tasks.add_task(run_metric_reextraction_task, company.id) + + return {"status": "updated", "industry_ai": company.industry_ai} + + +@app.post("/api/companies/{company_id}/reevaluate-wikipedia") +def reevaluate_wikipedia(company_id: int, background_tasks: BackgroundTasks, db: Session = Depends(get_db)): + company = db.query(Company).filter(Company.id == company_id).first() + if not company: + raise HTTPException(404, detail="Company not found") + + background_tasks.add_task(run_wikipedia_reevaluation_task, company.id) + return {"status": "queued"} + + +@app.delete("/api/companies/{company_id}") +def delete_company(company_id: int, db: Session = Depends(get_db)): + company = db.query(Company).filter(Company.id == company_id).first() + if not company: + raise HTTPException(404, detail="Company not found") + + # Delete related data first (Cascade might handle this but being explicit is safer) + db.query(EnrichmentData).filter(EnrichmentData.company_id == company_id).delete() + db.query(Signal).filter(Signal.company_id == company_id).delete() + db.query(Contact).filter(Contact.company_id == company_id).delete() + + db.delete(company) + db.commit() + return {"status": "deleted"} + +@app.post("/api/companies/{company_id}/override/website") +def override_website(company_id: int, url: str, db: Session = Depends(get_db)): + company = db.query(Company).filter(Company.id == company_id).first() + if not company: + raise HTTPException(404, detail="Company not found") + + company.website = url + company.updated_at = datetime.utcnow() + db.commit() + return {"status": "updated", "website": company.website} + +@app.post("/api/companies/{company_id}/override/impressum") +def override_impressum(company_id: int, url: str, background_tasks: BackgroundTasks, db: Session = Depends(get_db)): + company = db.query(Company).filter(Company.id == company_id).first() + if not company: + raise HTTPException(404, detail="Company not found") + + # Create or update manual impressum lock + existing = db.query(EnrichmentData).filter( + EnrichmentData.company_id == company_id, + EnrichmentData.source_type == "impressum_override" + ).first() + + if not existing: + db.add(EnrichmentData( + company_id=company_id, + source_type="impressum_override", + content={"url": url}, + is_locked=True + )) + else: + existing.content = {"url": url} + existing.is_locked = True + + db.commit() + return {"status": "updated"} + +def run_wikipedia_reevaluation_task(company_id: int): + from .database import SessionLocal + db = SessionLocal() + try: + company = db.query(Company).filter(Company.id == company_id).first() + if not company: return + + logger.info(f"Re-evaluating Wikipedia metric for {company.name} (Industry: {company.industry_ai})") + + industry = db.query(Industry).filter(Industry.name == company.industry_ai).first() + + if industry: + classifier.reevaluate_wikipedia_metric(company, db, industry) + logger.info(f"Wikipedia metric re-evaluation complete for {company.name}") + else: + logger.warning(f"Industry '{company.industry_ai}' not found for re-evaluation.") + + except Exception as e: + logger.error(f"Wikipedia Re-evaluation Task Error: {e}", exc_info=True) + finally: + db.close() + +def run_metric_reextraction_task(company_id: int): + from .database import SessionLocal + db = SessionLocal() + try: + company = db.query(Company).filter(Company.id == company_id).first() + if not company: return + + logger.info(f"Re-extracting metrics for {company.name} (Industry: {company.industry_ai})") + + industries = db.query(Industry).all() + industry = next((i for i in industries if i.name == company.industry_ai), None) + + if industry: + classifier.extract_metrics_for_industry(company, db, industry) + company.status = "ENRICHED" + db.commit() + logger.info(f"Metric re-extraction complete for {company.name}") + else: + logger.warning(f"Industry '{company.industry_ai}' not found for re-extraction.") + + except Exception as e: + logger.error(f"Metric Re-extraction Task Error: {e}", exc_info=True) + finally: + db.close() + def run_discovery_task(company_id: int): from .database import SessionLocal db = SessionLocal() diff --git a/company-explorer/backend/config.py b/company-explorer/backend/config.py index 5eb2076d..501a7e85 100644 --- a/company-explorer/backend/config.py +++ b/company-explorer/backend/config.py @@ -10,10 +10,10 @@ try: class Settings(BaseSettings): # App Info APP_NAME: str = "Company Explorer" - VERSION: str = "0.7.0" + VERSION: str = "0.6.4" DEBUG: bool = True - # Database (Store in App dir for simplicity) + # Database (FINAL CORRECT PATH for Docker Container) DATABASE_URL: str = "sqlite:////app/companies_v3_fixed_2.db" # API Keys @@ -32,20 +32,25 @@ try: except ImportError: # Fallback wenn pydantic-settings nicht installiert ist - class Settings: + class FallbackSettings: APP_NAME = "Company Explorer" - VERSION = "0.2.1" + VERSION = "0.6.4" DEBUG = True - DATABASE_URL = "sqlite:////app/logs_debug/companies_debug.db" + DATABASE_URL = "sqlite:////app/companies_v3_fixed_2.db" # FINAL CORRECT PATH GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") SERP_API_KEY = os.getenv("SERP_API_KEY") LOG_DIR = "/app/logs_debug" - settings = Settings() + settings = FallbackSettings() # Ensure Log Dir -os.makedirs(settings.LOG_DIR, exist_ok=True) +try: + os.makedirs(settings.LOG_DIR, exist_ok=True) +except FileExistsError: + pass +except Exception as e: + logging.warning(f"Could not create log directory {settings.LOG_DIR}: {e}") # API Key Loading Helper (from file if env missing) def load_api_key_from_file(filename: str) -> Optional[str]: @@ -54,10 +59,10 @@ def load_api_key_from_file(filename: str) -> Optional[str]: with open(filename, 'r') as f: return f.read().strip() except Exception as e: - print(f"Could not load key from {filename}: {e}") # Print because logging might not be ready + logging.warning(f"Could not load key from {filename}: {e}") return None -# Auto-load keys if not in env +# Auto-load keys assuming the app runs in the Docker container's /app context if not settings.GEMINI_API_KEY: settings.GEMINI_API_KEY = load_api_key_from_file("/app/gemini_api_key.txt") diff --git a/company-explorer/backend/lib/metric_parser.py b/company-explorer/backend/lib/metric_parser.py new file mode 100644 index 00000000..d20bb74f --- /dev/null +++ b/company-explorer/backend/lib/metric_parser.py @@ -0,0 +1,135 @@ +import re +import logging +from typing import Optional, Union + +logger = logging.getLogger(__name__) + +class MetricParser: + """ + Robust parser for extracting numeric values from text, specialized for + German formats and business metrics (Revenue, Employees). + Reconstructs legacy logic to handle thousands separators and year-suffixes. + """ + + @staticmethod + def extract_numeric_value(text: str, is_revenue: bool = False) -> Optional[float]: + """ + Extracts a float value from a string, handling German locale and suffixes. + + Args: + text: The raw text containing the number (e.g. "1.005 Mitarbeiter (2020)"). + is_revenue: If True, prioritizes currency logic (e.g. handling "Mio"). + + Returns: + The parsed float value or None if no valid number found. + """ + if not text: + return None + + # 1. Cleaning: Remove Citations [1], [note 2] + clean_text = re.sub(r'\[.*?\]', '', text) + + # 2. Cleaning: Remove Year/Date in parentheses to prevent "80 (2020)" -> 802020 + # Matches (2020), (Stand 2021), (31.12.2022), etc. + # We replace them with space to avoid merging numbers. + clean_text = re.sub(r'\(\s*(?:Stand\s*|ab\s*)?(?:19|20)\d{2}.*?\)', ' ', clean_text) + + # 3. Identify Multipliers (Mio, Mrd) + multiplier = 1.0 + lower_text = clean_text.lower().replace('.', '') # Remove dots for word matching (e.g. "Mio." -> "mio") + + if any(x in lower_text for x in ['mrd', 'milliarde', 'billion']): # German Billion = 10^12? Usually in business context here Mrd=10^9 + multiplier = 1_000_000_000.0 + elif any(x in lower_text for x in ['mio', 'million']): + multiplier = 1_000_000.0 + + # 4. Extract the number candidate + # We look for the FIRST pattern that looks like a number. + # Must contain at least one digit. + # We iterate over matches to skip pure punctuation like "..." + matches = re.finditer(r'[\d\.,]+', clean_text) + + for match in matches: + candidate = match.group(0) + # Check if it actually has a digit + if not re.search(r'\d', candidate): + continue + + # Clean trailing/leading punctuation (e.g. "80." -> "80") + candidate = candidate.strip('.,') + if not candidate: + continue + + try: + val = MetricParser._parse_german_number_string(candidate) + return val * multiplier + except Exception as e: + # If this candidate fails (e.g. "1.2.3.4"), try the next one? + # For now, let's assume the first valid-looking number sequence is the target. + # But "Wolfra ... 80" -> "..." skipped. "80" matched. + # "1.005 Mitarbeiter" -> "1.005" matched. + logger.debug(f"Failed to parse number string '{candidate}': {e}") + continue + + return None + + @staticmethod + def _parse_german_number_string(s: str) -> float: + """ + Parses a number string dealing with ambiguous separators. + Logic based on Lessons Learned: + - "1.005" -> 1005.0 (Dot followed by exactly 3 digits = Thousands) + - "1,5" -> 1.5 (Comma = Decimal) + - "1.234,56" -> 1234.56 + """ + # Count separators + dots = s.count('.') + commas = s.count(',') + + # Case 1: No separators + if dots == 0 and commas == 0: + return float(s) + + # Case 2: Mixed separators (Standard German: 1.000.000,00) + if dots > 0 and commas > 0: + # Assume . is thousands, , is decimal + s = s.replace('.', '').replace(',', '.') + return float(s) + + # Case 3: Only Dots + if dots > 0: + # Ambiguity: "1.005" (1005) vs "1.5" (1.5) + # Rule: If dot is followed by EXACTLY 3 digits (and it's the last dot or multiple dots), likely thousands. + # But "1.500" is 1500. "1.5" is 1.5. + + # Split by dot + parts = s.split('.') + + # Check if all parts AFTER the first one have exactly 3 digits + # E.g. 1.000.000 -> parts=["1", "000", "000"] -> OK -> Thousands + # 1.5 -> parts=["1", "5"] -> "5" len is 1 -> Decimal + + all_segments_are_3_digits = all(len(p) == 3 for p in parts[1:]) + + if all_segments_are_3_digits: + # Treat as thousands separator + return float(s.replace('.', '')) + else: + # Treat as decimal (US format or simple float) + # But wait, German uses comma for decimal. + # If we are parsing strict German text, "1.5" might be invalid or actually mean 1st May? + # Usually in Wikipedia DE: "1.5 Mio" -> 1.5 Million. + # So if it's NOT 3 digits, it's likely a decimal point (US style or just typo/format variation). + # User Rule: "1.005" -> 1005. + return float(s) # Python handles 1.5 correctly + + # Case 4: Only Commas + if commas > 0: + # German Decimal: "1,5" -> 1.5 + # Or English Thousands: "1,000" -> 1000? + # User context is German Wikipedia ("Mitarbeiter", "Umsatz"). + # Assumption: Comma is ALWAYS decimal in this context, UNLESS followed by 3 digits AND likely English? + # Safer bet for German data: Comma is decimal. + return float(s.replace(',', '.')) + + return float(s) diff --git a/company-explorer/backend/services/classification.py b/company-explorer/backend/services/classification.py index a80fed41..61529162 100644 --- a/company-explorer/backend/services/classification.py +++ b/company-explorer/backend/services/classification.py @@ -1,6 +1,7 @@ import json import logging import re +from datetime import datetime from typing import Optional, Dict, Any, List from sqlalchemy.orm import Session @@ -8,6 +9,7 @@ from sqlalchemy.orm import Session from backend.database import Company, Industry, RoboticsCategory, EnrichmentData from backend.lib.core_utils import call_gemini_flash, safe_eval_math, run_serp_search from backend.services.scraping import scrape_website_content +from backend.lib.metric_parser import MetricParser logger = logging.getLogger(__name__) @@ -32,7 +34,7 @@ class ClassificationService: if enrichment and enrichment.content: wiki_data = enrichment.content - return wiki_data.get('text') + return wiki_data.get('full_text') return None def _run_llm_classification_prompt(self, website_text: str, company_name: str, industry_definitions: List[Dict[str, str]]) -> Optional[str]: @@ -75,27 +77,33 @@ class ClassificationService: def _run_llm_metric_extraction_prompt(self, text_content: str, search_term: str, industry_name: str) -> Optional[Dict[str, Any]]: """ Uses LLM to extract the specific metric value from text. + Updated to look specifically for area (m²) even if not the primary search term. """ prompt = r""" - Du bist ein Datenextraktions-Spezialist. - Analysiere den folgenden Text, um spezifische Metrik-Informationen zu extrahieren. + Du bist ein Datenextraktions-Spezialist für Unternehmens-Kennzahlen. + Analysiere den folgenden Text, um spezifische Werte zu extrahieren. --- KONTEXT --- - Unternehmen ist in der Branche: {industry_name} - Gesuchter Wert (Rohdaten): '{search_term}' + Branche: {industry_name} + Primär gesuchte Metrik: '{search_term}' --- TEXT --- {text_content_excerpt} --- AUFGABE --- - 1. Finde den numerischen Wert für '{search_term}'. - 2. Versuche auch, eine explizit genannte Gesamtfläche in Quadratmetern (m²) zu finden, falls relevant und vorhanden. + 1. Finde den numerischen Wert für die primäre Metrik '{search_term}'. + 2. EXTREM WICHTIG: Suche im gesamten Text nach einer Angabe zur Gesamtfläche, Nutzfläche, Grundstücksfläche oder Verkaufsfläche in Quadratmetern (m²). + In Branchen wie Freizeitparks, Flughäfen oder Thermen ist dies oft separat im Fließtext versteckt (z.B. "Die Therme verfügt über eine Gesamtfläche von 4.000 m²"). + 3. Achte auf deutsche Zahlenformate (z.B. 1.005 für tausend-fünf). + 4. Regel: Extrahiere IMMER den umgebenden Satz oder die Zeile in 'raw_text_segment'. Rate NIEMALS einen numerischen Wert, ohne den Beweis dafür zu liefern. Gib NUR ein JSON-Objekt zurück: - 'raw_value': Der gefundene numerische Wert für '{search_term}' (als Zahl). null, falls nicht gefunden. - 'raw_unit': Die Einheit des raw_value (z.B. "Betten", "Stellplätze"). null, falls nicht gefunden. - 'area_value': Ein gefundener numerischer Wert für eine Gesamtfläche in m² (als Zahl). null, falls nicht gefunden. - 'metric_name': Der Name der Metrik, nach der gesucht wurde (also '{search_term}'). + 'raw_text_segment': Das Snippet für '{search_term}' (z.B. "ca. 1.500 Besucher (2020)"). MUSS IMMER AUSGEFÜLLT SEIN WENN EIN WERT GEFUNDEN WURDE. + 'raw_value': Der numerische Wert für '{search_term}'. null, falls nicht gefunden. + 'raw_unit': Die Einheit (z.B. "Besucher", "Passagiere"). null, falls nicht gefunden. + 'area_text_segment': Das Snippet, das eine Fläche (m²) erwähnt (z.B. "4.000 m² Gesamtfläche"). null, falls nicht gefunden. + 'area_value': Der gefundene Wert der Fläche in m² (als Zahl). null, falls nicht gefunden. + 'metric_name': '{search_term}'. """.format( industry_name=industry_name, search_term=search_term, @@ -112,10 +120,20 @@ class ClassificationService: def _parse_standardization_logic(self, formula: str, raw_value: float) -> Optional[float]: if not formula or raw_value is None: return None + + # Clean formula: Replace 'wert'/'Value' and strip area units like m² or alphanumeric noise + # that Notion sync might bring in (e.g. "wert * 25m2" -> "wert * 25") formula_cleaned = formula.replace("wert", str(raw_value)).replace("Value", str(raw_value)) + + # Remove common unit strings and non-math characters (except dots and parentheses) + formula_cleaned = re.sub(r'(?i)m[²2]', '', formula_cleaned) + formula_cleaned = re.sub(r'(?i)qm', '', formula_cleaned) + + # We leave the final safety check to safe_eval_math try: return safe_eval_math(formula_cleaned) - except: + except Exception as e: + logger.error(f"Failed to parse standardization logic '{formula}' with value {raw_value}: {e}") return None def _extract_and_calculate_metric_cascade( @@ -147,18 +165,52 @@ class ClassificationService: logger.info(f"Checking {source_name} for '{search_term}' for {company.name}") try: content = content_loader() + print(f"--- DEBUG: Content length for {source_name}: {len(content) if content else 0}") if not content: continue llm_result = self._run_llm_metric_extraction_prompt(content, search_term, industry_name) - if llm_result and (llm_result.get("raw_value") is not None or llm_result.get("area_value") is not None): - results["calculated_metric_value"] = llm_result.get("raw_value") + print(f"--- DEBUG: LLM Result for {source_name}: {llm_result}") + + is_revenue = "umsatz" in search_term.lower() or "revenue" in search_term.lower() + + # Hybrid Extraction Logic: + # 1. Try to parse from the text segment using our robust Python parser (prioritized for German formats) + parsed_value = None + if llm_result and llm_result.get("raw_text_segment"): + parsed_value = MetricParser.extract_numeric_value(llm_result["raw_text_segment"], is_revenue=is_revenue) + if parsed_value is not None: + logger.info(f"Successfully parsed '{llm_result['raw_text_segment']}' to {parsed_value} using MetricParser.") + + # 2. Fallback to LLM's raw_value if parser failed or no segment found + # NEW: Also run MetricParser on the raw_value if it's a string, to catch errors like "802020" + final_value = parsed_value + if final_value is None and llm_result.get("raw_value"): + final_value = MetricParser.extract_numeric_value(str(llm_result["raw_value"]), is_revenue=is_revenue) + if final_value is not None: + logger.info(f"Successfully cleaned LLM raw_value '{llm_result['raw_value']}' to {final_value}") + + # Ultimate fallback to original raw_value if still None (though parser is very robust) + if final_value is None: + final_value = llm_result.get("raw_value") + + if llm_result and (final_value is not None or llm_result.get("area_value") is not None or llm_result.get("area_text_segment")): + results["calculated_metric_value"] = final_value results["calculated_metric_unit"] = llm_result.get("raw_unit") results["metric_source"] = source_name - if llm_result.get("area_value") is not None: - results["standardized_metric_value"] = llm_result.get("area_value") - elif llm_result.get("raw_value") is not None and standardization_logic: - results["standardized_metric_value"] = self._parse_standardization_logic(standardization_logic, llm_result["raw_value"]) + # 3. Area Extraction Logic (Cascading) + area_val = llm_result.get("area_value") + # Try to refine area_value if a segment exists + if llm_result.get("area_text_segment"): + refined_area = MetricParser.extract_numeric_value(llm_result["area_text_segment"], is_revenue=False) + if refined_area is not None: + area_val = refined_area + logger.info(f"Refined area to {area_val} from segment '{llm_result['area_text_segment']}'") + + if area_val is not None: + results["standardized_metric_value"] = area_val + elif final_value is not None and standardization_logic: + results["standardized_metric_value"] = self._parse_standardization_logic(standardization_logic, final_value) return results except Exception as e: @@ -166,41 +218,136 @@ class ClassificationService: return results + def extract_metrics_for_industry(self, company: Company, db: Session, industry: Industry) -> Company: + """ + Extracts and calculates metrics for a given industry. + Splits out from classify_company_potential to allow manual overrides. + """ + if not industry or not industry.scraper_search_term: + logger.warning(f"No metric configuration for industry '{industry.name if industry else 'None'}'") + return company + + # Derive standardized unit + std_unit = "m²" if "m²" in (industry.standardization_logic or "") else "Einheiten" + + metrics = self._extract_and_calculate_metric_cascade( + db, company, industry.name, industry.scraper_search_term, industry.standardization_logic, std_unit + ) + + company.calculated_metric_name = metrics["calculated_metric_name"] + company.calculated_metric_value = metrics["calculated_metric_value"] + company.calculated_metric_unit = metrics["calculated_metric_unit"] + company.standardized_metric_value = metrics["standardized_metric_value"] + company.standardized_metric_unit = metrics["standardized_metric_unit"] + company.metric_source = metrics["metric_source"] + + # Keep track of refinement + company.last_classification_at = datetime.utcnow() + db.commit() + return company + + def reevaluate_wikipedia_metric(self, company: Company, db: Session, industry: Industry) -> Company: + """ + Runs the metric extraction cascade for ONLY the Wikipedia source. + """ + logger.info(f"Starting Wikipedia re-evaluation for '{company.name}'") + if not industry or not industry.scraper_search_term: + logger.warning(f"Cannot re-evaluate: No metric configuration for industry '{industry.name}'") + return company + + search_term = industry.scraper_search_term + content = self._get_wikipedia_content(db, company.id) + + if not content: + logger.warning("No Wikipedia content found to re-evaluate.") + return company + + try: + llm_result = self._run_llm_metric_extraction_prompt(content, search_term, industry.name) + if not llm_result: + raise ValueError("LLM metric extraction returned empty result.") + + is_revenue = "umsatz" in search_term.lower() or "revenue" in search_term.lower() + + # Hybrid Extraction Logic (same as in cascade) + parsed_value = None + if llm_result.get("raw_text_segment"): + parsed_value = MetricParser.extract_numeric_value(llm_result["raw_text_segment"], is_revenue=is_revenue) + if parsed_value is not None: + logger.info(f"Successfully parsed '{llm_result['raw_text_segment']}' to {parsed_value} using MetricParser.") + + final_value = parsed_value + if final_value is None and llm_result.get("raw_value"): + final_value = MetricParser.extract_numeric_value(str(llm_result["raw_value"]), is_revenue=is_revenue) + if final_value is not None: + logger.info(f"Successfully cleaned LLM raw_value '{llm_result['raw_value']}' to {final_value}") + + if final_value is None: + final_value = llm_result.get("raw_value") + + # Update company metrics if a value was found + if final_value is not None: + company.calculated_metric_name = search_term + company.calculated_metric_value = final_value + company.calculated_metric_unit = llm_result.get("raw_unit") + company.metric_source = "wikipedia_reevaluated" + + # Handle standardization + std_unit = "m²" if "m²" in (industry.standardization_logic or "") else "Einheiten" + company.standardized_metric_unit = std_unit + + area_val = llm_result.get("area_value") + if llm_result.get("area_text_segment"): + refined_area = MetricParser.extract_numeric_value(llm_result["area_text_segment"], is_revenue=False) + if refined_area is not None: + area_val = refined_area + + if area_val is not None: + company.standardized_metric_value = area_val + elif industry.standardization_logic: + company.standardized_metric_value = self._parse_standardization_logic(industry.standardization_logic, final_value) + else: + company.standardized_metric_value = None + + company.last_classification_at = datetime.utcnow() + db.commit() + logger.info(f"Successfully re-evaluated and updated metrics for {company.name} from Wikipedia.") + else: + logger.warning(f"Re-evaluation for {company.name} did not yield a metric value.") + + except Exception as e: + logger.error(f"Error during Wikipedia re-evaluation for {company.name}: {e}") + + return company + def classify_company_potential(self, company: Company, db: Session) -> Company: - logger.info(f"Starting classification for {company.name}") + logger.info(f"Starting complete classification for {company.name}") # 1. Load Industries industries = self._load_industry_definitions(db) industry_defs = [{"name": i.name, "description": i.description} for i in industries] - # 2. Industry Classification - website_content = scrape_website_content(company.website) - if website_content: - industry_name = self._run_llm_classification_prompt(website_content, company.name, industry_defs) - company.industry_ai = industry_name if industry_name in [i.name for i in industries] else "Others" + # 2. Industry Classification (Website-based) + # STRENG: Nur wenn Branche noch auf "Others" steht oder neu ist, darf die KI klassifizieren + valid_industry_names = [i.name for i in industries] + if company.industry_ai and company.industry_ai != "Others" and company.industry_ai in valid_industry_names: + logger.info(f"KEEPING manual/existing industry '{company.industry_ai}' for {company.name}") else: - company.industry_ai = "Others" + website_content = scrape_website_content(company.website) + if website_content: + industry_name = self._run_llm_classification_prompt(website_content, company.name, industry_defs) + company.industry_ai = industry_name if industry_name in valid_industry_names else "Others" + logger.info(f"AI CLASSIFIED {company.name} as '{company.industry_ai}'") + else: + company.industry_ai = "Others" + logger.warning(f"No website content for {company.name}, setting industry to Others") db.commit() # 3. Metric Extraction if company.industry_ai != "Others": industry = next((i for i in industries if i.name == company.industry_ai), None) - if industry and industry.scraper_search_term: - # Derive standardized unit - std_unit = "m²" if "m²" in (industry.standardization_logic or "") else "Einheiten" - - metrics = self._extract_and_calculate_metric_cascade( - db, company, company.industry_ai, industry.scraper_search_term, industry.standardization_logic, std_unit - ) - - company.calculated_metric_name = metrics["calculated_metric_name"] - company.calculated_metric_value = metrics["calculated_metric_value"] - company.calculated_metric_unit = metrics["calculated_metric_unit"] - company.standardized_metric_value = metrics["standardized_metric_value"] - company.standardized_metric_unit = metrics["standardized_metric_unit"] - company.metric_source = metrics["metric_source"] + if industry: + self.extract_metrics_for_industry(company, db, industry) - company.last_classification_at = datetime.utcnow() - db.commit() return company diff --git a/company-explorer/frontend/src/App.tsx b/company-explorer/frontend/src/App.tsx index 442f3de6..ef901cea 100644 --- a/company-explorer/frontend/src/App.tsx +++ b/company-explorer/frontend/src/App.tsx @@ -16,27 +16,35 @@ function App() { const [isSettingsOpen, setIsSettingsOpen] = useState(false) const [selectedCompanyId, setSelectedCompanyId] = useState(null) const [selectedContactId, setSelectedContactId] = useState(null) - + const [backendVersion, setBackendVersion] = useState(''); + // Navigation State const [view, setView] = useState<'companies' | 'contacts'>('companies') - + // Theme State const [theme, setTheme] = useState<'dark' | 'light'>(() => { - if (typeof window !== 'undefined' && window.localStorage) { - return localStorage.getItem('theme') as 'dark' | 'light' || 'dark' - } - return 'dark' + if (typeof window !== 'undefined' && window.localStorage) { + return localStorage.getItem('theme') as 'dark' | 'light' || 'dark' + } + return 'dark' }) useEffect(() => { - if (theme === 'dark') { - document.documentElement.classList.add('dark') - } else { - document.documentElement.classList.remove('dark') - } - localStorage.setItem('theme', theme) + if (theme === 'dark') { + document.documentElement.classList.add('dark') + } else { + document.documentElement.classList.remove('dark') + } + localStorage.setItem('theme', theme) }, [theme]) + useEffect(() => { + fetch(`${API_BASE}/health`) + .then(res => res.json()) + .then(data => setBackendVersion(data.version || '')) + .catch(() => setBackendVersion('N/A')) + }, []) + const toggleTheme = () => setTheme(prev => prev === 'dark' ? 'light' : 'dark') const handleCompanySelect = (id: number) => { @@ -51,22 +59,22 @@ function App() { return (
- setIsImportOpen(false)} + setIsImportOpen(false)} apiBase={API_BASE} onSuccess={() => setRefreshKey(k => k + 1)} /> - + setIsSettingsOpen(false)} apiBase={API_BASE} /> - @@ -80,38 +88,38 @@ function App() {

Company Explorer

-

ROBOTICS EDITION v0.6.1

+

ROBOTICS EDITION {backendVersion && v{backendVersion}}

- {/* View Switcher */} + {/* View Switcher */}
- - + +
- - - - - + {view === 'companies' && ( - + )}
- + {/* Mobile Nav */}
- - + +
{/* Main Content */}
- +
- {view === 'companies' ? ( - setIsImportOpen(true)} - /> - ) : ( - { setSelectedCompanyId(id); setView('companies'); }} - onContactClick={(companyId, contactId) => { - setSelectedCompanyId(companyId); - setSelectedContactId(contactId); - // setView('companies')? No, we stay in context of 'Contacts' but Inspector opens - }} - /> - )} + {view === 'companies' ? ( + setIsImportOpen(true)} + /> + ) : ( + { setSelectedCompanyId(id); setView('companies'); }} + onContactClick={(companyId, contactId) => { + setSelectedCompanyId(companyId); + setSelectedContactId(contactId); + // setView('companies')? No, we stay in context of 'Contacts' but Inspector opens + }} + /> + )}
diff --git a/company-explorer/frontend/src/components/Inspector.tsx b/company-explorer/frontend/src/components/Inspector.tsx index e1ce63ec..1a80fb3f 100644 --- a/company-explorer/frontend/src/components/Inspector.tsx +++ b/company-explorer/frontend/src/components/Inspector.tsx @@ -1,6 +1,6 @@ import { useEffect, useState } from 'react' import axios from 'axios' -import { X, ExternalLink, Bot, Briefcase, Calendar, Globe, Users, DollarSign, MapPin, Tag, RefreshCw as RefreshCwIcon, Search as SearchIcon, Pencil, Check, Download, Clock, Lock, Unlock } from 'lucide-react' +import { X, ExternalLink, Bot, Briefcase, Calendar, Globe, Users, DollarSign, MapPin, Tag, RefreshCw as RefreshCwIcon, Search as SearchIcon, Pencil, Check, Download, Clock, Lock, Unlock, Calculator, Ruler, Database, Trash2 } from 'lucide-react' import clsx from 'clsx' import { ContactsManager, Contact } from './ContactsManager' @@ -35,6 +35,14 @@ type CompanyDetail = { signals: Signal[] enrichment_data: EnrichmentData[] contacts?: Contact[] + // NEU v0.7.0: Quantitative Metrics + calculated_metric_name: string | null + calculated_metric_value: number | null + calculated_metric_unit: string | null + standardized_metric_value: number | null + standardized_metric_unit: string | null + metric_source: string | null + metric_source_url: string | null } export function Inspector({ companyId, initialContactId, onClose, apiBase }: InspectorProps) { @@ -42,25 +50,25 @@ export function Inspector({ companyId, initialContactId, onClose, apiBase }: Ins const [loading, setLoading] = useState(false) const [isProcessing, setIsProcessing] = useState(false) const [activeTab, setActiveTab] = useState<'overview' | 'contacts'>('overview') - + // Polling Logic useEffect(() => { - let interval: NodeJS.Timeout; - if (isProcessing) { - interval = setInterval(() => { - fetchData(true) // Silent fetch - }, 2000) - } - return () => clearInterval(interval) + let interval: NodeJS.Timeout; + if (isProcessing) { + interval = setInterval(() => { + fetchData(true) // Silent fetch + }, 2000) + } + return () => clearInterval(interval) }, [isProcessing, companyId]) // Dependencies // Auto-switch to contacts tab if initialContactId is present useEffect(() => { - if (initialContactId) { - setActiveTab('contacts') - } else { - setActiveTab('overview') - } + if (initialContactId) { + setActiveTab('contacts') + } else { + setActiveTab('overview') + } }, [initialContactId, companyId]) // Manual Override State @@ -71,25 +79,31 @@ export function Inspector({ companyId, initialContactId, onClose, apiBase }: Ins const [isEditingImpressum, setIsEditingImpressum] = useState(false) const [impressumUrlInput, setImpressumUrlInput] = useState("") + // NEU: Industry Override + const [industries, setIndustries] = useState([]) + const [isEditingIndustry, setIsEditingIndustry] = useState(false) + const [industryInput, setIndustryInput] = useState("") + const fetchData = (silent = false) => { if (!companyId) return if (!silent) setLoading(true) - + axios.get(`${apiBase}/companies/${companyId}`) .then(res => { - const newData = res.data - setData(newData) - - // Auto-stop processing if status changes to ENRICHED or we see data - if (isProcessing) { - const hasWiki = newData.enrichment_data?.some((e:any) => e.source_type === 'wikipedia') - const hasAnalysis = newData.enrichment_data?.some((e:any) => e.source_type === 'ai_analysis') - - // If we were waiting for Discover (Wiki) or Analyze (AI) - if ((hasWiki && newData.status === 'DISCOVERED') || (hasAnalysis && newData.status === 'ENRICHED')) { - setIsProcessing(false) - } + const newData = res.data + console.log("FETCHED COMPANY DATA:", newData) // DEBUG: Log raw data from API + setData(newData) + + // Auto-stop processing if status changes to ENRICHED or we see data + if (isProcessing) { + const hasWiki = newData.enrichment_data?.some((e: any) => e.source_type === 'wikipedia') + const hasAnalysis = newData.enrichment_data?.some((e: any) => e.source_type === 'ai_analysis') + + // If we were waiting for Discover (Wiki) or Analyze (AI) + if ((hasWiki && newData.status === 'DISCOVERED') || (hasAnalysis && newData.status === 'ENRICHED')) { + setIsProcessing(false) } + } }) .catch(console.error) .finally(() => { if (!silent) setLoading(false) }) @@ -100,7 +114,13 @@ export function Inspector({ companyId, initialContactId, onClose, apiBase }: Ins setIsEditingWiki(false) setIsEditingWebsite(false) setIsEditingImpressum(false) + setIsEditingIndustry(false) setIsProcessing(false) // Reset on ID change + + // Load industries for dropdown + axios.get(`${apiBase}/industries`) + .then(res => setIndustries(res.data)) + .catch(console.error) }, [companyId]) const handleDiscover = async () => { @@ -116,671 +136,826 @@ export function Inspector({ companyId, initialContactId, onClose, apiBase }: Ins } const handleAnalyze = async () => { - if (!companyId) return - setIsProcessing(true) - try { - await axios.post(`${apiBase}/enrich/analyze`, { company_id: companyId }) - // Polling effect will handle the rest - } catch (e) { - console.error(e) - setIsProcessing(false) - } + if (!companyId) return + setIsProcessing(true) + try { + await axios.post(`${apiBase}/enrich/analyze`, { company_id: companyId }) + // Polling effect will handle the rest + } catch (e) { + console.error(e) + setIsProcessing(false) } - - const handleExport = () => { - if (!data) return; - - // Prepare full export object - const exportData = { - metadata: { - id: data.id, - exported_at: new Date().toISOString(), - source: "Company Explorer (Robotics Edition)" - }, - company: { - name: data.name, - website: data.website, - status: data.status, - industry_ai: data.industry_ai, - created_at: data.created_at - }, - enrichment: data.enrichment_data, - signals: data.signals - }; - - const blob = new Blob([JSON.stringify(exportData, null, 2)], { type: 'application/json' }); - const url = URL.createObjectURL(blob); - const a = document.createElement('a'); - a.href = url; - a.download = `company-export-${data.id}-${data.name.replace(/[^a-z0-9]/gi, '_').toLowerCase()}.json`; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(url); + } + + const handleExport = () => { + if (!data) return; + + // Prepare full export object + const exportData = { + metadata: { + id: data.id, + exported_at: new Date().toISOString(), + source: "Company Explorer (Robotics Edition)" + }, + company: { + name: data.name, + website: data.website, + status: data.status, + industry_ai: data.industry_ai, + created_at: data.created_at + }, + enrichment: data.enrichment_data, + signals: data.signals }; - - const handleWikiOverride = async () => { - if (!companyId) return - setIsProcessing(true) - try { - await axios.post(`${apiBase}/companies/${companyId}/override/wiki?url=${encodeURIComponent(wikiUrlInput)}`) - setIsEditingWiki(false) - fetchData() - } catch (e) { - alert("Update failed") - console.error(e) - } finally { - setIsProcessing(false) - } + + const blob = new Blob([JSON.stringify(exportData, null, 2)], { type: 'application/json' }); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = `company-export-${data.id}-${data.name.replace(/[^a-z0-9]/gi, '_').toLowerCase()}.json`; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + }; + + const handleWikiOverride = async () => { + if (!companyId) return + setIsProcessing(true) + try { + await axios.post(`${apiBase}/companies/${companyId}/override/wiki?url=${encodeURIComponent(wikiUrlInput)}`) + setIsEditingWiki(false) + fetchData() + } catch (e) { + alert("Update failed") + console.error(e) + } finally { + setIsProcessing(false) } - - const handleWebsiteOverride = async () => { - if (!companyId) return - setIsProcessing(true) - try { - await axios.post(`${apiBase}/companies/${companyId}/override/website?url=${encodeURIComponent(websiteInput)}`) - setIsEditingWebsite(false) - fetchData() - } catch (e) { - alert("Update failed") - console.error(e) - } finally { - setIsProcessing(false) - } + } + + const handleWebsiteOverride = async () => { + if (!companyId) return + setIsProcessing(true) + try { + await axios.post(`${apiBase}/companies/${companyId}/override/website?url=${encodeURIComponent(websiteInput)}`) + setIsEditingWebsite(false) + fetchData() + } catch (e) { + alert("Update failed") + console.error(e) + } finally { + setIsProcessing(false) } - - const handleImpressumOverride = async () => { - if (!companyId) return - setIsProcessing(true) - try { - await axios.post(`${apiBase}/companies/${companyId}/override/impressum?url=${encodeURIComponent(impressumUrlInput)}`) - setIsEditingImpressum(false) - fetchData() - } catch (e) { - alert("Impressum update failed") - console.error(e) - } finally { - setIsProcessing(false) - } + } + + const handleImpressumOverride = async () => { + if (!companyId) return + setIsProcessing(true) + try { + await axios.post(`${apiBase}/companies/${companyId}/override/impressum?url=${encodeURIComponent(impressumUrlInput)}`) + setIsEditingImpressum(false) + fetchData() + } catch (e) { + alert("Impressum update failed") + console.error(e) + } finally { + setIsProcessing(false) + } + } + + const handleIndustryOverride = async () => { + if (!companyId) return + setIsProcessing(true) + try { + await axios.put(`${apiBase}/companies/${companyId}/industry`, { industry_ai: industryInput }) + setIsEditingIndustry(false) + fetchData() + } catch (e) { + alert("Industry update failed") + console.error(e) + } finally { + setIsProcessing(false) + } + } + + const handleReevaluateWikipedia = async () => { + if (!companyId) return + setIsProcessing(true) + try { + await axios.post(`${apiBase}/companies/${companyId}/reevaluate-wikipedia`) + // Polling effect will handle the rest + } catch (e) { + console.error(e) + setIsProcessing(false) // Stop on direct error + } + } + + const handleDelete = async () => { + console.log("[Inspector] Delete requested for ID:", companyId) + if (!companyId) return; + + if (!window.confirm(`Are you sure you want to delete "${data?.name}"? This action cannot be undone.`)) { + console.log("[Inspector] Delete cancelled by user") + return } - const handleLockToggle = async (sourceType: string, currentLockStatus: boolean) => { - if (!companyId) return - try { - await axios.post(`${apiBase}/enrichment/${companyId}/${sourceType}/lock?locked=${!currentLockStatus}`) - fetchData(true) // Silent refresh - } catch (e) { - console.error("Lock toggle failed", e) - } + try { + console.log("[Inspector] Sending DELETE request...") + await axios.delete(`${apiBase}/companies/${companyId}`) + console.log("[Inspector] Delete successful") + onClose() // Close the inspector on success + window.location.reload() // Force reload to show updated list + } catch (e: any) { + console.error("[Inspector] Delete failed:", e) + alert("Failed to delete company: " + (e.response?.data?.detail || e.message)) } + } - const handleAddContact = async (contact: Contact) => { - if (!companyId) return - try { - await axios.post(`${apiBase}/contacts`, { ...contact, company_id: companyId }) - fetchData(true) - } catch (e) { - alert("Failed to add contact") - console.error(e) - } + const handleLockToggle = async (sourceType: string, currentLockStatus: boolean) => { + if (!companyId) return + try { + await axios.post(`${apiBase}/enrichment/${companyId}/${sourceType}/lock?locked=${!currentLockStatus}`) + fetchData(true) // Silent refresh + } catch (e) { + console.error("Lock toggle failed", e) } - - const handleEditContact = async (contact: Contact) => { - if (!contact.id) return - try { - await axios.put(`${apiBase}/contacts/${contact.id}`, contact) - fetchData(true) - } catch (e) { - alert("Failed to update contact") - console.error(e) - } + } + + const handleAddContact = async (contact: Contact) => { + if (!companyId) return + try { + await axios.post(`${apiBase}/contacts`, { ...contact, company_id: companyId }) + fetchData(true) + } catch (e) { + alert("Failed to add contact") + console.error(e) } - - if (!companyId) return null - - const wikiEntry = data?.enrichment_data?.find(e => e.source_type === 'wikipedia') - const wiki = wikiEntry?.content - const isLocked = wikiEntry?.is_locked - const wikiDate = wikiEntry?.created_at - - const aiAnalysisEntry = data?.enrichment_data?.find(e => e.source_type === 'ai_analysis') - const aiAnalysis = aiAnalysisEntry?.content - const aiDate = aiAnalysisEntry?.created_at - - const scrapeEntry = data?.enrichment_data?.find(e => e.source_type === 'website_scrape') - const scrapeData = scrapeEntry?.content - const impressum = scrapeData?.impressum - const scrapeDate = scrapeEntry?.created_at - - return ( -
- {loading ? ( -
Loading details...
- ) : !data ? ( -
Failed to load data.
- ) : ( -
- {/* Header */} -
-
-

{data.name}

-
- + + + +
+
+ +
+ {!isEditingWebsite ? ( + + {new URL(data.website.startsWith('http') ? data.website : `https://${data.website}`).hostname.replace('www.', '')} + + ) : ( + No website + )} +
- -
- {!isEditingWebsite ? ( -
- {data.website && data.website !== "k.A." ? ( - - {new URL(data.website).hostname.replace('www.', '')} - - ) : ( - No website - )} - + +
+ )} +
+ + {/* Tab Navigation */} +
+ + +
+
+ +
+ + {activeTab === 'overview' && ( + <> + {/* Action Bar (Only for Overview) */} +
+ + +
+ + {/* Impressum / Legal Data */} +
+
+
+
+ +
+ Official Legal Data +
+
+ {scrapeDate && ( +
+ {new Date(scrapeDate).toLocaleDateString()} +
+ )} + + {/* Lock Button for Impressum */} + {scrapeEntry && ( + + )} + + {!isEditingImpressum ? ( + + ) : (
+ +
- ) : ( -
- setWebsiteInput(e.target.value)} - placeholder="https://..." - className="bg-white dark:bg-slate-800 border border-slate-300 dark:border-slate-700 rounded px-2 py-0.5 text-xs text-slate-900 dark:text-white focus:ring-1 focus:ring-blue-500 outline-none w-48" - autoFocus - /> - - -
- )} - - {data.industry_ai && ( - - {data.industry_ai} - - )} - - {data.status} - -
- - {/* Tab Navigation */} -
- - -
-
- -
- - {activeTab === 'overview' && ( - <> - {/* Action Bar (Only for Overview) */} -
- - -
- - {/* Impressum / Legal Data */} -
-
-
-
- -
- Official Legal Data -
-
- {scrapeDate && ( -
- {new Date(scrapeDate).toLocaleDateString()} -
- )} - - {/* Lock Button for Impressum */} - {scrapeEntry && ( - - )} - - {!isEditingImpressum ? ( - - ) : (
- - -
- )} -
-
- - {isEditingImpressum && ( -
- setImpressumUrlInput(e.target.value)} - placeholder="https://.../impressum" - className="w-full bg-white dark:bg-slate-900 border border-slate-300 dark:border-slate-700 rounded px-2 py-1 text-xs text-slate-900 dark:text-white focus:ring-1 focus:ring-blue-500 outline-none" - autoFocus - /> -
- )} - - {impressum ? ( - <> -
- {impressum.legal_name || "Unknown Legal Name"} -
- -
- -
-
{impressum.street}
-
{impressum.zip} {impressum.city}
-
-
- - {(impressum.email || impressum.phone) && ( -
- {impressum.email && {impressum.email}} - {impressum.phone && {impressum.phone}} - {impressum.vat_id && VAT: {impressum.vat_id}} -
- )} - - ) : !isEditingImpressum && ( -
- No legal data found. Click pencil to provide direct Impressum link. -
- )} -
- - - - {/* AI Analysis Dossier */} - {aiAnalysis && ( -
-
-

- AI Strategic Dossier -

- {aiDate && ( -
- {new Date(aiDate).toLocaleDateString()} -
- )} -
-
-
-
Business Model
-

{aiAnalysis.business_model || "No summary available."}

-
- {aiAnalysis.infrastructure_evidence && ( -
-
Infrastructure Evidence
-

"{aiAnalysis.infrastructure_evidence}"

-
- )} -
-
- )} - - {/* Wikipedia Section */} -
-
-

- Company Profile (Wikipedia) -

- - - -
- - {wikiDate && ( - -
- - {new Date(wikiDate).toLocaleDateString()} - -
- - )} - - - - {/* Lock Button for Wiki */} - - {wikiEntry && ( - - - - )} - - - - {!isEditingWiki ? ( - - - - ) : (
- - -
- )} -
-
- - {isEditingWiki && ( -
- setWikiUrlInput(e.target.value)} - placeholder="Paste Wikipedia URL here..." - className="w-full bg-white dark:bg-slate-800 border border-slate-300 dark:border-slate-700 rounded px-2 py-1 text-sm text-slate-900 dark:text-white focus:ring-1 focus:ring-blue-500 outline-none" - /> -

Paste a valid URL. Saving will trigger a re-scan.

-
)} - - {wiki && wiki.url !== 'k.A.' && !isEditingWiki ? ( -
- {/* ... existing wiki content ... */} -
-
- -
- - {isLocked && ( -
- Manual Override -
- )} - -

- "{wiki.first_paragraph}" -

- -
-
-
- -
-
-
Employees
-
{wiki.mitarbeiter || 'k.A.'}
-
-
- -
-
- -
-
-
Revenue
-
{wiki.umsatz && wiki.umsatz !== 'k.A.' ? `${wiki.umsatz} Mio. €` : 'k.A.'}
-
-
- -
-
- -
-
-
Headquarters
-
{wiki.sitz_stadt}{wiki.sitz_land ? `, ${wiki.sitz_land}` : ''}
-
-
- -
-
- -
-
-
Wiki Industry
-
{wiki.branche || 'k.A.'}
-
-
-
- - {wiki.categories && wiki.categories !== 'k.A.' && ( -
-
- Categories -
-
- {wiki.categories.split(',').map((cat: string) => ( - - {cat.trim()} - - ))} -
-
- )} - - +
+
+ + {isEditingImpressum && ( +
+ setImpressumUrlInput(e.target.value)} + placeholder="https://.../impressum" + className="w-full bg-white dark:bg-slate-900 border border-slate-300 dark:border-slate-700 rounded px-2 py-1 text-xs text-slate-900 dark:text-white focus:ring-1 focus:ring-blue-500 outline-none" + autoFocus + /> +
+ )} + + {impressum ? ( + <> +
+ {impressum.legal_name || "Unknown Legal Name"} +
+ +
+ +
+
{impressum.street}
+
{impressum.zip} {impressum.city}
- ) : !isEditingWiki ? ( -
- -

No Wikipedia profile found yet.

-
- ) : null} + + {(impressum.email || impressum.phone) && ( +
+ {impressum.email && {impressum.email}} + {impressum.phone && {impressum.phone}} + {impressum.vat_id && VAT: {impressum.vat_id}} +
+ )} + + ) : !isEditingImpressum && ( +
+ No legal data found. Click pencil to provide direct Impressum link.
- - {/* Robotics Scorecard */} + )} +
+ + + + {/* Core Classification */} +
+
-

- Robotics Potential +
Industry Focus
+ {!isEditingIndustry ? ( +
+
+ +
+
+
{data.industry_ai || "Not Classified"}
+ +
+
+ ) : ( +
+ +
+ + +
+
+ )} +

+
+
Analysis Status
+
+
+ +
+
+ {data.status} +
+
+
+
+
+ + {/* AI Analysis Dossier */} + {aiAnalysis && ( +
+
+

+ AI Strategic Dossier

- -
- {['cleaning', 'transport', 'security', 'service'].map(type => { - const sig = data.signals.find(s => s.signal_type.includes(type)) - const score = sig ? sig.confidence : 0 - - return ( -
-
- {type} - 70 ? "text-green-600 dark:text-green-400" : score > 30 ? "text-yellow-600 dark:text-yellow-400" : "text-slate-500")}> - {score}% - -
-
-
70 ? "bg-green-500" : score > 30 ? "bg-yellow-500" : "bg-slate-500")} - style={{ width: `${score}%` }} - /> -
- {sig?.proof_text && ( -

- "{sig.proof_text}" -

- )} -
- ) - })} -
+ {aiDate && ( +
+ {new Date(aiDate).toLocaleDateString()} +
+ )}
- - {/* Meta Info */} -
-
- Added: {new Date(data.created_at).toLocaleDateString()} -
-
- ID: CE-{data.id.toString().padStart(4, '0')} +
+
+
Business Model
+

{aiAnalysis.business_model || "No summary available."}

+ {aiAnalysis.infrastructure_evidence && ( +
+
Infrastructure Evidence
+

"{aiAnalysis.infrastructure_evidence}"

+
+ )}
- +
)} - - {activeTab === 'contacts' && ( - - )}
-
- )} + + {/* Wikipedia Section */} +
+
+

+ Company Profile (Wikipedia) +

+ + + +
+ + {wikiDate && ( + +
+ + {new Date(wikiDate).toLocaleDateString()} + +
+ + )} + + + + {/* Lock Button for Wiki */} + + {wikiEntry && ( + + + + )} + + {/* Re-evaluate Button */} + + + + + {!isEditingWiki ? ( + + + + ) : (
+ + +
+ )} +
+
+ + {isEditingWiki && ( +
+ setWikiUrlInput(e.target.value)} + placeholder="Paste Wikipedia URL here..." + className="w-full bg-white dark:bg-slate-800 border border-slate-300 dark:border-slate-700 rounded px-2 py-1 text-sm text-slate-900 dark:text-white focus:ring-1 focus:ring-blue-500 outline-none" + /> +

Paste a valid URL. Saving will trigger a re-scan.

+
+ )} + + {wiki && wiki.url !== 'k.A.' && !isEditingWiki ? ( +
+ {/* ... existing wiki content ... */} +
+
+ +
+ + {isLocked && ( +
+ Manual Override +
+ )} + +

+ "{wiki.first_paragraph}" +

+ +
+
+
+ +
+
+
Employees
+
{wiki.mitarbeiter || 'k.A.'}
+
+
+ +
+
+ +
+
+
Revenue
+
{wiki.umsatz && wiki.umsatz !== 'k.A.' ? `${wiki.umsatz} Mio. €` : 'k.A.'}
+
+
+ +
+
+ +
+
+
Headquarters
+
{wiki.sitz_stadt}{wiki.sitz_land ? `, ${wiki.sitz_land}` : ''}
+
+
+ +
+
+ +
+
+
Wiki Industry
+
{wiki.branche || 'k.A.'}
+
+
+
+ + {wiki.categories && wiki.categories !== 'k.A.' && ( +
+
+ Categories +
+
+ {wiki.categories.split(',').map((cat: string) => ( + + {cat.trim()} + + ))} +
+
+ )} + + +
+
+ ) : !isEditingWiki ? ( +
+ +

No Wikipedia profile found yet.

+
+ ) : null} +
+ + {/* Quantitative Potential Analysis (v0.7.0) */} +
+

+ Quantitative Potential +

+ + {data.calculated_metric_value != null || data.standardized_metric_value != null ? ( +
+ {/* Calculated Metric */} + {data.calculated_metric_value != null && ( +
+
+ +
+
+
{data.calculated_metric_name || 'Calculated Metric'}
+
+ {data.calculated_metric_value.toLocaleString('de-DE')} + {data.calculated_metric_unit} +
+
+
+ )} + + {/* Standardized Metric */} + {data.standardized_metric_value != null && ( +
+
+ +
+
+
Standardized Potential ({data.standardized_metric_unit})
+
+ {data.standardized_metric_value.toLocaleString('de-DE')} + {data.standardized_metric_unit} +
+

Comparable value for potential analysis.

+
+
+ )} + + {/* Source */} + {data.metric_source && ( +
+ + Source: + {data.metric_source_url ? ( + + {data.metric_source} + + ) : ( + {data.metric_source} + )} +
+ )} + +
+ ) : ( +
+ +

No quantitative data calculated yet.

+

Run "Analyze Potential" to extract metrics.

+
+ )} +
+ + {/* Meta Info */} +
+
+ Added: {new Date(data.created_at).toLocaleDateString()} +
+
+ ID: CE-{data.id.toString().padStart(4, '0')} +
+
+ + )} + + {activeTab === 'contacts' && ( + + )}
- ) - } + )} +
+ ) +}