feat: robust metric extraction with confidence score and proof snippets

- fixed Year-Prefix Bug in MetricParser
- added metric_confidence and metric_proof_text to database
- added Entity-Check and Annual-Priority to LLM prompt
- improved UI: added confidence traffic light and mouse-over proof tooltip
- restored missing API endpoints (create, bulk, wiki-override)
2026-01-23 21:16:07 +00:00
parent c5652fc9b5
commit e43e129771
7006 changed files with 1367435 additions and 201 deletions
--- a/company-explorer/backend/verify_potential.py
+++ b/company-explorer/backend/verify_potential.py
@@ -0,0 +1,55 @@
+import sys
+import os
+import math
+from pprint import pprint
+
+# Add the current directory to sys.path to import modules
+sys.path.append(os.path.abspath(os.path.dirname(__file__)))
+
+from lib.metric_parser import MetricParser
+
+def test_parser() -> bool:
+    """Check MetricParser.extract_numeric_value against a table of sample inputs.
+
+    Prints one result row per case, then a summary line.
+
+    Returns:
+        True if every case matched its expected value, False otherwise.
+    """
+    test_cases = [
+        # (input_text, is_revenue, expected_value, description)
+        ("1.005 Mitarbeiter", False, 1005.0, "German thousands dot for employees"),
+        ("80 (2020)", False, 80.0, "Year in parentheses removed"),
+        ("375.6 Mio", True, 375.6, "Revenue in Millions (dot as decimal)"),
+        ("1,5 Mrd", True, 1500.0, "Revenue in Billions (comma as decimal)"),
+        ("ca. 4.000 m²", False, 4000.0, "Area with ca. and thousands separator"),
+        ("47.9 Mio. Passagiere", False, 47900000.0, "Absolute Millions for non-revenue"),
+        ("rd. 1,0 Mio. €", True, 1.0, "Revenue with rd. and comma"),
+        ("1.000 (Stand 2021)", False, 1000.0, "Thousands separator with Stand 2021 in parens"),
+        ("120.000", False, 120000.0, "Large number with dot separator"),
+        ("375,6 Millionen Euro", True, 375.6, "Revenue with comma and full word"),
+    ]
+
+    print(f"{'Input':<30} | {'Rev?':<5} | {'Expected':<10} | {'Actual':<10} | {'Status'}")
+    print("-" * 80)
+
+    all_passed = True
+    for text, is_rev, expected, desc in test_cases:
+        actual = MetricParser.extract_numeric_value(text, is_revenue=is_rev)
+        # Compare with a tolerance rather than ==: the expected values are floats,
+        # and a parser that scales by a unit factor may be off by a rounding error.
+        # A None or non-numeric result counts as a failure instead of raising.
+        passed = isinstance(actual, (int, float)) and math.isclose(actual, expected, rel_tol=1e-9)
+        status = "✅ PASS" if passed else "❌ FAIL"
+        if not passed:
+            all_passed = False
+        print(f"{text:<30} | {str(is_rev):<5} | {expected:<10} | {actual if actual is not None else 'None':<10} | {status} ({desc})")
+
+    if all_passed:
+        print("\nAll parser test cases passed!")
+    else:
+        print("\nSome parser test cases FAILED.")
+    return all_passed
+
+if __name__ == "__main__":
+    # Exit non-zero on failure so shells and CI can detect a failed verification.
+    sys.exit(0 if test_parser() else 1)