feat: robust metric extraction with confidence score and proof snippets

- fixed Year-Prefix Bug in MetricParser
- added metric_confidence and metric_proof_text to database
- added Entity-Check and Annual-Priority to LLM prompt
- improved UI: added confidence traffic light and mouse-over proof tooltip
- restored missing API endpoints (create, bulk, wiki-override)
This commit is contained in:
2026-01-23 21:16:07 +00:00
parent c5652fc9b5
commit e43e129771
7006 changed files with 1367435 additions and 201 deletions

View File

@@ -0,0 +1,42 @@
import sys
import os
from pprint import pprint
# Add the current directory to sys.path to import modules
sys.path.append(os.path.abspath(os.path.dirname(__file__)))
from lib.metric_parser import MetricParser
def test_parser():
    """Check MetricParser.extract_numeric_value against a table of sample inputs.

    Every case is a tuple ``(input_text, is_revenue, expected_value,
    description)``. One result row is printed per case, followed by a
    summary line.

    Raises:
        AssertionError: If at least one case does not yield its expected
            value, so a test runner (or the exit status when run as a
            script) reports the failure instead of it only being printed.
    """
    import math  # local import keeps this block self-contained

    test_cases = [
        # (input_text, is_revenue, expected_value, description)
        ("1.005 Mitarbeiter", False, 1005.0, "German thousands dot for employees"),
        ("80 (2020)", False, 80.0, "Year in parentheses removed"),
        ("375.6 Mio", True, 375.6, "Revenue in Millions (dot as decimal)"),
        ("1,5 Mrd", True, 1500.0, "Revenue in Billions (comma as decimal)"),
        ("ca. 4.000 m²", False, 4000.0, "Area with ca. and thousands separator"),
        ("47.9 Mio. Passagiere", False, 47900000.0, "Absolute Millions for non-revenue"),
        ("rd. 1,0 Mio. €", True, 1.0, "Revenue with rd. and comma"),
        ("1.000 (Stand 2021)", False, 1000.0, "Thousands separator with Stand 2021 in parens"),
        ("120.000", False, 120000.0, "Large number with dot separator"),
        ("375,6 Millionen Euro", True, 375.6, "Revenue with comma and full word"),
    ]

    print(f"{'Input':<30} | {'Rev?':<5} | {'Expected':<10} | {'Actual':<10} | {'Status'}")
    print("-" * 80)

    failed_inputs = []
    for text, is_rev, expected, desc in test_cases:
        actual = MetricParser.extract_numeric_value(text, is_revenue=is_rev)

        # Exact equality still passes; additionally tolerate float rounding
        # noise from scaling (e.g. value * 1_000_000). None or any other
        # non-numeric result can only pass via the exact comparison.
        passed = actual == expected or (
            isinstance(actual, (int, float))
            and math.isclose(actual, expected, rel_tol=1e-9)
        )
        status = "✅ PASS" if passed else "❌ FAIL"
        if not passed:
            failed_inputs.append(text)

        print(f"{text:<30} | {str(is_rev):<5} | {expected:<10} | {actual if actual is not None else 'None':<10} | {status} ({desc})")

    if not failed_inputs:
        print("\nAll parser test cases passed!")
    else:
        print("\nSome parser test cases FAILED.")

    # Without this the function would always "succeed" under pytest, because
    # printing a FAIL row does not fail the test.
    assert not failed_inputs, (
        f"{len(failed_inputs)} parser test case(s) failed: {failed_inputs}"
    )
# Run the parser checks when this file is executed directly as a script
# (nothing runs on import).
if __name__ == "__main__":
    test_parser()