feat: robust metric extraction with confidence score and proof snippets
- fixed Year-Prefix Bug in MetricParser
- added metric_confidence and metric_proof_text to database
- added Entity-Check and Annual-Priority to LLM prompt
- improved UI: added confidence traffic light and mouse-over proof tooltip
- restored missing API endpoints (create, bulk, wiki-override)
This commit is contained in:
@@ -126,55 +126,16 @@ def extract_numeric_value(raw_value: str, is_umsatz: bool = False) -> str:
|
||||
Returns string representation of the number or 'k.A.'.
|
||||
Handles German number formatting (1.000 = 1000, 1,5 = 1.5).
|
||||
"""
|
||||
if not raw_value:
|
||||
from .metric_parser import MetricParser
|
||||
|
||||
val = MetricParser.extract_numeric_value(raw_value, is_revenue=is_umsatz)
|
||||
if val is None:
|
||||
return "k.A."
|
||||
|
||||
raw_value = str(raw_value).strip().lower()
|
||||
if raw_value in ["k.a.", "nan", "none"]:
|
||||
return "k.A."
|
||||
|
||||
multiplier = 1.0
|
||||
if 'mrd' in raw_value or 'billion' in raw_value or 'bn' in raw_value:
|
||||
multiplier = 1000.0
|
||||
if not is_umsatz: multiplier = 1000000000.0
|
||||
elif 'mio' in raw_value or 'million' in raw_value or 'mn' in raw_value:
|
||||
multiplier = 1.0
|
||||
if not is_umsatz: multiplier = 1000000.0
|
||||
elif 'tsd' in raw_value or 'thousand' in raw_value:
|
||||
multiplier = 0.001
|
||||
if not is_umsatz: multiplier = 1000.0
|
||||
|
||||
matches = re.findall(r'(\d+[\.,]?\d*[\.,]?\d*)', raw_value)
|
||||
if not matches:
|
||||
return "k.A."
|
||||
|
||||
try:
|
||||
num_str = matches[0]
|
||||
|
||||
if '.' in num_str and ',' in num_str:
|
||||
if num_str.rfind(',') > num_str.rfind('.'):
|
||||
num_str = num_str.replace('.', '').replace(',', '.')
|
||||
else:
|
||||
num_str = num_str.replace(',', '')
|
||||
elif '.' in num_str:
|
||||
parts = num_str.split('.')
|
||||
if len(parts) > 1 and len(parts[-1]) == 3 and not is_umsatz:
|
||||
num_str = num_str.replace('.', '')
|
||||
elif is_umsatz and len(parts) > 1 and len(parts[-1]) == 3:
|
||||
if num_str.count('.') > 1:
|
||||
num_str = num_str.replace('.', '')
|
||||
elif ',' in num_str:
|
||||
num_str = num_str.replace(',', '.')
|
||||
|
||||
val = float(num_str) * multiplier
|
||||
|
||||
if is_umsatz:
|
||||
return f"{val:.2f}".rstrip('0').rstrip('.')
|
||||
else:
|
||||
return str(int(val))
|
||||
|
||||
except ValueError:
|
||||
return "k.A."
|
||||
if is_umsatz:
|
||||
return f"{val:.2f}".rstrip('0').rstrip('.')
|
||||
else:
|
||||
return str(int(val))
|
||||
|
||||
def fuzzy_similarity(str1: str, str2: str) -> float:
|
||||
"""Returns fuzzy similarity between two strings (0.0 to 1.0)."""
|
||||
|
||||
Reference in New Issue
Block a user