fix: metric parser now aggressively cleans expected_value to handle units (e.g. '8.000 m²')
This commit is contained in:
@@ -30,7 +30,22 @@ class MetricParser:
|
||||
# Try to parse the LLM's raw value directly first (it's often cleaner: "200000")
|
||||
try:
|
||||
# Remove simple noise from expected value
|
||||
clean_expected = str(expected_value).replace("'", "").replace(" ", "").replace("Mio", "").replace("Millionen", "")
|
||||
# Aggressively strip units and text to isolate the number
|
||||
clean_expected = str(expected_value).lower()
|
||||
# Remove common units
|
||||
for unit in ['m²', 'qm', 'sqm', 'mitarbeiter', 'employees', 'eur', 'usd', 'chf', '€', '$', '£', '¥']:
|
||||
clean_expected = clean_expected.replace(unit, "")
|
||||
|
||||
# Remove multiplier words (multipliers are handled via is_revenue later; for expected-value matching we want the raw number)
|
||||
# Open question: does expected_value "2.5 Mio" mean we should match 2.5 in the text, or 2500000?
|
||||
# Usually the LLM extract matches the text representation.
|
||||
clean_expected = clean_expected.replace("mio", "").replace("millionen", "").replace("mrd", "").replace("milliarden", "")
|
||||
clean_expected = clean_expected.replace("tsd", "").replace("tausend", "")
|
||||
|
||||
# Final cleanup: drop spaces and apostrophes (keep . , -)
|
||||
# But preserve structure for robust parser
|
||||
clean_expected = clean_expected.replace(" ", "").replace("'", "")
|
||||
|
||||
# If it looks like a clean number already, try parsing it
|
||||
# But use the robust parser to handle German decimals if present in expected
|
||||
val = MetricParser._parse_robust_number(clean_expected, is_revenue)
|
||||
@@ -123,7 +138,15 @@ class MetricParser:
|
||||
target_val = None
|
||||
if expected_value:
|
||||
try:
|
||||
target_val = MetricParser._parse_robust_number(str(expected_value).replace("'", ""), is_revenue)
|
||||
# Re-apply aggressive cleaning to ensure we have a valid float for comparison
|
||||
clean_expected = str(expected_value).lower()
|
||||
for unit in ['m²', 'qm', 'sqm', 'mitarbeiter', 'employees', 'eur', 'usd', 'chf', '€', '$', '£', '¥']:
|
||||
clean_expected = clean_expected.replace(unit, "")
|
||||
clean_expected = clean_expected.replace("mio", "").replace("millionen", "").replace("mrd", "").replace("milliarden", "")
|
||||
clean_expected = clean_expected.replace("tsd", "").replace("tausend", "")
|
||||
clean_expected = clean_expected.replace(" ", "").replace("'", "")
|
||||
|
||||
target_val = MetricParser._parse_robust_number(clean_expected, is_revenue)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
Reference in New Issue
Block a user