"""Regression tests for ``MetricParser.extract_numeric_value``."""

import os
import sys
import unittest

# Ensure the app's root is in the path to allow imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Must come after the sys.path tweak above, hence the late import.
from lib.metric_parser import MetricParser  # noqa: E402


class TestMetricParser(unittest.TestCase):
    """Pins the numeric extraction results for previously observed bad inputs.

    Each test feeds a text snippet to ``MetricParser.extract_numeric_value``
    and asserts the float it returns. Inputs that belong to the same
    regression are run under ``subTest`` so that one failing variant does not
    hide the result of the others.
    """

    def test_wolfra_concatenated_year_bug(self):
        """
        Catches the "802020" bug where a number and a year were concatenated.
        The parser is expected to recognize and strip the trailing year, both
        when it is glued to the number and when it follows after a space.
        """
        texts = (
            "802020",
            "Mitarbeiter: 80 2020",
        )
        for text in texts:
            with self.subTest(text=text):
                result = MetricParser.extract_numeric_value(
                    text, is_revenue=False
                )
                self.assertEqual(result, 80.0)

    def test_erding_year_prefix_bug(self):
        """
        Handles cases where a year appears before the actual metric.
        The "Smart Year Skip" logic should ignore "2022" and find "200.000".
        """
        text = "2022 lagen die Besucherzahlen bei knapp 200.000."

        # First variant: the caller supplies the value it expects to find.
        with self.subTest(expected_value="200000"):
            result = MetricParser.extract_numeric_value(
                text, is_revenue=False, expected_value="200000"
            )
            self.assertEqual(result, 200000.0)

        # Second variant: no expected value, so the parser has to rely on its
        # fallback. The leading "2022" must still be skipped as a year; this
        # ensures the smart skip works even without LLM guidance.
        with self.subTest(expected_value=None):
            result_no_expected = MetricParser.extract_numeric_value(
                text, is_revenue=False
            )
            self.assertEqual(result_no_expected, 200000.0)

    def test_greilmeier_multiple_numbers_bug(self):
        """
        Ensures the parser picks the correct number when multiple are present,
        guided by the `expected_value` provided by the LLM.
        It should ignore "2" and correctly parse "8.000".
        """
        text = (
            "An 2 Standorten - in Schwindegg und in Erding – bieten wir "
            "unseren Kunden 8.000 m² Lagerkapazität."
        )
        expected_values = (
            "8000",      # Simulate LLM providing a clean number string
            "8.000 m²",  # Simulate LLM providing a string with units
        )
        for expected_value in expected_values:
            with self.subTest(expected_value=expected_value):
                result = MetricParser.extract_numeric_value(
                    text, is_revenue=False, expected_value=expected_value
                )
                self.assertEqual(result, 8000.0)

    def test_german_decimal_comma(self):
        """Tests standard German decimal format."""
        text = "Umsatz: 14,5 Mio. Euro"
        result = MetricParser.extract_numeric_value(text, is_revenue=True)
        # NOTE(review): 14.5 (not 14_500_000) suggests revenue is reported in
        # millions when is_revenue=True - confirm against MetricParser.
        self.assertEqual(result, 14.5)

    def test_german_thousands_dot(self):
        """Tests standard German thousands separator."""
        text = "1.005 Mitarbeiter"
        result = MetricParser.extract_numeric_value(text, is_revenue=False)
        self.assertEqual(result, 1005.0)


if __name__ == '__main__':
    unittest.main()