import sys
import os
import unittest
from unittest.mock import MagicMock, patch

# Put the directory two levels above this file on the import path so that the
# ``backend`` package can be imported when the tests are run directly.
_PROJECT_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..')
)
sys.path.insert(0, _PROJECT_ROOT)

from backend.services.classification import ClassificationService


class TestClassificationService(unittest.TestCase):
    """Unit tests for ``ClassificationService`` plausibility and extraction logic."""

    # Metric name used by the hospital-bed plausibility checks.
    HOSPITAL_BEDS_METRIC = "# Planbetten (Krankenhaus)"

    def setUp(self):
        """Create a fresh ``ClassificationService`` before every test."""
        self.service = ClassificationService()

    @staticmethod
    def _company_stub(website, company_id):
        """Return a ``MagicMock`` company with ``website`` and ``id`` set."""
        stub = MagicMock()
        stub.website = website
        stub.id = company_id
        return stub

    def test_plausibility_check_hospital_beds(self):
        """Check ``_is_metric_plausible`` against the hospital-bed metric.

        Expected outcomes: 150 and 20 are plausible, 11 is not, and a
        missing value (``None``) is treated as plausible.
        """
        is_plausible = self.service._is_metric_plausible
        metric = self.HOSPITAL_BEDS_METRIC

        # A clearly reasonable bed count.
        self.assertTrue(is_plausible(metric, 150))
        # Too small to be accepted.
        self.assertFalse(is_plausible(metric, 11))
        # Boundary value: expected to be accepted as the minimum.
        self.assertTrue(is_plausible(metric, 20))
        # Absent value.
        self.assertTrue(is_plausible(metric, None))

    def test_plausibility_check_no_rule(self):
        """Metrics that have no dedicated rule are expected to pass as plausible."""
        is_plausible = self.service._is_metric_plausible

        self.assertTrue(is_plausible("Some New Metric", 5))
        self.assertTrue(is_plausible("Another Metric", 100000))

    @patch("backend.services.classification.run_serp_search")
    @patch("backend.services.classification.scrape_website_content")
    @patch("backend.services.classification.ClassificationService._get_wikipedia_content")
    def test_source_prioritization_erding_case(self, mock_get_wiki, mock_scrape_web, mock_serp):
        """The Wikipedia-derived value should win over the website-derived one.

        The website stub yields text from which the LLM stub extracts 11
        (confidence 0.6); the Wikipedia stub yields text from which it
        extracts 352 (confidence 0.95). The cascade is expected to report
        352 with ``metric_source == 'wikipedia'``.
        """
        # --- Stubbed sources ---
        # SERP search finds nothing.
        mock_serp.return_value = None
        # Wikipedia carries the good value.
        mock_get_wiki.return_value = "Das Klinikum hat 352 Betten."
        # The company website only carries the bad value.
        mock_scrape_web.return_value = "Auf unseren 11 Stationen..."

        # Canned LLM answers, matched in order by a marker found in the content.
        canned_answers = (
            (
                "11 Stationen",
                {
                    "raw_text_segment": "11 Stationen",
                    "raw_value": 11,
                    "raw_unit": "Stationen",
                    "confidence_score": 0.6,
                    "calculated_metric_value": 11,
                },
            ),
            (
                "352 Betten",
                {
                    "raw_text_segment": "352 Betten",
                    "raw_value": 352,
                    "raw_unit": "Betten",
                    "confidence_score": 0.95,
                    "calculated_metric_value": 352,
                },
            ),
        )

        def fake_llm_extraction(content, search_term, industry_name):
            # Hand back a fresh dict per call, as a literal in the function would.
            for marker, answer in canned_answers:
                if marker in content:
                    return dict(answer)
            return None

        # Replace the LLM call on this service instance only.
        self.service._run_llm_metric_extraction_prompt = MagicMock(
            side_effect=fake_llm_extraction
        )

        # --- Execution ---
        company = self._company_stub("http://example.com", 1)
        db_session = MagicMock()

        outcome = self.service._extract_and_calculate_metric_cascade(
            db=db_session,
            company=company,
            industry_name="Krankenhaus",
            search_term="# Planbetten (Krankenhaus)",
            standardization_logic=None,
            standardized_unit="Betten",
        )

        # --- Assertions ---
        self.assertIsNotNone(outcome)
        self.assertEqual(outcome['calculated_metric_value'], 352)
        self.assertEqual(outcome['metric_source'], 'wikipedia')

    @patch("backend.services.classification.run_serp_search")
    @patch("backend.services.classification.scrape_website_content")
    def test_targeted_extraction_spetec_case(self, mock_scrape_web, mock_serp):
        """The right number should be picked from a snippet containing several.

        The website text mentions both 65 employees and 8.000 m²; the LLM
        stub returns the whole snippet with ``raw_value`` "8000" as a hint.
        The cascade is expected to report 8000.0 sourced from the website.
        """
        # --- Stubbed sources ---
        mock_serp.return_value = None
        # Website text with two competing numbers.
        mock_scrape_web.return_value = "Wir haben 65 Mitarbeiter auf einer Fläche von 8.000 m²."
        # No Wikipedia content, so only the website path is exercised.
        self.service._get_wikipedia_content = MagicMock(return_value=None)

        # The LLM returns the full snippet; ``raw_value`` is the hint that
        # identifies which of the numbers is the wanted one.
        extraction = {
            "raw_text_segment": "65 Mitarbeiter auf einer Fläche von 8.000 m²",
            "raw_value": "8000",
            "raw_unit": "m²",
            "confidence_score": 0.9,
            "calculated_metric_value": 8000.0,
        }
        self.service._run_llm_metric_extraction_prompt = MagicMock(
            return_value=extraction
        )

        # --- Execution ---
        company = self._company_stub("http://spetec.com", 2)
        db_session = MagicMock()

        outcome = self.service._extract_and_calculate_metric_cascade(
            db=db_session,
            company=company,
            industry_name="Laborausstattung",
            search_term="Fabrikhalle (m²)",
            standardization_logic=None,
            standardized_unit="m²",
        )

        # --- Assertions ---
        self.assertIsNotNone(outcome)
        self.assertEqual(outcome['calculated_metric_value'], 8000.0)
        self.assertEqual(outcome['metric_source'], 'website')


if __name__ == '__main__':
    unittest.main()