Files
Brancheneinstufung2/company-explorer/backend/tests/test_classification_service.py
Floke 9019a801ed fix(transcription): [2f388f42] finalize and fix AI insights feature
This commit resolves all outstanding issues with the AI Insights feature.

- Corrects the transcript formatting logic to properly handle the database JSON structure, ensuring the AI receives the correct context.
- Fixes the Gemini API client by using the correct model name ('gemini-2.0-flash') and the proper client initialization.
- Securely passes the API key to the container as an environment variable.
- Cleans up the codebase by removing temporary debugging endpoints.
- Adds a script for programmatic updates.
- Updates documentation with troubleshooting insights from the implementation process.
2026-01-26 08:53:13 +00:00

138 lines
5.7 KiB
Python

import sys
import os
import unittest
from unittest.mock import MagicMock, patch
# Ensure the app's root is in the path to allow imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from backend.services.classification import ClassificationService
class TestClassificationService(unittest.TestCase):
    """Unit tests for ClassificationService.

    Covers the metric plausibility check and the metric extraction cascade.
    In the cascade tests the SERP search, the website scraper, the Wikipedia
    lookup and the LLM extraction prompt are all replaced with mocks.
    """

    def setUp(self):
        """Set up a new ClassificationService instance for each test."""
        self.service = ClassificationService()

    def test_plausibility_check_hospital_beds(self):
        """
        Tests the _is_metric_plausible method with rules for hospital beds.
        """
        metric = "# Planbetten (Krankenhaus)"
        # (label, value, expected plausibility)
        cases = (
            ("plausible value", 150, True),
            ("implausible value (too low)", 11, False),
            ("edge case: exactly the minimum", 20, True),
            ("no value", None, True),
        )
        for label, value, expected in cases:
            # subTest labels the failing case instead of only a line number.
            with self.subTest(label, value=value):
                plausible = self.service._is_metric_plausible(metric, value)
                # bool() keeps the truthy/falsy semantics of
                # assertTrue/assertFalse.
                self.assertEqual(bool(plausible), expected)

    def test_plausibility_check_no_rule(self):
        """
        Tests that metrics without a specific rule are always considered plausible.
        """
        self.assertTrue(self.service._is_metric_plausible("Some New Metric", 5))
        self.assertTrue(self.service._is_metric_plausible("Another Metric", 100000))

    # Decorators apply bottom-up, so the mock arguments arrive in the order
    # wikipedia, website scraper, SERP search.
    @patch('backend.services.classification.run_serp_search')
    @patch('backend.services.classification.scrape_website_content')
    @patch('backend.services.classification.ClassificationService._get_wikipedia_content')
    def test_source_prioritization_erding_case(self, mock_get_wiki, mock_scrape_web, mock_serp):
        """
        Tests that a high-quality Wikipedia result is chosen over a low-quality website result.
        """
        # --- Mocks Setup ---
        # Mock website to return a bad, implausible value
        mock_scrape_web.return_value = "Auf unseren 11 Stationen..."
        # Mock Wikipedia to return a good, plausible value
        mock_get_wiki.return_value = "Das Klinikum hat 352 Betten."
        # Mock SerpAPI to return nothing
        mock_serp.return_value = None

        # Mock the LLM to return different values based on the source content
        def llm_side_effect(content, search_term, industry_name):
            if "11 Stationen" in content:
                return {
                    "raw_text_segment": "11 Stationen",
                    "raw_value": 11,
                    "raw_unit": "Stationen",
                    "confidence_score": 0.6,
                    "calculated_metric_value": 11,
                }
            if "352 Betten" in content:
                return {
                    "raw_text_segment": "352 Betten",
                    "raw_value": 352,
                    "raw_unit": "Betten",
                    "confidence_score": 0.95,
                    "calculated_metric_value": 352,
                }
            return None

        # The LLM call is replaced on the service instance; setUp creates a
        # fresh instance per test, so nothing leaks into other tests.
        self.service._run_llm_metric_extraction_prompt = MagicMock(side_effect=llm_side_effect)

        # --- Test Execution ---
        mock_company = MagicMock()
        mock_company.website = "http://example.com"
        mock_company.id = 1
        # We need a mock DB session
        mock_db = MagicMock()

        results = self.service._extract_and_calculate_metric_cascade(
            db=mock_db,
            company=mock_company,
            industry_name="Krankenhaus",
            search_term="# Planbetten (Krankenhaus)",
            standardization_logic=None,
            standardized_unit="Betten",
        )

        # --- Assertions ---
        self.assertIsNotNone(results)
        self.assertEqual(results['calculated_metric_value'], 352)
        self.assertEqual(results['metric_source'], 'wikipedia')

    @patch('backend.services.classification.run_serp_search')
    @patch('backend.services.classification.scrape_website_content')
    def test_targeted_extraction_spetec_case(self, mock_scrape_web, mock_serp):
        """
        Tests that the correct value is extracted when a text snippet contains multiple numbers.
        """
        # --- Mocks Setup ---
        # Mock website content with ambiguous numbers
        mock_scrape_web.return_value = "Wir haben 65 Mitarbeiter auf einer Fläche von 8.000 m²."
        mock_serp.return_value = None
        # Wikipedia returns nothing, so only the website part is exercised
        self.service._get_wikipedia_content = MagicMock(return_value=None)

        # Mock the LLM to return the full snippet, letting the parser do the work
        # The improved prompt should guide the LLM to provide the correct 'raw_value' as a hint
        llm_result = {
            "raw_text_segment": "65 Mitarbeiter auf einer Fläche von 8.000 m²",
            "raw_value": "8000",  # The crucial hint from the improved prompt
            "raw_unit": "",
            "confidence_score": 0.9,
            "calculated_metric_value": 8000.0,
        }
        self.service._run_llm_metric_extraction_prompt = MagicMock(return_value=llm_result)

        # --- Test Execution ---
        mock_company = MagicMock()
        mock_company.website = "http://spetec.com"
        mock_company.id = 2
        mock_db = MagicMock()

        results = self.service._extract_and_calculate_metric_cascade(
            db=mock_db,
            company=mock_company,
            industry_name="Laborausstattung",
            search_term="Fabrikhalle (m²)",
            standardization_logic=None,
            standardized_unit="",
        )

        # --- Assertions ---
        self.assertIsNotNone(results)
        self.assertEqual(results['calculated_metric_value'], 8000.0)
        self.assertEqual(results['metric_source'], 'website')
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()