[30388f42] Infrastructure Hardening: Repaired CE/Connector DB schema, fixed frontend styling build, implemented robust echo shield in worker v2.1.1, and integrated Lead Engine into gateway.

2026-03-07 14:08:42 +00:00
parent efcaa57cf0
commit ae2303b733
404 changed files with 24100 additions and 13301 deletions
--- a/company-explorer/backend/tests/test_e2e_full_flow.py
+++ b/company-explorer/backend/tests/test_e2e_full_flow.py
@@ -0,0 +1,249 @@
+import requests
+import time
+import json
+import sys
+import logging
+
+# Configure Logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[logging.StreamHandler(sys.stdout)]
+)
+logger = logging.getLogger("E2E-Test")
+
+# Configuration
+API_URL = "http://172.17.0.1:8000"
+API_USER = "admin"
+API_PASSWORD = "gemini"
+REQUEST_TIMEOUT = 60  # Seconds; without a timeout requests can block forever
+
+# Test Data
+TEST_COMPANY = {
+    "so_contact_id": 99999,
+    "so_person_id": 88888,
+    "crm_name": "Klinikum Landkreis Erding (E2E Test)",
+    "crm_website": "https://www.klinikum-erding.de", # Using real URL for successful discovery
+    "job_title": "Geschäftsführer" # Should map to Operative Decision Maker or C-Level
+}
+
+class CompanyExplorerClient:
+    """Small HTTP client for the Company Explorer API endpoints this script uses.
+
+    Every call goes through one requests.Session carrying HTTP basic auth.
+    """
+
+    def __init__(self, base_url, username, password, timeout=REQUEST_TIMEOUT):
+        """Create the authenticated session.
+
+        timeout is the per-request timeout in seconds for provisioning,
+        lookup and delete calls (the health check uses its own 5 seconds).
+        """
+        self.base_url = base_url
+        self.auth = (username, password)
+        self.timeout = timeout
+        self.session = requests.Session()
+        self.session.auth = self.auth
+
+    def check_health(self):
+        """Return True when GET /api/health succeeds, False on any error."""
+        try:
+            res = self.session.get(f"{self.base_url}/api/health", timeout=5)
+            res.raise_for_status()
+            logger.info(f"✅ Health Check Passed: {res.json()}")
+            return True
+        except Exception as e:
+            logger.error(f"❌ Health Check Failed: {e}")
+            return False
+
+    def provision_contact(self, payload):
+        """POST payload to the provisioning endpoint and return the JSON body.
+
+        Raises requests.HTTPError on a 4xx/5xx response.
+        """
+        url = f"{self.base_url}/api/provision/superoffice-contact"
+        logger.info(f"🚀 Provisioning Contact: {payload['crm_name']}")
+        res = self.session.post(url, json=payload, timeout=self.timeout)
+        res.raise_for_status()
+        return res.json()
+
+    def get_company(self, company_id, attempts=5, delay=2):
+        """Fetch one company as JSON, retrying when the connection drops.
+
+        In the dev environment uvicorn reloads on DB write, which can cut the
+        connection; only those connection errors are retried. An HTTP error
+        status raises requests.HTTPError immediately.
+
+        Raises RuntimeError (chained to the last error) when all attempts fail.
+        """
+        url = f"{self.base_url}/api/companies/{company_id}"
+        last_error = None
+        for attempt in range(1, attempts + 1):
+            try:
+                res = self.session.get(url, timeout=self.timeout)
+                res.raise_for_status()
+                return res.json()
+            except (requests.exceptions.ConnectionError, requests.exceptions.ChunkedEncodingError) as e:
+                last_error = e
+                logger.warning(f"Connection dropped (likely uvicorn reload). Retrying {attempt}/{attempts}...")
+                if attempt < attempts:
+                    time.sleep(delay)  # Sleeping after the last attempt only delays the error
+        raise RuntimeError("Failed to get company after retries") from last_error
+
+    def delete_company(self, company_id):
+        """DELETE the company and return the JSON body; raises requests.HTTPError on failure."""
+        url = f"{self.base_url}/api/companies/{company_id}"
+        logger.info(f"🗑️ Deleting Company ID: {company_id}")
+        res = self.session.delete(url, timeout=self.timeout)
+        res.raise_for_status()
+        return res.json()
+
+def _wait_for_status(client, company_id, wanted, attempts, delay, label):
+    """Poll the company until its status is one of wanted; return whether it got there."""
+    for i in range(attempts):
+        status = client.get_company(company_id)['status']
+        logger.info(f"Polling for {label} ({i+1}/{attempts}): {status}")
+        if status in wanted:
+            return True
+        time.sleep(delay)
+    return False
+
+def run_test():
+    """Run provisioning, discovery, analysis and write-back against the live API.
+
+    Failed checks are logged and collected so the later steps still run; the
+    process exits with status 1 if any check failed or an exception occurred.
+    The test company is deleted at the end whenever its ID is known.
+    """
+    client = CompanyExplorerClient(API_URL, API_USER, API_PASSWORD)
+
+    if not client.check_health():
+        logger.error("Aborting test due to health check failure.")
+        sys.exit(1)
+
+    company_id = None  # Set once the company is found; cleanup depends on it
+    failures = []
+
+    def fail(message):
+        """Log a failed check and remember it for the final exit code."""
+        logger.error(message)
+        failures.append(message)
+
+    # 1. Trigger Provisioning (Create & Discover)
+    # We first send a request WITHOUT job title to just ensure company exists/starts discovery
+    initial_payload = {
+        "so_contact_id": TEST_COMPANY["so_contact_id"],
+        "crm_name": TEST_COMPANY["crm_name"],
+        "crm_website": TEST_COMPANY["crm_website"],
+        # No person/job title yet
+    }
+
+    try:
+        res = client.provision_contact(initial_payload)
+        logger.info(f"Initial Provision Response: {res['status']}")
+
+        # The /provision endpoint returns status and name, but NOT the ID,
+        # so search for the company by name to get an ID we can poll.
+        time.sleep(1) # Wait for DB write
+        search = client.session.get(
+            f"{API_URL}/api/companies",
+            params={"search": TEST_COMPANY["crm_name"]},  # Let requests URL-encode the name
+            timeout=client.timeout,
+        )
+        search.raise_for_status()
+        search_res = search.json()
+        if not search_res['items']:
+            logger.error("❌ Company not found after creation!")
+            sys.exit(1)
+
+        company_id = search_res['items'][0]['id']
+        logger.info(f"Found Company ID: {company_id}")
+
+        # 2. Poll for Status "DISCOVERED" first
+        if not _wait_for_status(client, company_id, ("DISCOVERED", "ENRICHED"), 10, 2, "Discovery"):
+            # Not fatal: the next step triggers the analysis explicitly anyway
+            logger.warning("⚠️ Discovery not confirmed within the polling window, continuing.")
+
+        # 3. Explicitly Trigger Analysis
+        # This ensures we don't rely on implicit side-effects of the provision endpoint
+        logger.info("🚀 Triggering Analysis explicitly...")
+        res_analyze = client.session.post(
+            f"{API_URL}/api/enrich/analyze",
+            json={"company_id": company_id, "force_scrape": True},
+            timeout=client.timeout,
+        )
+        if res_analyze.status_code != 200:
+            logger.warning(f"Analysis trigger warning: {res_analyze.text}")
+        else:
+            logger.info("✅ Analysis triggered.")
+
+        # 4. Poll for Status "ENRICHED" (more attempts: analysis takes time)
+        if not _wait_for_status(client, company_id, ("ENRICHED",), 40, 5, "Enrichment"):
+            # Don't exit, try to inspect what we have
+            fail("❌ Timeout waiting for Enrichment.")
+
+        # 5. Verify Opener Logic
+        final_company = client.get_company(company_id)
+
+        logger.info("--- 🔍 Verifying Analysis Results ---")
+        logger.info(f"Industry: {final_company.get('industry_ai')}")
+        logger.info(f"Metrics: {final_company.get('calculated_metric_name')} = {final_company.get('calculated_metric_value')}")
+
+        opener_primary = final_company.get('ai_opener')
+        opener_secondary = final_company.get('ai_opener_secondary')
+
+        logger.info(f"Opener (Primary): {opener_primary}")
+        logger.info(f"Opener (Secondary): {opener_secondary}")
+
+        if not opener_primary or not opener_secondary:
+            # Keep going to see if write-back works at least partially
+            fail("❌ Openers are missing!")
+        else:
+            logger.info("✅ Openers generated.")
+
+        # 6. Simulate Final Write-Back (Provisioning with Person)
+        full_payload = TEST_COMPANY.copy()
+        logger.info("🚀 Triggering Final Provisioning (Write-Back Simulation)...")
+        final_res = client.provision_contact(full_payload)
+
+        logger.info(f"Final Response Status: {final_res['status']}")
+        logger.info(f"Role: {final_res.get('role_name')}")
+        logger.info(f"Subject: {final_res.get('texts', {}).get('subject')}")
+
+        # Assertions
+        if final_res['status'] != "success":
+            fail(f"❌ Expected status 'success', got '{final_res['status']}'")
+
+        if final_res.get('opener') != opener_primary:
+            fail("❌ Primary Opener mismatch in response")
+
+        if final_res.get('opener_secondary') != opener_secondary:
+            fail("❌ Secondary Opener mismatch in response")
+
+        if not final_res.get('texts', {}).get('intro'):
+            logger.warning("⚠️ Matrix Text (intro) missing (Check Seed Data)")
+        else:
+            logger.info("✅ Matrix Texts present.")
+
+        if failures:
+            logger.error(f"❌ E2E Test finished with {len(failures)} failed check(s).")
+        else:
+            logger.info("🎉 E2E Test Completed Successfully (mostly)!")
+
+    except Exception as e:
+        logger.error(f"💥 Test Failed with Exception: {e}", exc_info=True)
+        failures.append(str(e))
+    finally:
+        # Cleanup
+        try:
+            if company_id is not None:
+                client.delete_company(company_id)
+                logger.info("✅ Cleanup complete.")
+        except Exception as e:
+            logger.error(f"Cleanup failed: {e}")
+
+    if failures:
+        sys.exit(1)
+
+if __name__ == "__main__":
+    run_test()
--- a/company-explorer/backend/tests/test_metric_extraction_hospital.py
+++ b/company-explorer/backend/tests/test_metric_extraction_hospital.py
@@ -0,0 +1,94 @@
+import unittest
+import os
+import sys
+from unittest.mock import MagicMock, patch
+
+# Adjust path to allow importing from backend
+# NOTE(review): this inserts the directory above tests/ (backend/ itself); the `backend.*` imports below presumably rely on its parent already being importable - confirm
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from backend.services.classification import ClassificationService
+from backend.database import Company, Industry, RoboticsCategory, Session
+
+class TestHospitalMetricFinal(unittest.TestCase):
+    """Checks that cascade metrics and both openers end up on the classified company."""
+
+    def setUp(self):
+        """Create the service, a Session-shaped mock and the hospital fixtures."""
+        self.service = ClassificationService()
+        self.mock_db = MagicMock(spec=Session)
+
+        self.mock_company = Company(id=8, name="Klinikum Landkreis Erding")
+
+        cleaning_robots = RoboticsCategory(name="Reinigungsroboter")
+        service_robots = RoboticsCategory(name="Serviceroboter")
+        self.mock_industry_hospital = Industry(
+            id=1,
+            name="Healthcare - Hospital",
+            scraper_search_term="Anzahl Betten",
+            standardization_logic="wert * 100",
+            primary_category=cleaning_robots,
+            secondary_category=service_robots,
+        )
+
+        # Stand-in for the scraped website text
+        self.mock_website_content = "Ein langer Text, der die 100-Zeichen-Prüfung besteht."
+
+    # Decorators apply bottom-up, so the mock arguments arrive in reverse order
+    @patch.object(ClassificationService, '_generate_marketing_opener')
+    @patch.object(ClassificationService, '_extract_and_calculate_metric_cascade')
+    @patch.object(ClassificationService, '_find_direct_area')
+    @patch.object(ClassificationService, '_run_llm_classification_prompt')
+    @patch.object(ClassificationService, '_get_website_content_and_url')
+    @patch.object(ClassificationService, '_load_industry_definitions')
+    def test_final_hospital_logic(
+        self,
+        mock_load_industries,
+        mock_get_website,
+        mock_classify,
+        mock_find_direct_area,
+        mock_extract_cascade,
+        mock_generate_opener
+    ):
+        # With no direct area found (stage 1), the cascade result and openers must be applied
+        print("\n--- Running Final Hospital Logic Test ---")
+
+        # --- MOCK SETUP ---
+        mock_load_industries.return_value = [self.mock_industry_hospital]
+        mock_get_website.return_value = (self.mock_website_content, "http://mock.com")
+        mock_classify.return_value = "Healthcare - Hospital"
+        mock_find_direct_area.return_value = None # STAGE 1 MUST FAIL
+        mock_extract_cascade.return_value = dict(
+            calculated_metric_name="Anzahl Betten",
+            calculated_metric_value=352.0,
+            calculated_metric_unit="Betten",
+            standardized_metric_value=35200.0,
+            standardized_metric_unit="m²",
+            metric_source="wikipedia",
+        )
+        mock_generate_opener.side_effect = ["Primary Opener", "Secondary Opener"]
+
+        # --- EXECUTION ---
+        updated_company = self.service.classify_company_potential(self.mock_company, self.mock_db)
+
+        # --- ASSERTIONS ---
+        for stage_mock in (mock_find_direct_area, mock_extract_cascade):
+            stage_mock.assert_called_once()
+
+        expected_metrics = (
+            ("calculated_metric_name", "Anzahl Betten"),
+            ("calculated_metric_value", 352.0),
+            ("standardized_metric_value", 35200.0),
+        )
+        for field, value in expected_metrics:
+            self.assertEqual(getattr(updated_company, field), value)
+        print("   ✅ Metrics from Stage 2 correctly applied.")
+
+        self.assertEqual(updated_company.ai_opener, "Primary Opener")
+        self.assertEqual(updated_company.ai_opener_secondary, "Secondary Opener")
+        print("   ✅ Openers correctly applied.")
+
+        print("\n--- ✅ PASSED: Final Hospital Logic Test. ---")
+
+if __name__ == '__main__':
+    unittest.main()
--- a/company-explorer/backend/tests/test_opener_logic.py
+++ b/company-explorer/backend/tests/test_opener_logic.py
@@ -0,0 +1,112 @@
+import sys
+import os
+import logging
+import unittest
+from unittest.mock import MagicMock, patch
+
+# Add backend path (hard-coded /app)
+sys.path.insert(0, "/app")
+
+from backend.services.classification import ClassificationService
+from backend.database import Company, Industry
+
+# Setup basic logging
+logging.basicConfig(level=logging.INFO)
+
+class TestOpenerGeneration(unittest.TestCase):
+    """Checks that classify_company_potential generates and stores both openers."""
+
+    def setUp(self):
+        """Set up a mock environment."""
+        # Mock Industry object (as if read from DB).
+        # It carries a description, so the classifier can match the industry.
+        self.mock_industry = Industry(
+            name="Leisure - Wet & Spa",
+            description="Thermalbad, Spa, Wasserwelten, Saunen und Rutschenparks.",
+            pains="Hygiene"
+        )
+
+        # Patch _load_industry_definitions per test instead of only under
+        # __main__, so the mock is also active under pytest / unittest discovery.
+        # Started before the service is created so it covers construction too.
+        load_patcher = patch('backend.services.classification.ClassificationService._load_industry_definitions')
+        mock_load = load_patcher.start()
+        self.addCleanup(load_patcher.stop)
+        mock_load.return_value = [self.mock_industry]
+
+        self.service = ClassificationService()
+
+        # Mock a database session
+        self.mock_db = MagicMock()
+
+        # Mock Company object
+        self.mock_company = Company(
+            id=1,
+            name="Therme Erding",
+            industry_ai="Leisure - Wet & Spa"
+        )
+        # Add the fields we are testing
+        self.mock_company.ai_opener = None
+        self.mock_company.ai_opener_secondary = None
+
+    @patch('backend.services.classification.call_gemini_flash')
+    @patch('backend.services.classification.ClassificationService._run_llm_classification_prompt')
+    def test_dual_opener_generation(self, mock_classification_call, mock_gemini_call):
+        """
+        Test that both primary and secondary openers are generated and stored.
+        """
+        print("\n--- Running Integration Test for Dual Opener Generation ---")
+
+        # --- Configure Mocks ---
+        # 1. Mock the classification call to return the correct industry
+        mock_classification_call.return_value = "Leisure - Wet & Spa"
+
+        # 2. Mock the opener generation calls (Gemini)
+        mock_gemini_call.side_effect = [
+            "Der reibungslose Betrieb ist entscheidend, um maximale Sicherheit zu gewährleisten.", # Mocked Primary
+            "Ein einzigartiges Gästeerlebnis ist der Schlüssel zum Erfolg."  # Mocked Secondary
+        ]
+
+        # Mock the content loader to return some text
+        with patch.object(self.service, '_get_website_content_and_url', return_value=("Die Therme Erding ist die größte Therme der Welt.", "http://mock.com")):
+            # --- Execute the Method ---
+            print("1. Calling classify_company_potential...")
+            # We patch the metric extraction to isolate the opener logic
+            with patch.object(self.service, 'extract_metrics_for_industry', return_value=self.mock_company):
+                # The method under test!
+                result_company = self.service.classify_company_potential(self.mock_company, self.mock_db)
+
+        # --- Assertions ---
+        print("2. Verifying results...")
+
+        # 1. Check that Gemini was called twice for the OPENERS
+        self.assertEqual(mock_gemini_call.call_count, 2, "❌ FAILED: AI model for OPENERS should have been called twice.")
+        print("   ✅ AI model for openers was called twice.")
+
+        # 2. Check that the classification prompt was called
+        self.assertEqual(mock_classification_call.call_count, 1, "❌ FAILED: Classification prompt should have been called once.")
+        print("   ✅ Classification prompt was called once.")
+
+        # 3. Check prompts contained the correct focus
+        first_call_args, _ = mock_gemini_call.call_args_list[0]
+        second_call_args, _ = mock_gemini_call.call_args_list[1]
+        self.assertIn("FOKUS: PRIMÄR-PROZESSE", first_call_args[0], "❌ FAILED: First call prompt did not have PRIMARY focus.")
+        print("   ✅ First opener call had PRIMARY focus.")
+        self.assertIn("FOKUS: SEKUNDÄR-PROZESSE", second_call_args[0], "❌ FAILED: Second call prompt did not have SECONDARY focus.")
+        print("   ✅ Second opener call had SECONDARY focus.")
+
+        # 4. Check that the results were stored on the company object
+        self.assertIsNotNone(result_company.ai_opener, "❌ FAILED: ai_opener (primary) was not set.")
+        self.assertIsNotNone(result_company.ai_opener_secondary, "❌ FAILED: ai_opener_secondary was not set.")
+        print("   ✅ Both ai_opener fields were set on the company object.")
+
+        # 5. Check content of the fields
+        self.assertIn("Sicherheit", result_company.ai_opener, "❌ FAILED: Primary opener content mismatch.")
+        print(f"   -> Primary Opener: '{result_company.ai_opener}'")
+        self.assertIn("Gästeerlebnis", result_company.ai_opener_secondary, "❌ FAILED: Secondary opener content mismatch.")
+        print(f"   -> Secondary Opener: '{result_company.ai_opener_secondary}'")
+
+        print("\n--- ✅ PASSED: Dual Opener logic is working correctly. ---")
+
+if __name__ == '__main__':
+    unittest.main()