[2f988f42] fix(company-explorer): Implement robust quantitative potential and atomic opener generation\n\n- Refactored ClassificationService for two-stage metric extraction (direct area and proxy).- Enhanced MetricParser for targeted value matching and robust number parsing.- Implemented persona-specific 'Atomic Opener' generation using segmented pains.- Fixed logging configuration and Pydantic response models.- Added dedicated debugging script and updated documentation (GEMINI.md, MIGRATION_PLAN.md).
This commit is contained in:
@@ -32,7 +32,7 @@ setup_logging()
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRoleMapping, ReportedMistake, MarketingMatrix, Persona
|
||||
from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRoleMapping, ReportedMistake, MarketingMatrix, Persona, RawJobTitle
|
||||
from .services.deduplication import Deduplicator
|
||||
from .services.discovery import DiscoveryService
|
||||
from .services.scraping import ScraperService
|
||||
@@ -101,6 +101,71 @@ class ProvisioningResponse(BaseModel):
|
||||
opener_secondary: Optional[str] = None # Secondary opener (Service/Logistics)
|
||||
texts: Dict[str, Optional[str]] = {}
|
||||
|
||||
class IndustryDetails(BaseModel):
|
||||
pains: Optional[str] = None
|
||||
gains: Optional[str] = None
|
||||
priority: Optional[str] = None
|
||||
notes: Optional[str] = None
|
||||
ops_focus_secondary: bool = False
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
class ContactResponse(BaseModel):
|
||||
id: int
|
||||
first_name: Optional[str] = None
|
||||
last_name: Optional[str] = None
|
||||
job_title: Optional[str] = None
|
||||
role: Optional[str] = None
|
||||
email: Optional[str] = None
|
||||
is_primary: bool
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
class EnrichmentDataResponse(BaseModel):
|
||||
id: int
|
||||
source_type: str
|
||||
content: Dict[str, Any]
|
||||
is_locked: bool
|
||||
wiki_verified_empty: bool
|
||||
updated_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
class CompanyDetailsResponse(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
website: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
country: Optional[str] = None
|
||||
industry_ai: Optional[str] = None
|
||||
status: str
|
||||
|
||||
# Metrics
|
||||
calculated_metric_name: Optional[str] = None
|
||||
calculated_metric_value: Optional[float] = None
|
||||
calculated_metric_unit: Optional[str] = None
|
||||
standardized_metric_value: Optional[float] = None
|
||||
standardized_metric_unit: Optional[str] = None
|
||||
metric_source: Optional[str] = None
|
||||
metric_proof_text: Optional[str] = None
|
||||
metric_source_url: Optional[str] = None
|
||||
metric_confidence: Optional[float] = None
|
||||
|
||||
# Openers
|
||||
ai_opener: Optional[str] = None
|
||||
ai_opener_secondary: Optional[str] = None
|
||||
|
||||
# Relations
|
||||
industry_details: Optional[IndustryDetails] = None
|
||||
contacts: List[ContactResponse] = []
|
||||
enrichment_data: List[EnrichmentDataResponse] = []
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
# --- Events ---
|
||||
@app.on_event("startup")
|
||||
def on_startup():
|
||||
@@ -336,7 +401,7 @@ def export_companies_csv(db: Session = Depends(get_db), username: str = Depends(
|
||||
headers={"Content-Disposition": f"attachment; filename=company_export_{datetime.utcnow().strftime('%Y-%m-%d')}.csv"}
|
||||
)
|
||||
|
||||
@app.get("/api/companies/{company_id}")
|
||||
@app.get("/api/companies/{company_id}", response_model=CompanyDetailsResponse)
|
||||
def get_company(company_id: int, db: Session = Depends(get_db), username: str = Depends(authenticate_user)):
|
||||
company = db.query(Company).options(
|
||||
joinedload(Company.enrichment_data),
|
||||
@@ -350,28 +415,14 @@ def get_company(company_id: int, db: Session = Depends(get_db), username: str =
|
||||
if company.industry_ai:
|
||||
ind = db.query(Industry).filter(Industry.name == company.industry_ai).first()
|
||||
if ind:
|
||||
industry_details = {
|
||||
"pains": ind.pains,
|
||||
"gains": ind.gains,
|
||||
"priority": ind.priority,
|
||||
"notes": ind.notes,
|
||||
"ops_focus_secondary": ind.ops_focus_secondary
|
||||
}
|
||||
industry_details = IndustryDetails.model_validate(ind)
|
||||
|
||||
# HACK: Attach to response object (Pydantic would be cleaner, but this works for fast prototyping)
|
||||
# We convert to dict and append
|
||||
resp = company.__dict__.copy()
|
||||
resp["industry_details"] = industry_details
|
||||
# Handle SQLAlchemy internal state
|
||||
if "_sa_instance_state" in resp: del resp["_sa_instance_state"]
|
||||
# Handle relationships manually if needed, or let FastAPI encode the SQLAlchemy model + extra dict
|
||||
# Better: return a custom dict merging both
|
||||
# FastAPI will automatically serialize the 'company' ORM object into the
|
||||
# CompanyDetailsResponse schema. We just need to attach the extra 'industry_details'.
|
||||
response_data = CompanyDetailsResponse.model_validate(company)
|
||||
response_data.industry_details = industry_details
|
||||
|
||||
# Since we use joinedload, relationships are loaded.
|
||||
# Let's rely on FastAPI's ability to serialize the object, but we need to inject the extra field.
|
||||
# The safest way without changing Pydantic schemas everywhere is to return a dict.
|
||||
|
||||
return {**resp, "enrichment_data": company.enrichment_data, "contacts": company.contacts, "signals": company.signals}
|
||||
return response_data
|
||||
|
||||
@app.post("/api/companies")
|
||||
def create_company(company: CompanyCreate, db: Session = Depends(get_db), username: str = Depends(authenticate_user)):
|
||||
@@ -797,23 +848,21 @@ def run_analysis_task(company_id: int):
|
||||
db = SessionLocal()
|
||||
try:
|
||||
company = db.query(Company).filter(Company.id == company_id).first()
|
||||
if not company: return
|
||||
if not company:
|
||||
logger.error(f"Analysis Task: Company with ID {company_id} not found.")
|
||||
return
|
||||
|
||||
logger.info(f"Running Analysis Task for {company.name}")
|
||||
logger.info(f"--- [BACKGROUND TASK] Starting for {company.name} ---")
|
||||
|
||||
# --- 1. Scrape Website (if not locked) ---
|
||||
# Check for existing scrape data first
|
||||
existing_scrape = db.query(EnrichmentData).filter(
|
||||
EnrichmentData.company_id == company.id,
|
||||
EnrichmentData.source_type == "website_scrape"
|
||||
).first()
|
||||
|
||||
# If it doesn't exist or is not locked, we perform a scrape
|
||||
if not existing_scrape or not existing_scrape.is_locked:
|
||||
logger.info(f"Scraping website for {company.name}...")
|
||||
scrape_res = scraper.scrape_url(company.website) # Use singleton
|
||||
|
||||
# Now, either create new or update existing
|
||||
scrape_res = scraper.scrape_url(company.website)
|
||||
if not existing_scrape:
|
||||
db.add(EnrichmentData(company_id=company.id, source_type="website_scrape", content=scrape_res))
|
||||
logger.info("Created new website_scrape entry.")
|
||||
@@ -825,15 +874,16 @@ def run_analysis_task(company_id: int):
|
||||
else:
|
||||
logger.info("Website scrape is locked. Skipping.")
|
||||
|
||||
# 2. Classify Industry & Metrics
|
||||
# IMPORTANT: Using the new method name and passing db session
|
||||
# --- 2. Classify Industry & Metrics ---
|
||||
logger.info(f"Handing over to ClassificationService for {company.name}...")
|
||||
classifier.classify_company_potential(company, db)
|
||||
|
||||
company.status = "ENRICHED"
|
||||
db.commit()
|
||||
logger.info(f"Analysis complete for {company.name}")
|
||||
logger.info(f"--- [BACKGROUND TASK] Successfully finished for {company.name} ---")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Analyze Task Error: {e}", exc_info=True)
|
||||
logger.critical(f"--- [BACKGROUND TASK] CRITICAL ERROR for Company ID {company_id} ---", exc_info=True)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ try:
|
||||
SERP_API_KEY: Optional[str] = None
|
||||
|
||||
# Paths
|
||||
LOG_DIR: str = "/app/logs_debug"
|
||||
LOG_DIR: str = "/app/Log_from_docker"
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
@@ -40,7 +40,7 @@ except ImportError:
|
||||
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||||
SERP_API_KEY = os.getenv("SERP_API_KEY")
|
||||
LOG_DIR = "/app/logs_debug"
|
||||
LOG_DIR = "/app/Log_from_docker"
|
||||
|
||||
settings = FallbackSettings()
|
||||
|
||||
|
||||
@@ -68,6 +68,10 @@ class Company(Base):
|
||||
metric_source_url = Column(Text, nullable=True) # URL where the proof was found
|
||||
metric_confidence = Column(Float, nullable=True) # 0.0 - 1.0
|
||||
metric_confidence_reason = Column(Text, nullable=True) # Why is it high/low?
|
||||
|
||||
# NEW: AI-generated Marketing Openers
|
||||
ai_opener = Column(Text, nullable=True)
|
||||
ai_opener_secondary = Column(Text, nullable=True)
|
||||
|
||||
# Relationships
|
||||
signals = relationship("Signal", back_populates="company", cascade="all, delete-orphan")
|
||||
|
||||
@@ -23,52 +23,43 @@ class MetricParser:
|
||||
|
||||
# 1. Pre-cleaning
|
||||
text_processed = str(text).strip()
|
||||
logger.info(f"[MetricParser] Processing: '{text_processed}' (Expected: {expected_value})")
|
||||
logger.info(f"[MetricParser] Processing text (len: {len(text_processed)}) (Hint: {expected_value})")
|
||||
|
||||
# Optimize: If we have an expected value, try to clean and parse THAT first
|
||||
# Optimize: If we have an expected value (hint), try to find that specific number first
|
||||
if expected_value:
|
||||
# Try to parse the LLM's raw value directly first (it's often cleaner: "200000")
|
||||
try:
|
||||
# Remove simple noise from expected value
|
||||
# Aggressively strip units and text to isolate the number
|
||||
clean_expected = str(expected_value).lower()
|
||||
# Remove common units
|
||||
for unit in ['m²', 'qm', 'sqm', 'mitarbeiter', 'employees', 'eur', 'usd', 'chf', '€', '$', '£', '¥']:
|
||||
clean_expected = clean_expected.replace(unit, "")
|
||||
|
||||
# Remove multipliers text (we handle multipliers via is_revenue later, but for expected value matching we want the raw number)
|
||||
# Actually, expected_value "2.5 Mio" implies we want to match 2.5 in the text, OR 2500000?
|
||||
# Usually the LLM extract matches the text representation.
|
||||
clean_expected = clean_expected.replace("mio", "").replace("millionen", "").replace("mrd", "").replace("milliarden", "")
|
||||
clean_expected = clean_expected.replace("tsd", "").replace("tausend", "")
|
||||
|
||||
# Final cleanup of non-numeric chars (allow . , ' -)
|
||||
# But preserve structure for robust parser
|
||||
clean_expected = clean_expected.replace(" ", "").replace("'", "")
|
||||
|
||||
# If it looks like a clean number already, try parsing it
|
||||
# But use the robust parser to handle German decimals if present in expected
|
||||
val = MetricParser._parse_robust_number(clean_expected, is_revenue)
|
||||
|
||||
# Check if this value (or a close representation) actually exists in the text
|
||||
# This prevents hallucination acceptance, but allows the LLM to guide us to the *second* number in a string.
|
||||
# Simplified check: is the digits sequence present?
|
||||
# No, better: Let the parser run on the FULL text, find all candidates, and pick the one closest to 'val'.
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
# Clean the hint to get the target digits (e.g. "352" from "352 Betten")
|
||||
# We only take the FIRST sequence of digits as the target
|
||||
hint_match = re.search(r'[\d\.,\']+', str(expected_value))
|
||||
if hint_match:
|
||||
target_str = hint_match.group(0)
|
||||
target_digits = re.sub(r'[^0-9]', '', target_str)
|
||||
|
||||
if target_digits:
|
||||
# Find all numbers in the text and check if they match our target
|
||||
all_numbers_in_text = re.findall(r'[\d\.,\']+', text_processed)
|
||||
for num_str in all_numbers_in_text:
|
||||
if target_digits == re.sub(r'[^0-9]', '', num_str):
|
||||
# Exact digit match!
|
||||
val = MetricParser._parse_robust_number(num_str, is_revenue)
|
||||
if val is not None:
|
||||
logger.info(f"[MetricParser] Found targeted value via hint: '{num_str}' -> {val}")
|
||||
return val
|
||||
except Exception as e:
|
||||
logger.error(f"Error while parsing with hint: {e}")
|
||||
|
||||
# Fallback: Classic robust parsing
|
||||
# Normalize quotes
|
||||
text_processed = text_processed.replace("’", "'").replace("‘", "'")
|
||||
|
||||
# 2. Remove noise: Citations [1] and Year/Date in parentheses (2020)
|
||||
# We remove everything in parentheses/brackets as it's almost always noise for the metric itself.
|
||||
text_processed = re.sub(r'\(.*?\)|\[.*?\]', ' ', text_processed).strip()
|
||||
|
||||
# 3. Remove common prefixes and currency symbols
|
||||
prefixes = [
|
||||
r'ca\.?\s*', r'circa\s*', r'rund\s*', r'etwa\s*', r'über\s*', r'unter\s*',
|
||||
r'ca\.?:?\s*', r'circa\s*', r'rund\s*', r'etwa\s*', r'über\s*', r'unter\s*',
|
||||
r'mehr als\s*', r'weniger als\s*', r'bis zu\s*', r'about\s*', r'over\s*',
|
||||
r'approx\.?\s*', r'around\s*', r'up to\s*', r'~\s*', r'rd\.?\s*'
|
||||
r'approx\.?:?\s*', r'around\s*', r'up to\s*', r'~\s*', r'rd\.?:?\s*'
|
||||
]
|
||||
currencies = [
|
||||
r'€', r'EUR', r'US\$', r'USD', r'CHF', r'GBP', r'£', r'¥', r'JPY'
|
||||
@@ -79,23 +70,16 @@ class MetricParser:
|
||||
for c in currencies:
|
||||
text_processed = re.sub(f'(?i){c}', '', text_processed).strip()
|
||||
|
||||
# 4. Remove Range Splitting (was too aggressive, cutting off text after dashes)
|
||||
# Old: text_processed = re.split(r'\s*(-|–|bis|to)\s*', text_processed, 1)[0].strip()
|
||||
|
||||
# 5. Extract Multipliers (Mio, Mrd)
|
||||
# 4. Extract Multipliers (Mio, Mrd)
|
||||
multiplier = 1.0
|
||||
lower_text = text_processed.lower()
|
||||
|
||||
def has_unit(text, units):
|
||||
for u in units:
|
||||
# Escape special chars if any, though mostly alphanumeric here
|
||||
# Use word boundaries \b for safe matching
|
||||
if re.search(r'\b' + re.escape(u) + r'\b', text):
|
||||
return True
|
||||
return False
|
||||
|
||||
# For Revenue, we normalize to Millions (User Rule)
|
||||
# For others (Employees), we scale to absolute numbers
|
||||
if is_revenue:
|
||||
if has_unit(lower_text, ['mrd', 'milliarden', 'billion', 'bn']):
|
||||
multiplier = 1000.0
|
||||
@@ -111,214 +95,92 @@ class MetricParser:
|
||||
elif has_unit(lower_text, ['tsd', 'tausend', 'k']):
|
||||
multiplier = 1000.0
|
||||
|
||||
# 6. Extract the number candidate
|
||||
# Loop through matches to find the best candidate (skipping years if possible)
|
||||
# 5. Extract the first valid number candidate
|
||||
candidates = re.finditer(r'([\d\.,\'\s]+)', text_processed)
|
||||
|
||||
selected_candidate = None
|
||||
best_candidate_val = None
|
||||
|
||||
matches = [m for m in candidates]
|
||||
# logger.info(f"DEBUG matches: {[m.group(1) for m in matches]}")
|
||||
# logger.info(f"DEBUG: Found {len(matches)} matches: {[m.group(1) for m in matches]}")
|
||||
|
||||
# Helper to parse a candidate string
|
||||
def parse_cand(c):
|
||||
# Extract temporary multiplier for this specific candidate context?
|
||||
# Complex. For now, we assume the global multiplier applies or we rely on the candidates raw numeric value.
|
||||
# Actually, simpler: We parse the candidate as is (treating as raw number)
|
||||
try:
|
||||
# Remove thousands separators for comparison
|
||||
c_clean = c.replace("'", "").replace(".", "").replace(" ", "").replace(",", ".") # Rough EN/DE mix
|
||||
return float(c_clean)
|
||||
except:
|
||||
return None
|
||||
|
||||
# Parse expected value for comparison
|
||||
target_val = None
|
||||
if expected_value:
|
||||
try:
|
||||
# Re-apply aggressive cleaning to ensure we have a valid float for comparison
|
||||
clean_expected = str(expected_value).lower()
|
||||
for unit in ['m²', 'qm', 'sqm', 'mitarbeiter', 'employees', 'eur', 'usd', 'chf', '€', '$', '£', '¥']:
|
||||
clean_expected = clean_expected.replace(unit, "")
|
||||
clean_expected = clean_expected.replace("mio", "").replace("millionen", "").replace("mrd", "").replace("milliarden", "")
|
||||
clean_expected = clean_expected.replace("tsd", "").replace("tausend", "")
|
||||
clean_expected = clean_expected.replace(" ", "").replace("'", "")
|
||||
|
||||
target_val = MetricParser._parse_robust_number(clean_expected, is_revenue)
|
||||
except:
|
||||
pass
|
||||
|
||||
for i, match in enumerate(matches):
|
||||
for match in candidates:
|
||||
cand = match.group(1).strip()
|
||||
if not cand: continue
|
||||
if not cand or not re.search(r'\d', cand):
|
||||
continue
|
||||
|
||||
# Clean candidate for analysis (remove separators)
|
||||
# Clean candidate
|
||||
clean_cand = cand.replace("'", "").replace(".", "").replace(",", "").replace(" ", "")
|
||||
|
||||
# Check if it looks like a year (4 digits, 1900-2100)
|
||||
is_year_like = False
|
||||
# Year detection
|
||||
if clean_cand.isdigit() and len(clean_cand) == 4:
|
||||
val = int(clean_cand)
|
||||
if 1900 <= val <= 2100:
|
||||
is_year_like = True
|
||||
continue # Skip years
|
||||
|
||||
# Smart Year Skip (Legacy Logic)
|
||||
if is_year_like and not target_val: # Only skip if we don't have a specific target
|
||||
if i < len(matches) - 1:
|
||||
logger.info(f"[MetricParser] Skipping year-like candidate '{cand}' because another number follows.")
|
||||
continue
|
||||
|
||||
# Clean candidate for checking (remove internal spaces if they look like thousands separators)
|
||||
# Simple approach: Remove all spaces for parsing check
|
||||
cand_clean_for_parse = cand.replace(" ", "")
|
||||
|
||||
# If we have a target value from LLM, check if this candidate matches it
|
||||
if target_val is not None:
|
||||
try:
|
||||
curr_val = MetricParser._parse_robust_number(cand_clean_for_parse, is_revenue)
|
||||
|
||||
if abs(curr_val - target_val) < 0.1 or abs(curr_val - target_val/1000) < 0.1 or abs(curr_val - target_val*1000) < 0.1:
|
||||
selected_candidate = cand # Keep original with spaces for final processing
|
||||
logger.info(f"[MetricParser] Found candidate '{cand}' matching expected '{expected_value}'")
|
||||
break
|
||||
except:
|
||||
pass
|
||||
|
||||
# Fallback logic:
|
||||
# If we have NO target value, we take the first valid one we find.
|
||||
# If we DO have a target value, we only take a fallback if we reach the end and haven't found the target?
|
||||
# Better: We keep the FIRST valid candidate as a fallback in a separate variable.
|
||||
|
||||
if selected_candidate is None:
|
||||
# Check if it's a valid number at all before storing as fallback
|
||||
try:
|
||||
MetricParser._parse_robust_number(cand_clean_for_parse, is_revenue)
|
||||
if not is_year_like:
|
||||
if best_candidate_val is None: # Store first valid non-year
|
||||
best_candidate_val = cand
|
||||
except:
|
||||
pass
|
||||
# Smart separator handling for spaces
|
||||
if " " in cand:
|
||||
parts = cand.split()
|
||||
if len(parts) > 1:
|
||||
if not (len(parts[1]) == 3 and parts[1].isdigit()):
|
||||
cand = parts[0]
|
||||
else:
|
||||
merged = parts[0]
|
||||
for p in parts[1:]:
|
||||
if len(p) == 3 and p.isdigit():
|
||||
merged += p
|
||||
else:
|
||||
break
|
||||
cand = merged
|
||||
|
||||
# If we found a specific match, use it. Otherwise use the fallback.
|
||||
if selected_candidate:
|
||||
candidate = selected_candidate
|
||||
elif best_candidate_val:
|
||||
candidate = best_candidate_val
|
||||
else:
|
||||
return None
|
||||
|
||||
# logger.info(f"DEBUG: Selected candidate: '{candidate}'")
|
||||
|
||||
# Smart separator handling (on the chosen candidate):
|
||||
|
||||
# Smart separator handling:
|
||||
|
||||
# Smart separator handling:
|
||||
# A space is only a thousands-separator if it's followed by 3 digits.
|
||||
# Otherwise it's likely a separator between unrelated numbers (e.g. "80 2020")
|
||||
if " " in candidate:
|
||||
parts = candidate.split()
|
||||
if len(parts) > 1:
|
||||
# Basic check: if second part is not 3 digits, we take only the first part
|
||||
if not (len(parts[1]) == 3 and parts[1].isdigit()):
|
||||
candidate = parts[0]
|
||||
else:
|
||||
# It might be 1 000. Keep merging if subsequent parts are also 3 digits.
|
||||
merged = parts[0]
|
||||
for p in parts[1:]:
|
||||
if len(p) == 3 and p.isdigit():
|
||||
merged += p
|
||||
else:
|
||||
break
|
||||
candidate = merged
|
||||
|
||||
# Remove thousands separators (Quote)
|
||||
candidate = candidate.replace("'", "")
|
||||
|
||||
if not candidate or not re.search(r'\d', candidate):
|
||||
return None
|
||||
try:
|
||||
val = MetricParser._parse_robust_number(cand, is_revenue)
|
||||
if val is not None:
|
||||
final = val * multiplier
|
||||
logger.info(f"[MetricParser] Found value: '{cand}' -> {final}")
|
||||
return final
|
||||
except:
|
||||
continue
|
||||
|
||||
# Count separators for rule checks
|
||||
dots = candidate.count('.')
|
||||
commas = candidate.count(',')
|
||||
|
||||
# 7. Concatenated Year Detection (Bug Fix for 802020)
|
||||
# If the number is long (5-7 digits) and ends with a recent year (2018-2026),
|
||||
# and has no separators, it's likely a concatenation like "802020".
|
||||
if dots == 0 and commas == 0 and " " not in candidate:
|
||||
if len(candidate) >= 5 and len(candidate) <= 7:
|
||||
for year in range(2018, 2027):
|
||||
y_str = str(year)
|
||||
if candidate.endswith(y_str):
|
||||
val_str = candidate[:-4]
|
||||
if val_str.isdigit():
|
||||
logger.warning(f"[MetricParser] Caught concatenated year BUG: '{candidate}' -> '{val_str}' (Year {year})")
|
||||
candidate = val_str
|
||||
break
|
||||
|
||||
try:
|
||||
val = MetricParser._parse_robust_number(candidate, is_revenue)
|
||||
final = val * multiplier
|
||||
logger.info(f"[MetricParser] Candidate: '{candidate}' -> Multiplier: {multiplier} -> Value: {final}")
|
||||
return final
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to parse number string '{candidate}': {e}")
|
||||
return None
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _parse_robust_number(s: str, is_revenue: bool) -> float:
|
||||
def _parse_robust_number(s: str, is_revenue: bool) -> Optional[float]:
|
||||
"""
|
||||
Parses a number string dealing with ambiguous separators.
|
||||
Standardizes to Python float.
|
||||
"""
|
||||
# Count separators
|
||||
s = s.strip().replace("'", "")
|
||||
if not s:
|
||||
return None
|
||||
|
||||
dots = s.count('.')
|
||||
commas = s.count(',')
|
||||
|
||||
# Case 1: Both present (e.g. 1.234,56 or 1,234.56)
|
||||
if dots > 0 and commas > 0:
|
||||
# Check which comes last
|
||||
if s.rfind('.') > s.rfind(','): # US Style: 1,234.56
|
||||
try:
|
||||
# Case 1: Both present
|
||||
if dots > 0 and commas > 0:
|
||||
if s.rfind('.') > s.rfind(','): # US Style
|
||||
return float(s.replace(',', ''))
|
||||
else: # German Style
|
||||
return float(s.replace('.', '').replace(',', '.'))
|
||||
|
||||
# Case 2: Multiple dots
|
||||
if dots > 1:
|
||||
return float(s.replace('.', ''))
|
||||
|
||||
# Case 3: Multiple commas
|
||||
if commas > 1:
|
||||
return float(s.replace(',', ''))
|
||||
else: # German Style: 1.234,56
|
||||
return float(s.replace('.', '').replace(',', '.'))
|
||||
|
||||
# Case 2: Multiple dots (Thousands: 1.000.000)
|
||||
if dots > 1:
|
||||
return float(s.replace('.', ''))
|
||||
|
||||
# Case 3: Multiple commas (Unusual, but treat as thousands)
|
||||
if commas > 1:
|
||||
return float(s.replace(',', ''))
|
||||
|
||||
# Case 4: Only Comma
|
||||
if commas == 1:
|
||||
# In German context "1,5" is 1.5. "1.000" is usually 1000.
|
||||
# If it looks like decimal (1-2 digits after comma), treat as decimal.
|
||||
# Except if it's exactly 3 digits and not is_revenue? No, comma is almost always decimal in DE.
|
||||
return float(s.replace(',', '.'))
|
||||
|
||||
# Case 5: Only Dot
|
||||
if dots == 1:
|
||||
# Ambiguity: "1.005" (1005) vs "1.5" (1.5)
|
||||
# Rule from Lesson 1: "1.005 Mitarbeiter" extracted as "1" (wrong).
|
||||
# If dot followed by exactly 3 digits (and no comma), it's a thousands separator.
|
||||
# FOR REVENUE: dots are generally decimals (375.6 Mio) unless unambiguous.
|
||||
# Case 4: Only Comma
|
||||
if commas == 1:
|
||||
return float(s.replace(',', '.'))
|
||||
|
||||
parts = s.split('.')
|
||||
if len(parts[1]) == 3:
|
||||
if is_revenue:
|
||||
# Revenue: 375.600 Mio? Unlikely compared to 375.6 Mio.
|
||||
# But 1.000 Mio is 1 Billion? No, 1.000 (thousand) millions.
|
||||
# User Rule: "Revenue: dots are generally treated as decimals"
|
||||
# "1.005" as revenue -> 1.005 (Millions)
|
||||
# "1.005" as employees -> 1005
|
||||
return float(s)
|
||||
else:
|
||||
return float(s.replace('.', ''))
|
||||
# Case 5: Only Dot
|
||||
if dots == 1:
|
||||
parts = s.split('.')
|
||||
if len(parts[1]) == 3:
|
||||
if is_revenue:
|
||||
return float(s)
|
||||
else:
|
||||
return float(s.replace('.', ''))
|
||||
return float(s)
|
||||
|
||||
return float(s)
|
||||
|
||||
return float(s)
|
||||
|
||||
except:
|
||||
return None
|
||||
72
company-explorer/backend/scripts/debug_single_company.py
Normal file
72
company-explorer/backend/scripts/debug_single_company.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
# Add the backend directory to the Python path
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
from backend.database import get_db, Company
|
||||
from backend.services.classification import ClassificationService
|
||||
from backend.lib.logging_setup import setup_logging
|
||||
|
||||
# --- CONFIGURATION ---
|
||||
# Setup logging to be very verbose for this script
|
||||
setup_logging()
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def run_debug_analysis(company_id: int):
|
||||
"""
|
||||
Runs the full classification and enrichment process for a single company
|
||||
in the foreground and prints detailed results.
|
||||
"""
|
||||
logger.info(f"--- Starting Interactive Debug for Company ID: {company_id} ---")
|
||||
|
||||
db_session = next(get_db())
|
||||
|
||||
try:
|
||||
# 1. Fetch the company
|
||||
company = db_session.query(Company).filter(Company.id == company_id).first()
|
||||
if not company:
|
||||
logger.error(f"Company with ID {company_id} not found.")
|
||||
return
|
||||
|
||||
logger.info(f"Found Company: {company.name}")
|
||||
|
||||
# --- PRE-ANALYSIS STATE ---
|
||||
print("\n--- METRICS BEFORE ---")
|
||||
print(f"Calculated: {company.calculated_metric_value} {company.calculated_metric_unit}")
|
||||
print(f"Standardized: {company.standardized_metric_value} {company.standardized_metric_unit}")
|
||||
print("----------------------\n")
|
||||
|
||||
# 2. Instantiate the service
|
||||
classifier = ClassificationService()
|
||||
|
||||
# 3. RUN THE CORE LOGIC
|
||||
# This will now print all the detailed logs we added
|
||||
updated_company = classifier.classify_company_potential(company, db_session)
|
||||
|
||||
# --- POST-ANALYSIS STATE ---
|
||||
print("\n--- METRICS AFTER ---")
|
||||
print(f"Industry (AI): {updated_company.industry_ai}")
|
||||
print(f"Metric Source: {updated_company.metric_source}")
|
||||
print(f"Proof Text: {updated_company.metric_proof_text}")
|
||||
print(f"Calculated: {updated_company.calculated_metric_value} {updated_company.calculated_metric_unit}")
|
||||
print(f"Standardized: {updated_company.standardized_metric_value} {updated_company.standardized_metric_unit}")
|
||||
print(f"\nOpener 1 (Infra): {updated_company.ai_opener}")
|
||||
print(f"Opener 2 (Ops): {updated_company.ai_opener_secondary}")
|
||||
print("---------------------")
|
||||
|
||||
logger.info(f"--- Interactive Debug Finished for Company ID: {company_id} ---")
|
||||
|
||||
finally:
|
||||
db_session.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run a single company analysis for debugging.")
|
||||
parser.add_argument("--id", type=int, default=1, help="The ID of the company to analyze.")
|
||||
args = parser.parse_args()
|
||||
|
||||
run_debug_analysis(args.id)
|
||||
67
company-explorer/backend/scripts/trigger_analysis.py
Normal file
67
company-explorer/backend/scripts/trigger_analysis.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import requests
|
||||
import os
|
||||
import time
|
||||
import argparse
|
||||
import sys
|
||||
import logging
|
||||
|
||||
# Add the backend directory to the Python path for relative imports to work
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
# --- Configuration ---
|
||||
def load_env_manual(path):
|
||||
if not os.path.exists(path):
|
||||
# print(f"⚠️ Warning: .env file not found at {path}") # Suppress for cleaner output in container
|
||||
return
|
||||
with open(path) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#') and '=' in line:
|
||||
key, val = line.split('=', 1)
|
||||
os.environ.setdefault(key.strip(), val.strip())
|
||||
|
||||
# Load .env (assuming it's in /app) - this needs to be run from /app or adjusted
|
||||
# For docker-compose exec from project root, /app is the container's WORKDIR
|
||||
load_env_manual('/app/.env')
|
||||
|
||||
API_USER = os.getenv("API_USER")
|
||||
API_PASS = os.getenv("API_PASSWORD")
|
||||
# When run INSIDE the container, the service is reachable via localhost
|
||||
CE_URL = "http://localhost:8000"
|
||||
ANALYZE_ENDPOINT = f"{CE_URL}/api/enrich/analyze"
|
||||
|
||||
def trigger_analysis(company_id: int):
|
||||
print("="*60)
|
||||
print(f"🚀 Triggering REAL analysis for Company ID: {company_id}")
|
||||
print("="*60)
|
||||
|
||||
payload = {"company_id": company_id}
|
||||
|
||||
try:
|
||||
# Added logging for API user/pass (debug only, remove in prod)
|
||||
logger.debug(f"API Call to {ANALYZE_ENDPOINT} with user {API_USER}")
|
||||
response = requests.post(ANALYZE_ENDPOINT, json=payload, auth=(API_USER, API_PASS), timeout=30) # Increased timeout
|
||||
|
||||
if response.status_code == 200 and response.json().get("status") == "queued":
|
||||
print(" ✅ SUCCESS: Analysis task has been queued on the server.")
|
||||
print(" The result will be available in the database and UI shortly.")
|
||||
return True
|
||||
else:
|
||||
print(f" ❌ FAILURE: Server responded with status {response.status_code}")
|
||||
print(f" Response: {response.text}")
|
||||
return False
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
print(f" ❌ FATAL: Could not connect to the server: {e}")
|
||||
return False
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Add a basic logger to the script itself for clearer output
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
parser = argparse.ArgumentParser(description="Trigger Company Explorer Analysis Task")
|
||||
parser.add_argument("--company-id", type=int, required=True, help="ID of the company to analyze")
|
||||
args = parser.parse_args()
|
||||
|
||||
trigger_analysis(args.company_id)
|
||||
@@ -5,7 +5,7 @@ import re
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from backend.database import Company, Industry, RoboticsCategory, EnrichmentData
|
||||
from backend.lib.core_utils import call_gemini_flash, safe_eval_math, run_serp_search
|
||||
@@ -19,9 +19,12 @@ class ClassificationService:
|
||||
pass
|
||||
|
||||
def _load_industry_definitions(self, db: Session) -> List[Industry]:
    """Load all Industry rows with their robotics categories eagerly loaded.

    Eager-loads the primary/secondary category relations via joinedload so
    later attribute access (e.g. in opener generation) does not trigger
    extra lazy-load queries.

    Returns the (possibly empty) list of Industry ORM objects.
    """
    industries = db.query(Industry).options(
        joinedload(Industry.primary_category),
        joinedload(Industry.secondary_category),
    ).all()
    if not industries:
        # Downstream classification falls back to "Others" in this case.
        logger.warning("No industry definitions found in DB.")
    return industries
|
||||
|
||||
def _get_wikipedia_content(self, db: Session, company_id: int) -> Optional[Dict[str, Any]]:
|
||||
@@ -49,18 +52,11 @@ Return ONLY the exact name of the industry.
|
||||
try:
|
||||
response = call_gemini_flash(prompt)
|
||||
if not response: return "Others"
|
||||
|
||||
cleaned = response.strip().replace('"', '').replace("'", "")
|
||||
# Simple fuzzy match check
|
||||
valid_names = [i['name'] for i in industry_definitions] + ["Others"]
|
||||
if cleaned in valid_names:
|
||||
return cleaned
|
||||
|
||||
# Fallback: Try to find name in response
|
||||
if cleaned in valid_names: return cleaned
|
||||
for name in valid_names:
|
||||
if name in cleaned:
|
||||
return name
|
||||
|
||||
if name in cleaned: return name
|
||||
return "Others"
|
||||
except Exception as e:
|
||||
logger.error(f"Classification Prompt Error: {e}")
|
||||
@@ -79,23 +75,20 @@ Return a JSON object with:
|
||||
- "raw_unit": The unit found (e.g. "Betten", "m²").
|
||||
- "proof_text": A short quote from the text proving this value.
|
||||
|
||||
**IMPORTANT:** Ignore obvious year numbers (like 1900-2026) if other, more plausible metric values are present in the text. Focus on the target metric.
|
||||
|
||||
JSON ONLY.
|
||||
"""
|
||||
try:
|
||||
response = call_gemini_flash(prompt, json_mode=True)
|
||||
if not response: return None
|
||||
|
||||
if isinstance(response, str):
|
||||
response = response.replace("```json", "").replace("```", "").strip()
|
||||
data = json.loads(response)
|
||||
try:
|
||||
data = json.loads(response.replace("```json", "").replace("```", "").strip())
|
||||
except: return None
|
||||
else:
|
||||
data = response
|
||||
|
||||
# Basic cleanup
|
||||
if isinstance(data, list) and data: data = data[0]
|
||||
if not isinstance(data, dict): return None
|
||||
if data.get("raw_value") == "null": data["raw_value"] = None
|
||||
|
||||
return data
|
||||
except Exception as e:
|
||||
logger.error(f"LLM Extraction Parse Error: {e}")
|
||||
@@ -103,38 +96,37 @@ JSON ONLY.
|
||||
|
||||
def _is_metric_plausible(self, metric_name: str, value: Optional[float]) -> bool:
|
||||
if value is None: return False
|
||||
try:
|
||||
val_float = float(value)
|
||||
return val_float > 0
|
||||
except:
|
||||
return False
|
||||
try: return float(value) > 0
|
||||
except: return False
|
||||
|
||||
def _parse_standardization_logic(self, formula: str, raw_value: float) -> Optional[float]:
    """Evaluate a standardization formula (e.g. "wert * 100") for *raw_value*.

    The formula uses the German placeholder 'wert' for the raw metric value.
    Parenthesised fragments (usually units or comments) are stripped first,
    then every character that is not part of a plain arithmetic expression
    is removed, so safe_eval_math only ever sees digits and operators.

    Returns the evaluated float, or None when the formula is missing or
    cannot be evaluated.
    """
    if not formula or raw_value is None:
        return None
    # Remove anything in parentheses first (often units or comments).
    clean_formula = re.sub(r'\(.*?\)', '', formula.lower())
    # Substitute the placeholder with the actual value.
    expression = clean_formula.replace("wert", str(raw_value))
    # Strip every character that is not a digit, dot or arithmetic operator.
    expression = re.sub(r'[^0-9\.\+\-\*\/]', '', expression)
    try:
        return safe_eval_math(expression)
    except Exception as e:
        logger.error(f"Failed to parse logic '{formula}' with value {raw_value}: {e}")
        return None
|
||||
|
||||
def _get_best_metric_result(self, results_list: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
|
||||
if not results_list:
|
||||
return None
|
||||
if not results_list: return None
|
||||
source_priority = {"wikipedia": 0, "website": 1, "serpapi": 2}
|
||||
valid_results = [r for r in results_list if r.get("calculated_metric_value") is not None]
|
||||
if not valid_results:
|
||||
return None
|
||||
valid_results.sort(key=lambda r: (source_priority.get(r.get("metric_source"), 99), -r.get("metric_confidence", 0.0)))
|
||||
logger.info(f"Best result chosen: {valid_results[0]}")
|
||||
if not valid_results: return None
|
||||
valid_results.sort(key=lambda r: source_priority.get(r.get("metric_source"), 99))
|
||||
return valid_results[0]
|
||||
|
||||
def _get_website_content_and_url(self, db: Session, company: Company) -> Tuple[Optional[str], Optional[str]]:
    """Return (website_text, url) for *company*, preferring a cached scrape.

    Looks up the most recent 'website_scrape' EnrichmentData row and uses
    its 'raw_text' when present; only on a cache miss does it fall back to
    a live scrape of company.website.
    """
    enrichment = (
        db.query(EnrichmentData)
        .filter_by(company_id=company.id, source_type="website_scrape")
        .order_by(EnrichmentData.created_at.desc())
        .first()
    )
    if enrichment and enrichment.content and "raw_text" in enrichment.content:
        return enrichment.content["raw_text"], company.website
    # Cache miss: scrape live. NOTE(review): the fresh result is not written
    # back to EnrichmentData here — presumably the scraper service persists
    # it elsewhere; confirm to avoid repeated live scrapes.
    content = scrape_website_content(company.website)
    return content, company.website
|
||||
|
||||
def _get_wikipedia_content_and_url(self, db: Session, company_id: int) -> Tuple[Optional[str], Optional[str]]:
|
||||
wiki_data = self._get_wikipedia_content(db, company_id)
|
||||
@@ -142,219 +134,135 @@ JSON ONLY.
|
||||
|
||||
def _get_serpapi_content_and_url(self, company: Company, search_term: str) -> Tuple[Optional[str], Optional[str]]:
    """Run a SERP search for the company plus *search_term*.

    Returns (joined organic snippets, link of the first organic result),
    or (None, None) when the search yields nothing.
    """
    serp_results = run_serp_search(f"{company.name} {company.city or ''} {search_term}")
    if not serp_results:
        return None, None
    # Hoist the organic-results lookup instead of fetching it three times.
    organic = serp_results.get("organic_results", [])
    content = " ".join(res.get("snippet", "") for res in organic)
    url = organic[0].get("link") if organic else None
    return content, url
|
||||
|
||||
def _extract_and_calculate_metric_cascade(self, db: Session, company: Company, industry_name: str, search_term: str, standardization_logic: Optional[str], standardized_unit: Optional[str]) -> Dict[str, Any]:
    """Search website -> wikipedia -> SERP for *search_term* and build a metric dict.

    For every source that yields an LLM extraction with proof text, the
    deterministic MetricParser re-verifies the value against the source
    text (the LLM value is only treated as a hint). The best verified
    candidate — chosen by source priority via _get_best_metric_result —
    wins, and its value is standardized with *standardization_logic* when
    plausible.

    Always returns a dict with the full key set; values remain None when
    nothing was found.
    """
    final_result = {
        "calculated_metric_name": search_term,
        "calculated_metric_value": None,
        "calculated_metric_unit": None,
        "standardized_metric_value": None,
        "standardized_metric_unit": standardized_unit,
        "metric_source": None,
        "proof_text": None,
        "metric_source_url": None,
    }
    # Lazily-evaluated loaders: a source is only fetched when its turn comes.
    sources = [
        ("website", lambda: self._get_website_content_and_url(db, company)),
        ("wikipedia", lambda: self._get_wikipedia_content_and_url(db, company.id)),
        ("serpapi", lambda: self._get_serpapi_content_and_url(company, search_term)),
    ]
    all_source_results = []
    parser = MetricParser()
    for source_name, content_loader in sources:
        logger.info(f" -> Checking source: [{source_name.upper()}] for '{search_term}'")
        try:
            content_text, current_source_url = content_loader()
            if not content_text or len(content_text) < 100:
                # Too little text to extract anything reliable.
                continue
            llm_result = self._run_llm_metric_extraction_prompt(content_text, search_term, industry_name)
            if llm_result and llm_result.get("proof_text"):
                # Re-verify the LLM's claim against the source text with the
                # targeted parser; prefer the raw value as the search hint.
                hint = llm_result.get("raw_value") or llm_result.get("proof_text")
                parsed_value = parser.extract_numeric_value(text=content_text, expected_value=str(hint))
                if parsed_value is not None:
                    llm_result.update({
                        "calculated_metric_value": parsed_value,
                        "calculated_metric_unit": llm_result.get('raw_unit'),
                        "metric_source": source_name,
                        "metric_source_url": current_source_url,
                    })
                    all_source_results.append(llm_result)
        except Exception as e:
            # A failing source must not abort the cascade — log and move on.
            logger.error(f" -> Error in {source_name} stage: {e}")

    best_result = self._get_best_metric_result(all_source_results)
    if not best_result:
        return final_result
    final_result.update(best_result)
    if self._is_metric_plausible(search_term, final_result['calculated_metric_value']):
        # _parse_standardization_logic tolerates a None formula (returns None).
        final_result['standardized_metric_value'] = self._parse_standardization_logic(standardization_logic, final_result['calculated_metric_value'])
    return final_result
|
||||
|
||||
def extract_metrics_for_industry(self, company: Company, db: Session, industry: Industry) -> Company:
    """Run the metric cascade for *industry* and copy the results onto *company*.

    Does NOT commit — transaction handling belongs to the caller
    (e.g. reevaluate_wikipedia_metric / background task code).
    """
    if not industry or not industry.scraper_search_term:
        logger.warning(f"No metric configuration for industry '{industry.name if industry else 'None'}'")
        return company

    # Derive the standardized unit from the configuration text: any mention
    # of m² means we standardize to area, otherwise to generic units.
    if "m²" in (industry.standardization_logic or "") or "m²" in (industry.scraper_search_term or ""):
        std_unit = "m²"
    else:
        std_unit = "Einheiten"

    metrics = self._extract_and_calculate_metric_cascade(
        db, company, industry.name, industry.scraper_search_term, industry.standardization_logic, std_unit
    )

    # Use .get() throughout: the cascade's result dict does not carry the
    # legacy confidence keys, so direct indexing would raise KeyError.
    company.calculated_metric_name = metrics.get("calculated_metric_name")
    company.calculated_metric_value = metrics.get("calculated_metric_value")
    company.calculated_metric_unit = metrics.get("calculated_metric_unit")
    company.standardized_metric_value = metrics.get("standardized_metric_value")
    company.standardized_metric_unit = metrics.get("standardized_metric_unit")
    company.metric_source = metrics.get("metric_source")
    # The cascade reports evidence under "proof_text"; fall back to the
    # legacy "metric_proof_text" key for older result dicts.
    company.metric_proof_text = metrics.get("proof_text", metrics.get("metric_proof_text"))
    company.metric_source_url = metrics.get("metric_source_url")
    company.metric_confidence = metrics.get("metric_confidence", 0.0)
    company.metric_confidence_reason = metrics.get("metric_confidence_reason")

    company.last_classification_at = datetime.utcnow()
    # No db.commit() here — handled by the calling function.
    return company
|
||||
def _find_direct_area(self, db: Session, company: Company, industry_name: str) -> Optional[Dict[str, Any]]:
    """Stage 1 of metric extraction: look for an explicitly stated area ('Fläche').

    Returns the metric dict (with standardized_metric_value set to the raw
    value, since it is already in m²) when a value with a square-metre unit
    was found; otherwise None, so the caller can fall back to the
    industry-specific proxy metric (Stage 2).
    """
    logger.info(" -> (Helper) Running specific search for 'Fläche'...")
    area_metrics = self._extract_and_calculate_metric_cascade(
        db, company, industry_name,
        search_term="Fläche", standardization_logic=None, standardized_unit="m²"
    )
    if area_metrics and area_metrics.get("calculated_metric_value") is not None:
        # The unit key may be present with value None — guard before lower()
        # (dict.get's default only applies when the key is absent).
        unit = (area_metrics.get("calculated_metric_unit") or "").lower()
        if any(u in unit for u in ["m²", "qm", "quadratmeter"]):
            logger.info(" ✅ SUCCESS: Found direct area value.")
            # Already in m²: the raw value IS the standardized value.
            area_metrics['standardized_metric_value'] = area_metrics['calculated_metric_value']
            return area_metrics
    return None
|
||||
|
||||
def reevaluate_wikipedia_metric(self, company: Company, db: Session, industry: Industry) -> Company:
    """Re-run the full metric extraction pipeline for an existing company."""
    logger.info(f"Re-evaluating metric for {company.name}...")
    return self.extract_metrics_for_industry(company, db, industry)
|
||||
def _generate_marketing_opener(self, company_name: str, website_text: str, industry_name: str, industry_pains: str, focus_mode: str = "primary") -> Optional[str]:
    """Generate the 'First Sentence' (opener) for a cold-outreach e-mail.

    focus_mode: 'primary' (infrastructure/cleaning persona) or 'secondary'
    (service/logistics persona). The mode selects which persona-specific
    instruction block is injected into the prompt.

    Returns the generated sentence (quotes stripped) or None on failure.
    """
    if not industry_pains:
        industry_pains = "Effizienz und Personalmangel"  # Fallback pain

    # Persona-specific focus instruction injected into the prompt below.
    if focus_mode == "secondary":
        focus_instruction = """
- **FOKUS: SEKUNDÄR-PROZESSE (Logistik/Service/Versorgung).**
- Ignoriere das Thema Reinigung. Konzentriere dich auf **Abläufe, Materialfluss, Entlastung von Fachkräften** oder **Gäste-Service**.
- Der Satz muss einen operativen Entscheider (z.B. Pflegedienstleitung, Produktionsleiter) abholen."""
    else:
        focus_instruction = """
- **FOKUS: PRIMÄR-PROZESSE (Infrastruktur/Sauberkeit/Sicherheit).**
- Konzentriere dich auf Anforderungen an das Facility Management, Hygiene, Außenwirkung oder Arbeitssicherheit.
- Der Satz muss einen Infrastruktur-Entscheider (z.B. FM-Leiter, Geschäftsführer) abholen."""

    prompt = f"""
Du bist ein exzellenter B2B-Stratege und Texter. Formuliere einen hochpersonalisierten Einleitungssatz (1-2 Sätze).

--- KONTEXT ---
Zielunternehmen: {company_name}
Branche: {industry_name}
Operative Herausforderung (Pain): "{industry_pains}"

Webseiten-Kontext:
{website_text[:2500]}

--- Denkprozess & Stilvorgaben ---
1. **Analysiere den Kontext:** Verstehe das Kerngeschäft.
2. **Identifiziere den Hebel:** Was ist der Erfolgsfaktor in Bezug auf den FOKUS?
3. **Formuliere den Satz (ca. 20-35 Wörter):**
   - Wähle einen eleganten, aktiven Einstieg.
   - Verbinde die **Tätigkeit** mit dem **Hebel** und den **Konsequenzen**.
   - **WICHTIG:** Formuliere als positive Beobachtung über eine Kernkompetenz.
   - **VERMEIDE:** Konkrete Zahlen.
   - Verwende den Firmennamen: {company_name}.
{focus_instruction}

--- Deine Ausgabe ---
Gib NUR den finalen Satz aus. Keine Anführungszeichen.
AUSGABE: NUR den fertigen Satz.
"""
    try:
        response = call_gemini_flash(prompt)
        return response.strip().strip('"') if response else None
    except Exception as e:
        logger.error(f"Opener Error: {e}")
        return None
|
||||
|
||||
def classify_company_potential(self, company: Company, db: Session) -> Company:
    """Full analysis pipeline for one company.

    Steps: load industry definitions, fetch website content, classify the
    industry via LLM, run the two-stage metric extraction (direct area
    first, industry proxy second) and generate the primary/secondary
    persona openers. Commits the session itself and returns the updated
    company. On missing website content the company is marked
    ENRICH_FAILED; on an unmatched industry it is set to "Others".
    """
    logger.info(f"--- Starting FULL Analysis v3.0 for {company.name} ---")
    industries = self._load_industry_definitions(db)

    # 1. Website content is a hard prerequisite for everything below.
    website_content, _ = self._get_website_content_and_url(db, company)
    if not website_content or len(website_content) < 100:
        logger.warning(f"No or insufficient website content for {company.name} (Length: {len(website_content) if website_content else 0}). Skipping classification.")
        company.status = "ENRICH_FAILED"
        db.commit()
        return company

    # 2. LLM industry classification against the configured definitions.
    industry_defs = [{"name": i.name, "description": i.description} for i in industries]
    suggested_industry_name = self._run_llm_classification_prompt(website_content, company.name, industry_defs)
    matched_industry = next((i for i in industries if i.name == suggested_industry_name), None)
    if not matched_industry:
        # Unknown suggestion: record the catch-all bucket and stop early.
        company.industry_ai = "Others"
        db.commit()
        return company

    company.industry_ai = matched_industry.name
    logger.info(f"✅ Industry: {matched_industry.name}")

    # 3. Two-stage metric extraction: explicit area first, proxy second.
    metrics = self._find_direct_area(db, company, matched_industry.name)
    if not metrics:
        logger.info(" -> No direct area. Trying proxy...")
        if matched_industry.scraper_search_term:
            metrics = self._extract_and_calculate_metric_cascade(
                db, company, matched_industry.name,
                search_term=matched_industry.scraper_search_term,
                standardization_logic=matched_industry.standardization_logic,
                standardized_unit="m²"
            )

    if metrics and metrics.get("calculated_metric_value"):
        logger.info(f" ✅ SUCCESS: {metrics.get('calculated_metric_value')} {metrics.get('calculated_metric_unit')}")
        company.calculated_metric_name = metrics.get("calculated_metric_name", matched_industry.scraper_search_term or "Fläche")
        company.calculated_metric_value = metrics.get("calculated_metric_value")
        company.calculated_metric_unit = metrics.get("calculated_metric_unit")
        company.standardized_metric_value = metrics.get("standardized_metric_value")
        company.standardized_metric_unit = metrics.get("standardized_metric_unit")
        company.metric_source = metrics.get("metric_source")
        company.metric_proof_text = metrics.get("proof_text")
        company.metric_source_url = metrics.get("metric_source_url")
        # Flat confidence for any value that survived the cascade's
        # parser verification.
        company.metric_confidence = 0.8
        company.metric_confidence_reason = "Metric processed."

    # 4. Persona-specific openers (primary = infrastructure/cleaning,
    #    secondary = service/logistics).
    company.ai_opener = self._generate_marketing_opener(
        company.name, website_content, matched_industry.name, matched_industry.pains, "primary"
    )
    company.ai_opener_secondary = self._generate_marketing_opener(
        company.name, website_content, matched_industry.name, matched_industry.pains, "secondary"
    )

    company.last_classification_at = datetime.utcnow()
    company.status = "ENRICHED"
    db.commit()
    logger.info(f"--- ✅ Analysis Finished for {company.name} ---")
    return company
|
||||
@@ -0,0 +1,82 @@
|
||||
import unittest
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
# Adjust path to allow importing from backend
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
from backend.services.classification import ClassificationService
|
||||
from backend.database import Company, Industry, RoboticsCategory, Session
|
||||
|
||||
class TestHospitalMetricFinal(unittest.TestCase):
    """Unit test for the two-stage hospital metric logic.

    Stage 1 (direct area search) is forced to fail, so the pipeline must
    fall back to Stage 2 (bed-count proxy), and both the proxy metrics and
    the two persona openers must land on the Company object.
    """

    def setUp(self):
        self.service = ClassificationService()
        self.mock_db = MagicMock(spec=Session)

        self.mock_company = Company(id=8, name="Klinikum Landkreis Erding")
        self.mock_industry_hospital = Industry(
            id=1,
            name="Healthcare - Hospital",
            scraper_search_term="Anzahl Betten",
            standardization_logic="wert * 100",
            primary_category=RoboticsCategory(name="Reinigungsroboter"),
            secondary_category=RoboticsCategory(name="Serviceroboter"),
        )
        # The pipeline rejects content shorter than 100 characters, so the
        # fixture is repeated to clear that threshold (a single copy is only
        # ~54 chars and would have short-circuited to ENRICH_FAILED).
        self.mock_website_content = "Ein langer Text, der die 100-Zeichen-Prüfung besteht. " * 3

    @patch('backend.services.classification.ClassificationService._generate_marketing_opener')
    @patch('backend.services.classification.ClassificationService._extract_and_calculate_metric_cascade')
    @patch('backend.services.classification.ClassificationService._find_direct_area')
    @patch('backend.services.classification.ClassificationService._run_llm_classification_prompt')
    @patch('backend.services.classification.ClassificationService._get_website_content_and_url')
    @patch('backend.services.classification.ClassificationService._load_industry_definitions')
    def test_final_hospital_logic(
        self,
        mock_load_industries,
        mock_get_website,
        mock_classify,
        mock_find_direct_area,
        mock_extract_cascade,
        mock_generate_opener,
    ):
        """Stage 1 fails -> Stage 2 proxy metrics and both openers applied."""
        print("\n--- Running Final Hospital Logic Test ---")

        # --- MOCK SETUP ---
        mock_load_industries.return_value = [self.mock_industry_hospital]
        mock_get_website.return_value = (self.mock_website_content, "http://mock.com")
        mock_classify.return_value = "Healthcare - Hospital"
        mock_find_direct_area.return_value = None  # STAGE 1 MUST FAIL

        proxy_metric_result = {
            "calculated_metric_name": "Anzahl Betten",
            "calculated_metric_value": 352.0,
            "calculated_metric_unit": "Betten",
            "standardized_metric_value": 35200.0,
            "standardized_metric_unit": "m²",
            "metric_source": "wikipedia",
        }
        mock_extract_cascade.return_value = proxy_metric_result
        # First call produces the primary opener, second the secondary one.
        mock_generate_opener.side_effect = ["Primary Opener", "Secondary Opener"]

        # --- EXECUTION ---
        updated_company = self.service.classify_company_potential(self.mock_company, self.mock_db)

        # --- ASSERTIONS ---
        mock_find_direct_area.assert_called_once()
        mock_extract_cascade.assert_called_once()

        self.assertEqual(updated_company.calculated_metric_name, "Anzahl Betten")
        self.assertEqual(updated_company.calculated_metric_value, 352.0)
        self.assertEqual(updated_company.standardized_metric_value, 35200.0)
        print(" ✅ Metrics from Stage 2 correctly applied.")

        self.assertEqual(updated_company.ai_opener, "Primary Opener")
        self.assertEqual(updated_company.ai_opener_secondary, "Secondary Opener")
        print(" ✅ Openers correctly applied.")

        print("\n--- ✅ PASSED: Final Hospital Logic Test. ---")
|
||||
|
||||
if __name__ == '__main__':
    # Allow running this test module directly: python test_hospital_metric.py
    unittest.main()
|
||||
Reference in New Issue
Block a user