From 3864ddb60686fbbc0bec9d5210d9e57dc80b8375 Mon Sep 17 00:00:00 2001
From: Floke <floke.com@gmail.com>
Date: Mon, 23 Feb 2026 10:45:12 +0000
Subject: [PATCH] [2ff88f42] Implement Ops-Secondary Logic & Matrix Gen v3.2

---
 .../backend/scripts/generate_matrix.py        | 134 +++++++++++++-----
 .../backend/scripts/sync_notion_industries.py |  19 +++
 2 files changed, 114 insertions(+), 39 deletions(-)

diff --git a/company-explorer/backend/scripts/generate_matrix.py b/company-explorer/backend/scripts/generate_matrix.py
index 4d9886a2..36e2dfa5 100644
--- a/company-explorer/backend/scripts/generate_matrix.py
+++ b/company-explorer/backend/scripts/generate_matrix.py
@@ -2,7 +2,7 @@ import sys
 import os
 import json
 import argparse
-from typing import List
+import re
 import google.generativeai as genai
 
 # Setup Environment
@@ -14,30 +14,76 @@ from backend.config import settings
 # --- Configuration ---
 MODEL_NAME = "gemini-2.0-flash" # High quality copy
 
+def extract_segment(text: str, marker: str) -> str:
+    """
+    Return the body of the first ``[header]`` block whose header contains *marker*.
+
+    Text is split on ``[...]`` headers, e.g.
+    ``[Primary Product: Cleaning] ... [Secondary Product: Service] ...``.
+    Header and marker are compared case-insensitively.
+
+    Args:
+        text: Raw pains/gains text; may be empty or None.
+        marker: Header fragment to look for, e.g. "Primary Product".
+
+    Returns:
+        The stripped body after the matching header. If no header matches:
+        the full text when it mentions neither "Primary Product" nor
+        "Secondary Product" (legacy, unsegmented text), otherwise "".
+    """
+    if not text:
+        return ""
+
+    # One capture group => re.split yields [preamble, header, body, header, body, ...],
+    # so headers sit at odd indices and each is always followed by its body.
+    segments = re.split(r'\[(.*?)\]', text)
+    wanted = marker.lower()
+    for header, body in zip(segments[1::2], segments[2::2]):
+        if wanted in header.lower():
+            return body.strip()
+
+    # Legacy fallback. Compare case-insensitively, like the header match above;
+    # otherwise "[primary product: ...]" text would be returned for ANY marker.
+    lowered = text.lower()
+    if "primary product" not in lowered and "secondary product" not in lowered:
+        return text
+    return ""
+
 def generate_prompt(industry: Industry, persona: Persona) -> str:
     """
     Builds the prompt for the AI to generate the marketing texts.
     Combines Industry context with Persona specific pains/gains and Product Category.
     """
     
-    # 1. Determine Product Context
-    # We focus on the primary category for the general matrix, 
-    # but we inform the AI about the secondary option if applicable.
-    primary_cat = industry.primary_category
-    product_context = f"{primary_cat.name}: {primary_cat.description}" if primary_cat else "Intelligente Robotik-Lösungen"
+    # 1. Determine Product Focus Strategy
+    # Default: Primary
+    target_scope = "Primary Product"
+    target_category = industry.primary_category
     
-    # 2. Extract specific segments from industry pains/gains
-    def extract_segment(text, marker):
-        if not text: return ""
-        import re
-        segments = re.split(r'\[(.*?)\]', text)
-        for i in range(1, len(segments), 2):
-            if marker.lower() in segments[i].lower():
-                return segments[i+1].strip()
-        return text
+    # Special Rule: "Operativer Entscheider" gets Secondary Product IF ops_focus_secondary is True
+    # Logic: A Nursing Director (Ops) doesn't care about floor cleaning (Facility), 
+    # but cares about Service Robots (Secondary).
+    if persona.name == "Operativer Entscheider" and industry.ops_focus_secondary:
+        target_scope = "Secondary Product"
+        target_category = industry.secondary_category
+        print(f"    -> STRATEGY SWITCH: Using {target_scope} for {persona.name}")
 
-    industry_pains = extract_segment(industry.pains, "Primary Product")
-    industry_gains = extract_segment(industry.gains, "Primary Product")
+    # Fallback if secondary was requested but not defined
+    if not target_category:
+        target_category = industry.primary_category
+        target_scope = "Primary Product" # Fallback to primary if secondary category object is missing
+
+    product_context = f"{target_category.name}: {target_category.description}" if target_category else "Intelligente Robotik-Lösungen"
+    
+    # 2. Extract specific segments from industry pains/gains based on scope
+    industry_pains = extract_segment(industry.pains, target_scope)
+    industry_gains = extract_segment(industry.gains, target_scope)
+    
+    # Fallback: If specific scope is empty (e.g. no Secondary Pains defined), try Primary
+    if not industry_pains and target_scope == "Secondary Product":
+         print(f"    -> WARNING: No specific Pains found for {target_scope}. Fallback to Primary.")
+         industry_pains = extract_segment(industry.pains, "Primary Product")
+         industry_gains = extract_segment(industry.gains, "Primary Product")
 
     # 3. Handle Persona Data
     try:
@@ -48,32 +94,38 @@ def generate_prompt(industry: Industry, persona: Persona) -> str:
         persona_gains = [persona.gains] if persona.gains else []
     
     prompt = f"""
-Du bist ein scharfsinniger B2B-Strategieberater und exzellenter Copywriter.
-Deine Aufgabe: Erstelle hochpräzise, "scharfe" Marketing-Textbausteine für einen Outreach an Entscheider.
+Du bist ein kompetenter Lösungsberater und brillanter Texter.
+AUFGABE: Erstelle 3 Textblöcke (Subject, Introduction_Textonly, Industry_References_Textonly) für eine E-Mail an einen Entscheider.
 
---- STRATEGISCHER RAHMEN ---
-ZIELUNTERNEHMEN (Branche): {industry.name}
-BRANCHEN-KONTEXT: {industry.description or 'Keine spezifische Beschreibung'}
-BRANCEHN-HERAUSFORDERUNGEN: {industry_pains}
-ANGESTREBTE MEHRWERTE: {industry_gains}
+--- KONTEXT ---
+ZIELBRANCHE: {industry.name}
+BRANCHEN-HERAUSFORDERUNGEN (PAIN POINTS):
+{industry_pains}
 
-ZIELPERSON (Rolle): {persona.name}
-PERSÖNLICHER DRUCK (Pains der Rolle):
-{chr(10).join(['- ' + p for p in persona_pains])}
-
-GEWÜNSCHTE ERFOLGE (Gains der Rolle):
-{chr(10).join(['- ' + g for g in persona_gains])}
-
-ANGEBOTENE LÖSUNG (Produkt-Fokus): 
+FOKUS-PRODUKT (LÖSUNG):
 {product_context}
 
---- DEIN AUFTRAG ---
-Erstelle ein JSON-Objekt mit 3 Textbausteinen, die den persönlichen Druck des Empfängers mit den strategischen Notwendigkeiten seiner Branche und der technologischen Lösung verknüpfen. 
-Tonalität: Wertschätzend, auf Augenhöhe, scharfsinnig, absolut NICHT marktschreierisch.
+ANSPRECHPARTNER (ROLLE): {persona.name}
+PERSÖNLICHE HERAUSFORDERUNGEN DES ANSPRECHPARTNERS (PAIN POINTS):
+{chr(10).join(['- ' + str(p) for p in persona_pains])}
 
-1. "subject": Eine Betreffzeile (Max 6 Wörter), die den Finger direkt in eine Wunde (Pain) legt oder ein hohes Ziel (Gain) verspricht.
-2. "intro": Einleitung (2-3 Sätze). Verbinde die spezifische Branchen-Herausforderung mit der persönlichen Verantwortung des Empfängers. Er muss sich sofort verstanden fühlen.
-3. "social_proof": Ein Beweissatz, der zeigt, dass diese Lösung in der Branche {industry.name} bereits reale Probleme (z.B. Personalmangel, Dokumentationsdruck) gelöst hat. Nenne keine konkreten Firmennamen, aber quantifizierbare Effekte.
+--- DEINE AUFGABE ---
+1.  **Subject:** Formuliere eine kurze Betreffzeile (max. 6 Wörter). Richte sie **direkt an einem der persönlichen Pain Points** des Ansprechpartners oder dem zentralen Branchen-Pain. Sei scharfsinnig, nicht werblich.
+
+2.  **Introduction_Textonly:** Formuliere einen Einleitungstext (2-3 Sätze).
+    - **Satz 1 (Die Brücke):** Knüpfe an die (uns unbekannte) operative Herausforderung an. Beschreibe subtil den Nutzen einer Lösung, ohne das Produkt (Roboter) plump zu nennen.
+    - **Satz 2 (Die Relevanz):** Schaffe die Relevanz für die Zielperson, indem du das Thema mit einem ihrer persönlichen Pain Points verknüpfst (z.B. "Für Sie als {persona.name} ist dabei entscheidend...").
+
+3.  **Industry_References_Textonly:** Formuliere einen **strategischen Referenz-Block (ca. 2-3 Sätze)** nach folgendem Muster:
+    - **Satz 1 (Social Proof):** Beginne direkt mit dem Nutzen, den vergleichbare Unternehmen in der Branche {industry.name} bereits erzielen. (Erfinde keine Firmennamen, sprich von "Führenden Einrichtungen" oder "Vergleichbaren Häusern").
+    - **Satz 2 (Rollen-Relevanz):** Schaffe den direkten Nutzen für die Zielperson. Formuliere z.B. 'Dieser Wissensvorsprung hilft uns, Ihre [persönlicher Pain Point der Rolle] besonders effizient zu lösen.'
+
+--- BEISPIEL FÜR EINEN PERFEKTEN OUTPUT ---
+{{
+  "Subject": "Kostenkontrolle im Service",
+  "Introduction_Textonly": "Genau bei der Optimierung dieser Serviceprozesse können erhebliche Effizienzgewinne erzielt werden. Für Sie als Finanzleiter ist dabei die Sicherstellung der Profitabilität bei gleichzeitiger Kostentransparenz von zentraler Bedeutung.",
+  "Industry_References_Textonly": "Vergleichbare Unternehmen profitieren bereits massiv von automatisierten Prozessen. Unsere Erfahrung zeigt, dass die grundlegenden Herausforderungen in der Einsatzplanung oft branchenübergreifend ähnlich sind. Dieser Wissensvorsprung hilft uns, Ihre Ziele bei der Kostenkontrolle und Profitabilitätssteigerung besonders effizient zu unterstützen."
+}}
 
 --- FORMAT ---
 Antworte NUR mit einem validen JSON-Objekt.
@@ -88,7 +140,7 @@ Format:
 
 def mock_call(prompt: str):
     """Simulates an API call for dry runs."""
-    print(f"\n--- [MOCK] GENERATING PROMPT ---\n{prompt[:300]}...\n--------------------------------")
+    print(f"\n--- [MOCK] GENERATING PROMPT ---\n{prompt[:800]}...\n--------------------------------")
     return {
         "subject": "[MOCK] Effizienzsteigerung in der Produktion",
         "intro": "[MOCK] Als Produktionsleiter wissen Sie, wie teuer Stillstand ist. Unsere Roboter helfen.",
@@ -149,10 +201,14 @@ def run_matrix_generation(dry_run: bool = True, force: bool = False, specific_in
         print(f"Found {len(industries)} Industries and {len(personas)} Personas.")
         print(f"Mode: {'DRY RUN (No API calls, no DB writes)' if dry_run else 'LIVE - GEMINI GENERATION'}")
         
+        # NOTE: categories are NOT pre-loaded here. The SQLAlchemy session is still
+        # open in this loop, so lazy loading should work; revisit if objects get detached.
+        
         total_combinations = len(industries) * len(personas)
         processed = 0
         
         for ind in industries:
+            print(f"\n>>> Processing Industry: {ind.name} (Ops Secondary: {ind.ops_focus_secondary})")
             for pers in personas:
                 processed += 1
                 print(f"[{processed}/{total_combinations}] Check: {ind.name} x {pers.name}")
diff --git a/company-explorer/backend/scripts/sync_notion_industries.py b/company-explorer/backend/scripts/sync_notion_industries.py
index 091347ae..0d29705b 100644
--- a/company-explorer/backend/scripts/sync_notion_industries.py
+++ b/company-explorer/backend/scripts/sync_notion_industries.py
@@ -67,6 +67,7 @@ def extract_select(prop):
     return prop.get("select", {}).get("name", "") if prop.get("select") else ""
 
 def extract_number(prop):
+    if not prop: return None  # missing/empty property -> no number instead of AttributeError
     return prop.get("number")
 
 def sync_categories(token, session):
@@ -135,6 +136,11 @@ def sync_industries(token, session):
         industry.name = name
         industry.description = extract_rich_text(props.get("Definition"))
         
+        # New: Map Pains & Gains explicitly
+        industry.pains = extract_rich_text(props.get("Pains"))
+        industry.gains = extract_rich_text(props.get("Gains"))
+        industry.notes = extract_rich_text(props.get("Notes"))
+        
         status = extract_select(props.get("Status"))
         industry.status_notion = status
         industry.is_focus = (status == "P1 Focus Industry")
@@ -147,6 +153,9 @@ def sync_industries(token, session):
         industry.scraper_search_term = extract_select(props.get("Scraper Search Term")) # <-- FIXED HERE
         industry.scraper_keywords = extract_rich_text(props.get("Scraper Keywords"))
         industry.standardization_logic = extract_rich_text(props.get("Standardization Logic"))
+        
+        # New Field: Ops Focus Secondary (Checkbox)
+        industry.ops_focus_secondary = props.get("Ops Focus: Secondary", {}).get("checkbox", False)
 
         # Relation: Primary Product Category
         relation = props.get("Primary Product Category", {}).get("relation", [])
@@ -157,6 +166,16 @@ def sync_industries(token, session):
                 industry.primary_category_id = cat.id
             else:
                 logger.warning(f"Related category {related_id} not found for industry {name}")
+        
+        # Relation: Secondary Product Category
+        relation_sec = props.get("Secondary Product", {}).get("relation", [])
+        if relation_sec:
+            related_id = relation_sec[0]["id"]
+            cat = session.query(RoboticsCategory).filter(RoboticsCategory.notion_id == related_id).first()
+            if cat:
+                industry.secondary_category_id = cat.id
+            else:
+                logger.warning(f"Related Secondary category {related_id} not found for industry {name}")
                 
         count += 1