From 5a04fd6bf24959050e00651996bcf15540e4f6ca Mon Sep 17 00:00:00 2001
From: Floke <floke.com@gmail.com>
Date: Wed, 14 Jan 2026 10:33:53 +0000
Subject: [PATCH] feat(market-intel): Optimize outreach quality and remove
 hallucination bias. Implemented context-sensitive technographics and
 strategic bridge prompting.

---
 market_intel_backend_plan.md | 54 +++++++++++++++++++++++----------
 market_intel_orchestrator.py | 58 +++++++++++++++++++++++++-----------
 2 files changed, 78 insertions(+), 34 deletions(-)

diff --git a/market_intel_backend_plan.md b/market_intel_backend_plan.md
index 7f0e63a5..b3ba68ae 100644
--- a/market_intel_backend_plan.md
+++ b/market_intel_backend_plan.md
@@ -114,24 +114,46 @@ Für eine schnelle Entwicklung ist "Sideloading" für die Python-Logik aktiviert
 
 ---
 
-## 6. Status Update (2025-12-22) - Campaign Engine & Reporting
+## 6. Status Update (2026-01-14) - Quality & Stability Refinements
 
-### Erreichte Meilensteine:
-1.  **Rollenbasierte Campaign-Engine:**
-    *   Die Funktion `generate_outreach_campaign` wurde komplett überarbeitet.
-    *   Sie nutzt nun die volle Tiefe der Knowledge Base (`yamaichi_neu.md`), um **personalisierte Sequenzen für spezifische Rollen** (z.B. "Hardware-Entwickler" vs. "Einkäufer") zu erstellen.
-    *   Die Ansprache erfolgt strikt im "Partner auf Augenhöhe"-Tonfall.
-    *   **Social Proof Integration:** Der Absender (`reference_url`) wird als Beweis der Kompetenz inkl. passender KPIs im Abbinder integriert.
-    *   **"Grit"-Prompting:** Der Prompt wurde massiv geschärft, um operative Schmerzpunkte ("ASNs", "Bandstillstand") statt Marketing-Bla-Bla zu nutzen.
 
-2.  **Report Polishing (Frontend):**
-    *   Der Markdown-Export (`StepReport.tsx`) wurde erweitert.
-    *   Er enthält nun die **"Proof-Links"** (Beweise/URLs) direkt in den Tabellenzellen, sauber formatiert. Damit ist die Herleitung der Ergebnisse (z.B. "Warum nutzt der Kunde Ariba?") auch im Export transparent nachvollziehbar.
 
-3.  **Frontend UX & Bugfixes:**
-    *   **Kein doppelter Upload:** `StepOutreach.tsx` wurde angepasst, um den Strategie-Kontext aus Schritt 1 direkt zu übernehmen.
-    *   **Lösch-Bug:** `StepReview.tsx` wurde korrigiert, sodass gelöschte Unternehmen sofort aus der UI verschwinden.
-    *   **Crash-Fix:** Die Behandlung der API-Antwort in `geminiService.ts` wurde gehärtet, um die neue verschachtelte Antwortstruktur der Campaign-Engine korrekt zu verarbeiten.
+**Erreichte Meilensteine:**
+
+
+
+1.  **Anti-Halluzinations-Fix (Technographic Audit):**
+
+    *   **Problem:** Die KI hat aufgrund von Suggestiv-Prompts ("Look for SAP Ariba") oft Technologien halluziniert oder irrelevante Systeme als Wettbewerber eingestuft.
+
+    *   **Lösung:** Entfernung aller festcodierten "Suggestiv-Listen" aus dem Code. Der Audit sucht nun rein faktenbasiert oder basierend auf der expliziten Strategie-Eingabe.
+
+    *   **Ergebnis:** Keine "falschen Feinde" mehr. Wenn keine Konkurrenz-Technologie gefunden wird, wird korrekt "Greenfield" (Status Quo: Manuell) erkannt.
+
+
+
+2.  **Outreach-Optimierung ("Strategic Observer"):**
+
+    *   **Prompting:** Radikale Überarbeitung des Outreach-Prompts.
+
+    *   **Stil:** Weg vom "Verkäufer", hin zum "Scharfsinnigen Branchenbeobachter".
+
+    *   **Opportunity Bridge:** Die E-Mails schlagen in der ersten Nachricht sofort die Brücke von der Beobachtung (z.B. "Expansion") zur Lösungskategorie (z.B. "Autonome Reinigungsrobotik"), ohne plump Features zu verkaufen.
+
+    *   **Kontext-Sensitivität:** Technologische Signale (wie ERP- oder Procurement-Systeme) werden nur noch bei Rollen erwähnt, für die sie relevant sind (z.B. CEO, CFO, Head of Procurement), aber bei rein operativen Rollen (z.B. Facility Manager, Head of Operations) ausgeblendet, um Verwirrung zu vermeiden.
+
+
+
+3.  **Produktionsreife:**
+
+    *   Der Prozess liefert nun konsistent hochwertige, C-Level-taugliche Ansprachen, die strategische Schmerzpunkte mit operativen Lösungen verbinden.
+
+
 
 ### Nächste Schritte:
-*   **Stabilitäts-Test:** Ausführung eines Batch-Audits mit >20 Firmen, um Rate-Limits und Fehlerbehandlung unter Last zu prüfen.
+
+*   **Regelbetrieb & Monitoring:** Überwachung der Qualität bei neuen Branchen.
+
+
+
+
diff --git a/market_intel_orchestrator.py b/market_intel_orchestrator.py
index 86d1562a..179cbf55 100644
--- a/market_intel_orchestrator.py
+++ b/market_intel_orchestrator.py
@@ -388,14 +388,29 @@ def analyze_company(company_name, strategy, target_market, language='de'):
         scraping_note = "(No URL found)"
 
     tech_evidence = []
-    known_incumbents = ["SAP Ariba", "Jaggaer", "Coupa", "SynerTrade", "Ivalua", "ServiceNow", "Salesforce", "Oracle SCM", "Zycus", "GEP", "SupplyOn", "EcoVadis", "IntegrityNext"]
-    half = len(known_incumbents) // 2
-    group1 = " OR ".join([f'"{inc}"' for inc in known_incumbents[:half]])
-    group2 = " OR ".join([f'"{inc}"' for inc in known_incumbents[half:]])
-    tech_queries = [f'"{company_name}" ({group1})', f'"{company_name}" ({group2})', f'"{company_name}" "supplier portal" login']
     
+    # NEU: Dynamische Suche basierend auf Strategie statt Hardcoded Liste
+    # Wir suchen NICHT mehr proaktiv nach SAP Ariba, es sei denn, es steht in der Strategie.
+    # Stattdessen machen wir eine generische "Tech Stack"-Suche.
+    tech_queries = [
+        f'site:{url.split("//")[-1].split("/")[0] if url and "//" in url else company_name} "software" OR "technology" OR "system"',
+        f'"{company_name}" "technology stack"',
+        f'"{company_name}" "partners"'
+    ]
+    
+    # Add explicit tech signals from strategy if they exist
+    signals = strategy.get('signals', [])
+    for signal in signals:
+        if "technographic" in signal.get('id', '').lower() or "incumbent" in signal.get('id', '').lower():
+             keywords = signal.get('targetPageKeywords', [])
+             for kw in keywords:
+                 tech_queries.append(f'"{company_name}" "{kw}"')
+
+    # Deduplicate queries and limit
+    tech_queries = list(set(tech_queries))[:4]
+
     for q in tech_queries:
-        results = serp_search(q, num_results=4)
+        results = serp_search(q, num_results=3)
         if results:
             for r in results:
                 tech_evidence.append(f"- Found: {r['title']}\n  Snippet: {r['snippet']}\n  Link: {r['link']}")
@@ -405,9 +420,10 @@ def analyze_company(company_name, strategy, target_market, language='de'):
     firmographics_results = serp_search(f"{company_name} Umsatz Mitarbeiterzahl 2023")
     firmographics_context = "\n".join([f"- {r['snippet']} ({r['link']})" for r in firmographics_results])
 
-    signals = strategy.get('signals', [])
     for signal in signals:
-        if "incumbent" in signal['id'].lower() or "tech" in signal['id'].lower(): continue
+        # Skip technographic signals here as they are handled above or via generic search
+        if "incumbent" in signal['id'].lower() or "technographic" in signal['id'].lower(): continue
+        
         proof_strategy = signal.get('proofStrategy', {})
         query_template = proof_strategy.get('searchQueryTemplate')
         search_context = ""
@@ -432,8 +448,8 @@ def analyze_company(company_name, strategy, target_market, language='de'):
     --- STRATEGY (What we are looking for) ---
     {json.dumps(signals, indent=2)}
 
-    --- EVIDENCE 1: EXTERNAL TECH-STACK INTELLIGENCE (CRITICAL) ---
-    Look closely here for mentions of competitors like SAP Ariba, Jaggaer, SynerTrade, Coupa, etc.
+    --- EVIDENCE 1: EXTERNAL TECH-STACK INTELLIGENCE ---
+    Analyze the search results below. Do NOT hallucinate technologies. Only list what is explicitly found.
     {tech_evidence_text}
 
     --- EVIDENCE 2: HOMEPAGE CONTENT {scraping_note} ---
@@ -448,15 +464,16 @@ def analyze_company(company_name, strategy, target_market, language='de'):
 
     TASK:
     1. **Firmographics**: Estimate Revenue and Employees.
-    2. **Technographic Audit**: Look for specific competitor software or legacy systems mentioned in EVIDENCE 1 (e.g., "Partner of SynerTrade", "Login to Jaggaer Portal").
+    2. **Technographic Audit**: Check if any relevant competitor technology or legacy system is ACTUALLY found in the evidence.
+       - **CRITICAL:** If no specific competitor software is found, assume the status is "Greenfield" (Manual Process / Status Quo). Do NOT invent a competitor like SAP Ariba just because it's a common tool.
     3. **Status**:
-       - Set to "Nutzt Wettbewerber" if ANY competitor technology is found.
-       - Set to "Greenfield" ONLY if absolutely no competitor tech is found.
+       - Set to "Nutzt Wettbewerber" ONLY if a direct competitor is explicitly found.
+       - Set to "Greenfield" if no competitor tech is found.
        - Set to "Bestandskunde" if they already use our solution.
     4. **Evaluate Signals**: For each signal, provide a "value" (Yes/No/Partial) and "proof".
     5. **Recommendation (Pitch Strategy)**: 
-       - If they use a competitor, explain how to position against it.
-       - If Greenfield, explain the entry point.
+       - If Greenfield: Pitch against the manual status quo (efficiency, error reduction).
+       - If Competitor: Pitch replacement/upgrade.
        - **Tone**: Strategic, insider-knowledge, specific.
 
     --- LANGUAGE INSTRUCTION ---
@@ -579,9 +596,14 @@ def generate_outreach_campaign(company_data_json, knowledge_base_content, refere
     {task_description}
 
     --- TONE & STYLE GUIDELINES (CRITICAL) ---
-    - **Perspective:** Operational Expert & Insider. NOT generic marketing.
-    - **Be Gritty & Specific:** Use hard, operational keywords from the Knowledge Base (e.g., "ASNs", "8D-Reports").
-    - **Language:** {lang_instruction}.
+    1. **Professional & Flowing:** Aim for approx. 500-600 characters per email. Use full sentences and professional courtesies. It should feel like a high-quality human message.
+    2. **Stance:** Act as an **astute industry observer** and peer consultant. You have analyzed their specific situation and identified a strategic bottleneck.
+    3. **The Opportunity Bridge (Email 1):** Bridge observation to a strategic solution immediately using concrete terms (e.g., "autonome Reinigungsrobotik").
+    4. **Context-Sensitive Technographics:** Only mention discovered IT or Procurement systems (e.g., SAP Ariba) if it is highly relevant to the **specific role** (e.g., for CEO, CFO, or Head of Procurement). For **purely operational roles** (e.g., Facility Manager, Head of Operations), AVOID mentioning these systems as it may cause confusion; focus entirely on the operational pain (labor shortage) and growth bottlenecks instead.
+    5. **Soft-Sell vs. Hard-Pitch:** Position technology as a logical answer to the bottleneck. Pitch the **outcome/capability**, not features.
+    6. **Social Proof as the Engine:** Let the Reference Client ({reference_url}) provide the evidence. Use a role-specific KPI.
+    7. **Operational Grit:** Use domain-specific terms (e.g., "ASNs", "8D", "TCO") to establish authority.
+    8. **Language:** {lang_instruction}.
 
     {output_format}
     """