From 6a8a49fb0096d0efae0bb93a65db89978a6dc52f Mon Sep 17 00:00:00 2001
From: Floke <floke.com@gmail.com>
Date: Sun, 11 Jan 2026 11:01:44 +0000
Subject: [PATCH] feat(analysis): Ground Step 8 Reference Analysis

Improves the competitor reference analysis (Step 8) by replacing the previous LLM-only approach with a grounded, scraping-based method.

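- Implemented a new scraper to actively search for and parse competitor reference/case study pages.
- The analysis is now based on actual website content, which increases the accuracy and reliability of the results and reduces the risk of model hallucinations.
- Step 4 now analyses each competitor in its own parallel task (map-reduce); Steps 6 and 7 get typed request models, full prompts and response schemas.
- Added file-based debug logging (`/app/Log_from_docker`), explicit DNS servers in `docker-compose.yml`, and a `max_output_tokens` limit of 8192 for Gemini calls.
- Updated documentation to reflect the new 'Grounded References' architecture.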
---
 MIGRATION_REPORT_COMPETITOR_ANALYSIS.md       |  42 +-
 competitor-analysis-app/Dockerfile            |   1 -
 .../competitor_analysis_orchestrator.py       | 437 +++++++++++++++++-
 docker-compose.yml                            |   7 +-
 4 files changed, 453 insertions(+), 34 deletions(-)

diff --git a/MIGRATION_REPORT_COMPETITOR_ANALYSIS.md b/MIGRATION_REPORT_COMPETITOR_ANALYSIS.md
index 379b1d37..add4ecf7 100644
--- a/MIGRATION_REPORT_COMPETITOR_ANALYSIS.md
+++ b/MIGRATION_REPORT_COMPETITOR_ANALYSIS.md
@@ -1,8 +1,8 @@
 # Migration Report: Competitor Analysis Agent
 
-## Status: Jan 10, 2026 - ✅ FINAL SUCCESS
+## Status: Jan 11, 2026 - ✅ ROBUSTNESS UPGRADE COMPLETE
 
-Die App ist unter `/ca/` voll funktionsfähig und verfügt nun über eine "Grounded Truth" Engine (Scraping + SerpAPI). Diese Migration dauerte aufgrund einer extremen Fehlerverkettung über 5 Stunden.
+Die App ist unter `/ca/` voll funktionsfähig und verfügt nun über eine "Grounded Truth" Engine (Scraping + SerpAPI) sowie eine skalierbare **Map-Reduce Architektur**.
 
 ### 🚨 Vollständige Chronik der Fehler & Lösungen
 
@@ -35,18 +35,46 @@ Die App ist unter `/ca/` voll funktionsfähig und verfügt nun über eine "Groun
     *   **Ursache:** Einfache Anführungszeichen `'` in Kombination mit `\n` wurden im Container-Kontext falsch interpretiert.
     *   **Lösung:** **ULTIMATIVE SYNTAX:** Verwendung von **Triple Raw Quotes (`r"""..."""`)** für jeden einzelnen String, der Variablen oder Sonderzeichen enthält.
 
+8.  **Problem: Analyse stoppt nach 5 Konkurrenten (Token Limit / Lazy LLM)**
+    *   **Symptom:** Bei 9 Konkurrenten wurden nur die ersten 5 analysiert, der Rest fehlte.
+    *   **Ursache:** Der riesige Prompt ("Analysiere alle 9...") überforderte das Kontext-Fenster oder führte zu Timeouts.
+    *   **Lösung:** Umstellung auf **Map-Reduce**: Jeder Konkurrent wird in einem eigenen parallelen Task (`asyncio.gather`) analysiert. Erhöhung von `max_output_tokens` auf 8192.
+
+9.  **Problem: `NameResolutionError` im Container**
+    *   **Symptom:** Scraping schlug fehl ("Name or service not known").
+    *   **Ursache:** Docker-Container nutzten den (instabilen) Host-DNS.
+    *   **Lösung:** Explizites Setzen von Google DNS (`8.8.8.8`, `8.8.4.4`) in `docker-compose.yml`.
+
+10. **Problem: `422 Unprocessable Entity` in Schritt 6 & 8**
+    *   **Ursache:** Diskrepanz zwischen Frontend-Request (z.B. sendet `industries`) und Backend-Pydantic-Modell (erwartet `target_industries`).
+    *   **Lösung:** Backend-Modelle exakt an die Frontend-Payloads angepasst.
+
+11. **Problem: Leere Matrizen in der Conclusion**
+    *   **Ursache:** Das LLM füllte das `availability`-Array nicht korrekt oder erfand eigene Produktnamen als Zeilenbeschriftung.
+    *   **Lösung:** Extrem strikter Prompt ("KEINE Produktnamen", "GENAU einen Eintrag pro Kategorie") und detailliertes JSON-Schema.
+
+12. **Problem: Blinde KI in Schritt 8 (Referenzen)**
+    *   **Symptom:** Die Referenzanalyse lieferte nur generische, oft erfundene Branchen, anstatt echter Kunden.
+    *   **Ursache:** Der Prompt bat die KI, "nach Referenzen zu suchen", ohne ihr eine Datengrundlage zu geben. Die KI hat halluziniert.
+    *   **Lösung:** Implementierung einer **"Grounded" Referenz-Suche**.
+        1.  Ein neuer Scraper (`discover_and_scrape_references_page`) sucht gezielt nach "Referenzen", "Case Studies" oder "Kunden" auf der Website des Wettbewerbers.
+        2.  Der Inhalt DIESER Seiten wird extrahiert.
+        3.  Nur dieser "grounded" Text wird an das LLM zur Analyse und Extraktion übergeben.
+    *   **Ergebnis:** Die Analyse basiert nun auf Fakten von der Webseite des Wettbewerbers, nicht auf dem allgemeinen Wissen der KI.
+
 ### 🛡️ Die finale "Grounded" Architektur
 
 *   **Scraping:** Nutzt `requests` und `BeautifulSoup`, um nicht nur die Homepage, sondern auch Produkt- und Branchen-Unterseiten zu lesen.
-*   **Discovery:** Findet relevante Links automatisch auf der Homepage.
-*   **SerpAPI:** Sucht via Google (`site:domain.com`) nach den tiefsten Fakten, bevor die KI gefragt wird.
-*   **Logging:** Jede KI-Anfrage und jede Antwort wird im `DEBUG`-Level vollständig protokolliert.
+*   **Grounded References:** Für die Referenzanalyse (Schritt 8) werden nun gezielt "Case Study"- oder "Kunden"-Seiten gescrapt, um die Extraktion auf echte Daten zu stützen und Halluzinationen zu vermeiden.
+*   **Map-Reduce:** Statt eines Riesen-Prompts werden Konkurrenten parallel einzeln analysiert. Das skaliert linear.
+*   **Logging:** Ein spezieller `log_debug` Helper schreibt direkt in `/app/Log_from_docker`, um Python-Logging-Probleme zu umgehen.
 
 ### Lessons Learned für die Ewigkeit
 
 1.  **F-STRINGS SIND VERBOTEN** für Prompts und komplexe Listen-Operationen.
 2.  **TRIPLE RAW QUOTES (`r"""..."""`)** sind der einzige sichere Weg für Strings in Docker-Umgebungen.
 3.  **DUAL SDK STRATEGY:** Legacy SDK für Stabilität (`gemini-2.0-flash`), Modern SDK für Spezial-Features.
-4.  **API KEY LOADING:** Immer `/app/gemini_api_key.txt` ZUERST prüfen, dann Environment.
+4.  **MAP-REDUCE:** Bei Listen > 3 Elementen niemals das LLM bitten, "alle auf einmal" zu bearbeiten. Immer zerlegen (Map) und aggregieren (Reduce).
+5.  **SCHEMA FIRST:** Frontend (`types.ts`) und Backend (`Pydantic`) müssen *vorher* abgeglichen werden. `422` bedeutet fast immer Schema-Mismatch.
 ---
-*Dokumentation finalisiert am 10.01.2026 nach erfolgreicher Migration und Grounding-Implementierung.*
+*Dokumentation aktualisiert am 11.01.2026 nach erfolgreicher Skalierung auf 9+ Konkurrenten.*
diff --git a/competitor-analysis-app/Dockerfile b/competitor-analysis-app/Dockerfile
index bcb7cddd..a51741be 100644
--- a/competitor-analysis-app/Dockerfile
+++ b/competitor-analysis-app/Dockerfile
@@ -24,7 +24,6 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY --from=build-stage /app/dist ./dist
 
 # Copy the orchestrator script and .env if needed (though env should be passed via docker-compose)
-COPY competitor_analysis_orchestrator.py .
 
 # Expose the port the app runs on
 EXPOSE 8000
diff --git a/competitor-analysis-app/competitor_analysis_orchestrator.py b/competitor-analysis-app/competitor_analysis_orchestrator.py
index 7ab0f37c..ac4b9325 100644
--- a/competitor-analysis-app/competitor_analysis_orchestrator.py
+++ b/competitor-analysis-app/competitor_analysis_orchestrator.py
@@ -55,6 +55,22 @@ if not API_KEY:
 if HAS_OLD_GENAI:
     old_genai.configure(api_key=API_KEY)
 
+# --- LOGGING SETUP: root logger at DEBUG level, written to a log file and to the console ---
+log_dir = "/app/Log_from_docker"
+os.makedirs(log_dir, exist_ok=True)  # Runs at import time; creates the directory if it is missing
+log_file = os.path.join(log_dir, "competitor_analysis_debug.log")
+
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[
+        logging.FileHandler(log_file),
+        logging.StreamHandler()
+    ],
+    force=True  # Remove handlers already attached to the root logger so this configuration takes effect
+)
+logging.info("🚀 System started. Logging to {}".format(log_file))
+
 app = FastAPI()
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
 
@@ -74,7 +90,10 @@ def scrape_text_from_url(url: str) -> str:
         return ""
 
 async def discover_and_scrape_website(start_url: str) -> str:
-    logging.info("Starting discovery for website")
+    logging.info("Starting discovery for website: {}".format(start_url))
+    if not start_url:
+        return ""
+        
     base_domain = urlparse(start_url).netloc
     urls_to_scrape = {start_url}
     
@@ -89,7 +108,7 @@ async def discover_and_scrape_website(start_url: str) -> str:
                 if urlparse(full_url).netloc == base_domain:
                     urls_to_scrape.add(full_url)
     except Exception as e:
-        logging.error("Failed homepage links: {}".format(e))
+        logging.error("Failed homepage links for {}: {}".format(start_url, e))
 
     if SERPAPI_KEY:
         try:
@@ -100,12 +119,60 @@ async def discover_and_scrape_website(start_url: str) -> str:
             for result in results.get("organic_results", []):
                 urls_to_scrape.add(result["link"])
         except Exception as e:
-            logging.error("SerpAPI failed: {}".format(e))
+            logging.error("SerpAPI failed for {}: {}".format(start_url, e))
 
-    tasks = [asyncio.to_thread(scrape_text_from_url, url) for url in urls_to_scrape]
+    # Limit to max 5 URLs to prevent timeouts
+    urls_list = list(urls_to_scrape)[:5]
+    logging.debug("Scraping URLs for {}: {}".format(start_url, urls_list))
+    
+    tasks = [asyncio.to_thread(scrape_text_from_url, url) for url in urls_list]
     scraped_contents = await asyncio.gather(*tasks)
     full_text = "\n\n---" + "-" * 5 + " SEITE " + "-" * 5 + "---" + "\n\n".join(c for c in scraped_contents if c)
-    return full_text
+    return full_text[:50000] # Limit context size
+
+async def discover_and_scrape_references_page(start_url: str) -> str:
+    """Scrape a competitor's reference / case-study pages and return their combined text.
+
+    Candidates: same-domain homepage links matching reference keywords plus SerpAPI hits (if a key
+    is set); the homepage is only the lowest-priority fallback. At most 5 pages are scraped, each
+    prefixed with a "SEITE" separator; the result is cut at 50000 chars ("" if nothing was scraped).
+    """
+    logging.info("Starting reference discovery for website: {}".format(start_url))
+    if not start_url:
+        return ""
+    base_domain = urlparse(start_url).netloc
+    candidate_urls = []  # Ordered list instead of a set: keeps the 5-URL cut below deterministic
+    # 1. Direct search on homepage (blocking HTTP call runs in a thread to keep the event loop free)
+    try:
+        r = await asyncio.to_thread(requests.get, start_url, timeout=10, verify=False)
+        soup = BeautifulSoup(r.content, 'html.parser')
+        link_keywords = ['referenz', 'kunde', 'case', 'erfolg', 'anwenderbericht', 'customer']
+        for a in soup.find_all('a', href=True):
+            href = a['href']
+            link_text = a.get_text().lower()
+            if any(k in href.lower() or k in link_text for k in link_keywords):
+                full_url = urljoin(start_url, href)
+                if urlparse(full_url).netloc == base_domain:
+                    candidate_urls.append(full_url)
+    except Exception as e:
+        logging.error("Failed to find reference links on {}: {}".format(start_url, e))
+
+    # 2. SerpAPI Search if key is available (also blocking, therefore run in a thread)
+    if SERPAPI_KEY:
+        try:
+            search_query = 'site:{} (Referenzen OR "Case Studies" OR Kundenstimmen OR Erfolgsgeschichten)'.format(base_domain)
+            params = {"engine": "google", "q": search_query, "api_key": SERPAPI_KEY}
+            results = await asyncio.to_thread(GoogleSearch(params).get_dict)
+            candidate_urls.extend(hit["link"] for hit in results.get("organic_results", []))
+        except Exception as e:
+            logging.error("SerpAPI for references failed for {}: {}".format(start_url, e))
+
+    # De-duplicate in discovery order; the homepage goes last so real reference pages win the cap
+    urls_list = list(dict.fromkeys(candidate_urls + [start_url]))[:5]
+    logging.debug("Scraping reference URLs for {}: {}".format(start_url, urls_list))
+    scraped_contents = await asyncio.gather(*(asyncio.to_thread(scrape_text_from_url, url) for url in urls_list))
+    page_header = "\n\n---" + "-" * 5 + " SEITE " + "-" * 5 + "---\n\n"
+    return "".join(page_header + c for c in scraped_contents if c)[:50000]
 
 def parse_json_response(response_text: str) -> Any:
     try:
@@ -127,7 +194,7 @@ async def call_gemini_robustly(prompt: str, schema: dict):
     if HAS_OLD_GENAI:
         try:
             logging.debug("Attempting Legacy SDK gemini-2.0-flash")
-            gen_config = {"temperature": 0.3, "response_mime_type": "application/json"}
+            gen_config = {"temperature": 0.3, "response_mime_type": "application/json", "max_output_tokens": 8192}
             if schema: gen_config["response_schema"] = schema
             model = old_genai.GenerativeModel('gemini-2.0-flash', generation_config=gen_config)
             logging.debug("PROMPT: {}".format(prompt[:500]))
@@ -142,7 +209,7 @@ async def call_gemini_robustly(prompt: str, schema: dict):
         try:
             logging.debug("Attempting Modern SDK gemini-1.5-flash")
             client_new = genai.Client(api_key=API_KEY)
-            config_args = {"temperature": 0.3, "response_mime_type": "application/json"}
+            config_args = {"temperature": 0.3, "response_mime_type": "application/json", "max_output_tokens": 8192}
             if schema: config_args["response_schema"] = schema
             response = client_new.models.generate_content(
                 model='gemini-1.5-flash',
@@ -201,24 +268,97 @@ async def fetch_step3_data(request: FetchStep3DataRequest):
     schema = {"type": "object", "properties": {"competitor_candidates": {"type": "array", "items": {"type": "object", "properties": {"name": {"type": "string"}, "url": {"type": "string"}, "confidence": {"type": "number"}, "why": {"type": "string"}, "evidence": {"type": "array", "items": evidence_schema}}, "required": ['name', 'url', 'confidence', 'why', 'evidence']}}}, "required": ['competitor_candidates']}
     return await call_gemini_robustly(prompt.format(request.market_scope, ', '.join(k_terms)), schema)
 
+def log_debug(msg):
+    """Echo msg to stdout and append it, timestamped, to the debug log file (best effort)."""
+    try:
+        print(msg, flush=True)  # Stdout first, so docker logs still get the message if the file write fails
+        with open("/app/Log_from_docker/competitor_analysis_debug.log", "a", encoding="utf-8") as f:
+            f.write("{} [MANUAL] {}\n".format(time.strftime("%Y-%m-%d %H:%M:%S"), msg))
+    except Exception as e:
+        print("Logging failed: {}".format(e))
+
+async def analyze_single_competitor(competitor: Any, my_company: Any) -> Optional[Dict]:
+    """Scrape one competitor's website and let the LLM analyse it; returns None on failure or empty result."""
+    # Same defaults for dicts and objects, so a missing key never shows up as "None" in the prompt
+    c_name = competitor.get('name', 'Unknown') if isinstance(competitor, dict) else getattr(competitor, 'name', 'Unknown')
+    c_url = competitor.get('url', '') if isinstance(competitor, dict) else getattr(competitor, 'url', '')
+    my_name = my_company.get('name', 'Me') if isinstance(my_company, dict) else getattr(my_company, 'name', 'Me')
+    log_debug("➡️ Analyzing single competitor: {} ({})".format(c_name, c_url))
+
+    # 1. Scrape (Grounding) - best effort: a scraping error must not fail the whole parallel batch
+    content = ""
+    try:
+        if c_url:
+            content = await discover_and_scrape_website(c_url)
+    except Exception as e:
+        log_debug("⚠️ Scraping failed for {}: {}".format(c_name, e))
+
+    # Context truncated to prevent overload (15k chars is approx 3-4k tokens)
+    context_text = content[:15000] if content else "Keine Website-Daten verfügbar."
+
+    # 2. Focused Prompt
+    prompt = r"""Du bist Strategie-Berater. Analysiere den Wettbewerber "{c_name}" im Vergleich zu meinem Unternehmen "{my_name}".
+
+DATENBASIS ({c_name}):
+{context}
+
+AUFGABE:
+Erstelle eine präzise Analyse. Antworte als valides JSON-Objekt (NICHT als Liste).
+Struktur:
+{{
+  "competitor": {{ "name": "{c_name}", "url": "{c_url}" }},
+  "portfolio": [ {{ "product": "...", "purpose": "..." }} ],
+  "target_industries": ["..."],
+  "delivery_model": "...",
+  "overlap_score": 0-100,
+  "differentiators": ["..."],
+  "evidence": [ {{ "url": "...", "snippet": "..." }} ]
+}}
+""".format(c_name=c_name, my_name=my_name, context=context_text, c_url=c_url)
+
+    # 3. Call AI (schema describes the single analysis object, not a list)
+    try:
+        single_analysis_schema = {
+            "type": "object",
+            "properties": {
+                "competitor": {"type": "object", "properties": {"name": {"type": "string"}, "url": {"type": "string"}}},
+                "portfolio": {"type": "array", "items": {"type": "object", "properties": {"product": {"type": "string"}, "purpose": {"type": "string"}}}},
+                "target_industries": {"type": "array", "items": {"type": "string"}},
+                "delivery_model": {"type": "string"},
+                "overlap_score": {"type": "integer"},
+                "differentiators": {"type": "array", "items": {"type": "string"}},
+                "evidence": {"type": "array", "items": evidence_schema}
+            },
+            "required": ['competitor', 'portfolio', 'target_industries', 'delivery_model', 'overlap_score', 'differentiators', 'evidence']
+        }
+        result = await call_gemini_robustly(prompt, single_analysis_schema)
+        if result:
+            log_debug("✅ Finished analysis for {}".format(c_name))
+            return result
+        log_debug("⚠️ Empty result for {}".format(c_name))
+        return None
+    except Exception as e:
+        log_debug("❌ Error analyzing {}: {}".format(c_name, e))
+        return None
+
 class FetchStep4DataRequest(BaseModel): company: Any; competitors: List[Any]; language: str
 @app.post("/api/fetchStep4Data")
 async def fetch_step4_data(request: FetchStep4DataRequest):
-    comps_list = []
-    for c in request.competitors:
-        name = c.get('name') if isinstance(c, dict) else getattr(c, 'name', 'Unknown')
-        url = c.get('url') if isinstance(c, dict) else getattr(c, 'url', '')
-        comps_list.append("- {}: {}".format(name, url))
+    """Step 4: analyse every competitor in its own coroutine and return the collected analyses."""
+    log_debug("=== STEP 4 START ===")
+    log_debug("Received {} competitors for analysis.".format(len(request.competitors)))
+    # Map phase: one analyze_single_competitor coroutine per competitor
+    tasks = [analyze_single_competitor(c, request.company) for c in request.competitors]
     
-    my_company = request.company
-    my_name = my_company.get('name') if isinstance(my_company, dict) else getattr(my_company, 'name', 'Me')
+    # Await all coroutines concurrently; gather returns the results in input order
+    results = await asyncio.gather(*tasks)
     
-    prompt = r"""Analysiere Portfolio für:
-{}
-Vergleiche mit {}. Antworte JSON."""
+    # Reduce phase: drop None entries (analyze_single_competitor returns None on failure)
+    valid_analyses = [r for r in results if r is not None]
     
-    schema = {"type": "object", "properties": {"analyses": {"type": "array", "items": {"type": "object", "properties": {"competitor": {"type": "object", "properties": {"name": {"type": "string"}, "url": {"type": "string"}}}, "portfolio": {"type": "array", "items": {"type": "object", "properties": {"product": {"type": "string"}, "purpose": {"type": "string"}}}}, "target_industries": {"type": "array", "items": {"type": "string"}}, "delivery_model": {"type": "string"}, "overlap_score": {"type": "integer"}, "differentiators": {"type": "array", "items": {"type": "string"}}, "evidence": {"type": "array", "items": evidence_schema}}, "required": ['competitor', 'portfolio', 'target_industries', 'delivery_model', 'overlap_score', 'differentiators', 'evidence']}}}, "required": ['analyses']}
-    return await call_gemini_robustly(prompt.format('\n'.join(comps_list), my_name), schema)
+    log_debug("Step 4 Complete. Returning {}/{} analyses.".format(len(valid_analyses), len(request.competitors)))
+    
+    return {"analyses": valid_analyses}
 
 class FetchStep5DataSilverBulletsRequest(BaseModel): company: Any; analyses: List[Any]; language: str
 @app.post("/api/fetchStep5Data_SilverBullets")
@@ -240,17 +380,264 @@ Antworte JSON."""
     schema = {"type": "object", "properties": {"silver_bullets": {"type": "array", "items": {"type": "object", "properties": {"competitor_name": {"type": "string"}, "statement": {"type": "string"}}, "required": ['competitor_name', 'statement']}}}, "required": ['silver_bullets']}
     return await call_gemini_robustly(prompt.format(my_name, '\n'.join(lines)), schema)
 
+class FetchStep6DataConclusionRequest(BaseModel): company: Any; analyses: List[Any]; products: List[Any]; industries: List[Any]; silver_bullets: List[Any]; language: str
 @app.post("/api/fetchStep6Data_Conclusion")
-async def fetch_step6_data_conclusion(request: Any):
-    return await call_gemini_robustly(r"Erstelle Fazit der Analyse. Antworte JSON.", {{}})
+async def fetch_step6_data_conclusion(request: FetchStep6DataConclusionRequest):
+    """Step 6: build the conclusion (product/industry matrices, overlap scores, summary) via the LLM."""
+    log_debug("=== STEP 6 START (Conclusion) ===")
+    my_company = request.company
+    my_name = my_company.get('name') if isinstance(my_company, dict) else getattr(my_company, 'name', 'Me')
+
+    # Context Preparation: accept dicts with a "name" key or plain values; skip empty names so join() cannot fail
+    product_names = [str(n) for n in (p.get('name') if isinstance(p, dict) else p for p in request.products) if n]
+    industry_names = [str(n) for n in (i.get('name') if isinstance(i, dict) else i for i in request.industries) if n]
+
+    prompt = r"""Du bist Strategie-Berater. Erstelle ein detailliertes Fazit für "{my_name}" basierend auf der Wettbewerbsanalyse.
 
+DEINE PRODUKTE (Zeilen für Matrix 1): {products}
+DEINE ZIELBRANCHEN (Zeilen für Matrix 2): {industries}
+
+ANALYSE-DATEN DER WETTBEWERBER:
+{analyses_summary}
+
+AUFGABE:
+Erstelle eine komplexe JSON-Struktur mit Matrizen.
+
+REGELN FÜR "product_matrix":
+1. Erstelle GENAU einen Eintrag pro Produkt aus der Liste "DEINE PRODUKTE".
+2. Das Feld "product" darf NUR den Namen aus dieser Liste enthalten (z.B. "Reinigungsroboter"). KEINE Produktnamen der Wettbewerber!
+3. WICHTIG: Das Array "availability" MUSS für JEDEN Wettbewerber einen Eintrag enthalten. ({count} Einträge pro Produkt!).
+   - "competitor": Exakter Name des Wettbewerbers.
+   - "has_offering": true, wenn er dieses Produkt anbietet, sonst false.
+
+REGELN FÜR "industry_matrix":
+1. Erstelle GENAU einen Eintrag pro Branche aus der Liste "DEINE ZIELBRANCHEN".
+2. Das Feld "industry" darf NUR den Namen aus dieser Liste enthalten.
+3. WICHTIG: Das Array "availability" MUSS für JEDEN Wettbewerber einen Eintrag enthalten.
+
+Antworte strikt nach diesem Schema.
+""".format(
+        my_name=my_name,
+        count=len(request.analyses),
+        products=", ".join(product_names),
+        industries=", ".join(industry_names),
+        analyses_summary=json.dumps([{ 'name': (a.get('competitor') or {}).get('name'), 'portfolio': a.get('portfolio'), 'industries': a.get('target_industries'), 'overlap': a.get('overlap_score') } for a in request.analyses], indent=2, ensure_ascii=False)
+    )
+
+    schema = {
+        "type": "object",
+        "properties": {
+            "product_matrix": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "product": {"type": "string"},
+                        "availability": {
+                            "type": "array",
+                            "items": {
+                                "type": "object",
+                                "properties": {"competitor": {"type": "string"}, "has_offering": {"type": "boolean"}}
+                            }
+                        }
+                    },
+                    "required": ["product", "availability"]
+                }
+            },
+            "industry_matrix": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "industry": {"type": "string"},
+                        "availability": {
+                            "type": "array",
+                            "items": {
+                                "type": "object",
+                                "properties": {"competitor": {"type": "string"}, "has_offering": {"type": "boolean"}}
+                            }
+                        }
+                    },
+                    "required": ["industry", "availability"]
+                }
+            },
+            "overlap_scores": {
+                "type": "array",
+                "items": {"type": "object", "properties": {"competitor": {"type": "string"}, "score": {"type": "integer"}}}
+            },
+            "summary": {"type": "string"},
+            "opportunities": {"type": "string"},
+            "next_questions": {"type": "array", "items": {"type": "string"}}
+        },
+        "required": ["product_matrix", "industry_matrix", "overlap_scores", "summary", "opportunities", "next_questions"]
+    }
+
+    # Response contract: the frontend destructures the payload as
+    #   const { conclusion } = await fetchStep6...
+    # so the LLM result has to be wrapped under the "conclusion" key.
+    result = await call_gemini_robustly(prompt, schema)
+    log_debug("RESPONSE STEP 6: {}".format(json.dumps(result, indent=2)))
+    return {"conclusion": result}
+
+class FetchStep7DataBattlecardsRequest(BaseModel): company: Any; analyses: List[Any]; silver_bullets: List[Any]; language: str
 @app.post("/api/fetchStep7Data_Battlecards")
-async def fetch_step7_data_battlecards(request: Any):
-    return await call_gemini_robustly(r"Erstelle Sales Battlecards. Antworte JSON.", {{}})
+async def fetch_step7_data_battlecards(request: FetchStep7DataBattlecardsRequest):
+    """Step 7: let the LLM create one sales battlecard per analysed competitor; returns its JSON result."""
+    log_debug("=== STEP 7 START (Battlecards) ===")
+    my_company = request.company
+    my_name = my_company.get('name') if isinstance(my_company, dict) else getattr(my_company, 'name', 'Me')
 
+    # Prepare context; "or" guards against explicit nulls in the frontend payload
+    comp_context = []
+    for a in request.analyses:
+        c_name = (a.get('competitor') or {}).get('name', 'Unknown')
+        diffs = a.get('differentiators') or []
+        comp_context.append("- {}: {}".format(c_name, ', '.join(str(d) for d in diffs[:3])))
+
+    silver_bullets_context = []
+    for sb in request.silver_bullets:
+        silver_bullets_context.append("- {}: {}".format(sb.get('competitor_name'), sb.get('statement')))
+
+    prompt = r"""Erstelle Sales Battlecards (Vertriebskarten) für die folgenden Wettbewerber von "{my_name}".
+
+WETTBEWERBER & UNTERSCHEIDUNGSMERKMALE:
+{competitors}
+
+SILVER BULLETS (Argumentationshilfen):
+{bullets}
+
+AUFGABE:
+Erstelle für JEDEN oben genannten Wettbewerber eine Battlecard.
+- "competitor_name": Exakter Name aus der Liste.
+- "competitor_profile": Kurzprofil mit "focus" (Schwerpunkt) und "positioning" (Marktpositionierung).
+- "strengths_vs_weaknesses": Unsere Stärken gegenüber den Schwächen des Gegners (warum gewinnen wir?).
+- "landmine_questions": Fragen, mit denen der Vertrieb die Schwächen des Gegners aufdecken kann.
+- "silver_bullet": Das beste Argument (nutze die Silver Bullets als Inspiration).
+
+Antworte JSON.
+""".format(
+    my_name=my_name,
+    competitors="\n".join(comp_context),
+    bullets="\n".join(silver_bullets_context)
+)
+
+    schema = {
+        "type": "object",
+        "properties": {
+            "battlecards": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "competitor_name": {"type": "string"},
+                        "competitor_profile": {
+                            "type": "object",
+                            "properties": { "focus": {"type": "string"}, "positioning": {"type": "string"} }
+                        },
+                        "strengths_vs_weaknesses": {"type": "array", "items": {"type": "string"}},
+                        "landmine_questions": {"type": "array", "items": {"type": "string"}},
+                        "silver_bullet": {"type": "string"}
+                    },
+                    "required": ["competitor_name", "competitor_profile", "strengths_vs_weaknesses", "landmine_questions", "silver_bullet"]
+                }
+            }
+        },
+        "required": ["battlecards"]
+    }
+    return await call_gemini_robustly(prompt, schema)
+
+async def analyze_single_competitor_references(competitor: Any) -> Optional[Dict]:
+    """Extract reference customers for one competitor from scraped pages; always returns a result dict."""
+    c_name = competitor.get('name', 'Unknown') if isinstance(competitor, dict) else getattr(competitor, 'name', 'Unknown')
+    c_url = competitor.get('url', '') if isinstance(competitor, dict) else getattr(competitor, 'url', '')
+    log_debug("➡️ Analyzing references for single competitor: {} ({})".format(c_name, c_url))
+    # 1. Scrape (Grounding) - best effort: a scraping error must not fail the whole parallel batch
+    content = ""
+    try:
+        if c_url:
+            content = await discover_and_scrape_references_page(c_url)
+    except Exception as e:
+        log_debug("⚠️ Reference scraping failed for {}: {}".format(c_name, e))
+    if not content.strip():  # Nothing to ground on: skip the LLM instead of inviting invented references
+        return {"competitor_name": c_name, "references": []}
+    context_text = content[:20000]
+
+    # 2. Focused Prompt
+    prompt = r"""Du bist ein Analyst. Extrahiere Referenzkunden und Case Studies aus dem folgenden Text für das Unternehmen "{c_name}".
+
+DATENBASIS:
+{context_text}
+
+AUFGABE:
+Identifiziere handfeste Referenzkunden. Nutze AUSSCHLIESSLICH die DATENBASIS; wenn dort keine spezifischen Namen genannt werden, beschreibe die dort genannten typischen Kunden und Branchen.
+Erstelle eine Liste von Referenzen im JSON-Format. Das Ergebnis MUSS ein Objekt sein, das "competitor_name" und "references" enthält.
+
+STRUKTUR:
+{{
+  "competitor_name": "{c_name}",
+  "references": [
+    {{
+      "name": "...",
+      "industry": "...",
+      "testimonial_snippet": "...",
+      "case_study_url": "..."
+    }}
+  ]
+}}
+""".format(c_name=c_name, context_text=context_text)
+
+    # 3. Call AI
+    try:
+        single_ref_schema = {
+            "type": "object",
+            "properties": {
+                "competitor_name": {"type": "string"},
+                "references": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "name": {"type": "string"},
+                            "industry": {"type": "string"},
+                            "testimonial_snippet": {"type": "string"},
+                            "case_study_url": {"type": "string", "description": "Vollständige URL zur Case Study, falls gefunden."}
+                        },
+                        "required": ["name", "industry"]
+                    }
+                }
+            },
+            "required": ["competitor_name", "references"]
+        }
+        result = await call_gemini_robustly(prompt, single_ref_schema)
+        if result and 'references' in result:
+            log_debug("✅ Finished reference analysis for {}".format(c_name))
+            result['competitor_name'] = c_name # Ensure correct name
+            return result
+        log_debug("⚠️ Empty or invalid reference result for {}. Returning fallback.".format(c_name))
+        return {"competitor_name": c_name, "references": []}
+    except Exception as e:
+        log_debug("❌ Error analyzing references for {}: {}".format(c_name, e))
+        return {"competitor_name": c_name, "references": []}
+
+class FetchStep8DataReferenceAnalysisRequest(BaseModel): competitors: List[Any]; language: str
 @app.post("/api/fetchStep8Data_ReferenceAnalysis")
-async def fetch_step8_data_reference_analysis(request: Any):
-    return await call_gemini_robustly(r"Finde Referenzkunden. Antworte JSON.", {{}})
+async def fetch_step8_data_reference_analysis(request: FetchStep8DataReferenceAnalysisRequest):
+    """Step 8: run the grounded reference analysis for every competitor and return the collected results."""
+    log_debug("=== STEP 8 START (Grounded References) ===")
+    # Parallel execution: one analyze_single_competitor_references coroutine per competitor
+    tasks = [analyze_single_competitor_references(c) for c in request.competitors]
+    
+    results = await asyncio.gather(*tasks)
+    
+    # Keep only non-None results
+    valid_analyses = [r for r in results if r is not None]
+    
+    log_debug("Step 8 Complete. Returning {}/{} reference analyses.".format(len(valid_analyses), len(request.competitors)))
+
+    return {
+        "reference_analysis": valid_analyses,
+        "groundingMetadata": []  # Always sent as an empty list by this endpoint
+    }
 
 # Static Files
 dist_path = os.path.join(os.getcwd(), "dist")
diff --git a/docker-compose.yml b/docker-compose.yml
index be8ba2f0..09721670 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -136,11 +136,16 @@ services:
       dockerfile: Dockerfile
     container_name: competitor-analysis
     restart: unless-stopped
+    dns:
+      - 8.8.8.8
+      - 8.8.4.4
     volumes:
-      # Sideloading: Python Orchestrator
+      # Sideloading: Python Orchestrator ONLY (to preserve built assets in /app/dist)
       - ./competitor-analysis-app/competitor_analysis_orchestrator.py:/app/competitor_analysis_orchestrator.py
       # Keys (passed via environment or file)
       - ./gemini_api_key.txt:/app/gemini_api_key.txt
+      # Logs
+      - ./Log_from_docker:/app/Log_from_docker
     environment:
       - PYTHONUNBUFFERED=1
       - GEMINI_API_KEY_FILE=/app/gemini_api_key.txt