feat(b2b-marketing): Finalize grounding architecture and frontend improvements
- Upgrade backend to use gemini-2.5-flash with sanitized HTML parsing (no token limit). - Implement robust retry logic and increased timeouts (600s) for deep analysis. - Add file-based logging for prompts and responses. - Fix API endpoint (v1) and regex parsing issues. - Frontend: Optimize PDF export (landscape, no scrollbars), fix copy-paste button, add 'Repeat Step 6' feature. - Update documentation to 'Completed' status.
This commit is contained in:
@@ -1,14 +1,14 @@
|
||||
|
||||
import React, { useState, useCallback, useRef } from 'react';
|
||||
import { GoogleGenAI, Chat } from "@google/genai";
|
||||
import React, { useState, useCallback } from 'react';
|
||||
import { InputForm } from './components/InputForm';
|
||||
import { StepDisplay } from './components/StepDisplay';
|
||||
import { LoadingSpinner, BotIcon, SparklesIcon, MarkdownIcon, PrintIcon } from './components/Icons';
|
||||
import { ExportMenu } from './components/ExportMenu';
|
||||
import { PROMPTS, translations } from './constants';
|
||||
import { translations } from './constants';
|
||||
import type { AnalysisStep, AnalysisData, InputData } from './types';
|
||||
import { parseGeminiStepResponse } from './services/parser';
|
||||
import { generateMarkdown, downloadFile, tableToMarkdown } from './services/export';
|
||||
import { generateMarkdown, downloadFile } from './services/export';
|
||||
|
||||
const API_BASE_URL = '/api';
|
||||
|
||||
const App: React.FC = () => {
|
||||
const [inputData, setInputData] = useState<InputData>({
|
||||
@@ -20,10 +20,8 @@ const App: React.FC = () => {
|
||||
});
|
||||
const [analysisData, setAnalysisData] = useState<Partial<AnalysisData>>({});
|
||||
const [isLoading, setIsLoading] = useState<boolean>(false);
|
||||
const [isEnriching, setIsEnriching] = useState<boolean>(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [generationStep, setGenerationStep] = useState<number>(0); // 0: idle, 1-6: step X is complete
|
||||
const chatRef = useRef<Chat | null>(null);
|
||||
|
||||
const t = translations[inputData.language];
|
||||
const STEP_TITLES = t.stepTitles;
|
||||
@@ -41,26 +39,28 @@ const App: React.FC = () => {
|
||||
setGenerationStep(0);
|
||||
|
||||
try {
|
||||
const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
|
||||
const currentPrompts = PROMPTS[inputData.language];
|
||||
|
||||
const newChat = ai.chats.create({
|
||||
model: 'gemini-2.5-pro',
|
||||
config: {
|
||||
systemInstruction: currentPrompts.SYSTEM_PROMPT.replace('{{language}}', inputData.language)
|
||||
}
|
||||
const response = await fetch(`${API_BASE_URL}/start-generation`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
companyUrl: inputData.companyUrl,
|
||||
language: inputData.language,
|
||||
regions: inputData.regions,
|
||||
focus: inputData.focus,
|
||||
}),
|
||||
});
|
||||
chatRef.current = newChat;
|
||||
|
||||
let prompt = currentPrompts.STEP_PROMPTS[0];
|
||||
prompt = prompt.replace('{{company_url}}', inputData.companyUrl);
|
||||
prompt = prompt.replace('{{language}}', inputData.language);
|
||||
prompt = prompt.replace('{{regions}}', inputData.regions);
|
||||
prompt = prompt.replace('{{focus}}', inputData.focus);
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json();
|
||||
throw new Error(errorData.details || `HTTP error! status: ${response.status}`);
|
||||
}
|
||||
|
||||
const response = await newChat.sendMessage({ message: prompt });
|
||||
const parsedData = await response.json();
|
||||
|
||||
if (parsedData.error) {
|
||||
throw new Error(parsedData.error);
|
||||
}
|
||||
|
||||
const parsedData = parseGeminiStepResponse(response.text);
|
||||
setAnalysisData(parsedData);
|
||||
setGenerationStep(1);
|
||||
|
||||
@@ -73,36 +73,34 @@ const App: React.FC = () => {
|
||||
}, [inputData]);
|
||||
|
||||
const handleGenerateNextStep = useCallback(async () => {
|
||||
if (!chatRef.current || generationStep >= 6) return;
|
||||
if (generationStep >= 6) return;
|
||||
|
||||
setIsLoading(true);
|
||||
setError(null);
|
||||
const nextStepIndex = generationStep;
|
||||
|
||||
try {
|
||||
let context = '';
|
||||
for (let i = 0; i < generationStep; i++) {
|
||||
const stepKey = STEP_KEYS[i];
|
||||
const stepObject = analysisData[stepKey];
|
||||
if (stepObject) {
|
||||
context += `\n\n## ${STEP_TITLES[stepKey]}\n\n`;
|
||||
const summary = stepObject.summary && stepObject.summary.length > 0 ? `**${t.summaryTitle}**\n${stepObject.summary.map(s => `* ${s}`).join('\n')}\n\n` : '';
|
||||
const table = tableToMarkdown(stepObject);
|
||||
context += `${summary}${table}`;
|
||||
}
|
||||
const response = await fetch(`${API_BASE_URL}/next-step`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
analysisData,
|
||||
language: inputData.language,
|
||||
channels: inputData.channels,
|
||||
generationStep: generationStep + 1, // Pass the step we want to generate
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json();
|
||||
throw new Error(errorData.details || `HTTP error! status: ${response.status}`);
|
||||
}
|
||||
|
||||
const currentPrompts = PROMPTS[inputData.language];
|
||||
let prompt = currentPrompts.STEP_PROMPTS[nextStepIndex];
|
||||
prompt = prompt.replace('{{previous_steps_data}}', context);
|
||||
const parsedData = await response.json();
|
||||
|
||||
if (nextStepIndex === 5) { // Step 6 is index 5
|
||||
prompt = prompt.replace('{{channels}}', inputData.channels.join(', '));
|
||||
if (parsedData.error) {
|
||||
throw new Error(parsedData.error);
|
||||
}
|
||||
|
||||
const response = await chatRef.current.sendMessage({ message: prompt });
|
||||
const parsedData = parseGeminiStepResponse(response.text);
|
||||
|
||||
setAnalysisData(prev => ({ ...prev, ...parsedData }));
|
||||
setGenerationStep(prev => prev + 1);
|
||||
|
||||
@@ -113,7 +111,7 @@ const App: React.FC = () => {
|
||||
} finally {
|
||||
setIsLoading(false);
|
||||
}
|
||||
}, [analysisData, generationStep, inputData.channels, inputData.language, STEP_KEYS, STEP_TITLES, t.summaryTitle]);
|
||||
}, [analysisData, generationStep, inputData.channels, inputData.language]);
|
||||
|
||||
const handleDataChange = <K extends keyof AnalysisData>(step: K, newData: AnalysisData[K]) => {
|
||||
if (analysisData[step]) {
|
||||
@@ -121,112 +119,6 @@ const App: React.FC = () => {
|
||||
}
|
||||
};
|
||||
|
||||
const getEnrichPrompt = (lang: 'de' | 'en', url: string, productName: string) => {
|
||||
if (lang === 'en') {
|
||||
return `# Task
|
||||
Fill in the information for the following product/solution based on the website ${url}. Respond ONLY with the content for the remaining 4 columns, separated by '|||'.
|
||||
|
||||
# Product/Solution
|
||||
${productName}
|
||||
|
||||
# Column Order
|
||||
1. Description (1–2 sentences)
|
||||
2. Core Features
|
||||
3. Differentiation
|
||||
4. Primary Source (URL)
|
||||
|
||||
# Important
|
||||
Respond *only* with the text for the 4 columns, separated by '|||'. Do not output any headers or explanations.`;
|
||||
}
|
||||
// German (original)
|
||||
return `# Aufgabe
|
||||
Fülle die Informationen für das folgende Produkt/Lösung basierend auf der Webseite ${url} aus. Antworte NUR mit dem Inhalt für die restlichen 4 Spalten, getrennt durch '|||'.
|
||||
|
||||
# Produkt/Lösung
|
||||
${productName}
|
||||
|
||||
# Spalten-Reihenfolge
|
||||
1. Beschreibung (1–2 Sätze)
|
||||
2. Kernfunktionen
|
||||
3. Differenzierung
|
||||
4. Primäre Quelle (URL)
|
||||
|
||||
# Wichtig
|
||||
Antworte *nur* mit dem Text für die 4 Spalten, getrennt durch '|||'. Gib keine Überschriften oder Erklärungen aus.`;
|
||||
};
|
||||
|
||||
|
||||
const handleEnrichOfferRow = useCallback(async (productName: string, productUrl?: string) => {
|
||||
if (!analysisData.offer) return;
|
||||
|
||||
setIsEnriching(true);
|
||||
setError(null);
|
||||
|
||||
const loadingText = t.loadingButton.replace('...', '');
|
||||
const placeholderRow = [productName, loadingText, loadingText, loadingText, loadingText];
|
||||
|
||||
setAnalysisData(prev => {
|
||||
if (!prev.offer) return prev;
|
||||
const currentOffer = prev.offer;
|
||||
const updatedRows = [...currentOffer.rows, placeholderRow];
|
||||
return {
|
||||
...prev,
|
||||
offer: {
|
||||
...currentOffer,
|
||||
rows: updatedRows,
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
try {
|
||||
const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
|
||||
// Use the specific product URL if provided, otherwise fallback to the main company URL
|
||||
const targetUrl = productUrl && productUrl.trim() !== '' ? productUrl.trim() : inputData.companyUrl;
|
||||
|
||||
const enrichPrompt = getEnrichPrompt(inputData.language, targetUrl, productName);
|
||||
|
||||
const response = await ai.models.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: enrichPrompt
|
||||
});
|
||||
const text = response.text || '';
|
||||
const enrichedParts = text.split('|||').map(s => s.trim());
|
||||
|
||||
const finalEnrichedData = Array(4).fill('');
|
||||
for (let i = 0; i < 4; i++) {
|
||||
if (enrichedParts[i]) {
|
||||
finalEnrichedData[i] = enrichedParts[i];
|
||||
}
|
||||
}
|
||||
|
||||
const newRow = [productName, ...finalEnrichedData];
|
||||
|
||||
setAnalysisData(prev => {
|
||||
const offerData = prev.offer;
|
||||
if (!offerData) return prev;
|
||||
const finalRows = offerData.rows.map(row =>
|
||||
row[0] === productName && row[1] === loadingText ? newRow : row
|
||||
);
|
||||
return { ...prev, offer: { ...offerData, rows: finalRows } };
|
||||
});
|
||||
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
// On error, clear loading text so user can edit manually
|
||||
setAnalysisData(prev => {
|
||||
const offerData = prev.offer;
|
||||
if (!offerData) return prev;
|
||||
const emptyRow = [productName, '', '', '', ''];
|
||||
const finalRows = offerData.rows.map(row =>
|
||||
row[0] === productName && row[1] === loadingText ? emptyRow : row
|
||||
);
|
||||
return { ...prev, offer: { ...offerData, rows: finalRows } };
|
||||
});
|
||||
} finally {
|
||||
setIsEnriching(false);
|
||||
}
|
||||
}, [analysisData.offer, inputData.companyUrl, inputData.language, t.loadingButton]);
|
||||
|
||||
const handleDownloadMarkdown = () => {
|
||||
if (!analysisData) return;
|
||||
const markdownContent = generateMarkdown(analysisData as AnalysisData, STEP_TITLES, t.summaryTitle);
|
||||
@@ -251,9 +143,9 @@ Antworte *nur* mit dem Text für die 4 Spalten, getrennt durch '|||'. Gib keine
|
||||
headers={step.headers}
|
||||
rows={step.rows}
|
||||
onDataChange={(newRows) => handleDataChange(stepKey, { ...step, rows: newRows })}
|
||||
canAddRows={stepKey === 'offer'}
|
||||
onEnrichRow={stepKey === 'offer' ? handleEnrichOfferRow : undefined}
|
||||
isEnriching={isEnriching}
|
||||
canAddRows={false} // Disabled enrich functionality
|
||||
onEnrichRow={undefined}
|
||||
isEnriching={false}
|
||||
canDeleteRows={canDelete}
|
||||
t={t}
|
||||
/>
|
||||
@@ -359,7 +251,7 @@ Antworte *nur* mit dem Text für die 4 Spalten, getrennt durch '|||'. Gib keine
|
||||
{t.analysisCompleteText.replace('{{otherLanguage}}', t.otherLanguage)}
|
||||
</p>
|
||||
|
||||
<div className="flex flex-col sm:flex-row justify-center items-center gap-4">
|
||||
<div className="flex flex-col sm:flex-row justify-center items-center gap-4 mb-6">
|
||||
<button
|
||||
onClick={handleDownloadMarkdown}
|
||||
className="w-full sm:w-auto flex items-center justify-center px-6 py-3 border border-transparent text-base font-medium rounded-lg text-white bg-slate-900 dark:bg-sky-600 hover:bg-slate-800 dark:hover:bg-sky-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-slate-500 shadow-md transition-all"
|
||||
@@ -375,6 +267,16 @@ Antworte *nur* mit dem Text für die 4 Spalten, getrennt durch '|||'. Gib keine
|
||||
{t.exportAsPdf}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div className="border-t border-sky-200 dark:border-sky-700/50 pt-6 mt-2">
|
||||
<p className="text-sm text-slate-500 dark:text-slate-400 mb-3">Ergebnis verfeinern?</p>
|
||||
<button
|
||||
onClick={() => setGenerationStep(5)}
|
||||
className="text-sm text-sky-600 dark:text-sky-400 hover:text-sky-800 dark:hover:text-sky-200 font-medium underline decoration-sky-300 hover:decoration-sky-600 transition-colors"
|
||||
>
|
||||
Schritt 6 (Botschaften) mit Fokus auf eine Branche neu generieren
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
|
||||
55
b2b-marketing-assistant/Dockerfile
Normal file
55
b2b-marketing-assistant/Dockerfile
Normal file
@@ -0,0 +1,55 @@
|
||||
# Stage 1: Build the React frontend
|
||||
FROM node:20-slim AS frontend-builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy package.json and install all dependencies
|
||||
# Paths are relative to the build context (project root)
|
||||
COPY b2b-marketing-assistant/package.json ./
|
||||
RUN npm install
|
||||
|
||||
# Copy the rest of the frontend application code
|
||||
COPY b2b-marketing-assistant/ .
|
||||
|
||||
# Build the application
|
||||
RUN npm run build
|
||||
|
||||
# ---
|
||||
|
||||
# Stage 2: Final application image
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies (minimal)
|
||||
# We use NodeSource to get a clean, modern Node.js install without bloat
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends curl ca-certificates && \
|
||||
curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
|
||||
apt-get install -y --no-install-recommends nodejs && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Python dependencies
|
||||
COPY b2b-marketing-assistant/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy the Node.js server and its production dependencies manifest
|
||||
COPY b2b-marketing-assistant/server.cjs .
|
||||
COPY b2b-marketing-assistant/package.json .
|
||||
|
||||
# Install only production dependencies for the Node.js server
|
||||
RUN npm install --omit=dev
|
||||
|
||||
# Copy the built React app from the builder stage
|
||||
COPY --from=frontend-builder /app/dist ./dist
|
||||
|
||||
# Copy the main Python orchestrator script from the project root
|
||||
COPY b2b_marketing_orchestrator.py .
|
||||
# Copy Gemini API Key file if it exists in root
|
||||
COPY gemini_api_key.txt .
|
||||
|
||||
# Expose the port the Node.js server will run on
|
||||
EXPOSE 3002
|
||||
|
||||
# The command to run the application
|
||||
CMD ["node", "server.cjs"]
|
||||
@@ -51,19 +51,55 @@ export const StepDisplay: React.FC<StepDisplayProps> = ({ title, summary, header
|
||||
onDataChange(newRows);
|
||||
};
|
||||
|
||||
const fallbackCopyTextToClipboard = (text: string) => {
|
||||
const textArea = document.createElement("textarea");
|
||||
textArea.value = text;
|
||||
|
||||
// Ensure textarea is not visible but part of DOM
|
||||
textArea.style.position = "fixed";
|
||||
textArea.style.left = "-9999px";
|
||||
textArea.style.top = "0";
|
||||
|
||||
document.body.appendChild(textArea);
|
||||
textArea.focus();
|
||||
textArea.select();
|
||||
|
||||
try {
|
||||
document.execCommand('copy');
|
||||
setCopySuccess(t.copySuccess);
|
||||
setTimeout(() => setCopySuccess(''), 2000);
|
||||
} catch (err) {
|
||||
console.error('Fallback: Oops, unable to copy', err);
|
||||
setCopySuccess(t.copyFailure);
|
||||
setTimeout(() => setCopySuccess(''), 2000);
|
||||
}
|
||||
|
||||
document.body.removeChild(textArea);
|
||||
};
|
||||
|
||||
const handleCopyToClipboard = (text: string) => {
|
||||
navigator.clipboard.writeText(text).catch(err => console.error('Failed to copy text: ', err));
|
||||
if (!navigator.clipboard) {
|
||||
fallbackCopyTextToClipboard(text);
|
||||
return;
|
||||
}
|
||||
navigator.clipboard.writeText(text).catch(err => {
|
||||
console.error('Failed to copy text: ', err);
|
||||
fallbackCopyTextToClipboard(text);
|
||||
});
|
||||
};
|
||||
|
||||
const handleCopyTable = () => {
|
||||
const tsvString = convertArrayToTsv(headers, filteredRows);
|
||||
if (!navigator.clipboard) {
|
||||
fallbackCopyTextToClipboard(tsvString);
|
||||
return;
|
||||
}
|
||||
navigator.clipboard.writeText(tsvString).then(() => {
|
||||
setCopySuccess(t.copySuccess);
|
||||
setTimeout(() => setCopySuccess(''), 2000);
|
||||
}).catch(err => {
|
||||
console.error('Failed to copy table: ', err);
|
||||
setCopySuccess(t.copyFailure);
|
||||
setTimeout(() => setCopySuccess(''), 2000);
|
||||
fallbackCopyTextToClipboard(tsvString);
|
||||
});
|
||||
};
|
||||
|
||||
@@ -173,7 +209,7 @@ export const StepDisplay: React.FC<StepDisplayProps> = ({ title, summary, header
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="overflow-x-auto">
|
||||
<div className="overflow-x-auto print:overflow-visible print:block">
|
||||
<table className="w-full table-fixed text-sm text-left text-slate-500 dark:text-slate-400">
|
||||
<thead className="text-xs text-slate-700 dark:text-slate-300 uppercase bg-slate-100 dark:bg-slate-700">
|
||||
<tr>
|
||||
|
||||
@@ -104,262 +104,3 @@ export const translations = {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
const SYSTEM_PROMPT_DE = `
|
||||
# Systemrolle
|
||||
|
||||
Du bist ein **B2B-Marketing-Researcher & Copywriter**. Du analysierst eine Unternehmens-URL, identifizierst Angebot, Zielgruppen, konkrete Zielrollen, deren Painpoints sowie Gains und formulierst darauf basierend eine wertschätzende, fachkundige Marketingbotschaft. **Antworte nur mit Ergebnissen, keine Gedankengänge.** Belege jede Aussage mit einer konkreten Seiten-URL der analysierten Domain. Kennzeichne Unsicherheiten explizit. Deine Antwort muss immer in der Zielsprache {{language}} sein.
|
||||
|
||||
# Arbeitsprinzipien
|
||||
|
||||
1. **Quellenpriorisierung:** Produktseite → Lösungsseite → Branchen/Industrien/Referenzen → Unternehmens-/Über-uns-Seite → Blog/News (nur zur Verifikation).
|
||||
2. **Faktenpolicy:** Nur aus der Domain der bereitgestellten URL ableiten; bei Vermutungen: als *Hypothese* kennzeichnen.
|
||||
3. **B2B-Ton:** sachkundig, respektvoll, nicht marktschreierisch.
|
||||
4. **Klarheit & Struktur:** Jede Stufe als Markdown-Tabelle + optionales Kurzresümee dokumentieren.
|
||||
5. **Mindestens 4 spezifische Rollen** je Zielgruppe (nicht generisch).
|
||||
6. **Kompakt & nützlich:** Präzise Formulierungen; keine Floskeln.
|
||||
`;
|
||||
|
||||
const STEP_PROMPTS_DE = [
|
||||
// Step 1: Offer
|
||||
`# Aufgabe
|
||||
Führe **Schritt 1 – Angebot verstehen (WAS)** für das folgende Unternehmen durch.
|
||||
|
||||
# Eingaben
|
||||
* **Unternehmens-URL:** \`{{company_url}}\`
|
||||
* **Zielsprache der Ausgabe:** \`{{language}}\`
|
||||
* **Region(en) / Märkte (optional):** \`{{regions}}\`
|
||||
* **Produkt-/Lösungsfokus (optional):** \`{{focus}}\`
|
||||
|
||||
# Anweisungen für Schritt 1
|
||||
* Extrahiere Produkt(e)/Leistung(en), Kernfunktionen, Differenzierung, relevante Werteversprechen.
|
||||
* Erstelle ein kurzes Resümee (max. 4 Bulletpoints) der wichtigsten Erkenntnisse.
|
||||
* **Output:** Tabelle mit Spalten: *Produkt/Lösung | Beschreibung (1–2 Sätze) | Kernfunktionen | Differenzierung | Primäre Quelle (URL)*.
|
||||
* **Format-Anforderung:** Antworte NUR mit den Ergebnissen für diesen einen Schritt. Deine Antwort muss mit der Überschrift "## Schritt 1: Angebot (WAS)" beginnen und das Kurzresümee sowie die Markdown-Tabelle enthalten. Gib keine weiteren Erklärungen ab.`,
|
||||
|
||||
// Step 2: Target Groups
|
||||
`# Aufgabe
|
||||
Führe nun **Schritt 2 – Zielgruppen (WER – Unternehmen)** durch.
|
||||
|
||||
# Kontext: Validierte Ergebnisse aus vorherigen Schritten
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Anweisungen für Schritt 2
|
||||
* Identifiziere B2B-Zielsegmente (Branchen/Unternehmensarten/Größen/Regionen) basierend auf dem gegebenen Angebot.
|
||||
* **Output:** Tabelle: *Zielbranche/Segment | Typische Unternehmensmerkmale | Region(en) | Relevanzbeleg (URL)*.
|
||||
* **Format-Anforderung:** Antworte NUR mit den Ergebnissen für diesen einen Schritt. Deine Antwort muss mit der Überschrift "## Schritt 2: Zielgruppen (Unternehmen)" beginnen und die Markdown-Tabelle enthalten.`,
|
||||
|
||||
// Step 3: Personas
|
||||
`# Aufgabe
|
||||
Führe nun **Schritt 3 – Zielpersonen/Rollen (WER – Personen)** durch.
|
||||
|
||||
# Kontext: Validierte Ergebnisse aus vorherigen Schritten
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Anweisungen für Schritt 3
|
||||
* Für jede Zielbranche: mind. 4 **spezifische** Rollen mit Verantwortungsbereich und Kaufbeteiligung (E, I, D, U nach RACI-Logik). Erfinde **keine** Personen; leite Rollen logisch aus Problem-/Prozessbezug ab.
|
||||
* **Output:** Tabelle: *Rolle (präzise) | Verantwortungsbereich | Warum relevant für Produkt | Kaufbeteiligung (E/I/D/U) | Quelle/Indiz (URL oder Referenz)*.
|
||||
* **Format-Anforderung:** Antworte NUR mit den Ergebnissen für diesen einen Schritt. Deine Antwort muss mit der Überschrift "## Schritt 3: Zielpersonen (Rollen)" beginnen.`,
|
||||
|
||||
// Step 4: Pain Points
|
||||
`# Aufgabe
|
||||
Führe nun **Schritt 4 – Painpoints je Rolle (WARUM)** durch.
|
||||
|
||||
# Kontext: Validierte Ergebnisse aus vorherigen Schritten
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Anweisungen für Schritt 4
|
||||
* Formuliere pro Rolle 3–5 konkrete Painpoints (Beobachtungen, keine Features).
|
||||
* Tagge jeden Painpoint mit einer Kategorie: **Kosten | Zeit | Risiko | Compliance | Qualität | Mitarbeiterbindung.**
|
||||
* Füge eine **Impact-Schätzung (€, h, %)** als Hypothese hinzu.
|
||||
* **Output:** Tabelle: *Rolle | Painpoint (konkret, messbar/operativ) | Kategorie | Auswirkung (Kosten, Risiko, Zeit) | Impact-Schätzung (€, h, %) | Dringlichkeit (hoch/mittel/niedrig) | Quelle/Indiz (URL)*.
|
||||
* **Format-Anforderung:** Antworte NUR mit den Ergebnissen für diesen einen Schritt. Deine Antwort muss mit der Überschrift "## Schritt 4: Painpoints je Rolle" beginnen.`,
|
||||
|
||||
// Step 5: Gains
|
||||
`# Aufgabe
|
||||
Führe nun **Schritt 5 – Gains & Nutzen je Rolle (WARUM wechseln)** durch.
|
||||
|
||||
# Kontext: Validierte Ergebnisse aus vorherigen Schritten
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Anweisungen für Schritt 5
|
||||
* Basierend auf den identifizierten Painpoints, formuliere pro Rolle 2-3 konkrete Gains (Vorteile/Nutzen).
|
||||
* Quantifiziere den Nutzen als Hypothese (z.B. Einsparung in €, Zeitgewinn in h, Effizienzsteigerung in %).
|
||||
* **Output:** Tabelle: *Rolle | Gain (konkreter Nutzen) | Quantifizierung (Hypothese in €, h, %) | Quelle/Indiz (URL)*.
|
||||
* **Format-Anforderung:** Antworte NUR mit den Ergebnissen für diesen einen Schritt. Deine Antwort muss mit der Überschrift "## Schritt 5: Gains & Nutzen je Rolle" beginnen.`,
|
||||
|
||||
// Step 6: Messages
|
||||
`# Aufgabe
|
||||
Führe nun **Schritt 6 – Marketingbotschaft (WIE sprechen)** durch.
|
||||
|
||||
# Kontext: Validierte Ergebnisse aus vorherigen Schritten
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Eingaben für diesen Schritt
|
||||
* **Gewünschte Kanäle für die Botschaft:** \`{{channels}}\`
|
||||
|
||||
# Anweisungen für Schritt 6: Chain-of-Thought-Analyse & Texterstellung
|
||||
|
||||
**Dein Ziel ist es, für JEDE Zielbranche aus Schritt 2 eine EIGENE, spezifische Botschaft für JEDE Rolle aus Schritt 3 zu erstellen. Das Ergebnis MUSS eine vollständige Matrix sein (z.B. 3 Zielbranchen x 4 Rollen = 12 einzigartige Botschaften).**
|
||||
|
||||
Führe für jede Kombination aus **[Zielbranche/Segment]** und **[Rolle]** den folgenden Denkprozess durch, bevor du die finale Botschaft formulierst:
|
||||
|
||||
1. **Schritt 6.1 (Analyse): Produkt-Rollen-Fit.**
|
||||
* Welches Produkt/welche Lösung aus der "Angebot"-Tabelle (Schritt 1) ist für die **[Rolle]** am relevantesten?
|
||||
* *Beispiel-Gedanke:* "Für den Einsatzleiter im Kundenservice ist die Software zur Einsatzplanung relevanter als die mobile App, da er für die Disposition zuständig ist."
|
||||
|
||||
2. **Schritt 6.2 (Analyse): Branchen-Use-Case.**
|
||||
* Was sind 1-2 typische Anwendungsfälle für das ausgewählte Produkt in der **[Zielbranche/Segment]**? Was macht die **[Rolle]** damit konkret?
|
||||
* *Beispiel-Gedanke:* "Ein Servicetechniker im Maschinenbau nutzt die mobile App typischerweise, um auf Wartungsprotokolle zuzugreifen und digitale Serviceberichte direkt beim Kunden zu erstellen."
|
||||
|
||||
3. **Schritt 6.3 (Analyse): Nutzen-Quantifizierung.**
|
||||
* Betrachte die Painpoints (Schritt 4) und Gains (Schritt 5) für die **[Rolle]**. Welcher Painpoint ist am dringendsten, welcher Gain am überzeugendsten?
|
||||
* Leite daraus einen konkreten, für die **[Rolle]** relevanten KPI ab (z.B. First-Contact-Resolution-Rate, Zeit-bis-zur-Lösung, Anlagenausfallzeit).
|
||||
* Formuliere den quantifizierbaren Nutzen aus Schritt 5 in Bezug auf diesen KPI.
|
||||
* *Beispiel-Gedanke:* "Der Painpoint des Leiters Instandhaltung ist 'ungeplante Anlagenausfälle'. Der Gain ist 'Reduzierung der Ausfallzeiten um 15%'. Der relevante KPI ist die 'Overall Equipment Effectiveness (OEE)'. Der Nutzen ist die Steigerung der OEE durch vorausschauende Wartungsplanung."
|
||||
|
||||
4. **Schritt 6.4 (Synthese): Botschaft formulieren.**
|
||||
* Synthetisiere die Erkenntnisse aus 6.1-6.3 zu einer prägnanten Kernbotschaft (2-3 Sätze) nach der Struktur: **Beobachtung (Problem) → Niedrigschwellige Lösungsidee → Produkt-Brücke → Quantifizierter Nutzen.**
|
||||
* Erstelle Varianten dieser Botschaft für die Kanäle: {{channels}}.
|
||||
|
||||
# Output-Format
|
||||
Erstelle NUR die finale Markdown-Tabelle. Der Denkprozess (6.1-6.3) ist eine interne Anweisung an dich und soll NICHT im Output erscheinen.
|
||||
* **Tabelle-Spalten:** *Zielbranche/Segment | Rolle | Kernbotschaft (2–3 Sätze) | {{channels}}*.
|
||||
* **Anforderung:** Deine Antwort muss mit der Überschrift "## Schritt 6: Botschaften" beginnen und NUR die vollständige Markdown-Tabelle enthalten.`,
|
||||
];
|
||||
|
||||
|
||||
const SYSTEM_PROMPT_EN = `
|
||||
# System Role
|
||||
|
||||
You are a **B2B Marketing Researcher & Copywriter**. You analyze a company URL, identify the offer, target groups, specific target roles, their pain points and gains, and based on this, you formulate an appreciative, expert marketing message. **Answer only with results, no thought processes.** Support every statement with a specific page URL from the analyzed domain. Explicitly mark uncertainties. Your response must always be in the target language {{language}}.
|
||||
|
||||
# Working Principles
|
||||
|
||||
1. **Source Prioritization:** Product Page → Solutions Page → Industries/References → Company/About Us Page → Blog/News (for verification only).
|
||||
2. **Fact Policy:** Only derive from the domain of the provided URL; for assumptions, mark them as a *hypothesis*.
|
||||
3. **B2B Tone:** Knowledgeable, respectful, not salesy.
|
||||
4. **Clarity & Structure:** Document each stage as a Markdown table + an optional short summary.
|
||||
5. **At least 4 specific roles** per target group (not generic).
|
||||
6. **Concise & Useful:** Precise wording; no clichés.
|
||||
`;
|
||||
|
||||
const STEP_PROMPTS_EN = [
|
||||
// Step 1: Offer
|
||||
`# Task
|
||||
Perform **Step 1 – Understand the Offer (WHAT)** for the following company.
|
||||
|
||||
# Inputs
|
||||
* **Company URL:** \`{{company_url}}\`
|
||||
* **Target Language of Output:** \`{{language}}\`
|
||||
* **Region(s) / Markets (optional):** \`{{regions}}\`
|
||||
* **Product/Solution Focus (optional):** \`{{focus}}\`
|
||||
|
||||
# Instructions for Step 1
|
||||
* Extract product(s)/service(s), core features, differentiation, and relevant value propositions.
|
||||
* Create a short summary (max. 4 bullet points) of the key findings.
|
||||
* **Output:** Table with columns: *Product/Solution | Description (1–2 sentences) | Core Features | Differentiation | Primary Source (URL)*.
|
||||
* **Format Requirement:** Respond ONLY with the results for this single step. Your response must start with the heading "## Step 1: Offer (WHAT)" and include the short summary and the Markdown table. Do not provide any other explanations.`,
|
||||
|
||||
// Step 2: Target Groups
|
||||
`# Task
|
||||
Now perform **Step 2 – Target Groups (WHO – Companies)**.
|
||||
|
||||
# Context: Validated results from previous steps
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Instructions for Step 2
|
||||
* Identify B2B target segments (industries/company types/sizes/regions) based on the given offer.
|
||||
* **Output:** Table: *Target Industry/Segment | Typical Company Characteristics | Region(s) | Proof of Relevance (URL)*.
|
||||
* **Format Requirement:** Respond ONLY with the results for this single step. Your response must start with the heading "## Step 2: Target Groups (Companies)" and include the Markdown table.`,
|
||||
|
||||
// Step 3: Personas
|
||||
`# Task
|
||||
Now perform **Step 3 – Personas/Roles (WHO – People)**.
|
||||
|
||||
# Context: Validated results from previous steps
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Instructions for Step 3
|
||||
* For each target industry: at least 4 **specific** roles with their area of responsibility and involvement in purchasing (E, I, D, U based on RACI logic). Do **not** invent people; logically derive roles from problem/process context.
|
||||
* **Output:** Table: *Role (precise) | Area of Responsibility | Why relevant for the product | Buying Involvement (E/I/D/U) | Source/Indication (URL or reference)*.
|
||||
* **Format Requirement:** Respond ONLY with the results for this single step. Your response must start with the heading "## Step 3: Personas (Roles)".`,
|
||||
|
||||
// Step 4: Pain Points
|
||||
`# Task
|
||||
Now perform **Step 4 – Pain Points per Role (WHY)**.
|
||||
|
||||
# Context: Validated results from previous steps
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Instructions for Step 4
|
||||
* For each role, formulate 3–5 specific pain points (observations, not features).
|
||||
* Tag each pain point with a category: **Cost | Time | Risk | Compliance | Quality | Employee Retention.**
|
||||
* Add an **Impact Estimate (€, h, %)** as a hypothesis.
|
||||
* **Output:** Table: *Role | Pain Point (specific, measurable/operational) | Category | Impact (Cost, Risk, Time) | Impact Estimate (€, h, %) | Urgency (high/medium/low) | Source/Indication (URL)*.
|
||||
* **Format Requirement:** Respond ONLY with the results for this single step. Your response must start with the heading "## Step 4: Pain Points per Role".`,
|
||||
|
||||
// Step 5: Gains
|
||||
`# Task
|
||||
Now perform **Step 5 – Gains & Benefits per Role (WHY switch)**.
|
||||
|
||||
# Context: Validated results from previous steps
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Instructions for Step 5
|
||||
* Based on the identified pain points, formulate 2-3 concrete gains (advantages/benefits) for each role.
|
||||
* Quantify the benefit as a hypothesis (e.g., savings in €, time gained in h, efficiency increase in %).
|
||||
* **Output:** Table: *Role | Gain (specific benefit) | Quantification (Hypothesis in €, h, %) | Source/Indication (URL)*.
|
||||
* **Format Requirement:** Respond ONLY with the results for this single step. Your response must start with the heading "## Step 5: Gains & Benefits per Role".`,
|
||||
|
||||
// Step 6: Messages
|
||||
`# Task
|
||||
Now perform **Step 6 – Marketing Message (HOW to speak)**.
|
||||
|
||||
# Context: Validated results from previous steps
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Inputs for this step
|
||||
* **Desired channels for the message:** \`{{channels}}\`
|
||||
|
||||
# Instructions for Step 6: Chain-of-Thought Analysis & Copywriting
|
||||
|
||||
**Your goal is to create a SEPARATE, specific message for EACH role from Step 3 within EACH target industry from Step 2. The result MUST be a complete matrix (e.g., 3 target industries x 4 roles = 12 unique messages).**
|
||||
|
||||
For each combination of **[Target Industry/Segment]** and **[Role]**, perform the following thought process before writing the final message:
|
||||
|
||||
1. **Step 6.1 (Analysis): Product-Role Fit.**
|
||||
* Which product/solution from the "Offer" table (Step 1) is most relevant to the **[Role]**?
|
||||
* *Example thought:* "For the Customer Service Manager, the scheduling software is more relevant than the mobile app because they are responsible for dispatching."
|
||||
|
||||
2. **Step 6.2 (Analysis): Industry Use Case.**
|
||||
* What are 1-2 typical use cases for the selected product in the **[Target Industry/Segment]**? What does the **[Role]** specifically do with it?
|
||||
* *Example thought:* "A service technician in mechanical engineering typically uses the mobile app to access maintenance logs and create digital service reports directly on-site with the customer."
|
||||
|
||||
3. **Step 6.3 (Analysis): Benefit Quantification.**
|
||||
* Look at the pain points (Step 4) and gains (Step 5) for the **[Role]**. Which pain point is most urgent, which gain most convincing?
|
||||
* Derive a concrete KPI relevant to the **[Role]** (e.g., First-Contact Resolution Rate, Time-to-Resolution, plant downtime).
|
||||
* Formulate the quantifiable benefit from Step 5 in relation to this KPI.
|
||||
* *Example thought:* "The Maintenance Manager's pain point is 'unplanned plant downtime'. The gain is 'reducing downtime by 15%'. The relevant KPI is 'Overall Equipment Effectiveness (OEE)'. The benefit is increasing OEE through predictive maintenance planning."
|
||||
|
||||
4. **Step 6.4 (Synthesis): Formulate Message.**
|
||||
* Synthesize the findings from 6.1-6.3 into a concise core message (2-3 sentences) following the structure: **Observation (Problem) → Low-threshold solution idea → Product bridge → Quantified benefit.**
|
||||
* Create variations of this message for the channels: {{channels}}.
|
||||
|
||||
# Output Format
|
||||
Create ONLY the final Markdown table. The thought process (6.1-6.3) is an internal instruction for you and should NOT appear in the output.
|
||||
* **Table Columns:** *Target Industry/Segment | Role | Core Message (2–3 sentences) | {{channels}}*.
|
||||
* **Requirement:** Your response must start with the heading "## Step 6: Messages" and contain ONLY the complete Markdown table.`,
|
||||
];
|
||||
|
||||
// Language-indexed prompt bundles: each locale bundles its system prompt
// with the ordered per-step prompts consumed by the generation pipeline.
export const PROMPTS = {
  de: {
    SYSTEM_PROMPT: SYSTEM_PROMPT_DE,
    STEP_PROMPTS: STEP_PROMPTS_DE,
  },
  en: {
    SYSTEM_PROMPT: SYSTEM_PROMPT_EN,
    STEP_PROMPTS: STEP_PROMPTS_EN,
  }
};
|
||||
|
||||
@@ -18,35 +18,77 @@
|
||||
</script>
|
||||
<style>
|
||||
@media print {
|
||||
@page {
|
||||
size: landscape;
|
||||
margin: 1cm;
|
||||
}
|
||||
body {
|
||||
-webkit-print-color-adjust: exact;
|
||||
print-color-adjust: exact;
|
||||
color-adjust: exact;
|
||||
background-color: #fff;
|
||||
color: #000;
|
||||
background-color: #fff !important;
|
||||
color: #000 !important;
|
||||
font-size: 11px;
|
||||
}
|
||||
main {
|
||||
padding-top: 1rem !important;
|
||||
padding-bottom: 1rem !important;
|
||||
padding: 0 !important;
|
||||
margin: 0 !important;
|
||||
width: 100% !important;
|
||||
max-width: none !important;
|
||||
}
|
||||
section {
|
||||
page-break-inside: avoid;
|
||||
margin-bottom: 2rem;
|
||||
border: none !important;
|
||||
box-shadow: none !important;
|
||||
padding: 0 !important;
|
||||
}
|
||||
h2 {
|
||||
font-size: 16px !important;
|
||||
margin-bottom: 10px !important;
|
||||
color: #000 !important;
|
||||
}
|
||||
/* Break scroll containers */
|
||||
.overflow-x-auto {
|
||||
overflow: visible !important;
|
||||
display: block !important;
|
||||
width: 100% !important;
|
||||
}
|
||||
table {
|
||||
width: 100% !important;
|
||||
table-layout: fixed;
|
||||
border-collapse: collapse !important;
|
||||
table-layout: auto !important; /* Allow columns to adapt */
|
||||
}
|
||||
td, th {
|
||||
word-wrap: break-word;
|
||||
th, td {
|
||||
border: 1px solid #ccc !important;
|
||||
padding: 4px 6px !important;
|
||||
word-wrap: break-word !important;
|
||||
white-space: normal !important;
|
||||
page-break-inside: avoid;
|
||||
}
|
||||
th {
|
||||
background-color: #f0f0f0 !important;
|
||||
color: #000 !important;
|
||||
}
|
||||
/* Style textareas to look like text */
|
||||
textarea {
|
||||
border: none !important;
|
||||
resize: none !important;
|
||||
box-shadow: none !important;
|
||||
-webkit-box-shadow: none !important;
|
||||
padding: 0 !important;
|
||||
min-height: auto !important;
|
||||
height: auto !important;
|
||||
overflow: visible !important;
|
||||
white-space: pre-wrap !important;
|
||||
font-size: 11px !important;
|
||||
}
|
||||
.overflow-x-auto {
|
||||
overflow-x: visible !important;
|
||||
/* Hide UI elements */
|
||||
button, .print\:hidden, input[type="text"] {
|
||||
display: none !important;
|
||||
}
|
||||
/* Hide scrollbars */
|
||||
::-webkit-scrollbar {
|
||||
display: none;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
2694
b2b-marketing-assistant/package-lock.json
generated
Normal file
2694
b2b-marketing-assistant/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -6,17 +6,24 @@
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "vite build",
|
||||
"preview": "vite preview"
|
||||
"preview": "vite preview",
|
||||
"start-backend": "node server.cjs"
|
||||
},
|
||||
"dependencies": {
|
||||
"react-dom": "^19.2.0",
|
||||
"@google/genai": "^1.28.0",
|
||||
"react": "^19.2.0"
|
||||
"body-parser": "^1.20.2",
|
||||
"cors": "^2.8.5",
|
||||
"express": "^4.18.2",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.14.0",
|
||||
"@vitejs/plugin-react": "^5.0.0",
|
||||
"typescript": "~5.8.2",
|
||||
"vite": "^6.2.0"
|
||||
"@types/cors": "^2.8.17",
|
||||
"@types/express": "^4.17.21",
|
||||
"@types/node": "^20.10.4",
|
||||
"@types/react": "^18.2.43",
|
||||
"@types/react-dom": "^18.2.17",
|
||||
"@vitejs/plugin-react": "^4.2.1",
|
||||
"typescript": "^5.2.2",
|
||||
"vite": "^5.0.8"
|
||||
}
|
||||
}
|
||||
5
b2b-marketing-assistant/requirements.txt
Normal file
5
b2b-marketing-assistant/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
google-generativeai
|
||||
requests
|
||||
beautifulsoup4
|
||||
lxml
|
||||
python-dotenv
|
||||
175
b2b-marketing-assistant/server.cjs
Normal file
175
b2b-marketing-assistant/server.cjs
Normal file
@@ -0,0 +1,175 @@
|
||||
const express = require('express');
const { spawn } = require('child_process');
const bodyParser = require('body-parser');
const cors = require('cors');
const fs = require('fs');
const path = require('path');

const app = express();
// Port 3002 to avoid clashes with the Market Intelligence tool (3001) and the React dev server (3000).
const PORT = 3002;

// Middleware
app.use(cors());
app.use(bodyParser.json({ limit: '10mb' })); // raise the limit for potentially large payloads

const PYTHON_EXECUTABLE = 'python3'; // assumes python3 is on the container's PATH
// Inside the Docker container, server.cjs and the Python script live in the same directory (/app).
const SCRIPT_PATH = path.join(__dirname, 'b2b_marketing_orchestrator.py');
|
||||
|
||||
|
||||
// Runs the Python orchestrator with the given CLI args and answers `res`
// with the script's stdout parsed as JSON. Responds 500 on non-zero exit,
// unparsable output, or spawn failure. A ChildProcess can emit both 'error'
// and 'close' for the same failure, so every response path is guarded with
// res.headersSent to avoid a double send ("headers already sent" crash).
const runPythonScript = (args, res) => {
  console.log(`[${new Date().toISOString()}] Spawning: ${PYTHON_EXECUTABLE} ${args.join(' ')}`);

  const pythonProcess = spawn(PYTHON_EXECUTABLE, args);

  let pythonOutput = '';
  let pythonError = '';

  pythonProcess.stdout.on('data', (data) => {
    pythonOutput += data.toString();
  });

  pythonProcess.stderr.on('data', (data) => {
    pythonError += data.toString();
  });

  pythonProcess.on('close', (code) => {
    console.log(`[${new Date().toISOString()}] Python script finished with exit code: ${code}`);

    if (pythonError) {
      console.log(`--- STDERR ---`);
      console.log(pythonError);
      console.log(`----------------`);
    }

    // The 'error' handler below may already have responded.
    if (res.headersSent) return;

    if (code !== 0) {
      console.error('Python script exited with an error.');
      return res.status(500).json({
        error: 'An error occurred in the backend script.',
        details: pythonError
      });
    }

    try {
      const result = JSON.parse(pythonOutput);
      res.json(result);
    } catch (parseError) {
      console.error('Failed to parse Python output as JSON:', parseError);
      res.status(500).json({
        error: 'Invalid JSON response from the backend script.',
        rawOutput: pythonOutput,
        details: pythonError
      });
    }
  });

  pythonProcess.on('error', (err) => {
    console.error('FATAL: Failed to start the python process itself.', err);
    // 'close' may fire as well; only respond once.
    if (res.headersSent) return;
    res.status(500).json({
      error: 'Failed to start the backend process.',
      details: err.message
    });
  });
};
|
||||
|
||||
|
||||
// API endpoint to start a new analysis (step 1).
// Expects { companyUrl, language, regions?, focus? } in the body and proxies
// the work to the Python orchestrator in 'start_generation' mode.
app.post('/api/start-generation', (req, res) => {
  console.log(`[${new Date().toISOString()}] HIT: /api/start-generation`);
  const { companyUrl, language, regions, focus } = req.body;

  if (!companyUrl || !language) {
    return res.status(400).json({ error: 'Missing required parameters: companyUrl and language.' });
  }

  const args = [
    SCRIPT_PATH,
    '--mode', 'start_generation',
    '--url', companyUrl,
    '--language', language
  ];

  // Optional refinements are forwarded only when supplied.
  if (regions) args.push('--regions', regions);
  if (focus) args.push('--focus', focus);

  runPythonScript(args, res);
});
|
||||
|
||||
|
||||
// API endpoint to generate the next analysis step (2-6).
// The accumulated analysisData is written to a temporary JSON file so the
// command line stays small; the file is removed once the HTTP response has
// been fully sent, regardless of success or failure.
app.post('/api/next-step', (req, res) => {
  console.log(`[${new Date().toISOString()}] HIT: /api/next-step`);
  const { analysisData, language, channels, generationStep } = req.body;

  if (!analysisData || !language || generationStep === undefined) {
    return res.status(400).json({ error: 'Missing required parameters: analysisData, language, generationStep.' });
  }

  // Write the (potentially large) context to a temp file to keep the CLI clean.
  const tmpDir = path.join(__dirname, 'tmp');
  if (!fs.existsSync(tmpDir)) {
    fs.mkdirSync(tmpDir);
  }
  const contextFilePath = path.join(tmpDir, `context_${Date.now()}.json`);

  // Best-effort removal of the temp context file; never throws.
  const cleanupContextFile = () => {
    try {
      if (fs.existsSync(contextFilePath)) {
        fs.unlinkSync(contextFilePath);
      }
    } catch (cleanupError) {
      console.error('Failed to remove temporary context file:', cleanupError);
    }
  };

  try {
    fs.writeFileSync(contextFilePath, JSON.stringify(analysisData));

    const args = [
      SCRIPT_PATH,
      '--mode', 'next_step',
      '--language', language,
      '--context_file', contextFilePath,
      '--generation_step', generationStep.toString()
    ];

    if (channels && Array.isArray(channels)) {
      args.push('--channels', channels.join(','));
    }

    // Clean up once the response is finished (success or error). This
    // replaces the previous fragile monkey-patching of res.json/res.status.
    res.on('finish', cleanupContextFile);

    runPythonScript(args, res);

  } catch (error) {
    console.error('Failed to write temporary context file:', error);
    cleanupContextFile();
    return res.status(500).json({ error: 'Failed to process request context.', details: error.message });
  }
});
|
||||
|
||||
// --- SERVE STATIC FRONTEND ---
// Serve static files from the 'dist' directory created by `npm run build`
app.use(express.static(path.join(__dirname, 'dist')));

// Handle client-side routing: return index.html for all non-API routes.
// NOTE(review): this wildcard also matches unknown /api/* paths (the API
// routes registered above take precedence, but a typo'd API URL will get
// index.html instead of a 404) — confirm this is intended.
app.get('*', (req, res) => {
  res.sendFile(path.join(__dirname, 'dist', 'index.html'));
});

// Start the HTTP server.
app.listen(PORT, () => {
  console.log(`B2B Marketing Assistant API Bridge running on http://localhost:${PORT}`);
});
|
||||
@@ -1,129 +0,0 @@
|
||||
import type { AnalysisData, AnalysisStep } from '../types';
|
||||
|
||||
function parseTable(tableLines: string[]): { headers: string[], rows:string[][] } {
|
||||
if (tableLines.length < 2) return { headers: [], rows: [] };
|
||||
|
||||
// Removes leading/trailing markdown asterisks (for bold/italic) from a string.
|
||||
const cleanMarkdownFormatting = (text: string) => {
|
||||
return text.trim().replace(/^\*+|\*+$/g, '');
|
||||
};
|
||||
|
||||
const headers = tableLines[0]
|
||||
.split('|')
|
||||
.map(h => cleanMarkdownFormatting(h))
|
||||
.filter(h => h);
|
||||
|
||||
const rows = tableLines.slice(2) // Skip header and separator line
|
||||
.map(rowLine => {
|
||||
const cells = rowLine
|
||||
.split('|')
|
||||
.map(c => cleanMarkdownFormatting(c))
|
||||
.filter((c, i, arr) => i > 0 && i < arr.length - 1); // remove first and last empty string from split
|
||||
return cells;
|
||||
})
|
||||
.filter(row => {
|
||||
if (row.length === 0 || !row.some(cell => cell.trim() !== '')) return false;
|
||||
// Filter out any row that looks like a markdown separator, e.g. |:---|:---|
|
||||
// This checks if all cells are composed of dashes and optional colons
|
||||
const isSeparator = row.every(cell => /^:?-+:?$/.test(cell.trim()));
|
||||
return !isSeparator;
|
||||
});
|
||||
|
||||
return { headers, rows };
|
||||
}
|
||||
|
||||
function parseSection(sectionText: string): AnalysisStep {
|
||||
const lines = sectionText.split('\n').filter(line => line.trim() !== '');
|
||||
let summary: string[] = [];
|
||||
let tableStartIndex = -1;
|
||||
|
||||
const summaryMatch = sectionText.match(/\*\*(?:Kurzresümee|Summary).*?:\*\*\s*([\s\S]*?)(\n\n|\| ---|## (?:Schritt|Step))/i);
|
||||
if(summaryMatch && summaryMatch[1]) {
|
||||
summary = summaryMatch[1].split('\n').map(s => s.replace(/^[*\-]\s*/, '').trim()).filter(Boolean);
|
||||
}
|
||||
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
if (lines[i].includes('|') && lines[i+1]?.includes('---')) {
|
||||
tableStartIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (tableStartIndex === -1) {
|
||||
return { headers: [], rows: [], summary };
|
||||
}
|
||||
|
||||
const tableLines = lines.slice(tableStartIndex);
|
||||
const { headers, rows } = parseTable(tableLines);
|
||||
|
||||
// In step 6, headers can be dynamic based on user channel selection. Let's fix them.
|
||||
if (sectionText.match(/## (?:Schritt|Step) 6:/i)) {
|
||||
const foundChannels = headers.slice(3); // Channels start after Zielbranche, Rolle, Kernbotschaft
|
||||
if(foundChannels.length > 0) {
|
||||
const defaultHeaders = headers.slice(0, 3);
|
||||
const dynamicHeaders = foundChannels.join(', ');
|
||||
headers.splice(3, foundChannels.length, dynamicHeaders);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return { headers, rows, summary };
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills empty cells in a specified column by carrying down the last non-empty value from preceding rows.
|
||||
* This is useful for tables where grouping values are not repeated for each row.
|
||||
* @param step The analysis step object containing headers and rows.
|
||||
* @param columnNames An array of possible column names (e.g., ['Rolle', 'Role']) to fill down.
|
||||
* @returns The analysis step with the specified column filled.
|
||||
*/
|
||||
const fillDownColumn = (step: AnalysisStep, columnNames: string[]): AnalysisStep => {
|
||||
const columnIndex = step.headers.findIndex(h =>
|
||||
columnNames.some(name => h.toLowerCase().trim().startsWith(name.toLowerCase().trim()))
|
||||
);
|
||||
|
||||
if (columnIndex === -1 || !step.rows || step.rows.length === 0) {
|
||||
return step;
|
||||
}
|
||||
|
||||
let lastSeenValue = '';
|
||||
const filledRows = step.rows.map(row => {
|
||||
const newRow = [...row];
|
||||
if (newRow.length > columnIndex) {
|
||||
const currentCell = newRow[columnIndex] || ''; // handle undefined
|
||||
if (currentCell.trim() !== '') {
|
||||
lastSeenValue = currentCell;
|
||||
} else {
|
||||
newRow[columnIndex] = lastSeenValue;
|
||||
}
|
||||
}
|
||||
return newRow;
|
||||
});
|
||||
|
||||
return { ...step, rows: filledRows };
|
||||
};
|
||||
|
||||
|
||||
export const parseGeminiStepResponse = (markdown: string): Partial<AnalysisData> => {
|
||||
const stepMatch = markdown.match(/## (?:Schritt|Step) (\d+):/i);
|
||||
if (!stepMatch) return {};
|
||||
|
||||
const stepNumber = parseInt(stepMatch[1], 10);
|
||||
const stepKeys: (keyof AnalysisData)[] = ['offer', 'targetGroups', 'personas', 'painPoints', 'gains', 'messages'];
|
||||
const currentStepKey = stepKeys[stepNumber - 1];
|
||||
|
||||
if (!currentStepKey) return {};
|
||||
|
||||
let stepData = parseSection(markdown);
|
||||
|
||||
// Post-processing to fill down empty cells in key columns for better filtering and readability.
|
||||
if (['personas', 'painPoints', 'gains'].includes(currentStepKey)) {
|
||||
stepData = fillDownColumn(stepData, ['Rolle', 'Role']);
|
||||
}
|
||||
if (currentStepKey === 'messages') {
|
||||
stepData = fillDownColumn(stepData, ['Zielbranche/Segment', 'Target Industry/Segment']);
|
||||
stepData = fillDownColumn(stepData, ['Rolle', 'Role']);
|
||||
}
|
||||
|
||||
return { [currentStepKey]: stepData };
|
||||
};
|
||||
102
b2b_marketing_assistant_plan.md
Normal file
102
b2b_marketing_assistant_plan.md
Normal file
@@ -0,0 +1,102 @@
|
||||
# Plan: Umsetzung des "B2B Marketing Assistant" Backends
|
||||
|
||||
Dieses Dokument beschreibt den Plan zur Umsetzung der Backend-Logik für die React-Anwendung unter `/b2b-marketing-assistant` als robusten, faktenbasierten Python-Service. Das primäre Ziel ist es, die Konsistenz und Zuverlässigkeit der Analyseergebnisse durch "Grounding" (Verankerung in realen Daten) signifikant zu erhöhen.
|
||||
|
||||
## 1. Zielsetzung & Architektur
|
||||
|
||||
- **Ziel:** Transformation der reinen Frontend-Anwendung in einen Service mit einem Python-Backend, das vor jeder KI-Analyse eine solide Faktenbasis durch Web-Scraping schafft. Dadurch werden die Ergebnisse reproduzierbar und basieren auf den tatsächlichen Inhalten der Unternehmens-Website.
|
||||
- **Architektur:** Wir replizieren den bewährten Aufbau des "Market Intelligence" Tools:
|
||||
1. **React-Frontend:** Die Benutzeroberfläche in `/b2b-marketing-assistant` bleibt bestehen, wird aber von direkten KI-Aufrufen befreit.
|
||||
2. **Node.js API-Brücke (`server.cjs`):** Ein minimaler Express.js-Server, der Anfragen vom Frontend annimmt und an das Python-Backend weiterleitet.
|
||||
3. **Python-Orchestrator (`b2b_marketing_orchestrator.py`):** Das neue Herzstück, das die gesamte Logik kapselt.
|
||||
|
||||
## 2. Kernprozess mit "Grounding"
|
||||
|
||||
Der 6-stufige Prozess der App wird im Backend abgebildet, wobei die ersten Schritte fundamental geändert werden:
|
||||
|
||||
1. **Schritt 1 (Angebot) & 2 (Zielgruppen):**
|
||||
* **Intelligentes Scraping:** Das Python-Skript crawlt die gegebene URL und sucht aktiv nach Unterseiten wie "Produkte", "Lösungen", "Branchen" etc.
|
||||
* **Text-Extraktion:** Der relevante Inhalt dieser Seiten wird extrahiert und zu einem "Grounding-Dokument" zusammengefasst.
|
||||
* **KI als Extraktions-Engine:** Die KI wird angewiesen, **ausschließlich auf Basis dieses extrahierten Textes** das Angebot und die Zielgruppen zu identifizieren und zu strukturieren. Halluzinationen werden so unterbunden.
|
||||
|
||||
2. **Schritt 3-6 (Personas, Pains, Gains, Messages):**
|
||||
* Diese Schritte bauen auf den validierten, faktenbasierten Ergebnissen aus Schritt 1 & 2 auf. Die gesamte Logikkette wird dadurch stabiler und konsistenter.
|
||||
|
||||
## 3. Strategische Vision: Integration der Tools
|
||||
|
||||
Dieses Projekt ist der erste Schritt zur Schaffung eines einheitlichen "Strategy & Audit"-Workflows.
|
||||
|
||||
- **Phase 1 (Aktuelles Projekt):** Wir bauen den "B2B Marketing Assistant" als eigenständigen Service mit einem modularen Python-Backend.
|
||||
- **Phase 2 (Zukünftig):** Die wiederverwendbaren Python-Module (Scraping, API-Handler etc.) werden mit dem `market_intel_orchestrator.py` zu einem einzigen, leistungsfähigen Backend verschmolzen. Der Workflow wäre dann nahtlos:
|
||||
1. **Strategie definieren:** Mit dem B2B Marketing Assistant eine Tiefenanalyse eines Referenzkunden durchführen.
|
||||
2. **Markt auditieren:** Die erstellte Strategie direkt nutzen, um Lookalikes zu finden und zu bewerten.
|
||||
|
||||
## 4. Fortschritts-Log
|
||||
|
||||
### Phase 1: Initialisierung & Planung
|
||||
|
||||
- [x] Anforderungsanalyse und Zieldefinition (Grounding, Konsistenz).
|
||||
|
||||
- [x] Architektur nach Vorbild `market-intel-backend` festgelegt.
|
||||
|
||||
- [x] Diesen Schlachtplan in `b2b_marketing_assistant_plan.md` erstellt.
|
||||
|
||||
- [x] Aufbau der Grundstruktur: Erstellen der `b2b_marketing_orchestrator.py`, der `server.cjs` in `/b2b-marketing-assistant` und des `Dockerfile`.
|
||||
|
||||
- [x] Erstellung von `package.json` und `requirements.txt`.
|
||||
|
||||
- [x] Anpassung des Frontends (`App.tsx`) für die Kommunikation mit dem neuen Backend.
|
||||
|
||||
- [x] Entfernen von Frontend-Dateien und -Inhalten, die nicht mehr benötigt werden (`parser.ts`, Prompts aus `constants.ts`).
|
||||
|
||||
- [x] Implementierung der `start_generation`-Logik im Python-Backend (Scraping, Grounding, initialer Gemini-Aufruf für Schritt 1).
|
||||
|
||||
- [x] Implementierung der `next_step`-Logik im Python-Backend (mehrstufige Gemini-Aufrufe für Schritte 2-6, Kontext-Management).
|
||||
|
||||
- [x] Fehlerbehebung: Alle Python-Syntaxfehler (Encoding, Strings) behoben.
|
||||
|
||||
- [x] Validierung: Das Tool lädt das Frontend und führt das Web-Scraping erfolgreich durch.
|
||||
|
||||
- [x] **API-Fix:** Umstellung auf Gemini v1 API und Modell `gemini-2.5-flash` (1M Token Context).
|
||||
|
||||
|
||||
|
||||
### Phase 2: Validierung & Optimierung (Abgeschlossen)
|
||||
|
||||
- [x] Docker-Container gebaut und gestartet.
|
||||
|
||||
- [x] Zugriff auf die UI über Port 3004 erfolgreich.
|
||||
|
||||
- [x] **Grounding Upgrade:** Umstellung von Plain-Text auf "Sanitized HTML" (H1-H6, Links erhalten) für präzise Produkterkennung.
|
||||
|
||||
- [x] **Kontext-Erweiterung:** Entfernung des 30.000 Zeichen Limits für vollständige Website-Analyse.
|
||||
|
||||
- [x] **Robustheit:** Implementierung von Retry-Logik (Exponential Backoff) und Timeout-Erhöhung (600s) für komplexe Analysen.
|
||||
|
||||
- [x] **Frontend Fixes:**
|
||||
|
||||
- [x] Robuster "Copy Table" Button (Fallback für Non-HTTPS).
|
||||
|
||||
- [x] PDF-Export optimiert (Landscape, keine Scrollbalken).
|
||||
|
||||
- [x] "Schritt 6 Wiederholen"-Funktion eingebaut.
|
||||
|
||||
- [x] **Prozess-Optimierung:** Schritt 6 fokussiert nun automatisch auf die Top-Branche, um Token-Limits und Lesezeit zu optimieren.
|
||||
|
||||
- [x] **Logging:** Detailliertes File-Logging (`Log_from_docker`) für Prompts und Antworten implementiert.
|
||||
|
||||
|
||||
|
||||
## 5. Status: Produktionsbereit
|
||||
|
||||
|
||||
|
||||
Das System liefert nun hochqualitative, faktenbasierte Analysen ("Grounding"), die weit über die ursprüngliche Online-Version hinausgehen. Alle bekannten Fehler (Timeouts, API 404, Copy-Paste) sind behoben.
|
||||
|
||||
|
||||
|
||||
### Nächste Schritte (Optional)
|
||||
|
||||
- Erweiterung auf Multi-Language Support im Frontend (aktuell DE fokussiert).
|
||||
|
||||
- Integration von SerpAPI für noch breitere Marktrecherchen (analog Market Intel).
|
||||
559
b2b_marketing_orchestrator.py
Normal file
559
b2b_marketing_orchestrator.py
Normal file
@@ -0,0 +1,559 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import requests
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
|
||||
# Logging Setup
# All runs append to one dated log file under Log_from_docker/ and mirror to
# stderr. stdout is deliberately kept clean: the Node bridge (server.cjs)
# parses this script's stdout as JSON.
log_dir = "Log_from_docker"
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# One log file per day; repeated runs within the same day append.
timestamp = datetime.now().strftime("%Y-%m-%d")
log_file = os.path.join(log_dir, f"{timestamp}_b2b_marketing.log")

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file, mode='a', encoding='utf-8'),
        logging.StreamHandler(sys.stderr)
    ]
)
||||
|
||||
def save_detailed_log(step_name, content_type, content):
    """Write one prompt/response payload to its own timestamped file in log_dir.

    Lets operators inspect the full LLM input/output of each step after a run.
    Best-effort: any failure is logged and swallowed so a logging problem can
    never abort an analysis step.
    """
    try:
        stamp = datetime.now().strftime("%H-%M-%S")
        target = os.path.join(log_dir, f"{stamp}_{step_name}_{content_type}.txt")
        with open(target, "w", encoding="utf-8") as handle:
            handle.write(content)
        logging.info(f"Detailed log saved: {target}")
    except Exception as exc:
        logging.error(f"Failed to save detailed log: {exc}")
|
||||
|
||||
# German/English substrings that mark a nav link as product/solution/industry
# related; used by find_relevant_links() to pick which sub-pages of the
# company site are worth scraping for grounding text.
LINK_KEYWORDS = ['product', 'solution', 'service', 'industrie', 'branche', 'anwendung', 'produkt', 'loesung', 'dienstleistung', 'portfolio', 'angebot']
|
||||
|
||||
# --- PROMPTS (Single line strings with explicit newlines for maximum compatibility) ---
|
||||
|
||||
SYSTEM_PROMPT_DE = """# Systemrolle
|
||||
|
||||
Du bist ein **B2B-Marketing-Researcher & Copywriter**. Du analysierst eine Unternehmens-URL, identifizierst Angebot, Zielgruppen, konkrete Zielrollen, deren Painpoints sowie Gains und formulierst darauf basierend eine wertschaetzende, fachkundige Marketingbotschaft. **Antworte nur mit Ergebnissen, keine Gedankengaenge.** Belege jede Aussage mit einer konkreten Seiten-URL der analysierten Domain. Kennzeichne Unsicherheiten explizit. Deine Antwort muss immer in der Zielsprache {{language}} sein.
|
||||
|
||||
# Arbeitsprinzipien
|
||||
|
||||
1. **Quellenpriorisierung:** Produktseite -> Loesungsseite -> Branchen/Industrien/Referenzen -> Unternehmens-/Ueber-uns-Seite -> Blog/News (nur zur Verifikation).
|
||||
2. **Faktenpolicy:** Nur aus der Domain der bereitgestellten URL ableiten; bei Vermutungen: als *Hypothese* kennzeichnen.
|
||||
3. **B2B-Ton:** sachkundig, respektvoll, nicht marktschreierisch.
|
||||
4. **Klarheit & Struktur:** Jede Stufe als Markdown-Tabelle + optionales Kurzresuemee dokumentieren.
|
||||
5. **Mindestens 4 spezifische Rollen** je Zielgruppe (nicht generisch).
|
||||
6. **Kompakt & nuetzlich:** Praezise Formulierungen; keine Floskeln."""
|
||||
|
||||
STEP_PROMPTS_DE = [
|
||||
"""# Aufgabe
|
||||
Fuehre **Schritt 1 - Angebot verstehen (WAS)** fuer das folgende Unternehmen durch.
|
||||
|
||||
# Eingaben
|
||||
* **Unternehmens-URL:** `{{company_url}}`
|
||||
* **Zielsprache der Ausgabe:** `{{language}}`
|
||||
* **Region(en) / Maerkte (optional):** `{{regions}}`
|
||||
* **Produkt-/Loesungsfokus (optional):** `{{focus}}`
|
||||
|
||||
# Anweisungen fuer Schritt 1
|
||||
* Extrahiere Produkt(e)/Leistung(en), Kernfunktionen, Differenzierung, relevante Werteversprechen.
|
||||
* Erstelle ein kurzes Resuemee (max. 4 Bulletpoints) der wichtigsten Erkenntnisse.
|
||||
* **Output:** Tabelle mit Spalten: *Produkt/Loesung | Beschreibung (1-2 Saetze) | Kernfunktionen | Differenzierung | Primaere Quelle (URL)*.
|
||||
* **Format-Anforderung:** Antworte NUR mit den Ergebnissen fuer diesen einen Schritt. Deine Antwort muss mit der Ueberschrift \"## Schritt 1: Angebot (WAS)\" beginnen und das Kurzresuemee sowie die Markdown-Tabelle enthalten. Gib keine weiteren Erklaerungen ab.""",
|
||||
"""# Aufgabe
|
||||
Fuehre nun **Schritt 2 - Zielgruppen (WER - Unternehmen)** durch.
|
||||
|
||||
# Kontext: Validierte Ergebnisse aus vorherigen Schritten
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Anweisungen fuer Schritt 2
|
||||
* Identifiziere B2B-Zielsegmente (Branchen/Unternehmensarten/Groessen/Regionen) basierend auf dem gegebenen Angebot.
|
||||
* **Output:** Tabelle: *Zielbranche/Segment | Typische Unternehmensmerkmale | Region(en) | Relevanzbeleg (URL)*.
|
||||
* **Format-Anforderung:** Antworte NUR mit den Ergebnissen fuer diesen einen Schritt. Deine Antwort muss mit der Ueberschrift \"## Schritt 2: Zielgruppen (Unternehmen)\" beginnen und die Markdown-Tabelle enthalten.""",
|
||||
"""# Aufgabe
|
||||
Fuehre nun **Schritt 3 - Zielpersonen/Rollen (WER - Personen)** durch.
|
||||
|
||||
# Kontext: Validierte Ergebnisse aus vorherigen Schritten
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Anweisungen fuer Schritt 3
|
||||
* Fuer jede Zielbranche: mind. 4 **spezifische** Rollen mit Verantwortungsbereich und Kaufbeteiligung (E, I, D, U nach RACI-Logik). Erfinde **keine** Personen; leite Rollen logisch aus Problem-/Prozessbezug ab.
|
||||
* **Output:** Tabelle: *Rolle (praezise) | Verantwortungsbereich | Warum relevant fuer Produkt | Kaufbeteiligung (E/I/D/U) | Quelle/Indiz (URL oder Referenz)*.
|
||||
* **Format-Anforderung:** Antworte NUR mit den Ergebnissen fuer diesen einen Schritt. Deine Antwort muss mit der Ueberschrift \"## Schritt 3: Zielpersonen (Rollen)\" beginnen.""",
|
||||
"""# Aufgabe
|
||||
Fuehre nun **Schritt 4 - Painpoints je Rolle (WARUM)** durch.
|
||||
|
||||
# Kontext: Validierte Ergebnisse aus vorherigen Schritten
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Anweisungen fuer Schritt 4
|
||||
* Formuliere pro Rolle 3-5 konkrete Painpoints (Beobachtungen, keine Features).
|
||||
* Tagge jeden Painpoint mit einer Kategorie: **Kosten | Zeit | Risiko | Compliance | Qualitaet | Mitarbeiterbindung.**
|
||||
* Fuege eine **Impact-Schaetzung (EUR, h, %)** als Hypothese hinzu.
|
||||
* **Output:** Tabelle: *Rolle | Painpoint (konkret, messbar/operativ) | Kategorie | Auswirkung (Kosten, Risiko, Zeit) | Impact-Schaetzung (EUR, h, %) | Dringlichkeit (hoch/mittel/niedrig) | Quelle/Indiz (URL)*.
|
||||
* **Format-Anforderung:** Antworte NUR mit den Ergebnissen fuer diesen einen Schritt. Deine Antwort muss mit der Ueberschrift \"## Schritt 4: Painpoints je Rolle\" beginnen.""",
|
||||
"""# Aufgabe
|
||||
Fuehre nun **Schritt 5 - Gains & Nutzen je Rolle (WARUM wechseln)** durch.
|
||||
|
||||
# Kontext: Validierte Ergebnisse aus vorherigen Schritten
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Anweisungen fuer Schritt 5
|
||||
* Basierend auf den identifizierten Painpoints, formuliere pro Rolle 2-3 konkrete Gains (Vorteile/Nutzen).
|
||||
* Quantifiziere den Nutzen als Hypothese (z.B. Einsparung in EUR, Zeitgewinn in h, Effizienzsteigerung in %).
|
||||
* **Output:** Tabelle: *Rolle | Gain (konkreter Nutzen) | Quantifizierung (Hypothese in EUR, h, %) | Quelle/Indiz (URL)*.
|
||||
* **Format-Anforderung:** Antworte NUR mit den Ergebnissen fuer diesen einen Schritt. Deine Antwort muss mit der Ueberschrift \"## Schritt 5: Gains & Nutzen je Rolle\" beginnen.""",
|
||||
"""# Aufgabe
|
||||
Fuehre nun **Schritt 6 - Marketingbotschaft (WIE sprechen)** durch.
|
||||
|
||||
# Kontext: Validierte Ergebnisse aus vorherigen Schritten
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Eingaben fuer diesen Schritt
|
||||
* **Gewuenschte Kanaele fuer die Botschaft:** `{{channels}}`
|
||||
|
||||
# Anweisungen fuer Schritt 6: Chain-of-Thought-Analyse & Texterstellung
|
||||
|
||||
**FOKUS:** Um die Analyse handhabbar zu machen, waehle aus Schritt 2 die **eine (1) relevanteste und vielversprechendste Zielbranche** (Primary Industry) aus.
|
||||
Dein Ziel ist es, NUR fuer diese EINE Fokus-Branche eine spezifische Botschaft fuer JEDE Rolle aus Schritt 3 zu erstellen.
|
||||
|
||||
Fuehre fuer jede **[Rolle]** innerhalb der ausgewaehlten **[Fokus-Branche]** den folgenden Denkprozess durch:
|
||||
|
||||
1. **Schritt 6.1 (Analyse): Produkt-Rollen-Fit.**
|
||||
* Welches Produkt/welche Loesung aus der "Angebot"-Tabelle (Schritt 1) ist fuer die **[Rolle]** am relevantesten?
|
||||
|
||||
2. **Schritt 6.2 (Analyse): Branchen-Use-Case.**
|
||||
* Was sind 1-2 typische Anwendungsfaelle fuer das ausgewaehlte Produkt in der **[Fokus-Branche]**? Was macht die **[Rolle]** damit konkret?
|
||||
|
||||
3. **Schritt 6.3 (Analyse): Nutzen-Quantifizierung.**
|
||||
* Betrachte die Painpoints (Schritt 4) und Gains (Schritt 5) fuer die **[Rolle]**.
|
||||
* Leite daraus einen konkreten, fuer die **[Rolle]** relevanten KPI ab.
|
||||
|
||||
4. **Schritt 6.4 (Synthese): Botschaft formulieren.**
|
||||
* Synthetisiere die Erkenntnisse aus 6.1-6.3 zu einer praegnanten Kernbotschaft (2-3 Saetze) nach der Struktur: **Beobachtung (Problem) -> Niedrigschwellige Loesungsidee -> Produkt-Bruecke -> Quantifizierter Nutzen.**
|
||||
* Erstelle Varianten dieser Botschaft fuer die Kanaele: {{channels}}.
|
||||
|
||||
# Output Format
|
||||
Erstelle ONLY die finale Markdown-Tabelle.
|
||||
* **Table Columns:** *Fokus-Branche | Rolle | Kernbotschaft (2-3 sentences) | {{channels}}*.
|
||||
* **Requirement:** Your response must start with the heading \"## Schritt 6: Botschaften\" and contain ONLY die vollstaendige Markdown-Tabelle.""",
|
||||
]
|
||||
|
||||
SYSTEM_PROMPT_EN = """# System Role
|
||||
|
||||
You are a **B2B Marketing Researcher & Copywriter**. You analyze a company URL, identify the offer, target groups, specific target roles, their pain points and gains, and based on this, you formulate an appreciative, expert marketing message. **Answer only with results, no thought processes.** Support every statement with a specific page URL from the analyzed domain. Explicitly mark uncertainties. Your response must always be in the target language {{language}}.
|
||||
|
||||
# Working Principles
|
||||
|
||||
1. **Source Prioritization:** Product Page -> Solutions Page -> Industries/References -> Company/About Us Page -> Blog/News (for verification only).
|
||||
2. **Fact Policy:** Only derive from the domain of the provided URL; for assumptions, mark them as a *hypothesis*.
|
||||
3. B2B Tone: Knowledgeable, respectful, not salesy.
|
||||
4. Clarity & Structure: Document each stage as a Markdown table + an optional short summary.
|
||||
5. At least 4 specific roles per target group (not generic).
|
||||
6. Concise & Useful: Precise wording; no clichés."""
|
||||
|
||||
STEP_PROMPTS_EN = [
|
||||
"""# Task
|
||||
Perform **Step 1 - Understand the Offer (WHAT)** for the following company.
|
||||
|
||||
# Inputs
|
||||
* **Company URL:** `{{company_url}}`
|
||||
* **Target Language of Output:** `{{language}}`
|
||||
* **Region(s) / Markets (optional):** `{{regions}}`
|
||||
* **Product/Solution Focus (optional):** `{{focus}}`
|
||||
|
||||
# Instructions for Step 1
|
||||
* Extract product(s)/service(s), core features, differentiation, and relevant value propositions.
|
||||
* Create a short summary (max. 4 bullet points) of the key findings.
|
||||
* **Output:** Table with columns: *Product/Solution | Description (1-2 sentences) | Core Features | Differentiation | Primary Source (URL)*.
|
||||
* **Format Requirement:** Respond ONLY with the results for this single step. Your response must start with the heading \"## Step 1: Offer (WHAT)\" and include the short summary and the Markdown table. Do not provide any other explanations.""",
|
||||
"""# Task
|
||||
Now perform **Step 2 - Target Groups (WHO - Companies)**.
|
||||
|
||||
# Context: Validated results from previous steps
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Instructions for Step 2
|
||||
* Identify B2B target segments (industries/company types/sizes/regions) based on the given offer.
|
||||
* **Output:** Table: *Target Industry/Segment | Typical Company Characteristics | Region(s) | Proof of Relevance (URL)*.
|
||||
* **Format Requirement:** Respond ONLY with the results for this single step. Your response must start with the heading \"## Step 2: Target Groups (Companies)\" and include the Markdown table.""",
|
||||
"""# Task
|
||||
Now perform **Step 3 - Personas/Roles (WHO - People)**.
|
||||
|
||||
# Context: Validated results from previous steps
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Instructions for Step 3
|
||||
* For each target industry: at least 4 **specific** roles with their area of responsibility and involvement in purchasing (E, I, D, U based on RACI logic). Do **not** invent people; logically derive roles from problem/process context.
|
||||
* **Output:** Table: *Role (precise) | Area of Responsibility | Why relevant for the product | Buying Involvement (E/I/D/U) | Source/Indication (URL or reference)*.
|
||||
* **Format Requirement:** Respond ONLY with the results for this single step. Your response must start with the heading \"## Step 3: Personas (Roles)\".""",
|
||||
"""# Task
|
||||
Now perform **Step 4 - Pain Points per Role (WHY)**.
|
||||
|
||||
# Context: Validated results from previous steps
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Instructions for Step 4
|
||||
* For each role, formulate 3-5 specific pain points (observations, not features).
|
||||
* Tag each pain point with a category: **Cost | Time | Risk | Compliance | Quality | Employee Retention.**
|
||||
* Add an **Impact Estimate (EUR, h, %)** as a hypothesis.
|
||||
* **Output:** Table: *Role | Pain Point (specific, measurable/operational) | Category | Impact (Cost, Risk, Time) | Impact Estimate (EUR, h, %) | Urgency (high/medium/low) | Source/Indication (URL)*.
|
||||
* **Format Requirement:** Respond ONLY with the results for this single step. Your response must start with the heading \"## Step 4: Pain Points per Role\".""",
|
||||
"""# Task
|
||||
Now perform **Step 5 - Gains & Benefits per Role (WHY switch)**.
|
||||
|
||||
# Context: Validated results from previous steps
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Instructions for Step 5
|
||||
* Based on the identified pain points, formulate 2-3 concrete gains (advantages/benefits) for each role.
|
||||
* Quantify the benefit as a hypothesis (e.g., savings in EUR, time gained in h, efficiency increase in %).
|
||||
* **Output:** Table: *Role | Gain (specific benefit) | Quantification (Hypothesis in EUR, h, %) | Source/Indication (URL)*.
|
||||
* **Format Requirement:** Respond ONLY with the results for this single step. Your response must start with the heading \"## Step 5: Gains & Benefits per Role\".""",
|
||||
"""# Task
|
||||
Now perform **Step 6 - Marketing Message (HOW to speak)**.
|
||||
|
||||
# Context: Validated results from previous steps
|
||||
{{previous_steps_data}}
|
||||
|
||||
# Inputs for this step
|
||||
* **Desired channels for the message:** `{{channels}}`
|
||||
|
||||
# Instructions for Step 6: Chain-of-Thought Analysis & Copywriting
|
||||
|
||||
**FOCUS:** To make the analysis manageable, select the **one (1) most relevant and promising target industry** (Primary Industry) from Step 2.
|
||||
Your goal is to create a specific message for EACH role from Step 3 ONLY for this ONE focus industry.
|
||||
|
||||
For each **[Role]** within the selected **[Focus Industry]**, perform the following thought process:
|
||||
|
||||
1. **Step 6.1 (Analysis): Product-Role Fit.**
|
||||
* Which product/solution from the "Offer" table (Step 1) is most relevant for the **[Role]**?
|
||||
|
||||
2. **Step 6.2 (Analysis): Industry Use Case.**
|
||||
* What are 1-2 typical use cases for the selected product in the **[Focus Industry]**? What does the **[Role]** actually do with it?
|
||||
|
||||
3. **Step 6.3 (Analysis): Benefit Quantification.**
|
||||
* Look at the Pain Points (Step 4) and Gains (Step 5) for the **[Role]**.
|
||||
* Derive a concrete KPI relevant to the **[Role]**.
|
||||
|
||||
4. **Step 6.4 (Synthesis): Formulate Message.**
|
||||
* Synthesize the findings from 6.1-6.3 into a concise core message (2-3 sentences) following the structure: **Observation (Problem) -> Low-threshold Solution Idea -> Product Bridge -> Quantified Benefit.**
|
||||
* Create variants of this message for the channels: {{channels}}.
|
||||
|
||||
# Output Format
|
||||
Create ONLY the final Markdown table.
|
||||
* **Table Columns:** *Focus Industry | Role | Core Message (2-3 sentences) | {{channels}}*.
|
||||
* **Requirement:** Your response must start with the heading \"## Step 6: Messages\" and contain ONLY the complete Markdown table."""
|
||||
]
|
||||
|
||||
# Locale registry consumed by start_generation()/next_step(). Per language it
# bundles: the system prompt, the six step-prompt templates, the exact response
# headings (used to locate and strip the LLM's heading before table parsing —
# they must match the headings demanded inside the prompts), the summary label,
# and canned Step-1 summary text returned to the frontend.
PROMPTS = {
    'de': {
        'SYSTEM_PROMPT': SYSTEM_PROMPT_DE,
        'STEP_PROMPTS': STEP_PROMPTS_DE,
        'STEP_TITLES': {
            'offer': 'Schritt 1: Angebot (WAS)',
            'targetGroups': 'Schritt 2: Zielgruppen (WER - Unternehmen)',
            'personas': 'Schritt 3: Zielpersonen/Rollen (WER - Personen)',
            'painPoints': 'Schritt 4: Painpoints je Rolle (WARUM)',
            'gains': 'Schritt 5: Gains & Nutzen je Rolle (WARUM wechseln)',
            'messages': 'Schritt 6: Marketingbotschaften je Segment & Rolle (WIE sprechen)',
        },
        'SUMMARY_TITLE': 'Kurzresuemee:',
        'SUMMARY_TEXT_FOR_STEP1': [
            "Die Angebotsanalyse wurde erfolgreich auf Basis der Website-Inhalte generiert.",
            "Dies ist der erste Schritt des Prozesses, der vom neuen Python-Backend ausgefuehrt wird."
        ]
    },
    'en': {
        'SYSTEM_PROMPT': SYSTEM_PROMPT_EN,
        'STEP_PROMPTS': STEP_PROMPTS_EN,
        'STEP_TITLES': {
            'offer': 'Step 1: Offer (WHAT)',
            'targetGroups': 'Step 2: Target Groups (WHO - Companies)',
            'personas': 'Step 3: Personas/Roles (WHO - People)',
            'painPoints': 'Step 4: Pain Points per Role (WHY)',
            'gains': 'Step 5: Gains & Benefits per Role (WHY switch)',
            'messages': 'Step 6: Marketing Messages per Segment & Role (HOW to speak)',
        },
        'SUMMARY_TITLE': 'Summary:',
        'SUMMARY_TEXT_FOR_STEP1': [
            "The offer analysis has been successfully generated based on website content.",
            "This is the first step of the process, executed by the new Python backend."
        ]
    }
}
|
||||
|
||||
# --- API & SCRAPING HELPERS ---
|
||||
|
||||
def load_api_key():
    """Return the Gemini API key read from 'gemini_api_key.txt' (stripped).

    Returns None (and logs an error) when the key file does not exist in the
    working directory.
    """
    try:
        with open("gemini_api_key.txt", "r") as key_file:
            raw_key = key_file.read()
    except FileNotFoundError:
        logging.error("API key file 'gemini_api_key.txt' not found.")
        return None
    return raw_key.strip()
|
||||
|
||||
def call_gemini_api(prompt, api_key, retries=3):
    """POST a single-turn prompt to the Gemini 2.5 Flash generateContent
    endpoint and return the text of the first candidate.

    Transient failures — HTTP 429 (rate limit), 5xx server errors, and
    connection errors/timeouts — are retried with a linear backoff of
    5s, 10s, ... The last error is re-raised once attempts are exhausted.
    Returns "" when the API answers without the expected candidate/parts
    structure (e.g. a safety block with no candidates).
    """
    url = f"https://generativelanguage.googleapis.com/v1/models/gemini-2.5-flash:generateContent?key={api_key}"
    headers = {'Content-Type': 'application/json'}
    payload = {"contents": [{"parts": [{"text": prompt}]}]}

    for attempt in range(retries):
        try:
            # Increased timeout to 600s (10 minutes) for complex Step 6 generation
            response = requests.post(url, headers=headers, json=payload, timeout=600)
            response.raise_for_status()
            result = response.json()
            if 'candidates' in result and result['candidates']:
                candidate = result['candidates'][0]
                if 'content' in candidate and 'parts' in candidate['content']:
                    return candidate['content']['parts'][0]['text']
            logging.warning(f"Unexpected API response structure: {result}")
            return ""
        except requests.exceptions.HTTPError as e:
            # Retry on rate limiting (429) and transient server errors (500,
            # 502, 503, 504); the old code did not retry 429, which Gemini
            # returns routinely under quota pressure.
            if e.response.status_code in [429, 500, 502, 503, 504] and attempt < retries - 1:
                wait_time = (attempt + 1) * 5
                logging.warning(f"API Error {e.response.status_code}. Retrying in {wait_time}s...")
                time.sleep(wait_time)
                continue
            logging.error(f"Error calling Gemini API: {e}")
            raise
        except Exception as e:
            # Retry on connection errors / timeouts
            if attempt < retries - 1:
                wait_time = (attempt + 1) * 5
                logging.warning(f"API Connection Error: {e}. Retrying in {wait_time}s...")
                time.sleep(wait_time)
                continue
            logging.error(f"Final Error calling Gemini API: {e}")
            raise
|
||||
|
||||
def get_text_from_url(url):
    """Fetch url and return its body as sanitized, token-lean HTML.

    Noise elements (scripts, styles, navigation, forms, ...) are removed
    entirely; all attributes except anchor href are stripped so the LLM can
    still resolve source links; blank lines are dropped to save tokens.
    Returns "" on any fetch/parse failure (logged as a warning).
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')

        # Drop noise tags wholesale; header/footer/nav removed so navigation
        # links do not pollute the grounding text.
        noise_tags = ['script', 'style', 'noscript', 'iframe', 'svg', 'header', 'footer', 'nav', 'aside', 'form', 'button', 'meta', 'link']
        for noisy in soup(noise_tags):
            noisy.decompose()

        # Strip every attribute except href on <a>, keeping the document
        # structure (<h1>..., <ul>, <div>) but none of the styling noise.
        for element in soup.find_all(True):
            for attr_name in list(element.attrs):
                if element.name == 'a' and attr_name == 'href':
                    continue
                del element[attr_name]

        # Prefer the <body> subtree; fall back to the whole document.
        body = soup.find('body')
        markup = str(body) if body else str(soup)

        # Collapse whitespace: drop empty lines and surrounding blanks.
        return "\n".join(line.strip() for line in markup.split('\n') if line.strip())

    except Exception as exc:
        logging.warning(f"Could not fetch or read URL {url}: {exc}")
        return ""
|
||||
|
||||
def find_relevant_links(base_url):
    """Scrape base_url and return up to 10 same-domain links whose href or
    anchor text contains one of LINK_KEYWORDS (product/solution pages).

    Returns [] on any fetch/parse failure (logged as a warning).
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        response = requests.get(base_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')
        base_netloc = urlparse(base_url).netloc
        relevant_links = set()
        for a_tag in soup.find_all('a', href=True):
            href = a_tag['href']
            link_text = a_tag.get_text(strip=True).lower()
            if any(keyword in href.lower() or keyword in link_text for keyword in LINK_KEYWORDS):
                abs_url = urljoin(base_url, href)
                # Fact policy: stay on the analyzed domain only.
                if urlparse(abs_url).netloc == base_netloc:
                    relevant_links.add(abs_url)
        # Sort BEFORE truncating: set iteration order is arbitrary, so the old
        # list(relevant_links)[:10] picked a different subset of links on each
        # run, making the grounding (and thus the analysis) non-reproducible.
        return sorted(relevant_links)[:10]
    except Exception as e:
        logging.warning(f"Could not scrape base URL {base_url} for links: {e}")
        return []
|
||||
|
||||
def parse_markdown_table(markdown_text):
    """Extract the first Markdown table from LLM output.

    Returns {"headers": [...], "rows": [[...], ...]} with Markdown emphasis
    (*, **) stripped from every cell. Rows whose cell count differs from the
    header count are discarded. Empty cells are preserved — the previous
    implementation filtered out empty strings when splitting, so any row
    containing a blank column had a shorter cell list and was silently
    dropped. Returns empty headers/rows when no well-formed table is found.
    """
    def _is_separator(line):
        # Matches alignment rows like "| --- | :---: |" (spaces removed first).
        return '---' in line and re.match(r'^\|(:?-+:?)\|', line.replace(' ', '')) is not None

    def _split_cells(line):
        # Strip the outer pipes, split on the inner ones (keeping empty
        # cells), and remove emphasis markers from each cell.
        inner = line.strip().strip('|')
        return [re.sub(r'\*+([^*]+)\*+', r'\1', cell.strip()).strip() for cell in inner.split('|')]

    lines = [line.strip() for line in markdown_text.strip().split('\n')]
    table_lines = []
    in_table_section = False
    for line in lines:
        if re.match(r'^\|.*\|$', line) and '---' not in line:
            in_table_section = True
            table_lines.append(line)
        elif in_table_section and _is_separator(line):
            table_lines.append(line)
        elif in_table_section:
            # First non-table line after the table ends it; ignore the rest.
            break

    if not table_lines:
        return {"headers": [], "rows": []}

    separator_index = next((i for i, line in enumerate(table_lines) if _is_separator(line)), -1)
    # A valid table needs a separator sitting directly below a header line.
    if separator_index <= 0:
        return {"headers": [], "rows": []}

    headers = [h for h in _split_cells(table_lines[0]) if h]
    rows = []
    for line in table_lines[separator_index + 1:]:
        cells = _split_cells(line)
        if len(cells) == len(headers):
            rows.append(cells)
    return {"headers": headers, "rows": rows}
|
||||
|
||||
def format_context_for_prompt(analysis_data, language):
    """Render all previously completed steps as a Markdown document.

    For each step present in analysis_data (in pipeline order) this emits the
    localized heading, the optional bullet summary, and the step's table, so
    the result can be injected into {{previous_steps_data}} of the next
    prompt. Steps missing from analysis_data are skipped.
    """
    prompts_for_lang = PROMPTS[language]
    titles = prompts_for_lang['STEP_TITLES']
    ordered_keys = ['offer', 'targetGroups', 'personas', 'painPoints', 'gains', 'messages']
    parts = []
    for index, key in enumerate(ordered_keys):
        data = analysis_data.get(key)
        if not data:
            continue
        heading = titles.get(key, f"Step {index+1}")
        parts.append(f"\n\n## {heading}\n\n")
        summary = data.get('summary')
        if summary:
            parts.append(f"**{prompts_for_lang['SUMMARY_TITLE']}**\n")
            parts.append("\n".join(f"* {item}" for item in summary) + "\n\n")
        headers = data.get('headers', [])
        rows = data.get('rows', [])
        if headers and rows:
            parts.append(f"| {' | '.join(headers)} |\n| {' | '.join(['---'] * len(headers))} |\n")
            for row in rows:
                # Pad short rows so every table line has a full column set.
                padded = row + [''] * (len(headers) - len(row))
                parts.append(f"| {' | '.join(padded)} |\n")
            parts.append("\n")
    return "".join(parts)
|
||||
|
||||
# --- CORE LOGIC ---
|
||||
|
||||
def start_generation(url, language, regions, focus):
    """Execute Step 1 (offer analysis) for a company URL.

    Scrapes the main page plus keyword-relevant sub-pages, feeds the
    sanitized HTML (with SOURCE_URL markers) to Gemini, and parses the
    returned Markdown table.

    Returns a dict with '_initial_inputs' (echo of the CLI inputs, persisted
    in the context file so later steps can re-substitute them) and 'offer'
    (canned summary + parsed table headers/rows).

    Raises ValueError when the API key file is missing and RuntimeError when
    no page content could be scraped at all.
    """
    logging.info(f"Starting Step 1 for URL: {url} in language: {language}")
    api_key = load_api_key()
    if not api_key: raise ValueError("Gemini API key is missing.")

    # 1. Scraping Strategy: Main Page + Relevant Sub-pages
    # (dedup + sort keeps the order of the grounding text stable)
    urls_to_scrape = sorted(list(set([url] + find_relevant_links(url))))
    grounding_text = ""
    logging.info(f"Identified {len(urls_to_scrape)} pages to scrape.")

    for u in urls_to_scrape:
        logging.info(f"  - Scraping: {u}")
        text_content = get_text_from_url(u)
        if text_content:
            # Inject SOURCE_URL marker for the LLM
            grounding_text += f"SOURCE_URL: {u}\nCONTENT (Simplified HTML):\n{text_content}\n\n{'='*50}\n\n"

    if not grounding_text.strip(): raise RuntimeError(f"Failed to scrape content from {url}")

    current_prompts = PROMPTS[language]
    system_instruction = current_prompts['SYSTEM_PROMPT'].replace('{{language}}', language)

    # Updated Prompt: Removed length limit and added instruction for SOURCE_URL
    grounded_offer_prompt = f"{system_instruction}\n\n# TASK\nAnalyze the provided website content to understand the company's offerings. Your response MUST be a Markdown table.\n\n# CONTEXT\n- Website Content: The input provided is **Simplified HTML**. Use the structure (e.g. <h1>-<h6> headers, <ul> lists, <div> groupings) to identify distinct products or services.\n- **Content Data (with SOURCE_URL markers):** \n```html\n{grounding_text}\n```\n- Target Language: {language}\n- Company URL: {url}\n- Focus: {focus or 'N/A'}\n- Regions: {regions or 'N/A'}\n\n# INSTRUCTIONS\n1. Identify products/services by looking for recurring HTML patterns (e.g. a Header followed by a description and a 'Learn More' link).\n2. Create Markdown table: Produkt/Loesung | Beschreibung (1-2 Saetze) | Kernfunktionen | Differenzierung | Primaere Quelle (URL)\n3. **IMPORTANT:** For the 'Primaere Quelle (URL)' column, look for the `<a href='...'>` tag NEAREST to the product description. Combine it with the `SOURCE_URL` if it's a relative link. Do not just link the homepage.\n4. Response must be ONLY the table starting with '## {current_prompts['STEP_TITLES']['offer']}'."

    # Log the full prompt (Input)
    save_detailed_log("step1_offer", "prompt", grounded_offer_prompt)

    response_text = call_gemini_api(grounded_offer_prompt, api_key)

    # Log the full response (Output)
    save_detailed_log("step1_offer", "response", response_text)

    step1_title = current_prompts['STEP_TITLES']['offer']
    # Strip the expected heading (if present) so the table parser only sees
    # the body of the response.
    title_match = re.search(rf'## {re.escape(step1_title)}\s*', response_text, re.IGNORECASE)
    content = response_text[title_match.end():].strip() if title_match else response_text
    table_data = parse_markdown_table(content)

    return {
        "_initial_inputs": {"url": url, "language": language, "regions": regions, "focus": focus},
        "offer": {"summary": current_prompts['SUMMARY_TEXT_FOR_STEP1'], "headers": table_data['headers'], "rows": table_data['rows']}
    }
|
||||
|
||||
def next_step(language, context_file, generation_step, channels):
    """Execute one of steps 2-6 (generation_step) using prior results as context.

    Loads the accumulated analysis JSON from context_file, renders every
    completed step as Markdown, substitutes it (plus the original CLI inputs)
    into the step's prompt template, calls Gemini, and parses heading,
    optional bullet summary, and Markdown table out of the response.

    Returns {step_key: {"summary": [...], "headers": [...], "rows": [...]}}.
    Raises ValueError when the API key file is missing.
    """
    logging.info(f"Starting Step {generation_step} in language: {language}")
    api_key = load_api_key()
    if not api_key: raise ValueError("Gemini API key is missing.")
    with open(context_file, 'r', encoding='utf-8') as f: analysis_data = json.load(f)
    current_prompts = PROMPTS[language]
    system_instruction = current_prompts['SYSTEM_PROMPT'].replace('{{language}}', language)
    # Templates are 0-indexed; generation_step is 1-based (steps 2-6 here).
    step_prompt_template = current_prompts['STEP_PROMPTS'][generation_step - 1]
    previous_steps_markdown = format_context_for_prompt(analysis_data, language)
    prompt = step_prompt_template.replace('{{previous_steps_data}}', previous_steps_markdown)
    # Only Step 6 uses {{channels}}; default matches the frontend's defaults.
    if '{{channels}}' in prompt: prompt = prompt.replace('{{channels}}', channels or 'LinkedIn, Kaltmail, Landingpage')
    initial_inputs = analysis_data.get('_initial_inputs', {})

    # Helper to safely get string values even if they are None/null in the JSON
    def get_safe(key):
        val = initial_inputs.get(key)
        return str(val) if val is not None else 'N/A'

    prompt = prompt.replace('{{company_url}}', get_safe('url')).replace('{{language}}', language).replace('{{regions}}', get_safe('regions')).replace('{{focus}}', get_safe('focus'))
    full_prompt = f"{system_instruction}\n\n{prompt}"

    # Log the full prompt
    save_detailed_log(f"step{generation_step}", "prompt", full_prompt)

    response_text = call_gemini_api(full_prompt, api_key)

    # Log the full response
    save_detailed_log(f"step{generation_step}", "response", response_text)

    # Map the 1-based step number to the result key used in the context JSON.
    step_key = ['offer', 'targetGroups', 'personas', 'painPoints', 'gains', 'messages'][generation_step - 1]
    expected_title = current_prompts['STEP_TITLES'][step_key]
    # Strip the expected heading (if present) before table parsing.
    title_match = re.search(rf'## {re.escape(expected_title)}\s*', response_text, re.IGNORECASE)
    content = response_text[title_match.end():].strip() if title_match else response_text
    table_data = parse_markdown_table(content)

    # Fixed Regex: Added proper grouping (?: ... ) around the stop tokens
    # Captures the bullet list between the "Kurzresuemee/Summary" label and
    # either the table separator or the next step heading.
    summary_match = re.search(r'\*\*(?:Kurzresuemee|Summary).*?:\*\*\s*([\s\S]*?)(?:\| ---|## (?:Schritt|Step))', response_text, re.IGNORECASE)
    summary = [re.sub(r'^\*\s*|^-\s*|^\d+\.\s*', '', s.strip()) for s in summary_match[1].split('\n') if s.strip()] if summary_match else []
    return {step_key: {"summary": summary, "headers": table_data['headers'], "rows": table_data['rows']}}
|
||||
|
||||
def main():
    """CLI entry point: dispatch on --mode and emit one JSON object on stdout.

    The Node backend parses stdout, so only the result (or {"error": ...})
    is written there; all logging goes to stderr/file. Exits with status 1
    on any failure.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', required=True)
    parser.add_argument('--url')
    parser.add_argument('--focus')
    parser.add_argument('--regions')
    parser.add_argument('--context_file')
    parser.add_argument('--generation_step', type=int)
    parser.add_argument('--channels')
    parser.add_argument('--language', required=True)
    args = parser.parse_args()
    try:
        if args.mode == 'start_generation':
            result = start_generation(args.url, args.language, args.regions, args.focus)
        elif args.mode == 'next_step':
            result = next_step(args.language, args.context_file, args.generation_step, args.channels)
        else:
            # Previously an unknown mode crashed with an opaque
            # UnboundLocalError on 'result'; fail with a clear message.
            raise ValueError(f"Unknown mode: {args.mode}")
        sys.stdout.write(json.dumps(result, ensure_ascii=False))
    except Exception as e:
        logging.error(f"Error: {e}", exc_info=True)
        sys.stdout.write(json.dumps({"error": str(e)}, ensure_ascii=False))
        sys.exit(1)
|
||||
|
||||
if __name__ == '__main__': main()
|
||||
Reference in New Issue
Block a user