# Brancheneinstufung2/gtm_architect_orchestrator.py

import argparse
import base64
import json
import logging
import re
import sys
import os
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from config import Config
import gtm_db_manager as db_manager

sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from helpers import call_gemini_flash, scrape_website_details, call_gemini_image
from config import Config, BASE_DIR # Import Config and BASE_DIR

# Directory that receives the run log and the per-step prompt/response dumps.
LOG_DIR = "Log_from_docker"
# exist_ok=True avoids the check-then-create race of a separate exists() test.
os.makedirs(LOG_DIR, exist_ok=True)

ORCHESTRATOR_VERSION = "1.3.0" # Bump version for image fix & language enforcement
# Taken once at import time; prefixes the run log and every dump file name.
run_timestamp = datetime.now().strftime("%y-%m-%d_%H-%M-%S")
log_file_path = os.path.join(LOG_DIR, f"{run_timestamp}_gtm_orchestrator_run.log")

# Log both to the per-run file and to stderr.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file_path, mode='a', encoding='utf-8'),
        logging.StreamHandler(sys.stderr)
    ]
)
logging.info(f"GTM Architect Orchestrator v{ORCHESTRATOR_VERSION} ({run_timestamp}) starting...")

# !!! CRITICAL FIX: Load API keys at the very beginning !!!
# This ensures Config.API_KEYS is populated before any AI functions are called.
Config.load_api_keys()

def log_and_save(project_id, step_name, data_type, content):
    """Log a pipeline step and dump its payload into LOG_DIR.

    The dump file is named ``<run_timestamp>_<step_name>_<data_type>.txt`` and
    is overwritten if it already exists. dicts and lists are written as
    indented JSON; anything else is written via ``str()``. Any error raised
    while writing is logged and swallowed, so a failed dump never aborts the
    calling phase.
    """
    logging.info(f"Project {project_id} - Step: {step_name} - Type: {data_type}")
    target = os.path.join(LOG_DIR, f"{run_timestamp}_{step_name}_{data_type}.txt")
    is_structured = isinstance(content, (dict, list))
    try:
        with open(target, 'w', encoding='utf-8') as handle:
            if not is_structured:
                handle.write(str(content))
            else:
                json.dump(content, handle, indent=4, ensure_ascii=False)
        logging.info(f"Saved {data_type} to {target}")
    except Exception as exc:
        # Best effort only: report the problem and carry on.
        logging.error(f"Failed to save {data_type} to file: {exc}")

def get_system_instruction(lang):
    """Return the GTM-expert system prompt for the given language code.

    ``'de'`` selects the German prompt; every other value (including ``None``)
    falls back to the English one. Both prompts carry the same Wackler Group
    context and the seven-category "Dynamic Service" logic.
    """
    if lang != 'de':
        # Default to English
        instruction = """
        You are an international Go-to-Market (GTM) expert for B2B technology companies in robotics, facility management, and IoT.
        Your task is to develop a comprehensive GTM strategy from technical specifications and product descriptions.
        You are structured, data-driven, and precise. Your answers are always clear, professional, and to the point.
        When asked to output JSON, provide ONLY the JSON object without any surrounding text or code formatting.
        Maintain consistent logic throughout the process. All phases build on each other.
        Perform an internal plausibility check before providing an answer.

        # CONTEXT: THE WACKLER GROUP ECOSYSTEM
        We are part of the Wackler Group. We leverage the full service portfolio of the parent company to turn hardware weaknesses into service strengths.
        The goal is always a "Symbiosis of Man & Machine".

        # RULE 5: THE "DYNAMIC SERVICE" LOGIC (UNIVERSAL)
        First analyze the **category** of the robot and then apply the appropriate hybrid logic:

        1. CLEANING INDOOR (CARPET) - Vacuums for carpets
           * Robot: Does the area (80%).
           * Human (Wackler Cleaning): Does edges, corners, spot removal (20%).

        2. CLEANING INDOOR (WET SURFACE) - Scrubber dryers (Hard floor)
           * Robot: Cleans halls/corridors continuously.
           * Human (Wackler Cleaning): Safety check (slip hazard), water change, hygiene audit.

        3. CLEANING OUTDOOR (SWEEPER) - Sweepers (Asphalt)
           * Robot: Picks up fine dust and cigarettes.
           * Human (Wackler Cleaning): Empties bins, removes bulky waste, maintains greenery.

        4. POS ROBOT - Retail/Airport Assistance
           * Robot: Information, wayfinding, eye-catcher.
           * Human (Wackler Service): Consultation, sales, emotional customer bonding.

        5. SECURITY ROBOT - Mobile Surveillance (Quadruped/Drone)
           * Robot: "Detection & Presence". 24/7 patrol, thermal imaging, no fatigue.
           * Human (Wackler Security): "Evaluation & Intervention". NSL evaluates alarm, intervention force drives out.
           * Pitch: "The robot sees the danger, Wackler eliminates it."

        6. SERVICE ROBOT - Transport (Hospitality/Clinic)
           * Robot: Heavy lifting (trays, laundry) from A to B.
           * Human (Wackler Service): Patient contact, table service, hygiene.

        7. TRANSPORT ROBOT - Intralogistics (Warehouse)
           * Robot: Pallet transport, milkrun.
           * Human (Wackler Logistics): Process control, exception handling, repacking.

        Mandatory application of this logic in PHASE 4 (Strategy) and PHASE 6 (Sales Enablement).

        IMPORTANT: Always answer in the requested language.
        """
    else:
        instruction = """
        Du bist ein internationaler Go-to-Market (GTM) Experte für B2B-Technologie-Unternehmen im Bereich Robotik, Facility Management und IoT.
        Deine Aufgabe ist es, aus technischen Spezifikationen und Produktbeschreibungen eine umfassende GTM-Strategie zu entwickeln.
        Du arbeitest strukturiert, datengetrieben und präzise. Deine Antworten sind immer klar, professionell und direkt auf den Punkt.
        Wenn du JSON ausgeben sollst, gib NUR das JSON-Objekt aus, ohne umschließende Text- oder Code-Formatierungen.
        Behalte während des gesamten Prozesses eine konsistente Logik bei. Alle Phasen bauen aufeinander auf.
        Führe eine interne Plausibilitätsprüfung durch, bevor du eine Antwort gibst.

        # CONTEXT: THE WACKLER GROUP ECOSYSTEM
        Wir sind Teil der Wackler Group. Wir nutzen das gesamte Dienstleistungsportfolio der Muttergesellschaft, um Hardware-Schwächen in Service-Stärken zu verwandeln.
        Das Ziel ist immer eine "Symbiose aus Mensch & Maschine".

        # REGEL 5: THE "DYNAMIC SERVICE" LOGIC (UNIVERSAL)
        Analysiere zuerst die **Kategorie** des Roboters und wende dann die passende Hybrid-Logik an:

        1. CLEANING INDOOR (CARPET) - Sauger für Teppiche
           * Robot: Macht die Fläche (80%).
           * Human (Wackler Cleaning): Macht Kanten, Ecken, Fleckenentfernung (20%).

        2. CLEANING INDOOR (WET SURFACE) - Scheuersauger (Hartboden)
           * Robot: Reinigt Flure/Hallen kontinuierlich.
           * Human (Wackler Cleaning): Sicherheits-Check (Rutschgefahr), Wasserwechsel, Hygiene-Audit.

        3. CLEANING OUTDOOR (SWEEPER) - Kehrmaschine (Asphalt)
           * Robot: Nimmt Feinstaub und Zigaretten auf.
           * Human (Wackler Cleaning): Leert Mülleimer, entfernt Sperrmüll, pflegt Grünanlagen.

        4. POS ROBOTER - Retail/Airport Assistenz
           * Robot: Information, Wegweiser, Blickfang.
           * Human (Wackler Service): Beratung, Verkauf, emotionale Kundenbindung.

        5. SECURITY ROBOTER - Mobile Überwachung (Quadruped/Drohne)
           * Robot: "Detektion & Präsenz". 24/7 Patrouille, Wärmebild, keine Müdigkeit.
           * Human (Wackler Security): "Bewertung & Intervention". NSL bewertet Alarm, Interventionskraft fährt raus.
           * Pitch: "Der Roboter sieht die Gefahr, Wackler beseitigt sie."

        6. SERVICE ROBOTER - Transport (Gastro/Klinik)
           * Robot: Schweres Tragen (Tabletts, Wäsche) von A nach B.
           * Human (Wackler Service): Patientenkontakt, Tisch-Service, Hygiene.

        7. TRANSPORT ROBOTER - Intralogistik (Lager)
           * Robot: Paletten-Transport, Milkrun.
           * Human (Wackler Logistics): Prozesssteuerung, Ausnahmebehandlung, Umpacken.

        Wende diese spezifische Logik zwingend in PHASE 4 (Strategy) und PHASE 6 (Sales Enablement) an.

        WICHTIG: Antworte IMMER in der vom User geforderten Sprache (Deutsch), auch wenn der Input Englisch ist.
        """
    return instruction

def get_output_lang_instruction(lang):
    """Return a strongly worded directive that pins the output language.

    ``'de'`` yields the German directive; any other value yields the English one.
    """
    german_rule = "ACHTUNG: Die gesamte Ausgabe (JSON-Werte, Texte, Analysen) MUSS in DEUTSCH sein. Übersetze englische Input-Daten."
    english_rule = "IMPORTANT: The entire output MUST be in ENGLISH."
    return german_rule if lang == 'de' else english_rule

# --- ORCHESTRATOR PHASES ---

def list_history(payload):
    """Return all stored projects wrapped as ``{"projects": ...}``.

    ``payload`` is not read by this handler.
    """
    return {"projects": db_manager.get_all_projects()}

def load_history(payload):
    """Load a stored project and decode stringified phase results.

    Args:
        payload: dict carrying the ``projectId`` to load.

    Returns:
        The project data from the DB manager. When it has a dict under
        ``phases``, every string value in it that parses as JSON is replaced
        in place by the parsed object; unparsable strings are left as they are.

    Raises:
        ValueError: if ``projectId`` is missing/empty or the project lookup
            returns nothing.
    """
    project_id = payload.get('projectId')
    if not project_id:
        raise ValueError("No projectId provided for loading history.")

    data = db_manager.get_project_data(project_id)
    if not data:
        raise ValueError(f"Project {project_id} not found.")

    # Nothing to normalise unless there is a dict of phase results.
    if 'phases' not in data or not isinstance(data['phases'], dict):
        return data

    phases = data['phases']
    for phase_name in list(phases):
        stored = phases[phase_name]
        if not isinstance(stored, str):
            continue
        try:
            phases[phase_name] = json.loads(stored)
        except json.JSONDecodeError:
            logging.warning(f"Could not decode JSON for {phase_name} in project {project_id}. Leaving as is.")

    return data

def delete_session(payload):
    """Delete the project named by ``payload['projectId']``.

    Returns whatever ``db_manager.delete_project`` returns.

    Raises:
        ValueError: if ``projectId`` is missing or empty.
    """
    target_id = payload.get('projectId')
    if target_id:
        return db_manager.delete_project(target_id)
    raise ValueError("No projectId provided for deletion.")

def phase1(payload):
    """Run phase 1: product analysis followed by hard-fact spec extraction.

    Args:
        payload: dict with ``productInput`` (raw text, or a URL starting with
            ``http`` that is scraped first), optional ``lang`` (defaults to
            ``'de'``) and optional ``projectId``. When no ``projectId`` is
            given, a new project is created and named after the input.

    Returns:
        The parsed analysis dict with the extracted specs under ``specs`` and
        the project id under ``projectId``. If the specs response is not valid
        JSON, ``specs`` holds an error marker plus the raw response. If the
        analysis response itself is not valid JSON, a dict with ``error``,
        ``details`` and ``projectId`` is returned instead and nothing is saved.
    """
    # A missing or null productInput is treated as empty text so .strip() cannot fail.
    product_input = payload.get('productInput') or ''
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')

    is_url = product_input.strip().startswith('http')

    # Check if input is a URL and scrape it
    if is_url:
        logging.info(f"Input detected as URL. Starting scrape for: {product_input}")
        analysis_content = scrape_website_details(product_input)
        if "Fehler:" in analysis_content:
            # If scraping fails, use the URL itself with a note for the AI.
            analysis_content = f"Scraping der URL {product_input} ist fehlgeschlagen. Analysiere das Produkt basierend auf der URL und deinem allgemeinen Wissen."
            logging.warning("Scraping failed. Using URL as fallback content for analysis.")
    else:
        analysis_content = product_input
        logging.info("Input is raw text. Analyzing directly.")

    # Automatic project creation when the caller did not supply an id.
    if not project_id:
        # Derive a provisional name from the input.
        raw_name = product_input.strip()
        if is_url:
            name = f"Web Analysis: {raw_name[:30]}..."
        else:
            name = (raw_name[:30] + "...") if len(raw_name) > 30 else raw_name

        logging.info(f"Creating new project: {name}")
        new_proj = db_manager.create_project(name)
        project_id = new_proj['id']
        logging.info(f"New Project ID: {project_id}")

    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)

    prompt = f"""
    PHASE 1: PRODUCT ANALYSIS & CONSTRAINTS
    Input: "{analysis_content}"
    Task:
    1. Extract and CONSOLIDATE technical features into 8-12 high-level core capabilities or value propositions. Group minor specs (e.g., specific ports like USB/Ethernet) into broader categories (e.g., "Connectivity & Integration"). Do NOT list every single hardware spec individually. Focus on what matters for the buyer.
    2. Define hard constraints (e.g., physical dimensions, max payload, environment limitations).
    3. Classify the product into one of the 7 Wackler Categories: [Cleaning Indoor (Carpet), Cleaning Indoor (Wet), Cleaning Outdoor (Sweeper), POS Robot, Security Robot, Service Robot, Transport Robot].
    4. Check for internal portfolio conflicts (hypothetical product "Scrubber 5000").

    {lang_instr}

    Output JSON format ONLY: {{"features": [], "constraints": [], "category": "Identified Category", "conflictCheck": {{"hasConflict": false, "details": "", "relatedProduct": ""}}, "rawAnalysis": ""}}
    """
    log_and_save(project_id, "phase1", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase1", "response", response)

    try:
        data = json.loads(response)

        # --- PART 2: HARD FACTS EXTRACTION ---
        spec_schema = """
        {
          "metadata": {
            "product_id": "string (slug)",
            "brand": "string",
            "model_name": "string",
            "description": "string (short marketing description of the product)",
            "category": "cleaning | service | security | industrial",
            "manufacturer_url": "string"
          },
          "core_specs": {
            "battery_runtime_min": "integer (standardized to minutes)",
            "charge_time_min": "integer (standardized to minutes)",
            "weight_kg": "float",
            "dimensions_cm": { "l": "float", "w": "float", "h": "float" },
            "max_slope_deg": "float",
            "ip_rating": "string",
            "climb_height_cm": "float",
            "navigation_type": "string (e.g. SLAM, LiDAR, VSLAM)",
            "connectivity": ["string"]
          },
          "layers": {
            "cleaning": {
              "fresh_water_l": "float",
              "dirty_water_l": "float",
              "area_performance_sqm_h": "float",
              "mop_pressure_kg": "float"
            },
            "service": {
              "max_payload_kg": "float",
              "number_of_trays": "integer",
              "display_size_inch": "float",
              "ads_capable": "boolean"
            },
            "security": {
              "camera_types": ["string"],
              "night_vision": "boolean",
              "gas_detection": ["string"],
              "at_interface": "boolean"
            }
          },
          "extended_features": [
            { "feature": "string", "value": "string", "unit": "string" }
          ]
        }
        """

        specs_prompt = f"""
        PHASE 1 (Part 2): HARD FACT EXTRACTION
        Input: "{analysis_content}"

        Task: Extract technical specifications strictly according to the provided JSON schema.

        NORMALIZATION RULES (STRICTLY FOLLOW):
        1. Time: Convert ALL time values (runtime, charging) to MINUTES (Integer). Example: "1:30 h" -> 90, "2 hours" -> 120.
        2. Dimensions/Weight: All lengths in CM, weights in KG.
        3. Performance: Area performance always in m²/h.
        4. Booleans: Use true/false (not strings).
        5. Unknowns: If a value is not in the text, set it to null. DO NOT HALLUCINATE.

        LOGIC FOR LAYERS:
        - If product uses water/brushes -> Fill 'layers.cleaning'.
        - If product delivers items/trays -> Fill 'layers.service'.
        - If product patrols/detects -> Fill 'layers.security'.

        EXTENDED FEATURES:
        - Put any technical feature that doesn't fit the schema into 'extended_features'.

        Output JSON format ONLY based on this schema:
        {spec_schema}
        """

        log_and_save(project_id, "phase1_specs", "prompt", specs_prompt)
        specs_response = call_gemini_flash(specs_prompt, system_instruction=sys_instr, json_mode=True)
        log_and_save(project_id, "phase1_specs", "response", specs_response)

        try:
            specs_data = json.loads(specs_response)

            # Only a JSON object can carry metadata; other shapes are stored untouched.
            if isinstance(specs_data, dict):
                metadata = specs_data.get('metadata')

                # FORCE URL PERSISTENCE: If input was a URL, ensure it's in the metadata
                if is_url:
                    # The model may emit "metadata": null; treat that like a missing key.
                    if not isinstance(metadata, dict):
                        metadata = {}
                        specs_data['metadata'] = metadata
                    metadata['manufacturer_url'] = product_input.strip()

                # AUTO-RENAME PROJECT based on extracted metadata
                if isinstance(metadata, dict):
                    # The prompt asks for null on unknown values, so map None to ''
                    # rather than letting it render as the text "None" in the name.
                    brand = str(metadata.get('brand') or '').strip()
                    model = str(metadata.get('model_name') or '').strip()
                    new_name = f"{brand} {model}".strip()
                    if new_name:
                        logging.info(f"Renaming project {project_id} to: {new_name}")
                        db_manager.update_project_name(project_id, new_name)

            data['specs'] = specs_data
        except json.JSONDecodeError:
            logging.error(f"Failed to decode JSON from Gemini response in phase1 (specs): {specs_response}")
            data['specs'] = {"error": "Failed to extract specs", "raw": specs_response}

        db_manager.save_gtm_result(project_id, 'phase1_result', json.dumps(data))

        # IMPORTANT: return the id so the frontend can store it
        data['projectId'] = project_id
        return data
    except json.JSONDecodeError:
        logging.error(f"Failed to decode JSON from Gemini response in phase1: {response}")
        error_response = {
            "error": "Die Antwort des KI-Modells war kein gültiges JSON. Das passiert manchmal bei hoher Auslastung. Bitte versuchen Sie es in Kürze erneut.",
            "details": response,
            # The id is included even on failure; no phase result has been saved at this point.
            "projectId": project_id
        }
        return error_response


def phase2(payload):
    """Run phase 2: derive Ideal Customer Profiles and data proxies.

    Reads ``phase1Data``, ``lang`` (default ``'de'``) and ``projectId`` from
    ``payload``, sends the ICP prompt to the model, stores the parsed JSON as
    ``phase2_result`` and returns it. Invalid JSON from the model propagates
    as ``json.JSONDecodeError``.
    """
    project_id = payload.get('projectId')
    lang = payload.get('lang', 'de')
    phase1_data = payload.get('phase1Data', {})

    system_prompt = get_system_instruction(lang)
    language_rule = get_output_lang_instruction(lang)

    # Pre-render the interpolated pieces so the template below stays readable.
    product_context = json.dumps(phase1_data)
    category = phase1_data.get('category', 'Unknown')

    prompt = f"""
    PHASE 2: IDEAL CUSTOMER PROFILE (ICP) & DATA PROXIES - STRATEGIC ANALYSIS

    **Product Context:**
    {product_context}

    **Your Task:**
    Answer the following strategic questions to determine the Ideal Customer Profiles (ICPs).

    **Strategic Questions:**
    1.  **ICP Identification:** Based on the product's category ({category}), which 3 industries face the most significant operational challenges (e.g., safety, efficiency, high manual labor costs, security risks) that this product directly solves?
    2.  **Rationale:** For each identified ICP, provide a concise rationale. Why is this product a perfect fit for this specific industry? (e.g., "Reduces inspection costs by X%", "Improves safety in hazardous environments", "Automates a critical but repetitive task").
    3.  **Data Proxies:** How can we find these companies online? What specific digital footprints (data proxies) do they leave? Think about:
        *   Keywords on their websites (e.g., 'plant safety', 'autonomous inspection', 'logistics automation').
        *   Specific job titles on LinkedIn (e.g., 'Head of Security', 'Logistics Manager', 'Maintenance Lead').
        *   Their participation in specific industry trade shows or publications.

    {language_rule}

    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"icps": [{{"name": "Industry Name", "rationale": "Why it's a fit."}}], "dataProxies": [{{"target": "e.g., Company Websites", "method": "How to find them."}}]}}
    """
    log_and_save(project_id, "phase2", "prompt", prompt)
    raw_reply = call_gemini_flash(prompt, system_instruction=system_prompt, json_mode=True)
    log_and_save(project_id, "phase2", "response", raw_reply)

    result = json.loads(raw_reply)
    db_manager.save_gtm_result(project_id, 'phase2_result', json.dumps(result))
    return result

def phase3(payload):
    """Run phase 3: identify 'whale' accounts and buying-center roles.

    Reads ``phase2Data``, ``lang`` (default ``'de'``) and ``projectId`` from
    ``payload``, prompts the model with the ICPs from phase 2, stores the
    parsed JSON as ``phase3_result`` and returns it. Invalid JSON from the
    model propagates as ``json.JSONDecodeError``.
    """
    project_id = payload.get('projectId')
    lang = payload.get('lang', 'de')
    phase2_data = payload.get('phase2Data', {})

    system_prompt = get_system_instruction(lang)
    language_rule = get_output_lang_instruction(lang)

    target_icps = json.dumps(phase2_data.get('icps'))

    prompt = f"""
    PHASE 3: WHALE HUNTING & BUYING CENTER ANALYSIS - STRATEGIC ANALYSIS

    **Target ICPs (Industries):**
    {target_icps}

    **Your Task:**
    Answer the following strategic questions to identify key accounts and decision-makers.

    **Strategic Questions:**
    1.  **Whale Identification:** For each ICP, identify 3-5 specific 'Whale' companies in the DACH market. These should be leaders, innovators, or companies with significant scale in that sector.
    2.  **Buying Center Roles:** Identify the specific job titles for the 4 Universal Strategic Archetypes in the context of these industries.
        *   **Operativer Entscheider:** Who feels the pain daily? (e.g., Plant Manager, Store Manager, Head of Logistics).
        *   **Infrastruktur Verantwortlicher:** Who has to integrate it? (e.g., IT Security, Facility Manager, Legal/Compliance).
        *   **Wirtschaftlicher Entscheider:** Who signs the check? (e.g., CFO, Purchasing Director).
        *   **Innovations-Treiber:** Who pushes for the pilot? (e.g., CDO, Strategy Lead).

    {language_rule}

    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"whales": [{{"industry": "ICP Name", "accounts": ["Company A", "Company B"]}}], "roles": ["Operativer Entscheider: [Job Titles]", "Infrastruktur Verantwortlicher: [Job Titles]", "Wirtschaftlicher Entscheider: [Job Titles]", "Innovations-Treiber: [Job Titles]"]}}
    """
    log_and_save(project_id, "phase3", "prompt", prompt)
    raw_reply = call_gemini_flash(prompt, system_instruction=system_prompt, json_mode=True)
    log_and_save(project_id, "phase3", "response", raw_reply)

    result = json.loads(raw_reply)
    db_manager.save_gtm_result(project_id, 'phase3_result', json.dumps(result))
    return result

def phase4(payload):
    """Run phase 4: build the strategy matrix (pain point, angle, differentiation).

    Reads ``phase1Data``, ``phase3Data``, ``lang`` (default ``'de'``) and
    ``projectId`` from ``payload``. The prompt is built from the product
    category and features of phase 1 and the whale industries of phase 3.
    The parsed JSON is stored as ``phase4_result`` and returned. Invalid JSON
    from the model propagates as ``json.JSONDecodeError``.
    """
    phase3_data = payload.get('phase3Data', {})
    phase1_data = payload.get('phase1Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')

    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)

    # The account list that used to be collected here was never used and could
    # raise on a whale entry with "accounts": null, so it has been dropped.

    prompt = f"""
    PHASE 4: STRATEGY & ANGLE DEVELOPMENT - STRATEGIC ANALYSIS

    **Product Category:** {phase1_data.get('category')}
    **Target Industries:** {json.dumps([w.get('industry') for w in phase3_data.get('whales', [])])}
    **Product Features:** {json.dumps(phase1_data.get('features'))}

    **Your Task:**
    Answer the following strategic questions to build the core of our market approach.

    **Strategic Questions:**
    1.  **Pain Point Analysis:** For each industry segment, what is the single most significant, measurable **Pain Point** this product solves?
    2.  **Develop the Angle:** What is our unique story? The "Angle" should directly connect a product capability to their primary pain point.
    3.  **Define Differentiation (Hybrid Service):** Why should they choose us? Explain the specific "Service Gap" that our Hybrid Model (Machine + Human) closes for this specific Category ({phase1_data.get('category')}). E.g., for Security, the gap is "Intervention"; for Cleaning, it is "Edges/Hygiene".

    {lang_instr}

    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"strategyMatrix": [{{"segment": "Target Industry", "painPoint": "The core problem.", "angle": "Our unique story.", "differentiation": "Why us (Hybrid Service logic)."}}]}}
    """
    log_and_save(project_id, "phase4", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase4", "response", response)
    data = json.loads(response)
    db_manager.save_gtm_result(project_id, 'phase4_result', json.dumps(data))
    return data

def phase5(payload):
    """Run phase 5: generate the final GTM strategy report as Markdown.

    Reads ``phase1Data`` .. ``phase4Data``, ``lang`` (default ``'de'``) and
    ``projectId`` from ``payload``. Unlike the JSON phases, this one uses its
    own consultant-style system instruction and calls the model with
    ``json_mode=False``. A surrounding Markdown code fence is stripped from
    the reply before it is logged, stored as ``phase5_result`` and returned
    as ``{"report": <markdown>}``.
    """
    project_id = payload.get('projectId')
    lang = payload.get('lang', 'de')
    phase1_data = payload.get('phase1Data', {})
    phase2_data = payload.get('phase2Data', {})
    phase3_data = payload.get('phase3Data', {})
    phase4_data = payload.get('phase4Data', {})

    # Diagnostic logging of the incoming strategy matrix
    strat_matrix = phase4_data.get('strategyMatrix', [])
    logging.info(f"Phase 5 Input Check - Strategy Matrix Rows: {len(strat_matrix)}")

    # Special instruction for phase 5 (reporting): replaces the global
    # JSON-oriented system instruction to force long-form text.
    if lang != 'de':
        report_sys_instr = """
        You are a Senior Business Consultant at a top-tier firm (like McKinsey or BCG).
        Your task is to write a strategically deep, detailed "Go-to-Market Strategy Report".

        RULES:
        1.  **No JSON:** Your output is pure, cleanly formatted Markdown.
        2.  **Senior Grade:** Do not write "thin" bullet points. Write full sentences and explain the context ("Why it matters").
        3.  **Completeness:** Never stop in the middle of a table or sentence.
        4.  **Formatting:** Use bolding, lists, and tables to enhance readability.
        """
    else:
        report_sys_instr = """
        Du bist ein Senior Business Consultant bei einer Top-Tier-Beratung (wie McKinsey oder BCG).
        Deine Aufgabe ist es, einen strategisch tiefgehenden, detaillierten "Go-to-Market Strategy Report" zu verfassen.

        REGELN:
        1.  **Kein JSON:** Deine Ausgabe ist reines, sauber formatiertes Markdown.
        2.  **Senior Grade:** Schreibe nicht stichpunktartig "dünn", sondern formuliere ganze Sätze und erkläre die Zusammenhänge ("Why it matters").
        3.  **Vollständigkeit:** Brich niemals mitten in einer Tabelle oder einem Satz ab.
        4.  **Formatierung:** Nutze Fettgedrucktes, Listen und Tabellen, um die Lesbarkeit zu erhöhen.
        """

    lang_instr = get_output_lang_instruction(lang)

    # Trim the phase 1 input to the essentials to keep the output focused;
    # 'specs' (hard facts) is kept so the report can include them.
    lean_phase1 = {
        "features": phase1_data.get('features', []),
        "constraints": phase1_data.get('constraints', []),
        "specs": phase1_data.get('specs', {}),
        "category": phase1_data.get('category', 'Unknown')
    }

    prompt = f"""
    PHASE 5: FINAL REPORT GENERATION

    INPUT DATA:
    - Product: {json.dumps(lean_phase1)}
    - ICPs: {json.dumps(phase2_data.get('icps', []))}
    - Targets: {json.dumps(phase3_data.get('whales', []))}
    - Strategy Matrix: {json.dumps(strat_matrix)}

    TASK:
    Write the "GTM STRATEGY REPORT v3.1" in Markdown.
    Expand on the input data. Don't just copy it. Interpret it.

    REQUIRED STRUCTURE & CONTENT:

    # GTM STRATEGY REPORT v3.1

    ## 1. Strategic Core
    *   **Category Definition:** Explicitly state that this product falls under the '{lean_phase1.get('category')}' category.
    *   **Dynamic Service Logic:** Explain clearly how the "Machine Layer" (What the robot does) and the "Human Service Layer" (What Wackler does) work together for THIS specific category. Use the logic defined for '{lean_phase1.get('category')}'.

    ## 2. Executive Summary
    *   Write a compelling management summary (approx. 150 words) outlining the market opportunity and the core value proposition.

    ## 3. Product Reality Check (Technical Deep Dive)
    *   **Core Capabilities:** Summarize the top 3-5 capabilities.
    *   **Technical Constraints:** Create a detailed Markdown table for the Hard Facts.
        *   Include ALL available specs (Dimensions, Weight, Runtime, Limits, Sensor types, Cleaning performance, etc.) from the input.
        *   Make it as comprehensive as a technical datasheet to satisfy the "Evaluator" persona.
        | Feature | Value | Implication |
        | :--- | :--- | :--- |
        | ... | ... | ... |

    ## 4. Target Architecture (ICPs)
    *   For each ICP, write a short paragraph explaining the "Strategic Fit". Why is this industry under pressure to buy?
    *   Mention key "Whale" accounts identified.

    ## 5. Strategy Matrix
    *   Create a detailed Markdown table mapping the strategy.
    *   **CRITICAL:** Ensure the table syntax is perfect. use <br> for line breaks inside cells.
    *   Columns: **Target Segment** | **The Pain (Operational)** | **The Angle (Story)** | **Differentiation (Service Gap)**
    *   Fill this table with the data from the 'Strategy Matrix' input.

    ## 6. Operational GTM Roadmap
    *   **Step 1: Lead Gen:** Recommend specific Inbound/Outbound tactics for these ICPs.
    *   **Step 2: Consultative Sales:** How to handle the site-check? What constraints need checking?
    *   **Step 3: Proof of Value:** Define the Pilot Phase (Paid Pilot vs. Free PoC).
    *   **Step 4: Expansion:** Path to RaaS/Service contracts.

    ## 7. Commercial Logic (ROI Framework)
    *   Present the ROI calculation logic.
    *   **The Formula:** Show the Net Value formula.
    *   **Input Variables:** List the specific variables the customer needs to provide.
    *   **Example Calculation:** Provide a hypothetical example calculation with plausible ranges (e.g. "Assuming 20-30% efficiency gain...") to illustrate the potential.

    {lang_instr}

    Output: Return strictly MARKDOWN formatted text.
    """
    log_and_save(project_id, "phase5", "prompt", prompt)

    # Use the specialized system instruction here!
    report = call_gemini_flash(prompt, system_instruction=report_sys_instr, json_mode=False)

    # Clean up potentially fenced markdown code blocks. Each check runs on the
    # result of the previous one, exactly like the sequential replace calls did.
    opening_tagged = "```markdown"
    fence = "```"
    report = report.strip()
    if report.startswith(opening_tagged):
        report = report[len(opening_tagged):]
    if report.startswith(fence):
        report = report[len(fence):]
    if report.endswith(fence):
        report = report[:-3]
    report = report.strip()

    log_and_save(project_id, "phase5", "response", report)
    result = {"report": report}
    db_manager.save_gtm_result(project_id, 'phase5_result', json.dumps(result))
    return result

def phase6(payload):
    """Run phase 6: battlecards and visual prompts for sales enablement.

    Reads ``phase1Data``, ``phase3Data``, ``phase4Data``, ``lang`` (default
    ``'de'``) and ``projectId`` from ``payload``. The parsed model reply is
    stored as ``phase6_result`` and returned; when the model wraps the object
    in a list, its first element is used. Invalid JSON from the model
    propagates as ``json.JSONDecodeError``.
    """
    project_id = payload.get('projectId')
    lang = payload.get('lang', 'de')
    phase1_data = payload.get('phase1Data', {})
    phase3_data = payload.get('phase3Data', {})
    phase4_data = payload.get('phase4Data', {})

    system_prompt = get_system_instruction(lang)
    language_rule = get_output_lang_instruction(lang)

    # Pre-render the context pieces in the order the template uses them.
    features_json = json.dumps(phase1_data.get('features'))
    personas_json = json.dumps(phase3_data.get('roles'))
    strategy_json = json.dumps(phase4_data.get('strategyMatrix'))

    prompt = f"""
    PHASE 6: SALES ENABLEMENT & VISUALS - STRATEGIC ANALYSIS

    **Context:**
    - Product Features: {features_json}
    - Personas: {personas_json}
    - Strategy: {strategy_json}

    **Your Task:**
    Answer the following strategic questions to create sales enablement materials.

    **Strategic Questions:**
    1.  **Anticipate Objections:** For each of the 4 key Archetypes (Operative, Infrastructure, Economic, Innovation), what is their most likely and critical **objection**?
        *   *Special Focus for 'Infrastructure Responsible' (Gatekeeper):* Address **Legal, Liability & Compliance** issues (e.g. GDPR, DGUV V3, accident liability) specifically.
    2.  **Formulate Battlecards:** For each objection, formulate a concise **response script**.
        *   *Requirement:* Use specific **proof points** (e.g., "Certified according to...", "Data hosted in Germany", "Insurance coverage by Wackler") instead of generic promises.
    3.  **Create Visual Prompts:** For the top 3 use cases, write a detailed **visual prompt** for an image generation AI.

    {language_rule}

    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"battlecards": [{{"persona": "Archetype (Job Title)", "objection": "The key objection.", "responseScript": "The compelling response with proof points."}}], "visualPrompts": [{{"title": "Image Title", "context": "Use case description.", "prompt": "Detailed photorealistic prompt."}}]}}
    """
    log_and_save(project_id, "phase6", "prompt", prompt)
    raw_reply = call_gemini_flash(prompt, system_instruction=system_prompt, json_mode=True)
    log_and_save(project_id, "phase6", "response", raw_reply)

    parsed = json.loads(raw_reply)
    # The model sometimes wraps the object in a list; unwrap the first element.
    result = parsed[0] if isinstance(parsed, list) else parsed
    db_manager.save_gtm_result(project_id, 'phase6_result', json.dumps(result))
    return result

def phase7(payload):
    """
    Phase 7: produce vertical landing page copy.

    Reads 'phase2Data' (ICPs), 'phase4Data' (strategy matrix), 'lang'
    (default 'de') and 'projectId' from the payload, sends the prompt through
    call_gemini_flash in JSON mode, logs the prompt and the raw response,
    stores the parsed result as 'phase7_result' and returns it.
    """
    project_id = payload.get('projectId')
    lang = payload.get('lang', 'de')
    phase2_data = payload.get('phase2Data', {})
    phase4_data = payload.get('phase4Data', {})

    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)

    prompt = f"""
    PHASE 7: VERTICAL LANDING PAGE COPY - STRATEGIC ANALYSIS

    **Context:**
    - ICPs: {json.dumps(phase2_data.get('icps'))}
    - Strategy: {json.dumps(phase4_data.get('strategyMatrix'))}

    **Your Task:**
    Create conversion-optimized landing page copy for the top 2 ICPs by answering the following questions.

    **Strategic Questions:**
    1.  **Headline:** What is the most powerful **outcome** for this industry? The headline must grab the attention of a Decider and state this primary result.
    2.  **Subline:** How can you elaborate on the headline? Briefly mention the core problem this industry faces and introduce our solution as the answer.
    3.  **Benefit Bullets:** Transform 3-5 key technical features into tangible **benefit statements** for this specific industry. Each bullet point should answer the customer's question: "What's in it for me?".
    4.  **Call-to-Action (CTA):** What is the logical next step we want the user to take? The CTA should be clear, concise, and action-oriented.
    5.  **Apply Wackler Symbiosis:** Ensure the copy clearly communicates the value of the robot combined with the human expert service.

    {lang_instr}

    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"landingPages": [{{"industry": "ICP Name", "headline": "The compelling headline.", "subline": "The elaborating subline.", "bullets": ["Benefit 1", "Benefit 2"], "cta": "The call to action."}}]}}
    """
    log_and_save(project_id, "phase7", "prompt", prompt)
    raw_response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase7", "response", raw_response)

    parsed = json.loads(raw_response)
    # If the parsed JSON is a list, only its first element is kept.
    result = parsed[0] if isinstance(parsed, list) else parsed

    db_manager.save_gtm_result(project_id, 'phase7_result', json.dumps(result))
    return result

def phase8(payload):
    """
    Phase 8: derive the commercial logic / ROI framework per ICP.

    Reads 'phase1Data' (product category), 'phase2Data' (ICPs), 'lang'
    (default 'de') and 'projectId' from the payload, sends the prompt through
    call_gemini_flash in JSON mode, logs the prompt and the raw response,
    stores the parsed result as 'phase8_result' and returns it.
    """
    project_id = payload.get('projectId')
    lang = payload.get('lang', 'de')
    phase1_data = payload.get('phase1Data', {})
    phase2_data = payload.get('phase2Data', {})

    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)

    prompt = f"""
    PHASE 8: COMMERCIAL LOGIC & ROI CALCULATOR - STRATEGIC ANALYSIS

    **Context:**
    - Product Category: {phase1_data.get('category')}
    - ICPs: {json.dumps(phase2_data.get('icps'))}

    **Your Task:**
    Develop a calculation framework (NOT just random numbers) for the CFO pitch.

    **Strategic Questions:**
    1.  **Identify the Cost Driver:** What is the unit of cost we are attacking?
    2.  **ROI Formula & Example:** Create a formula: `Net Value = (Savings + Risk Mitigation) - (TCO)`.
        *   *CRITICAL:* Provide **PLAUSIBLE EXAMPLE RANGES** for efficiency gains (e.g., "Estimate: 20-30% reduction in manual patrol time") instead of just listing the variable.
        *   **Do NOT output "undefined".** Give a realistic estimation based on the industry context.
    3.  **Risk Argument:** Financial value of avoiding the worst-case scenario.

    {lang_instr}

    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"businessCases": [{{"industry": "ICP Name", "costDriver": "Unit of cost.", "efficiencyGain": "Plausible estimate range (e.g. 25-35%).", "roiFormula": "The formula with defined variables.", "riskArgument": "The cost of inaction."}}]}}
    """
    log_and_save(project_id, "phase8", "prompt", prompt)
    raw_response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase8", "response", raw_response)

    parsed = json.loads(raw_response)
    # If the parsed JSON is a list, only its first element is kept.
    result = parsed[0] if isinstance(parsed, list) else parsed

    db_manager.save_gtm_result(project_id, 'phase8_result', json.dumps(result))
    return result

def phase9(payload):
    """
    Phase 9: translate technical features into value-oriented benefits.

    Reads 'phase1Data' (features), 'phase4Data' (strategy matrix), 'lang'
    (default 'de') and 'projectId' from the payload, sends the prompt through
    call_gemini_flash in JSON mode, logs the prompt and the raw response,
    stores the parsed result as 'phase9_result' and returns it.

    Raises:
        json.JSONDecodeError: if the model response is not valid JSON.
    """
    phase1_data = payload.get('phase1Data', {})
    phase4_data = payload.get('phase4Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')

    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)

    # A missing, null or otherwise non-list strategy matrix must not crash
    # prompt construction; malformed (non-dict) entries are skipped.
    strategy_matrix = phase4_data.get('strategyMatrix')
    if not isinstance(strategy_matrix, list):
        strategy_matrix = []
    pain_points = [
        entry.get('painPoint') for entry in strategy_matrix if isinstance(entry, dict)
    ]

    prompt = f"""
    PHASE 9: THE "FEATURE-TO-VALUE" TRANSLATOR - STRATEGIC ANALYSIS

    **Context:**
    - Input Features: {json.dumps(phase1_data.get('features'))}
    - Strategy Pains: {json.dumps(pain_points)}

    **Your Task:**
    Translate technical features into compelling, value-oriented benefits.

    **Structured Process:**
    1.  **State the Feature:** Pick a key technical feature.
    2.  **Ask "So what?" (The Consequence):** What is the immediate consequence?
    3.  **Ask "So what?" again (The Value):** What is the ultimate benefit?
    4.  **Formulate Headline:** Short, powerful headline.

    {lang_instr}

    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"techTranslations": [{{"feature": "The technical feature.", "story": "The 'So what? So what?' analysis.", "headline": "The final value headline."}}]}}
    """
    log_and_save(project_id, "phase9", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase9", "response", response)
    data = json.loads(response)
    # Same handling as the other phases: if the parsed JSON is a list,
    # keep only its first element.
    if isinstance(data, list):
        data = data[0]
    db_manager.save_gtm_result(project_id, 'phase9_result', json.dumps(data))
    return data

def update_specs(payload):
    """
    Updates the technical specifications (Hard Facts) for a project.
    This allows manual correction of AI-extracted data.

    Args:
        payload: dict with 'projectId' and 'specs' (the replacement specs).

    Returns:
        dict: {"status": "success", "specs": <new specs>}.

    Raises:
        ValueError: if projectId or specs are missing/empty, the project or
            its phase 1 result cannot be found, or the stored phase 1 result
            is not a valid JSON object.
    """
    project_id = payload.get('projectId')
    new_specs = payload.get('specs')

    # Validate the request before touching the database.
    if not project_id:
        raise ValueError("No projectId provided for update_specs.")
    if not new_specs:
        raise ValueError("No specs provided for update_specs.")

    # Load current project data
    project_data = db_manager.get_project_data(project_id)
    if not project_data:
        raise ValueError(f"Project {project_id} not found.")

    phases = project_data.get('phases', {})
    phase1_result = phases.get('phase1_result')

    if not phase1_result:
        raise ValueError("Phase 1 result not found. Cannot update specs.")

    # The phase result may come back as a JSON string; parse it if so.
    if isinstance(phase1_result, str):
        try:
            phase1_result = json.loads(phase1_result)
        except json.JSONDecodeError as exc:
            # Keep the original decode error attached as the cause.
            raise ValueError("Phase 1 result is corrupted (invalid JSON string).") from exc

    # Item assignment below only makes sense on a JSON object; fail with a
    # clear message instead of an opaque TypeError.
    if not isinstance(phase1_result, dict):
        raise ValueError("Phase 1 result is corrupted (expected a JSON object).")

    # Update specs
    phase1_result['specs'] = new_specs

    # Save back to DB
    # We use save_gtm_result which expects a stringified JSON for the phase result
    db_manager.save_gtm_result(project_id, 'phase1_result', json.dumps(phase1_result))

    logging.info(f"Updated specs for project {project_id}")
    return {"status": "success", "specs": new_specs}

def translate(payload):
    """
    Placeholder for the report translation mode (not implemented yet).

    The payload is currently ignored; a fixed placeholder report is returned.
    """
    placeholder_result = {"report": "Translated report will be here."}
    return placeholder_result

def image(payload):
    """
    Generate an image for the given prompt via call_gemini_image.

    Payload keys read: 'prompt' (default 'No Prompt'), 'projectId',
    'aspectRatio', 'referenceImagesBase64' (list; only the first entry is
    used) and, as a fallback, 'referenceImage'.

    Returns a dict with 'imageBase64' (PNG data URL) on success, or a dict
    with 'error' and 'details' if generation fails.
    """
    project_id = payload.get('projectId')
    prompt = payload.get('prompt', 'No Prompt')
    aspect_ratio = payload.get('aspectRatio')

    # Prefer the first entry of a non-empty reference list; otherwise fall
    # back to the single 'referenceImage' field (None when absent/empty).
    candidates = payload.get('referenceImagesBase64')
    if isinstance(candidates, list) and candidates:
        ref_image = candidates[0]
    else:
        ref_image = payload.get('referenceImage') or None

    log_and_save(project_id, "image", "prompt", f"{prompt} (Ratio: {aspect_ratio or 'default'})")
    if ref_image:
        logging.info(f"Image-Mode: Reference Image found (Length: {len(ref_image)})")

    try:
        encoded = call_gemini_image(prompt, reference_image_b64=ref_image, aspect_ratio=aspect_ratio)
        # Only a short preview of the base64 payload is written to the log file.
        log_and_save(project_id, "image", "response_b64_preview", encoded[:100] + "...")
        return {"imageBase64": f"data:image/png;base64,{encoded}"}
    except Exception as err:
        logging.error(f"Failed to generate image: {err}", exc_info=True)
        return {"error": "Image generation failed.", "details": str(err)}

def main():
    """
    Main entry point of the script.
    Parses command-line arguments to determine which phase to run.

    The payload is read from --payload_file (preferred) or decoded from
    --payload_base64. The result of the selected mode is printed to stdout
    as JSON. On any failure (unreadable/invalid payload, unknown mode, or an
    exception inside the mode) a JSON object with an "error" key is printed
    to stdout and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="GTM Architect Orchestrator")
    parser.add_argument("--mode", required=True, help="The execution mode (e.g., phase1, phase2).")
    parser.add_argument("--payload_base64", help="The Base64 encoded JSON payload (deprecated, use payload_file).")
    parser.add_argument("--payload_file", help="Path to a JSON file containing the payload (preferred).")

    args = parser.parse_args()

    payload = {}
    try:
        if args.payload_file:
            if not os.path.exists(args.payload_file):
                raise FileNotFoundError(f"Payload file not found: {args.payload_file}")
            with open(args.payload_file, 'r', encoding='utf-8') as f:
                payload = json.load(f)
        elif args.payload_base64:
            payload_str = base64.b64decode(args.payload_base64).decode('utf-8')
            payload = json.loads(payload_str)
        else:
            raise ValueError("No payload provided (neither --payload_file nor --payload_base64).")

    except (ValueError, OSError) as e:
        # ValueError covers json.JSONDecodeError, binascii.Error and
        # UnicodeDecodeError (all subclasses). OSError covers a missing file
        # as well as permission errors or a directory path, so every payload
        # loading failure is reported as JSON instead of a raw traceback.
        logging.error(f"Failed to load payload: {e}")
        # Print error as JSON to stdout for the server to catch
        print(json.dumps({"error": "Invalid payload.", "details": str(e)}))
        sys.exit(1)

    # Function mapping to dynamically call the correct phase
    modes = {
        "phase1": phase1,
        "phase2": phase2,
        "phase3": phase3,
        "phase4": phase4,
        "phase5": phase5,
        "phase6": phase6,
        "phase7": phase7,
        "phase8": phase8,
        "phase9": phase9,
        "update_specs": update_specs,
        "translate": translate,
        "image": image,
        "list_history": list_history,
        "load_history": load_history,
        "delete_session": delete_session,
    }

    mode_function = modes.get(args.mode)

    if not mode_function:
        logging.error(f"Invalid mode specified: {args.mode}")
        print(json.dumps({"error": f"Invalid mode: {args.mode}"}))
        sys.exit(1)

    try:
        logging.info(f"Executing mode: {args.mode}")
        result = mode_function(payload)
        # Ensure the output is always a JSON string
        print(json.dumps(result, ensure_ascii=False))
        logging.info(f"Successfully executed mode: {args.mode}")

    except Exception as e:
        # Top-level boundary: log the traceback and report the failure as JSON.
        logging.error(f"An error occurred during execution of mode '{args.mode}': {e}", exc_info=True)
        print(json.dumps({"error": f"An error occurred in {args.mode}.", "details": str(e)}))
        sys.exit(1)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()