# Brancheneinstufung2/market_intel_orchestrator.py

import argparse
import json
import os
import sys # Import sys for stderr
import requests
from bs4 import BeautifulSoup
import logging
from datetime import datetime
import re # Für Regex-Operationen

# --- AUTARKES LOGGING SETUP --- #
def create_self_contained_log_filename(mode, log_dir="/app/Log"):
    """
    Build the path of the orchestrator's daily log file and ensure its directory exists.

    Only the date (not the time of day) goes into the file name, so every run
    on the same day ends up in one file instead of producing a new log per run.

    Args:
        mode: Accepted for backward compatibility with existing callers; it
            does not influence the generated file name.
        log_dir: Directory that receives the log file. Defaults to the fixed
            log directory inside the Docker container.

    Returns:
        Path of the form ``<log_dir>/YYYY-MM-DD_market_intel.log``.

    Raises:
        OSError: If the log directory cannot be created.
    """
    # exist_ok=True already covers the "directory exists" case, so a separate
    # os.path.exists() check (which could race with another process) is not needed.
    os.makedirs(log_dir, exist_ok=True)

    date_str = datetime.now().strftime("%Y-%m-%d")
    return os.path.join(log_dir, f"{date_str}_market_intel.log")

# Configure logging once at import time: every record from DEBUG upwards is
# appended to the daily log file and also written to stderr.
log_filename = create_self_contained_log_filename("market_intel_orchestrator")
_log_handlers = [
    logging.FileHandler(log_filename, mode='a', encoding='utf-8'),
    logging.StreamHandler(sys.stderr),
]
logging.basicConfig(
    handlers=_log_handlers,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='[%(asctime)s] %(levelname)s [%(funcName)s]: %(message)s',
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)
# --- END AUTARKES LOGGING SETUP --- #

def load_gemini_api_key(file_path="gemini_api_key.txt"):
    """
    Read the Gemini API key from a text file.

    Args:
        file_path: Path of the file that contains the key.

    Returns:
        The file content with surrounding whitespace removed.

    Raises:
        Exception: Any error raised while opening or reading the file is
            logged at CRITICAL level and then re-raised unchanged.
    """
    try:
        with open(file_path, "r") as key_file:
            raw_key = key_file.read()
    except Exception as load_error:
        logger.critical(f"Fehler beim Laden des Gemini API Keys: {load_error}")
        raise
    return raw_key.strip()

def load_serp_api_key(file_path="serpapikey.txt"):
    """
    Load the SerpAPI key from a file, falling back to the environment.

    Args:
        file_path: Path of the file that contains the key.

    Returns:
        The stripped file content when the file exists; otherwise the value of
        the ``SERP_API_KEY`` environment variable (None when it is unset).
        None is also returned when loading raises an error, which is logged
        as a warning.
    """
    try:
        if not os.path.exists(file_path):
            # No key file present: use the environment variable instead.
            return os.environ.get("SERP_API_KEY")
        with open(file_path, "r") as key_file:
            file_content = key_file.read()
        return file_content.strip()
    except Exception as load_error:
        logger.warning(f"Konnte SerpAPI Key nicht laden: {load_error}")
        return None

def get_website_text(url):
    """
    Download a page and return its visible text reduced to printable ASCII.

    Script, style, nav, footer and header elements are removed before the text
    is extracted. Every character outside printable ASCII, newline, carriage
    return and tab is then deleted (this also drops non-ASCII letters such as
    umlauts).

    Args:
        url: Address of the page to fetch.

    Returns:
        At most the first 10000 characters of the cleaned text, or None when
        the request, the parsing or the cleanup fails (the error is logged).
    """
    logger.info(f"Scraping URL: {url}")
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        page = requests.get(url, headers=request_headers, timeout=10)
        page.raise_for_status()
        document = BeautifulSoup(page.text, 'lxml')
        # Remove elements that carry no page content before extracting text.
        for element in document(['script', 'style', 'nav', 'footer', 'header']):
            element.decompose()
        visible_text = document.get_text(separator=' ', strip=True)
        ascii_text = re.sub(r'[^\x20-\x7E\n\r\t]', '', visible_text)
        # Cap the length to keep the downstream prompt context bounded.
        return ascii_text[:10000]
    except Exception as scrape_error:
        logger.error(f"Scraping failed for {url}: {scrape_error}")
        return None

def serp_search(query, num_results=3):
    """
    Run a Google search through SerpAPI and return the organic results.

    Args:
        query: Search query string sent to SerpAPI.
        num_results: Value passed as SerpAPI's ``num`` parameter.

    Returns:
        A list of dicts with the keys ``title``, ``link`` and ``snippet``
        (a value is None when the result lacks that field). An empty list is
        returned when no API key is available, when the response contains no
        ``organic_results``, or when the request or parsing fails.
    """
    api_key = load_serp_api_key()
    if not api_key:
        logger.warning("SerpAPI Key fehlt. Suche übersprungen.")
        return []

    logger.info(f"SerpAPI Suche: {query}")
    params = {
        "engine": "google",
        "q": query,
        "api_key": api_key,
        "num": num_results,
        "hl": "de",
        "gl": "de"
    }
    try:
        response = requests.get("https://serpapi.com/search", params=params, timeout=20)
        response.raise_for_status()
        data = response.json()

        return [
            {
                "title": result.get("title"),
                "link": result.get("link"),
                "snippet": result.get("snippet")
            }
            for result in data.get("organic_results", [])
        ]
    except Exception as e:
        # requests puts the request URL (including the query string) into its
        # error messages; mask the api_key parameter so the secret never
        # reaches the log file.
        safe_message = re.sub(r'(api_key=)[^&\s]+', r'\1***', str(e))
        logger.error(f"SerpAPI Fehler: {safe_message}")
        return []

def _extract_target_industries_from_context(context_content):
    md = context_content
    # Versuche verschiedene Muster für die Tabelle, falls das Format variiert
    step2_match = re.search(r'##\s*Schritt\s*2:[\s\S]*?(?=\n##\s*Schritt\s*\d:|\s*$)', md, re.IGNORECASE)
    if not step2_match:
        # Fallback: Suche nach "Zielbranche" irgendwo im Text
        match = re.search(r'Zielbranche\s*\|?\s*([^|\n]+)', md, re.IGNORECASE)
        if match:
            return [s.strip() for s in match.group(1).split(',')]
        return []

    table_lines = []
    in_table = False
    for line in step2_match.group(0).split('\n'):
        if line.strip().startswith('|'):
            in_table = True
            table_lines.append(line.strip())
        elif in_table: break

    if len(table_lines) < 3: return []
    header = [s.strip() for s in table_lines[0].split('|') if s.strip()]
    industry_col = next((h for h in header if re.search(r'zielbranche|segment|branche|industrie', h, re.IGNORECASE)), None)
    if not industry_col: return []

    col_idx = header.index(industry_col)
    industries = []
    for line in table_lines[2:]:
        cells = [s.strip() for s in line.split('|') if s.strip()]
        if len(cells) > col_idx: industries.append(cells[col_idx])
    return list(set(industries))

def _extract_json_from_text(text):
    """
    Versucht, ein JSON-Objekt aus einem Textstring zu extrahieren,
    unabhängig von Markdown-Formatierung (```json ... ```).
    """
    try:
        # 1. Versuch: Direktersatz von Markdown-Tags (falls vorhanden)
        clean_text = text.replace("```json", "").replace("```", "").strip()
        return json.loads(clean_text)
    except json.JSONDecodeError:
        pass

    try:
        # 2. Versuch: Regex Suche nach dem ersten { und letzten }
        json_match = re.search(r"(\{[\s\S]*\})", text)
        if json_match:
            return json.loads(json_match.group(1))
    except json.JSONDecodeError:
        pass

    logger.error(f"JSON Parsing fehlgeschlagen. Roher Text: {text[:500]}...")
    return None

def generate_search_strategy(reference_url, context_content):
    """
    Ask Gemini for a search strategy derived from the context document and
    the reference client's homepage.

    Args:
        reference_url: URL of the reference client; its homepage is scraped
            and embedded in the prompt ("No Homepage Text" when scraping fails).
        context_content: Markdown strategy context; embedded verbatim in the
            prompt and also used to extract the target industries.

    Returns:
        The JSON value parsed from Gemini's answer (requested keys:
        summaryOfOffer, idealCustomerProfile, searchStrategyICP,
        digitalSignals, targetPages, signals). When the request or the parsing
        fails, a dict with the same keys, error placeholder texts and an empty
        "signals" list is returned so consumers still get the expected shape.

    Raises:
        Exception: Whatever load_gemini_api_key raises when the key cannot be read.
    """
    logger.info(f"Generating strategy for {reference_url}")
    api_key = load_gemini_api_key()
    target_industries = _extract_target_industries_from_context(context_content)
    homepage_text = get_website_text(reference_url)

    # The key is sent in the x-goog-api-key header instead of the URL query
    # string: requests includes the URL in its exception messages, which are
    # logged below, so a key in the URL would end up in the log file.
    GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent"
    request_headers = {'Content-Type': 'application/json', 'x-goog-api-key': api_key}

    prompt = f"""
    You are a B2B Market Intelligence Architect.

    --- STRATEGIC CONTEXT ---
    {context_content}

    --- EXTRACTED TARGET INDUSTRIES ---
    {', '.join(target_industries)}

    --- REFERENCE CLIENT HOMEPAGE ---
    {homepage_text[:10000] if homepage_text else "No Homepage Text"}

    --- TASK ---
    Based on the context and the reference client's homepage, develop a search strategy to find similar companies (competitors/lookalikes) and audit them to find sales triggers.

    1. **summaryOfOffer**: A 1-sentence summary of what the reference client sells.
    2. **idealCustomerProfile**: A concise definition of the Ideal Customer Profile (ICP) based on the reference client.
    3. **searchStrategyICP**: A detailed description of the Ideal Customer Profile (ICP) based on the analysis.
    4. **digitalSignals**: Identification and description of relevant digital signals that indicate purchase interest or engagement.
    5. **targetPages**: A list of the most important target pages on the company website relevant for marketing and sales activities.
    6. **signals**: Identify exactly 4 specific digital signals.
       - **CRITICAL**: One signal MUST be "Technographic / Incumbent Search". It must look for existing competitor software or legacy systems that our offer replaces or complements (e.g., "Uses SAP Ariba", "Has Supplier Portal", "Uses Salesforce").
       - The other 3 signals should focus on business pains or strategic fit (e.g., "Sustainability Report", "Supply Chain Complexity").

    --- SIGNAL DEFINITION ---
    For EACH signal, you MUST provide:
    - `id`: A unique ID (e.g., "sig_1").
    - `name`: A short, descriptive name.
    - `description`: What does this signal indicate?
    - `targetPageKeywords`: A list of 3-5 keywords to look for on a company's website (e.g., ["career", "jobs"] for a hiring signal).
    - `proofStrategy`: An object containing:
        - `likelySource`: Where on the website or web is this info found? (e.g., "Careers Page").
        - `searchQueryTemplate`: A Google search query to find this. Use `{{COMPANY}}` as a placeholder for the company name.
          Example: `site:{{COMPANY}} "software engineer" OR "developer"`

    --- OUTPUT FORMAT ---
    Return ONLY a valid JSON object.
    {{
      "summaryOfOffer": "...",
      "idealCustomerProfile": "...",
      "searchStrategyICP": "...",
      "digitalSignals": "...",
      "targetPages": "...",
      "signals": [
        {{
          "id": "sig_1",
          "name": "...",
          "description": "...",
          "targetPageKeywords": ["..."],
          "proofStrategy": {{
             "likelySource": "...",
             "searchQueryTemplate": "..."
          }}
        }},
        ...
      ]
    }}
    """

    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    logger.info("Sende Anfrage an Gemini API...")
    try:
        # Without a timeout a stalled connection would block the process
        # forever; the generous limit leaves room for long generations.
        response = requests.post(GEMINI_API_URL, json=payload, headers=request_headers, timeout=300)
        response.raise_for_status()
        res_json = response.json()
        logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")

        text = res_json['candidates'][0]['content']['parts'][0]['text']
        result = _extract_json_from_text(text)

        if not result:
            raise ValueError("Konnte kein valides JSON extrahieren")

        return result

    except Exception as e:
        logger.error(f"Strategy generation failed: {e}")
        # Return a fallback with the expected keys to avoid a frontend crash.
        return {
            "summaryOfOffer": "Error generating strategy. Please check logs.",
            "idealCustomerProfile": "Error generating ICP. Please check logs.",
            "searchStrategyICP": "Error generating Search Strategy ICP. Please check logs.",
            "digitalSignals": "Error generating Digital Signals. Please check logs.",
            "targetPages": "Error generating Target Pages. Please check logs.",
            "signals": []
        }

def identify_competitors(reference_url, target_market, industries, summary_of_offer=None):
    """
    Ask Gemini for competitors/lookalikes of the reference company.

    Args:
        reference_url: URL of the reference company (embedded in the prompt).
        target_market: Target market description (embedded in the prompt).
        industries: Iterable of industry names, joined into the prompt;
            None is treated like an empty list.
        summary_of_offer: Optional summary of the offer (embedded in the prompt).

    Returns:
        The JSON value parsed from Gemini's answer (requested keys:
        localCompetitors, nationalCompetitors, internationalCompetitors).
        When the request or the parsing fails, a dict with these three keys
        mapped to empty lists is returned.

    Raises:
        Exception: Whatever load_gemini_api_key raises when the key cannot be read.
    """
    logger.info(f"Identifying competitors for {reference_url}")
    api_key = load_gemini_api_key()
    # The key is sent in the x-goog-api-key header instead of the URL query
    # string: requests includes the URL in its exception messages, which are
    # logged below, so a key in the URL would end up in the log file.
    GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent"
    request_headers = {'Content-Type': 'application/json', 'x-goog-api-key': api_key}

    # Guard against None so the join inside the prompt cannot raise.
    industries = industries or []

    prompt = f"""
    You are a B2B Market Analyst. Find 3-5 direct competitors or highly similar companies (lookalikes) for the company at `{reference_url}`.

    --- CONTEXT ---
    - Offer: {summary_of_offer}
    - Target Market: {target_market}
    - Relevant Industries: {', '.join(industries)}

    --- TASK ---
    Identify competitors and categorize them into three groups:
    1. 'localCompetitors': Competitors in the same immediate region/city.
    2. 'nationalCompetitors': Competitors operating across the same country.
    3. 'internationalCompetitors': Global players.

    For EACH competitor, you MUST provide:
    - `id`: A unique, URL-friendly identifier (e.g., "competitor-name-gmbh").
    - `name`: The official, full name of the company.
    - `description`: A concise explanation of why they are a competitor.

    --- OUTPUT FORMAT ---
    Return ONLY a valid JSON object with the following structure:
    {{
      "localCompetitors": [ {{ "id": "...", "name": "...", "description": "..." }} ],
      "nationalCompetitors": [ ... ],
      "internationalCompetitors": [ ... ]
    }}
    """

    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    logger.info("Sende Anfrage an Gemini API...")
    try:
        # Without a timeout a stalled connection would block the process
        # forever; the generous limit leaves room for long generations.
        response = requests.post(GEMINI_API_URL, json=payload, headers=request_headers, timeout=300)
        response.raise_for_status()
        res_json = response.json()
        logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")

        text = res_json['candidates'][0]['content']['parts'][0]['text']
        result = _extract_json_from_text(text)

        if not result:
            raise ValueError("Konnte kein valides JSON extrahieren")

        return result

    except Exception as e:
        logger.error(f"Competitor identification failed: {e}")
        return {"localCompetitors": [], "nationalCompetitors": [], "internationalCompetitors": []}

def analyze_company(company_name, strategy, target_market):
    """
    Audit a single company and let Gemini synthesize a structured assessment.

    Steps:
      1. Find the company website (first SerpAPI hit; Gemini is asked for the
         URL when SerpAPI yields none).
      2. Scrape the homepage text.
      3. Search the web for mentions of known incumbent software together
         with the company name.
      4. Search firmographics and run each strategy signal's query template.
      5. Send all collected evidence to Gemini and parse the JSON verdict.

    Args:
        company_name: Name of the company to audit.
        strategy: Strategy dict; only its "signals" list is read.
        target_market: Market name, used only in the Gemini URL-fallback prompt.

    Returns:
        - ``{"error": ...}`` when no website could be found or scraped.
        - The JSON object parsed from Gemini's answer, extended with
          ``"dataSource": "Digital Trace Audit (Deep Dive)"``, on success.
        - A fallback dict (status "Unklar / Manuelle Prüfung",
          ``"dataSource": "Error"``) when the final request or parsing fails.

    Raises:
        Exception: Whatever load_gemini_api_key raises when the key cannot be read.
    """
    logger.info(f"--- STARTING DEEP TECH AUDIT FOR: {company_name} ---")
    api_key = load_gemini_api_key()
    # The key is sent in the x-goog-api-key header instead of the URL query
    # string: requests includes the URL in its exception messages, and this
    # function both logs them and returns them in the fallback result, so a
    # key in the URL would leak into the log and into the output.
    GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent"
    request_headers = {'Content-Type': 'application/json', 'x-goog-api-key': api_key}

    # 1. Website finding (SerpAPI first, Gemini as fallback)
    url = None
    website_search_results = serp_search(f"{company_name} offizielle Website")
    if website_search_results:
        url = website_search_results[0].get("link")
        logger.info(f"Website via SerpAPI gefunden: {url}")

    if not url:
        # Fallback: ask Gemini (low confidence)
        logger.info("Keine URL via SerpAPI, frage Gemini...")
        prompt_url = f"What is the official homepage URL for the company '{company_name}' in the market '{target_market}'? Respond with ONLY the single, complete URL and nothing else."
        payload_url = {"contents": [{"parts": [{"text": prompt_url}]}]}
        logger.info("Sende Anfrage an Gemini API (URL Fallback)...")
        try:
            res = requests.post(GEMINI_API_URL, json=payload_url, headers=request_headers, timeout=15)
            res.raise_for_status()
            res_json = res.json()
            logger.info(f"Gemini API-Antwort erhalten (Status: {res.status_code}).")

            candidate = res_json.get('candidates', [{}])[0]
            content = candidate.get('content', {}).get('parts', [{}])[0]
            text_response = content.get('text', '').strip()

            url_match = re.search(r'(https?://[^\s"]+)', text_response)
            if url_match:
                url = url_match.group(1)
                logger.info(f"Gemini Fallback hat URL gefunden: {url}")
            else:
                logger.warning(f"Keine gültige URL in Gemini-Antwort gefunden: '{text_response}'")

        except Exception as e:
            # Best effort: a failed fallback is handled by the URL check below.
            logger.error(f"Gemini URL Fallback failed: {e}")

    if not url or not url.startswith("http"):
        return {"error": f"Could not find website for {company_name}"}

    # 2. Homepage scraping
    homepage_text = get_website_text(url)
    if not homepage_text:
        return {"error": f"Could not scrape website {url}"}

    # Keep only printable ASCII plus newline, carriage return and tab.
    homepage_text = re.sub(r'[^\x20-\x7E\n\r\t]', '', homepage_text)

    # --- ENHANCED: EXTERNAL TECHNOGRAPHIC INTELLIGENCE ---
    # Actively search the web for incumbents, not only the company website.
    tech_evidence = []

    # Known competitors / incumbents
    known_incumbents = [
        "SAP Ariba", "Jaggaer", "Coupa", "SynerTrade", "Ivalua",
        "ServiceNow", "Salesforce", "Oracle SCM", "Zycus", "GEP",
        "SupplyOn", "EcoVadis", "IntegrityNext"
    ]

    # The incumbents are OR-combined to save API calls and split into two
    # groups to keep each query at a reasonable length.
    half = len(known_incumbents) // 2
    group1 = " OR ".join([f'"{inc}"' for inc in known_incumbents[:half]])
    group2 = " OR ".join([f'"{inc}"' for inc in known_incumbents[half:]])

    tech_queries = [
        f'"{company_name}" ({group1})',
        f'"{company_name}" ({group2})',
        f'"{company_name}" "supplier portal" login'  # search for the portal itself
    ]

    logger.info(f"Starte erweiterte Tech-Stack-Suche für {company_name}...")
    for q in tech_queries:
        logger.info(f"Tech Search: {q}")
        # Request slightly more results than the default for the tech search.
        tech_evidence.extend(
            f"- Found: {r['title']}\n  Snippet: {r['snippet']}\n  Link: {r['link']}"
            for r in serp_search(q, num_results=4)
        )

    tech_evidence_text = "\n".join(tech_evidence)
    # --- END ENHANCED TECH SEARCH ---

    # 3. Targeted signal search (the "Hunter" phase), based on the strategy
    signal_evidence = []

    # Firmographics search
    firmographics_results = serp_search(f"{company_name} Umsatz Mitarbeiterzahl 2023")
    firmographics_context = "\n".join([f"- {r['snippet']} ({r['link']})" for r in firmographics_results])

    # Host part of the company URL without "www."; identical for every
    # signal, so it is computed once outside the loop.
    domain = url.split("//")[-1].split("/")[0].replace("www.", "")

    signals = strategy.get('signals', [])
    for signal in signals:
        if not isinstance(signal, dict):
            # Malformed strategy entry: nothing to search for.
            continue

        # Signals come from model output, so required keys may be missing.
        signal_id = str(signal.get('id', ''))
        signal_name = signal.get('name', signal_id)

        # Skip signals that the enhanced tech search above already covers.
        if "incumbent" in signal_id.lower() or "tech" in signal_id.lower():
            logger.info(f"Skipping generic signal search '{signal_name}' in favor of Enhanced Tech Search.")
            continue

        proof_strategy = signal.get('proofStrategy') or {}
        query_template = proof_strategy.get('searchQueryTemplate')
        if not query_template or not isinstance(query_template, str):
            continue

        # Both the double- and single-brace placeholder spellings are supported.
        query = query_template.replace("{{COMPANY}}", company_name).replace("{COMPANY}", company_name)
        query = query.replace("{{domain}}", domain).replace("{domain}", domain)

        logger.info(f"Signal Search '{signal_name}': {query}")
        results = serp_search(query, num_results=3)
        if results:
            search_context = "\n".join([f"  * Snippet: {r['snippet']}\n    Source: {r['link']}" for r in results])
            signal_evidence.append(f"SIGNAL '{signal_name}':\n{search_context}")

    # 4. Final analysis & synthesis (the "Judge" phase)
    evidence_text = "\n\n".join(signal_evidence)

    prompt = f"""
    You are a Strategic B2B Sales Consultant.
    Analyze the company '{company_name}' ({url}) to create a "best-of-breed" sales pitch strategy.

    --- STRATEGY (What we are looking for) ---
    {json.dumps(signals, indent=2)}

    --- EVIDENCE 1: EXTERNAL TECH-STACK INTELLIGENCE (CRITICAL) ---
    Look closely here for mentions of competitors like SAP Ariba, Jaggaer, SynerTrade, Coupa, etc.
    {tech_evidence_text}

    --- EVIDENCE 2: HOMEPAGE CONTENT ---
    {homepage_text[:8000]}

    --- EVIDENCE 3: FIRMOGRAPHICS SEARCH ---
    {firmographics_context}

    --- EVIDENCE 4: TARGETED SIGNAL SEARCH RESULTS ---
    {evidence_text}
    ----------------------------------

    TASK:
    1. **Firmographics**: Estimate Revenue and Employees.
    2. **Technographic Audit**: Look for specific competitor software or legacy systems mentioned in EVIDENCE 1 (e.g., "Partner of SynerTrade", "Login to Jaggaer Portal").
    3. **Status**:
       - Set to "Nutzt Wettbewerber" if ANY competitor technology is found (Ariba, Jaggaer, SynerTrade, Coupa, etc.).
       - Set to "Greenfield" ONLY if absolutely no competitor tech is found.
       - Set to "Bestandskunde" if they already use our solution.
    4. **Evaluate Signals**: For each signal, provide a "value" (Yes/No/Partial) and "proof".
    5. **Recommendation (Pitch Strategy)**:
       - DO NOT write a generic verdict.
       - If they use a competitor (e.g., Ariba), explain how to position against it (e.g., "Pitch as a specialized add-on for logistics, filling Ariba's gaps").
       - If Greenfield, explain the entry point.
       - **Tone**: Strategic, insider-knowledge, specific.

    STRICTLY output only JSON:
    {{
      "companyName": "{company_name}",
      "status": "...",
      "revenue": "...",
      "employees": "...",
      "tier": "Tier 1/2/3",
      "dynamicAnalysis": {{
         "sig_id_from_strategy": {{ "value": "...", "proof": "..." }}
      }},
      "recommendation": "..."
    }}
    """

    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"response_mime_type": "application/json"}
    }

    try:
        logger.info("Sende Audit-Anfrage an Gemini API...")
        # Without a timeout a stalled connection would block the process
        # forever; the generous limit leaves room for long generations.
        response = requests.post(GEMINI_API_URL, json=payload, headers=request_headers, timeout=300)
        response.raise_for_status()
        response_data = response.json()
        logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")

        text = response_data['candidates'][0]['content']['parts'][0]['text']
        result = _extract_json_from_text(text)

        if not result:
            raise ValueError("Konnte kein valides JSON extrahieren")

        result['dataSource'] = "Digital Trace Audit (Deep Dive)"
        logger.info(f"Audit für {company_name} erfolgreich abgeschlossen.")
        return result
    except Exception as e:
        logger.error(f"Audit failed for {company_name}: {e}")
        return {
            "companyName": company_name,
            "status": "Unklar / Manuelle Prüfung",
            "revenue": "Error",
            "employees": "Error",
            "tier": "Tier 3",
            "dynamicAnalysis": {},
            "recommendation": f"Audit failed due to API Error: {str(e)}",
            "dataSource": "Error"
        }

def generate_outreach_campaign(company_data_json, knowledge_base_content, reference_url):
    """
    Generate role-based outreach e-mail campaigns for an audited company.

    The audit data, the knowledge base and the reference client URL are
    embedded in a prompt that asks Gemini for 3-step e-mail sequences for the
    two most relevant roles (personas).

    Args:
        company_data_json: Audit result dict for the target company; its
            "companyName" is used for logging and the prompt, and the whole
            dict is serialized into the prompt.
        knowledge_base_content: Markdown knowledge base text describing the sender.
        reference_url: URL of the reference client used as social proof.

    Returns:
        The JSON value parsed from Gemini's answer (requested format: a list
        of campaigns with "target_role", "rationale" and "emails"). When the
        request or the parsing fails, ``[{"error": <message>}]`` is returned.

    Raises:
        Exception: Whatever load_gemini_api_key raises when the key cannot be read.
    """
    company_name = company_data_json.get('companyName', 'Unknown')
    logger.info(f"--- STARTING ROLE-BASED OUTREACH GENERATION FOR: {company_name} ---")

    api_key = load_gemini_api_key()
    # The key is sent in the x-goog-api-key header instead of the URL query
    # string: requests includes the URL in its exception messages, and this
    # function both logs them and returns them in the error result, so a key
    # in the URL would leak into the log and into the output.
    GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent"
    request_headers = {'Content-Type': 'application/json', 'x-goog-api-key': api_key}

    prompt = f"""
    You are a Strategic Key Account Manager and deeply technical Industry Insider.
    Your goal is to write highly personalized, **operationally specific** outreach emails to the company '{company_name}'.

    --- INPUT 1: YOUR IDENTITY & STRATEGY (The Sender) ---
    The following Markdown contains your company's identity, products, and strategy.
    You act as the sales representative for the company described here:
    {knowledge_base_content}

    --- INPUT 2: THE TARGET COMPANY (Audit Facts) ---
    {json.dumps(company_data_json, indent=2)}

    --- INPUT 3: THE REFERENCE CLIENT (Social Proof) ---
    Reference Client URL: {reference_url}

    CRITICAL: This 'Reference Client' is an existing happy customer of ours. They are the "Seed Company" used to find the Target Company (Lookalike).
    You MUST mention this Reference Client by name (derive it from the URL, e.g., 'schindler.com' -> 'Schindler') to establish trust.

    --- TASK ---
    1. **Analyze**: Match the Target Company (Input 2) to the most relevant 'Zielbranche/Segment' from the Knowledge Base (Input 1).
    2. **Select Roles**: Identify the top 2 most distinct and relevant 'Rollen' (Personas) from the Knowledge Base for this specific company situation.
       - *Example:* If the audit says they use a competitor (risk of lock-in), select a role like "Strategic Purchaser" or "Head of R&D" who cares about "Second Source".
       - *Example:* If they have quality issues or complex logistics, pick "Quality Manager" or "Logistics Head".
    3. **Draft Campaigns**: For EACH of the 2 selected roles, write a 3-step email sequence.

    --- TONE & STYLE GUIDELINES (CRITICAL) ---
    - **Perspective:** Operational Expert & Insider. NOT generic marketing.
    - **Be Gritty & Specific:** Do NOT use fluff like "optimize efficiency" or "streamline processes" without context.
      - Use **hard, operational keywords** from the Knowledge Base (e.g., "ASNs", "VMI", "8D-Reports", "Maverick Buying", "Bandstillstand", "Sonderfahrten", "PPAP").
      - Show you understand their daily pain.
    - **Narrative Arc:**
      1. "I noticed [Fact from Audit/Tech Stack]..." (e.g., "You rely on PDF orders via Jaggaer...")
      2. "In [Industry], this often leads to [Operational Pain]..." (e.g., "missing ASNs causing delays at the hub.")
      3. "We helped [Reference Client Name] solve exactly this by [Specific Solution]..."
      4. "Let's discuss how to get [Operational Gain] without replacing your ERP."
    - **Mandatory Social Proof:** You MUST mention the Reference Client Name (from Input 3) in the email body or footer.
    - **Language:** German (as the inputs are German).

    --- OUTPUT FORMAT (Strictly JSON) ---
    Returns a list of campaigns.
    [
      {{
        "target_role": "Name of the Role (e.g. Leiter F&E)",
        "rationale": "Why this role? (e.g. Because the audit found dependency on Competitor X...)",
        "emails": [
            {{
                "subject": "Specific Subject Line",
                "body": "Email Body..."
            }},
            {{
                "subject": "Re: Subject",
                "body": "Follow-up Body..."
            }},
            {{
                "subject": "Final Check",
                "body": "Final Body..."
            }}
        ]
      }},
      ... (Second Role)
    ]
    """

    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"response_mime_type": "application/json"}
    }

    try:
        logger.info("Sende Campaign-Anfrage an Gemini API...")
        # Without a timeout a stalled connection would block the process
        # forever; the generous limit leaves room for long generations.
        response = requests.post(GEMINI_API_URL, json=payload, headers=request_headers, timeout=300)
        response.raise_for_status()
        response_data = response.json()
        logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")

        text = response_data['candidates'][0]['content']['parts'][0]['text']
        result = _extract_json_from_text(text)

        if not result:
            raise ValueError("Konnte kein valides JSON extrahieren")

        return result
    except Exception as e:
        logger.error(f"Campaign generation failed for {company_name}: {e}")
        return [{"error": str(e)}]

def main():
    """
    Command-line entry point: run the step selected by ``--mode`` and print
    its result as JSON on stdout.

    Modes and the arguments they cannot run without:
      - generate_strategy:    --context_file
      - identify_competitors: (none; --context_file is optional)
      - analyze_company:      --strategy_json
      - generate_outreach:    --company_data_file, --context_file

    An unknown mode or a missing required argument ends the program through
    argparse's error handling (usage message on stderr, exit status 2)
    instead of doing nothing or failing with a TypeError.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", required=True)
    parser.add_argument("--reference_url")
    parser.add_argument("--context_file")
    parser.add_argument("--target_market")
    parser.add_argument("--company_name")
    parser.add_argument("--strategy_json")
    parser.add_argument("--summary_of_offer")
    parser.add_argument("--company_data_file")  # for generate_outreach
    args = parser.parse_args()

    def require(*names):
        # Report all missing arguments of the selected mode in one message.
        missing = [f"--{name}" for name in names if getattr(args, name) is None]
        if missing:
            parser.error(f"mode '{args.mode}' requires {', '.join(missing)}")

    def read_text(path):
        # Explicit UTF-8 keeps reading independent of the process locale.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    if args.mode == "generate_strategy":
        require("context_file")
        context = read_text(args.context_file)
        result = generate_search_strategy(args.reference_url, context)
    elif args.mode == "identify_competitors":
        industries = []
        if args.context_file:
            industries = _extract_target_industries_from_context(read_text(args.context_file))
        result = identify_competitors(args.reference_url, args.target_market, industries, args.summary_of_offer)
    elif args.mode == "analyze_company":
        require("strategy_json")
        strategy = json.loads(args.strategy_json)
        result = analyze_company(args.company_name, strategy, args.target_market)
    elif args.mode == "generate_outreach":
        require("company_data_file", "context_file")
        company_data = json.loads(read_text(args.company_data_file))
        knowledge_base = read_text(args.context_file)
        result = generate_outreach_campaign(company_data, knowledge_base, args.reference_url)
    else:
        parser.error(f"unknown mode '{args.mode}'")

    print(json.dumps(result))


if __name__ == "__main__":
    main()