# Brancheneinstufung2/company-explorer/backend/scripts/generate_matrix.py

import sys
import os
import json
import argparse
from typing import List
import google.generativeai as genai

# Setup Environment
# Append the directory two levels above this file's directory to sys.path.
# This must run before the `backend.*` imports below; presumably it is what
# makes the `backend` package importable when the script is started directly
# (verify against the repository layout).
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))

from backend.database import SessionLocal, Industry, Persona, MarketingMatrix
from backend.config import settings

# --- Configuration ---
# Model identifier handed to genai.GenerativeModel for every live generation call.
MODEL_NAME = "gemini-1.5-pro-latest" # High quality copy

def _load_text_items(raw) -> List[str]:
    """Normalize a stored pains/gains value into a list of bullet strings.

    Accepted inputs:
      * empty / None            -> []
      * a list or tuple         -> its items, stringified (None items dropped)
      * JSON text for a list    -> the list items, stringified
      * JSON text for a string  -> that single string
      * JSON ``null``           -> []
      * anything else           -> the raw value as a single bullet
    """
    if not raw:
        return []

    if isinstance(raw, (list, tuple)):
        parsed = raw
    else:
        try:
            parsed = json.loads(raw)
        except (TypeError, ValueError):
            # Not JSON (json.JSONDecodeError is a ValueError) or not text at
            # all: keep the value verbatim as one bullet.
            return [str(raw)]

    if parsed is None:
        return []
    if isinstance(parsed, str):
        # A JSON string must stay one bullet; iterating it would yield one
        # bullet per character.
        return [parsed] if parsed else []
    if isinstance(parsed, (list, tuple)):
        return [str(item) for item in parsed if item is not None]

    # Numbers, booleans, objects: there is no list to expand, so show the
    # stored text unchanged.
    return [str(raw)]


def generate_prompt(industry: "Industry", persona: "Persona") -> str:
    """
    Builds the prompt for the AI to generate the marketing texts.
    Combines Industry context with Persona specific pains/gains.

    Args:
        industry: Object exposing ``name``, ``description`` and ``pains``.
        persona: Object exposing ``name``, ``pains`` and ``gains``. Pains and
            gains may be JSON-encoded lists, plain text, or already lists.

    Returns:
        The complete prompt text (German instructions, JSON output format).
    """

    # Parse pains and gains independently so that one malformed field does not
    # force the other (valid) field into its raw-text fallback.
    persona_pains = _load_text_items(persona.pains)
    persona_gains = _load_text_items(persona.gains)

    industry_pains = industry.pains or "Allgemeine Effizienzprobleme"

    pains_block = "\n".join("- " + pain for pain in persona_pains)
    gains_block = "\n".join("- " + gain for gain in persona_gains)

    prompt = f"""
Du bist ein erfahrener B2B-Copywriter für Robotik-Lösungen (Reinigung, Transport, Service).
Ziel: Erstelle personalisierte E-Mail-Textbausteine für einen Outreach.

--- KONTEXT ---
ZIELBRANCHE: {industry.name}
BRANCHEN-KONTEXT: {industry.description or 'Keine spezifische Beschreibung'}
BRANCHEN-PAINS: {industry_pains}

ZIELPERSON (ARCHETYP): {persona.name}
PERSÖNLICHE PAINS (Herausforderungen):
{pains_block}

GEWÜNSCHTE GAINS (Ziele):
{gains_block}

--- AUFGABE ---
Erstelle ein JSON-Objekt mit genau 3 Textbausteinen.
Tonalität: Professionell, lösungsorientiert, auf den Punkt. Keine Marketing-Floskeln ("Game Changer").

1. "subject": Betreffzeile (Max 6 Wörter). Muss neugierig machen und einen Pain adressieren.
2. "intro": Einleitungssatz (1-2 Sätze). Verbinde die Branchen-Herausforderung mit der persönlichen Rolle des Empfängers. Zeige Verständnis für seine Situation.
3. "social_proof": Ein Satz, der Vertrauen aufbaut. Nenne generische Erfolge (z.B. "Unternehmen in der {industry.name} senken so ihre Kosten um 15%"), da wir noch keine spezifischen Logos nennen dürfen.

--- FORMAT ---
Respond ONLY with a valid JSON object. Do not add markdown formatting like ```json ... ```.
Format:
{{
  "subject": "...",
  "intro": "...",
  "social_proof": "..."
}}
"""
    return prompt

def mock_call(prompt: str):
    """Stand-in for the real model call, used in dry runs.

    Prints a banner followed by the first 300 characters of *prompt*, then
    returns a fixed set of placeholder texts under the keys ``subject``,
    ``intro`` and ``social_proof``. No network access takes place.
    """
    preview = prompt[:300]
    print(f"\n--- [MOCK] GENERATING PROMPT ---\n{preview}...\n--------------------------------")

    placeholder_texts = {}
    placeholder_texts["subject"] = "[MOCK] Effizienzsteigerung in der Produktion"
    placeholder_texts["intro"] = "[MOCK] Als Produktionsleiter wissen Sie, wie teuer Stillstand ist. Unsere Roboter helfen."
    placeholder_texts["social_proof"] = "[MOCK] Ähnliche Betriebe sparten 20% Kosten."
    return placeholder_texts

def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if it has one.

    Handles an opening ``` with an optional ``json`` language tag. The closing
    fence is only removed when it is actually present, so an unterminated
    fence never costs the payload its last three characters.
    """
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned

    cleaned = cleaned[3:]
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def real_gemini_call(prompt: str):
    """Send *prompt* to Gemini and return the decoded JSON reply.

    Args:
        prompt: Full prompt text, e.g. as produced by ``generate_prompt``.

    Returns:
        Whatever ``json.loads`` yields for the model's reply; the prompt asks
        for an object with ``subject``, ``intro`` and ``social_proof``.

    Raises:
        ValueError: If ``settings.GEMINI_API_KEY`` is not set.
        Exception: Errors from the SDK call or from JSON decoding are
            propagated to the caller (decoding errors are printed first,
            together with the raw reply).
    """
    if not settings.GEMINI_API_KEY:
        raise ValueError("GEMINI_API_KEY not set in config/env")

    genai.configure(api_key=settings.GEMINI_API_KEY)

    # Configure Model
    generation_config = {
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 1024,
        # Ask for JSON directly; fence stripping below is only a safety net.
        "response_mime_type": "application/json",
    }

    model = genai.GenerativeModel(
        model_name=MODEL_NAME,
        generation_config=generation_config,
    )

    response = model.generate_content(prompt)

    # Read the text exactly once, outside the try block.
    # NOTE(review): the SDK's `.text` accessor can apparently raise when the
    # reply carries no text part (e.g. a blocked prompt) - confirm against the
    # SDK docs. Reading it here lets such an error propagate unchanged instead
    # of being raised a second time from the log line in the except handler.
    raw_text = response.text

    try:
        return json.loads(_strip_code_fence(raw_text))
    except Exception as e:
        print(f"JSON Parse Error: {e}. Raw Response: {raw_text}")
        raise

def run_matrix_generation(dry_run: bool = True, force: bool = False):
    """Generate marketing texts for every Industry x Persona combination.

    For each pair the existing ``MarketingMatrix`` row (if any) is looked up.
    Pairs that already have a row are skipped unless *force* is set. In a dry
    run the prompt is built and passed to ``mock_call`` only; nothing is
    written. In live mode the texts come from ``real_gemini_call`` and are
    committed pair by pair.

    Args:
        dry_run: When True (default), make no API calls and no DB writes.
        force: When True, regenerate and overwrite rows that already exist.

    Errors for a single pair (API failure, unusable result, failed write) are
    printed and that pair is skipped. Any other error is printed and ends the
    run; the session is always closed.
    """
    db = SessionLocal()
    try:
        industries = db.query(Industry).all()
        personas = db.query(Persona).all()

        print(f"Found {len(industries)} Industries and {len(personas)} Personas.")
        print(f"Mode: {'DRY RUN (No API calls, no DB writes)' if dry_run else 'LIVE - GEMINI GENERATION'}")

        total_combinations = len(industries) * len(personas)
        processed = 0

        for ind in industries:
            for pers in personas:
                processed += 1
                print(f"[{processed}/{total_combinations}] Check: {ind.name} x {pers.name}")

                # Check existing
                existing = db.query(MarketingMatrix).filter(
                    MarketingMatrix.industry_id == ind.id,
                    MarketingMatrix.persona_id == pers.id
                ).first()

                if existing and not force:
                    print("  -> Skipped (Already exists)")
                    continue

                # Generate
                prompt = generate_prompt(ind, pers)

                if dry_run:
                    # Dry runs only exercise prompt building; never write.
                    mock_call(prompt)
                    continue

                try:
                    result = real_gemini_call(prompt)
                except Exception as e:
                    print(f"  -> API ERROR: {e}")
                    continue

                # Basic Validation: must be an object with subject and intro.
                if (
                    not isinstance(result, dict)
                    or not result.get("subject")
                    or not result.get("intro")
                ):
                    print("  -> Invalid result structure. Skipping.")
                    continue

                # Write to DB. A failed write is rolled back so the session
                # stays usable and the remaining pairs are still processed,
                # mirroring how API errors are handled per pair.
                try:
                    if existing:
                        existing.subject = result.get("subject")
                        existing.intro = result.get("intro")
                        existing.social_proof = result.get("social_proof")
                    else:
                        db.add(MarketingMatrix(
                            industry_id=ind.id,
                            persona_id=pers.id,
                            subject=result.get("subject"),
                            intro=result.get("intro"),
                            social_proof=result.get("social_proof")
                        ))
                    db.commit()
                except Exception as e:
                    db.rollback()
                    print(f"  -> DB ERROR: {e}")
                    continue

                # Report success only after the commit went through.
                if existing:
                    print("  -> Updated entry.")
                else:
                    print("  -> Created new entry.")

    except Exception as e:
        print(f"Error: {e}")
    finally:
        db.close()

if __name__ == "__main__":
    # Command-line entry point. Without flags the script performs a dry run;
    # --live switches to real generation, --force also regenerates pairs that
    # already have an entry.
    cli = argparse.ArgumentParser()
    for flag, help_text in (
        ("--live", "Actually call Gemini and write to DB"),
        ("--force", "Overwrite existing matrix entries"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)
    options = cli.parse_args()

    is_live = options.live
    run_matrix_generation(dry_run=not is_live, force=options.force)