Files
Brancheneinstufung2/company-explorer/backend/scripts/generate_matrix.py
2026-02-22 19:22:39 +00:00

219 lines
8.5 KiB
Python

import sys
import os
import json
import argparse
from typing import List
import google.generativeai as genai
# Setup Environment
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from backend.database import SessionLocal, Industry, Persona, MarketingMatrix
from backend.config import settings
# --- Configuration ---
# Gemini model identifier; passed as ``model_name`` to ``genai.GenerativeModel``
# in ``real_gemini_call``.
MODEL_NAME = "gemini-2.0-flash" # High quality copy
def _extract_segment(text, marker):
    """Return the text following the first ``[tag]`` whose name contains *marker*.

    The input is expected to look like ``"[Tag A] text a [Tag B] text b"``.
    Matching is case-insensitive. Returns ``""`` for empty input and the
    unchanged *text* when no tag matches (including text without any tags).
    """
    if not text:
        return ""
    import re  # local import: ``re`` is not imported at file level

    # With a single capture group, re.split alternates free text (even
    # indexes) and tag names (odd indexes), so every tag has a following body.
    segments = re.split(r'\[(.*?)\]', text)
    needle = marker.lower()
    for tag, body in zip(segments[1::2], segments[2::2]):
        if needle in tag.lower():
            return body.strip()
    return text


def _as_str_list(raw):
    """Normalise a persona pains/gains value into a list of strings.

    Accepts a JSON-encoded list, a JSON-encoded scalar, plain (non-JSON) text,
    or an already decoded list/tuple. Empty input and JSON ``null`` yield ``[]``.
    """
    if not raw:
        return []
    if isinstance(raw, (list, tuple)):
        return [str(item) for item in raw]
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        # Not JSON (json.JSONDecodeError is a ValueError): treat as one entry.
        return [str(raw)]
    if isinstance(parsed, list):
        # str() guards against non-string items breaking the bullet rendering.
        return [str(item) for item in parsed]
    if parsed is None:
        return []
    if isinstance(parsed, str):
        # A JSON string must stay ONE bullet, not one bullet per character.
        return [parsed] if parsed else []
    # Numbers, booleans, objects: keep the original text as a single entry.
    return [str(raw)]


def generate_prompt(industry: "Industry", persona: "Persona") -> str:
    """Build the (German) prompt asking the model for the marketing texts.

    Combines the industry context (name, description, pains/gains for the
    primary product), the persona's pains/gains and the product category.

    Args:
        industry: Object providing ``name``, ``description``, ``pains``,
            ``gains`` and ``primary_category`` (which may be falsy, otherwise
            it provides ``name`` and ``description``).
        persona: Object providing ``name``, ``pains`` and ``gains``. Pains and
            gains may be JSON lists or plain text; each is parsed on its own,
            so one malformed field does not affect the other.

    Returns:
        The complete prompt string.
    """
    # 1. Product context: only the primary category is used; a generic
    #    fallback text is used when the industry has none.
    primary_cat = industry.primary_category
    product_context = (
        f"{primary_cat.name}: {primary_cat.description}"
        if primary_cat
        else "Intelligente Robotik-Lösungen"
    )

    # 2. Industry pains/gains: keep only the "[Primary Product]" segment.
    industry_pains = _extract_segment(industry.pains, "Primary Product")
    industry_gains = _extract_segment(industry.gains, "Primary Product")

    # 3. Persona pains/gains as bullet-ready lists of strings.
    persona_pains = _as_str_list(persona.pains)
    persona_gains = _as_str_list(persona.gains)

    # The template lines start at column 0 on purpose: leading whitespace
    # would become part of the prompt text.
    prompt = f"""
Du bist ein scharfsinniger B2B-Strategieberater und exzellenter Copywriter.
Deine Aufgabe: Erstelle hochpräzise, "scharfe" Marketing-Textbausteine für einen Outreach an Entscheider.
--- STRATEGISCHER RAHMEN ---
ZIELUNTERNEHMEN (Branche): {industry.name}
BRANCHEN-KONTEXT: {industry.description or 'Keine spezifische Beschreibung'}
BRANCHEN-HERAUSFORDERUNGEN: {industry_pains}
ANGESTREBTE MEHRWERTE: {industry_gains}
ZIELPERSON (Rolle): {persona.name}
PERSÖNLICHER DRUCK (Pains der Rolle):
{chr(10).join(['- ' + p for p in persona_pains])}
GEWÜNSCHTE ERFOLGE (Gains der Rolle):
{chr(10).join(['- ' + g for g in persona_gains])}
ANGEBOTENE LÖSUNG (Produkt-Fokus):
{product_context}
--- DEIN AUFTRAG ---
Erstelle ein JSON-Objekt mit 3 Textbausteinen, die den persönlichen Druck des Empfängers mit den strategischen Notwendigkeiten seiner Branche und der technologischen Lösung verknüpfen.
Tonalität: Wertschätzend, auf Augenhöhe, scharfsinnig, absolut NICHT marktschreierisch.
1. "subject": Eine Betreffzeile (Max 6 Wörter), die den Finger direkt in eine Wunde (Pain) legt oder ein hohes Ziel (Gain) verspricht.
2. "intro": Einleitung (2-3 Sätze). Verbinde die spezifische Branchen-Herausforderung mit der persönlichen Verantwortung des Empfängers. Er muss sich sofort verstanden fühlen.
3. "social_proof": Ein Beweissatz, der zeigt, dass diese Lösung in der Branche {industry.name} bereits reale Probleme (z.B. Personalmangel, Dokumentationsdruck) gelöst hat. Nenne keine konkreten Firmennamen, aber quantifizierbare Effekte.
--- FORMAT ---
Antworte NUR mit einem validen JSON-Objekt.
Format:
{{
"subject": "...",
"intro": "...",
"social_proof": "..."
}}
"""
    return prompt
def mock_call(prompt: str):
    """Stand-in for the real API call, used for dry runs.

    Prints a preview made of the first 300 characters of *prompt* and returns
    a fixed dict with the keys ``subject``, ``intro`` and ``social_proof``.
    """
    preview = prompt[:300]
    print(f"\n--- [MOCK] GENERATING PROMPT ---\n{preview}...\n--------------------------------")
    canned_result = {}
    canned_result["subject"] = "[MOCK] Effizienzsteigerung in der Produktion"
    canned_result["intro"] = "[MOCK] Als Produktionsleiter wissen Sie, wie teuer Stillstand ist. Unsere Roboter helfen."
    canned_result["social_proof"] = "[MOCK] Ähnliche Betriebe sparten 20% Kosten."
    return canned_result
def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence (``` or ```json) from *text*.

    Tolerates a missing closing fence and any casing of the ``json`` language
    tag. Text without an opening fence is returned stripped but otherwise
    unchanged.
    """
    text = text.strip()
    if not text.startswith("```"):
        return text
    body = text[3:]
    # Only cut the tail when a closing fence is really there; slicing
    # unconditionally would chop the last characters of the JSON payload.
    if body.endswith("```"):
        body = body[:-3]
    body = body.lstrip()
    # Drop the optional language tag. A JSON document cannot start with the
    # bare word "json", so this never removes payload.
    if body[:4].lower() == "json":
        body = body[4:]
    return body.strip()


def real_gemini_call(prompt: str):
    """Send *prompt* to Gemini and return the decoded JSON object.

    Args:
        prompt: The full prompt text.

    Returns:
        A dict decoded from the model's JSON answer. If the model returns a
        JSON list, its first element is used.

    Raises:
        ValueError: If ``settings.GEMINI_API_KEY`` is not set, the answer is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``), the
            answer is an empty list, or the decoded value is not a JSON object.
        Exception: Anything raised by the Gemini SDK is propagated unchanged.
    """
    if not settings.GEMINI_API_KEY:
        raise ValueError("GEMINI_API_KEY not set in config/env")
    genai.configure(api_key=settings.GEMINI_API_KEY)

    # Configure Model: request JSON output directly via the response MIME type.
    generation_config = {
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 1024,
        "response_mime_type": "application/json",
    }
    model = genai.GenerativeModel(
        model_name=MODEL_NAME,
        generation_config=generation_config,
    )
    response = model.generate_content(prompt)

    # Read the text exactly once and outside the try block: per the SDK docs
    # the accessor can itself raise when the response carries no text part,
    # and re-reading it inside the error handler would mask the first error.
    raw_text = response.text
    try:
        # Gemini usually returns clean JSON with the MIME type set, but strip
        # a Markdown fence defensively.
        parsed_json = json.loads(_strip_code_fence(raw_text))
        if isinstance(parsed_json, list):
            if not parsed_json:
                raise ValueError("Empty list returned from API")
            parsed_json = parsed_json[0]
        # The caller reads the result with .get(), so insist on an object.
        if not isinstance(parsed_json, dict):
            raise ValueError(
                f"Expected a JSON object, got {type(parsed_json).__name__}"
            )
        return parsed_json
    except Exception as e:
        print(f"JSON Parse Error: {e}. Raw Response: {raw_text}")
        raise
def run_matrix_generation(dry_run: bool = True, force: bool = False, specific_industry: str = None):
    """Generate marketing texts for every Industry x Persona combination.

    Args:
        dry_run: When true, only build the prompts and call ``mock_call``;
            no API calls and no database writes happen.
        force: When true, regenerate and overwrite combinations that already
            have a ``MarketingMatrix`` row; otherwise those are skipped.
        specific_industry: If given, only the industry with exactly this name
            is processed.

    Each stored entry is committed on its own, so texts generated before a
    failure or interruption are kept and a re-run (without ``force``) skips
    them. Errors are printed rather than raised; the session is always closed.
    """
    db = SessionLocal()
    try:
        query = db.query(Industry)
        if specific_industry:
            query = query.filter(Industry.name == specific_industry)
        industries = query.all()
        personas = db.query(Persona).all()

        print(f"Found {len(industries)} Industries and {len(personas)} Personas.")
        print(f"Mode: {'DRY RUN (No API calls, no DB writes)' if dry_run else 'LIVE - GEMINI GENERATION'}")

        total_combinations = len(industries) * len(personas)
        processed = 0
        for ind in industries:
            for pers in personas:
                processed += 1
                print(f"[{processed}/{total_combinations}] Check: {ind.name} x {pers.name}")

                # Check existing
                existing = db.query(MarketingMatrix).filter(
                    MarketingMatrix.industry_id == ind.id,
                    MarketingMatrix.persona_id == pers.id
                ).first()
                if existing and not force:
                    print(" -> Skipped (Already exists)")
                    continue

                # Generate
                prompt = generate_prompt(ind, pers)
                if dry_run:
                    # The mock only prints a prompt preview; nothing is stored.
                    mock_call(prompt)
                    continue

                try:
                    result = real_gemini_call(prompt)
                except Exception as e:
                    # Best effort: one failed combination must not stop the run.
                    print(f" -> API ERROR: {e}")
                    continue

                # Basic Validation (also guards against a non-dict result)
                if (
                    not isinstance(result, dict)
                    or not result.get("subject")
                    or not result.get("intro")
                ):
                    print(" -> Invalid result structure. Skipping.")
                    continue

                # Write to DB
                if not existing:
                    new_entry = MarketingMatrix(
                        industry_id=ind.id,
                        persona_id=pers.id,
                        subject=result.get("subject"),
                        intro=result.get("intro"),
                        social_proof=result.get("social_proof")
                    )
                    db.add(new_entry)
                    print(" -> Created new entry.")
                else:
                    existing.subject = result.get("subject")
                    existing.intro = result.get("intro")
                    existing.social_proof = result.get("social_proof")
                    print(" -> Updated entry.")

                # Commit per entry so a later failure cannot discard texts
                # that were already generated.
                db.commit()
    except Exception as e:
        print(f"Error: {e}")
        # Discard whatever the failed step left pending in the session.
        db.rollback()
    finally:
        db.close()
if __name__ == "__main__":
    # Command-line entry point. Without --live the script performs a dry run.
    cli = argparse.ArgumentParser()
    boolean_flags = (
        ("--live", "Actually call Gemini and write to DB"),
        ("--force", "Overwrite existing matrix entries"),
    )
    for flag, help_text in boolean_flags:
        cli.add_argument(flag, action="store_true", help=help_text)
    cli.add_argument("--industry", type=str, help="Specific industry name to process")
    options = cli.parse_args()

    run_matrix_generation(
        dry_run=not options.live,
        force=options.force,
        specific_industry=options.industry,
    )