feat(market-intel): Implement fully functional, optimized backend
- Refactored market_intel_orchestrator.py for direct Gemini API (v1) calls.
- Updated model to gemini-2.5-pro for enhanced capabilities.
- Implemented minimal stdout logging for improved traceability within Docker.
- Optimized Dockerfile and introduced market-intel.requirements.txt for leaner, faster builds.
- Ensured end-to-end communication from React frontend through Node.js bridge to Python backend is fully functional.
This commit is contained in:
209
market_intel_orchestrator.py
Normal file
209
market_intel_orchestrator.py
Normal file
@@ -0,0 +1,209 @@
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import logging
|
||||
from datetime import datetime # Nur für Zeitstempel im Logging, nicht für Dateinamen
|
||||
|
||||
# --- MINIMAL LOGGING SETUP ---
# Console-only logging: records go to the standard stream handler that
# logging.basicConfig installs, which is what `docker logs` captures.
# No external modules (config.py, helpers.py) are needed and no log files
# are created.
_LOGGING_OPTIONS = {
    "level": logging.INFO,
    "format": '[%(asctime)s] %(levelname)s: %(message)s',
    "datefmt": '%Y-%m-%d %H:%M:%S',
}
logging.basicConfig(**_LOGGING_OPTIONS)

logger = logging.getLogger(__name__)
logger.info("Minimales Logging für Market Intelligence Orchestrator konfiguriert (nur Konsole).")
# --- END MINIMAL LOGGING SETUP ---
|
||||
|
||||
# Load the Gemini API key from a local text file
def load_gemini_api_key(file_path="gemini_api_key.txt"):
    """Read the Gemini API key from *file_path* and return it stripped.

    Args:
        file_path: Path of a text file whose content is the API key.

    Returns:
        The key with surrounding whitespace removed.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
        ValueError: If the file exists but contains no key.
        RuntimeError: If the file cannot be read or decoded for another reason.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            api_key = f.read().strip()
    except FileNotFoundError as err:
        message = f"Die Datei {file_path} wurde nicht gefunden. Bitte stellen Sie sicher, dass Ihr Gemini API Key dort hinterlegt ist."
        logger.critical(message)
        raise FileNotFoundError(message) from err
    except (OSError, UnicodeDecodeError) as err:
        message = f"Fehler beim Laden des Gemini API Keys: {err}"
        logger.critical(message)
        raise RuntimeError(message) from err

    # Validate outside the try block so the ValueError reaches the caller
    # as-is instead of being caught and re-wrapped by the I/O handlers above.
    if not api_key:
        message = "Gemini API Key ist leer. Bitte tragen Sie Ihren Schlüssel in die Datei gemini_api_key.txt ein."
        logger.error(message)
        raise ValueError(message)

    logger.info("Gemini API Key erfolgreich geladen.")
    return api_key
|
||||
|
||||
# Scrape a web page and reduce it to plain text
def get_website_text(url, max_chars=8000, timeout=10):
    """Download *url* and return its visible text, truncated to *max_chars*.

    Script, style, navigation, header, footer, aside and noscript elements
    are removed before the text is extracted.

    Args:
        url: Address of the page to fetch.
        max_chars: Maximum number of characters of extracted text to return.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The extracted text, or ``None`` if the request or the parsing failed
        (the failure is logged).
    """
    logger.info(f"Starte Web-Scraping für URL: {url}")
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        logger.info(f"Webseite {url} erfolgreich abgerufen (Status: {response.status_code}).")

        # Without a charset in the Content-Type header, requests assumes
        # ISO-8859-1 for text responses, which garbles UTF-8 pages (umlauts).
        # Use the encoding detected from the body in that case.
        if 'charset' not in response.headers.get('Content-Type', '').lower():
            response.encoding = response.apparent_encoding

        # Imported locally: the module-level import only brings in BeautifulSoup.
        from bs4 import FeatureNotFound
        try:
            soup = BeautifulSoup(response.text, 'lxml')
        except FeatureNotFound:
            # lxml is an optional dependency; the built-in parser always exists.
            logger.warning("lxml-Parser nicht verfügbar, verwende html.parser als Fallback.")
            soup = BeautifulSoup(response.text, 'html.parser')

        for unwanted_tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'noscript']):
            unwanted_tag.decompose()

        text = soup.get_text(separator=' ', strip=True)
        text = text[:max_chars]  # keep the prompt size bounded
        logger.info(f"Text von {url} erfolgreich extrahiert und auf {len(text)} Zeichen begrenzt.")
        logger.debug(f"Gescrapter Text-Auszug: {text[:500]}...")
        return text
    except requests.exceptions.RequestException as e:
        logger.error(f"Fehler beim Abrufen der Webseite {url}: {e}")
        return None
    except Exception as e:
        logger.error(f"Fehler beim Parsen der Webseite {url}: {e}", exc_info=True)
        return None
|
||||
|
||||
# Main function for strategy generation
def _extract_json_text(response_text):
    """Return the JSON part of a model reply, dropping a Markdown code fence if present."""
    cleaned = response_text.strip()
    if cleaned.startswith("```"):
        fenced = cleaned[3:]
        # The opening fence may carry a language tag (```json).
        if fenced[:4].lower() == "json":
            fenced = fenced[4:]
        cleaned = fenced.split("```", 1)[0].strip()
    return cleaned


def generate_search_strategy(reference_url, context_content, api_timeout=300):
    """Ask Gemini for a "Digital Trace Strategy" for a reference client.

    Loads the API key, scrapes the text of *reference_url*, builds a prompt
    from that text and *context_content*, posts it to the Gemini
    ``generateContent`` REST endpoint and parses the reply as JSON.

    Args:
        reference_url: URL of the reference client's homepage.
        context_content: Text of the uploaded strategy document.
        api_timeout: Timeout in seconds for the Gemini API request.

    Returns:
        The parsed strategy dict on success. On failure a dict with an
        ``"error"`` key (plus ``"response_text"`` with the raw API reply when
        the failure happened during or after the API call).

    Raises:
        FileNotFoundError, ValueError, RuntimeError: Propagated from
            ``load_gemini_api_key`` when the key cannot be loaded.
    """
    logger.info("Starte Strategiegenerierung.")
    logger.info(f"Referenz-URL: {reference_url}")
    logger.info(f"Kontext-Inhalt Länge: {len(context_content)} Zeichen")
    logger.debug(f"Kontext-Inhalt Auszug: {context_content[:500]}...")

    api_key = load_gemini_api_key()

    # The key is sent in the x-goog-api-key header rather than as a ?key=
    # query parameter, so it cannot leak through logged URLs or through
    # HTTPError messages (which embed the request URL).
    gemini_api_url = "https://generativelanguage.googleapis.com/v1/models/gemini-2.5-pro:generateContent"
    request_headers = {
        'Content-Type': 'application/json',
        'x-goog-api-key': api_key,
    }
    logger.debug(f"Gemini API URL: {gemini_api_url}")

    homepage_text = get_website_text(reference_url)
    if homepage_text is None:
        logger.error(f"Konnte Webseite für {reference_url} nicht abrufen oder parsen.")
        return {"error": f"Could not retrieve or parse homepage text for {reference_url}"}

    prompt = f"""
    You are a B2B Market Intelligence Architect.

    --- STRATEGIC CONTEXT (Uploaded Document) ---
    {context_content}
    ---------------------------------------------

    --- REFERENCE CLIENT HOMEPAGE TEXT ---
    {homepage_text}
    ------------------------------------

    Reference Client URL: "{reference_url}"

    Task: Create a "Digital Trace Strategy" to identify high-potential leads based on the Strategic Context and the **factual content of the Reference Client Homepage Text**.

    1. ANALYZE the uploaded context (Offer, Personas, Pain Points).
    2. EXTRACT a 1-sentence summary of what is being sold ("summaryOfOffer") from the Strategic Context.
    3. DEFINE an Ideal Customer Profile (ICP) derived from the "Target Groups" in the context and what you learned from the Reference Client's homepage.
    4. **CRITICAL**: Identify 3-5 specific "Digital Signals" (Traces) that are **ACTUALLY VISIBLE and demonstrable from the provided Homepage Text** that indicate a match for the Pain Points/Needs defined in the context.
       - Use the "Pain Points" and "Offer" from the Strategic Context to derive these signals.
       - Signals MUST be directly supported by evidence from the "REFERENCE CLIENT HOMEPAGE TEXT". Do not invent signals that are not verifiable from the text.
       - Example: If the context mentions "Pain: High return rates", and the homepage text mentions "easy returns within 14 days", a Signal could be "Mentions detailed return policy".

    OUTPUT LANGUAGE: German (Deutsch) for all text fields.

    STRICTLY output only a valid JSON object matching this format. DO NOT include any additional text or markdown code blocks (e.g., ```json```).
    {{
      "summaryOfOffer": "<Short 1-sentence summary of the product/service>",
      "idealCustomerProfile": "<Detailed ICP based on context and homepage analysis>",
      "signals": [
        {{
          "id": "sig_1",
          "name": "<Short Name (e.g. 'Tech Stack')>",
          "description": "<What specifically to look for? (e.g. 'Look for Shopify in source code')>",
          "targetPageKeywords": ["homepage"]
        }}
      ]
    }}
    """

    # Request body for the REST API (no generationConfig / response_mime_type).
    payload = {
        "contents": [
            {
                "parts": [
                    {
                        "text": prompt
                    }
                ]
            }
        ]
    }
    logger.debug(f"Gesamter Prompt, gesendet an Gemini API:\n{prompt}")
    logger.debug(f"Payload für Gemini API: {json.dumps(payload, indent=2)}")

    # Bound before the try block so the error handlers can test whether a
    # response was received at all.
    response = None
    try:
        logger.info("Sende Anfrage an Gemini API...")
        # Without a timeout a stalled connection would block the process forever.
        response = requests.post(
            gemini_api_url,
            json=payload,
            headers=request_headers,
            timeout=api_timeout,
        )
        response.raise_for_status()  # raises HTTPError for 4xx/5xx status codes
        logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")

        response_data = response.json()
        logger.debug(f"Rohe API-Antwort (JSON): {json.dumps(response_data, indent=2)}")

        response_text = response_data['candidates'][0]['content']['parts'][0]['text']
        logger.debug(f"Extrahierter Text aus API-Antwort: {response_text}")

        # The model sometimes wraps the JSON in a Markdown fence despite the
        # instructions in the prompt.
        json_text = _extract_json_text(response_text)
        if json_text != response_text.strip():
            logger.debug("JSON-Antwort im Markdown-Code-Block erkannt. Extrahiere reines JSON.")

        strategy = json.loads(json_text)
        logger.info("Strategie erfolgreich als JSON geparst.")
        logger.info(f"Generierte Strategie: {json.dumps(strategy, indent=2)}")
        return strategy
    except requests.exceptions.HTTPError as http_err:
        error_message = f"HTTP Fehler bei der Gemini API-Anfrage: {http_err}"
        logger.error(error_message, exc_info=True)
        raw_response_text = response.text if response is not None else ""
        return {"error": error_message, "response_text": raw_response_text}
    except Exception as e:
        # Boundary handler: network failures, unexpected response shapes and
        # invalid JSON are all reported to the caller as an error dict.
        error_message = f"Fehler bei der Gemini API-Anfrage oder beim Parsen der Antwort: {e}"
        logger.error(error_message, exc_info=True)
        raw_response_text = response.text if response is not None else ""
        return {"error": error_message, "response_text": raw_response_text}
|
||||
|
||||
# Main CLI logic
def main():
    """Parse the command line, run the requested mode and print the result as JSON.

    The result (or an ``{"error": ...}`` object) is written to stdout as JSON.
    Logging is configured at import time, so nothing is set up here.

    Raises:
        SystemExit: With code 1 if strategy generation raises an unexpected
            exception (after the JSON error has been printed); also raised by
            argparse on invalid arguments.
    """
    logger.info("Starte Market Intelligence Backend Orchestrator.")

    parser = argparse.ArgumentParser(description="Market Intelligence Backend Orchestrator.")
    parser.add_argument("--mode", required=True, help="Der auszuführende Modus (z.B. generate_strategy).")
    parser.add_argument("--reference_url", help="Die URL des Referenzkunden.")
    parser.add_argument("--context_file", help="Pfad zur Datei mit dem Strategie-Dokument.")

    args = parser.parse_args()
    logger.info(f"Modus: {args.mode}")

    context_content = ""
    if args.context_file:
        try:
            # Explicit encoding: the default depends on the container locale.
            with open(args.context_file, "r", encoding="utf-8") as f:
                context_content = f.read()
            logger.info(f"Kontext-Datei {args.context_file} erfolgreich gelesen.")
        except FileNotFoundError:
            logger.critical(f"Kontext-Datei nicht gefunden: {args.context_file}")
            print(json.dumps({"error": f"Context file not found: {args.context_file}"}))
            return
        except (OSError, UnicodeDecodeError) as e:
            # Any other read failure is also reported as JSON instead of a traceback.
            logger.critical(f"Kontext-Datei konnte nicht gelesen werden: {args.context_file}: {e}")
            print(json.dumps({"error": f"Could not read context file {args.context_file}: {e}"}))
            return

    if args.mode == "generate_strategy":
        if not args.reference_url or not args.context_file:
            logger.error("Für den Modus 'generate_strategy' sind --reference_url und --context_file erforderlich.")
            print(json.dumps({"error": "Für den Modus 'generate_strategy' sind --reference_url und --context_file erforderlich."}))
            return

        try:
            result = generate_search_strategy(args.reference_url, context_content)
        except Exception as e:
            # Failures raised before the API call (e.g. a missing API key)
            # would otherwise leave stdout empty for the calling process.
            logger.critical(f"Strategiegenerierung fehlgeschlagen: {e}", exc_info=True)
            print(json.dumps({"error": f"Strategy generation failed: {e}"}))
            # Keep the non-zero exit status an uncaught exception would produce.
            raise SystemExit(1)
        print(json.dumps(result, indent=2))
    else:
        logger.error(f"Unbekannter Modus: {args.mode}")
        print(json.dumps({"error": f"Unbekannter Modus: {args.mode}"}))


# Run the CLI only when the file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user