Restore v1.7.9 Parsing Logic

- FIX: `evaluate_branche_chatgpt` in `helpers.py` verwendet nun wieder die robuste Parsing- und Fallback-Logik aus der stabilen Version v1.7.9.
- Dies stellt sicher, dass ChatGPT-Antworten zuverlässiger interpretiert werden.
2025-07-01 14:17:28 +00:00
parent dd2cd118f5
commit 67dc44384c
1 changed file with 65 additions and 129 deletions
--- a/helpers.py
+++ b/helpers.py
@@ -894,180 +894,116 @@ def summarize_batch_openai(tasks_data):
 # 9. OPENAI API WRAPPER (BRANCH EVALUATION)
 # ==============================================================================
@retry_on_failure
@retry_on_failure
 def evaluate_branche_chatgpt(crm_branche, beschreibung, wiki_branche, wiki_kategorien, website_summary):
    """
-    Bewertet die Branche eines Unternehmens anhand verschiedener textueller Inputs
+    Bewertet die Branche eines Unternehmens.
-    mittels OpenAI und gibt ein strukturiertes Dictionary zurück.
+    NUTZT DIE BEWÄHRTE PARSING-LOGIK AUS v1.7.9.
    """
    logger = logging.getLogger(__name__)
    from config import TARGET_SCHEMA_STRING, FOCUS_BRANCHES_PROMPT_PART, ALLOWED_TARGET_BRANCHES
    # Lazy Loading des Branchenschemas
    if not ALLOWED_TARGET_BRANCHES:
        logger.warning("Branchenschema nicht geladen. Versuche es jetzt zu laden...")
        if not initialize_target_schema():
-            logger.critical("FEHLER in evaluate_branche_chatgpt: Ziel-Branchenschema konnte auch bei Bedarf nicht geladen werden. Breche Evaluation ab.")
+            logger.critical("FEHLER in evaluate_branche_chatgpt: Schema konnte nicht geladen werden.")
-            return {
+            return {"branch": "FEHLER - SCHEMA FEHLT", "confidence": "N/A", "consistency": "error_schema_missing", "justification": "Fehler: Schema nicht geladen."}
                "branch": "FEHLER - SCHEMA FEHLT",
                "confidence": "N/A",
                "consistency": "error_schema_missing",
                "justification": "Fehler: Ziel-Branchenschema konnte nicht geladen werden."
            }
    allowed_branches_lookup = {b.lower(): b for b in ALLOWED_TARGET_BRANCHES}
    prompt_parts = [TARGET_SCHEMA_STRING, FOCUS_BRANCHES_PROMPT_PART]
    prompt_parts.append("\nOrdne das Unternehmen anhand folgender Angaben exakt einer Branche des Ziel-Branchenschemas (Kurzformen) zu.")
    prompt_parts.append("Gib zusätzlich eine Konfidenz für deine Branchenwahl an (Hoch, Mittel oder Niedrig).")
-
+    
-    if crm_branche and str(crm_branche).strip() and str(crm_branche).strip().lower() != "k.a.":
+    # --- Prompt-Zusammenstellung (unverändert) ---
-        prompt_parts.append(f"- CRM-Branche (Referenz): {str(crm_branche).strip()}")
+    if crm_branche and str(crm_branche).strip() and str(crm_branche).strip().lower() != "k.a.": prompt_parts.append(f"- CRM-Branche (Referenz): {str(crm_branche).strip()}")
    if wiki_branche and str(wiki_branche).strip() and str(wiki_branche).strip().lower() != "k.a.":
-        if beschreibung and str(beschreibung).strip() and str(beschreibung).strip().lower() != "k.a.":
+        if beschreibung and str(beschreibung).strip() and str(beschreibung).strip().lower() != "k.a.": prompt_parts.append(f"- Beschreibung (CRM): {str(beschreibung).strip()[:500]}...")
-            prompt_parts.append(f"- Beschreibung (CRM): {str(beschreibung).strip()[:500]}...")
+        if website_summary and str(website_summary).strip() and str(website_summary).strip().lower() != "k.a." and not str(website_summary).strip().startswith("k.A. (Fehler"): prompt_parts.append(f"- Website-Zusammenfassung: {str(website_summary).strip()[:500]}...")
        if website_summary and str(website_summary).strip() and str(website_summary).strip().lower() != "k.a." and not str(website_summary).strip().startswith("k.A. (Fehler"):
            prompt_parts.append(f"- Website-Zusammenfassung: {str(website_summary).strip()[:500]}...")
        prompt_parts.append(f"- Wikipedia-Branche: {str(wiki_branche).strip()[:300]}...")
-        if wiki_kategorien and str(wiki_kategorien).strip() and str(wiki_kategorien).strip().lower() != "k.a.":
+        if wiki_kategorien and str(wiki_kategorien).strip() and str(wiki_kategorien).strip().lower() != "k.a.": prompt_parts.append(f"- Wikipedia-Kategorien: {str(wiki_kategorien).strip()[:500]}...")
            prompt_parts.append(f"- Wikipedia-Kategorien: {str(wiki_kategorien).strip()[:500]}...")
    else:
-        logger.debug("evaluate_branche_chatgpt: Keine validen Wiki-Daten, nutze Website-Zusammenfassung als primäre Beschreibung (falls vorhanden).")
+        logger.debug("evaluate_branche_chatgpt: Keine validen Wiki-Daten, nutze Website-Zusammenfassung als primäre Beschreibung.")
-        if website_summary and str(website_summary).strip() and str(website_summary).strip().lower() != "k.a." and not str(website_summary).strip().startswith("k.A. (Fehler"):
+        if website_summary and str(website_summary).strip() and str(website_summary).strip().lower() != "k.a." and not str(website_summary).strip().startswith("k.A. (Fehler"): prompt_parts.append(f"- Website-Zusammenfassung (als Hauptbeschreibung): {str(website_summary).strip()[:800]}...")
-            prompt_parts.append(f"- Website-Zusammenfassung (als Hauptbeschreibung): {str(website_summary).strip()[:800]}...")
+        elif beschreibung and str(beschreibung).strip() and str(beschreibung).strip().lower() != "k.a.": prompt_parts.append(f"- Beschreibung (CRM, als Hauptbeschreibung): {str(beschreibung).strip()[:800]}...")
        elif beschreibung and str(beschreibung).strip() and str(beschreibung).strip().lower() != "k.a.":
             prompt_parts.append(f"- Beschreibung (CRM, als Hauptbeschreibung): {str(beschreibung).strip()[:800]}...")
    if len(prompt_parts) < (3 + (1 if FOCUS_BRANCHES_PROMPT_PART else 0)):
        logger.warning("Warnung in evaluate_branche_chatgpt: Zu wenige Informationen für Branchenevaluierung.")
        crm_short_branch_for_fallback = "k.A."
        if crm_branche and isinstance(crm_branche, str) and crm_branche.strip().lower() != "k.a.":
            crm_short_branch_for_fallback = crm_branche.strip()
        return {
            "branch": crm_short_branch_for_fallback if crm_short_branch_for_fallback.lower() != "k.a." else "FEHLER",
            "confidence": "N/A",
            "consistency": "error_no_info",
            "justification": "Fehler: Zu wenige Informationen fuer eine Einschaetzung"
        }
    prompt = "\n".join(filter(None, prompt_parts))
-
+    
    chat_response = None
    try:
        chat_response = call_openai_chat(prompt, temperature=0.0)
-        if not chat_response:
+        if not chat_response: raise APIError("Keine Antwort von OpenAI erhalten.")
             raise APIError("Keine Antwort von OpenAI erhalten fuer Branchenevaluation.")
    except Exception as e:
        logger.error(f"Endgueltiger FEHLER beim OpenAI-Aufruf fuer Branchenevaluation: {e}")
-        crm_short_branch_for_fallback = "k.A."
+        return {"branch": "FEHLER API", "confidence": "N/A", "consistency": "error_api_failed", "justification": f"Fehler API: {str(e)[:100]}"}
        if crm_branche and isinstance(crm_branche, str) and crm_branche.strip().lower() != "k.a.":
            crm_short_branch_for_fallback = crm_branche.strip()
        return {
            "branch": crm_short_branch_for_fallback if crm_short_branch_for_fallback.lower() != "k.a." else "FEHLER API",
            "confidence": "N/A",
            "consistency": "error_api_failed",
            "justification": f"Fehler API: {str(e)[:100]}"
        }
    # --- v1.7.9 PARSING LOGIK WIEDERHERGESTELLT ---
    lines = chat_response.strip().split("\n")
-    result = {
+    result = {"confidence": "N/A", "justification": ""}
        "branch": None,
        "confidence": "N/A",
        "consistency": None,
        "justification": ""
    }
    suggested_branch = ""
    parsed_branch = False
    parsed_confidence = False
    for line in lines:
-        line_lower = line.lower().strip()
+        line_lower = line.lower()
-        # Sucht nach "branche:", "branchenzuordnung:", "branchenwahl:" etc. in der Zeile
+        if line_lower.startswith("branche:"):
-        if "branche:" in line_lower or "branchenzuordnung:" in line_lower or "branchenwahl:" in line_lower:
+            suggested_branch = line.split(":", 1)[1].strip()
            suggested_branch = line.split(":", 1)[1].strip().strip('"\'')
            parsed_branch = True
-        elif line_lower.startswith("konfidenz:"):
+            break # Wichtig: Schleife beenden, sobald der Treffer da ist
-            confidence_text = line.split(":", 1)[1].strip().strip('"\'')
+    
-            valid_confidences = ["hoch", "mittel", "niedrig"]
+    # Fallback, wenn "Branche:" nicht gefunden wurde (Logik aus v1.7.9)
-            if confidence_text.lower() in valid_confidences:
+    if not parsed_branch and lines:
-                result["confidence"] = confidence_text.capitalize()
+        suggested_branch = lines[0].strip() # Nimm die erste Zeile als Vorschlag
-            else:
+        logger.warning(f"Konnte 'Branche:' nicht finden. Interpretiere erste Zeile als Vorschlag: '{suggested_branch}'")
-                result["confidence"] = "N/A (Ungueltig)"
+
-            parsed_confidence = True
+    # Extrahiere Konfidenz und Begründung separat
    for line in lines:
        line_lower = line.lower()
        if line_lower.startswith("konfidenz:"):
            result["confidence"] = line.split(":", 1)[1].strip()
        elif line_lower.startswith("begruendung:"):
            result["justification"] = line.split(":", 1)[1].strip()
-    if not parsed_branch:
+    if not suggested_branch:
-        # Fallback, wenn das Schlüsselwort nicht gefunden wurde, aber die Antwort nur aus einem Wort besteht
+        logger.error(f"Fehler: Konnte keine Branche aus der Antwort extrahieren: {chat_response[:200]}")
-        if len(lines) == 1 and len(lines[0].split()) < 4:
+        return {"branch": "FEHLER PARSING", "confidence": "N/A", "consistency": "error_parsing", "justification": f"Antwort leer oder unklar: {chat_response[:100]}"}
            suggested_branch = lines[0].strip().strip('"\'')
            logger.warning(f"Konnte 'Branche:' nicht finden, nehme die ganze Zeile als Branchenvorschlag: '{suggested_branch}'")
        else:
            logger.error(f"Fehler: Konnte 'Branche:' nicht aus Antwort parsen: {chat_response[:200]}...")
            return {"branch": "FEHLER PARSING", "confidence": "N/A", "consistency": "error_parsing", "justification": f"Antwortformat unerwartet: {chat_response[:100]}"}
-    # Validierung und intelligenter Fallback
+    # --- Validierung und intelligenter Fallback (leicht angepasst aus v1.9.1) ---
    final_branch = None
    suggested_branch_lower = suggested_branch.lower()
    # 1. Exakter Match
    if suggested_branch_lower in allowed_branches_lookup:
        final_branch = allowed_branches_lookup[suggested_branch_lower]
-        result["consistency"] = "pending_comparison"
+        logger.debug(f"KI-Vorschlag '{suggested_branch}' ist ein exakter Match.")
    else:
-        # 2. Substring-Match für den Vorschlag
+        # Substring-Match für KI-Vorschlag
-        best_suggestion_match = None
+        best_suggestion_match = next((val for key, val in allowed_branches_lookup.items() if suggested_branch_lower in key), None)
        for allowed_key, allowed_value in allowed_branches_lookup.items():
            if suggested_branch_lower in allowed_key:
                best_suggestion_match = allowed_value
                break
        if best_suggestion_match:
            final_branch = best_suggestion_match
-            result["consistency"] = "pending_comparison"
+            logger.info(f"KI-Vorschlag '{suggested_branch}' per Substring zu '{final_branch}' gemappt.")
-            result["justification"] = f"Info: KI-Vorschlag '{suggested_branch}' zu '{final_branch}' gemappt. " + result.get("justification", "")
+    
    if final_branch:
        result["branch"] = final_branch
        # Konsistenzprüfung (ok/X)
        if final_branch.lower() == crm_branche.strip().lower():
            result["consistency"] = "ok"
        else:
-            # 3. Fallback auf CRM-Branche mit Substring-Match
+            result["consistency"] = "X"
-            logger.debug(f"Vorschlag '{suggested_branch}' nicht im Schema. Prüfe Fallback auf CRM-Branche...")
+    else:
-            crm_short_branch = crm_branche.strip()
+        # Fallback auf CRM-Branche
-            crm_short_branch_lower = crm_short_branch.lower()
+        logger.debug(f"Vorschlag '{suggested_branch}' nicht im Schema. Prüfe Fallback auf CRM-Branche...")
-            best_crm_fallback = None
+        crm_short_branch_lower = crm_branche.strip().lower()
-            if crm_short_branch_lower and crm_short_branch_lower != 'k.a.':
+        best_crm_fallback = next((val for key, val in allowed_branches_lookup.items() if crm_short_branch_lower in key and crm_short_branch_lower), None)
-                for allowed_key, allowed_value in allowed_branches_lookup.items():
+        
-                    if crm_short_branch_lower in allowed_key:
+        if best_crm_fallback:
-                        best_crm_fallback = allowed_value
+            result["branch"] = best_crm_fallback
-                        break
+            result["consistency"] = "fallback_crm_substring"
-            
+            result["justification"] = f"Fallback: KI-Vorschlag ungültig. CRM-Branche '{crm_branche}' passt zu Schema '{best_crm_fallback}'."
-            if best_crm_fallback:
+            result["confidence"] = "N/A (Fallback)"
-                final_branch = best_crm_fallback
+        else:
-                result["consistency"] = "fallback_crm_substring"
+            result["branch"] = "FEHLER - UNGUELTIGE ZUWEISUNG"
-                result["justification"] = f"Fallback: KI-Vorschlag '{suggested_branch}' ungültig. CRM-Branche '{crm_short_branch}' passt zu '{final_branch}'."
+            result["consistency"] = "fallback_invalid"
-                result["confidence"] = "N/A (Fallback)"
+            result["justification"] = f"Fehler: Weder KI-Vorschlag ('{suggested_branch}') noch CRM-Branche ('{crm_branche}') passen zum Schema."
-            else:
+            result["confidence"] = "N/A (Fehler)"
                final_branch = "FEHLER - UNGUELTIGE ZUWEISUNG"
                result["consistency"] = "fallback_invalid"
                result["justification"] = f"Fehler: Weder KI-Vorschlag ('{suggested_branch}') noch CRM-Branche ('{crm_short_branch}') passen zum Schema."
                result["confidence"] = "N/A (Fehler)"
-    result["branch"] = final_branch if final_branch else "FEHLER"
+    logger.debug(f"Finale Branch-Evaluation: {result}")
    crm_branch_to_compare = crm_branche.strip() if crm_branche and isinstance(crm_branche, str) else "k.A."
    if result["consistency"] == "pending_comparison" and result["branch"] != "FEHLER" and not result["branch"].startswith("FEHLER"):
         if result["branch"].lower() == crm_branch_to_compare.lower():
             result["consistency"] = "ok"
         else:
             result["consistency"] = "X"
    if result["consistency"] == "pending_comparison":
        result["consistency"] = "error_comparison_failed"
    elif result["consistency"] is None:
        result["consistency"] = "error_unknown_state"
    logger.debug(f"Finale Branch-Evaluation Ergebnis: Branch='{result.get('branch')}', Confidence='{result.get('confidence')}', Consistency='{result.get('consistency')}', Justification='{result.get('justification', '')[:100]}...'")
    return result
 # ==============================================================================