Restore v1.7.9 Parsing Logic

- FIX: `evaluate_branche_chatgpt` in `helpers.py` verwendet nun wieder die robuste Parsing- und Fallback-Logik aus der stabilen Version v1.7.9.
- Dies stellt sicher, dass ChatGPT-Antworten zuverlässiger interpretiert werden.
2025-07-01 14:17:28 +00:00
parent d7b9e5bf1f
commit 6ae09359b4
1 changed file with 65 additions and 129 deletions
--- a/helpers.py
+++ b/helpers.py
@@ -894,180 +894,116 @@ def summarize_batch_openai(tasks_data):
 # 9. OPENAI API WRAPPER (BRANCH EVALUATION)
 # ==============================================================================

-@retry_on_failure
@retry_on_failure
 def evaluate_branche_chatgpt(crm_branche, beschreibung, wiki_branche, wiki_kategorien, website_summary):
    """
-    Bewertet die Branche eines Unternehmens anhand verschiedener textueller Inputs
-    mittels OpenAI und gibt ein strukturiertes Dictionary zurück.
+    Bewertet die Branche eines Unternehmens.
+    NUTZT DIE BEWÄHRTE PARSING-LOGIK AUS v1.7.9.
    """
    logger = logging.getLogger(__name__)
-
    from config import TARGET_SCHEMA_STRING, FOCUS_BRANCHES_PROMPT_PART, ALLOWED_TARGET_BRANCHES

-    # Lazy Loading des Branchenschemas
    if not ALLOWED_TARGET_BRANCHES:
        logger.warning("Branchenschema nicht geladen. Versuche es jetzt zu laden...")
        if not initialize_target_schema():
-            logger.critical("FEHLER in evaluate_branche_chatgpt: Ziel-Branchenschema konnte auch bei Bedarf nicht geladen werden. Breche Evaluation ab.")
-            return {
-                "branch": "FEHLER - SCHEMA FEHLT",
-                "confidence": "N/A",
-                "consistency": "error_schema_missing",
-                "justification": "Fehler: Ziel-Branchenschema konnte nicht geladen werden."
-            }
+            logger.critical("FEHLER in evaluate_branche_chatgpt: Schema konnte nicht geladen werden.")
+            return {"branch": "FEHLER - SCHEMA FEHLT", "confidence": "N/A", "consistency": "error_schema_missing", "justification": "Fehler: Schema nicht geladen."}

    allowed_branches_lookup = {b.lower(): b for b in ALLOWED_TARGET_BRANCHES}
-
    prompt_parts = [TARGET_SCHEMA_STRING, FOCUS_BRANCHES_PROMPT_PART]
    prompt_parts.append("\nOrdne das Unternehmen anhand folgender Angaben exakt einer Branche des Ziel-Branchenschemas (Kurzformen) zu.")
    prompt_parts.append("Gib zusätzlich eine Konfidenz für deine Branchenwahl an (Hoch, Mittel oder Niedrig).")
-
-    if crm_branche and str(crm_branche).strip() and str(crm_branche).strip().lower() != "k.a.":
-        prompt_parts.append(f"- CRM-Branche (Referenz): {str(crm_branche).strip()}")
+    
+    # --- Prompt-Zusammenstellung (unverändert) ---
+    if crm_branche and str(crm_branche).strip() and str(crm_branche).strip().lower() != "k.a.": prompt_parts.append(f"- CRM-Branche (Referenz): {str(crm_branche).strip()}")
    if wiki_branche and str(wiki_branche).strip() and str(wiki_branche).strip().lower() != "k.a.":
-        if beschreibung and str(beschreibung).strip() and str(beschreibung).strip().lower() != "k.a.":
-            prompt_parts.append(f"- Beschreibung (CRM): {str(beschreibung).strip()[:500]}...")
-        if website_summary and str(website_summary).strip() and str(website_summary).strip().lower() != "k.a." and not str(website_summary).strip().startswith("k.A. (Fehler"):
-            prompt_parts.append(f"- Website-Zusammenfassung: {str(website_summary).strip()[:500]}...")
+        if beschreibung and str(beschreibung).strip() and str(beschreibung).strip().lower() != "k.a.": prompt_parts.append(f"- Beschreibung (CRM): {str(beschreibung).strip()[:500]}...")
+        if website_summary and str(website_summary).strip() and str(website_summary).strip().lower() != "k.a." and not str(website_summary).strip().startswith("k.A. (Fehler"): prompt_parts.append(f"- Website-Zusammenfassung: {str(website_summary).strip()[:500]}...")
        prompt_parts.append(f"- Wikipedia-Branche: {str(wiki_branche).strip()[:300]}...")
-        if wiki_kategorien and str(wiki_kategorien).strip() and str(wiki_kategorien).strip().lower() != "k.a.":
-            prompt_parts.append(f"- Wikipedia-Kategorien: {str(wiki_kategorien).strip()[:500]}...")
+        if wiki_kategorien and str(wiki_kategorien).strip() and str(wiki_kategorien).strip().lower() != "k.a.": prompt_parts.append(f"- Wikipedia-Kategorien: {str(wiki_kategorien).strip()[:500]}...")
    else:
-        logger.debug("evaluate_branche_chatgpt: Keine validen Wiki-Daten, nutze Website-Zusammenfassung als primäre Beschreibung (falls vorhanden).")
-        if website_summary and str(website_summary).strip() and str(website_summary).strip().lower() != "k.a." and not str(website_summary).strip().startswith("k.A. (Fehler"):
-            prompt_parts.append(f"- Website-Zusammenfassung (als Hauptbeschreibung): {str(website_summary).strip()[:800]}...")
-        elif beschreibung and str(beschreibung).strip() and str(beschreibung).strip().lower() != "k.a.":
-             prompt_parts.append(f"- Beschreibung (CRM, als Hauptbeschreibung): {str(beschreibung).strip()[:800]}...")
-
-    if len(prompt_parts) < (3 + (1 if FOCUS_BRANCHES_PROMPT_PART else 0)):
-        logger.warning("Warnung in evaluate_branche_chatgpt: Zu wenige Informationen für Branchenevaluierung.")
-        crm_short_branch_for_fallback = "k.A."
-        if crm_branche and isinstance(crm_branche, str) and crm_branche.strip().lower() != "k.a.":
-            crm_short_branch_for_fallback = crm_branche.strip()
-
-        return {
-            "branch": crm_short_branch_for_fallback if crm_short_branch_for_fallback.lower() != "k.a." else "FEHLER",
-            "confidence": "N/A",
-            "consistency": "error_no_info",
-            "justification": "Fehler: Zu wenige Informationen fuer eine Einschaetzung"
-        }
+        logger.debug("evaluate_branche_chatgpt: Keine validen Wiki-Daten, nutze Website-Zusammenfassung als primäre Beschreibung.")
+        if website_summary and str(website_summary).strip() and str(website_summary).strip().lower() != "k.a." and not str(website_summary).strip().startswith("k.A. (Fehler"): prompt_parts.append(f"- Website-Zusammenfassung (als Hauptbeschreibung): {str(website_summary).strip()[:800]}...")
+        elif beschreibung and str(beschreibung).strip() and str(beschreibung).strip().lower() != "k.a.": prompt_parts.append(f"- Beschreibung (CRM, als Hauptbeschreibung): {str(beschreibung).strip()[:800]}...")

    prompt = "\n".join(filter(None, prompt_parts))
-
-    chat_response = None
+    
    try:
        chat_response = call_openai_chat(prompt, temperature=0.0)
-        if not chat_response:
-             raise APIError("Keine Antwort von OpenAI erhalten fuer Branchenevaluation.")
+        if not chat_response: raise APIError("Keine Antwort von OpenAI erhalten.")
    except Exception as e:
        logger.error(f"Endgueltiger FEHLER beim OpenAI-Aufruf fuer Branchenevaluation: {e}")
-        crm_short_branch_for_fallback = "k.A."
-        if crm_branche and isinstance(crm_branche, str) and crm_branche.strip().lower() != "k.a.":
-            crm_short_branch_for_fallback = crm_branche.strip()
-
-        return {
-            "branch": crm_short_branch_for_fallback if crm_short_branch_for_fallback.lower() != "k.a." else "FEHLER API",
-            "confidence": "N/A",
-            "consistency": "error_api_failed",
-            "justification": f"Fehler API: {str(e)[:100]}"
-        }
+        return {"branch": "FEHLER API", "confidence": "N/A", "consistency": "error_api_failed", "justification": f"Fehler API: {str(e)[:100]}"}

+    # --- v1.7.9 PARSING LOGIK WIEDERHERGESTELLT ---
    lines = chat_response.strip().split("\n")
-    result = {
-        "branch": None,
-        "confidence": "N/A",
-        "consistency": None,
-        "justification": ""
-    }
+    result = {"confidence": "N/A", "justification": ""}
    suggested_branch = ""
    parsed_branch = False
-    parsed_confidence = False

    for line in lines:
-        line_lower = line.lower().strip()
-        # Sucht nach "branche:", "branchenzuordnung:", "branchenwahl:" etc. in der Zeile
-        if "branche:" in line_lower or "branchenzuordnung:" in line_lower or "branchenwahl:" in line_lower:
-            suggested_branch = line.split(":", 1)[1].strip().strip('"\'')
+        line_lower = line.lower()
+        if line_lower.startswith("branche:"):
+            suggested_branch = line.split(":", 1)[1].strip()
            parsed_branch = True
-        elif line_lower.startswith("konfidenz:"):
-            confidence_text = line.split(":", 1)[1].strip().strip('"\'')
-            valid_confidences = ["hoch", "mittel", "niedrig"]
-            if confidence_text.lower() in valid_confidences:
-                result["confidence"] = confidence_text.capitalize()
-            else:
-                result["confidence"] = "N/A (Ungueltig)"
-            parsed_confidence = True
+            break # Wichtig: Schleife beenden, sobald der Treffer da ist
+    
+    # Fallback, wenn "Branche:" nicht gefunden wurde (Logik aus v1.7.9)
+    if not parsed_branch and lines:
+        suggested_branch = lines[0].strip() # Nimm die erste Zeile als Vorschlag
+        logger.warning(f"Konnte 'Branche:' nicht finden. Interpretiere erste Zeile als Vorschlag: '{suggested_branch}'")
+
+    # Extrahiere Konfidenz und Begründung separat
+    for line in lines:
+        line_lower = line.lower()
+        if line_lower.startswith("konfidenz:"):
+            result["confidence"] = line.split(":", 1)[1].strip()
        elif line_lower.startswith("begruendung:"):
            result["justification"] = line.split(":", 1)[1].strip()

-    if not parsed_branch:
-        # Fallback, wenn das Schlüsselwort nicht gefunden wurde, aber die Antwort nur aus einem Wort besteht
-        if len(lines) == 1 and len(lines[0].split()) < 4:
-            suggested_branch = lines[0].strip().strip('"\'')
-            logger.warning(f"Konnte 'Branche:' nicht finden, nehme die ganze Zeile als Branchenvorschlag: '{suggested_branch}'")
-        else:
-            logger.error(f"Fehler: Konnte 'Branche:' nicht aus Antwort parsen: {chat_response[:200]}...")
-            return {"branch": "FEHLER PARSING", "confidence": "N/A", "consistency": "error_parsing", "justification": f"Antwortformat unerwartet: {chat_response[:100]}"}
+    if not suggested_branch:
+        logger.error(f"Fehler: Konnte keine Branche aus der Antwort extrahieren: {chat_response[:200]}")
+        return {"branch": "FEHLER PARSING", "confidence": "N/A", "consistency": "error_parsing", "justification": f"Antwort leer oder unklar: {chat_response[:100]}"}

-    # Validierung und intelligenter Fallback
+    # --- Validierung und intelligenter Fallback (leicht angepasst aus v1.9.1) ---
    final_branch = None
    suggested_branch_lower = suggested_branch.lower()

-    # 1. Exakter Match
    if suggested_branch_lower in allowed_branches_lookup:
        final_branch = allowed_branches_lookup[suggested_branch_lower]
-        result["consistency"] = "pending_comparison"
+        logger.debug(f"KI-Vorschlag '{suggested_branch}' ist ein exakter Match.")
    else:
-        # 2. Substring-Match für den Vorschlag
-        best_suggestion_match = None
-        for allowed_key, allowed_value in allowed_branches_lookup.items():
-            if suggested_branch_lower in allowed_key:
-                best_suggestion_match = allowed_value
-                break
-        
+        # Substring-Match für KI-Vorschlag
+        best_suggestion_match = next((val for key, val in allowed_branches_lookup.items() if suggested_branch_lower in key), None)
        if best_suggestion_match:
            final_branch = best_suggestion_match
-            result["consistency"] = "pending_comparison"
-            result["justification"] = f"Info: KI-Vorschlag '{suggested_branch}' zu '{final_branch}' gemappt. " + result.get("justification", "")
+            logger.info(f"KI-Vorschlag '{suggested_branch}' per Substring zu '{final_branch}' gemappt.")
+    
+    if final_branch:
+        result["branch"] = final_branch
+        # Konsistenzprüfung (ok/X)
+        if final_branch.lower() == crm_branche.strip().lower():
+            result["consistency"] = "ok"
        else:
-            # 3. Fallback auf CRM-Branche mit Substring-Match
-            logger.debug(f"Vorschlag '{suggested_branch}' nicht im Schema. Prüfe Fallback auf CRM-Branche...")
-            crm_short_branch = crm_branche.strip()
-            crm_short_branch_lower = crm_short_branch.lower()
-            best_crm_fallback = None
-            if crm_short_branch_lower and crm_short_branch_lower != 'k.a.':
-                for allowed_key, allowed_value in allowed_branches_lookup.items():
-                    if crm_short_branch_lower in allowed_key:
-                        best_crm_fallback = allowed_value
-                        break
-            
-            if best_crm_fallback:
-                final_branch = best_crm_fallback
-                result["consistency"] = "fallback_crm_substring"
-                result["justification"] = f"Fallback: KI-Vorschlag '{suggested_branch}' ungültig. CRM-Branche '{crm_short_branch}' passt zu '{final_branch}'."
-                result["confidence"] = "N/A (Fallback)"
-            else:
-                final_branch = "FEHLER - UNGUELTIGE ZUWEISUNG"
-                result["consistency"] = "fallback_invalid"
-                result["justification"] = f"Fehler: Weder KI-Vorschlag ('{suggested_branch}') noch CRM-Branche ('{crm_short_branch}') passen zum Schema."
-                result["confidence"] = "N/A (Fehler)"
+            result["consistency"] = "X"
+    else:
+        # Fallback auf CRM-Branche
+        logger.debug(f"Vorschlag '{suggested_branch}' nicht im Schema. Prüfe Fallback auf CRM-Branche...")
+        crm_short_branch_lower = crm_branche.strip().lower()
+        best_crm_fallback = next((val for key, val in allowed_branches_lookup.items() if crm_short_branch_lower in key and crm_short_branch_lower), None)
+        
+        if best_crm_fallback:
+            result["branch"] = best_crm_fallback
+            result["consistency"] = "fallback_crm_substring"
+            result["justification"] = f"Fallback: KI-Vorschlag ungültig. CRM-Branche '{crm_branche}' passt zu Schema '{best_crm_fallback}'."
+            result["confidence"] = "N/A (Fallback)"
+        else:
+            result["branch"] = "FEHLER - UNGUELTIGE ZUWEISUNG"
+            result["consistency"] = "fallback_invalid"
+            result["justification"] = f"Fehler: Weder KI-Vorschlag ('{suggested_branch}') noch CRM-Branche ('{crm_branche}') passen zum Schema."
+            result["confidence"] = "N/A (Fehler)"

-    result["branch"] = final_branch if final_branch else "FEHLER"
-
-    crm_branch_to_compare = crm_branche.strip() if crm_branche and isinstance(crm_branche, str) else "k.A."
-    if result["consistency"] == "pending_comparison" and result["branch"] != "FEHLER" and not result["branch"].startswith("FEHLER"):
-         if result["branch"].lower() == crm_branch_to_compare.lower():
-             result["consistency"] = "ok"
-         else:
-             result["consistency"] = "X"
-
-    if result["consistency"] == "pending_comparison":
-        result["consistency"] = "error_comparison_failed"
-    elif result["consistency"] is None:
-        result["consistency"] = "error_unknown_state"
-
-    logger.debug(f"Finale Branch-Evaluation Ergebnis: Branch='{result.get('branch')}', Confidence='{result.get('confidence')}', Consistency='{result.get('consistency')}', Justification='{result.get('justification', '')[:100]}...'")
+    logger.debug(f"Finale Branch-Evaluation: {result}")
    return result

 # ==============================================================================