diff --git a/helpers.py b/helpers.py index dc0625d7..89650563 100644 --- a/helpers.py +++ b/helpers.py @@ -985,82 +985,73 @@ def evaluate_branche_chatgpt(crm_branche, beschreibung, wiki_branche, wiki_kateg parsed_confidence = False for line in lines: - line_stripped = line.strip() - line_lower = line_stripped.lower() # WICHTIG: Variable wieder einführen - - # Flexibleres Parsing mit Regex, ignoriert Groß/Kleinschreibung - branch_match = re.match(r"^(Branche|Branchenzuordnung|Branchenwahl):\s*(.*)", line_stripped, re.IGNORECASE) - if branch_match: - suggested_branch = branch_match.group(2).strip().strip('"\'') + line_lower = line.lower().strip() + # Sucht nach "branche:", "branchenzuordnung:", "branchenwahl:" etc. in der Zeile + if "branche:" in line_lower or "branchenzuordnung:" in line_lower or "branchenwahl:" in line_lower: + suggested_branch = line.split(":", 1)[1].strip().strip('"\'') parsed_branch = True elif line_lower.startswith("konfidenz:"): - confidence_text = line_stripped.split(":", 1)[1].strip().strip('"\'') + confidence_text = line.split(":", 1)[1].strip().strip('"\'') valid_confidences = ["hoch", "mittel", "niedrig"] if confidence_text.lower() in valid_confidences: result["confidence"] = confidence_text.capitalize() else: - logger.warning(f"Ungueltiger Konfidenzwert '{confidence_text}' von ChatGPT erhalten. Setze auf 'N/A'.") result["confidence"] = "N/A (Ungueltig)" parsed_confidence = True - elif line_lower.startswith("uebereinstimmung:") or line_lower.startswith("ubereinstimmung:"): - pass elif line_lower.startswith("begruendung:"): - justification_text = line_stripped.split(":", 1)[1].strip() - if result["justification"]: result["justification"] += " " + justification_text - else: result["justification"] = justification_text + result["justification"] = line.split(":", 1)[1].strip() - if not parsed_branch or not suggested_branch or suggested_branch.lower() in ["k.a.", "n/a"]: - logger.error(f"Fehler in evaluate_branche_chatgpt: Konnte 'Branche:' nicht oder nur leer/k.A. aus Antwort parsen: {chat_response[:500]}...") - crm_short_branch_for_fallback = "k.A." - if crm_branche and isinstance(crm_branche, str) and crm_branche.strip().lower() != "k.a.": - crm_short_branch_for_fallback = crm_branche.strip() - - return { - "branch": crm_short_branch_for_fallback if crm_short_branch_for_fallback.lower() != "k.a." else "FEHLER PARSING", - "confidence": result.get("confidence", "N/A"), - "consistency": "error_parsing", - "justification": f"Fehler Parsing: Antwortformat unerwartet." - } - - if not parsed_confidence: - logger.warning("Konnte 'Konfidenz:' nicht aus ChatGPT-Antwort parsen. Setze auf 'N/A'.") - result["confidence"] = "N/A (Nicht geparst)" + if not parsed_branch: + # Fallback, wenn das Schlüsselwort nicht gefunden wurde, aber die Antwort nur aus einem Wort besteht + if len(lines) == 1 and len(lines[0].split()) < 4: + suggested_branch = lines[0].strip().strip('"\'') + logger.warning(f"Konnte 'Branche:' nicht finden, nehme die ganze Zeile als Branchenvorschlag: '{suggested_branch}'") + else: + logger.error(f"Fehler: Konnte 'Branche:' nicht aus Antwort parsen: {chat_response[:200]}...") + return {"branch": "FEHLER PARSING", "confidence": "N/A", "consistency": "error_parsing", "justification": f"Antwortformat unerwartet: {chat_response[:100]}"} + # Validierung und intelligenter Fallback final_branch = None suggested_branch_lower = suggested_branch.lower() + # 1. Exakter Match if suggested_branch_lower in allowed_branches_lookup: final_branch = allowed_branches_lookup[suggested_branch_lower] - logger.debug(f"ChatGPT-Branchenvorschlag '{suggested_branch}' ist gueltig ('{final_branch}').") result["consistency"] = "pending_comparison" else: - logger.debug(f"ChatGPT-Branchenvorschlag '{suggested_branch}' ist NICHT im Ziel-Schema. Starte intelligenten Fallback...") - crm_short_branch = crm_branche.strip() if crm_branche and isinstance(crm_branche, str) else "k.A." - crm_short_branch_lower = crm_short_branch.lower() - - # NEU: Intelligenterer Fallback mit Substring-Suche - best_fallback_match = None - if crm_short_branch_lower != "k.a.": - # Suche nach der ersten erlaubten Branche, die die CRM-Branche als Teil enthält - for allowed_branch_key, allowed_branch_value in allowed_branches_lookup.items(): - if crm_short_branch_lower in allowed_branch_key: - best_fallback_match = allowed_branch_value - logger.info(f"Intelligenter Fallback gefunden: CRM-Branche '{crm_short_branch}' ist Teil von '{best_fallback_match}'.") - break # Nimm den ersten Treffer - - if best_fallback_match: - final_branch = best_fallback_match - result["consistency"] = "fallback_crm_substring" - fallback_reason = f"Fallback (Substring): Ungueltiger Vorschlag ('{suggested_branch}'). CRM-Branche '{crm_short_branch}' passt zu Schema-Eintrag '{final_branch}'." - result["justification"] = f"{fallback_reason} (KI Begruendung: {result.get('justification', 'k.A.')})" - result["confidence"] = "N/A (Fallback)" + # 2. Substring-Match für den Vorschlag + best_suggestion_match = None + for allowed_key, allowed_value in allowed_branches_lookup.items(): + if suggested_branch_lower in allowed_key: + best_suggestion_match = allowed_value + break + + if best_suggestion_match: + final_branch = best_suggestion_match + result["consistency"] = "pending_comparison" + result["justification"] = f"Info: KI-Vorschlag '{suggested_branch}' zu '{final_branch}' gemappt. " + result.get("justification", "") else: - final_branch = "FEHLER - UNGUELTIGE ZUWEISUNG" - result["consistency"] = "fallback_invalid" - error_reason = f"Fehler: Ungueltiger ChatGPT-Vorschlag ('{suggested_branch}') und keine gueltige CRM-Kurzform ('{crm_short_branch}') als Fallback." - result["justification"] = f"{error_reason} (ChatGPT Begruendung war: {result.get('justification', 'Keine')})" - logger.warning(f"Fallback fehlgeschlagen. Ungueltiger Vorschlag: '{suggested_branch}', Ungueltige CRM-Kurzform: '{crm_short_branch}'") - result["confidence"] = "N/A (Fehler)" + # 3. Fallback auf CRM-Branche mit Substring-Match + logger.debug(f"Vorschlag '{suggested_branch}' nicht im Schema. Prüfe Fallback auf CRM-Branche...") + crm_short_branch = crm_branche.strip() + crm_short_branch_lower = crm_short_branch.lower() + best_crm_fallback = None + if crm_short_branch_lower and crm_short_branch_lower != 'k.a.': + for allowed_key, allowed_value in allowed_branches_lookup.items(): + if crm_short_branch_lower in allowed_key: + best_crm_fallback = allowed_value + break + + if best_crm_fallback: + final_branch = best_crm_fallback + result["consistency"] = "fallback_crm_substring" + result["justification"] = f"Fallback: KI-Vorschlag '{suggested_branch}' ungültig. CRM-Branche '{crm_short_branch}' passt zu '{final_branch}'." + result["confidence"] = "N/A (Fallback)" + else: + final_branch = "FEHLER - UNGUELTIGE ZUWEISUNG" + result["consistency"] = "fallback_invalid" + result["justification"] = f"Fehler: Weder KI-Vorschlag ('{suggested_branch}') noch CRM-Branche ('{crm_short_branch}') passen zum Schema." + result["confidence"] = "N/A (Fehler)" result["branch"] = final_branch if final_branch else "FEHLER"