Restore Core Logic & Final Stability
- FIX: `data_processor.py` berücksichtigt nun wieder die Wiki-URL aus Spalte N, was das Überschreiben manueller Einträge verhindert.
- FIX: Branchen-Parsing in `helpers.py` durch eine robustere, vereinfachte Logik ersetzt, die mehr KI-Antwortvarianten versteht.
- FIX: Intelligenter Fallback für die Branchenzuweisung wiederhergestellt, der Teilübereinstimmungen korrekt behandelt.
- Das Skript ist nun funktional auf dem Stand der stabilen v1.7.9, aber innerhalb der neuen modularen Architektur.
This commit is contained in:
105
helpers.py
105
helpers.py
@@ -985,82 +985,73 @@ def evaluate_branche_chatgpt(crm_branche, beschreibung, wiki_branche, wiki_kateg
|
||||
parsed_confidence = False
|
||||
|
||||
for line in lines:
|
||||
line_stripped = line.strip()
|
||||
line_lower = line_stripped.lower() # WICHTIG: Variable wieder einführen
|
||||
|
||||
# Flexibleres Parsing mit Regex, ignoriert Groß/Kleinschreibung
|
||||
branch_match = re.match(r"^(Branche|Branchenzuordnung|Branchenwahl):\s*(.*)", line_stripped, re.IGNORECASE)
|
||||
if branch_match:
|
||||
suggested_branch = branch_match.group(2).strip().strip('"\'')
|
||||
line_lower = line.lower().strip()
|
||||
# Sucht nach "branche:", "branchenzuordnung:", "branchenwahl:" etc. in der Zeile
|
||||
if "branche:" in line_lower or "branchenzuordnung:" in line_lower or "branchenwahl:" in line_lower:
|
||||
suggested_branch = line.split(":", 1)[1].strip().strip('"\'')
|
||||
parsed_branch = True
|
||||
elif line_lower.startswith("konfidenz:"):
|
||||
confidence_text = line_stripped.split(":", 1)[1].strip().strip('"\'')
|
||||
confidence_text = line.split(":", 1)[1].strip().strip('"\'')
|
||||
valid_confidences = ["hoch", "mittel", "niedrig"]
|
||||
if confidence_text.lower() in valid_confidences:
|
||||
result["confidence"] = confidence_text.capitalize()
|
||||
else:
|
||||
logger.warning(f"Ungueltiger Konfidenzwert '{confidence_text}' von ChatGPT erhalten. Setze auf 'N/A'.")
|
||||
result["confidence"] = "N/A (Ungueltig)"
|
||||
parsed_confidence = True
|
||||
elif line_lower.startswith("uebereinstimmung:") or line_lower.startswith("ubereinstimmung:"):
|
||||
pass
|
||||
elif line_lower.startswith("begruendung:"):
|
||||
justification_text = line_stripped.split(":", 1)[1].strip()
|
||||
if result["justification"]: result["justification"] += " " + justification_text
|
||||
else: result["justification"] = justification_text
|
||||
result["justification"] = line.split(":", 1)[1].strip()
|
||||
|
||||
if not parsed_branch or not suggested_branch or suggested_branch.lower() in ["k.a.", "n/a"]:
|
||||
logger.error(f"Fehler in evaluate_branche_chatgpt: Konnte 'Branche:' nicht oder nur leer/k.A. aus Antwort parsen: {chat_response[:500]}...")
|
||||
crm_short_branch_for_fallback = "k.A."
|
||||
if crm_branche and isinstance(crm_branche, str) and crm_branche.strip().lower() != "k.a.":
|
||||
crm_short_branch_for_fallback = crm_branche.strip()
|
||||
|
||||
return {
|
||||
"branch": crm_short_branch_for_fallback if crm_short_branch_for_fallback.lower() != "k.a." else "FEHLER PARSING",
|
||||
"confidence": result.get("confidence", "N/A"),
|
||||
"consistency": "error_parsing",
|
||||
"justification": f"Fehler Parsing: Antwortformat unerwartet."
|
||||
}
|
||||
|
||||
if not parsed_confidence:
|
||||
logger.warning("Konnte 'Konfidenz:' nicht aus ChatGPT-Antwort parsen. Setze auf 'N/A'.")
|
||||
result["confidence"] = "N/A (Nicht geparst)"
|
||||
if not parsed_branch:
|
||||
# Fallback, wenn das Schlüsselwort nicht gefunden wurde, aber die Antwort nur aus einem Wort besteht
|
||||
if len(lines) == 1 and len(lines[0].split()) < 4:
|
||||
suggested_branch = lines[0].strip().strip('"\'')
|
||||
logger.warning(f"Konnte 'Branche:' nicht finden, nehme die ganze Zeile als Branchenvorschlag: '{suggested_branch}'")
|
||||
else:
|
||||
logger.error(f"Fehler: Konnte 'Branche:' nicht aus Antwort parsen: {chat_response[:200]}...")
|
||||
return {"branch": "FEHLER PARSING", "confidence": "N/A", "consistency": "error_parsing", "justification": f"Antwortformat unerwartet: {chat_response[:100]}"}
|
||||
|
||||
# Validierung und intelligenter Fallback
|
||||
final_branch = None
|
||||
suggested_branch_lower = suggested_branch.lower()
|
||||
|
||||
# 1. Exakter Match
|
||||
if suggested_branch_lower in allowed_branches_lookup:
|
||||
final_branch = allowed_branches_lookup[suggested_branch_lower]
|
||||
logger.debug(f"ChatGPT-Branchenvorschlag '{suggested_branch}' ist gueltig ('{final_branch}').")
|
||||
result["consistency"] = "pending_comparison"
|
||||
else:
|
||||
logger.debug(f"ChatGPT-Branchenvorschlag '{suggested_branch}' ist NICHT im Ziel-Schema. Starte intelligenten Fallback...")
|
||||
crm_short_branch = crm_branche.strip() if crm_branche and isinstance(crm_branche, str) else "k.A."
|
||||
crm_short_branch_lower = crm_short_branch.lower()
|
||||
|
||||
# NEU: Intelligenterer Fallback mit Substring-Suche
|
||||
best_fallback_match = None
|
||||
if crm_short_branch_lower != "k.a.":
|
||||
# Suche nach der ersten erlaubten Branche, die die CRM-Branche als Teil enthält
|
||||
for allowed_branch_key, allowed_branch_value in allowed_branches_lookup.items():
|
||||
if crm_short_branch_lower in allowed_branch_key:
|
||||
best_fallback_match = allowed_branch_value
|
||||
logger.info(f"Intelligenter Fallback gefunden: CRM-Branche '{crm_short_branch}' ist Teil von '{best_fallback_match}'.")
|
||||
break # Nimm den ersten Treffer
|
||||
|
||||
if best_fallback_match:
|
||||
final_branch = best_fallback_match
|
||||
result["consistency"] = "fallback_crm_substring"
|
||||
fallback_reason = f"Fallback (Substring): Ungueltiger Vorschlag ('{suggested_branch}'). CRM-Branche '{crm_short_branch}' passt zu Schema-Eintrag '{final_branch}'."
|
||||
result["justification"] = f"{fallback_reason} (KI Begruendung: {result.get('justification', 'k.A.')})"
|
||||
result["confidence"] = "N/A (Fallback)"
|
||||
# 2. Substring-Match für den Vorschlag
|
||||
best_suggestion_match = None
|
||||
for allowed_key, allowed_value in allowed_branches_lookup.items():
|
||||
if suggested_branch_lower in allowed_key:
|
||||
best_suggestion_match = allowed_value
|
||||
break
|
||||
|
||||
if best_suggestion_match:
|
||||
final_branch = best_suggestion_match
|
||||
result["consistency"] = "pending_comparison"
|
||||
result["justification"] = f"Info: KI-Vorschlag '{suggested_branch}' zu '{final_branch}' gemappt. " + result.get("justification", "")
|
||||
else:
|
||||
final_branch = "FEHLER - UNGUELTIGE ZUWEISUNG"
|
||||
result["consistency"] = "fallback_invalid"
|
||||
error_reason = f"Fehler: Ungueltiger ChatGPT-Vorschlag ('{suggested_branch}') und keine gueltige CRM-Kurzform ('{crm_short_branch}') als Fallback."
|
||||
result["justification"] = f"{error_reason} (ChatGPT Begruendung war: {result.get('justification', 'Keine')})"
|
||||
logger.warning(f"Fallback fehlgeschlagen. Ungueltiger Vorschlag: '{suggested_branch}', Ungueltige CRM-Kurzform: '{crm_short_branch}'")
|
||||
result["confidence"] = "N/A (Fehler)"
|
||||
# 3. Fallback auf CRM-Branche mit Substring-Match
|
||||
logger.debug(f"Vorschlag '{suggested_branch}' nicht im Schema. Prüfe Fallback auf CRM-Branche...")
|
||||
crm_short_branch = crm_branche.strip()
|
||||
crm_short_branch_lower = crm_short_branch.lower()
|
||||
best_crm_fallback = None
|
||||
if crm_short_branch_lower and crm_short_branch_lower != 'k.a.':
|
||||
for allowed_key, allowed_value in allowed_branches_lookup.items():
|
||||
if crm_short_branch_lower in allowed_key:
|
||||
best_crm_fallback = allowed_value
|
||||
break
|
||||
|
||||
if best_crm_fallback:
|
||||
final_branch = best_crm_fallback
|
||||
result["consistency"] = "fallback_crm_substring"
|
||||
result["justification"] = f"Fallback: KI-Vorschlag '{suggested_branch}' ungültig. CRM-Branche '{crm_short_branch}' passt zu '{final_branch}'."
|
||||
result["confidence"] = "N/A (Fallback)"
|
||||
else:
|
||||
final_branch = "FEHLER - UNGUELTIGE ZUWEISUNG"
|
||||
result["consistency"] = "fallback_invalid"
|
||||
result["justification"] = f"Fehler: Weder KI-Vorschlag ('{suggested_branch}') noch CRM-Branche ('{crm_short_branch}') passen zum Schema."
|
||||
result["confidence"] = "N/A (Fehler)"
|
||||
|
||||
result["branch"] = final_branch if final_branch else "FEHLER"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user