STABLE - Objektorientiertes Schema-Handling
- MAJOR REFACTOR: Globale Variablen für das Branchenschema komplett entfernt. Das Schema wird nun in der `DataProcessor`-Instanz gehalten und als Argument übergeben.
- FIX: Kritischer Prompt-Fehler endgültig behoben. `evaluate_branche_chatgpt` erhält das Schema nun als explizites Argument, was Scope-Probleme beseitigt.
- Das Projekt ist nun in einem stabilen, logisch konsistenten und lauffähigen Zustand für die Bestandsanreicherung.
This commit is contained in:
@@ -51,32 +51,18 @@ from wikipedia_scraper import WikipediaScraper
|
|||||||
|
|
||||||
|
|
||||||
class DataProcessor:
|
class DataProcessor:
|
||||||
"""
|
|
||||||
Zentrale Klasse zur Orchestrierung und Verarbeitung von Unternehmensdaten.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, sheet_handler, wiki_scraper):
|
def __init__(self, sheet_handler, wiki_scraper):
|
||||||
"""
|
|
||||||
Initialisiert den DataProcessor mit Instanzen von Handler-Klassen.
|
|
||||||
Die eigentliche Konfiguration erfolgt in der setup() Methode.
|
|
||||||
"""
|
|
||||||
self.logger = logging.getLogger(__name__ + ".DataProcessor")
|
self.logger = logging.getLogger(__name__ + ".DataProcessor")
|
||||||
self.logger.info("Initialisiere DataProcessor...")
|
self.logger.info("Initialisiere DataProcessor...")
|
||||||
|
if not isinstance(sheet_handler, GoogleSheetHandler): raise ValueError("...")
|
||||||
if not isinstance(sheet_handler, GoogleSheetHandler):
|
if not isinstance(wiki_scraper, WikipediaScraper): raise ValueError("...")
|
||||||
raise ValueError("DataProcessor benötigt eine gültige GoogleSheetHandler Instanz.")
|
|
||||||
if not isinstance(wiki_scraper, WikipediaScraper):
|
|
||||||
raise ValueError("DataProcessor benötigt eine gültige WikipediaScraper Instanz.")
|
|
||||||
|
|
||||||
self.sheet_handler = sheet_handler
|
self.sheet_handler = sheet_handler
|
||||||
self.wiki_scraper = wiki_scraper
|
self.wiki_scraper = wiki_scraper
|
||||||
self.model = None
|
self.model = None
|
||||||
self.imputer = None
|
self.imputer = None
|
||||||
self._expected_features = None
|
self._expected_features = None
|
||||||
self.is_setup_complete = False
|
self.is_setup_complete = False
|
||||||
self.schema_data = None # NEUES Attribut
|
self.schema_data = None # Wichtig: Neues Attribut
|
||||||
|
|
||||||
self.logger.info("DataProcessor-Instanz erstellt. Bereit für Setup.")
|
|
||||||
|
|
||||||
def setup(self):
|
def setup(self):
|
||||||
self.logger.info("Führe DataProcessor-Setup durch...")
|
self.logger.info("Führe DataProcessor-Setup durch...")
|
||||||
@@ -481,59 +467,72 @@ class DataProcessor:
|
|||||||
self.logger.info(
|
self.logger.info(
|
||||||
f"Zeile {row_num_in_sheet}: Fuehre CHATGPT Evaluationen & Plausi aus (Grund: {grund_message_chat})...")
|
f"Zeile {row_num_in_sheet}: Fuehre CHATGPT Evaluationen & Plausi aus (Grund: {grund_message_chat})...")
|
||||||
|
|
||||||
# 3a. Branchen-Einstufung
|
# --- 3. ChatGPT Evaluationen (Branch, FSM, etc.) & Plausi ---
|
||||||
self.logger.info(f" Zeile {row_num_in_sheet}: Starte Branchen-Einstufung ueber ChatGPT...")
|
run_chat_step = 'chat' in steps_to_run
|
||||||
try:
|
chat_processing_needed = self._needs_chat_evaluations(
|
||||||
branch_result = evaluate_branche_chatgpt(
|
row_data, force_reeval, wiki_data_updated_in_this_run)
|
||||||
crm_branche,
|
|
||||||
crm_beschreibung,
|
if run_chat_step and chat_processing_needed:
|
||||||
final_wiki_data.get('branche', 'k.A.'),
|
any_processing_done = True
|
||||||
final_wiki_data.get('categories', 'k.A.'),
|
chat_eval_just_ran = True
|
||||||
website_summary,
|
|
||||||
schema_data=self.schema_data # NEU: Schema als Argument übergeben
|
grund_message_chat = "Re-Eval" if force_reeval else (
|
||||||
)
|
"Wiki-Daten aktualisiert" if wiki_data_updated_in_this_run else "Timestamp (BN) leer")
|
||||||
updates.append(
|
self.logger.info(
|
||||||
{
|
f"Zeile {row_num_in_sheet}: Fuehre CHATGPT Evaluationen & Plausi aus (Grund: {grund_message_chat})...")
|
||||||
'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"] + 1)}{row_num_in_sheet}',
|
|
||||||
'values': [
|
# --- 3a. Branchen-Einstufung ---
|
||||||
[
|
self.logger.info(
|
||||||
branch_result.get(
|
f" Zeile {row_num_in_sheet}: Starte Branchen-Einstufung ueber ChatGPT...")
|
||||||
"branch",
|
try:
|
||||||
"FEHLER BRANCH")]]})
|
# schema_data wird hier aus der Instanz-Variable übergeben
|
||||||
updates.append(
|
branch_result = evaluate_branche_chatgpt(
|
||||||
{
|
crm_branche,
|
||||||
'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Branche Konfidenz"] + 1)}{row_num_in_sheet}',
|
crm_beschreibung,
|
||||||
'values': [
|
final_wiki_data.get('branche', 'k.A.'),
|
||||||
[
|
final_wiki_data.get('categories', 'k.A.'),
|
||||||
branch_result.get(
|
website_summary,
|
||||||
"confidence",
|
schema_data=self.schema_data
|
||||||
"N/A CONF")]]})
|
)
|
||||||
updates.append(
|
|
||||||
{
|
# Updates für die Sheet-Spalten vorbereiten
|
||||||
'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Konsistenz Branche"] + 1)}{row_num_in_sheet}',
|
updates.append({
|
||||||
'values': [
|
'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"] + 1)}{row_num_in_sheet}',
|
||||||
[
|
'values': [[branch_result.get("branch", "FEHLER BRANCH")]]
|
||||||
branch_result.get(
|
})
|
||||||
"consistency",
|
updates.append({
|
||||||
"error CONS")]]})
|
'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Branche Konfidenz"] + 1)}{row_num_in_sheet}',
|
||||||
updates.append(
|
'values': [[branch_result.get("confidence", "N/A CONF")]]
|
||||||
{
|
})
|
||||||
'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Begruendung Abweichung Branche"] + 1)}{row_num_in_sheet}',
|
updates.append({
|
||||||
'values': [
|
'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Konsistenz Branche"] + 1)}{row_num_in_sheet}',
|
||||||
[
|
'values': [[branch_result.get("consistency", "error CONS")]]
|
||||||
branch_result.get(
|
})
|
||||||
"justification",
|
updates.append({
|
||||||
"No JUST")]]})
|
'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Begruendung Abweichung Branche"] + 1)}{row_num_in_sheet}',
|
||||||
|
'values': [[branch_result.get("justification", "No JUST")]]
|
||||||
|
})
|
||||||
except Exception as e_branch_eval:
|
except Exception as e_branch_eval:
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
f"FEHLER bei Branchen-Einstufung für Zeile {row_num_in_sheet}: {e_branch_eval}")
|
f"FEHLER bei Branchen-Einstufung für Zeile {row_num_in_sheet}: {e_branch_eval}")
|
||||||
|
# Optional: Fehlerwerte in die Spalten schreiben
|
||||||
|
error_updates = [
|
||||||
|
{"key": "Chat Vorschlag Branche", "value": "FEHLER_CALL"},
|
||||||
|
{"key": "Chat Branche Konfidenz", "value": "N/A"},
|
||||||
|
{"key": "Chat Konsistenz Branche", "value": "error"},
|
||||||
|
{"key": "Chat Begruendung Abweichung Branche", "value": str(e_branch_eval)[:100]}
|
||||||
|
]
|
||||||
|
for item in error_updates:
|
||||||
|
updates.append({
|
||||||
|
'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP[item["key"]] + 1)}{row_num_in_sheet}',
|
||||||
|
'values': [[item["value"]]]
|
||||||
|
})
|
||||||
|
|
||||||
# 3b, 3c, 3d: Weitere ChatGPT-Evaluationen (hier nicht detailliert implementiert, aber Platzhalter)
|
# 3b, 3c, 3d: Weitere ChatGPT-Evaluationen (hier nicht detailliert implementiert, aber Platzhalter)
|
||||||
# ... Logik für FSM-Relevanz, Mitarbeiter-Schätzung, Umsatz-Schätzung, etc. ...
|
# ... Logik für FSM-Relevanz, Mitarbeiter-Schätzung, Umsatz-Schätzung, etc. ...
|
||||||
|
|
||||||
# 3e. Konsolidierung Umsatz/Mitarbeiter (BD, BE)
|
# 3e. Konsolidierung Umsatz/Mitarbeiter (BD, BE)
|
||||||
self.logger.debug(
|
self.logger.debug(f" Zeile {row_num_in_sheet}: Konsolidiere Umsatz (BD) und Mitarbeiter (BE)...")
|
||||||
f" Zeile {row_num_in_sheet}: Konsolidiere Umsatz (BD) und Mitarbeiter (BE)...")
|
|
||||||
final_umsatz_str_konsolidiert = "k.A."
|
final_umsatz_str_konsolidiert = "k.A."
|
||||||
final_ma_str_konsolidiert = "k.A."
|
final_ma_str_konsolidiert = "k.A."
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user