data_processor.py aktualisiert

This commit is contained in:
2025-07-28 13:28:15 +00:00
parent a5d3a50e24
commit e5bbfb0428

View File

@@ -1463,31 +1463,26 @@ class DataProcessor:
self.logger.info(f"FSM-Pitch-Generierung abgeschlossen. {processed_count} Zeilen bearbeitet.") self.logger.info(f"FSM-Pitch-Generierung abgeschlossen. {processed_count} Zeilen bearbeitet.")
def reclassify_all_branches(self, start_sheet_row=None, limit=None, batch_size=20): def reclassify_all_branches(self, start_sheet_row=None, limit=None, batch_size=50):
""" """
Führt für alle relevanten Zeilen eine neue Brancheneinstufung (v2.0) in Batches durch. Führt für alle relevanten Zeilen eine neue Brancheneinstufung (v2.0) in Batches durch.
Nutzt nun auch die externe Branchenbeschreibung.
""" """
self.logger.info(f"Starte Modus 'reclassify_branches' im Batch-Modus (Größe: {batch_size}). Bereich: {start_sheet_row or 'Start'}, Limit: {limit or 'Unbegrenzt'}") self.logger.info(f"Starte Modus 'reclassify_branches' im Batch-Modus (Größe: {batch_size}). Bereich: {start_sheet_row or 'Start'}, Limit: {limit or 'Unbegrenzt'}")
if not self.sheet_handler.load_data(): if not self.sheet_handler.load_data():
return return
# DIESE ZEILEN WAREN DAS PROBLEM -> JETZT KORRIGIERT
all_data = self.sheet_handler.get_all_data_with_headers() all_data = self.sheet_handler.get_all_data_with_headers()
header_rows = self.sheet_handler._header_rows header_rows = self.sheet_handler._header_rows
# Wichtig: Der Start MUSS nach den Header-Zeilen sein
effective_start = max(header_rows + 1, start_sheet_row or 0) effective_start = max(header_rows + 1, start_sheet_row or 0)
tasks = [] tasks = []
# Wir starten die Schleife erst NACH den Header-Zeilen
for i in range(effective_start - 1, len(all_data)): for i in range(effective_start - 1, len(all_data)):
if limit is not None and len(tasks) >= limit: if limit is not None and len(tasks) >= limit:
break break
row_data = all_data[i] row_data = all_data[i]
company_name = self._get_cell_value_safe(row_data, "CRM Name").strip() company_name = self._get_cell_value_safe(row_data, "CRM Name").strip()
# Zusätzlicher Check, um sicherzustellen, dass wir keine Header-Texte verarbeiten
if company_name and "firmennamen" not in company_name.lower(): if company_name and "firmennamen" not in company_name.lower():
tasks.append({'row_num': i + 1, 'data': row_data}) tasks.append({'row_num': i + 1, 'data': row_data})
@@ -1500,51 +1495,44 @@ class DataProcessor:
all_sheet_updates = [] all_sheet_updates = []
now_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") now_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Verarbeite die Tasks in Batches
for i in range(0, len(tasks), batch_size): for i in range(0, len(tasks), batch_size):
batch_tasks = tasks[i:i + batch_size] batch_tasks = tasks[i:i + batch_size]
self.logger.info(f"Verarbeite Batch {i//batch_size + 1}/{(len(tasks) + batch_size - 1)//batch_size} (Zeilen {batch_tasks[0]['row_num']} bis {batch_tasks[-1]['row_num']})...") self.logger.info(f"Verarbeite Batch {i//batch_size + 1}/{(len(tasks) + batch_size - 1)//batch_size} (Zeilen {batch_tasks[0]['row_num']} bis {batch_tasks[-1]['row_num']})...")
# Bereite die Daten für den Batch-Prompt vor
companies_data_for_prompt = [] companies_data_for_prompt = []
for task in batch_tasks: for task in batch_tasks:
row_data = task['data'] row_data = task['data']
companies_data_for_prompt.append({ companies_data_for_prompt.append({
"row_num": task['row_num'], "row_num": task['row_num'],
"name": self._get_cell_value_safe(row_data, "CRM Name"), "name": self._get_cell_value_safe(row_data, "CRM Name"),
# NEU: Spalte J hinzufügen
"external_branch_desc": self._get_cell_value_safe(row_data, "CRM Beschreibung Branche extern"),
"summary": self._get_cell_value_safe(row_data, "Website Zusammenfassung"), "summary": self._get_cell_value_safe(row_data, "Website Zusammenfassung"),
"wiki": self._get_cell_value_safe(row_data, "Wiki Absatz") "wiki": self._get_cell_value_safe(row_data, "Wiki Absatz")
}) })
# Rufe die neue Batch-Funktion auf
batch_results = evaluate_branches_batch(companies_data_for_prompt) batch_results = evaluate_branches_batch(companies_data_for_prompt)
# ... (Rest der Funktion zum Verarbeiten der Ergebnisse und Schreiben der Updates bleibt unverändert) ...
if batch_results: if batch_results:
# Ordne die Ergebnisse den richtigen Zeilen zu
results_by_row = {res['row_num']: res for res in batch_results} results_by_row = {res['row_num']: res for res in batch_results}
for task in batch_tasks: for task in batch_tasks:
row_num = task['row_num'] row_num = task['row_num']
result = results_by_row.get(row_num) result = results_by_row.get(row_num)
if result: if result:
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [[result.get('Branche')]]}) all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [[result.get('Branche')]]})
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Branche Konfidenz"]["index"] + 1)}{row_num}', 'values': [[result.get('Konfidenz')]]}) all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Branche Konfidenz"]["index"] + 1)}{row_num}', 'values': [[result.get('Konfidenz')]]})
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Begruendung Abweichung Branche"]["index"] + 1)}{row_num}', 'values': [[result.get('Begruendung')]]}) all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Begruendung Abweichung Branche"]["index"] + 1)}{row_num}', 'values': [[result.get('Begruendung')]]})
else: else:
self.logger.error(f"Kein Ergebnis für Zeile {row_num} im Batch-Resultat gefunden.")
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [['FEHLER (Batch-Antwort)']]} ) all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [['FEHLER (Batch-Antwort)']]} )
# Timestamp immer setzen
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Timestamp letzte Pruefung"]["index"] + 1)}{row_num}', 'values': [[now_timestamp]]}) all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Timestamp letzte Pruefung"]["index"] + 1)}{row_num}', 'values': [[now_timestamp]]})
else: else:
self.logger.error(f"Batch-Verarbeitung für Zeilen {batch_tasks[0]['row_num']} bis {batch_tasks[-1]['row_num']} fehlgeschlagen. Setze Fehlerstatus.") self.logger.error(f"Batch-Verarbeitung fehlgeschlagen. Setze Fehlerstatus.")
for task in batch_tasks: for task in batch_tasks:
row_num = task['row_num'] row_num = task['row_num']
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [['FEHLER (Batch-API)']]} ) all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [['FEHLER (Batch-API)']]} )
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Timestamp letzte Pruefung"]["index"] + 1)}{row_num}', 'values': [[now_timestamp]]}) all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Timestamp letzte Pruefung"]["index"] + 1)}{row_num}', 'values': [[now_timestamp]]})
# Finalen Batch-Update senden
if all_sheet_updates: if all_sheet_updates:
self.logger.info(f"Sende finales Batch-Update für {len(tasks)} bewertete Branchen...") self.logger.info(f"Sende finales Batch-Update für {len(tasks)} bewertete Branchen...")
self.sheet_handler.batch_update_cells(all_sheet_updates) self.sheet_handler.batch_update_cells(all_sheet_updates)