data_processor.py aktualisiert
This commit is contained in:
@@ -1463,31 +1463,26 @@ class DataProcessor:
|
|||||||
|
|
||||||
self.logger.info(f"FSM-Pitch-Generierung abgeschlossen. {processed_count} Zeilen bearbeitet.")
|
self.logger.info(f"FSM-Pitch-Generierung abgeschlossen. {processed_count} Zeilen bearbeitet.")
|
||||||
|
|
||||||
def reclassify_all_branches(self, start_sheet_row=None, limit=None, batch_size=20):
|
def reclassify_all_branches(self, start_sheet_row=None, limit=None, batch_size=50):
|
||||||
"""
|
"""
|
||||||
Führt für alle relevanten Zeilen eine neue Brancheneinstufung (v2.0) in Batches durch.
|
Führt für alle relevanten Zeilen eine neue Brancheneinstufung (v2.0) in Batches durch.
|
||||||
|
Nutzt nun auch die externe Branchenbeschreibung.
|
||||||
"""
|
"""
|
||||||
self.logger.info(f"Starte Modus 'reclassify_branches' im Batch-Modus (Größe: {batch_size}). Bereich: {start_sheet_row or 'Start'}, Limit: {limit or 'Unbegrenzt'}")
|
self.logger.info(f"Starte Modus 'reclassify_branches' im Batch-Modus (Größe: {batch_size}). Bereich: {start_sheet_row or 'Start'}, Limit: {limit or 'Unbegrenzt'}")
|
||||||
|
|
||||||
if not self.sheet_handler.load_data():
|
if not self.sheet_handler.load_data():
|
||||||
return
|
return
|
||||||
|
|
||||||
# DIESE ZEILEN WAREN DAS PROBLEM -> JETZT KORRIGIERT
|
|
||||||
all_data = self.sheet_handler.get_all_data_with_headers()
|
all_data = self.sheet_handler.get_all_data_with_headers()
|
||||||
header_rows = self.sheet_handler._header_rows
|
header_rows = self.sheet_handler._header_rows
|
||||||
|
|
||||||
# Wichtig: Der Start MUSS nach den Header-Zeilen sein
|
|
||||||
effective_start = max(header_rows + 1, start_sheet_row or 0)
|
effective_start = max(header_rows + 1, start_sheet_row or 0)
|
||||||
|
|
||||||
tasks = []
|
tasks = []
|
||||||
# Wir starten die Schleife erst NACH den Header-Zeilen
|
|
||||||
for i in range(effective_start - 1, len(all_data)):
|
for i in range(effective_start - 1, len(all_data)):
|
||||||
if limit is not None and len(tasks) >= limit:
|
if limit is not None and len(tasks) >= limit:
|
||||||
break
|
break
|
||||||
|
|
||||||
row_data = all_data[i]
|
row_data = all_data[i]
|
||||||
company_name = self._get_cell_value_safe(row_data, "CRM Name").strip()
|
company_name = self._get_cell_value_safe(row_data, "CRM Name").strip()
|
||||||
# Zusätzlicher Check, um sicherzustellen, dass wir keine Header-Texte verarbeiten
|
|
||||||
if company_name and "firmennamen" not in company_name.lower():
|
if company_name and "firmennamen" not in company_name.lower():
|
||||||
tasks.append({'row_num': i + 1, 'data': row_data})
|
tasks.append({'row_num': i + 1, 'data': row_data})
|
||||||
|
|
||||||
@@ -1500,51 +1495,44 @@ class DataProcessor:
|
|||||||
all_sheet_updates = []
|
all_sheet_updates = []
|
||||||
now_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
now_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
# Verarbeite die Tasks in Batches
|
|
||||||
for i in range(0, len(tasks), batch_size):
|
for i in range(0, len(tasks), batch_size):
|
||||||
batch_tasks = tasks[i:i + batch_size]
|
batch_tasks = tasks[i:i + batch_size]
|
||||||
self.logger.info(f"Verarbeite Batch {i//batch_size + 1}/{(len(tasks) + batch_size - 1)//batch_size} (Zeilen {batch_tasks[0]['row_num']} bis {batch_tasks[-1]['row_num']})...")
|
self.logger.info(f"Verarbeite Batch {i//batch_size + 1}/{(len(tasks) + batch_size - 1)//batch_size} (Zeilen {batch_tasks[0]['row_num']} bis {batch_tasks[-1]['row_num']})...")
|
||||||
|
|
||||||
# Bereite die Daten für den Batch-Prompt vor
|
|
||||||
companies_data_for_prompt = []
|
companies_data_for_prompt = []
|
||||||
for task in batch_tasks:
|
for task in batch_tasks:
|
||||||
row_data = task['data']
|
row_data = task['data']
|
||||||
companies_data_for_prompt.append({
|
companies_data_for_prompt.append({
|
||||||
"row_num": task['row_num'],
|
"row_num": task['row_num'],
|
||||||
"name": self._get_cell_value_safe(row_data, "CRM Name"),
|
"name": self._get_cell_value_safe(row_data, "CRM Name"),
|
||||||
|
# NEU: Spalte J hinzufügen
|
||||||
|
"external_branch_desc": self._get_cell_value_safe(row_data, "CRM Beschreibung Branche extern"),
|
||||||
"summary": self._get_cell_value_safe(row_data, "Website Zusammenfassung"),
|
"summary": self._get_cell_value_safe(row_data, "Website Zusammenfassung"),
|
||||||
"wiki": self._get_cell_value_safe(row_data, "Wiki Absatz")
|
"wiki": self._get_cell_value_safe(row_data, "Wiki Absatz")
|
||||||
})
|
})
|
||||||
|
|
||||||
# Rufe die neue Batch-Funktion auf
|
|
||||||
batch_results = evaluate_branches_batch(companies_data_for_prompt)
|
batch_results = evaluate_branches_batch(companies_data_for_prompt)
|
||||||
|
|
||||||
|
# ... (Rest der Funktion zum Verarbeiten der Ergebnisse und Schreiben der Updates bleibt unverändert) ...
|
||||||
if batch_results:
|
if batch_results:
|
||||||
# Ordne die Ergebnisse den richtigen Zeilen zu
|
|
||||||
results_by_row = {res['row_num']: res for res in batch_results}
|
results_by_row = {res['row_num']: res for res in batch_results}
|
||||||
|
|
||||||
for task in batch_tasks:
|
for task in batch_tasks:
|
||||||
row_num = task['row_num']
|
row_num = task['row_num']
|
||||||
result = results_by_row.get(row_num)
|
result = results_by_row.get(row_num)
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [[result.get('Branche')]]})
|
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [[result.get('Branche')]]})
|
||||||
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Branche Konfidenz"]["index"] + 1)}{row_num}', 'values': [[result.get('Konfidenz')]]})
|
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Branche Konfidenz"]["index"] + 1)}{row_num}', 'values': [[result.get('Konfidenz')]]})
|
||||||
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Begruendung Abweichung Branche"]["index"] + 1)}{row_num}', 'values': [[result.get('Begruendung')]]})
|
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Begruendung Abweichung Branche"]["index"] + 1)}{row_num}', 'values': [[result.get('Begruendung')]]})
|
||||||
else:
|
else:
|
||||||
self.logger.error(f"Kein Ergebnis für Zeile {row_num} im Batch-Resultat gefunden.")
|
|
||||||
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [['FEHLER (Batch-Antwort)']]} )
|
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [['FEHLER (Batch-Antwort)']]} )
|
||||||
|
|
||||||
# Timestamp immer setzen
|
|
||||||
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Timestamp letzte Pruefung"]["index"] + 1)}{row_num}', 'values': [[now_timestamp]]})
|
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Timestamp letzte Pruefung"]["index"] + 1)}{row_num}', 'values': [[now_timestamp]]})
|
||||||
else:
|
else:
|
||||||
self.logger.error(f"Batch-Verarbeitung für Zeilen {batch_tasks[0]['row_num']} bis {batch_tasks[-1]['row_num']} fehlgeschlagen. Setze Fehlerstatus.")
|
self.logger.error(f"Batch-Verarbeitung fehlgeschlagen. Setze Fehlerstatus.")
|
||||||
for task in batch_tasks:
|
for task in batch_tasks:
|
||||||
row_num = task['row_num']
|
row_num = task['row_num']
|
||||||
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [['FEHLER (Batch-API)']]} )
|
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Chat Vorschlag Branche"]["index"] + 1)}{row_num}', 'values': [['FEHLER (Batch-API)']]} )
|
||||||
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Timestamp letzte Pruefung"]["index"] + 1)}{row_num}', 'values': [[now_timestamp]]})
|
all_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(COLUMN_MAP["Timestamp letzte Pruefung"]["index"] + 1)}{row_num}', 'values': [[now_timestamp]]})
|
||||||
|
|
||||||
# Finalen Batch-Update senden
|
|
||||||
if all_sheet_updates:
|
if all_sheet_updates:
|
||||||
self.logger.info(f"Sende finales Batch-Update für {len(tasks)} bewertete Branchen...")
|
self.logger.info(f"Sende finales Batch-Update für {len(tasks)} bewertete Branchen...")
|
||||||
self.sheet_handler.batch_update_cells(all_sheet_updates)
|
self.sheet_handler.batch_update_cells(all_sheet_updates)
|
||||||
|
|||||||
Reference in New Issue
Block a user