bugfix
This commit is contained in:
@@ -918,92 +918,90 @@ class DataProcessor:
|
|||||||
process_ml_steps=True):
|
process_ml_steps=True):
|
||||||
"""
|
"""
|
||||||
Verarbeitet nur Zeilen, die in Spalte A mit 'x' markiert sind.
|
Verarbeitet nur Zeilen, die in Spalte A mit 'x' markiert sind.
|
||||||
|
NEU: Leert zuerst alle abgeleiteten Spalten für eine saubere Neubewertung.
|
||||||
"""
|
"""
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
f"Starte Re-Evaluierungsmodus (Spalte A = 'x'). Max. Zeilen: {row_limit if row_limit is not None else 'Unbegrenzt'}")
|
f"Starte Re-Evaluierungsmodus (Spalte A = 'x'). Max. Zeilen: {row_limit if row_limit is not None else 'Unbegrenzt'}")
|
||||||
selected_steps_log = []
|
|
||||||
if process_wiki_steps:
|
|
||||||
selected_steps_log.append("Wiki")
|
|
||||||
if process_chatgpt_steps:
|
|
||||||
selected_steps_log.append("ChatGPT")
|
|
||||||
if process_website_steps:
|
|
||||||
selected_steps_log.append("Website")
|
|
||||||
if process_ml_steps:
|
|
||||||
selected_steps_log.append("ML Predict")
|
|
||||||
self.logger.info(
|
|
||||||
f"Ausgewaehlte Schritte fuer Re-Eval: {', '.join(selected_steps_log) if selected_steps_log else 'Keine'}")
|
|
||||||
|
|
||||||
steps_to_run_set = set()
|
|
||||||
if process_wiki_steps:
|
|
||||||
steps_to_run_set.add('wiki')
|
|
||||||
if process_chatgpt_steps:
|
|
||||||
steps_to_run_set.add('chat')
|
|
||||||
if process_website_steps:
|
|
||||||
steps_to_run_set.add('web')
|
|
||||||
if process_ml_steps:
|
|
||||||
steps_to_run_set.add('ml_predict')
|
|
||||||
if not steps_to_run_set:
|
|
||||||
self.logger.warning(
|
|
||||||
"Keine Verarbeitungsschritte fuer Re-Eval ausgewaehlt. Modus wird uebersprungen.")
|
|
||||||
return
|
|
||||||
|
|
||||||
if not self.sheet_handler.load_data():
|
if not self.sheet_handler.load_data():
|
||||||
self.logger.error(
|
self.logger.error("Fehler beim Laden der Daten fuer Re-Evaluation.")
|
||||||
"Fehler beim Laden der Daten fuer Re-Evaluation.")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
all_data = self.sheet_handler.get_all_data_with_headers()
|
all_data = self.sheet_handler.get_all_data_with_headers()
|
||||||
header_rows = self.sheet_handler._header_rows
|
header_rows = self.sheet_handler._header_rows
|
||||||
if not all_data or len(all_data) <= header_rows:
|
if not all_data or len(all_data) <= header_rows:
|
||||||
self.logger.warning(
|
self.logger.warning("Keine Datenzeilen fuer Re-Evaluation gefunden.")
|
||||||
"Keine Datenzeilen fuer Re-Evaluation gefunden.")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
reeval_col_idx = COLUMN_MAP.get("ReEval Flag")
|
reeval_col_idx = COLUMN_MAP.get("ReEval Flag")
|
||||||
if reeval_col_idx is None:
|
if reeval_col_idx is None:
|
||||||
self.logger.critical(
|
self.logger.critical("FEHLER: 'ReEval Flag' nicht in COLUMN_MAP. Breche ab.")
|
||||||
"FEHLER: 'ReEval Flag' Spaltenindex nicht in COLUMN_MAP gefunden. Breche ab.")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
rows_to_process = []
|
rows_to_process = []
|
||||||
for idx, row_data in enumerate(all_data):
|
for idx, row_data in enumerate(all_data):
|
||||||
if idx < header_rows:
|
if idx < header_rows: continue
|
||||||
continue
|
if self._get_cell_value_safe(row_data, "ReEval Flag").strip().lower() == "x":
|
||||||
row_num_in_sheet = idx + 1
|
rows_to_process.append({'row_num': idx + 1, 'data': row_data})
|
||||||
cell_a_value = self._get_cell_value_safe(
|
|
||||||
row_data, "ReEval Flag").strip().lower()
|
|
||||||
if cell_a_value == "x":
|
|
||||||
rows_to_process.append(
|
|
||||||
{'row_num': row_num_in_sheet, 'data': row_data})
|
|
||||||
|
|
||||||
found_count = len(rows_to_process)
|
found_count = len(rows_to_process)
|
||||||
self.logger.info(f"{found_count} Zeilen mit ReEval-Flag 'x' gefunden.")
|
self.logger.info(f"{found_count} Zeilen mit ReEval-Flag 'x' gefunden.")
|
||||||
if found_count == 0:
|
if found_count == 0: return
|
||||||
return
|
|
||||||
|
# Spalten definieren, die vor der Neubewertung geleert werden sollen
|
||||||
|
cols_to_clear_keys = [
|
||||||
|
"Wiki URL", "Wiki Sitz Stadt", "Wiki Sitz Land", "Wiki Absatz", "Wiki Branche",
|
||||||
|
"Wiki Umsatz", "Wiki Mitarbeiter", "Wiki Kategorien", "Wikipedia Timestamp",
|
||||||
|
"Wiki Verif. Timestamp", "SerpAPI Wiki Search Timestamp", "Chat Wiki Konsistenzpruefung",
|
||||||
|
"Chat Begruendung Wiki Inkonsistenz", "Chat Vorschlag Wiki Artikel", "Begruendung bei Abweichung",
|
||||||
|
"Website Rohtext", "Website Zusammenfassung", "Website Meta-Details", "Website Scrape Timestamp", "URL Prüfstatus",
|
||||||
|
"Chat Vorschlag Branche", "Chat Branche Konfidenz", "Chat Konsistenz Branche",
|
||||||
|
"Chat Begruendung Abweichung Branche", "Finaler Umsatz (Wiki>CRM)", "Finaler Mitarbeiter (Wiki>CRM)",
|
||||||
|
"Geschaetzter Techniker Bucket", "Plausibilität Umsatz", "Plausibilität Mitarbeiter",
|
||||||
|
"Plausibilität Umsatz/MA Ratio", "Abweichung Umsatz CRM/Wiki", "Abweichung MA CRM/Wiki",
|
||||||
|
"Plausibilität Begründung", "Plausibilität Prüfdatum", "Timestamp letzte Pruefung", "Version", "Tokens"
|
||||||
|
]
|
||||||
|
|
||||||
|
clear_updates = []
|
||||||
|
for task in rows_to_process:
|
||||||
|
row_num = task['row_num']
|
||||||
|
for key in cols_to_clear_keys:
|
||||||
|
col_idx = COLUMN_MAP.get(key)
|
||||||
|
if col_idx is not None:
|
||||||
|
col_letter = self.sheet_handler._get_col_letter(col_idx + 1)
|
||||||
|
clear_updates.append({'range': f'{col_letter}{row_num}', 'values': [['']]})
|
||||||
|
|
||||||
|
if clear_updates:
|
||||||
|
self.logger.info(f"Leere {len(clear_updates)} Zellen für {found_count} Re-Eval-Zeilen zur Vorbereitung...")
|
||||||
|
self.sheet_handler.batch_update_cells(clear_updates)
|
||||||
|
self.logger.info("Vorbereitung abgeschlossen. Starte eigentliche Verarbeitung...")
|
||||||
|
time.sleep(2)
|
||||||
|
self.sheet_handler.load_data()
|
||||||
|
all_data = self.sheet_handler.get_all_data_with_headers()
|
||||||
|
|
||||||
processed_count_actual = 0
|
processed_count_actual = 0
|
||||||
|
steps_to_run_set = set(key for key, value in {'wiki': process_wiki_steps, 'chat': process_chatgpt_steps, 'web': process_website_steps, 'ml_predict': process_ml_steps}.items() if value)
|
||||||
|
|
||||||
for task in rows_to_process:
|
for task in rows_to_process:
|
||||||
if row_limit is not None and processed_count_actual >= row_limit:
|
if row_limit is not None and processed_count_actual >= row_limit:
|
||||||
self.logger.info(
|
self.logger.info(f"Zeilenlimit ({row_limit}) fuer Re-Evaluation erreicht.")
|
||||||
f"Zeilenlimit ({row_limit}) fuer Re-Evaluation erreicht.")
|
|
||||||
break
|
break
|
||||||
|
|
||||||
|
current_row_data = all_data[task['row_num'] - 1]
|
||||||
self.logger.info(f"Bearbeite Re-Eval Zeile {task['row_num']}...")
|
self.logger.info(f"Bearbeite Re-Eval Zeile {task['row_num']}...")
|
||||||
processed_count_actual += 1
|
processed_count_actual += 1
|
||||||
try:
|
try:
|
||||||
self._process_single_row(
|
self._process_single_row(
|
||||||
row_num_in_sheet=task['row_num'],
|
row_num_in_sheet=task['row_num'],
|
||||||
row_data=task['data'],
|
row_data=current_row_data,
|
||||||
steps_to_run=steps_to_run_set,
|
steps_to_run=steps_to_run_set,
|
||||||
force_reeval=True,
|
force_reeval=True,
|
||||||
clear_x_flag=clear_flag
|
clear_x_flag=clear_flag
|
||||||
)
|
)
|
||||||
except Exception as e_proc_reval:
|
except Exception as e_proc_reval:
|
||||||
self.logger.exception(
|
self.logger.exception(f"FEHLER bei Re-Evaluation von Zeile {task['row_num']}: {e_proc_reval}")
|
||||||
f"FEHLER bei Re-Evaluation von Zeile {task['row_num']}: {e_proc_reval}")
|
|
||||||
|
|
||||||
self.logger.info(
|
self.logger.info(f"Re-Evaluierung abgeschlossen. {processed_count_actual} Zeilen verarbeitet.")
|
||||||
f"Re-Evaluierung abgeschlossen. {processed_count_actual} Zeilen verarbeitet.")
|
|
||||||
|
|
||||||
# ==========================================================================
|
# ==========================================================================
|
||||||
# === Batch Processing Methods ===========================================
|
# === Batch Processing Methods ===========================================
|
||||||
|
|||||||
Reference in New Issue
Block a user