From 0fec249c70449304156b9002d2781c8d4e3400f4 Mon Sep 17 00:00:00 2001
From: Floke
Date: Fri, 18 Jul 2025 18:09:22 +0000
Subject: [PATCH] data_processor.py updated

---
 data_processor.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/data_processor.py b/data_processor.py
index d07a9993..ee984f0e 100644
--- a/data_processor.py
+++ b/data_processor.py
@@ -2316,6 +2316,14 @@ class DataProcessor:
         batch_sheet_updates = []
         # Iterate over the results of the final batch
         for row_num, result_dict in scraping_results.items():
+            # Safety check: make sure result_dict is a dictionary.
+            if not isinstance(result_dict, dict):
+                self.logger.error(f"Malformed result for row {row_num}: expected dict, got {type(result_dict)}. Skipping update for this row.")
+                # Write only the timestamp to prevent an endless retry loop
+                batch_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Scrape Timestamp"] + 1)}{row_num}', 'values': [[current_timestamp]]})
+                continue
+
+            # result_dict is now guaranteed to be a dictionary
             batch_sheet_updates.extend([
                 {'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Rohtext"] + 1)}{row_num}', 'values': [[result_dict.get('raw_text', 'k.A.')]]},
                 {'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Meta-Details"] + 1)}{row_num}', 'values': [[result_dict.get('meta_details', 'k.A.')]]},
@@ -2400,13 +2408,20 @@ class DataProcessor:
 
             # ADAPTED TO THE NEW LOGIC
             for row_num, result_dict in scraping_results.items():
+                # Safety check: make sure result_dict is a dictionary.
+                if not isinstance(result_dict, dict):
+                    self.logger.error(f"Malformed result for row {row_num}: expected dict, got {type(result_dict)}. Skipping update for this row.")
+                    # Write only the timestamp to prevent an endless retry loop
+                    batch_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Scrape Timestamp"] + 1)}{row_num}', 'values': [[current_timestamp]]})
+                    continue
+
+                # result_dict is now guaranteed to be a dictionary
                 batch_sheet_updates.extend([
                     {'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Rohtext"] + 1)}{row_num}', 'values': [[result_dict.get('raw_text', 'k.A.')]]},
                     {'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Meta-Details"] + 1)}{row_num}', 'values': [[result_dict.get('meta_details', 'k.A.')]]},
                     {'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Scrape Timestamp"] + 1)}{row_num}', 'values': [[current_timestamp]]},
                     {'range': f'{self.sheet_handler._get_col_letter(col_indices["Version"] + 1)}{row_num}', 'values': [[current_version]]}
                 ])
-            all_sheet_updates.extend(batch_sheet_updates)
 
         # --- Send the final sheet updates ---
         if all_sheet_updates:
@@ -2435,14 +2450,15 @@ class DataProcessor:
             return {
                 'row_num': row_num,
                 'raw_text': raw_text_result,
-                'meta_details': meta_details_result,
+                'meta_details': meta_details_result
             }
         except Exception as e:
+            # This safety net catches any unexpected errors that the helpers do not catch.
             self.logger.error(f"FATAL ERROR in scraping worker for row {row_num}: {e}", exc_info=True)
             return {
                 'row_num': row_num,
                 'raw_text': f'k.A. (FATAL WORKER ERROR: {e})',
-                'meta_details': 'k.A.',
+                'meta_details': f'k.A. (FATAL WORKER ERROR: {e})'
             }
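
Note (editor's addition, not part of the patch): both hunks apply the same guard, namely reject any non-dict worker result but still write the scrape timestamp so the row is not picked up and retried forever. Below is a minimal standalone sketch of that pattern; the function and parameter names (build_updates, text_col, timestamp_col) are hypothetical stand-ins for the sheet-handler plumbing in data_processor.py.

"""Standalone sketch of the defensive-update pattern used in the patch."""
import logging
from datetime import datetime, timezone

logger = logging.getLogger(__name__)

def build_updates(results, text_col="A", timestamp_col="C"):
    # `results` maps sheet row numbers to worker return values; a crashed
    # worker may hand back a string or None instead of the expected dict.
    timestamp = datetime.now(timezone.utc).isoformat()
    updates = []
    for row_num, result in results.items():
        if not isinstance(result, dict):
            logger.error("Malformed result for row %s: expected dict, got %s",
                         row_num, type(result))
            # Still stamp the row so the next run does not pick it up again.
            updates.append({"range": f"{timestamp_col}{row_num}",
                            "values": [[timestamp]]})
            continue
        # Guaranteed dict from here on; .get() covers missing keys.
        updates.extend([
            {"range": f"{text_col}{row_num}",
             "values": [[result.get("raw_text", "k.A.")]]},
            {"range": f"{timestamp_col}{row_num}",
             "values": [[timestamp]]},
        ])
    return updates

if __name__ == "__main__":
    logging.basicConfig(level=logging.ERROR)
    # Row 5 simulates a broken worker result; row 6 is well-formed.
    for upd in build_updates({5: "timeout", 6: {"raw_text": "hello"}}):
        print(upd)

Running the sketch shows row 5 (a simulated worker crash) receiving only its timestamp cell while row 6 gets the full update, which mirrors the behaviour the patch introduces in both loops.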