data_processor.py aktualisiert
This commit is contained in:
@@ -2316,6 +2316,14 @@ class DataProcessor:
|
|||||||
batch_sheet_updates = []
|
batch_sheet_updates = []
|
||||||
# Iteriere über die Ergebnisse des finalen Batches
|
# Iteriere über die Ergebnisse des finalen Batches
|
||||||
for row_num, result_dict in scraping_results.items():
|
for row_num, result_dict in scraping_results.items():
|
||||||
|
# Sicherheitsprüfung: Stelle sicher, dass result_dict ein Dictionary ist.
|
||||||
|
if not isinstance(result_dict, dict):
|
||||||
|
self.logger.error(f"Fehlerhaftes Ergebnis für Zeile {row_num}: Erwartete dict, bekam {type(result_dict)}. Überspringe Update für diese Zeile.")
|
||||||
|
# Setze nur den Timestamp, um eine Endlosschleife zu verhindern
|
||||||
|
batch_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Scrape Timestamp"] + 1)}{row_num}', 'values': [[current_timestamp]]})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# result_dict ist jetzt garantiert ein Dictionary
|
||||||
batch_sheet_updates.extend([
|
batch_sheet_updates.extend([
|
||||||
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Rohtext"] + 1)}{row_num}', 'values': [[result_dict.get('raw_text', 'k.A.')]]},
|
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Rohtext"] + 1)}{row_num}', 'values': [[result_dict.get('raw_text', 'k.A.')]]},
|
||||||
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Meta-Details"] + 1)}{row_num}', 'values': [[result_dict.get('meta_details', 'k.A.')]]},
|
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Meta-Details"] + 1)}{row_num}', 'values': [[result_dict.get('meta_details', 'k.A.')]]},
|
||||||
@@ -2400,13 +2408,20 @@ class DataProcessor:
|
|||||||
|
|
||||||
# ANPASSUNG AN NEUE LOGIK
|
# ANPASSUNG AN NEUE LOGIK
|
||||||
for row_num, result_dict in scraping_results.items():
|
for row_num, result_dict in scraping_results.items():
|
||||||
|
# Sicherheitsprüfung: Stelle sicher, dass result_dict ein Dictionary ist.
|
||||||
|
if not isinstance(result_dict, dict):
|
||||||
|
self.logger.error(f"Fehlerhaftes Ergebnis für Zeile {row_num}: Erwartete dict, bekam {type(result_dict)}. Überspringe Update für diese Zeile.")
|
||||||
|
# Setze nur den Timestamp, um eine Endlosschleife zu verhindern
|
||||||
|
batch_sheet_updates.append({'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Scrape Timestamp"] + 1)}{row_num}', 'values': [[current_timestamp]]})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# result_dict ist jetzt garantiert ein Dictionary
|
||||||
batch_sheet_updates.extend([
|
batch_sheet_updates.extend([
|
||||||
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Rohtext"] + 1)}{row_num}', 'values': [[result_dict.get('raw_text', 'k.A.')]]},
|
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Rohtext"] + 1)}{row_num}', 'values': [[result_dict.get('raw_text', 'k.A.')]]},
|
||||||
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Meta-Details"] + 1)}{row_num}', 'values': [[result_dict.get('meta_details', 'k.A.')]]},
|
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Meta-Details"] + 1)}{row_num}', 'values': [[result_dict.get('meta_details', 'k.A.')]]},
|
||||||
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Scrape Timestamp"] + 1)}{row_num}', 'values': [[current_timestamp]]},
|
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Website Scrape Timestamp"] + 1)}{row_num}', 'values': [[current_timestamp]]},
|
||||||
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Version"] + 1)}{row_num}', 'values': [[current_version]]}
|
{'range': f'{self.sheet_handler._get_col_letter(col_indices["Version"] + 1)}{row_num}', 'values': [[current_version]]}
|
||||||
])
|
])
|
||||||
all_sheet_updates.extend(batch_sheet_updates)
|
|
||||||
|
|
||||||
# --- Finale Sheet Updates senden ---
|
# --- Finale Sheet Updates senden ---
|
||||||
if all_sheet_updates:
|
if all_sheet_updates:
|
||||||
@@ -2435,14 +2450,15 @@ class DataProcessor:
|
|||||||
return {
|
return {
|
||||||
'row_num': row_num,
|
'row_num': row_num,
|
||||||
'raw_text': raw_text_result,
|
'raw_text': raw_text_result,
|
||||||
'meta_details': meta_details_result,
|
'meta_details': meta_details_result
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# Dieses Sicherheitsnetz fängt alle unerwarteten Fehler ab, die die Helper nicht fangen.
|
||||||
self.logger.error(f"FATALER FEHLER im Scraping Worker für Zeile {row_num}: {e}", exc_info=True)
|
self.logger.error(f"FATALER FEHLER im Scraping Worker für Zeile {row_num}: {e}", exc_info=True)
|
||||||
return {
|
return {
|
||||||
'row_num': row_num,
|
'row_num': row_num,
|
||||||
'raw_text': f'k.A. (FATALER WORKER FEHLER: {e})',
|
'raw_text': f'k.A. (FATALER WORKER FEHLER: {e})',
|
||||||
'meta_details': 'k.A.',
|
'meta_details': f'k.A. (FATALER WORKER FEHLER: {e})'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user