bugfix
This commit is contained in:
@@ -3954,45 +3954,124 @@ class DataProcessor:
|
||||
Batch-Prozess NUR für Wikipedia-Verifizierung (Spalten S-U, AX).
|
||||
Findet Startzeile ab erster Zelle mit leerem AX.
|
||||
"""
|
||||
logging.info(f"Starte Wikipedia-Verifizierungs-Batch. Limit: {limit if limit is not None else 'Unbegrenzt'}")
|
||||
if not self.sheet_handler.load_data(): return logging.error("FEHLER beim Laden der Daten.")
|
||||
all_data = self.sheet_handler.get_all_data_with_headers(); header_rows = 5
|
||||
if not all_data or len(all_data) <= header_rows: return logging.warning("Keine Daten gefunden.")
|
||||
logging.info(
|
||||
f"Starte Wikipedia-Verifizierungs-Batch. Limit: {limit if limit is not None else 'Unbegrenzt'}"
|
||||
)
|
||||
if not self.sheet_handler.load_data():
|
||||
return logging.error("FEHLER beim Laden der Daten.")
|
||||
|
||||
all_data = self.sheet_handler.get_all_data_with_headers()
|
||||
header_rows = 5
|
||||
if not all_data or len(all_data) <= header_rows:
|
||||
return logging.warning("Keine Daten gefunden.")
|
||||
|
||||
# Schlüssel holen und prüfen
|
||||
timestamp_col_key = "Wiki Verif. Timestamp"
|
||||
timestamp_col_index = COLUMN_MAP.get(timestamp_col_key)
|
||||
if timestamp_col_index is None:
|
||||
return logging.critical(f"FEHLER: Schlüssel '{timestamp_col_key}' fehlt.")
|
||||
|
||||
timestamp_col_key = "Wiki Verif. Timestamp"; timestamp_col_index = COLUMN_MAP.get(timestamp_col_key); if timestamp_col_index is None: return logging.critical(f"FEHLER: Schlüssel '{timestamp_col_key}' fehlt.")
|
||||
ts_col_letter = self.sheet_handler._get_col_letter(timestamp_col_index + 1)
|
||||
|
||||
start_data_index = self.sheet_handler.get_start_row_index(check_column_key=timestamp_col_key, min_sheet_row=header_rows + 1); if start_data_index == -1: return logging.error(f"FEHLER bei Startzeilensuche auf Spalte '{timestamp_col_key}'."); if start_data_index >= len(self.sheet_handler.get_data()): logging.info("Alle Zeilen mit Timestamp gefüllt. Nichts zu tun."); return
|
||||
# Erste Zeile finden, in der AX leer ist
|
||||
start_data_index = self.sheet_handler.get_start_row_index(
|
||||
check_column_key=timestamp_col_key,
|
||||
min_sheet_row=header_rows + 1
|
||||
)
|
||||
if start_data_index == -1:
|
||||
return logging.error(f"FEHLER bei Startzeilensuche auf Spalte '{timestamp_col_key}'.")
|
||||
if start_data_index >= len(self.sheet_handler.get_data()):
|
||||
logging.info("Alle Zeilen mit Timestamp gefüllt. Nichts zu tun.")
|
||||
return
|
||||
|
||||
start_sheet_row = start_data_index + header_rows + 1; total_sheet_rows = len(all_data); end_sheet_row = total_sheet_rows
|
||||
if limit is not None and limit >= 0: end_sheet_row = min(start_sheet_row + limit - 1, total_sheet_rows); if limit == 0: logging.info("Limit 0."); return
|
||||
if start_sheet_row > end_sheet_row: logging.warning("Start nach Ende (Limit)."); return
|
||||
# Bereich festlegen
|
||||
start_sheet_row = start_data_index + header_rows + 1
|
||||
total_sheet_rows = len(all_data)
|
||||
end_sheet_row = total_sheet_rows
|
||||
|
||||
logging.info(f"Verarbeite Sheet-Zeilen {start_sheet_row} bis {end_sheet_row} für Wiki Verifizierung (Batch).")
|
||||
if limit is not None and limit >= 0:
|
||||
end_sheet_row = min(start_sheet_row + limit - 1, total_sheet_rows)
|
||||
if limit == 0:
|
||||
logging.info("Limit 0.")
|
||||
return
|
||||
if start_sheet_row > end_sheet_row:
|
||||
logging.warning("Start nach Ende (Limit).")
|
||||
return
|
||||
|
||||
batch_size = Config.BATCH_SIZE; current_batch = []; current_row_numbers = []; processed_count = 0
|
||||
logging.info(
|
||||
f"Verarbeite Sheet-Zeilen {start_sheet_row} bis {end_sheet_row} "
|
||||
"für Wiki-Verifizierung (Batch)."
|
||||
)
|
||||
|
||||
batch_size = Config.BATCH_SIZE
|
||||
current_batch = []
|
||||
current_row_numbers = []
|
||||
processed_count = 0
|
||||
|
||||
for i in range(start_sheet_row, end_sheet_row + 1):
|
||||
row_index_in_list = i - 1; row = all_data[row_index_in_list]
|
||||
row_index_in_list = i - 1
|
||||
row = all_data[row_index_in_list]
|
||||
|
||||
company_name = self._get_cell_value(row, "CRM Name"); crm_desc = self._get_cell_value(row, "CRM Beschreibung")
|
||||
wiki_url = self._get_cell_value(row, "Wiki URL"); wiki_paragraph = self._get_cell_value(row, "Wiki Absatz")
|
||||
wiki_categories = self._get_cell_value(row, "Wiki Kategorien")
|
||||
company_name = self._get_cell_value(row, "CRM Name")
|
||||
crm_desc = self._get_cell_value(row, "CRM Beschreibung")
|
||||
wiki_url = self._get_cell_value(row, "Wiki URL")
|
||||
wiki_paragraph = self._get_cell_value(row, "Wiki Absatz")
|
||||
wiki_categories = self._get_cell_value(row, "Wiki Kategorien")
|
||||
|
||||
if wiki_url != 'k.A.' or wiki_paragraph != 'k.A.' or wiki_categories != 'k.A.':
|
||||
entry_text = ( f"Eintrag {i}:\n" f" Firmenname: {company_name}\n" f" CRM-Beschreibung: {crm_desc[:200]}...\n" f" Wikipedia-URL: {wiki_url}\n" f" Wiki-Absatz: {wiki_paragraph[:200]}...\n" f" Wiki-Kategorien: {wiki_categories[:200]}...\n" f"----\n" )
|
||||
current_batch.append(entry_text); current_row_numbers.append(i); processed_count += 1
|
||||
if wiki_url != 'k.A.' or wiki_paragraph != 'k.A.' or wiki_categories != 'k.A.':
|
||||
entry_text = (
|
||||
f"Eintrag {i}:\n"
|
||||
f" Firmenname: {company_name}\n"
|
||||
f" CRM-Beschreibung: {crm_desc[:200]}...\n"
|
||||
f" Wikipedia-URL: {wiki_url}\n"
|
||||
f" Wiki-Absatz: {wiki_paragraph[:200]}...\n"
|
||||
f" Wiki-Kategorien: {wiki_categories[:200]}...\n"
|
||||
"----\n"
|
||||
)
|
||||
current_batch.append(entry_text)
|
||||
current_row_numbers.append(i)
|
||||
processed_count += 1
|
||||
|
||||
if len(current_batch) >= batch_size or i == end_sheet_row:
|
||||
if current_batch:
|
||||
try: _process_batch(self.sheet_handler.sheet, current_batch, current_row_numbers); # Globale Helferfunktion
|
||||
wiki_ts_updates = []; current_wiki_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S");
|
||||
for row_num in current_row_numbers: wiki_ts_updates.append({'range': f'{ts_col_letter}{row_num}', 'values': [[current_wiki_timestamp]]})
|
||||
if wiki_ts_updates: success_ts = self.sheet_handler.batch_update_cells(wiki_ts_updates); if success_ts: logging.debug(f"Wiki Verif. Timestamp {ts_col_letter} für Batch {current_row_numbers[0]}-{current_row_numbers[-1]} gesetzt."); else: logging.error(f"FEHLER beim Setzen des Wiki Verif. Timestamps {ts_col_letter} für Batch.");
|
||||
except Exception as e_batch: logging.error(f"FEHLER bei Verarbeitung von Batch {current_row_numbers[0]}-{current_row_numbers[-1]} in _process_batch: {e_batch}"); pass
|
||||
time.sleep(Config.RETRY_DELAY)
|
||||
current_batch = []; current_row_numbers = []
|
||||
logging.info(f"Wikipedia-Verifizierungs-Batch abgeschlossen. {processed_count} Zeilen in Batches verarbeitet.")
|
||||
if len(current_batch) >= batch_size or i == end_sheet_row:
|
||||
if current_batch:
|
||||
try:
|
||||
_process_batch(self.sheet_handler.sheet,
|
||||
current_batch,
|
||||
current_row_numbers)
|
||||
wiki_ts_updates = []
|
||||
current_wiki_ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
for row_num in current_row_numbers:
|
||||
wiki_ts_updates.append({
|
||||
'range': f'{ts_col_letter}{row_num}',
|
||||
'values': [[current_wiki_ts]]
|
||||
})
|
||||
if wiki_ts_updates:
|
||||
success_ts = self.sheet_handler.batch_update_cells(wiki_ts_updates)
|
||||
if success_ts:
|
||||
logging.debug(
|
||||
f"Wiki Verif. Timestamp {ts_col_letter} "
|
||||
f"für Batch {current_row_numbers[0]}–"
|
||||
f"{current_row_numbers[-1]} gesetzt."
|
||||
)
|
||||
else:
|
||||
logging.error(
|
||||
"FEHLER beim Setzen des Wiki Verif. Timestamps."
|
||||
)
|
||||
except Exception as e_batch:
|
||||
logging.error(
|
||||
f"FEHLER bei Verarbeitung von Batch "
|
||||
f"{current_row_numbers[0]}–"
|
||||
f"{current_row_numbers[-1]} in _process_batch: {e_batch}"
|
||||
)
|
||||
finally:
|
||||
time.sleep(Config.RETRY_DELAY)
|
||||
current_batch = []
|
||||
current_row_numbers = []
|
||||
|
||||
logging.info(
|
||||
f"Wikipedia-Verifizierungs-Batch abgeschlossen. "
|
||||
f"{processed_count} Zeilen in Batches verarbeitet."
|
||||
)
|
||||
|
||||
# process_website_batch Methode
|
||||
def process_website_batch(self, limit=None):
|
||||
|
||||
Reference in New Issue
Block a user