bugfix
This commit is contained in:
@@ -2260,57 +2260,55 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index
|
||||
|
||||
# --- Timestamp-Prüfung für jede Zeile (AT) ---
|
||||
ts_value_at = "INDEX_FEHLER"
|
||||
should_skip = False # Standardmäßig nicht überspringen
|
||||
should_skip = False
|
||||
if len(row) > timestamp_col_index:
|
||||
ts_value_at = row[timestamp_col_index]
|
||||
# Vereinfachte Prüfung: Ist der String (nach strip) NICHT leer?
|
||||
if str(ts_value_at).strip():
|
||||
should_skip = True
|
||||
|
||||
# Debug Log (zeigt jetzt die 'should_skip' Variable)
|
||||
log_debug = (i < start_row_index_in_sheet + 5 or i > end_row_index_in_sheet - 5 or i % 500 == 0 or i in range(2122, 2132)) # Zeige es für die Problemzone
|
||||
# Debug Log
|
||||
log_debug = (i < start_row_index_in_sheet + 5 or i > end_row_index_in_sheet - 5 or i % 500 == 0 or i in range(2122, 2132))
|
||||
if log_debug:
|
||||
debug_print(f"Zeile {i} (Website Check): Prüfe Timestamp {ts_col_letter}. Rohwert='{ts_value_at}'. Überspringen? -> {should_skip}")
|
||||
|
||||
if should_skip:
|
||||
# --- NEU: Explizites Logging VOR dem continue ---
|
||||
debug_print(f"Zeile {i}: *** WIRD ÜBERSPRUNGEN (Timestamp AT vorhanden) ***")
|
||||
# --- Explizites Logging VOR dem continue ---
|
||||
# debug_print(f"Zeile {i}: *** WIRD ÜBERSPRUNGEN (Timestamp AT vorhanden) ***") # Weniger Lärm
|
||||
skipped_count += 1
|
||||
continue # Springe zur nächsten Iteration
|
||||
# --- Ende Timestamp-Prüfung ---
|
||||
else:
|
||||
# --- NEU: Gesamter Verarbeitungsblock im ELSE ---
|
||||
debug_print(f"Zeile {i}: Timestamp AT nicht vorhanden oder leer. Verarbeitung wird gestartet.")
|
||||
|
||||
# WENN die Funktion hier ankommt, DARF 'should_skip' NICHT True gewesen sein.
|
||||
debug_print(f"Zeile {i}: Timestamp AT nicht vorhanden oder leer. Verarbeitung wird gestartet.")
|
||||
website_url = row[website_col_idx] if len(row) > website_col_idx else ""
|
||||
if not website_url or website_url.strip().lower() == "k.a.":
|
||||
skipped_url_count += 1
|
||||
# Wichtig: Hier auch continue, sonst wird versucht zu updaten!
|
||||
continue
|
||||
|
||||
# (Rest der Logik zum Scrapen und Updaten wie zuvor)
|
||||
website_url = row[website_col_idx] if len(row) > website_col_idx else ""
|
||||
if not website_url or website_url.strip().lower() == "k.a.":
|
||||
# debug_print(f"Zeile {i}: Kein gültiger Website-Eintrag.") # Weniger Lärm
|
||||
skipped_url_count += 1
|
||||
continue
|
||||
debug_print(f"Zeile {i}: Verarbeite Website {website_url}...")
|
||||
raw_text = get_website_raw(website_url)
|
||||
summary = summarize_website_content(raw_text)
|
||||
processed_count += 1
|
||||
|
||||
debug_print(f"Zeile {i}: Verarbeite Website {website_url}...") # Loggen, dass es wirklich passiert
|
||||
raw_text = get_website_raw(website_url)
|
||||
summary = summarize_website_content(raw_text)
|
||||
processed_count += 1
|
||||
updates = []
|
||||
current_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
current_version = Config.VERSION
|
||||
|
||||
updates = []
|
||||
current_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
current_version = Config.VERSION
|
||||
updates.append({'range': f'{rohtext_col_letter}{i}', 'values': [[raw_text]]})
|
||||
updates.append({'range': f'{summary_col_letter}{i}', 'values': [[summary]]})
|
||||
updates.append({'range': f'{ts_col_letter}{i}', 'values': [[current_timestamp]]}) # AT Timestamp
|
||||
updates.append({'range': f'{version_col_letter}{i}', 'values': [[current_version]]}) # AP Version
|
||||
|
||||
updates.append({'range': f'{rohtext_col_letter}{i}', 'values': [[raw_text]]})
|
||||
updates.append({'range': f'{summary_col_letter}{i}', 'values': [[summary]]})
|
||||
updates.append({'range': f'{ts_col_letter}{i}', 'values': [[current_timestamp]]}) # AT Timestamp
|
||||
updates.append({'range': f'{version_col_letter}{i}', 'values': [[current_version]]}) # AP Version
|
||||
if updates:
|
||||
success = sheet_handler.batch_update_cells(updates)
|
||||
if success:
|
||||
debug_print(f"Zeile {i}: Website-Daten erfolgreich aktualisiert.")
|
||||
else:
|
||||
debug_print(f"FEHLER beim Schreiben der Website-Updates für Zeile {i}.")
|
||||
|
||||
if updates:
|
||||
success = sheet_handler.batch_update_cells(updates)
|
||||
if success:
|
||||
debug_print(f"Zeile {i}: Website-Daten erfolgreich aktualisiert.")
|
||||
else:
|
||||
debug_print(f"FEHLER beim Schreiben der Website-Updates für Zeile {i}.")
|
||||
|
||||
time.sleep(Config.RETRY_DELAY)
|
||||
time.sleep(Config.RETRY_DELAY)
|
||||
# --- Ende des ELSE-Blocks ---
|
||||
|
||||
debug_print(f"Website-Scraping (Batch) abgeschlossen. {processed_count} Websites gescraped, {skipped_count} Zeilen wg. Timestamp übersprungen, {skipped_url_count} Zeilen ohne URL übersprungen.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user