bugfix
This commit is contained in:
@@ -2212,6 +2212,7 @@ def _process_batch(sheet, batches, row_numbers):
|
|||||||
# time.sleep(Config.RETRY_DELAY) # Entfernt
|
# time.sleep(Config.RETRY_DELAY) # Entfernt
|
||||||
|
|
||||||
# Komplette Funktion process_website_batch (prüft jetzt Timestamp AT mit erzwungenem Debugging)
|
# Komplette Funktion process_website_batch (prüft jetzt Timestamp AT mit erzwungenem Debugging)
|
||||||
|
# Komplette Funktion process_website_batch (mit Vereinfachung und explizitem Skip-Log)
|
||||||
def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet):
|
def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet):
|
||||||
"""
|
"""
|
||||||
Batch-Prozess für Website-Scraping. Lädt Daten neu, prüft für jede Zeile
|
Batch-Prozess für Website-Scraping. Lädt Daten neu, prüft für jede Zeile
|
||||||
@@ -2220,23 +2221,18 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index
|
|||||||
"""
|
"""
|
||||||
debug_print(f"Starte Website-Scraping (Batch) für Zeilen {start_row_index_in_sheet} bis {end_row_index_in_sheet}...")
|
debug_print(f"Starte Website-Scraping (Batch) für Zeilen {start_row_index_in_sheet} bis {end_row_index_in_sheet}...")
|
||||||
|
|
||||||
# --- NEU: Daten explizit neu laden ---
|
|
||||||
if not sheet_handler.load_data():
|
if not sheet_handler.load_data():
|
||||||
debug_print("FEHLER beim Laden der Daten in process_website_batch.")
|
debug_print("FEHLER beim Laden der Daten in process_website_batch.")
|
||||||
return
|
return
|
||||||
all_data = sheet_handler.get_all_data_with_headers()
|
all_data = sheet_handler.get_all_data_with_headers()
|
||||||
# --- Ende Daten neu laden ---
|
|
||||||
|
|
||||||
if not all_data or len(all_data) <= 5:
|
if not all_data or len(all_data) <= 5:
|
||||||
debug_print("FEHLER/WARNUNG: Keine Daten zum Verarbeiten in process_website_batch gefunden.")
|
debug_print("FEHLER/WARNUNG: Keine Daten zum Verarbeiten in process_website_batch gefunden.")
|
||||||
return
|
return
|
||||||
|
|
||||||
sheet = sheet_handler.sheet
|
sheet = sheet_handler.sheet
|
||||||
|
|
||||||
# Hole Indizes
|
|
||||||
timestamp_col_key = "Website Scrape Timestamp"
|
timestamp_col_key = "Website Scrape Timestamp"
|
||||||
timestamp_col_index = COLUMN_MAP.get(timestamp_col_key)
|
timestamp_col_index = COLUMN_MAP.get(timestamp_col_key)
|
||||||
website_col_idx = COLUMN_MAP.get("CRM Website") # Korrigiert zu idx
|
website_col_idx = COLUMN_MAP.get("CRM Website")
|
||||||
rohtext_col_idx = COLUMN_MAP.get("Website Rohtext")
|
rohtext_col_idx = COLUMN_MAP.get("Website Rohtext")
|
||||||
summary_col_idx = COLUMN_MAP.get("Website Zusammenfassung")
|
summary_col_idx = COLUMN_MAP.get("Website Zusammenfassung")
|
||||||
version_col_idx = COLUMN_MAP.get("Version")
|
version_col_idx = COLUMN_MAP.get("Version")
|
||||||
@@ -2262,27 +2258,38 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index
|
|||||||
|
|
||||||
row = all_data[row_index_in_list]
|
row = all_data[row_index_in_list]
|
||||||
|
|
||||||
# --- WIEDER AKTIVIERT: Timestamp-Prüfung für jede Zeile (AT) ---
|
# --- Timestamp-Prüfung für jede Zeile (AT) ---
|
||||||
ts_value_at = "INDEX_FEHLER"
|
ts_value_at = "INDEX_FEHLER"
|
||||||
ts_at_is_set = False
|
should_skip = False # Standardmäßig nicht überspringen
|
||||||
if len(row) > timestamp_col_index:
|
if len(row) > timestamp_col_index:
|
||||||
ts_value_at = row[timestamp_col_index]
|
ts_value_at = row[timestamp_col_index]
|
||||||
ts_at_is_set = bool(str(ts_value_at).strip())
|
# Vereinfachte Prüfung: Ist der String (nach strip) NICHT leer?
|
||||||
# Debug Log
|
if str(ts_value_at).strip():
|
||||||
log_debug = (i < start_row_index_in_sheet + 5 or i > end_row_index_in_sheet - 5 or i % 500 == 0)
|
should_skip = True
|
||||||
if log_debug:
|
|
||||||
debug_print(f"Zeile {i} (Website Check): Prüfe Timestamp {ts_col_letter} (Index {timestamp_col_index}). Rohwert='{ts_value_at}', Strip='{str(ts_value_at).strip()}', Überspringen? -> {ts_at_is_set}")
|
|
||||||
|
|
||||||
if ts_at_is_set:
|
# Debug Log (zeigt jetzt die 'should_skip' Variable)
|
||||||
|
log_debug = (i < start_row_index_in_sheet + 5 or i > end_row_index_in_sheet - 5 or i % 500 == 0 or i in range(2122, 2132)) # Zeige es für die Problemzone
|
||||||
|
if log_debug:
|
||||||
|
debug_print(f"Zeile {i} (Website Check): Prüfe Timestamp {ts_col_letter}. Rohwert='{ts_value_at}'. Überspringen? -> {should_skip}")
|
||||||
|
|
||||||
|
if should_skip:
|
||||||
|
# --- NEU: Explizites Logging VOR dem continue ---
|
||||||
|
debug_print(f"Zeile {i}: *** WIRD ÜBERSPRUNGEN (Timestamp AT vorhanden) ***")
|
||||||
skipped_count += 1
|
skipped_count += 1
|
||||||
continue
|
continue # Springe zur nächsten Iteration
|
||||||
# --- Ende Timestamp-Prüfung ---
|
# --- Ende Timestamp-Prüfung ---
|
||||||
|
|
||||||
|
# WENN die Funktion hier ankommt, DARF 'should_skip' NICHT True gewesen sein.
|
||||||
|
debug_print(f"Zeile {i}: Timestamp AT nicht vorhanden oder leer. Verarbeitung wird gestartet.")
|
||||||
|
|
||||||
|
# (Rest der Logik zum Scrapen und Updaten wie zuvor)
|
||||||
website_url = row[website_col_idx] if len(row) > website_col_idx else ""
|
website_url = row[website_col_idx] if len(row) > website_col_idx else ""
|
||||||
if not website_url or website_url.strip().lower() == "k.a.":
|
if not website_url or website_url.strip().lower() == "k.a.":
|
||||||
|
# debug_print(f"Zeile {i}: Kein gültiger Website-Eintrag.") # Weniger Lärm
|
||||||
skipped_url_count += 1
|
skipped_url_count += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
debug_print(f"Zeile {i}: Verarbeite Website {website_url}...") # Loggen, dass es wirklich passiert
|
||||||
raw_text = get_website_raw(website_url)
|
raw_text = get_website_raw(website_url)
|
||||||
summary = summarize_website_content(raw_text)
|
summary = summarize_website_content(raw_text)
|
||||||
processed_count += 1
|
processed_count += 1
|
||||||
|
|||||||
Reference in New Issue
Block a user