From cf029f342cc84f5aeb5bdef64682c5014635e74a Mon Sep 17 00:00:00 2001 From: Floke Date: Thu, 17 Apr 2025 09:53:56 +0000 Subject: [PATCH] bugfix --- brancheneinstufung.py | 251 +++++++++++++++--------------------------- 1 file changed, 87 insertions(+), 164 deletions(-) diff --git a/brancheneinstufung.py b/brancheneinstufung.py index 22e17be8..6dd2876d 100644 --- a/brancheneinstufung.py +++ b/brancheneinstufung.py @@ -53,44 +53,38 @@ LOG_DIR = "Log" # ==================== KONFIGURATION ==================== class Config: - VERSION = "v1.6.3" + VERSION = "v1.6.3" # Behalte Version bei, da es ein Bugfix ist LANG = "de" SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo" MAX_RETRIES = 3 RETRY_DELAY = 5 - LOG_CSV = "gpt_antworten_log.csv" # Wird dieser Log noch verwendet? Ggf. entfernen. SIMILARITY_THRESHOLD = 0.65 DEBUG = True WIKIPEDIA_SEARCH_RESULTS = 5 HTML_PARSER = "html.parser" - BATCH_SIZE = 10 - TOKEN_MODEL = "gpt-3.5-turbo" # Oder "gpt-4" etc. - MAX_SCRAPING_WORKERS = 10 # Threads für paralleles Website-Scraping - OPENAI_BATCH_SIZE_LIMIT = 8 # Max Texte pro OpenAI Call in summarize_batch_openai + TOKEN_MODEL = "gpt-3.5-turbo" + + # --- Konfiguration für Batching & Parallelisierung --- + BATCH_SIZE = 10 # Batch-Größe für Wiki Verification (_process_batch) + PROCESSING_BATCH_SIZE = 20 # Wie viele Zeilen pro Verarbeitungs-Batch sammeln (Website Scraping) + OPENAI_BATCH_SIZE_LIMIT = 8 # Max. Texte pro OpenAI Call in summarize_batch_openai + MAX_SCRAPING_WORKERS = 10 # Threads für paralleles Website-Scraping UPDATE_BATCH_ROW_LIMIT = 50 # Zeilen sammeln für gebündelte Sheet Updates - # Zentrales API-Key-Management API_KEYS = {} - @classmethod - def load_api_keys(cls): + def load_api_keys(cls): # unverändert cls.API_KEYS['openai'] = cls._load_key_from_file(API_KEY_FILE) cls.API_KEYS['serpapi'] = cls._load_key_from_file(SERP_API_KEY_FILE) cls.API_KEYS['genderize'] = cls._load_key_from_file(GENDERIZE_API_KEY_FILE) - # Set OpenAI Key globally if loaded - if cls.API_KEYS.get('openai'): - openai.api_key = cls.API_KEYS['openai'] - else: - debug_print("⚠️ OpenAI API Key konnte nicht geladen werden.") + if cls.API_KEYS.get('openai'): openai.api_key = cls.API_KEYS['openai'] + else: debug_print("⚠️ OpenAI API Key konnte nicht geladen werden.") @staticmethod - def _load_key_from_file(filepath): + def _load_key_from_file(filepath): # unverändert try: - with open(filepath, "r") as f: - return f.read().strip() - except Exception as e: - debug_print(f"Fehler beim Lesen des API-Keys aus '{filepath}': {e}") - return None + with open(filepath, "r") as f: return f.read().strip() + except Exception as e: debug_print(f"Fehler Keys aus '{filepath}': {e}"); return None # Globales Mapping-Dictionary und Schema-String BRANCH_MAPPING = {} @@ -961,49 +955,35 @@ class GoogleSheetHandler: self.sheet = None self.sheet_values = [] self.headers = [] - try: - self._connect() - if self.sheet: - self.load_data() - except Exception as e: - debug_print(f"FATAL: Fehler bei Initialisierung von GoogleSheetHandler: {e}") - raise ConnectionError(f"Google Sheet Handler Init failed: {e}") + try: self._connect(); + except Exception as e: raise ConnectionError(f"Google Sheet Handler Init failed: {e}") + if self.sheet: self.load_data() # Lade Daten initial + @retry_on_failure def _connect(self): # ... (unverändert) ... - self.sheet = None - debug_print("Verbinde mit Google Sheets...") + self.sheet = None; debug_print("Verbinde mit Google Sheets...") try: scope = ["https://www.googleapis.com/auth/spreadsheets"] creds = ServiceAccountCredentials.from_json_keyfile_name(CREDENTIALS_FILE, scope) - gc = gspread.authorize(creds) - sh = gc.open_by_url(Config.SHEET_URL) - self.sheet = sh.sheet1 - debug_print("Verbindung zu Google Sheets erfolgreich.") - except gspread.exceptions.APIError as e: - debug_print(f"FEHLER bei Google API Verbindung: Status {e.response.status_code} - {e.response.text[:200]}") - raise e - except Exception as e: - debug_print(f"FEHLER bei der Google Sheets Verbindung: {type(e).__name__} - {e}") - raise e + gc = gspread.authorize(creds); sh = gc.open_by_url(Config.SHEET_URL) + self.sheet = sh.sheet1; debug_print("Verbindung zu Google Sheets erfolgreich.") + except Exception as e: debug_print(f"FEHLER Connect: {e}"); raise e @retry_on_failure def load_data(self): # ... (unverändert) ... - if not self.sheet: #... - return False + if not self.sheet: return False debug_print("Lade Daten aus Google Sheet...") try: self.sheet_values = self.sheet.get_all_values() - if not self.sheet_values: #... - return True + if not self.sheet_values: self.headers=[]; return True if len(self.sheet_values) >= 1: self.headers = self.sheet_values[0] else: self.headers = [] debug_print(f"Daten neu geladen: {len(self.sheet_values)} Zeilen insgesamt.") return True - except Exception as e: #... - raise e + except Exception as e: debug_print(f"FEHLER Laden: {e}"); raise e def get_data(self): # ... (unverändert) ... @@ -1018,33 +998,19 @@ class GoogleSheetHandler: def _get_col_letter(self, col_idx_1_based): # ... (unverändert) ... - string = ""; n = col_idx_1_based + string = ""; n = col_idx_1_based; if n < 1: return None while n > 0: n, remainder = divmod(n - 1, 26); string = chr(65 + remainder) + string return string - # --- ANGEPASST: Sucht jetzt nach leerem String ODER 'k.A.' --- + # Prüft jetzt auf Werte in der `empty_values` Liste (case-insensitive) def get_start_row_index(self, check_column_key, min_sheet_row=7, empty_values=None): - """ - Findet den Index der ersten Zeile (0-basiert für Daten nach Header), - ab einer Mindestzeilennummer im Sheet, in der der Wert in der - Spalte (definiert durch check_column_key) als "leer" gilt. - - Args: - check_column_key (str): Der Schlüssel in COLUMN_MAP für die zu prüfende Spalte. - min_sheet_row (int): Die 1-basierte Zeilennummer im Sheet, ab der gesucht werden soll. - empty_values (list, optional): Eine Liste von Strings (lowercase), die als leer gelten sollen. - Standard ist ["", "k.a."]. - - Returns: - int: Der 0-basierte Index in der Datenliste (ohne Header), - oder -1 bei Fehler (z.B. Schlüssel nicht gefunden), - oder der Index nach der letzten Zeile, wenn alle gefüllt sind. - """ + """Findet erste Zeile, deren Wert in check_column_key als leer gilt.""" + # --- KORRIGIERT: Standardwerte für leere Strings --- if empty_values is None: - empty_values = ["", "k.a."] # Standardwerte, die als leer gelten + empty_values = ["", "k.a.", "k.a. (nur cookie-banner erkannt)", "k.a. (fehler)"] - if not self.load_data(): return -1 # Fehlerindikator + if not self.load_data(): return -1 header_rows = 5 data_rows = self.get_data() if not data_rows: return 0 @@ -1057,32 +1023,30 @@ class GoogleSheetHandler: actual_col_letter = self._get_col_letter(check_column_index + 1) search_start_index_in_data = max(0, min_sheet_row - header_rows - 1) - debug_print(f"get_start_row_index: Suche ab Daten-Index {search_start_index_in_data} (Sheet-Zeile {search_start_index_in_data + header_rows + 1}) nach leerem Wert (in {empty_values}) in Spalte '{check_column_key}' ({actual_col_letter}, Index {check_column_index}).") + debug_print(f"get_start_row_index: Suche ab Daten-Index {search_start_index_in_data} nach Wert in {empty_values} in Spalte '{check_column_key}' ({actual_col_letter})...") if search_start_index_in_data >= len(data_rows): - debug_print(f"Start-Suchindex ({search_start_index_in_data}) liegt nach oder auf letzter Datenzeile ({len(data_rows)-1}). Alle vorherigen Zeilen scheinen gefüllt.") + debug_print(f"Start-Suchindex ({search_start_index_in_data}) >= Datenlänge ({len(data_rows)}). Alle geprüft.") return len(data_rows) for i in range(search_start_index_in_data, len(data_rows)): row = data_rows[i] current_sheet_row = i + header_rows + 1 - # Prüfe den Wert in der Zielspalte - cell_value = None - is_considered_empty = True # Annahme: Ist leer, bis Gegenteil bewiesen + cell_value_str_lower = "FEHLER_INDEX" # Fallback + is_considered_empty = True # Annahme: Ist leer if len(row) > check_column_index: - cell_value = str(row[check_column_index]).strip() # Immer als String behandeln und strippen - # Prüfe, ob der gestrippte Wert (lowercase) in der Liste der leeren Werte ist - if cell_value.lower() not in empty_values: + cell_value_str_lower = str(row[check_column_index]).strip().lower() + if cell_value_str_lower not in empty_values: is_considered_empty = False - # else: is_considered_empty bleibt True (Spalte nicht vorhanden = leer) + # else: is_considered_empty bleibt True (Spalte zu kurz = leer) - log_debug = (i == search_start_index_in_data or i % 1000 == 0 or current_sheet_row in range(10050, 10060)) # Angepasste Log-Punkte - if log_debug: - debug_print(f" -> Prüfe Daten-Index {i} (Sheet Zeile {current_sheet_row}): Wert in Spalte {actual_col_letter}='{cell_value}' -> Gilt als leer? {is_considered_empty}") + # Logge nur relevante Prüfungen + if i == search_start_index_in_data or i % 1000 == 0 or is_considered_empty: + debug_print(f" -> Prüfe Daten-Index {i} (Sheet {current_sheet_row}): Wert in {actual_col_letter}='{cell_value_str_lower}'. Gilt als leer? {is_considered_empty}") if is_considered_empty: - debug_print(f"Erste Zeile ab Zeile {min_sheet_row} mit leerem Wert (in {empty_values}) in Spalte {actual_col_letter} gefunden: Zeile {current_sheet_row} (Daten-Index {i})") + debug_print(f"Erste Zeile ab {min_sheet_row} mit leerem Wert in Spalte {actual_col_letter} gefunden: Zeile {current_sheet_row} (Daten-Index {i})") return i last_index = len(data_rows) @@ -1092,14 +1056,10 @@ class GoogleSheetHandler: @retry_on_failure def batch_update_cells(self, update_data): # ... (unverändert) ... - if not self.sheet: #... - return False + if not self.sheet: return False if not update_data: return True - try: - self.sheet.batch_update(update_data, value_input_option='USER_ENTERED') - return True - except Exception as e: #... - raise e + try: self.sheet.batch_update(update_data, value_input_option='USER_ENTERED'); return True + except Exception as e: debug_print(f"FEHLER Batch Update: {e}"); raise e # --- Ende GoogleSheetHandler Klasse --- @@ -2212,7 +2172,7 @@ def _process_batch(sheet, batches, row_numbers): def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet): """ Batch-Prozess NUR für Website-Scraping (Rohtext AR). - Lädt Daten neu, prüft Spalte AR auf Inhalt ('', 'k.A.') und überspringt ggf. + Lädt Daten neu, prüft Spalte AR auf Inhalt ('', 'k.A.', etc.) und überspringt ggf. Setzt AR + AP für bearbeitete Zeilen. Sendet Updates gebündelt. """ debug_print(f"Starte Website-Scraping NUR ROHDATEN (Batch) für Zeilen {start_row_index_in_sheet} bis {end_row_index_in_sheet}...") @@ -2234,17 +2194,14 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index rohtext_col_letter = sheet_handler._get_col_letter(rohtext_col_index + 1) version_col_letter = sheet_handler._get_col_letter(version_col_idx + 1) - # --- Worker-Funktion für Scraping (unverändert) --- + # --- Worker-Funktion (unverändert) --- def scrape_raw_text_task(task_info): - row_num = task_info['row_num'] - url = task_info['url'] - raw_text = "k.A." - error = None + row_num = task_info['row_num']; url = task_info['url']; raw_text = "k.A."; error = None try: raw_text = get_website_raw(url) except Exception as e: error = f"Scraping Fehler Zeile {row_num}: {e}"; debug_print(error) return {"row_num": row_num, "raw_text": raw_text, "error": error} - # --- Hauptlogik: Iteriere und sammle Batches --- + # --- Hauptlogik --- tasks_for_processing_batch = [] all_sheet_updates = [] total_processed_count = 0 @@ -2252,10 +2209,11 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index total_skipped_url_count = 0 total_error_count = 0 - # Verwende Werte aus Config + # Werte aus Config holen processing_batch_size = Config.PROCESSING_BATCH_SIZE max_scraping_workers = Config.MAX_SCRAPING_WORKERS - update_batch_row_limit = Config.UPDATE_BATCH_ROW_LIMIT # Annahme: UPDATE_BATCH_ROW_LIMIT ist auch in Config definiert + update_batch_row_limit = Config.UPDATE_BATCH_ROW_LIMIT + empty_values_for_skip = ["", "k.a.", "k.a. (nur cookie-banner erkannt)", "k.a. (fehler)"] # Werte, die als "leer" gelten for i in range(start_row_index_in_sheet, end_row_index_in_sheet + 1): row_index_in_list = i - 1 @@ -2264,16 +2222,16 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index # --- Prüfung, ob AR schon Inhalt hat --- should_skip = False - cell_value_ar = None + cell_value_ar_str = "" if len(row) > rohtext_col_index: - cell_value_ar = str(row[rohtext_col_index]).strip() - if cell_value_ar and cell_value_ar.lower() not in ["", "k.a.", "k.a. (nur cookie-banner erkannt)", "k.a. (fehler)"]: + cell_value_ar_str = str(row[rohtext_col_index]).strip().lower() + if cell_value_ar_str not in empty_values_for_skip: # Überspringen, wenn NICHT in der Liste der leeren Werte should_skip = True + # else: Spalte zu kurz -> nicht überspringen (wird als leer behandelt) - # Debug Log log_debug = (i < start_row_index_in_sheet + 5 or i > end_row_index_in_sheet - 5 or i % 500 == 0) if log_debug: - debug_print(f"Zeile {i} (Website AR Check): Prüfe Inhalt Spalte {rohtext_col_letter}. Rohwert='{cell_value_ar}'. Überspringen? -> {should_skip}") + debug_print(f"Zeile {i} (Website AR Check): Prüfe Inhalt Spalte {rohtext_col_letter}. Wert='{cell_value_ar_str}'. Überspringen? -> {should_skip}") if should_skip: total_skipped_count += 1 @@ -2289,7 +2247,6 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index tasks_for_processing_batch.append({"row_num": i, "url": website_url}) # --- Verarbeitungs-Batch ausführen --- - # HIER KORRIGIERT: Verwende processing_batch_size if len(tasks_for_processing_batch) >= processing_batch_size or i == end_row_index_in_sheet: if tasks_for_processing_batch: batch_start_row = tasks_for_processing_batch[0]['row_num'] @@ -2298,7 +2255,6 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index debug_print(f"\n--- Starte Scraping-Batch ({batch_task_count} Tasks, Zeilen {batch_start_row}-{batch_end_row}) ---") scraping_results = {} - # HIER KORRIGIERT: Verwende max_scraping_workers debug_print(f" Scrape {batch_task_count} Websites parallel (max {max_scraping_workers} worker)...") with concurrent.futures.ThreadPoolExecutor(max_workers=max_scraping_workers) as executor: future_to_task = {executor.submit(scrape_raw_text_task, task): task for task in tasks_for_processing_batch} @@ -2308,12 +2264,10 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index result = future.result() scraping_results[result['row_num']] = result['raw_text'] if result['error']: total_error_count += 1 - # Zähle erst hier, wenn Ergebnis da ist except Exception as exc: row_num = task['row_num']; err_msg = f"Generischer Fehler Scraping Task Zeile {row_num}: {exc}" debug_print(err_msg); scraping_results[row_num] = "k.A. (Fehler)"; total_error_count +=1 - # Zähle hier die Anzahl der tatsächlich bearbeiteten Ergebnisse current_batch_processed_count = len(scraping_results) total_processed_count += current_batch_processed_count debug_print(f" Scraping für Batch beendet. {current_batch_processed_count} Ergebnisse erhalten ({total_error_count} Fehler in diesem Batch).") @@ -2325,24 +2279,20 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index for row_num, raw_text_res in scraping_results.items(): row_updates = [ {'range': f'{rohtext_col_letter}{row_num}', 'values': [[raw_text_res]]}, - # KEIN AT Timestamp mehr {'range': f'{version_col_letter}{row_num}', 'values': [[current_version]]} ] batch_sheet_updates.extend(row_updates) - all_sheet_updates.extend(batch_sheet_updates) # Sammle für größeren Batch-Update + all_sheet_updates.extend(batch_sheet_updates) - # Leere den Verarbeitungs-Batch - tasks_for_processing_batch = [] + tasks_for_processing_batch = [] # Batch leeren # --- Sheet Updates senden (wenn update_batch_row_limit erreicht) --- - # HIER KORRIGIERT: Verwende update_batch_row_limit - # Prüfe die Anzahl der *Zellen* in all_sheet_updates if len(all_sheet_updates) >= update_batch_row_limit * 2: # *2 weil 2 Updates pro Zeile debug_print(f" Sende gesammelte Sheet-Updates ({len(all_sheet_updates)} Zellen)...") success = sheet_handler.batch_update_cells(all_sheet_updates) if success: debug_print(f" Sheet-Update bis Zeile {i} erfolgreich.") else: debug_print(f" FEHLER beim Sheet-Update bis Zeile {i}.") - all_sheet_updates = [] # Zurücksetzen + all_sheet_updates = [] # --- Finale Sheet Updates senden --- if all_sheet_updates: @@ -2607,79 +2557,52 @@ def process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_ # Komplette run_dispatcher Funktion (Start immer basierend auf AO) def run_dispatcher(mode, sheet_handler, row_limit=None): - """ - Wählt den passenden Batch-Prozess basierend auf dem Modus. - Ermittelt die Startzeile dynamisch basierend auf der relevanten Spalte für den Modus. - """ + """Wählt passenden Batch-Prozess, ermittelt Startzeile dynamisch.""" debug_print(f"Starte Dispatcher im Modus '{mode}' mit row_limit={row_limit}.") header_rows = 5 - # --- Startzeilen-Ermittlung basierend auf Modus --- - start_col_key = "Timestamp letzte Prüfung" # Standard (AO) + # Startspalte für jeden Modus + start_col_key = "Timestamp letzte Prüfung" # Standard AO min_start_row = 7 - - # --- KORRIGIERT: Startspalte für jeden Modus --- - if mode == "website": - start_col_key = "Website Rohtext" # Spalte AR (NEU) - elif mode == "wiki": - start_col_key = "Wiki Verif. Timestamp" # Spalte AX - elif mode == "branch": - start_col_key = "Timestamp letzte Prüfung" # Spalte AO - elif mode == "summarize": - start_col_key = "Website Zusammenfassung" # Spalte AS (prüft ob Summary fehlt) - elif mode == "combined": - start_col_key = "Timestamp letzte Prüfung" # Spalte AO + if mode == "website": start_col_key = "Website Rohtext" # AR ! + elif mode == "wiki": start_col_key = "Wiki Verif. Timestamp" # AX + elif mode == "branch": start_col_key = "Timestamp letzte Prüfung" # AO + elif mode == "summarize": start_col_key = "Website Zusammenfassung" # AS + elif mode == "combined": start_col_key = "Timestamp letzte Prüfung" # AO debug_print(f"Dispatcher: Ermittle Startzeile basierend auf Spalte '{start_col_key}'...") - # get_start_row_index prüft jetzt auf leere Werte oder 'k.a.' etc. start_data_index = sheet_handler.get_start_row_index(check_column_key=start_col_key, min_sheet_row=min_start_row) - if start_data_index == -1: return # Fehler wurde geloggt + if start_data_index == -1: return debug_print(f"FEHLER: Startspalte '{start_col_key}' prüfen!") start_row_index_in_sheet = start_data_index + header_rows + 1 total_sheet_rows = len(sheet_handler.sheet_values) - # --- Endzeilen-Ermittlung und Prüfungen (wie gehabt) --- - if start_data_index >= len(sheet_handler.get_data()): return # Log in get_start_row_index - if start_row_index_in_sheet > total_sheet_rows: return # Log in get_start_row_index + # Prüfungen (wie gehabt) + if start_data_index >= len(sheet_handler.get_data()): return debug_print("Start nach Ende.") + if start_row_index_in_sheet > total_sheet_rows: return debug_print("Ungültige Startzeile.") - if row_limit is not None and row_limit > 0: - end_row_index_in_sheet = min(start_row_index_in_sheet + row_limit - 1, total_sheet_rows) - elif row_limit == 0: return debug_print("Zeilenlimit ist 0.") + # Endzeile + if row_limit is not None and row_limit > 0: end_row_index_in_sheet = min(start_row_index_in_sheet + row_limit - 1, total_sheet_rows) + elif row_limit == 0: return debug_print("Limit 0.") else: end_row_index_in_sheet = total_sheet_rows - debug_print(f"Dispatcher: Verarbeitung geplant für Sheet-Zeilen {start_row_index_in_sheet} bis {end_row_index_in_sheet}.") - if start_row_index_in_sheet > end_row_index_in_sheet: return debug_print("Start nach Ende.") + if start_row_index_in_sheet > end_row_index_in_sheet: return debug_print("Start nach Ende (berechnet).") - # --- Modusauswahl und Aufruf --- + # Modusauswahl try: - if mode == "wiki": - process_verification_only(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) # Prüft AX, Setzt AX - elif mode == "website": - process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) # Prüft AR, Setzt AR+AP - elif mode == "branch": - process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) # Prüft AO, Setzt AO+AP - elif mode == "summarize": - process_website_summarization_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) # Prüft AS, Setzt AS+AP + if mode == "wiki": process_verification_only(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) + elif mode == "website": process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) # Prüft AR, Setzt AR+AP + elif mode == "branch": process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) + elif mode == "summarize": process_website_summarization_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) elif mode == "combined": - debug_print("--- Start Combined Mode: Wiki ---") - process_verification_only(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) # Prüft AX, Setzt AX - time.sleep(1) - debug_print("--- Start Combined Mode: Website Scraping ---") - process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) # Prüft AR, Setzt AR+AP - time.sleep(1) - debug_print("--- Start Combined Mode: Website Summarization ---") - process_website_summarization_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) # Prüft AS, Setzt AS+AP - time.sleep(1) - debug_print("--- Start Combined Mode: Branch ---") - process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) # Prüft AO, Setzt AO+AP + debug_print("--- Start Combined Mode: Wiki ---"); process_verification_only(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet); time.sleep(1) + debug_print("--- Start Combined Mode: Website Scraping ---"); process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet); time.sleep(1) + debug_print("--- Start Combined Mode: Website Summarization ---"); process_website_summarization_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet); time.sleep(1) + debug_print("--- Start Combined Mode: Branch ---"); process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) debug_print("--- Combined Mode abgeschlossen ---") - else: - debug_print(f"Ungültiger Modus '{mode}' wurde im Dispatcher übergeben.") - - except Exception as e: - debug_print(f"FEHLER im Dispatcher während Modus '{mode}': {e}") - import traceback; debug_print(traceback.format_exc()) + else: debug_print(f"Ungültiger Modus '{mode}'.") + except Exception as e: debug_print(f"FEHLER im Dispatcher: {e}"); import traceback; debug_print(traceback.format_exc()) # --- Ende run_dispatcher Funktion ---