From 5bfc252469e0d7ab8e44641d582066c1abec507a Mon Sep 17 00:00:00 2001
From: Floke <floke.com@gmail.com>
Date: Fri, 18 Apr 2025 18:14:12 +0000
Subject: [PATCH] refactor: v1.6.5 Minor code improvements and consistency

- Add HTML logging to _extract_infobox_value for debugging
- Implement _extract_infobox_value_fallback using regex
- Call fallback in extract_company_data if primary fails
- Add minor logging to _extract_first_paragraph_from_soup
- Adjust extract_numeric_value for robustness
- Add force_process flag to process_branch_batch for combined mode
- Correct indentation in alignment_demo inner function colnum_string
- Refine data preparation logic in DataProcessor.prepare_data_for_modeling
- Add Config.HEADER_ROWS constant
- Increment version to 1.6.5
---
 brancheneinstufung.py | 1449 +++++++++++++++++++++--------------------
 1 file changed, 739 insertions(+), 710 deletions(-)

diff --git a/brancheneinstufung.py b/brancheneinstufung.py
index c50e584a..fd735c7b 100644
--- a/brancheneinstufung.py
+++ b/brancheneinstufung.py
@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 # Git Commit V1.6.5
-# git commit -m "feat: v1.6.5 Improve WikipediaScraper infobox extraction"
-# git commit -m "- Add HTML logging to _extract_infobox_value for debugging"
-# git commit -m "- Implement _extract_infobox_value_fallback using regex"
-# git commit -m "- Call fallback in extract_company_data if primary fails"
-# git commit -m "- Add minor logging to _extract_first_paragraph_from_soup"
-# git commit -m "- Adjust extract_numeric_value for robustness"
+# git commit -m "refactor: v1.6.5 Minor code improvements and consistency"
 # git commit -m "- Increment version to 1.6.5"
+# git commit -m "- Introduce Config.HEADER_ROWS constant"
+# git commit -m "- Improve consistency using COLUMN_MAP for cell updates"
+# git commit -m "- Enhance logging in WikipediaScraper._extract_infobox_value"
+# git commit -m "- Expand keywords in WikipediaScraper._extract_infobox_value"
+# git commit -m "- Minor robustness adjustments in extract_numeric_value"
 
-# --- Imports (unverändert lassen) ---
+# --- Imports ---
 import os
 import time
 import re
@@ -42,7 +42,7 @@ try:
 except ImportError:
     tiktoken = None
 
-# --- Konstanten & Config (unverändert lassen, außer VERSION) ---
+# ==================== KONSTANTEN ====================
 CREDENTIALS_FILE = "service_account.json"
 API_KEY_FILE = "api_key.txt"
 SERP_API_KEY_FILE = "serpApiKey.txt"
@@ -52,8 +52,9 @@ LOG_DIR = "Log"
 MODEL_FILE = "technician_decision_tree_model.pkl"
 IMPUTER_FILE = "median_imputer.pkl"
 PATTERNS_FILE_TXT = "technician_patterns.txt"
-PATTERNS_FILE_JSON = "technician_patterns.json"
+PATTERNS_FILE_JSON = "technician_patterns.json" # Optional
 
+# ==================== KONFIGURATION ====================
 class Config:
     VERSION = "v1.6.5" # Versionsnummer erhöht
     LANG = "de"
@@ -65,6 +66,8 @@ class Config:
     WIKIPEDIA_SEARCH_RESULTS = 5
     HTML_PARSER = "html.parser"
     TOKEN_MODEL = "gpt-3.5-turbo"
+
+    # --- Batching & Parallelisierung ---
     BATCH_SIZE = 10
     PROCESSING_BATCH_SIZE = 20
     OPENAI_BATCH_SIZE_LIMIT = 4
@@ -73,11 +76,12 @@ class Config:
     MAX_BRANCH_WORKERS = 10
     OPENAI_CONCURRENCY_LIMIT = 5
     PROCESSING_BRANCH_BATCH_SIZE = PROCESSING_BATCH_SIZE
-    HEADER_ROWS = 5 # NEU: Header-Zeilen als Konstante
+
+    HEADER_ROWS = 5 # NEU: Anzahl der Header-Zeilen als Konstante
 
     API_KEYS = {}
     @classmethod
-    def load_api_keys(cls):
+    def load_api_keys(cls): # unverändert
         cls.API_KEYS['openai'] = cls._load_key_from_file(API_KEY_FILE)
         cls.API_KEYS['serpapi'] = cls._load_key_from_file(SERP_API_KEY_FILE)
         cls.API_KEYS['genderize'] = cls._load_key_from_file(GENDERIZE_API_KEY_FILE)
@@ -85,16 +89,18 @@ class Config:
         else: debug_print("⚠️ OpenAI API Key konnte nicht geladen werden.")
 
     @staticmethod
-    def _load_key_from_file(filepath):
+    def _load_key_from_file(filepath): # unverändert
         try:
             with open(filepath, "r") as f: return f.read().strip()
         except Exception as e: debug_print(f"Fehler Keys aus '{filepath}': {e}"); return None
 
-# --- Globale Variablen (unverändert lassen) ---
-BRANCH_MAPPING = {}
+# --- Globale Variablen ---
+BRANCH_MAPPING = {} # Wird von load_target_schema befüllt (obwohl nicht mehr direkt genutzt)
 TARGET_SCHEMA_STRING = "Ziel-Branchenschema nicht verfügbar."
-ALLOWED_TARGET_BRANCHES = []
-COLUMN_MAP = { # (unverändert lassen)
+ALLOWED_TARGET_BRANCHES = [] # Wird von load_target_schema befüllt
+
+# Globales Spalten-Mapping (wie in v1.6.4)
+COLUMN_MAP = {
     "ReEval Flag": 0, "CRM Name": 1, "CRM Kurzform": 2, "CRM Website": 3, "CRM Ort": 4,
     "CRM Beschreibung": 5, "CRM Branche": 6, "CRM Beschreibung Branche extern": 7, "CRM Anzahl Techniker": 8,
     "CRM Umsatz": 9, "CRM Anzahl Mitarbeiter": 10, "CRM Vorschlag Wiki URL": 11, "Wiki URL": 12,
@@ -112,191 +118,92 @@ COLUMN_MAP = { # (unverändert lassen)
     "Geschätzter Techniker Bucket": 46, "Finaler Umsatz (Wiki>CRM)": 47, "Finaler Mitarbeiter (Wiki>CRM)": 48,
     "Wiki Verif. Timestamp": 49
 }
-LOG_FILE = None
+LOG_FILE = None # Wird in main() gesetzt
 
-# --- Funktionen (prepare_data_for_modeling, retry_on_failure, Logging, Helper, Branch Mapping, Token Count etc. unverändert lassen) ---
-# ... (alle diese Funktionen hier einfügen, wie im vorherigen Code) ...
-def prepare_data_for_modeling(sheet_handler): # unverändert
-    debug_print("Starte Datenvorbereitung für Modellierung...")
-    try:
-        all_data = sheet_handler.get_all_data_with_headers()
-        if len(all_data) <= Config.HEADER_ROWS:
-            debug_print("Fehler: Nicht genügend Datenzeilen im Sheet gefunden.")
-            return None
-        headers = all_data[0]
-        data_rows = all_data[Config.HEADER_ROWS:]
-        df = pd.DataFrame(data_rows, columns=headers)
-        debug_print(f"DataFrame erstellt mit {len(df)} Zeilen und {len(df.columns)} Spalten.")
-        required_cols_keys = [
-            "CRM Name", "CRM Branche", "CRM Umsatz", "Wiki Umsatz",
-            "CRM Anzahl Mitarbeiter", "Wiki Mitarbeiter", "CRM Anzahl Techniker"
-        ]
-        col_indices = {}
-        tech_col_key = "CRM Anzahl Techniker"
-        try:
-            col_indices = {
-                "name": all_data[0][COLUMN_MAP["CRM Name"]],
-                "branche": all_data[0][COLUMN_MAP["CRM Branche"]],
-                "umsatz_crm": all_data[0][COLUMN_MAP["CRM Umsatz"]],
-                "umsatz_wiki": all_data[0][COLUMN_MAP["Wiki Umsatz"]],
-                "ma_crm": all_data[0][COLUMN_MAP["CRM Anzahl Mitarbeiter"]],
-                "ma_wiki": all_data[0][COLUMN_MAP["Wiki Mitarbeiter"]],
-                "techniker": all_data[0][COLUMN_MAP[tech_col_key]]
-            }
-            cols_to_select = list(col_indices.values())
-        except KeyError as e:
-             debug_print(f"FEHLER: Konnte Mapping für Schlüssel '{e}' nicht finden oder Spalte nicht im Header.")
-             return None
-        except IndexError as e:
-             debug_print(f"FEHLER: Spaltenindex aus COLUMN_MAP ist außerhalb der Grenzen der Header-Zeile: {e}")
-             return None
-        df_subset = df[cols_to_select].copy()
-        rename_map = {v: k for k, v in col_indices.items()}
-        df_subset.rename(columns=rename_map, inplace=True)
-        debug_print(f"Benötigte Spalten ausgewählt und umbenannt: {list(df_subset.columns)}")
-        def get_valid_numeric(value_str):
-            # Adjusted slightly for robustness
-            if pd.isna(value_str) or value_str == '': return np.nan
-            text = str(value_str).strip()
-            # Remove currency symbols, prefixes etc. more broadly
-            text = re.sub(r'(?i)^(ca\.?|circa|über|unter|rund|etwa|mehr als|weniger als|bis zu)\s*', '', text)
-            text = re.sub(r'[€$£¥]', '', text).strip()
-            # Handle thousands separators (.) and decimal comma (,)
-            if '.' in text and ',' in text: # Assume dot is thousand, comma is decimal
-                text = text.replace('.', '').replace(',', '.')
-            elif ',' in text and '.' not in text: # Assume comma is decimal
-                 text = text.replace(',', '.')
-            elif '.' in text and ',' not in text: # Might be thousand or decimal - remove if many dots
-                 if text.count('.') > 1: text = text.replace('.', '')
-
-            # Multipliers (Mio/Mrd for Umsatz, Tsd potentially for both)
-            multiplier = 1.0
-            text_lower = text.lower()
-            num_part = text
-            if "mrd" in text_lower or "milliarden" in text_lower or "billion" in text_lower:
-                multiplier = 1000.0
-                num_part = re.sub(r'(?i)\s*(mrd\.?|milliarden|billion)\b.*', '', text).strip()
-            elif "mio" in text_lower or "millionen" in text_lower or "mill\." in text_lower:
-                multiplier = 1.0
-                num_part = re.sub(r'(?i)\s*(mio\.?|millionen|mill\.?)\b.*', '', text).strip()
-            elif "tsd" in text_lower or "tausend" in text_lower:
-                multiplier = 0.001 if 'Umsatz' in final_col else 1000.0 # Adjust multiplier based on target
-                num_part = re.sub(r'(?i)\s*(tsd\.?|tausend)\b.*', '', text).strip()
-
-            # Extract numeric part again after removing suffixes
-            num_part = re.match(r'([\d.\-]+)', num_part) # Find leading number (can be negative temporarily)
-            if not num_part: return np.nan
-            num_part_str = num_part.group(1)
-
-            try:
-                val = float(num_part_str) * multiplier
-                # Allow 0 for Umsatz/Mitarbeiter? Decide based on requirements. Here: > 0
-                return val if val > 0 else np.nan
-            except ValueError:
-                return np.nan
-
-        cols_to_process = {
-            'Umsatz': ('umsatz_wiki', 'umsatz_crm', 'Finaler_Umsatz'),
-            'Mitarbeiter': ('ma_wiki', 'ma_crm', 'Finaler_Mitarbeiter')
-        }
-        for base_name, (wiki_col, crm_col, final_col) in cols_to_process.items():
-            debug_print(f"Verarbeite '{base_name}'...")
-            if wiki_col not in df_subset.columns: df_subset[wiki_col] = np.nan
-            if crm_col not in df_subset.columns: df_subset[crm_col] = np.nan
-            wiki_numeric = df_subset[wiki_col].apply(lambda x: get_valid_numeric(x, final_col))
-            crm_numeric = df_subset[crm_col].apply(lambda x: get_valid_numeric(x, final_col))
-            df_subset[final_col] = np.where(
-                wiki_numeric.notna(), wiki_numeric,
-                np.where(crm_numeric.notna(), crm_numeric, np.nan)
-            )
-            debug_print(f"  -> {df_subset[final_col].notna().sum()} gültige '{final_col}' Werte erstellt.")
-        techniker_col = "techniker"
-        debug_print(f"Verarbeite Zielvariable '{techniker_col}'...")
-        df_subset['Anzahl_Servicetechniker_Numeric'] = pd.to_numeric(df_subset[techniker_col], errors='coerce')
-        initial_rows = len(df_subset)
-        df_filtered = df_subset[
-            df_subset['Anzahl_Servicetechniker_Numeric'].notna() &
-            (df_subset['Anzahl_Servicetechniker_Numeric'] > 0)
-        ].copy()
-        filtered_rows = len(df_filtered)
-        debug_print(f"{initial_rows - filtered_rows} Zeilen entfernt (fehlende/ungültige Technikerzahl).")
-        debug_print(f"Verbleibende Zeilen für Modellierung: {filtered_rows}")
-        if filtered_rows == 0: return None
-        bins = [-1, 0, 19, 49, 99, 249, 499, float('inf')]
-        labels = ['Bucket_1_(0)', 'Bucket_2_(<20)', 'Bucket_3_(<50)', 'Bucket_4_(<100)', 'Bucket_5_(<250)', 'Bucket_6_(<500)', 'Bucket_7_(>499)']
-        df_filtered['Techniker_Bucket'] = pd.cut(
-            df_filtered['Anzahl_Servicetechniker_Numeric'],
-            bins=bins, labels=labels, right=True
-        )
-        debug_print("Techniker-Buckets erstellt.")
-        debug_print(f"Verteilung der Buckets:\n{df_filtered['Techniker_Bucket'].value_counts(normalize=True).round(3)}")
-        branche_col = "branche"
-        debug_print(f"Verarbeite kategoriales Feature '{branche_col}'...")
-        df_filtered[branche_col] = df_filtered[branche_col].astype(str).fillna('Unbekannt').str.strip()
-        df_encoded = pd.get_dummies(df_filtered, columns=[branche_col], prefix='Branche', dummy_na=False)
-        debug_print(f"One-Hot Encoding für Branche durchgeführt.")
-        feature_columns = [col for col in df_encoded.columns if col.startswith('Branche_')]
-        feature_columns.extend(['Finaler_Umsatz', 'Finaler_Mitarbeiter'])
-        target_column = 'Techniker_Bucket'
-        original_data_cols = ['name', 'Anzahl_Servicetechniker_Numeric'] # Keep original tech number for reference if needed
-        df_model_ready = df_encoded[original_data_cols + feature_columns + [target_column]].copy()
-        for col in ['Finaler_Umsatz', 'Finaler_Mitarbeiter']:
-             df_model_ready[col] = pd.to_numeric(df_model_ready[col], errors='coerce')
-        df_model_ready = df_model_ready.reset_index(drop=True)
-        debug_print("Datenvorbereitung abgeschlossen.")
-        nan_counts = df_model_ready[['Finaler_Umsatz', 'Finaler_Mitarbeiter']].isna().sum()
-        debug_print(f"Fehlende Werte in numerischen Features vor Imputation:\n{nan_counts}")
-        return df_model_ready
-    except Exception as e:
-        debug_print(f"FEHLER während der Datenvorbereitung: {e}")
-        import traceback
-        debug_print(traceback.format_exc())
-        return None
-
-def retry_on_failure(func): # unverändert
+# ==================== RETRY-DECORATOR ====================
+def retry_on_failure(func): # Unverändert gegenüber v1.6.4
     def wrapper(*args, **kwargs):
         func_name = func.__name__
         self_arg = args[0] if args and hasattr(args[0], func_name) else None
         effective_func_name = f"{self_arg.__class__.__name__}.{func_name}" if self_arg else func_name
+
         for attempt in range(Config.MAX_RETRIES):
-            try: return func(*args, **kwargs)
+            try:
+                return func(*args, **kwargs)
             except Exception as e:
-                error_msg = str(e); wait_time = Config.RETRY_DELAY * (attempt + 1)
-                log_prefix = f"🚦 Rate Limit bei {effective_func_name}" if isinstance(e, gspread.exceptions.APIError) and e.response.status_code == 429 else f"⚠️ Fehler bei {effective_func_name}"
-                print(f"{log_prefix} (Versuch {attempt+1}/{Config.MAX_RETRIES}). Warte {wait_time}s... Fehler: {type(e).__name__} - {error_msg[:100]}")
-                if attempt < Config.MAX_RETRIES - 1: time.sleep(wait_time)
-                else: print(f"❌ Endgültiger Fehler bei {effective_func_name}."); return None
-        return None
+                error_msg = str(e)
+                wait_time = Config.RETRY_DELAY * (attempt + 1) # Linear backoff: delay grows by RETRY_DELAY per attempt
+
+                if isinstance(e, gspread.exceptions.APIError):
+                    if e.response.status_code == 429: # Rate Limit
+                        print(f"🚦 Rate Limit bei {effective_func_name} (Versuch {attempt+1}). Warte {wait_time}s...")
+                        # Keine zusätzliche Fehlermeldung bei Rate Limit nötig
+                    else:
+                        print(f"⚠️ Google API Fehler bei {effective_func_name} (Versuch {attempt+1}): Status {e.response.status_code} - {error_msg[:150]}")
+                elif isinstance(e, requests.exceptions.RequestException):
+                     print(f"⚠️ Netzwerkfehler bei {effective_func_name} (Versuch {attempt+1}): {error_msg[:150]}")
+                elif isinstance(e, openai.error.OpenAIError):
+                     print(f"⚠️ OpenAI Fehler bei {effective_func_name} (Versuch {attempt+1}): {error_msg[:150]}")
+                else:
+                    print(f"⚠️ Unbekannter Fehler bei {effective_func_name} (Versuch {attempt+1}): {type(e).__name__} - {error_msg[:150]}")
+
+                if attempt < Config.MAX_RETRIES - 1:
+                     time.sleep(wait_time)
+                else:
+                     print(f"❌ Endgültiger Fehler bei {effective_func_name} nach {Config.MAX_RETRIES} Versuchen.")
+                     # Die aufrufende Funktion muss mit None umgehen können
+                     return None
+        return None # Fallback, sollte nicht erreicht werden
     return wrapper
 
-def create_log_filename(mode): # unverändert
-    if not os.path.exists(LOG_DIR): os.makedirs(LOG_DIR)
+# ==================== LOGGING & HELPER FUNCTIONS ====================
+
+def create_log_filename(mode): # Unverändert
+    if not os.path.exists(LOG_DIR):
+        os.makedirs(LOG_DIR)
     now = datetime.now().strftime("%d-%m-%Y_%H-%M")
     ver_short = Config.VERSION.replace(".", "")
     return os.path.join(LOG_DIR, f"{now}_{ver_short}_Modus{mode}.txt")
 
-def debug_print(message): # unverändert
+def debug_print(message): # Unverändert
     global LOG_FILE
     log_message = f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {message}"
-    if Config.DEBUG: print(log_message)
+    if Config.DEBUG:
+        print(log_message)
     if LOG_FILE:
         try:
-            with open(LOG_FILE, "a", encoding="utf-8") as f: f.write(log_message + "\n")
-        except Exception as e: print(f"[CRITICAL] Log-Schreibfehler: {e}")
+            # Verwende 'with' korrekt für das Dateihandling
+            with open(LOG_FILE, "a", encoding="utf-8") as f:
+                f.write(log_message + "\n")
+        except Exception as e:
+            # Kritischer Fehler, wenn Log nicht geschrieben werden kann
+            print(f"[CRITICAL] Log-Schreibfehler: {e}")
 
-def simple_normalize_url(url): # unverändert
-    if not url or not isinstance(url, str): return "k.A."
+
+def simple_normalize_url(url): # Unverändert
+    """Normalisiert URL zu www.domain.tld oder k.A."""
+    if not url or not isinstance(url, str):
+        return "k.A."
     url = url.strip()
-    if not url: return "k.A."
-    if not url.lower().startswith(("http://", "https://")): url = "https://" + url
+    if not url:
+        return "k.A."
+    if not url.lower().startswith(("http://", "https://")):
+        url = "https://" + url
     try:
-        parsed = urlparse(url); domain_part = parsed.netloc.split(":", 1)[0]
+        parsed = urlparse(url)
+        domain_part = parsed.netloc
+        domain_part = domain_part.split(":", 1)[0] # Port entfernen
+        # Füge www. hinzu, wenn nicht vorhanden und Domain Punkte enthält (keine IP)
         if not domain_part.lower().startswith("www.") and '.' in domain_part:
-             if not re.match(r"^\d{1,3}(\.\d{1,3}){3}$", domain_part): domain_part = "www." + domain_part
+             if not re.match(r"^\d{1,3}(\.\d{1,3}){3}$", domain_part):
+                 domain_part = "www." + domain_part
         return domain_part.lower()
-    except Exception as e: debug_print(f"Fehler bei URL-Normalisierung '{url}': {e}"); return "k.A."
+    except Exception as e:
+        debug_print(f"Fehler bei URL-Normalisierung '{url}': {e}")
+        return "k.A."
 
-def normalize_string(s): # unverändert
+def normalize_string(s): # Unverändert
+    """Normalisiert Umlaute und Sonderzeichen."""
     if not s or not isinstance(s, str): return ""
     replacements = {'Ä': 'Ae', 'Ö': 'Oe', 'Ü': 'Ue', 'ß': 'ss', 'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'À': 'A', 'Á': 'A', 'Â': 'A', 'Ã': 'A', 'Å': 'A', 'Æ': 'AE', 'à': 'a', 'á': 'a', 'â': 'a', 'ã': 'a', 'å': 'a', 'æ': 'ae', 'Ç': 'C', 'ç': 'c', 'È': 'E', 'É': 'E', 'Ê': 'E', 'Ë': 'E', 'è': 'e', 'é': 'e', 'ê': 'e', 'ë': 'e', 'Ì': 'I', 'Í': 'I', 'Î': 'I', 'Ï': 'I', 'ì': 'i', 'í': 'i', 'î': 'i', 'ï': 'i', 'Ñ': 'N', 'ñ': 'n', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ø': 'O', 'ò': 'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ø': 'o', 'Œ': 'OE', 'œ': 'oe', 'Š': 'S', 'š': 's', 'Ž': 'Z', 'ž': 'z', 'Ý': 'Y', 'ý': 'y', 'ÿ': 'y', 'Đ': 'D', 'đ': 'd', 'č': 'c', 'Č': 'C', 'ć': 'c', 'Ć': 'C', 'ł': 'l', 'Ł': 'L', 'ğ': 'g', 'Ğ': 'G', 'ş': 's', 'Ş': 'S', 'ă': 'a', 'Ă': 'A', 'ı': 'i', 'İ': 'I', 'ň': 'n', 'Ň': 'N', 'ř': 'r', 'Ř': 'R', 'ő': 'o', 'Ő': 'O', 'ű': 'u', 'Ű': 'U', 'ț': 't', 'Ț': 'T', 'ș': 's', 'Ș': 'S'}
     try: s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore').decode('ascii')
@@ -304,30 +211,41 @@ def normalize_string(s): # unverändert
     for src, target in replacements.items(): s = s.replace(src, target)
     return s
 
-def clean_text(text): # unverändert
+def clean_text(text): # Leicht angepasst: Entfernt auch [Bearbeiten]-Links etc.
+    """Bereinigt Text von Wikipedia etc."""
     if not text: return "k.A."
     try:
-        text = str(text); text = unicodedata.normalize("NFKC", text)
-        text = re.sub(r'\[\d+\]', '', text); text = re.sub(r'\[.*?\]', '', text) # Auch [Bearbeiten] etc.
-        text = re.sub(r'\s+', ' ', text).strip(); return text if text else "k.A."
-    except Exception as e: debug_print(f"Fehler bei clean_text: {e}"); return "k.A."
+        text = str(text)
+        text = unicodedata.normalize("NFKC", text)
+        text = re.sub(r'\[\d+\]', '', text) # Entfernt [1], [2] etc.
+        text = re.sub(r'\[.*?\]', '', text) # Entfernt aggressiver [Bearbeiten], [Quelltext bearbeiten] etc.
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text if text else "k.A."
+    except Exception as e:
+        debug_print(f"Fehler bei clean_text: {e}")
+        return "k.A."
 
-def normalize_company_name(name): # unverändert
+def normalize_company_name(name): # Unverändert
+    """Entfernt Rechtsformzusätze etc. für Vergleiche."""
     if not name: return ""
-    name = clean_text(name)
+    name = clean_text(name) # Vorab bereinigen
     forms = [r'gmbh', r'ges\.?\s*m\.?\s*b\.?\s*h\.?', r'gesellschaft mit beschränkter haftung', r'ug', r'u\.g\.', r'unternehmergesellschaft', r'haftungsbeschränkt', r'ag', r'a\.g\.', r'aktiengesellschaft', r'ohg', r'o\.h\.g\.', r'offene handelsgesellschaft', r'kg', r'k\.g\.', r'kommanditgesellschaft', r'gmbh\s*&\s*co\.?\s*kg', r'ges\.?\s*m\.?\s*b\.?\s*h\.?\s*&\s*co\.?\s*k\.g\.?', r'ag\s*&\s*co\.?\s*kg', r'a\.g\.?\s*&\s*co\.?\s*k\.g\.?', r'e\.k\.', r'e\.kfm\.', r'e\.kfr\.', r'eingetragene[rn]? kauffrau', r'eingetragene[rn]? kaufmann', r'ltd\.?', r'limited', r'ltd\s*&\s*co\.?\s*kg', r's\.?a\.?r\.?l\.?', r'sàrl', r'sagl', r's\.?a\.?', r'société anonyme', r'sociedad anónima', r's\.?p\.?a\.?', r'società per azioni', r'b\.?v\.?', r'besloten vennootschap', r'n\.?v\.?', r'naamloze vennootschap', r'plc\.?', r'public limited company', r'inc\.?', r'incorporated', r'corp\.?', r'corporation', r'llc\.?', r'limited liability company', r'kgaa', r'kommanditgesellschaft auf aktien', r'se', r'societas europaea', r'e\.?g\.?', r'eingetragene genossenschaft', r'genossenschaft', r'genmbh', r'e\.?v\.?', r'eingetragener verein', r'verein', r'stiftung', r'ggmbh', r'gemeinnützige gmbh', r'gug', r'partg', r'partnerschaftsgesellschaft', r'partgmbb', r'og', r'o\.g\.', r'offene gesellschaft', r'e\.u\.', r'eingetragenes unternehmen', r'ges\.?n\.?b\.?r\.?', r'gesellschaft nach bürgerlichem recht', r'kollektivgesellschaft', r'einzelfirma', r'gruppe', r'holding', r'international', r'systeme', r'technik', r'logistik', r'solutions', r'services', r'management', r'consulting', r'produktion', r'vertrieb', r'entwicklung', r'maschinenbau', r'anlagenbau']
     pattern = r'\b(' + '|'.join(forms) + r')\b'
     normalized = re.sub(pattern, '', name, flags=re.IGNORECASE)
-    normalized = re.sub(r'[.,;:]', '', normalized); normalized = re.sub(r'[\-–/]', ' ', normalized)
-    normalized = re.sub(r'\s+', ' ', normalized).strip(); return normalized.lower()
+    normalized = re.sub(r'[.,;:]', '', normalized)
+    normalized = re.sub(r'[\-–/]', ' ', normalized)
+    normalized = re.sub(r'\s+', ' ', normalized).strip()
+    return normalized.lower()
 
 @retry_on_failure
-def is_valid_wikipedia_article_url(wiki_url): # unverändert
-    if not wiki_url or not wiki_url.lower().startswith(("http://", "https://")) or "wikipedia.org/wiki/" not in wiki_url: return False
+def is_valid_wikipedia_article_url(wiki_url): # Unverändert
+    """Prüft über die MediaWiki API, ob eine URL ein valider Artikel ist."""
+    if not wiki_url or not wiki_url.lower().startswith(("http://", "https://")) or "wikipedia.org/wiki/" not in wiki_url:
+        return False
     try:
         title = unquote(wiki_url.split('/wiki/', 1)[1]).replace('_', ' ')
         api_url = "https://de.wikipedia.org/w/api.php"
-        params = {"action": "query", "titles": title, "format": "json", "formatversion": 2, "redirects": 1}
+        params = { "action": "query", "titles": title, "format": "json", "formatversion": 2, "redirects": 1 }
         response = requests.get(api_url, params=params, timeout=5)
         response.raise_for_status(); data = response.json()
         if 'query' in data and 'pages' in data['query']:
@@ -342,62 +260,13 @@ def is_valid_wikipedia_article_url(wiki_url): # unverändert
         else: debug_print(f"  API Check '{title}': Bad format."); return False
     except Exception as e: debug_print(f"  API Check '{title}': Error - {e}"); return False
 
-def process_wiki_updates_from_chatgpt(sheet_handler, data_processor, row_limit=None): # unverändert
-    debug_print("Starte Modus: Wiki-Updates...")
-    if not sheet_handler.load_data(): return
-    all_data = sheet_handler.get_all_data_with_headers()
-    if not all_data or len(all_data) <= Config.HEADER_ROWS: return
-    data_rows = all_data[Config.HEADER_ROWS:]
-    required_keys = ["Chat Wiki Konsistenzprüfung", "Chat Vorschlag Wiki Artikel", "Wiki URL", "Wikipedia Timestamp", "Wiki Verif. Timestamp", "Timestamp letzte Prüfung", "Version", "ReEval Flag"]
-    col_indices = {}; all_keys_found = True
-    for key in required_keys:
-        idx = COLUMN_MAP.get(key); col_indices[key] = idx
-        if idx is None: debug_print(f"FEHLER: Key '{key}' fehlt!"); all_keys_found = False
-    if not all_keys_found: return
-    all_sheet_updates = []; processed_rows_count = 0; updated_url_count = 0; cleared_suggestion_count = 0
-    for idx, row in enumerate(data_rows):
-        row_num_in_sheet = idx + Config.HEADER_ROWS + 1
-        if row_limit is not None and processed_rows_count >= row_limit: break
-        def get_value(key):
-            index = col_indices.get(key)
-            if index is not None and len(row) > index: return row[index]
-            return ""
-        konsistenz_s = get_value("Chat Wiki Konsistenzprüfung"); vorschlag_u = get_value("Chat Vorschlag Wiki Artikel"); url_m = get_value("Wiki URL")
-        is_update_candidate = False; new_url = ""
-        konsistenz_s_upper = konsistenz_s.strip().upper(); vorschlag_u_cleaned = vorschlag_u.strip(); url_m_cleaned = url_m.strip()
-        condition1_status_nok = konsistenz_s_upper not in ["OK", "X (UPDATED)", "X (URL COPIED)", "X (INVALID SUGGESTION)", ""]
-        condition2_u_is_url = vorschlag_u_cleaned.lower().startswith(("http://", "https://")) and "wikipedia.org/wiki/" in vorschlag_u_cleaned.lower()
-        condition3_u_differs_m = False; condition4_u_is_valid = False
-        if condition1_status_nok and condition2_u_is_url:
-            new_url = vorschlag_u_cleaned; condition3_u_differs_m = new_url != url_m_cleaned
-            if condition3_u_differs_m: condition4_u_is_valid = is_valid_wikipedia_article_url(new_url)
-        is_update_candidate = condition1_status_nok and condition2_u_is_url and condition3_u_differs_m and condition4_u_is_valid
-        clear_invalid_suggestion = condition1_status_nok and not is_update_candidate
-        if is_update_candidate:
-            debug_print(f"Zeile {row_num_in_sheet}: Update-Kandidat VALIDIERUNG ERFOLGREICH.")
-            processed_rows_count += 1; updated_url_count += 1
-            m_l=sheet_handler._get_col_letter(col_indices["Wiki URL"]+1); s_l=sheet_handler._get_col_letter(col_indices["Chat Wiki Konsistenzprüfung"]+1); u_l=sheet_handler._get_col_letter(col_indices["Chat Vorschlag Wiki Artikel"]+1); an_l=sheet_handler._get_col_letter(col_indices["Wikipedia Timestamp"]+1); ax_l=sheet_handler._get_col_letter(col_indices["Wiki Verif. Timestamp"]+1); ao_l=sheet_handler._get_col_letter(col_indices["Timestamp letzte Prüfung"]+1); ap_l=sheet_handler._get_col_letter(col_indices["Version"]+1); a_l=sheet_handler._get_col_letter(col_indices["ReEval Flag"]+1)
-            row_updates = [{'range': f'{m_l}{row_num_in_sheet}', 'values': [[new_url]]}, {'range': f'{s_l}{row_num_in_sheet}', 'values': [["X (URL Copied)"]]}, {'range': f'{u_l}{row_num_in_sheet}', 'values': [["URL übernommen"]]}, {'range': f'{an_l}{row_num_in_sheet}', 'values': [[""]]}, {'range': f'{ax_l}{row_num_in_sheet}', 'values': [[""]]}, {'range': f'{ao_l}{row_num_in_sheet}', 'values': [[""]]}, {'range': f'{ap_l}{row_num_in_sheet}', 'values': [[""]]}, {'range': f'{a_l}{row_num_in_sheet}', 'values': [["x"]]}]
-            all_sheet_updates.extend(row_updates)
-        elif clear_invalid_suggestion:
-            debug_print(f"Zeile {row_num_in_sheet}: Status S war '{konsistenz_s}', aber Vorschlag U ('{vorschlag_u_cleaned}') ungültig/identisch. Lösche U und setze Status S.")
-            processed_rows_count += 1; cleared_suggestion_count += 1
-            s_l=sheet_handler._get_col_letter(col_indices["Chat Wiki Konsistenzprüfung"]+1); u_l=sheet_handler._get_col_letter(col_indices["Chat Vorschlag Wiki Artikel"]+1)
-            row_updates = [{'range': f'{s_l}{row_num_in_sheet}', 'values': [["X (Invalid Suggestion)"]]}, {'range': f'{u_l}{row_num_in_sheet}', 'values': [[""]]}]
-            all_sheet_updates.extend(row_updates)
-    if all_sheet_updates:
-        debug_print(f"BEREIT ZUM SENDEN: Batch-Update für {processed_rows_count} geprüfte Zeilen...")
-        success = sheet_handler.batch_update_cells(all_sheet_updates)
-        if success: debug_print(f"Sheet-Update für Wiki-Updates erfolgreich.")
-        else: debug_print(f"FEHLER beim Sheet-Update für Wiki-Updates.")
-    else: debug_print("Keine Zeilen gefunden, die eine Korrektur benötigen.")
-    debug_print(f"Wiki-Updates abgeschlossen. {processed_rows_count} geprüft. {updated_url_count} kopiert/markiert, {cleared_suggestion_count} gelöscht/markiert.")
-
 def extract_numeric_value(raw_value, is_umsatz=False): # Leicht angepasst für Robustheit
+    """Extrahiert und normalisiert Zahlenwerte (Umsatz in Mio, Mitarbeiter)."""
     if pd.isna(raw_value) or raw_value == '': return "k.A."
     raw_value = clean_text(str(raw_value))
     if raw_value == "k.A.": return "k.A."
 
+    # Entferne Präfixe, Währungen etc.
     processed_value = re.sub(r'(?i)\b(ca\.?|circa|über|unter|rund|etwa|mehr als|weniger als|bis zu)\b', '', raw_value).strip()
     processed_value = re.sub(r'[€$£¥]', '', processed_value).strip()
 
@@ -408,244 +277,334 @@ def extract_numeric_value(raw_value, is_umsatz=False): # Leicht angepasst für R
         processed_value = processed_value.replace(',', '.')
     # Wenn nur Punkt, lasse vorerst (kann Dezimal oder Tausender sein)
 
-    match = re.search(r'([\d.,]+)', processed_value) # Finde erste Zahl(engruppe)
-    if not match: return "k.A."
+    # Finde die erste Zahl(engruppe) inklusive möglicher Tausender-/Dezimaltrennzeichen
+    match = re.search(r'([\d.,]+)', processed_value)
+    if not match:
+        # debug_print(f"Keine Zahl gefunden in: '{raw_value}' -> '{processed_value}'")
+        return "k.A."
 
     num_str = match.group(1)
-    # Entferne Tausenderpunkte VOR Umwandlung
+    # Entferne Tausenderpunkte VOR Umwandlung, falls noch vorhanden
     if '.' in num_str and num_str.count('.') > 1: # Mehrere Punkte -> Tausender
          num_str = num_str.replace('.', '')
     # Komma wurde bereits zu Punkt
 
-    try: num = float(num_str)
-    except ValueError: debug_print(f"Float-Umwandlung fehlgeschlagen: '{num_str}' aus '{raw_value}'"); return "k.A."
+    try:
+        num = float(num_str)
+    except ValueError:
+        # debug_print(f"Float-Umwandlung fehlgeschlagen: '{num_str}' aus '{raw_value}'")
+        return "k.A."
 
-    # Multiplikatoren (Groß-/Kleinschreibung ignorieren)
+    # Multiplikatoren anwenden (Groß/Kleinschreibung ignorieren)
     raw_lower = raw_value.lower(); multiplier = 1.0
+    # Suche nach Multiplikatoren im *Originaltext*, nicht nur in der extrahierten Zahl
     if "mrd" in raw_lower or "milliarden" in raw_lower or "billion" in raw_lower: multiplier = 1000.0 # Für Umsatz in Mio
     elif "mio" in raw_lower or "millionen" in raw_lower or "mill." in raw_lower: multiplier = 1.0
-    elif "tsd" in raw_lower or "tausend" in raw_lower: multiplier = 0.001 if is_umsatz else 1000.0 # Umsatz in Mio, MA direkt
+    elif "tsd" in raw_lower or "tausend" in raw_lower:
+         multiplier = 0.001 if is_umsatz else 1000.0 # Umsatz in Mio, MA direkt * 1000
 
     num = num * multiplier
 
-    if is_umsatz: return str(int(round(num))) # Umsatz immer in Mio, Ganzzahl
-    else: return str(int(round(num))) # Mitarbeiter als Ganzzahl
+    # Runde auf Ganzzahl und konvertiere zu String
+    if is_umsatz:
+        # Umsatz immer auf Millionen runden (Ganzzahl)
+        return str(int(round(num)))
+    else:
+        # Mitarbeiter als Ganzzahl
+        return str(int(round(num)))
 
-def get_gender(firstname): # unverändert
+def get_gender(firstname): # Unverändert
+    """Ermittelt Geschlecht via gender-guesser und Fallback Genderize API."""
     if not firstname or not isinstance(firstname, str): return "unknown"
-    firstname = firstname.strip().split(" ")[0]
+    firstname = firstname.strip().split(" ")[0] # Nur ersten Teil des Vornamens
     if not firstname: return "unknown"
-    d = gender.Detector(case_sensitive=False); result = d.get_gender(firstname, 'germany')
+
+    d = gender.Detector(case_sensitive=False)
+    result = d.get_gender(firstname, 'germany')
     if result in ["andy", "unknown", "mostly_male", "mostly_female"]:
         genderize_key = Config.API_KEYS.get('genderize')
-        if not genderize_key: return result if result not in ["andy", "unknown"] else "unknown"
+        if not genderize_key:
+            # debug_print("Genderize API-Schlüssel nicht verfügbar.")
+            return result if result not in ["andy", "unknown"] else "unknown" # Behalte mostly_, sonst unknown
+
         params = {"name": firstname, "apikey": genderize_key, "country_id": "DE"}
         try:
             response = requests.get("https://api.genderize.io", params=params, timeout=5)
-            response.raise_for_status(); data = response.json()
-            api_gender = data.get("gender"); probability = data.get("probability", 0)
-            if api_gender and probability > 0.6: return api_gender
-            else: return result if result not in ["andy", "unknown"] else "unknown"
-        except Exception as e: debug_print(f"Fehler Genderize API für '{firstname}': {e}"); return result if result not in ["andy", "unknown"] else "unknown"
-    else: return result
+            response.raise_for_status()
+            data = response.json()
+            api_gender = data.get("gender")
+            probability = data.get("probability", 0)
+            if api_gender and probability > 0.6: # Nur bei ausreichender Sicherheit
+                return api_gender
+            else:
+                return result if result not in ["andy", "unknown"] else "unknown"
+        except requests.exceptions.RequestException as e:
+            debug_print(f"Fehler bei Genderize API für '{firstname}': {e}")
+            return result if result not in ["andy", "unknown"] else "unknown"
+        except Exception as e:
+            debug_print(f"Allgemeiner Fehler bei Genderize für '{firstname}': {e}")
+            return result if result not in ["andy", "unknown"] else "unknown"
+    else: # male, female
+        return result
 
-def get_email_address(firstname, lastname, website): # unverändert
-    if not all([firstname, lastname, website]) or not all(isinstance(x, str) for x in [firstname, lastname, website]): return ""
+def get_email_address(firstname, lastname, website): # Unverändert
+    """Generiert E-Mail: vorname.nachname@domain.tld."""
+    if not all([firstname, lastname, website]) or not all(isinstance(x, str) for x in [firstname, lastname, website]):
+        return ""
     domain = simple_normalize_url(website)
     if domain == "k.A." or not '.' in domain: return ""
     if domain.startswith("www."): domain = domain[4:]
-    normalized_first = normalize_string(firstname.lower()); normalized_last = normalize_string(lastname.lower())
+    normalized_first = normalize_string(firstname.lower())
+    normalized_last = normalize_string(lastname.lower())
     normalized_first = re.sub(r'\s+', '-', normalized_first); normalized_last = re.sub(r'\s+', '-', normalized_last)
     normalized_first = re.sub(r'[^\w\-]+', '', normalized_first); normalized_last = re.sub(r'[^\w\-]+', '', normalized_last)
     if normalized_first and normalized_last and domain: return f"{normalized_first}.{normalized_last}@{domain}"
     else: return ""
 
-def fuzzy_similarity(str1, str2): # unverändert
+def fuzzy_similarity(str1, str2): # Unverändert
+    """Berechnet Ähnlichkeit zwischen 0 und 1."""
     if not str1 or not str2: return 0.0
     return SequenceMatcher(None, str(str1).lower(), str(str2).lower()).ratio()
 
-def evaluate_branche_chatgpt(crm_branche, beschreibung, wiki_branche, wiki_kategorien, website_summary): # unverändert
-    global ALLOWED_TARGET_BRANCHES, TARGET_SCHEMA_STRING
-    if not ALLOWED_TARGET_BRANCHES: debug_print("FEHLER evaluate_branche: Schema leer."); return {"branch": crm_branche, "consistency": "error_schema_missing", "justification": "Fehler: Schema nicht geladen"}
-    allowed_branches_lookup = {b.lower(): b for b in ALLOWED_TARGET_BRANCHES}
-    prompt_parts = [TARGET_SCHEMA_STRING, "\nOrdne das Unternehmen anhand folgender Angaben exakt einer Branche des Ziel-Branchenschemas (Kurzformen) zu:"]
-    if crm_branche and crm_branche != "k.A.": prompt_parts.append(f"- CRM-Branche (Referenz): {crm_branche}")
-    if beschreibung and beschreibung != "k.A.": prompt_parts.append(f"- Beschreibung: {beschreibung[:500]}")
-    if wiki_branche and wiki_branche != "k.A.": prompt_parts.append(f"- Wikipedia-Branche: {wiki_branche}")
-    if wiki_kategorien and wiki_kategorien != "k.A.": prompt_parts.append(f"- Wikipedia-Kategorien: {wiki_kategorien[:500]}")
-    if website_summary and website_summary != "k.A.": prompt_parts.append(f"- Website-Zusammenfassung: {website_summary[:500]}")
-    if len(prompt_parts) <= 2: debug_print("Warnung evaluate_branche: Zu wenige Infos."); return {"branch": crm_branche, "consistency": "error_no_info", "justification": "Fehler: Zu wenige Informationen"}
-    prompt_parts.append("\nWICHTIG: Antworte NUR mit dem exakten Kurznamen einer Branche aus der obigen Liste. KEINE Präfixe.")
-    prompt_parts.append("\nAntworte ausschließlich im Format:")
-    prompt_parts.append("Branche: <Exakter Kurzname>"); prompt_parts.append("Übereinstimmung: <ok oder X>"); prompt_parts.append("Begründung: <Sehr kurze Begründung>")
-    prompt = "\n".join(prompt_parts)
-    chat_response = call_openai_chat(prompt, temperature=0.0)
-    if not chat_response: debug_print("Fehler evaluate_branche: Keine API Antwort."); return {"branch": crm_branche, "consistency": "error_api_no_response", "justification": "Fehler: Keine Antwort API"}
-    lines = chat_response.strip().split("\n"); result = {"branch": None, "consistency": None, "justification": ""}; suggested_branch = ""
-    for line in lines:
-        line_lower = line.lower()
-        if line_lower.startswith("branche:"): suggested_branch = line.split(":", 1)[1].strip().strip('"\'')
-        elif line_lower.startswith("begründung:"): result["justification"] = line.split(":", 1)[1].strip()
-    if not suggested_branch: debug_print(f"Fehler evaluate_branche: Parsing: {chat_response}"); return {"branch": crm_branche, "consistency": "error_parsing", "justification": f"Fehler: Parsing API Antwort. Antwort: {chat_response}"}
-    final_branch = None; suggested_branch_lower = suggested_branch.lower()
-    if suggested_branch_lower in allowed_branches_lookup:
-        final_branch = allowed_branches_lookup[suggested_branch_lower]; result["consistency"] = "pending_comparison"
-        debug_print(f"ChatGPT-Vorschlag '{suggested_branch}' gültig ('{final_branch}').")
-    else:
-        debug_print(f"ChatGPT-Vorschlag '{suggested_branch}' ungültig. Fallback...")
-        crm_short_branch = "k.A."
-        if crm_branche and ">" in crm_branche: crm_short_branch = crm_branche.split(">", 1)[1].strip()
-        elif crm_branche and crm_branche != "k.A.": crm_short_branch = crm_branche.strip()
-        if crm_short_branch != "k.A." and crm_short_branch.lower() in allowed_branches_lookup:
-            final_branch = allowed_branches_lookup[crm_short_branch.lower()]
-            result["consistency"] = "fallback_crm_valid"
-            fallback_reason = f"Fallback: Ungültiger ChatGPT-Vorschlag ('{suggested_branch}'). Gültige CRM-Kurzform '{final_branch}' verwendet."
-            result["justification"] = f"{fallback_reason} (ChatGPT: {result.get('justification', 'Keine')})"
-            debug_print(f"Fallback CRM erfolgreich: '{final_branch}'")
-        else:
-            final_branch = suggested_branch # Behalte ungültigen
-            result["consistency"] = "fallback_invalid"
-            error_reason = f"Fehler: Ungültiger ChatGPT ('{suggested_branch}') & ungültiger CRM Fallback ('{crm_short_branch}')."
-            result["justification"] = f"{error_reason} (ChatGPT: {result.get('justification', 'Keine')})"
-            debug_print(f"Fallback fehlgeschlagen. Ungültig: '{final_branch}', CRM: '{crm_short_branch}'")
-    result["branch"] = final_branch if final_branch else "FEHLER"
-    crm_short_to_compare = "k.A."
-    if crm_branche and ">" in crm_branche: crm_short_to_compare = crm_branche.split(">", 1)[1].strip()
-    elif crm_branche and crm_branche != "k.A.": crm_short_to_compare = crm_branche.strip()
-    if result["branch"] != "FEHLER" and result["branch"].lower() == crm_short_to_compare.lower():
-        if result["consistency"] == "pending_comparison": result["consistency"] = "ok"
-    elif result["consistency"] == "pending_comparison": result["consistency"] = "X"
-    if result["consistency"] == "pending_comparison": result["consistency"] = "error_comparison_failed"
-    debug_print(f"Finale Branch-Evaluation: {result}")
-    return result
 
-def load_target_schema(csv_filepath=BRANCH_MAPPING_FILE): # unverändert
-    global TARGET_SCHEMA_STRING, ALLOWED_TARGET_BRANCHES
-    allowed_branches_set = set(); debug_print(f"Lade Ziel-Schema aus '{csv_filepath}' Spalte A...")
+# ==================== BRANCH MAPPING & SCHEMA ====================
+
+def load_target_schema(csv_filepath=BRANCH_MAPPING_FILE): # Changed: now also resets the global BRANCH_MAPPING
+    """Lädt Liste erlaubter Ziele (Kurzformen) aus Spalte A der CSV."""
+    global TARGET_SCHEMA_STRING, ALLOWED_TARGET_BRANCHES, BRANCH_MAPPING # BRANCH_MAPPING wird hier geleert
+    BRANCH_MAPPING = {} # Leeren, da nicht mehr für Mapping genutzt
+    allowed_branches_set = set()
+    debug_print(f"Versuche, Ziel-Schema (Kurzformen) aus '{csv_filepath}' Spalte A zu laden...")
     line_count = 0
     try:
         with open(csv_filepath, encoding="utf-8-sig") as f:
             reader = csv.reader(f)
+            # Optional: Header überspringen
+            # next(reader, None)
             for row in reader:
                 line_count += 1
                 # if line_count <= 10 or line_count % 100 == 0: debug_print(f"Schema-Laden: Lese Zeile {line_count}: {row}")
-                if len(row) >= 1:
+                if len(row) >= 1: # Nur Spalte A (Index 0)
                     target = row[0].strip()
-                    if target: allowed_branches_set.add(target)
-                        # if line_count <= 10: debug_print(f"  -> '{target}' hinzugefügt.")
-    except FileNotFoundError: debug_print(f"Fehler: Schema-Datei '{csv_filepath}' nicht gefunden."); ALLOWED_TARGET_BRANCHES = []
-    except Exception as e: debug_print(f"Fehler beim Laden Schema '{csv_filepath}' (Zeile {line_count}): {e}"); ALLOWED_TARGET_BRANCHES = []
+                    if target: # Nur nicht-leere
+                        allowed_branches_set.add(target)
+                        # if line_count <= 10: debug_print(f"  -> '{target}' zum Set hinzugefügt.")
+    except FileNotFoundError:
+        debug_print(f"Fehler: Schema-Datei '{csv_filepath}' nicht gefunden.")
+        ALLOWED_TARGET_BRANCHES = []
+    except Exception as e:
+        debug_print(f"Fehler beim Laden des Ziel-Schemas aus '{csv_filepath}' (Zeile {line_count}): {e}")
+        ALLOWED_TARGET_BRANCHES = []
+
     ALLOWED_TARGET_BRANCHES = sorted(list(allowed_branches_set), key=str.lower)
-    debug_print(f"Ziel-Schema geladen: {len(ALLOWED_TARGET_BRANCHES)} Branchen.")
+    debug_print(f"Ziel-Schema geladen. {len(ALLOWED_TARGET_BRANCHES)} eindeutige Zielbranchen gefunden.")
+
     if ALLOWED_TARGET_BRANCHES:
-        # debug_print(f"Erste 10 Zielbranchen: {ALLOWED_TARGET_BRANCHES[:10]}")
+        # debug_print(f"Erste 10 geladene Zielbranchen: {ALLOWED_TARGET_BRANCHES[:10]}")
         schema_lines = ["Ziel-Branchenschema: Folgende Branchenbereiche sind gültig (Kurzformen):"]
         schema_lines.extend(f"- {branch}" for branch in ALLOWED_TARGET_BRANCHES)
-        schema_lines.append("Bitte ordne das Unternehmen ausschließlich in einen dieser Bereiche ein. Gib NUR den Kurznamen zurück.")
+        schema_lines.append("Bitte ordne das Unternehmen ausschließlich in einen dieser Bereiche ein. Gib NUR den Kurznamen der Branche zurück (keine Präfixe wie 'Hersteller / Produzenten >').")
         TARGET_SCHEMA_STRING = "\n".join(schema_lines)
-    else: TARGET_SCHEMA_STRING = "Ziel-Branchenschema nicht verfügbar."; ALLOWED_TARGET_BRANCHES = []
+    else:
+        TARGET_SCHEMA_STRING = "Ziel-Branchenschema nicht verfügbar (Datei leer oder Fehler)."
+        ALLOWED_TARGET_BRANCHES = []
 
+
+def map_external_branch(external_branch): # Veraltet, da evaluate_branche_chatgpt genutzt wird
+    """
+    Versucht, eine externe Branchenbezeichnung mithilfe des Mappings in das Ziel-Schema zu überführen.
+    (Diese Funktion wird aktuell nicht verwendet, da die Logik in evaluate_branche_chatgpt liegt)
+    """
+    if not external_branch or not isinstance(external_branch, str) or not BRANCH_MAPPING:
+        return external_branch
+    norm_external = normalize_string(external_branch).lower()
+    if norm_external in BRANCH_MAPPING: return BRANCH_MAPPING[norm_external]
+    sorted_keys = sorted(BRANCH_MAPPING.keys(), key=len, reverse=True)
+    for key in sorted_keys:
+        if key in norm_external:
+            debug_print(f"Teilstring-Match Branche: '{key}' in '{norm_external}' -> '{BRANCH_MAPPING[key]}'")
+            return BRANCH_MAPPING[key]
+    # debug_print(f"Kein Mapping für externe Branche '{external_branch}' gefunden.")
+    return external_branch
+
+
+# ==================== TOKEN COUNT FUNCTION ====================
 @retry_on_failure
-def token_count(text): # unverändert
+def token_count(text): # Unverändert
+    """Zählt Tokens via tiktoken oder schätzt über Leerzeichen."""
     if not text or not isinstance(text, str): return 0
     if tiktoken:
         try:
             if not hasattr(token_count, 'enc_cache'): token_count.enc_cache = {}
-            if Config.TOKEN_MODEL not in token_count.enc_cache: token_count.enc_cache[Config.TOKEN_MODEL] = tiktoken.encoding_for_model(Config.TOKEN_MODEL)
-            enc = token_count.enc_cache[Config.TOKEN_MODEL]; return len(enc.encode(text))
-        except Exception as e: debug_print(f"Fehler Token-Counting tiktoken '{Config.TOKEN_MODEL}': {e}"); return len(text.split())
-    else: return len(text.split())
+            if Config.TOKEN_MODEL not in token_count.enc_cache:
+                 token_count.enc_cache[Config.TOKEN_MODEL] = tiktoken.encoding_for_model(Config.TOKEN_MODEL)
+            enc = token_count.enc_cache[Config.TOKEN_MODEL]
+            return len(enc.encode(text))
+        except Exception as e:
+            debug_print(f"Fehler beim Token-Counting mit tiktoken für Modell '{Config.TOKEN_MODEL}': {e}")
+            return len(text.split()) # Fallback zur Schätzung
+    else:
+        return len(text.split()) # Fallback Schätzung
 
-# --- GoogleSheetHandler (unverändert lassen) ---
-class GoogleSheetHandler: # unverändert
+# ==================== GOOGLE SHEET HANDLER ====================
+class GoogleSheetHandler:
     def __init__(self):
-        self.sheet = None; self.sheet_values = []; self.headers = []
-        try: self._connect();
-        except Exception as e: debug_print(f"FATAL GSheet Init: {e}"); raise ConnectionError(f"GSheet Handler Init failed: {e}")
-        if self.sheet: self.load_data()
+        """Initialisiert den Handler, verbindet und lädt initiale Daten."""
+        self.sheet = None
+        self.sheet_values = []
+        self.headers = [] # Speichert die erste Zeile als Header-Namen
+        try:
+            self._connect()
+            if self.sheet:
+                 self.load_data() # Erste Datenladung bei Initialisierung
+        except Exception as e:
+            debug_print(f"FATAL: Fehler bei Initialisierung von GoogleSheetHandler: {e}")
+            raise ConnectionError(f"Google Sheet Handler Init failed: {e}")
+
     @retry_on_failure
     def _connect(self):
-        self.sheet = None; debug_print("Verbinde mit Google Sheets...")
+        """Stellt Verbindung zum Google Sheet her."""
+        self.sheet = None
+        debug_print("Verbinde mit Google Sheets...")
         try:
-            scope = ["https://www.googleapis.com/auth/spreadsheets"]
-            creds = ServiceAccountCredentials.from_json_keyfile_name(CREDENTIALS_FILE, scope)
-            gc = gspread.authorize(creds); sh = gc.open_by_url(Config.SHEET_URL); self.sheet = sh.sheet1
-            debug_print("Verbindung Google Sheets OK.")
-        except gspread.exceptions.APIError as e: debug_print(f"FEHLER Google API Verbindung: {e.response.status_code} - {e.response.text[:200]}"); raise e
-        except Exception as e: debug_print(f"FEHLER Google Sheets Verbindung: {type(e).__name__} - {e}"); raise e
+             scope = ["https://www.googleapis.com/auth/spreadsheets"]
+             creds = ServiceAccountCredentials.from_json_keyfile_name(CREDENTIALS_FILE, scope)
+             gc = gspread.authorize(creds)
+             sh = gc.open_by_url(Config.SHEET_URL)
+             self.sheet = sh.sheet1
+             debug_print("Verbindung zu Google Sheets erfolgreich.")
+        except gspread.exceptions.APIError as e:
+             debug_print(f"FEHLER bei Google API Verbindung: Status {e.response.status_code} - {e.response.text[:200]}")
+             raise e
+        except Exception as e:
+             debug_print(f"FEHLER bei der Google Sheets Verbindung: {type(e).__name__} - {e}")
+             raise e
+
     @retry_on_failure
     def load_data(self):
-        if not self.sheet: debug_print("Fehler: Keine Sheet-Verbindung für load_data."); self.sheet_values = []; self.headers = []; return False
-        debug_print("Lade Daten aus Google Sheet...");
+        """Lädt alle Daten aus dem Sheet und aktualisiert self.sheet_values und self.headers."""
+        if not self.sheet:
+            debug_print("Fehler: Keine Sheet-Verbindung zum Laden der Daten.")
+            self.sheet_values = []; self.headers = []; return False
+        debug_print("Lade Daten aus Google Sheet...")
         try:
             self.sheet_values = self.sheet.get_all_values()
-            if not self.sheet_values: debug_print("Warnung: Sheet leer."); self.headers = []; return True
+            if not self.sheet_values:
+                debug_print("Warnung: Google Sheet scheint leer zu sein.")
+                self.headers = []; return True # Leer ist kein Fehler
+            # Setze Header nur, wenn Daten vorhanden
             if len(self.sheet_values) >= 1: self.headers = self.sheet_values[0]
             else: self.headers = []
-            debug_print(f"Daten neu geladen: {len(self.sheet_values)} Zeilen."); return True
-        except gspread.exceptions.APIError as e: debug_print(f"Google API Fehler Laden: {e.response.status_code} - {e.response.text[:200]}"); raise e
-        except Exception as e: debug_print(f"Allg. Fehler Laden: {e}"); raise e
+            debug_print(f"Daten neu geladen: {len(self.sheet_values)} Zeilen insgesamt.")
+            return True
+        except gspread.exceptions.APIError as e:
+             debug_print(f"Google API Fehler beim Laden der Sheet Daten: Status {e.response.status_code} - {e.response.text[:200]}")
+             raise e
+        except Exception as e:
+             debug_print(f"Allgemeiner Fehler beim Laden der Google Sheet Daten: {e}")
+             raise e
+
     def get_data(self):
+        """Gibt die aktuell im Handler gespeicherten Daten zurück (ohne Header)."""
+        # Nutzt Config.HEADER_ROWS
         if not self.sheet_values or len(self.sheet_values) <= Config.HEADER_ROWS:
-             if self.sheet_values: debug_print(f"Warnung get_data: Nur {len(self.sheet_values)} Zeilen.")
+             if self.sheet_values:
+                 debug_print(f"Warnung in get_data: Nur {len(self.sheet_values)} Zeilen vorhanden, weniger als {Config.HEADER_ROWS} Header erwartet.")
              return []
         return self.sheet_values[Config.HEADER_ROWS:]
+
     def get_all_data_with_headers(self):
-         if not self.sheet_values: debug_print("Warnung get_all_data_with_headers: Keine Daten."); return []
+         """Gibt alle aktuell im Handler gespeicherten Daten inklusive Header zurück."""
+         if not self.sheet_values:
+             debug_print("Warnung in get_all_data_with_headers: Keine Daten im Handler gespeichert.")
+             return []
          return self.sheet_values
+
     def _get_col_letter(self, col_idx_1_based):
+        """ Konvertiert 1-basierten Spaltenindex in Buchstaben (A, B, ..., Z, AA, ...). """
         string = ""; n = col_idx_1_based
         if n < 1: return None
-        while n > 0: n, remainder = divmod(n - 1, 26); string = chr(65 + remainder) + string
+        while n > 0:
+            n, remainder = divmod(n - 1, 26)
+            string = chr(65 + remainder) + string
         return string
+
     def get_start_row_index(self, check_column_key, min_sheet_row=7):
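+        """
+        Reload the sheet and return the 0-based data-row index (header rows excluded) of the first row,
+        at or after sheet row min_sheet_row, whose check_column_key cell is exactly ""; a row too short to hold that column counts as empty.
+        Returns -1 if load_data() reports failure; otherwise 0 when there are no data rows, -1 when the key is missing from COLUMN_MAP, and len(data rows) when no empty cell is found.
+        """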
         if not self.load_data(): return -1
-        data_rows = self.get_data()
-        if not data_rows: return 0
+        # Nutzt Config.HEADER_ROWS
+        data_rows = self.get_data() # Holt Daten ohne Header
+        if not data_rows: return 0 # Wenn keine Daten (nur Header), starte bei Index 0
+
         check_column_index = COLUMN_MAP.get(check_column_key)
-        if check_column_index is None: debug_print(f"FEHLER: Key '{check_column_key}' nicht in COLUMN_MAP!"); return -1
+        if check_column_index is None:
+            debug_print(f"FEHLER: Schlüssel '{check_column_key}' nicht in COLUMN_MAP gefunden!")
+            return -1
+
         actual_col_letter = self._get_col_letter(check_column_index + 1)
+        # Berechne Startindex relativ zur data_rows Liste
         search_start_index_in_data = max(0, min_sheet_row - Config.HEADER_ROWS - 1)
-        debug_print(f"get_start_row_index: Suche ab Daten-Idx {search_start_index_in_data} nach LEER ('') in '{check_column_key}' ({actual_col_letter})...")
-        if search_start_index_in_data >= len(data_rows): debug_print(f"Start-Suchindex >= Datenlänge."); return len(data_rows)
+
+        debug_print(f"get_start_row_index: Suche ab Daten-Index {search_start_index_in_data} nach EXAKT LEEREM Wert (=='') in Spalte '{check_column_key}' ({actual_col_letter})...")
+
+        if search_start_index_in_data >= len(data_rows):
+            debug_print(f"Start-Suchindex ({search_start_index_in_data}) >= Datenlänge ({len(data_rows)}). Alle vorherigen Zeilen scheinen gefüllt.")
+            return len(data_rows) # Signalisiert, dass am Ende begonnen werden soll
+
         for i in range(search_start_index_in_data, len(data_rows)):
-            row = data_rows[i]; current_sheet_row = i + Config.HEADER_ROWS + 1
+            row = data_rows[i]
+            current_sheet_row = i + Config.HEADER_ROWS + 1
             cell_value = ""; is_exactly_empty = True
-            if len(row) > check_column_index: cell_value = row[check_column_index];
-            if cell_value != "": is_exactly_empty = False
-            # log_debug = (i == search_start_index_in_data or i % 1000 == 0 or is_exactly_empty or i in range(10110, 10116))
-            # if log_debug: debug_print(f"  -> Prüfe Daten-Idx {i} (Sheet {current_sheet_row}): Wert {actual_col_letter}='{cell_value}'. Leer? {is_exactly_empty}")
+            if len(row) > check_column_index:
+                cell_value = row[check_column_index]
+                if cell_value != "": is_exactly_empty = False
+            # Reduziertes Logging
+            # log_debug = (i == search_start_index_in_data or i % 1000 == 0 or is_exactly_empty)
+            # if log_debug: debug_print(f"  -> Prüfe Daten-Index {i} (Sheet {current_sheet_row}): Wert in {actual_col_letter}='{cell_value}'. Ist leer? {is_exactly_empty}")
             if is_exactly_empty:
-                debug_print(f"Erste Zeile ab {min_sheet_row} mit LEEREM Wert in {actual_col_letter} gefunden: Zeile {current_sheet_row} (Daten-Index {i})")
-                return i
+                debug_print(f"Erste Zeile ab {min_sheet_row} mit EXAKT LEEREM Wert in Spalte {actual_col_letter} gefunden: Zeile {current_sheet_row} (Daten-Index {i})")
+                return i # Gibt 0-basierten Index für data_rows zurück
+
+        # Wenn die Schleife durchläuft, wurde keine leere Zelle gefunden
         last_index = len(data_rows)
-        debug_print(f"Alle Zeilen ab Daten-Idx {search_start_index_in_data} nicht leer in {actual_col_letter}. Nächster Idx {last_index}.")
-        return last_index
+        debug_print(f"Alle Zeilen ab Daten-Index {search_start_index_in_data} haben einen nicht-leeren Wert in Spalte {actual_col_letter}. Nächster Daten-Index wäre {last_index}.")
+        return last_index # Nächster Index nach der letzten Zeile
+
     @retry_on_failure
     def batch_update_cells(self, update_data):
-        if not self.sheet: debug_print("FEHLER: Keine Sheet-Verbindung für Batch-Update."); return False
-        if not update_data: return True
+        """ Führt ein Batch-Update im Google Sheet durch. """
+        if not self.sheet:
+            debug_print("FEHLER: Keine Sheet-Verbindung für Batch-Update.")
+            return False
+        if not update_data: return True # Nichts zu tun ist Erfolg
+
         success = False
         try:
-            # debug_print(f"  -> Versuche sheet.batch_update mit {len(update_data)} Operationen...") # Weniger Lärm
+            # debug_print(f"  -> Versuche sheet.batch_update mit {len(update_data)} Operationen...")
             self.sheet.batch_update(update_data, value_input_option='USER_ENTERED')
             success = True
+            # debug_print(f"  -> sheet.batch_update erfolgreich.") # Log in aufrufender Funktion
         except gspread.exceptions.APIError as e:
-            debug_print(f"  -> FEHLER (Google API Error) Batch-Update: Status {e.response.status_code}")
+            debug_print(f"  -> FEHLER (Google API Error) beim Batch-Update: Status {e.response.status_code}")
             try: error_details = e.response.json(); debug_print(f"     -> Details: {str(error_details)[:500]}")
             except: debug_print(f"     -> Raw Response Text: {e.response.text[:500]}")
-            raise e
+            raise e # Damit retry greift
         except Exception as e:
-            debug_print(f"  -> FEHLER (Allgemein) Batch-Update: {type(e).__name__} - {e}")
-            import traceback; debug_print(traceback.format_exc())
-            raise e
+            debug_print(f"  -> FEHLER (Allgemein) beim Batch-Update: {type(e).__name__} - {e}")
+            debug_print(traceback.format_exc())
+            raise e # Damit retry greift
         return success
 
-# ==================== WIKIPEDIA SCRAPER (MODIFIZIERT) ====================
+# ==================== WIKIPEDIA SCRAPER ====================
 class WikipediaScraper:
+    # KEINE Fallback-Methode hier in v1.6.5, nur Anpassungen an _extract_infobox_value
     def __init__(self):
         try: wikipedia.set_lang(Config.LANG)
-        except Exception as e: debug_print(f"Fehler Setzen Wikipedia-Sprache: {e}")
+        except Exception as e: debug_print(f"Fehler beim Setzen der Wikipedia-Sprache: {e}")
 
     def _get_full_domain(self, website): # unverändert
         if not website: return ""; website = website.lower().strip()
@@ -701,38 +660,23 @@ class WikipediaScraper:
         else: debug_print(f" => Nicht validiert (Schwelle: {threshold:.2f})")
         return is_valid
 
-    def _extract_first_paragraph_from_soup(self, soup): # MODIFIZIERT: Logging hinzugefügt
+    def _extract_first_paragraph_from_soup(self, soup): # Mit Logging aus v1.6.5
         if not soup: return "k.A."
-        # Suche nach dem Hauptinhaltsbereich
         content_div = soup.find('div', class_='mw-parser-output')
-        if not content_div:
-            content_div = soup.find('div', id='bodyContent') # Fallback
-            if not content_div:
-                 content_div = soup # Fallback auf ganzen Soup
+        if not content_div: content_div = soup.find('div', id='bodyContent')
+        if not content_div: content_div = soup
 
-        # Finde alle <p>-Tags direkt unterhalb des content_div
-        # 'recursive=False' versucht, tiefer verschachtelte <p> (z.B. in Tabellen) zu vermeiden
         paragraphs = content_div.find_all('p', recursive=False)
-        if not paragraphs:
-             paragraphs = content_div.find_all('p', recursive=True) # Fallback: Alle <p>
-
-        debug_print(f"  Absatz-Extraktion: {len(paragraphs)} <p>-Tags gefunden (im Bereich {content_div.name if content_div != soup else 'soup'}).")
+        if not paragraphs: paragraphs = content_div.find_all('p', recursive=True)
 
+        debug_print(f"  Absatz-Extraktion: {len(paragraphs)} <p>-Tags gefunden (in {content_div.name if content_div != soup else 'soup'}).")
         for idx, p in enumerate(paragraphs):
-            # Ignoriere <p> innerhalb von Infoboxen oder anderen speziellen Containern
-            if p.find_parent(['table', 'aside', 'figure', 'div.thumb', 'div.gallery']):
-                # debug_print(f"    -> <p> {idx} übersprungen (in table/aside etc.)")
-                continue
-
+            if p.find_parent(['table', 'aside', 'figure', 'div.thumb', 'div.gallery']): continue
             text = clean_text(p.get_text())
-            debug_print(f"    -> Prüfe <p> {idx}: Text='{text[:100]}...' (Länge: {len(text)})")
-
-            # Nimm den ersten Absatz mit signifikanter Länge (mind. 50 Zeichen)
-            # und der nicht nur aus Koordinaten etc. besteht
+            # debug_print(f"    -> Prüfe <p> {idx}: Text='{text[:100]}...' (Länge: {len(text)})")
             if len(text) > 50 and not text.startswith("Koordinaten:"):
-                debug_print(f"      --> Erster signifikanter Absatz gefunden: '{text[:100]}...'")
-                return text[:1000] # Begrenze Länge
-
+                debug_print(f"      --> Erster signifikanter Absatz gefunden.")
+                return text[:1000]
         debug_print("  -> Kein signifikanter erster Absatz gefunden.")
         return "k.A."
 
@@ -746,37 +690,37 @@ class WikipediaScraper:
                 return ", ".join(cats) if cats else "k.A."
         return "k.A."
 
-    def _extract_infobox_value(self, soup, target): # MODIFIZIERT: Mehr Keywords, Logging HTML
+    def _extract_infobox_value(self, soup, target): # MODIFIZIERT: Logging HTML, erweiterte Keywords
          if not soup: return "k.A."
-         infobox = soup.find('table', class_=lambda c: c and any(kw in c.lower() for kw in ['infobox', 'vcard', 'unternehmen', 'konzern', 'organisation'])) # Flexiblere Suche
+         # Flexiblere Suche nach Infobox-Klassen
+         infobox = soup.find('table', class_=lambda c: c and any(kw in c.lower() for kw in ['infobox', 'vcard', 'unternehmen', 'konzern', 'organisation']))
          if not infobox:
              debug_print(f"  -> Infobox-Extraktion ('{target}'): Keine Infobox Tabelle gefunden.")
              return "k.A."
 
-         # --- NEU: Logge das HTML der gefundenen Infobox ---
+         # Logge das HTML der gefundenen Infobox für Debugging
          try:
               infobox_html = str(infobox)
               debug_print(f"  -> Infobox HTML gefunden (Auszug):\n------ INFOBOX HTML START -----\n{infobox_html[:1000]}...\n------ INFOBOX HTML END ------")
          except Exception as log_e:
               debug_print(f"  -> Fehler beim Loggen des Infobox HTML: {log_e}")
-         # --- Ende HTML Logging ---
 
          # Erweiterte Keywords (Deutsch & Englisch, Variationen)
          keywords_map = {
             'branche': [
                 'branche', 'branchen', 'industrie', 'tätigkeit', 'geschäftsfeld', 'sektor',
-                'produkte', 'leistungen', 'aktivitäten', 'wirtschaftszweig',
-                'industry', 'sector', 'business', 'products', 'services', 'field'
+                'produkte', 'leistungen', 'aktivitäten', 'wirtschaftszweig', 'produktpalette',
+                'industry', 'sector', 'business', 'products', 'services', 'field', 'area'
             ],
             'umsatz': [
                 'umsatz', 'jahresumsatz', 'konzernumsatz', 'gesamtumsatz', 'erlöse', 'umsatzerlöse',
-                'einnahmen', 'ergebnis', 'jahresergebnis', 'umsatz pro jahr',
-                'revenue', 'turnover', 'sales', 'income', 'earnings', 'annual revenue'
+                'einnahmen', 'ergebnis', 'jahresergebnis', 'umsatz pro jahr', 'geschäftsvolumen',
+                'revenue', 'turnover', 'sales', 'income', 'earnings', 'annual revenue', 'gross profit'
             ],
             'mitarbeiter': [
                 'mitarbeiter', 'mitarbeiterzahl', 'beschäftigte', 'personal', 'angestellte',
                 'belegschaft', 'personalstärke', 'kopfzahl', 'mitarbeitende', 'anzahl mitarbeiter',
-                'employees', 'number of employees', 'staff', 'headcount', 'workforce'
+                'employees', 'number of employees', 'staff', 'headcount', 'workforce', 'personnel'
             ]
          }
          keywords = keywords_map.get(target, [])
@@ -789,18 +733,15 @@ class WikipediaScraper:
              value_cell = row.find('td')
 
              if header and value_cell:
-                 # Hole Text aus th, ignoriere versteckte Elemente (z.B. in <style>)
                  header_text = header.get_text(separator=' ', strip=True)
                  header_text_lower = header_text.lower()
-                 # Hole Text aus td, ignoriere versteckte Elemente
                  raw_value_text = value_cell.get_text(separator=' ', strip=True)
 
-                 debug_print(f"    -> Prüfe Zeile {idx}: TH='{header_text}' | TD='{raw_value_text[:60]}...'")
+                 # debug_print(f"    -> Prüfe Zeile {idx}: TH='{header_text}' | TD='{raw_value_text[:60]}...'")
 
                  matched_keyword = None
                  for kw in keywords:
-                     # Prüfe, ob Keyword als ganzes Wort oder Teilstring im Header vorkommt
-                     # \b für Wortgrenzen, aber auch einfache Prüfung für Fälle wie "Mitarbeiterzahl"
+                     # Keyword check: word-boundary regex OR plain substring; the substring test subsumes the regex, so this is effectively a substring match
                      if re.search(r'\b' + re.escape(kw) + r'\b', header_text_lower) or kw in header_text_lower:
                          matched_keyword = kw
                          break
@@ -810,121 +751,33 @@ class WikipediaScraper:
                      cleaned_raw_value = clean_text(raw_value_text)
 
                      if target == 'branche':
-                         # Einfache Bereinigung von Referenzen und Klammern
                          clean_val = re.sub(r'\[\d+\]', '', cleaned_raw_value).strip()
                          clean_val = re.sub(r'\([^)]*\)', '', clean_val).strip()
-                         # Nimm nur den ersten Teil, falls durch Komma getrennt
-                         clean_val = clean_val.split(',')[0].strip()
+                         clean_val = clean_val.split(',')[0].strip() # Nimm ersten Teil bei Komma
                          value_found = clean_val if clean_val else "k.A."
                          debug_print(f"        --> Branche extrahiert: '{value_found}'")
-                         return value_found # Direkter Ausstieg bei Fund
+                         return value_found # Sofort zurückgeben bei Fund
                      elif target == 'umsatz':
                          numeric_val = extract_numeric_value(cleaned_raw_value, is_umsatz=True)
-                         if numeric_val != "k.A.": # Nur gültigen Wert übernehmen
+                         if numeric_val != "k.A.":
                              value_found = numeric_val
                              debug_print(f"        --> Umsatz extrahiert (aus '{cleaned_raw_value}'): '{value_found}'")
-                             return value_found # Direkter Ausstieg
-                         else:
-                              debug_print(f"        --> Umsatz: Extraktion aus '{cleaned_raw_value}' ergab 'k.A.'. Suche weiter...")
+                             return value_found # Sofort zurückgeben
+                         else: debug_print(f"        --> Umsatz: Extraktion aus '{cleaned_raw_value}' ergab 'k.A.'. Suche weiter...")
                      elif target == 'mitarbeiter':
                          numeric_val = extract_numeric_value(cleaned_raw_value, is_umsatz=False)
-                         if numeric_val != "k.A.": # Nur gültigen Wert übernehmen
+                         if numeric_val != "k.A.":
                              value_found = numeric_val
                              debug_print(f"        --> Mitarbeiter extrahiert (aus '{cleaned_raw_value}'): '{value_found}'")
-                             return value_found # Direkter Ausstieg
-                         else:
-                             debug_print(f"        --> Mitarbeiter: Extraktion aus '{cleaned_raw_value}' ergab 'k.A.'. Suche weiter...")
+                             return value_found # Sofort zurückgeben
+                         else: debug_print(f"        --> Mitarbeiter: Extraktion aus '{cleaned_raw_value}' ergab 'k.A.'. Suche weiter...")
 
          debug_print(f"  -> Kein passender Eintrag für '{target}' via TH/TD gefunden.")
-         return "k.A." # Standardwert, wenn nichts gefunden wurde
+         # KEIN FALLBACK HIER in v1.6.5
+         return "k.A." # Gibt k.A. zurück, wenn primäre Methode scheitert
 
-    # --- NEU: Fallback-Methode ---
-    def _extract_infobox_value_fallback(self, soup, target):
-        """
-        Fallback-Methode zur Extraktion von Branche, Umsatz oder Mitarbeiter
-        aus dem reinen Text der Infobox mittels RegEx.
-        """
-        if not soup: return "k.A."
-        infobox = soup.find('table', class_=lambda c: c and any(kw in c.lower() for kw in ['infobox', 'vcard', 'unternehmen', 'konzern', 'organisation']))
-        if not infobox: return "k.A." # Keine Infobox für Fallback
-
-        debug_print(f"  -> Starte Fallback-Extraktion für '{target}' via RegEx...")
-        try:
-            # Extrahiere den gesamten Text der Infobox, eine Zeile pro Zelle/Absatz
-            infobox_text = infobox.get_text(separator='\n', strip=True)
-            # Logge den Text für Debugging
-            # debug_print(f"  -> Fallback: Infobox Text:\n---\n{infobox_text[:500]}...\n---")
-        except Exception as e:
-            debug_print(f"  -> Fehler beim Extrahieren des Infobox-Textes für Fallback: {e}")
-            return "k.A."
-
-        value_found = "k.A."
-        lines = infobox_text.split('\n')
-
-        # Definiere Suchmuster (case-insensitive)
-        patterns = {}
-        if target == 'branche':
-            # Sucht nach Zeilen, die mit Keywords beginnen, gefolgt von einer Zeile mit dem Wert
-            patterns = [
-                r'^(Branche|Branchen|Industrie|Tätigkeit|Geschäftsfeld|Sektor|Industry|Sector|Business|Products|Services)\s*$', # Zeile mit Keyword
-                r'^(?!\b(Umsatz|Mitarbeiter|Revenue|Employees)\b)(.+)' # Nächste Zeile ist der Wert (nicht Umsatz/MA)
-            ]
-        elif target == 'umsatz':
-            patterns = [
-                r'^(Umsatz|Jahresumsatz|Revenue|Turnover|Sales)\s*$',
-                r'([€$£¥]?\s*[\d.,]+\s*(Mio\.?|Mrd\.?|Millionen|Milliarden|Billions?|Trillions?)?\s*[€$£¥]?)' # Wertzeile
-            ]
-        elif target == 'mitarbeiter':
-             patterns = [
-                r'^(Mitarbeiter|Beschäftigte|Mitarbeiterzahl|Employees|Staff|Headcount)\s*$',
-                r'([\d.,]+)' # Wertzeile (nur Zahlen)
-            ]
-        else: return "k.A."
-
-        # Iteriere durch die Zeilen, suche nach Keyword-Zeile, dann Wert-Zeile
-        try:
-            for i, line in enumerate(lines):
-                 line_stripped = line.strip()
-                 # debug_print(f" Fallback Prüfe Zeile {i}: '{line_stripped}'") # Sehr detailliert
-                 if re.match(patterns[0], line_stripped, re.IGNORECASE):
-                      debug_print(f"    -> Fallback: Keyword-Zeile '{line_stripped}' (Pattern 0) gefunden bei Index {i}.")
-                      # Suche Wert in der nächsten Zeile (oder übernächsten, falls Leerzeile)
-                      for j in range(i + 1, min(i + 3, len(lines))):
-                           next_line_stripped = lines[j].strip()
-                           if not next_line_stripped: continue # Überspringe Leerzeilen
-                           debug_print(f"      -> Fallback: Prüfe mögliche Wert-Zeile {j}: '{next_line_stripped}'")
-                           value_match = re.search(patterns[1], next_line_stripped, re.IGNORECASE)
-                           if value_match:
-                                extracted_raw = value_match.group(1).strip() # Gruppe 1 ist meist der Wert
-                                debug_print(f"        --> Fallback: Match gefunden! Rohwert: '{extracted_raw}'")
-                                # Bereinige und normalisiere den gefundenen Wert
-                                if target == 'branche':
-                                     value_found = clean_text(extracted_raw)
-                                     # Nimm nur den ersten Teil, falls mehrere durch Komma/Semikolon
-                                     value_found = re.split(r'[,;]', value_found)[0].strip()
-                                     if value_found: return value_found
-                                elif target == 'umsatz':
-                                     numeric_val = extract_numeric_value(extracted_raw, is_umsatz=True)
-                                     if numeric_val != "k.A.": return numeric_val
-                                elif target == 'mitarbeiter':
-                                     numeric_val = extract_numeric_value(extracted_raw, is_umsatz=False)
-                                     if numeric_val != "k.A.": return numeric_val
-                                # Wenn Wert nicht geparst/gefunden, Schleife weiter
-                                debug_print(f"        --> Fallback: Wert '{extracted_raw}' ungültig/leer nach Verarbeitung.")
-                                break # Hör auf, nächste Zeilen für dieses Keyword zu prüfen
-                           else:
-                                debug_print(f"      -> Fallback: Zeile {j} ('{next_line_stripped}') passt nicht auf Wert-Pattern.")
-                                break # Wenn die nächste Zeile nicht passt, hör auf zu suchen
-
-        except Exception as e_re:
-             debug_print(f"  -> Fehler während Fallback RegEx-Verarbeitung: {e_re}")
-             return "k.A."
-
-        debug_print(f"  -> Fallback-Extraktion für '{target}' nicht erfolgreich.")
-        return "k.A."
-
-
-    def extract_company_data(self, page_url): # MODIFIZIERT: Ruft Fallback auf
+    def extract_company_data(self, page_url): # Ruft KEINEN Fallback auf
+        """ Extrahiert Firmendaten von einer Wikipedia-URL. Holt Seite nur einmal. """
         default_result = {'url': page_url if page_url else 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.', 'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.'}
         if not page_url or not isinstance(page_url, str) or "wikipedia.org" not in page_url: return default_result
 
@@ -934,23 +787,18 @@ class WikipediaScraper:
 
         first_paragraph = self._extract_first_paragraph_from_soup(soup)
         categories_val = self.extract_categories(soup)
-
-        # Primäre Extraktion
+        # Primäre Extraktion (ohne Fallback)
         branche_val = self._extract_infobox_value(soup, 'branche')
         umsatz_val = self._extract_infobox_value(soup, 'umsatz')
         mitarbeiter_val = self._extract_infobox_value(soup, 'mitarbeiter')
 
-        # Fallback, falls primär "k.A." lieferte
-        if branche_val == "k.A.": branche_val = self._extract_infobox_value_fallback(soup, 'branche')
-        if umsatz_val == "k.A.": umsatz_val = self._extract_infobox_value_fallback(soup, 'umsatz')
-        if mitarbeiter_val == "k.A.": mitarbeiter_val = self._extract_infobox_value_fallback(soup, 'mitarbeiter')
-
         result = { 'url': page_url, 'first_paragraph': first_paragraph, 'branche': branche_val, 'umsatz': umsatz_val, 'mitarbeiter': mitarbeiter_val, 'categories': categories_val }
-        debug_print(f"  -> Extrahierte Daten (final): P={first_paragraph[:30]}..., B='{branche_val}', U='{umsatz_val}', M='{mitarbeiter_val}', C={categories_val[:30]}...")
+        debug_print(f"  -> Extrahierte Daten: P={first_paragraph[:30]}..., B='{branche_val}', U='{umsatz_val}', M='{mitarbeiter_val}', C={categories_val[:30]}...")
         return result
 
     @retry_on_failure
     def search_company_article(self, company_name, website): # unverändert
+        """Sucht einen passenden Wikipedia-Artikel und gibt das page-Objekt zurück."""
         search_terms = self._generate_search_terms(company_name, website)
         if not search_terms: return None
         for term in search_terms:
@@ -959,31 +807,33 @@ class WikipediaScraper:
                 debug_print(f"Suchergebnisse für '{term}': {results}")
                 for title in results:
                     try:
-                        page = wikipedia.page(title, auto_suggest=False, preload=True)
-                        # TODO: Hier könnte man die Validierung nochmals prüfen, wenn weiterhin falsche Artikel kommen
+                        # Versuche, das Page-Objekt zu laden (preload=True kann Probleme machen)
+                        page = wikipedia.page(title, auto_suggest=False, preload=False) # Preload=False probieren
+                        _ = page.content # Zugriff auf Content zum Laden erzwingen
                         if self._validate_article(page, company_name, website):
                             debug_print(f"Valider Artikel gefunden: {page.url}")
                             return page
                     except wikipedia.exceptions.PageError: debug_print(f"  -> Seite '{title}' nicht gefunden (PageError)."); continue
                     except wikipedia.exceptions.DisambiguationError as e: debug_print(f"  -> Seite '{title}' ist Begriffsklärung: {e.options[:3]}..."); continue
-                    except Exception as e_page: debug_print(f"  -> Fehler bei Verarbeitung von Titel '{title}': {e_page}"); continue
-            except Exception as e_search: debug_print(f"Fehler Wikipedia-Suche '{term}': {e_search}"); continue
-        debug_print(f"Kein passender Wiki-Artikel für '{company_name}' gefunden."); return None
+                    except Exception as e_page: debug_print(f"  -> Fehler bei Verarbeitung von Titel '{title}': {e_page}"); continue # Zum nächsten Titel
+            except Exception as e_search: debug_print(f"Fehler während Wikipedia-Suche für '{term}': {e_search}"); continue # Zum nächsten Suchbegriff
+        debug_print(f"Kein passender Wikipedia-Artikel für '{company_name}' gefunden nach Prüfung aller Begriffe."); return None
+
+# ==================== WEBSITE SCRAPING ====================
 
-# --- Website Scraping, OpenAI, Batch Processing, SERP API, Alignment Demo (unverändert lassen) ---
-# ... (alle diese Funktionen/Klassen hier einfügen, wie im vorherigen Code) ...
 @retry_on_failure
-def get_website_raw(url, max_length=1000, verify_cert=False): # unverändert
+def get_website_raw(url, max_length=1000, verify_cert=False): # Unverändert
+    """Holt Textinhalt von einer Website, versucht Cookie-Banner zu umgehen."""
     if not url or not isinstance(url, str) or url.strip().lower() == 'k.a.': return "k.A."
     if not url.lower().startswith("http"): url = "https://" + url
-    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"}
+    headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36" }
     try:
         response = requests.get(url, timeout=15, headers=headers, verify=verify_cert)
         response.raise_for_status(); response.encoding = response.apparent_encoding
         soup = BeautifulSoup(response.text, Config.HTML_PARSER)
         content_area = (soup.find('main') or soup.find('article') or soup.find(id='content') or soup.find(id='main-content') or soup.find(class_='main-content') or soup.find(class_='content'))
         if not content_area:
-            # debug_print(f"Kein spezifischer Inhalt für {url}. Nutze Body & entferne Banner.")
+            # debug_print(f"Kein spezifischer Inhaltsbereich für {url}. Nutze Body...")
             content_area = soup.find('body')
             if content_area:
                 banner_selectors = ['[id*="cookie"]', '[class*="cookie"]', '[id*="consent"]', '[class*="consent"]', '[id*="banner"]', '[class*="banner"]', '[role="dialog"]']
@@ -995,89 +845,134 @@ def get_website_raw(url, max_length=1000, verify_cert=False): # unverändert
                             banner_text = banner.get_text(" ", strip=True).lower()
                             keywords = ["cookie", "zustimm", "ablehnen", "einverstanden", "datenschutz", "privacy", "akzeptier"]
                             if any(keyword in banner_text for keyword in keywords):
-                                # debug_print(f"Entferne Banner ({selector}) Text: {banner_text[:50]}...")
+                                # debug_print(f"Entferne potenzielles Banner ({selector})...")
                                 banner.decompose(); banners_removed_count += 1
                     except Exception as e_select: debug_print(f"Fehler Banner-Entfernung '{selector}': {e_select}")
-                # if banners_removed_count > 0: debug_print(f"{banners_removed_count} Banner entfernt.")
+                # if banners_removed_count > 0: debug_print(f"{banners_removed_count} Banner-Elemente entfernt.")
         if content_area:
             for script_or_style in content_area(["script", "style"]): script_or_style.decompose()
             text = content_area.get_text(separator=' ', strip=True); text = re.sub(r'\s+', ' ', text)
             banner_keywords_strict = ["cookie", "zustimmen", "ablehnen", "einverstanden", "datenschutz", "privacy", "akzeptier", "einstellung", "partner", "analyse", "marketing"]
             text_lower = text.lower(); keyword_hits = sum(1 for keyword in banner_keywords_strict if keyword in text_lower)
-            if len(text) < 500 and keyword_hits >= 3: debug_print(f"WARNUNG: Text für {url} scheint nur Banner ({len(text)} Chars, {keyword_hits} KW). Verwerfe."); return "k.A. (Nur Cookie-Banner erkannt)"
+            if len(text) < 500 and keyword_hits >= 3: debug_print(f"WARNUNG: Text für {url} scheint nur Banner zu sein. Verwerfe."); return "k.A. (Nur Cookie-Banner erkannt)"
             result = text[:max_length]
-            # debug_print(f"Website {url} OK. Text ({len(result)}): {result[:60]}...")
+            # debug_print(f"Website {url} OK. Text ({len(result)}): {result[:100]}...")
             return result
         else: debug_print(f"Kein <body> gefunden in {url}"); return "k.A."
     except requests.exceptions.SSLError as e:
-        debug_print(f"SSL-Fehler {url}: {e}. Retry ohne verify...")
+        debug_print(f"SSL-Fehler {url}: {e}. Versuche ohne verify...")
         if verify_cert: return get_website_raw(url, max_length, verify_cert=False)
         else: return "k.A."
     except requests.exceptions.RequestException as e: debug_print(f"Netzwerk/HTTP Fehler {url}: {e}"); return "k.A."
-    except Exception as e: debug_print(f"Allg. Fehler Scraping {url}: {e}"); return "k.A."
+    except Exception as e: debug_print(f"Allgemeiner Fehler Scraping {url}: {e}"); return "k.A."
+
+# ==================== OPENAI / CHATGPT FUNCTIONS ====================
 
 @retry_on_failure
-def summarize_batch_openai(tasks_data): # unverändert
-    if not tasks_data: return {}
-    valid_tasks = [t for t in tasks_data if t.get("raw_text") and t["raw_text"] not in ["k.A.", "k.A. (Nur Cookie-Banner erkannt)", "k.A. (Fehler)"] and str(t.get("raw_text")).strip()]
-    if not valid_tasks: return {t['row_num']: "k.A. (Kein gültiger Rohtext)" for t in tasks_data}
-    # debug_print(f"Batch-Zusammenfassung für {len(valid_tasks)} Texte (Zeilen: {[t['row_num'] for t in valid_tasks]})...")
-    prompt_parts = ["Du bist ein KI-Assistent...", "Fasse jeden TEXT prägnant zusammen (Haupttätigkeit, Produkte/Dienste, Zielgruppe).", "Antworte NUR mit Zeilen im Format:", "RESULTAT <Zeilennummer>: <Zusammenfassung für diese Zeilennummer>", "\n--- Texte zur Zusammenfassung ---"]
-    text_block = ""; row_numbers_in_batch = []
-    for task in valid_tasks:
-        row_num = task['row_num']; raw_text = task['raw_text'][:1500]
-        entry_text = f"\n--- TEXT Zeile {row_num} ---\n{raw_text}\n--- ENDE TEXT Zeile {row_num} ---\n"
-        text_block += entry_text; row_numbers_in_batch.append(row_num)
-    if not row_numbers_in_batch: return {t['row_num']: "k.A. (Validierungsfehler?)" for t in tasks_data}
-    prompt_parts.append(text_block); prompt_parts.append("--- Ende der Texte ---"); prompt_parts.append("Bitte gib NUR die 'RESULTAT <Zeilennummer>: ...' Zeilen zurück.")
-    final_prompt = "\n".join(prompt_parts)
-    # try: prompt_tokens = token_count(final_prompt); debug_print(f"Geschätzte Prompt-Tokens Batch: {prompt_tokens}")
-    # except Exception as e_tc: debug_print(f"Fehler Token-Zählen: {e_tc}")
-    chat_response = call_openai_chat(final_prompt, temperature=0.2)
-    summaries = {row_num: "k.A. (Keine Antwort geparst)" for row_num in row_numbers_in_batch}
-    if chat_response:
-        lines = chat_response.strip().split('\n'); parsed_count = 0
-        for line in lines:
-            match = re.match(r"RESULTAT (\d+): (.*)", line.strip())
-            if match:
-                row_num = int(match.group(1)); summary_text = match.group(2).strip()
-                if row_num in summaries: summaries[row_num] = summary_text; parsed_count += 1
-        # debug_print(f"Batch-Zusammenfassung: {parsed_count}/{len(row_numbers_in_batch)} geparst.")
-        # if parsed_count < len(row_numbers_in_batch): debug_print(f"WARNUNG: Nicht alle geparst. Antwort: {chat_response[:100]}...")
-    # else: debug_print("Fehler: Keine Antwort OpenAI Batch-Zusammenfassung.")
-    for task in tasks_data:
-        if task['row_num'] not in summaries: summaries[task['row_num']] = "k.A. (Ungültiger Rohtext o.ä.)"
-    return summaries
-
-@retry_on_failure
-def call_openai_chat(prompt, temperature=0.3, model=None): # unverändert
-    if not Config.API_KEYS.get('openai'): debug_print("Fehler: OpenAI Key fehlt."); return None
+def call_openai_chat(prompt, temperature=0.3, model=None): # Unverändert
+    """Zentrale Funktion für OpenAI Chat API Aufrufe."""
+    if not Config.API_KEYS.get('openai'): debug_print("Fehler: OpenAI API Key fehlt."); return None
     if not prompt: debug_print("Fehler: Leerer Prompt."); return None
     current_model = model if model else Config.TOKEN_MODEL
     try:
+        # Optional: Token zählen vor Senden
         # prompt_tokens = token_count(prompt)
-        # debug_print(f"Sende Prompt OpenAI ({current_model}, {prompt_tokens} Tokens)...")
-        response = openai.ChatCompletion.create(model=current_model, messages=[{"role": "user", "content": prompt}], temperature=temperature)
+        # debug_print(f"Sende Prompt an OpenAI ({current_model}, {prompt_tokens} Tokens)...")
+        response = openai.ChatCompletion.create(
+            model=current_model, messages=[{"role": "user", "content": prompt}], temperature=temperature )
         result = response.choices[0].message.content.strip()
+        # Optional: Token zählen Antwort
         # completion_tokens = token_count(result); total_tokens = response.usage.total_tokens
-        # debug_print(f"OpenAI Antwort OK ({completion_tokens} Comp Tokens, {total_tokens} Gesamt).")
+        # debug_print(f"OpenAI Antwort erhalten ({completion_tokens}/{total_tokens} Tokens).")
         return result
     except openai.error.InvalidRequestError as e:
          debug_print(f"OpenAI Invalid Request Error: {e}")
-         if "maximum context length" in str(e): debug_print("Fehler: Token Limit.")
+         if "maximum context length" in str(e): debug_print("Fehler scheint Token Limit zu sein.")
          return None
-    except openai.error.OpenAIError as e: debug_print(f"OpenAI API Fehler: {e}"); raise e
-    except Exception as e: debug_print(f"Allg. Fehler OpenAI: {e}"); raise e
+    except openai.error.OpenAIError as e: debug_print(f"OpenAI API Fehler: {e}"); raise e # Für Retry
+    except Exception as e: debug_print(f"Allgemeiner Fehler bei OpenAI-Aufruf: {e}"); raise e # Für Retry
 
-def summarize_website_content(raw_text): # unverändert
-    if not raw_text or raw_text == "k.A." or raw_text.strip() == "": return "k.A."
+def summarize_website_content(raw_text): # Modified: now also returns "k.A." for the cookie-banner placeholder text
+    """Erstellt Zusammenfassung von Website-Rohtext via OpenAI."""
+    if not raw_text or raw_text == "k.A." or raw_text == "k.A. (Nur Cookie-Banner erkannt)" or raw_text.strip() == "":
+        return "k.A."
     max_raw_length = 3000
-    if len(raw_text) > max_raw_length: debug_print(f"Kürze Rohtext für Summary: {len(raw_text)} -> {max_raw_length}."); raw_text = raw_text[:max_raw_length]
-    prompt = ("Du bist ein KI-Assistent...\n" "Fasse folgenden Text einer Unternehmenswebsite zusammen...\n" "- Haupttätigkeitsfeld\n" "- Produkte/Dienstleistungen\n" "- Zielgruppe (falls erkennbar)\n\n" f"Website-Text:\n```\n{raw_text}\n```\n\n" "Zusammenfassung (max. 100 Wörter):")
+    if len(raw_text) > max_raw_length:
+         # debug_print(f"Kürze Rohtext für Zusammenfassung: {len(raw_text)} -> {max_raw_length} Zeichen.")
+         raw_text = raw_text[:max_raw_length]
+    prompt = (
+        "Du bist ein KI-Assistent, der Webinhalte analysiert.\n"
+        "Fasse den folgenden Text einer Unternehmenswebsite prägnant zusammen. "
+        "Konzentriere dich auf:\n"
+        "- Haupttätigkeitsfeld des Unternehmens\n"
+        "- Wichtigste Produkte und/oder Dienstleistungen\n"
+        "- Zielgruppe (falls erkennbar)\n\n"
+        f"Website-Text:\n```\n{raw_text}\n```\n\n"
+        "Zusammenfassung (max. 100 Wörter):" )
     summary = call_openai_chat(prompt, temperature=0.2)
     return summary if summary else "k.A."
 
-# --- Platzhalter für nicht geänderte ChatGPT Funktionen (wie vorher) ---
+def evaluate_branche_chatgpt(crm_branche, beschreibung, wiki_branche, wiki_kategorien, website_summary): # Unverändert
+    """ Ordnet Unternehmen exakt einer Branche aus dem Ziel-Schema zu via ChatGPT. """
+    global ALLOWED_TARGET_BRANCHES, TARGET_SCHEMA_STRING
+    if not ALLOWED_TARGET_BRANCHES:
+        debug_print("FEHLER in evaluate_branche_chatgpt: Ziel-Schema leer."); return {"branch": crm_branche, "consistency": "error_schema_missing", "justification": "Fehler: Ziel-Schema nicht geladen"}
+    allowed_branches_lookup = {b.lower(): b for b in ALLOWED_TARGET_BRANCHES}
+    prompt_parts = [TARGET_SCHEMA_STRING, "\nOrdne das Unternehmen anhand folgender Angaben exakt einer Branche des Ziel-Branchenschemas (Kurzformen) zu:"]
+    if crm_branche and crm_branche != "k.A.": prompt_parts.append(f"- CRM-Branche (Referenz): {crm_branche}")
+    if beschreibung and beschreibung != "k.A.": prompt_parts.append(f"- Beschreibung: {beschreibung[:500]}")
+    if wiki_branche and wiki_branche != "k.A.": prompt_parts.append(f"- Wikipedia-Branche: {wiki_branche}")
+    if wiki_kategorien and wiki_kategorien != "k.A.": prompt_parts.append(f"- Wikipedia-Kategorien: {wiki_kategorien[:500]}")
+    if website_summary and website_summary != "k.A.": prompt_parts.append(f"- Website-Zusammenfassung: {website_summary[:500]}")
+    if len(prompt_parts) <= 2:
+        debug_print("Warnung in evaluate_branche_chatgpt: Zu wenige Infos."); return {"branch": crm_branche, "consistency": "error_no_info", "justification": "Fehler: Zu wenige Informationen"}
+    prompt_parts.append("\nWICHTIG: Antworte NUR mit dem exakten Kurznamen einer Branche aus der obigen Liste. Verwende KEINE Präfixe.")
+    prompt_parts.append("\nAntworte ausschließlich im folgenden Format:")
+    prompt_parts.append("Branche: <Exakter Kurzname der Branche aus der Liste>"); prompt_parts.append("Übereinstimmung: <ok oder X>"); prompt_parts.append("Begründung: <Sehr kurze Begründung>")
+    prompt = "\n".join(prompt_parts)
+    chat_response = call_openai_chat(prompt, temperature=0.0)
+    if not chat_response:
+        debug_print("Fehler in evaluate_branche_chatgpt: Keine API Antwort."); return {"branch": crm_branche, "consistency": "error_api_no_response", "justification": "Fehler: Keine Antwort von API"}
+    lines = chat_response.strip().split("\n"); result = {"branch": None, "consistency": None, "justification": ""}; suggested_branch = ""
+    for line in lines:
+        line_lower = line.lower()
+        if line_lower.startswith("branche:"): suggested_branch = line.split(":", 1)[1].strip().strip('"\'')
+        elif line_lower.startswith("begründung:"): result["justification"] = line.split(":", 1)[1].strip()
+    if not suggested_branch:
+         debug_print(f"Fehler in evaluate_branche_chatgpt: Parsing fehlgeschlagen: {chat_response}"); return {"branch": crm_branche, "consistency": "error_parsing", "justification": f"Fehler: Parsing API Antwort. Antwort: {chat_response}"}
+    final_branch = None; suggested_branch_lower = suggested_branch.lower()
+    if suggested_branch_lower in allowed_branches_lookup:
+        final_branch = allowed_branches_lookup[suggested_branch_lower]; result["consistency"] = "pending_comparison"
+        # debug_print(f"ChatGPT-Vorschlag '{suggested_branch}' ist gültig ('{final_branch}').")
+    else:
+        debug_print(f"ChatGPT-Vorschlag '{suggested_branch}' ist NICHT im Ziel-Schema. Starte Fallback...")
+        crm_short_branch = "k.A."
+        if crm_branche and ">" in crm_branche: crm_short_branch = crm_branche.split(">", 1)[1].strip()
+        elif crm_branche and crm_branche != "k.A.": crm_short_branch = crm_branche.strip()
+        if crm_short_branch != "k.A." and crm_short_branch.lower() in allowed_branches_lookup:
+            final_branch = allowed_branches_lookup[crm_short_branch.lower()]
+            result["consistency"] = "fallback_crm_valid"
+            fallback_reason = f"Fallback: Ungültiger ChatGPT-Vorschlag ('{suggested_branch}'). Gültige CRM-Kurzform '{final_branch}' verwendet."
+            result["justification"] = f"{fallback_reason} (ChatGPT Begründung war: {result.get('justification', 'Keine')})"
+            debug_print(f"Fallback auf gültige CRM-Kurzform erfolgreich: '{final_branch}'")
+        else:
+            final_branch = suggested_branch # Behalte ungültigen Vorschlag
+            result["consistency"] = "fallback_invalid"
+            error_reason = f"Fehler: Ungültiger ChatGPT-Vorschlag ('{suggested_branch}') und keine gültige CRM-Kurzform ('{crm_short_branch}') als Fallback verfügbar."
+            result["justification"] = f"{error_reason} (ChatGPT Begründung war: {result.get('justification', 'Keine')})"
+            debug_print(f"Fallback fehlgeschlagen. Ungültiger Vorschlag: '{final_branch}', Ungültige CRM-Kurzform: '{crm_short_branch}'")
+    result["branch"] = final_branch if final_branch else "FEHLER"
+    crm_short_to_compare = "k.A."
+    if crm_branche and ">" in crm_branche: crm_short_to_compare = crm_branche.split(">", 1)[1].strip()
+    elif crm_branche and crm_branche != "k.A.": crm_short_to_compare = crm_branche.strip()
+    if result["branch"] != "FEHLER" and result["branch"].lower() == crm_short_to_compare.lower():
+        if result["consistency"] == "pending_comparison": result["consistency"] = "ok"
+    elif result["consistency"] == "pending_comparison": result["consistency"] = "X"
+    if result["consistency"] == "pending_comparison": result["consistency"] = "error_comparison_failed"
+    # debug_print(f"Finale Branch-Evaluation: {result}")
+    return result
+
+# --- Platzhalter für weitere, aktuell nicht genutzte oder unveränderte ChatGPT-Funktionen ---
 def evaluate_fsm_suitability(company_name, company_data): return {"suitability": "k.A.", "justification": "Not Implemented"}
 def evaluate_servicetechnicians_estimate(company_name, company_data): return "k.A. (Not Implemented)"
 def map_internal_technicians(value): return "k.A. (Not Implemented)"
@@ -1086,14 +981,33 @@ def process_employee_estimation(company_name, wiki_paragraph, crm_employee): ret
 def process_employee_consistency(crm_employee, wiki_employee, emp_estimate): return "k.A. (Not Implemented)"
 def evaluate_umsatz_chatgpt(company_name, wiki_umsatz): return "k.A. (Not Implemented)"
 
-def _process_batch(sheet, batches, row_numbers): # unverändert
+# ==================== BATCH PROCESSING FUNCTIONS ====================
+
+def _process_batch(sheet, batches, row_numbers): # Unverändert
+    """
+    Hilfsfunktion für process_verification_only: Verarbeitet einen Batch von Wikipedia-Verifizierungsanfragen.
+    Aktualisiert NUR die Spalten S bis Y. Zeitstempel werden von der aufrufenden Funktion gesetzt.
+    """
     if not batches: return
-    aggregated_prompt = ("Du bist ein Experte... prüfe Plausibilität...\n" "Eintrag <Zeilennummer>: <Antwort>\n\n" "Mögliche Antworten:\n" "- 'OK'\n" "- 'X | Alternativer Artikel: <URL> | Begründung: <Text>'\n" "- 'X | Kein passender Artikel gefunden | Begründung: <Text>'\n" "- 'Kein Wikipedia-Eintrag vorhanden.'\n\n" "Einträge:\n" "----------\n")
-    aggregated_prompt += "".join(batches); aggregated_prompt += "----------\nNur 'Eintrag X: Antwort'-Zeilen ausgeben."
-    # debug_print(f"Verarbeite Verifizierungs-Batch {row_numbers[0]}-{row_numbers[-1]}.")
-    # prompt_tokens = token_count(aggregated_prompt); debug_print(f"Tokens Verif.-Batch: {prompt_tokens}")
+    aggregated_prompt = (
+        "Du bist ein Experte in der Verifizierung von Wikipedia-Artikeln für Unternehmen. "
+        "Für jeden der folgenden Einträge prüfe, ob der vorhandene Wikipedia-Artikel (URL, Absatz, Kategorien) plausibel zum Firmennamen und zur Beschreibung passt. "
+        "Gib das Ergebnis für jeden Eintrag ausschließlich im folgenden Format auf einer neuen Zeile aus:\n"
+        "Eintrag <Zeilennummer>: <Antwort>\n\n"
+        "Mögliche Antworten:\n"
+        "- 'OK' (wenn der Artikel gut passt)\n"
+        "- 'X | Alternativer Artikel: <URL> | Begründung: <Kurze Begründung>' (wenn der Artikel nicht passt, aber ein besserer gefunden wurde)\n"
+        "- 'X | Kein passender Artikel gefunden | Begründung: <Kurze Begründung>' (wenn der Artikel nicht passt und kein besserer gefunden wurde)\n"
+        "- 'Kein Wikipedia-Eintrag vorhanden.' (wenn initial keine URL angegeben wurde und keine Suche erfolgreich war)\n\n"
+        "Einträge:\n"
+        "----------\n" )
+    aggregated_prompt += "".join(batches)
+    aggregated_prompt += "----------\nBitte nur die 'Eintrag X: Antwort'-Zeilen ausgeben."
+    # debug_print(f"Verarbeite Verifizierungs-Batch für Zeilen {row_numbers[0]} bis {row_numbers[-1]}.")
+    prompt_tokens = token_count(aggregated_prompt)
+    # debug_print(f"Token-Zahl für Verifizierungs-Batch: {prompt_tokens}")
     chat_response = call_openai_chat(aggregated_prompt, temperature=0.0)
-    if not chat_response: debug_print(f"Fehler: Keine Antwort OpenAI Verif.-Batch {row_numbers[0]}-{row_numbers[-1]}."); return
+    if not chat_response: debug_print(f"Fehler: Keine Antwort OpenAI für Verif.-Batch {row_numbers[0]}-{row_numbers[-1]}."); return
     answers = {}; lines = chat_response.strip().split('\n')
     for line in lines:
         match = re.match(r"Eintrag (\d+): (.*)", line.strip())
@@ -1118,40 +1032,56 @@ def _process_batch(sheet, batches, row_numbers): # unverändert
                   if reason_part.startswith("Begründung:"): wiki_explanation = reason_part.split(":", 1)[1].strip()
                   else: wiki_explanation = reason_part
         else: wiki_confirm, wiki_explanation = "?", f"Unerwartetes Format: {answer}"
-        updates.append({'range': f'S{row_num}', 'values': [[wiki_confirm]]}); updates.append({'range': f'T{row_num}', 'values': [[alt_article]]}); updates.append({'range': f'U{row_num}', 'values': [[wiki_explanation]]}); updates.append({'range': f'V{row_num}:Y{row_num}', 'values': [[v_val, w_val, x_val, y_val]]})
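+        # NOTE: the column letters below come from hard-coded numbers passed to GoogleSheetHandler()._get_col_letter, not from COLUMN_MAP (using COLUMN_MAP directly would be better).
+        # Assumed 0-based column indices: S=18, T=19, U=20, V=21, W=22, X=23, Y=24; the calls below pass index + 1 (19, 20, 21, 22, 25).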
+        s_l = GoogleSheetHandler()._get_col_letter(19); t_l = GoogleSheetHandler()._get_col_letter(20); u_l = GoogleSheetHandler()._get_col_letter(21)
+        v_l = GoogleSheetHandler()._get_col_letter(22); y_l = GoogleSheetHandler()._get_col_letter(25) # V bis Y
+        updates.append({'range': f'{s_l}{row_num}', 'values': [[wiki_confirm]]})
+        updates.append({'range': f'{t_l}{row_num}', 'values': [[alt_article]]})
+        updates.append({'range': f'{u_l}{row_num}', 'values': [[wiki_explanation]]})
+        updates.append({'range': f'{v_l}{row_num}:{y_l}{row_num}', 'values': [[v_val, w_val, x_val, y_val]]})
     if updates:
-         try: sheet.batch_update(updates); debug_print(f"Verifizierungs-Batch {row_numbers[0]}-{row_numbers[-1]} (S-Y) OK.")
-         except Exception as e: debug_print(f"FEHLER Batch-Update (S-Y) {row_numbers[0]}-{row_numbers[-1]}: {e}")
-    # else: debug_print(f"Keine Updates (S-Y) für Verif.-Batch {row_numbers[0]}-{row_numbers[-1]}.")
+         try: sheet.batch_update(updates, value_input_option='USER_ENTERED'); debug_print(f"Verif.-Batch {row_numbers[0]}-{row_numbers[-1]} (S-Y) OK.")
+         except Exception as e: debug_print(f"FEHLER Batch-Update (S-Y) für Batch {row_numbers[0]}-{row_numbers[-1]}: {e}")
 
-def process_verification_only(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet): # unverändert
-    debug_print(f"Starte Wiki-Verif. (Batch) für Zeilen {start_row_index_in_sheet}-{end_row_index_in_sheet}...")
+def process_verification_only(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet): # Nutzt Config.HEADER_ROWS
+    """ Batch-Prozess nur für Wikipedia-Verifizierung (Spalten S-Y). Prüft AX. """
+    debug_print(f"Starte Wiki-Verif.-Modus (Batch) {start_row_index_in_sheet}-{end_row_index_in_sheet}...")
     if not sheet_handler.load_data(): debug_print("FEHLER Laden process_verification_only."); return
     all_data = sheet_handler.get_all_data_with_headers()
     if not all_data or len(all_data) <= Config.HEADER_ROWS: debug_print("FEHLER/WARNUNG: Keine Daten process_verification_only."); return
     timestamp_col_key = "Wiki Verif. Timestamp"; timestamp_col_index = COLUMN_MAP.get(timestamp_col_key)
     ts_col_letter = sheet_handler._get_col_letter(timestamp_col_index + 1) if timestamp_col_index is not None else "AX_FEHLER"
     if timestamp_col_index is None: debug_print(f"FEHLER: '{timestamp_col_key}' nicht in COLUMN_MAP."); return
+
     batch_size = Config.BATCH_SIZE; current_batch = []; current_row_numbers = []; processed_count = 0; skipped_count = 0
     for i in range(start_row_index_in_sheet, end_row_index_in_sheet + 1):
-        row_index_in_list = i - 1
+        row_index_in_list = i - 1 # 0-basierter Index in all_data
         if row_index_in_list >= len(all_data): continue
         row = all_data[row_index_in_list]
+
         ts_value_ax = "INDEX_FEHLER"; ts_ax_is_set = False
         if len(row) > timestamp_col_index: ts_value_ax = row[timestamp_col_index]; ts_ax_is_set = bool(str(ts_value_ax).strip())
         # log_debug = (i < start_row_index_in_sheet + 2 or i > end_row_index_in_sheet - 2 or i % 500 == 0)
         # if log_debug: debug_print(f"Zeile {i} (Wiki Verif. Check): TS {ts_col_letter}='{ts_value_ax}'. Überspringen? {ts_ax_is_set}")
         if ts_ax_is_set: skipped_count += 1; continue
-        company_name = row[COLUMN_MAP.get("CRM Name", 1)] if len(row) > COLUMN_MAP.get("CRM Name", 1) else ''
-        crm_desc = row[COLUMN_MAP.get("CRM Beschreibung", 5)] if len(row) > COLUMN_MAP.get("CRM Beschreibung", 5) else ''
-        wiki_url_idx = COLUMN_MAP.get("Wiki URL"); wiki_url = row[wiki_url_idx] if wiki_url_idx is not None and len(row) > wiki_url_idx and row[wiki_url_idx].strip() not in ['', 'k.A.'] else 'k.A.'
-        wiki_para_idx = COLUMN_MAP.get("Wiki Absatz"); wiki_paragraph = row[wiki_para_idx] if wiki_para_idx is not None and len(row) > wiki_para_idx else 'k.A.'
-        wiki_cat_idx = COLUMN_MAP.get("Wiki Kategorien"); wiki_categories = row[wiki_cat_idx] if wiki_cat_idx is not None and len(row) > wiki_cat_idx else 'k.A.'
+
+        # Daten für Prompt holen (mit Indexprüfung)
+        name_idx = COLUMN_MAP.get("CRM Name"); desc_idx = COLUMN_MAP.get("CRM Beschreibung")
+        url_idx = COLUMN_MAP.get("Wiki URL"); para_idx = COLUMN_MAP.get("Wiki Absatz"); cat_idx = COLUMN_MAP.get("Wiki Kategorien")
+        company_name = row[name_idx] if name_idx is not None and len(row) > name_idx else ''
+        crm_desc = row[desc_idx] if desc_idx is not None and len(row) > desc_idx else ''
+        wiki_url = row[url_idx] if url_idx is not None and len(row) > url_idx and row[url_idx].strip() not in ['', 'k.A.'] else 'k.A.'
+        wiki_paragraph = row[para_idx] if para_idx is not None and len(row) > para_idx else 'k.A.'
+        wiki_categories = row[cat_idx] if cat_idx is not None and len(row) > cat_idx else 'k.A.'
+
         entry_text = (f"Eintrag {i}:\n" f"  Firmenname: {company_name}\n" f"  CRM-Beschreibung: {crm_desc[:200]}...\n" f"  Wikipedia-URL: {wiki_url}\n" f"  Wiki-Absatz: {wiki_paragraph[:200]}...\n" f"  Wiki-Kategorien: {wiki_categories[:200]}...\n" f"----\n")
         current_batch.append(entry_text); current_row_numbers.append(i); processed_count += 1
+
         if len(current_batch) >= batch_size or i == end_row_index_in_sheet:
             if current_batch:
-                _process_batch(sheet_handler.sheet, current_batch, current_row_numbers)
+                _process_batch(sheet_handler.sheet, current_batch, current_row_numbers) # Schreibt S-Y
+                # Setze AX Timestamp für bearbeitete Zeilen
                 wiki_ts_updates = []; current_wiki_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                 for row_num in current_row_numbers: wiki_ts_updates.append({'range': f'{ts_col_letter}{row_num}', 'values': [[current_wiki_timestamp]]})
                 if wiki_ts_updates:
@@ -1162,27 +1092,69 @@ def process_verification_only(sheet_handler, start_row_index_in_sheet, end_row_i
             current_batch = []; current_row_numbers = []
     debug_print(f"Wiki-Verif.-Batch beendet. {processed_count} verarbeitet, {skipped_count} übersprungen.")
 
-def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet): # unverändert
+
+@retry_on_failure
+def summarize_batch_openai(tasks_data): # Unchanged
+    """ Summarize a list of raw texts in a single OpenAI call; returns a dict mapping row_num to summary text or a 'k.A. (...)' placeholder. """
+    if not tasks_data: return {}
+    valid_tasks = [t for t in tasks_data if t.get("raw_text") and t["raw_text"] not in ["k.A.", "k.A. (Nur Cookie-Banner erkannt)", "k.A. (Fehler)"] and str(t.get("raw_text")).strip()]
+    if not valid_tasks: return {t['row_num']: "k.A. (Kein gültiger Rohtext)" for t in tasks_data}
+    # Only tasks whose 'raw_text' is non-empty and not one of the placeholder values above are put into the prompt.
+    prompt_parts = ["Du bist ein KI-Assistent...", "Fasse jeden TEXT prägnant zusammen...", "Antworte NUR mit Zeilen im Format:", "RESULTAT <Zeilennummer>: <Zusammenfassung>", "\n--- Texte ---"]
+    text_block = ""; row_numbers_in_batch = []
+    for task in valid_tasks:
+        row_num = task['row_num']; raw_text = task['raw_text'][:1500] # truncate each raw text to 1500 characters
+        entry_text = f"\n--- TEXT Zeile {row_num} ---\n{raw_text}\n--- ENDE TEXT Zeile {row_num} ---\n"
+        text_block += entry_text; row_numbers_in_batch.append(row_num)
+    if not row_numbers_in_batch: return {t['row_num']: "k.A. (Fehler)" for t in tasks_data} # defensive guard; valid_tasks is non-empty here, so the list was filled above
+    prompt_parts.append(text_block); prompt_parts.append("--- Ende der Texte ---"); prompt_parts.append("Bitte gib NUR die 'RESULTAT <Zeilennummer>: ...' Zeilen zurück.")
+    final_prompt = "\n".join(prompt_parts)
+    # Every row sent to OpenAI starts with a "no answer parsed" placeholder that is overwritten when a matching RESULTAT line is found.
+    chat_response = call_openai_chat(final_prompt, temperature=0.2)
+    summaries = {row_num: "k.A. (Keine Antwort geparst)" for row_num in row_numbers_in_batch}
+    if chat_response:
+        lines = chat_response.strip().split('\n'); parsed_count = 0
+        for line in lines:
+            match = re.match(r"RESULTAT (\d+): (.*)", line.strip())
+            if match:
+                row_num = int(match.group(1)); summary_text = match.group(2).strip()
+                if row_num in summaries: summaries[row_num] = summary_text; parsed_count += 1
+        # RESULTAT lines whose row number was not part of this batch are ignored; parsed_count only counts accepted lines.
+    # A falsy chat_response leaves every row that was sent at the "no answer parsed" placeholder.
+    for task in tasks_data: # add a fallback entry for tasks that were filtered out as invalid
+        if task['row_num'] not in summaries: summaries[task['row_num']] = "k.A. (Ungültiger Rohtext o.ä.)"
+    return summaries
+
+def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet): # Nutzt Config.HEADER_ROWS
+    """ Batch-Prozess NUR für Website-Scraping (Rohtext AR). Prüft AR. """
     debug_print(f"Starte Website-Scraping ROHDATEN (Batch) {start_row_index_in_sheet}-{end_row_index_in_sheet}...")
     if not sheet_handler.load_data(): return
-    all_data = sheet_handler.get_all_data_with_headers(); header_rows = Config.HEADER_ROWS
-    if not all_data or len(all_data) <= header_rows: return
+    all_data = sheet_handler.get_all_data_with_headers()
+    if not all_data or len(all_data) <= Config.HEADER_ROWS: return
+    header_rows = Config.HEADER_ROWS
+
     rohtext_col_key = "Website Rohtext"; rohtext_col_index = COLUMN_MAP.get(rohtext_col_key)
     website_col_idx = COLUMN_MAP.get("CRM Website"); version_col_idx = COLUMN_MAP.get("Version")
     if None in [rohtext_col_index, website_col_idx, version_col_idx]: debug_print(f"FEHLER: Indizes website_batch fehlen."); return
-    rohtext_col_letter = sheet_handler._get_col_letter(rohtext_col_index + 1); version_col_letter = sheet_handler._get_col_letter(version_col_idx + 1)
-    def scrape_raw_text_task(task_info):
+    rohtext_col_letter = sheet_handler._get_col_letter(rohtext_col_index + 1)
+    version_col_letter = sheet_handler._get_col_letter(version_col_idx + 1)
+
+    def scrape_raw_text_task(task_info): # Worker unverändert
         row_num = task_info['row_num']; url = task_info['url']; raw_text = "k.A."; error = None
         try: raw_text = get_website_raw(url)
         except Exception as e: error = f"Scraping Fehler Z{row_num}: {e}"; debug_print(error)
         return {"row_num": row_num, "raw_text": raw_text, "error": error}
-    tasks_for_processing_batch = []; all_sheet_updates = []; total_processed_count = 0; total_skipped_count = 0; total_skipped_url_count = 0; total_error_count = 0
+
+    tasks_for_processing_batch = []; all_sheet_updates = []
+    total_processed_count = 0; total_skipped_count = 0; total_skipped_url_count = 0; total_error_count = 0
     processing_batch_size = Config.PROCESSING_BATCH_SIZE; max_scraping_workers = Config.MAX_SCRAPING_WORKERS; update_batch_row_limit = Config.UPDATE_BATCH_ROW_LIMIT
     empty_values_for_skip = ["", "k.a.", "k.a. (nur cookie-banner erkannt)", "k.a. (fehler)"]
+
     for i in range(start_row_index_in_sheet, end_row_index_in_sheet + 1):
-        row_index_in_list = i - 1
+        row_index_in_list = i - 1 # 0-basierter Index in all_data
         if row_index_in_list >= len(all_data): continue
         row = all_data[row_index_in_list]
+
         should_skip = False; cell_value_ar_str_lower = "INDEX_FEHLER"
         if len(row) > rohtext_col_index:
             cell_value_ar_str_lower = str(row[rohtext_col_index]).strip().lower()
@@ -1190,9 +1162,12 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index
         # log_debug = (i < start_row_index_in_sheet + 2 or i > end_row_index_in_sheet - 2 or i % 500 == 0)
         # if log_debug: debug_print(f"Zeile {i} (Website AR Check): Wert='{cell_value_ar_str_lower}'. Skip? {should_skip}")
         if should_skip: total_skipped_count += 1; continue
+
         website_url = row[website_col_idx] if len(row) > website_col_idx else ""
         if not website_url or website_url.strip().lower() == "k.a.": total_skipped_url_count += 1; continue
+
         tasks_for_processing_batch.append({"row_num": i, "url": website_url})
+
         if len(tasks_for_processing_batch) >= processing_batch_size or i == end_row_index_in_sheet:
             if tasks_for_processing_batch:
                 batch_start_row = tasks_for_processing_batch[0]['row_num']; batch_end_row = tasks_for_processing_batch[-1]['row_num']; batch_task_count = len(tasks_for_processing_batch)
@@ -1228,26 +1203,35 @@ def process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index
     if all_sheet_updates: debug_print(f"Sende finale Sheet-Updates ({len(all_sheet_updates)} Zellen)..."); sheet_handler.batch_update_cells(all_sheet_updates)
     debug_print(f"Website-Scraping ROHDATEN beendet. {total_processed_count} verarbeitet ({total_error_count} Fehler), {total_skipped_count} wg. Inhalt übersprungen, {total_skipped_url_count} ohne URL übersprungen.")
 
-def process_website_summarization_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet): # unverändert
+def process_website_summarization_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet): # Nutzt Config.HEADER_ROWS
+    """ Batch-Prozess NUR für Website-Zusammenfassung (AS). Prüft AR und AS. """
     debug_print(f"Starte Website-Zusammenfassung (Batch) {start_row_index_in_sheet}-{end_row_index_in_sheet}...")
     openai_batch_size = Config.OPENAI_BATCH_SIZE_LIMIT; update_batch_row_limit = Config.UPDATE_BATCH_ROW_LIMIT
     if not sheet_handler.load_data(): return
-    all_data = sheet_handler.get_all_data_with_headers(); header_rows = Config.HEADER_ROWS
-    if not all_data or len(all_data) <= header_rows: return
+    all_data = sheet_handler.get_all_data_with_headers()
+    if not all_data or len(all_data) <= Config.HEADER_ROWS: return
+    header_rows = Config.HEADER_ROWS
+
     rohtext_col_idx = COLUMN_MAP.get("Website Rohtext"); summary_col_idx = COLUMN_MAP.get("Website Zusammenfassung"); version_col_idx = COLUMN_MAP.get("Version")
     if None in [rohtext_col_idx, summary_col_idx, version_col_idx]: return debug_print(f"FEHLER: Indizes Summary fehlen.")
     summary_col_letter = sheet_handler._get_col_letter(summary_col_idx + 1); version_col_letter = sheet_handler._get_col_letter(version_col_idx + 1)
-    tasks_for_openai_batch = []; all_sheet_updates = []; rows_in_current_update_batch = 0; processed_count = 0; skipped_no_rohtext = 0; skipped_summary_exists = 0
+
+    tasks_for_openai_batch = []; all_sheet_updates = []; rows_in_current_update_batch = 0
+    processed_count = 0; skipped_no_rohtext = 0; skipped_summary_exists = 0
+
     for i in range(start_row_index_in_sheet, end_row_index_in_sheet + 1):
-        row_index_in_list = i - 1
+        row_index_in_list = i - 1 # 0-basierter Index in all_data
         if row_index_in_list >= len(all_data): continue
         row = all_data[row_index_in_list]
+
         raw_text = ""; summary_exists = False
         if len(row) > rohtext_col_idx: raw_text = str(row[rohtext_col_idx]).strip()
         if not raw_text or raw_text == "k.A." or raw_text == "k.A. (Nur Cookie-Banner erkannt)" or raw_text == "k.A. (Fehler)": skipped_no_rohtext += 1; continue
         if len(row) > summary_col_idx and str(row[summary_col_idx]).strip() and str(row[summary_col_idx]).strip() != "k.A.": summary_exists = True
         if summary_exists: skipped_summary_exists += 1; continue
+
         tasks_for_openai_batch.append({'row_num': i, 'raw_text': raw_text}); processed_count += 1
+
         if tasks_for_openai_batch and (len(tasks_for_openai_batch) >= openai_batch_size or (processed_count > 0 and i == end_row_index_in_sheet)):
              # debug_print(f"  Verarbeite OpenAI Batch {len(tasks_for_openai_batch)} Tasks (Start: {tasks_for_openai_batch[0]['row_num']})...")
              summaries_result = summarize_batch_openai(tasks_for_openai_batch)
@@ -1258,6 +1242,7 @@ def process_website_summarization_batch(sheet_handler, start_row_index_in_sheet,
                  all_sheet_updates.extend(row_updates); rows_in_current_update_batch += 1
              tasks_for_openai_batch = []
              time.sleep(Config.RETRY_DELAY) # Pause nach OpenAI Batch Call
+
         if all_sheet_updates and (rows_in_current_update_batch >= update_batch_row_limit or (processed_count > 0 and i == end_row_index_in_sheet)):
              debug_print(f"  Sende Sheet-Update für {rows_in_current_update_batch} Zusammenfassungen...")
              success = sheet_handler.batch_update_cells(all_sheet_updates)
@@ -1267,20 +1252,15 @@ def process_website_summarization_batch(sheet_handler, start_row_index_in_sheet,
     if all_sheet_updates: debug_print(f"Sende LETZTES Sheet-Update für {rows_in_current_update_batch} Zusammenfassungen..."); sheet_handler.batch_update_cells(all_sheet_updates)
     debug_print(f"Website-Zusammenfassung Batch beendet. {processed_count} angefordert, {skipped_no_rohtext} ohne Rohtext, {skipped_summary_exists} mit Summary übersprungen.")
 
-def process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet, force_process=False): # MODIFIZIERT: force_process Flag hinzugefügt
-    """
-    Batch-Prozess für Brancheneinschätzung mit paralleler Verarbeitung via Threads.
-    Prüft Timestamp AO, es sei denn force_process=True. Führt evaluate_branche_chatgpt parallel aus.
-    Setzt W, X, Y, AO + AP und sendet Sheet-Updates GEBÜNDELT PRO VERARBEITUNGS-BATCH.
-    """
+def process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet, force_process=False): # Nutzt Config.HEADER_ROWS
+    """ Batch-Prozess für Brancheneinschätzung. Prüft AO (außer bei force_process). """
     mode_desc = "(Force Process)" if force_process else "(Timestamp AO Check)"
     debug_print(f"Starte Brancheneinschätzung (Parallel Batch) {start_row_index_in_sheet}-{end_row_index_in_sheet} {mode_desc}...")
-
     if not sheet_handler.load_data(): return
-    all_data = sheet_handler.get_all_data_with_headers(); header_rows = Config.HEADER_ROWS
-    if not all_data or len(all_data) <= header_rows: return
+    all_data = sheet_handler.get_all_data_with_headers()
+    if not all_data or len(all_data) <= Config.HEADER_ROWS: return
+    header_rows = Config.HEADER_ROWS
 
-    # Indizes etc. (wie gehabt)
     timestamp_col_key = "Timestamp letzte Prüfung"; timestamp_col_index = COLUMN_MAP.get(timestamp_col_key)
     branche_crm_idx = COLUMN_MAP.get("CRM Branche"); beschreibung_idx = COLUMN_MAP.get("CRM Beschreibung")
     branche_wiki_idx = COLUMN_MAP.get("Wiki Branche"); kategorien_wiki_idx = COLUMN_MAP.get("Wiki Kategorien")
@@ -1292,10 +1272,10 @@ def process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_
     version_col_letter = sheet_handler._get_col_letter(version_col_idx + 1)
     branch_w_letter = sheet_handler._get_col_letter(branch_w_idx + 1); branch_x_letter = sheet_handler._get_col_letter(branch_x_idx + 1); branch_y_letter = sheet_handler._get_col_letter(branch_y_idx + 1)
 
-    # Konfig & Worker (wie gehabt)
     MAX_BRANCH_WORKERS = Config.MAX_BRANCH_WORKERS; OPENAI_CONCURRENCY_LIMIT = Config.OPENAI_CONCURRENCY_LIMIT
     openai_semaphore_branch = threading.Semaphore(OPENAI_CONCURRENCY_LIMIT); PROCESSING_BRANCH_BATCH_SIZE = Config.PROCESSING_BRANCH_BATCH_SIZE
-    def evaluate_branch_task(task_data):
+
+    def evaluate_branch_task(task_data): # Worker unverändert
         row_num = task_data['row_num']; result = {"branch": "k.A. (Fehler Task)", "consistency": "error", "justification": "Fehler Worker-Task"}; error = None
         try:
             with openai_semaphore_branch:
@@ -1303,33 +1283,34 @@ def process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_
         except Exception as e: error = f"Fehler Branch Eval Z{row_num}: {e}"; debug_print(error); result['justification'] = error[:500]; result['consistency'] = 'error_task'
         return {"row_num": row_num, "result": result, "error": error}
 
-    # Hauptverarbeitung
     tasks_for_processing_batch = []; total_processed_count = 0; total_skipped_count = 0; total_error_count = 0
     if not ALLOWED_TARGET_BRANCHES: load_target_schema();
     if not ALLOWED_TARGET_BRANCHES: return debug_print("FEHLER: Ziel-Schema nicht geladen.")
 
     for i in range(start_row_index_in_sheet, end_row_index_in_sheet + 1):
-        row_index_in_list = i - 1
+        row_index_in_list = i - 1 # 0-basierter Index in all_data
         if row_index_in_list >= len(all_data): continue
         row = all_data[row_index_in_list]
 
-        # Timestamp-Prüfung (AO), WENN NICHT force_process
         should_skip = False
-        if not force_process: # Nur prüfen, wenn nicht forciert
+        if not force_process:
             if len(row) > timestamp_col_index and str(row[timestamp_col_index]).strip(): should_skip = True
         if should_skip: total_skipped_count += 1; continue
 
-        # Task sammeln (wie gehabt)
-        task_data = { "row_num": i, "crm_branche": row[branche_crm_idx] if len(row) > branche_crm_idx else "", "beschreibung": row[beschreibung_idx] if len(row) > beschreibung_idx else "", "wiki_branche": row[branche_wiki_idx] if len(row) > branche_wiki_idx else "", "wiki_kategorien": row[kategorien_wiki_idx] if len(row) > kategorien_wiki_idx else "", "website_summary": row[summary_web_idx] if len(row) > summary_web_idx else ""}
+        task_data = { "row_num": i,
+                      "crm_branche": row[branche_crm_idx] if len(row) > branche_crm_idx else "",
+                      "beschreibung": row[beschreibung_idx] if len(row) > beschreibung_idx else "",
+                      "wiki_branche": row[branche_wiki_idx] if len(row) > branche_wiki_idx else "",
+                      "wiki_kategorien": row[kategorien_wiki_idx] if len(row) > kategorien_wiki_idx else "",
+                      "website_summary": row[summary_web_idx] if len(row) > summary_web_idx else "" }
         tasks_for_processing_batch.append(task_data)
 
-        # Verarbeitungs-Batch ausführen (wie gehabt)
         if len(tasks_for_processing_batch) >= PROCESSING_BRANCH_BATCH_SIZE or i == end_row_index_in_sheet:
             if tasks_for_processing_batch:
                 batch_start_row = tasks_for_processing_batch[0]['row_num']; batch_end_row = tasks_for_processing_batch[-1]['row_num']; batch_task_count = len(tasks_for_processing_batch)
                 debug_print(f"\n--- Branch-Eval Batch ({batch_task_count} Tasks, {batch_start_row}-{batch_end_row}) ---")
                 results_list = []; batch_error_count = 0
-                debug_print(f"  Evaluiere {batch_task_count} parallel (max {MAX_BRANCH_WORKERS} worker, {OPENAI_CONCURRENCY_LIMIT} OpenAI)...")
+                # debug_print(f"  Evaluiere {batch_task_count} parallel (max {MAX_BRANCH_WORKERS} worker, {OPENAI_CONCURRENCY_LIMIT} OpenAI)...")
                 with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_BRANCH_WORKERS) as executor:
                     future_to_task = {executor.submit(evaluate_branch_task, task): task for task in tasks_for_processing_batch}
                     for future in concurrent.futures.as_completed(future_to_task):
@@ -1341,9 +1322,7 @@ def process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_
                              batch_error_count += 1; total_error_count +=1
                         if results_list[-1]['error']: batch_error_count += 1; total_error_count +=1
                 current_batch_processed_count = len(results_list); total_processed_count += current_batch_processed_count
-                debug_print(f"  Branch-Eval Batch beendet. {current_batch_processed_count} Ergebnisse ({batch_error_count} Fehler).")
-
-                # Sheet Updates (wie gehabt, aber mit AO)
+                # debug_print(f"  Branch-Eval Batch beendet. {current_batch_processed_count} Ergebnisse ({batch_error_count} Fehler).")
                 if results_list:
                     current_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S"); current_version = Config.VERSION; batch_sheet_updates = []
                     results_list.sort(key=lambda x: x['row_num'])
@@ -1351,9 +1330,11 @@ def process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_
                         row_num = res_data['row_num']; result = res_data['result']
                         # debug_print(f"  Z{row_num}: Ergebnis -> B='{result.get('branch')}', C='{result.get('consistency')}', J='{result.get('justification', '')[:50]}...'")
                         row_updates = [
-                            {'range': f'{branch_w_letter}{row_num}', 'values': [[result.get("branch", "Fehler")]]}, {'range': f'{branch_x_letter}{row_num}', 'values': [[result.get("consistency", "Fehler")]]}, {'range': f'{branch_y_letter}{row_num}', 'values': [[result.get("justification", "Fehler")]]},
-                            {'range': f'{ts_col_letter}{row_num}', 'values': [[current_timestamp]]}, {'range': f'{version_col_letter}{row_num}', 'values': [[current_version]]}
-                        ]
+                            {'range': f'{branch_w_letter}{row_num}', 'values': [[result.get("branch", "Fehler")]]},
+                            {'range': f'{branch_x_letter}{row_num}', 'values': [[result.get("consistency", "Fehler")]]},
+                            {'range': f'{branch_y_letter}{row_num}', 'values': [[result.get("justification", "Fehler")]]},
+                            {'range': f'{ts_col_letter}{row_num}', 'values': [[current_timestamp]]},
+                            {'range': f'{version_col_letter}{row_num}', 'values': [[current_version]]} ]
                         batch_sheet_updates.extend(row_updates)
                     if batch_sheet_updates:
                         debug_print(f"  Sende Sheet-Update für {len(results_list)} Zeilen ({len(batch_sheet_updates)} Zellen)...")
@@ -1363,80 +1344,83 @@ def process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_
                 tasks_for_processing_batch = []
                 debug_print(f"--- Verarbeitungs-Batch {batch_start_row}-{batch_end_row} abgeschlossen ---")
                 time.sleep(1) # Kurze Pause nach Batch
-
     debug_print(f"Brancheneinschätzung (Parallel Batch) beendet. {total_processed_count} verarbeitet ({total_error_count} Fehler), {total_skipped_count} übersprungen.")
 
-def run_dispatcher(mode, sheet_handler, row_limit=None): # MODIFIZIERT: Übergibt force_process an branch_batch im combined mode
+# ==================== DISPATCHER ====================
+def run_dispatcher(mode, sheet_handler, row_limit=None): # Uses Config.HEADER_ROWS; passes force_process to process_branch_batch
+    """ Select the batch process for `mode` and determine the start row dynamically. """
     debug_print(f"Starte Dispatcher Modus '{mode}', Limit={row_limit}.")
     header_rows = Config.HEADER_ROWS
-    start_col_key = "Timestamp letzte Prüfung"; min_start_row = 7
+    start_col_key = "Timestamp letzte Prüfung"; min_start_row = 7 # default check column (AO); min_start_row is handed on as min_sheet_row
     if mode == "website": start_col_key = "Website Rohtext"
     elif mode == "wiki": start_col_key = "Wiki Verif. Timestamp"
     elif mode == "branch": start_col_key = "Timestamp letzte Prüfung"
     elif mode == "summarize": start_col_key = "Website Zusammenfassung"
-    elif mode == "combined": start_col_key = "Timestamp letzte Prüfung" # Combined startet basierend auf AO
+    elif mode == "combined": start_col_key = "Timestamp letzte Prüfung"
     debug_print(f"Dispatcher: Ermittle Startzeile ({start_col_key})...")
     start_data_index = sheet_handler.get_start_row_index(check_column_key=start_col_key, min_sheet_row=min_start_row)
     if start_data_index == -1: return debug_print(f"FEHLER: Startspalte '{start_col_key}' prüfen!")
     start_row_index_in_sheet = start_data_index + header_rows + 1
-    total_sheet_rows = len(sheet_handler.sheet_values)
-    if start_data_index >= len(sheet_handler.get_data()): return debug_print("Start nach Ende.")
-    if start_row_index_in_sheet > total_sheet_rows: return debug_print("Ungültige Startzeile.")
+    total_sheet_rows = len(sheet_handler.sheet_values) # total number of rows held by the handler
+    if start_data_index >= len(sheet_handler.get_data()): return debug_print("Startindex liegt hinter der letzten Datenzeile. Keine Verarbeitung.")
+    if start_row_index_in_sheet > total_sheet_rows: return debug_print("Ungültige Startzeile berechnet.")
+    end_row_index_in_sheet = total_sheet_rows # default: run through the last row
     if row_limit is not None and row_limit > 0: end_row_index_in_sheet = min(start_row_index_in_sheet + row_limit - 1, total_sheet_rows)
-    elif row_limit == 0: return debug_print("Limit 0.")
-    else: end_row_index_in_sheet = total_sheet_rows
-    debug_print(f"Dispatcher: Verarbeitung geplant {start_row_index_in_sheet}-{end_row_index_in_sheet}.")
-    if start_row_index_in_sheet > end_row_index_in_sheet: return debug_print("Start nach Ende (berechnet).")
+    elif row_limit == 0: return debug_print("Limit 0 -> Keine Verarbeitung.")
+    debug_print(f"Dispatcher: Verarbeitung geplant für Sheet-Zeilen {start_row_index_in_sheet} bis {end_row_index_in_sheet}.")
+    if start_row_index_in_sheet > end_row_index_in_sheet: return debug_print("Startzeile liegt hinter Endzeile. Keine Verarbeitung.")
     try:
         if mode == "wiki": process_verification_only(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet)
         elif mode == "website": process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet)
-        elif mode == "branch": process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet) # Normal mit TS Check
+        elif mode == "branch": process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet, force_process=False) # regular run, keeps the timestamp check
         elif mode == "summarize": process_website_summarization_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet)
         elif mode == "combined":
             debug_print("--- Combined: Wiki (AX Check) ---"); process_verification_only(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet); time.sleep(1)
             debug_print("--- Combined: Website Scrape (AR Check) ---"); process_website_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet); time.sleep(1)
             debug_print("--- Combined: Website Summarize (AS Check) ---"); process_website_summarization_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet); time.sleep(1)
-            # Führe Branch *ohne* Timestamp-Check aus, da AO im selben Lauf gesetzt werden soll
-            debug_print("--- Combined: Branch (Force Process) ---"); process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet, force_process=True)
+            debug_print("--- Combined: Branch (Force Process) ---"); process_branch_batch(sheet_handler, start_row_index_in_sheet, end_row_index_in_sheet, force_process=True) # force: run without the timestamp check, AO is set in this same run
             debug_print("--- Combined Mode abgeschlossen ---")
-        else: debug_print(f"Ungültiger Modus '{mode}'.")
-    except Exception as e: debug_print(f"FEHLER im Dispatcher: {e}"); import traceback; debug_print(traceback.format_exc())
+        else: debug_print(f"Ungültiger Dispatcher-Modus '{mode}'.")
+    except Exception as e: debug_print(f"FEHLER im Dispatcher: {e}"); debug_print(traceback.format_exc()) # NOTE(review): needs a module-level "import traceback" now that the inline import is gone - confirm
+
+# ==================== SERP API / LINKEDIN FUNCTIONS ====================
 
 @retry_on_failure
-def serp_website_lookup(company_name): # unverändert
-    serp_key = Config.API_KEYS.get('serpapi')
-    if not serp_key: debug_print("Fehler: SerpAPI Key fehlt Website Lookup."); return "k.A."
+def serp_website_lookup(company_name): # Lookup logic unchanged; only locals and log texts were shortened
+    """Look up the company website via SerpAPI (Google search); returns the normalized URL or 'k.A.'."""
+    serp_key = Config.API_KEYS.get('serpapi'); blacklist = ["bloomberg.com", "northdata.de", "finanzen.net", "handelsblatt.com", "wikipedia.org", "linkedin.com"]
+    if not serp_key: debug_print("Fehler: SerpAPI Key fehlt."); return "k.A."
     if not company_name: return "k.A."
-    blacklist = ["bloomberg.com", "northdata.de", "finanzen.net", "handelsblatt.com", "wikipedia.org", "linkedin.com"]
     query = f'{company_name} offizielle Website'; params = {"engine": "google", "q": query, "api_key": serp_key, "hl": "de", "gl": "de"}
     api_url = "https://serpapi.com/search"
     try:
         response = requests.get(api_url, params=params, timeout=10); response.raise_for_status(); data = response.json()
         if "knowledge_graph" in data and "website" in data["knowledge_graph"]:
              kg_url = data["knowledge_graph"]["website"]
-             if kg_url and not any(bad_domain in kg_url for bad_domain in blacklist):
-                  normalized_url = simple_normalize_url(kg_url)
-                  if normalized_url != "k.A.": debug_print(f"SERP: Website '{normalized_url}' aus KG für '{company_name}'."); return normalized_url
+             if kg_url and not any(bad in kg_url for bad in blacklist): # knowledge-graph hit is tried first; blacklisted domains are rejected by substring match
+                  norm_url = simple_normalize_url(kg_url)
+                  if norm_url != "k.A.": debug_print(f"SERP: Website '{norm_url}' aus KG."); return norm_url
         if "organic_results" in data:
             for result in data["organic_results"]:
                 url = result.get("link", "")
-                if url and not any(bad_domain in url for bad_domain in blacklist) and url.startswith("http"):
-                    normalized_url = simple_normalize_url(url)
-                    if normalized_url != "k.A.":
-                         domain_part = normalized_url.replace('www.', '').split('.')[0]
-                         if domain_part in normalize_company_name(company_name): debug_print(f"SERP: Website '{normalized_url}' aus Organic für '{company_name}'."); return normalized_url
-                         # else: debug_print(f"SERP: URL '{normalized_url}' übersprungen (Domain passt nicht zu '{company_name}').")
+                if url and not any(bad in url for bad in blacklist) and url.startswith("http"):
+                    norm_url = simple_normalize_url(url)
+                    if norm_url != "k.A.":
+                         domain = norm_url.replace('www.', '').split('.')[0] # first label of the URL after dropping 'www.'
+                         if domain in normalize_company_name(company_name): debug_print(f"SERP: Website '{norm_url}' aus Organic."); return norm_url # accept only if that label occurs in the result of normalize_company_name
+                         # else: debug_print(f"SERP: URL '{norm_url}' übersprungen (Domain passt nicht).")
         debug_print(f"SERP: Keine passende Website für '{company_name}'."); return "k.A."
     except requests.exceptions.RequestException as e: debug_print(f"Fehler SERP Website Lookup '{company_name}': {e}"); return "k.A."
     except Exception as e: debug_print(f"Allg. Fehler SERP Website Lookup '{company_name}': {e}"); return "k.A."
 
 @retry_on_failure
-def search_linkedin_contacts(company_name, website, position_query, crm_kurzform, num_results=10): # unverändert
+def search_linkedin_contacts(company_name, website, position_query, crm_kurzform, num_results=10): # SerpAPI search flow as before
+    """Search LinkedIn profiles via SerpAPI and return a list of contact dicts (empty list on error or missing input)."""
     serp_key = Config.API_KEYS.get('serpapi')
-    if not serp_key: debug_print("Fehler: SerpAPI Key fehlt LinkedIn Suche."); return []
+    if not serp_key: debug_print("Fehler: SerpAPI Key fehlt."); return []
     if not all([company_name, position_query, crm_kurzform]): return []
-    query = f'site:linkedin.com/in "{position_query}" "{crm_kurzform}"'
-    params = {"engine": "google", "q": query, "api_key": serp_key, "hl": "de", "gl": "de", "num": num_results}
+    query = f'site:linkedin.com/in "{position_query}" "{crm_kurzform}"' # search for the CRM short name; it is re-checked against each result title below
+    params = {"engine": "google", "q": query, "api_key": serp_key, "hl": "de", "gl": "de", "num": num_results }
     api_url = "https://serpapi.com/search"
     try:
         response = requests.get(api_url, params=params, timeout=15); response.raise_for_status(); data = response.json(); contacts = []
@@ -1444,49 +1428,84 @@ def search_linkedin_contacts(company_name, website, position_query, crm_kurzform
             for result in data["organic_results"]:
                 title = result.get("title", ""); linkedin_url = result.get("link", "")
                 if not linkedin_url or "linkedin.com/in/" not in linkedin_url: continue
-                if crm_kurzform.lower() not in title.lower(): debug_print(f"LinkedIn Skip: '{crm_kurzform}' nicht in '{title}'"); continue
+                if crm_kurzform.lower() not in title.lower(): # debug_print(f"LinkedIn Skip: '{crm_kurzform}' nicht in '{title}'");
+                     continue
                 name_part = ""; pos_part = position_query; separators = ["–", "-", "|", " at ", " bei "]; title_cleaned = title.replace("...", "").strip(); found_sep = False
                 for sep in separators:
                     if sep in title_cleaned:
                         parts = title_cleaned.split(sep, 1); name_part = parts[0].strip().replace(" | LinkedIn", "").replace(" - LinkedIn", "").replace(" - Profil", "").strip()
                         potential_pos = parts[1].strip()
-                        if crm_kurzform.lower() in potential_pos.lower(): potential_pos = potential_pos.replace(crm_kurzform, "", 1).strip()
+                        if crm_kurzform.lower() in potential_pos.lower(): kurz_at = potential_pos.lower().find(crm_kurzform.lower()); potential_pos = (potential_pos[:kurz_at] + potential_pos[kurz_at + len(crm_kurzform):]).strip() # remove case-insensitively so the removal matches the lower() test
                         potential_pos = potential_pos.split(" | LinkedIn")[0].split(" - LinkedIn")[0].strip(); pos_part = potential_pos if potential_pos else position_query; found_sep = True; break
                 if not found_sep:
                     name_part = title_cleaned.split(" | LinkedIn")[0].split(" - LinkedIn")[0].strip()
-                    if position_query.lower() in name_part.lower(): name_part = name_part.replace(position_query, "", 1).strip()
+                    if position_query.lower() in name_part.lower(): query_at = name_part.lower().find(position_query.lower()); name_part = (name_part[:query_at] + name_part[query_at + len(position_query):]).strip() # remove case-insensitively so the removal matches the lower() test
                 firstname = ""; lastname = ""; name_parts = name_part.split()
                 if len(name_parts) > 1: firstname = name_parts[0]; lastname = " ".join(name_parts[1:])
                 elif len(name_parts) == 1: firstname = name_parts[0]
                 if not firstname: debug_print(f"Kontakt übersprungen: Name nicht extrahiert aus '{title}'"); continue
                 contact_data = {"Firmenname": company_name, "CRM Kurzform": crm_kurzform, "Website": website, "Vorname": firstname, "Nachname": lastname, "Position": pos_part, "LinkedInURL": linkedin_url}
                 contacts.append(contact_data); # debug_print(f"Gefundener LinkedIn Kontakt: {firstname} {lastname} - {pos_part}")
-        debug_print(f"LinkedIn Suche '{position_query}' bei '{crm_kurzform}' -> {len(contacts)} Kontakte."); return contacts
+        debug_print(f"LinkedIn Suche '{position_query}' @ '{crm_kurzform}' -> {len(contacts)} Kontakte."); return contacts
     except requests.exceptions.RequestException as e: debug_print(f"Fehler SERP LinkedIn Suche: {e}"); return []
     except Exception as e: debug_print(f"Allg. Fehler SERP LinkedIn Suche: {e}"); return []
 
-def process_contact_research(sheet_handler): # unverändert
+def process_contact_research(sheet_handler): # Uses Config.HEADER_ROWS and COLUMN_MAP throughout
+    """Search LinkedIn contacts per company and write them to the 'Contacts' sheet."""
     debug_print("Starte Contact Research (LinkedIn)...")
+    if not sheet_handler.load_data(): return # load the sheet data first
     main_sheet = sheet_handler.sheet; all_data = sheet_handler.get_all_data_with_headers(); header_rows = Config.HEADER_ROWS
-    timestamp_col_index = COLUMN_MAP["Contact Search Timestamp"]; start_row_index_in_sheet = -1
+    if not all_data or len(all_data) <= header_rows: return
+
+    # Resolve column indices
+    try:
+        ts_col_idx = COLUMN_MAP["Contact Search Timestamp"]
+        name_col_idx = COLUMN_MAP["CRM Name"]
+        kurz_col_idx = COLUMN_MAP["CRM Kurzform"]
+        web_col_idx = COLUMN_MAP["CRM Website"]
+        sl_col_idx = COLUMN_MAP["Linked Serviceleiter gefunden"]
+        it_col_idx = COLUMN_MAP["Linked It-Leiter gefunden"]
+        mg_col_idx = COLUMN_MAP["Linked Management gefunden"]
+        di_col_idx = COLUMN_MAP["Linked Disponent gefunden"]
+    except KeyError as e:
+        debug_print(f"FEHLER: Benötigter Schlüssel '{e}' für Contact Research nicht in COLUMN_MAP."); return
+
+    # Find the first row whose Contact Search Timestamp cell is empty
+    start_row_index_in_sheet = -1
     for i in range(header_rows + 1, len(all_data) + 1):
-         if i < 7: continue
+         if i < 7: continue # data normally starts at sheet row 7
          row_index_in_list = i - 1; row = all_data[row_index_in_list]
-         if len(row) <= timestamp_col_index or not row[timestamp_col_index].strip(): start_row_index_in_sheet = i; break
+         if len(row) <= ts_col_idx or not row[ts_col_idx].strip():
+              start_row_index_in_sheet = i; break
     if start_row_index_in_sheet == -1: debug_print("Keine Zeile ohne Contact Search TS (AM, ab Z7). Skip."); return
     debug_print(f"Contact Research startet ab Zeile {start_row_index_in_sheet}.")
+
+    # Open the Contacts worksheet, creating it if missing
     try: contacts_sheet = sheet_handler.sheet.spreadsheet.worksheet("Contacts"); debug_print("Blatt 'Contacts' gefunden.")
     except gspread.exceptions.WorksheetNotFound:
         debug_print("Blatt 'Contacts' nicht gefunden, erstelle..."); contacts_sheet = sheet_handler.sheet.spreadsheet.add_worksheet(title="Contacts", rows="1000", cols="12")
         header = ["Firmenname", "CRM Kurzform", "Website", "Geschlecht", "Vorname", "Nachname", "Position", "Suchbegriffskategorie", "E-Mail-Adresse", "LinkedIn-Link", "Timestamp"]
         contacts_sheet.update(values=[header], range_name="A1:K1"); debug_print("Neues Blatt 'Contacts' erstellt.")
+
     positions_to_search = ["Serviceleiter", "Leiter Kundendienst", "IT-Leiter", "Leiter IT", "Geschäftsführer", "Vorstand", "Disponent", "Einsatzleiter"]
-    for i in range(start_row_index_in_sheet, len(all_data) + 1):
-        row_index_in_list = i - 1; row = all_data[row_index_in_list]
-        company_name = row[COLUMN_MAP["CRM Name"]] if len(row) > COLUMN_MAP["CRM Name"] else ""; crm_kurzform = row[COLUMN_MAP["CRM Kurzform"]] if len(row) > COLUMN_MAP["CRM Kurzform"] else ""; website = row[COLUMN_MAP["CRM Website"]] if len(row) > COLUMN_MAP["CRM Website"] else ""
-        if not all([company_name, crm_kurzform, website]): debug_print(f"Zeile {i}: Übersprungen (fehlende CRM Daten)."); continue
-        debug_print(f"Zeile {i}: Suche Kontakte für '{crm_kurzform}'...")
+
+    # Iterate over the header-less data rows
+    data_to_process = sheet_handler.get_data()
+    start_data_index = start_row_index_in_sheet - header_rows - 1
+
+    for idx in range(start_data_index, len(data_to_process)):
+        row_num_in_sheet = idx + header_rows + 1
+        row = data_to_process[idx]
+        if len(row) > ts_col_idx and row[ts_col_idx].strip(): continue # already researched (timestamp set): avoid repeated SerpAPI calls and duplicate contact rows
+        company_name = row[name_col_idx] if len(row) > name_col_idx else ""
+        crm_kurzform = row[kurz_col_idx] if len(row) > kurz_col_idx else ""
+        website = row[web_col_idx] if len(row) > web_col_idx else ""
+
+        if not all([company_name, crm_kurzform, website]): debug_print(f"Zeile {row_num_in_sheet}: Übersprungen (fehlende CRM Daten)."); continue
+
+        debug_print(f"Zeile {row_num_in_sheet}: Suche Kontakte für '{crm_kurzform}'...")
         all_found_contacts = []; contact_counts = {pos: 0 for pos in ["Serviceleiter", "IT-Leiter", "Geschäftsführer", "Disponent"]}
+
         for position in positions_to_search:
             found_contacts = search_linkedin_contacts(company_name, website, position, crm_kurzform, num_results=5)
             cat = "Serviceleiter" if any(k in position.lower() for k in ["serviceleiter", "kundendienst", "einsatzleiter"]) else \
@@ -1495,26 +1514,41 @@ def process_contact_research(sheet_handler): # unverändert
                   "Disponent" if "disponent" in position.lower() else None
             if cat: contact_counts[cat] += len(found_contacts)
             for contact in found_contacts: contact["Suchbegriffskategorie"] = position; all_found_contacts.append(contact)
-            time.sleep(1.5)
-        rows_to_append = []; timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S"); unique_contacts = {c['LinkedInURL']: c for c in all_found_contacts}.values()
+            time.sleep(1.5) # pause between searches
+
+        rows_to_append = []; timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        unique_contacts = {c['LinkedInURL']: c for c in all_found_contacts}.values()
+
         for contact in unique_contacts:
             firstname = contact.get("Vorname", ""); lastname = contact.get("Nachname", ""); gender_value = get_gender(firstname); email = get_email_address(firstname, lastname, website)
             contact_row = [contact.get("Firmenname", ""), contact.get("CRM Kurzform", ""), contact.get("Website", ""), gender_value, firstname, lastname, contact.get("Position", ""), contact.get("Suchbegriffskategorie", ""), email, contact.get("LinkedInURL", ""), timestamp]
             rows_to_append.append(contact_row)
+
         if rows_to_append:
-            try: contacts_sheet.append_rows(rows_to_append, value_input_option='USER_ENTERED'); debug_print(f"Zeile {i}: {len(rows_to_append)} Kontakte zu 'Contacts' hinzugefügt.")
-            except Exception as e: debug_print(f"Zeile {i}: Fehler Schreiben Contacts-Sheet: {e}")
-        main_sheet_updates = []
-        main_sheet_updates.append({'range': f'AI{i}', 'values': [[str(contact_counts["Serviceleiter"])]]}); main_sheet_updates.append({'range': f'AJ{i}', 'values': [[str(contact_counts["IT-Leiter"])]]})
-        main_sheet_updates.append({'range': f'AK{i}', 'values': [[str(contact_counts["Geschäftsführer"])]]}); main_sheet_updates.append({'range': f'AL{i}', 'values': [[str(contact_counts["Disponent"])]]})
-        main_sheet_updates.append({'range': f'AM{i}', 'values': [[timestamp]]})
-        sheet_handler.batch_update_cells(main_sheet_updates); debug_print(f"Zeile {i}: Kontaktzahlen Hauptblatt aktualisiert: {contact_counts} – TS in AM.")
-        time.sleep(Config.RETRY_DELAY)
+            try: contacts_sheet.append_rows(rows_to_append, value_input_option='USER_ENTERED'); debug_print(f"Zeile {row_num_in_sheet}: {len(rows_to_append)} Kontakte zu 'Contacts' hinzugefügt.")
+            except Exception as e: debug_print(f"Zeile {row_num_in_sheet}: Fehler Schreiben Contacts-Sheet: {e}")
+
+        # Batch update for the main sheet (counters + timestamp)
+        # Column letters are derived via _get_col_letter instead of being hard-coded
+        sl_l = sheet_handler._get_col_letter(sl_col_idx + 1); it_l = sheet_handler._get_col_letter(it_col_idx + 1)
+        mg_l = sheet_handler._get_col_letter(mg_col_idx + 1); di_l = sheet_handler._get_col_letter(di_col_idx + 1)
+        ts_l = sheet_handler._get_col_letter(ts_col_idx + 1)
+        main_sheet_updates = [
+            {'range': f'{sl_l}{row_num_in_sheet}', 'values': [[str(contact_counts["Serviceleiter"])]]},
+            {'range': f'{it_l}{row_num_in_sheet}', 'values': [[str(contact_counts["IT-Leiter"])]]},
+            {'range': f'{mg_l}{row_num_in_sheet}', 'values': [[str(contact_counts["Geschäftsführer"])]]},
+            {'range': f'{di_l}{row_num_in_sheet}', 'values': [[str(contact_counts["Disponent"])]]},
+            {'range': f'{ts_l}{row_num_in_sheet}', 'values': [[timestamp]]} # Contact Search Timestamp
+        ]
+        sheet_handler.batch_update_cells(main_sheet_updates)
+        debug_print(f"Zeile {row_num_in_sheet}: Kontaktzahlen Hauptblatt aktualisiert: {contact_counts} – TS in {ts_l}.")
+        time.sleep(Config.RETRY_DELAY) # pause after each company
+
     debug_print("Contact Research abgeschlossen.")
 
 # ==================== ALIGNMENT DEMO (Hauptblatt) ====================
-def alignment_demo(sheet):
-    """Schreibt die Header-Struktur (Zeilen 1-5, jetzt bis Spalte AX) ins angegebene Sheet."""
+def alignment_demo(sheet): # Korrigierte Version aus v1.6.5
+    """Schreibt die Header-Struktur (Zeilen 1-5, bis Spalte AX) ins angegebene Sheet."""
     new_headers = [ # Spalten A bis AX
         ["ReEval Flag", "CRM Name", "CRM Kurzform", "CRM Website", "CRM Ort", "CRM Beschreibung", "CRM Branche", "CRM Beschreibung Branche extern", "CRM Anzahl Techniker", "CRM Umsatz", "CRM Anzahl Mitarbeiter", "CRM Vorschlag Wiki URL", "Wiki URL", "Wiki Absatz", "Wiki Branche", "Wiki Umsatz", "Wiki Mitarbeiter", "Wiki Kategorien", "Chat Wiki Konsistenzprüfung", "Chat Begründung Wiki Inkonsistenz", "Chat Vorschlag Wiki Artikel", "Begründung bei Abweichung", "Chat Vorschlag Branche", "Chat Konsistenz Branche", "Chat Begründung Abweichung Branche", "Chat Prüfung FSM Relevanz", "Chat Begründung für FSM Relevanz", "Chat Schätzung Anzahl Mitarbeiter", "Chat Konsistenzprüfung Mitarbeiterzahl", "Chat Begründung Abweichung Mitarbeiterzahl", "Chat Einschätzung Anzahl Servicetechniker", "Chat Begründung Abweichung Anzahl Servicetechniker", "Chat Schätzung Umsatz", "Chat Begründung Abweichung Umsatz", "Linked Serviceleiter gefunden", "Linked It-Leiter gefunden", "Linked Management gefunden", "Linked Disponent gefunden", "Contact Search Timestamp", "Wikipedia Timestamp", "Timestamp letzte Prüfung", "Version", "Tokens", "Website Rohtext", "Website Zusammenfassung", "Website Scrape Timestamp", "Geschätzter Techniker Bucket", "Finaler Umsatz (Wiki>CRM)", "Finaler Mitarbeiter (Wiki>CRM)", "Wiki Verif. Timestamp"],
         ["CRM", "CRM", "CRM", "CRM", "CRM", "CRM", "CRM", "CRM", "CRM", "CRM", "CRM", "CRM", "Wikipediascraper", "Wikipediascraper", "Wikipediascraper", "Wikipediascraper", "Wikipediascraper", "Wikipediascraper", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "Chat GPT API", "LinkedIn (via SerpApi)", "LinkedIn (via SerpApi)", "LinkedIn (via SerpApi)", "LinkedIn (via SerpApi)", "System", "System", "System", "System", "System", "Web Scraper", "Chat GPT API", "System", "ML Modell / Skript", "Skript (Wiki/CRM)", "Skript (Wiki/CRM)", "System"],
@@ -1523,16 +1557,12 @@ def alignment_demo(sheet):
         ["Datenquelle", "Datenquelle", "Datenquelle", "Datenquelle", "Datenquelle", "Datenquelle", "Datenquelle", "Datenquelle", "Datenquelle", "Datenquelle", "Datenquelle", "Datenquelle", "Wird durch Wikipedia Scraper bereitgestellt", "Wird zunächst nicht verwendet...", "Wird u.a. zur finalen Ermittlung...", "Wird u.a. mit CRM-Umsatz...", "Wird u.a. mit CRM-Anzahl...", "Wenn Website-Daten fehlen...", "\"Es soll durch ChatGPT geprüft werden...", "\"Liegt eine Inkonsistenz...", "\"Sollte durch die Wikipedia-Suche...", "XXX derzeit nicht verwendet...", "\"ChatGPT soll anhand der vorliegenden...", "Die in Spalte CRM festgelegte...", "Weicht die von ChatGPT ermittelte...", "ChatGPT soll anhand der vorliegenden Daten prüfen...", "Die in 'Chat Begründung für FSM Relevanz'...", "Nur wenn kein Wikipedia-Eintrag...", "Entspricht die durch ChatGPT ermittelte...", "Weicht die von ChatGPT geschätzte...", "ChatGPT soll auf Basis öffentlich...", "Weicht die von ChatGPT geschätzte...", "Nur wenn kein Wikipedia-Eintrag...", "ChatGPT soll signifikante Umsatzabweichungen...", "Über SerpAPI wird zusammen...", "Über SerpAPI wird zusammen...", "Über SerpAPI wird zusammen...", "Über SerpAPI wird zusammen...", "Wenn die Kontaktsuche gestartet wird...", "Wenn die Wikipedia-Suche gestartet wird...", "Wenn die ChatGPT-Bewertung gestartet wird...", "Wird durch das System befüllt", "Wird durch tiktoken berechnet", "Wird durch Web Scraper...", "Wird durch ChatGPT API...", "Timestamp wird gesetzt, wenn Website Rohtext/Zusammenfassung geschrieben werden.", "Ergebnis der Schätzung durch das trainierte ML-Modell.", "Vom Skript berechneter Wert, priorisiert Wiki > CRM...", "Vom Skript berechneter Wert, priorisiert Wiki > CRM...", "Timestamp wird gesetzt, wenn Wiki-Verifikation (S-Y) durchgeführt wurde."]
     ]
     num_cols = len(new_headers[0])
-
-    # --- KORRIGIERTE Innere Funktion ---
-    def colnum_string(n):
+    def colnum_string(n): # Korrigierte innere Funktion
         string = ""
         while n > 0:
             n, remainder = divmod(n - 1, 26)
             string = chr(65 + remainder) + string
         return string
-    # --- ENDE KORRIGIERTE Innere Funktion ---
-
     end_col_letter = colnum_string(num_cols)
     header_range = f"A1:{end_col_letter}{len(new_headers)}"
     try:
@@ -1543,7 +1573,6 @@ def alignment_demo(sheet):
         print(f"FEHLER beim Schreiben der Alignment-Demo Header: {e}")
         debug_print(f"FEHLER beim Schreiben der Alignment-Demo Header: {e}")
 
-# --- DataProcessor Klasse (Rest der Implementierung) ---
 class DataProcessor:
     """
     Verarbeitet Daten aus dem Google Sheet, führt verschiedene Anreicherungs-