revoke from gemini
This commit is contained in:
145
helpers.py
145
helpers.py
@@ -1,5 +1,3 @@
|
||||
# --- START OF FILE helpers.py (Part 1/10) ---
|
||||
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
helpers.py
|
||||
@@ -8,9 +6,6 @@ Sammlung von globalen, wiederverwendbaren Hilfsfunktionen für das Projekt
|
||||
"Automatisierte Unternehmensbewertung". Enthält Decorators, Text-Normalisierung,
|
||||
API-Wrapper und andere Dienstprogramme.
|
||||
"""
|
||||
|
||||
ALLOWED_TARGET_BRANCHES = []
|
||||
|
||||
# ==============================================================================
|
||||
# 1. IMPORTS
|
||||
# ==============================================================================
|
||||
@@ -35,33 +30,9 @@ import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
import openai
|
||||
from openai.error import AuthenticationError, OpenAIError, RateLimitError, APIError, Timeout, InvalidRequestError, ServiceUnavailableError
|
||||
from config import (Config, BRANCH_MAPPING_FILE, URL_CHECK_MARKER, USER_AGENTS, LOG_DIR)
|
||||
|
||||
# ==============================================================================
|
||||
# UNIVERSAL OPENAI v0.x / v1.x IMPORTS
|
||||
# ==============================================================================
|
||||
# This block makes the code compatible with both old (v0.x) and new (v1.x)
|
||||
# versions of the OpenAI library.
|
||||
|
||||
try:
|
||||
# Attempt to import from the new (v1.x) structure
|
||||
from openai import APIError, RateLimitError, APIConnectionError, BadRequestError, AuthenticationError, Timeout
|
||||
IS_OPENAI_V1 = True
|
||||
logging.info("OpenAI library v1.x or higher detected.")
|
||||
except ImportError:
|
||||
# Fallback to the old (v0.x) structure
|
||||
from openai.error import (
|
||||
APIError,
|
||||
RateLimitError,
|
||||
APIConnectionError,
|
||||
InvalidRequestError as BadRequestError, # Alias für Kompatibilität
|
||||
AuthenticationError,
|
||||
Timeout,
|
||||
OpenAIError
|
||||
)
|
||||
IS_OPENAI_V1 = False
|
||||
logging.info("Legacy OpenAI library v0.x detected.")
|
||||
|
||||
# Optionale Bibliotheken
|
||||
try:
|
||||
import tiktoken
|
||||
@@ -71,7 +42,6 @@ except ImportError:
|
||||
|
||||
try:
|
||||
import gender_guesser.detector as gender
|
||||
# Initialisieren Sie den Detector einmal global
|
||||
gender_detector = gender.Detector()
|
||||
logging.info("gender_guesser.Detector initialisiert.")
|
||||
except ImportError:
|
||||
@@ -83,121 +53,60 @@ except Exception as e:
|
||||
gender_detector = None
|
||||
logging.warning(f"Fehler bei Initialisierung von gender_guesser: {e}. Geschlechtserkennung deaktiviert.")
|
||||
|
||||
# Import der Config-Klasse und Konstanten
|
||||
from config import Config, BRANCH_MAPPING_FILE, URL_CHECK_MARKER, USER_AGENTS
|
||||
|
||||
# ==============================================================================
|
||||
# 2. RETRY DECORATOR
|
||||
# ==============================================================================
|
||||
decorator_logger = logging.getLogger(__name__ + ".Retry")
|
||||
|
||||
def setup_logging(log_level=logging.INFO):
|
||||
"""
|
||||
Configure the central logging for all scripts.
|
||||
Uses a StreamHandler and avoids adding duplicate handlers.
|
||||
log_level: level applied to the root logger and to the stream handler (default: logging.INFO).
|
||||
"""
|
||||
# Check whether handlers are already configured on the root logger
|
||||
if not logging.getLogger().handlers:
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(log_level)
|
||||
log_format = '%(asctime)s - %(levelname)-8s - %(name)-25s - %(message)s'
|
||||
formatter = logging.Formatter(log_format)
|
||||
|
||||
# Stream handler for console output
|
||||
stream_handler = logging.StreamHandler()
|
||||
stream_handler.setLevel(log_level)
|
||||
stream_handler.setFormatter(formatter)
|
||||
logger.addHandler(stream_handler)
|
||||
|
||||
# Important: force=True in basicConfig can cause problems when it is
|
||||
# called multiple times. This approach is more stable.
|
||||
# NOTE(review): indentation is not visible in this extract; presumably the message below is only logged when the handler was just added - confirm.
logging.info("Zentrales Logging erfolgreich konfiguriert.")
|
||||
|
||||
def retry_on_failure(func):
|
||||
"""
|
||||
Decorator, der eine Funktion bei bestimmten Fehlern mehrmals wiederholt.
|
||||
Implementiert exponentiellen Backoff mit Jitter.
|
||||
Ist kompatibel mit openai v0.x und v1.x.
|
||||
"""
|
||||
def wrapper(*args, **kwargs):
|
||||
func_name = func.__name__
|
||||
self_arg = args[0] if args and hasattr(args[0], func_name) and isinstance(args[0], object) else None
|
||||
effective_func_name = f"{self_arg.__class__.__name__}.{func_name}" if self_arg else func_name
|
||||
|
||||
max_retries_config = getattr(Config, 'MAX_RETRIES', 3)
|
||||
base_delay = getattr(Config, 'RETRY_DELAY', 5)
|
||||
|
||||
if max_retries_config <= 0:
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
decorator_logger.error(f"FEHLER bei '{effective_func_name}' (keine Retries konfiguriert). {type(e).__name__} - {str(e)[:150]}...")
|
||||
# Wir fangen hier jetzt alle Fehler, da die spezifischen unten sind.
|
||||
decorator_logger.exception("Details zum Fehler:")
|
||||
raise e
|
||||
|
||||
for attempt in range(max_retries_config):
|
||||
try:
|
||||
if attempt > 0:
|
||||
decorator_logger.warning(f"Wiederhole Versuch {attempt + 1}/{max_retries_config} fuer '{effective_func_name}'...")
|
||||
return func(*args, **kwargs)
|
||||
|
||||
# Fehler, die NICHT wiederholt werden sollen
|
||||
except (gspread.exceptions.SpreadsheetNotFound, AuthenticationError, ValueError, BadRequestError) as e:
|
||||
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': Permanentes Problem erkannt. {type(e).__name__} - {str(e)[:150]}...")
|
||||
decorator_logger.exception("Details:")
|
||||
raise e
|
||||
|
||||
# HTTP-Fehler, die NICHT wiederholt werden sollen
|
||||
except (gspread.exceptions.SpreadsheetNotFound, AuthenticationError, ValueError, InvalidRequestError) as e:
|
||||
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': Permanentes Problem erkannt. {type(e).__name__} - {str(e)[:150]}...")
|
||||
decorator_logger.exception("Details:")
|
||||
raise e
|
||||
except requests.exceptions.HTTPError as e:
|
||||
if hasattr(e, 'response') and e.response is not None:
|
||||
status_code = e.response.status_code
|
||||
non_retryable_status_codes = [404, 400, 401, 403]
|
||||
if status_code in non_retryable_status_codes:
|
||||
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': HTTP Fehler {status_code} erhalten ({e.response.reason}). Nicht wiederholbar. {str(e)[:100]}...")
|
||||
decorator_logger.exception("Details:")
|
||||
raise e
|
||||
# Wenn der HTTP-Fehler wiederholbar ist (z.B. 500), wird er unten gefangen
|
||||
pass
|
||||
|
||||
# Fehler, die wiederholt werden sollen (inkl. OpenAI-Fehler)
|
||||
except (requests.exceptions.RequestException, gspread.exceptions.APIError, APIError, wikipedia.exceptions.WikipediaException) as e:
|
||||
error_msg = str(e)
|
||||
error_type = type(e).__name__
|
||||
|
||||
if attempt < max_retries_config - 1:
|
||||
wait_time = base_delay * (2 ** attempt) + random.uniform(0, 1)
|
||||
|
||||
# Spezifisches Logging für OpenAI-Fehler
|
||||
if isinstance(e, RateLimitError):
|
||||
decorator_logger.warning(f"🚦 OPENAI RATE LIMIT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||
elif isinstance(e, Timeout):
|
||||
decorator_logger.warning(f"⏰ TIMEOUT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||
elif isinstance(e, APIError): # Fängt alle anderen wiederholbaren OpenAI-Fehler
|
||||
decorator_logger.warning(f"🤖 OPENAI API FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||
|
||||
# Spezifisches Logging für andere Bibliotheken
|
||||
elif isinstance(e, gspread.exceptions.APIError) and hasattr(e, 'response') and e.response is not None and e.response.status_code == 429:
|
||||
decorator_logger.warning(f"🚦 GSPREAD RATE LIMIT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||
elif isinstance(e, requests.exceptions.RequestException):
|
||||
decorator_logger.warning(f"🌐 NETZWERKFEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||
elif isinstance(e, wikipedia.exceptions.WikipediaException):
|
||||
decorator_logger.warning(f"📚 WIKIPEDIA FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||
else:
|
||||
decorator_logger.warning(f"♻️ WIEDERHOLBARER FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||
|
||||
time.sleep(wait_time)
|
||||
else:
|
||||
decorator_logger.error(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}' nach {max_retries_config} Versuchen.")
|
||||
raise e
|
||||
|
||||
if hasattr(e, 'response') and e.response is not None:
|
||||
status_code = e.response.status_code
|
||||
non_retryable_status_codes = [404, 400, 401, 403]
|
||||
if status_code in non_retryable_status_codes:
|
||||
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': HTTP Fehler {status_code} erhalten ({e.response.reason}). Nicht wiederholbar. {str(e)[:100]}...")
|
||||
decorator_logger.exception("Details:")
|
||||
raise e
|
||||
except (requests.exceptions.RequestException, gspread.exceptions.APIError, OpenAIError, wikipedia.exceptions.WikipediaException) as e:
|
||||
error_msg = str(e)
|
||||
error_type = type(e).__name__
|
||||
if attempt < max_retries_config - 1:
|
||||
wait_time = base_delay * (2 ** attempt) + random.uniform(0, 1)
|
||||
if isinstance(e, RateLimitError):
|
||||
decorator_logger.warning(f"🚦 RATE LIMIT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||
elif isinstance(e, (Timeout, ServiceUnavailableError)):
|
||||
decorator_logger.warning(f"⏰ TIMEOUT/UNAVAILABLE ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||
else:
|
||||
decorator_logger.warning(f"♻️ WIEDERHOLBARER FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||
time.sleep(wait_time)
|
||||
else:
|
||||
decorator_logger.error(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}' nach {max_retries_config} Versuchen.")
|
||||
raise e
|
||||
except Exception as e:
|
||||
decorator_logger.critical(f"💥 UNERWARTETER FEHLER ({type(e).__name__}) bei '{effective_func_name}'. KEIN RETRY VERSUCHT.")
|
||||
decorator_logger.exception("Details zum unerwarteten Fehler:")
|
||||
raise e
|
||||
|
||||
raise RuntimeError(f"Retry decorator logic error: Loop completed unexpectedly for {effective_func_name}. This should not happen.")
|
||||
|
||||
return wrapper
|
||||
|
||||
# ==============================================================================
|
||||
|
||||
Reference in New Issue
Block a user