revoke from gemini
This commit is contained in:
103
helpers.py
103
helpers.py
@@ -1,5 +1,3 @@
|
|||||||
# --- START OF FILE helpers.py (Part 1/10) ---
|
|
||||||
|
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
helpers.py
|
helpers.py
|
||||||
@@ -8,9 +6,6 @@ Sammlung von globalen, wiederverwendbaren Hilfsfunktionen für das Projekt
|
|||||||
"Automatisierte Unternehmensbewertung". Enthält Decorators, Text-Normalisierung,
|
"Automatisierte Unternehmensbewertung". Enthält Decorators, Text-Normalisierung,
|
||||||
API-Wrapper und andere Dienstprogramme.
|
API-Wrapper und andere Dienstprogramme.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
ALLOWED_TARGET_BRANCHES = []
|
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# 1. IMPORTS
|
# 1. IMPORTS
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
@@ -35,33 +30,9 @@ import requests
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import openai
|
import openai
|
||||||
|
from openai.error import AuthenticationError, OpenAIError, RateLimitError, APIError, Timeout, InvalidRequestError, ServiceUnavailableError
|
||||||
from config import (Config, BRANCH_MAPPING_FILE, URL_CHECK_MARKER, USER_AGENTS, LOG_DIR)
|
from config import (Config, BRANCH_MAPPING_FILE, URL_CHECK_MARKER, USER_AGENTS, LOG_DIR)
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# UNIVERSAL OPENAI v0.x / v1.x IMPORTS
|
|
||||||
# ==============================================================================
|
|
||||||
# This block makes the code compatible with both old (v0.x) and new (v1.x)
|
|
||||||
# versions of the OpenAI library.
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Attempt to import from the new (v1.x) structure
|
|
||||||
from openai import APIError, RateLimitError, APIConnectionError, BadRequestError, AuthenticationError, Timeout
|
|
||||||
IS_OPENAI_V1 = True
|
|
||||||
logging.info("OpenAI library v1.x or higher detected.")
|
|
||||||
except ImportError:
|
|
||||||
# Fallback to the old (v0.x) structure
|
|
||||||
from openai.error import (
|
|
||||||
APIError,
|
|
||||||
RateLimitError,
|
|
||||||
APIConnectionError,
|
|
||||||
InvalidRequestError as BadRequestError, # Alias für Kompatibilität
|
|
||||||
AuthenticationError,
|
|
||||||
Timeout,
|
|
||||||
OpenAIError
|
|
||||||
)
|
|
||||||
IS_OPENAI_V1 = False
|
|
||||||
logging.info("Legacy OpenAI library v0.x detected.")
|
|
||||||
|
|
||||||
# Optionale Bibliotheken
|
# Optionale Bibliotheken
|
||||||
try:
|
try:
|
||||||
import tiktoken
|
import tiktoken
|
||||||
@@ -71,7 +42,6 @@ except ImportError:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
import gender_guesser.detector as gender
|
import gender_guesser.detector as gender
|
||||||
# Initialisieren Sie den Detector einmal global
|
|
||||||
gender_detector = gender.Detector()
|
gender_detector = gender.Detector()
|
||||||
logging.info("gender_guesser.Detector initialisiert.")
|
logging.info("gender_guesser.Detector initialisiert.")
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@@ -83,72 +53,32 @@ except Exception as e:
|
|||||||
gender_detector = None
|
gender_detector = None
|
||||||
logging.warning(f"Fehler bei Initialisierung von gender_guesser: {e}. Geschlechtserkennung deaktiviert.")
|
logging.warning(f"Fehler bei Initialisierung von gender_guesser: {e}. Geschlechtserkennung deaktiviert.")
|
||||||
|
|
||||||
# Import der Config-Klasse und Konstanten
|
|
||||||
from config import Config, BRANCH_MAPPING_FILE, URL_CHECK_MARKER, USER_AGENTS
|
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# 2. RETRY DECORATOR
|
# 2. RETRY DECORATOR
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
decorator_logger = logging.getLogger(__name__ + ".Retry")
|
decorator_logger = logging.getLogger(__name__ + ".Retry")
|
||||||
|
|
||||||
def setup_logging(log_level=logging.INFO):
    """
    Configure the central logging used by all scripts.

    Attaches one console ``StreamHandler`` to the root logger. If the root
    logger already has at least one handler, the function leaves the existing
    configuration untouched, so repeated calls never add duplicate handlers.

    Args:
        log_level: Logging level applied to both the root logger and the
            newly created stream handler. Defaults to ``logging.INFO``.

    Returns:
        None
    """
    root_logger = logging.getLogger()

    # Guard: only configure when no handler is attached to the root logger yet.
    if root_logger.handlers:
        return

    root_logger.setLevel(log_level)
    log_format = '%(asctime)s - %(levelname)-8s - %(name)-25s - %(message)s'
    formatter = logging.Formatter(log_format)

    # Stream handler for console output.
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(log_level)
    stream_handler.setFormatter(formatter)
    root_logger.addHandler(stream_handler)

    # Important: logging.basicConfig(force=True) can cause problems when it is
    # called more than once; attaching the handler manually is more stable.
    # NOTE(review): the original indentation was lost; this confirmation
    # message is assumed to belong to the configuring branch - confirm.
    logging.info("Zentrales Logging erfolgreich konfiguriert.")
|
|
||||||
|
|
||||||
def retry_on_failure(func):
|
def retry_on_failure(func):
|
||||||
"""
|
"""
|
||||||
Decorator, der eine Funktion bei bestimmten Fehlern mehrmals wiederholt.
|
Decorator, der eine Funktion bei bestimmten Fehlern mehrmals wiederholt.
|
||||||
Implementiert exponentiellen Backoff mit Jitter.
|
Implementiert exponentiellen Backoff mit Jitter.
|
||||||
Ist kompatibel mit openai v0.x und v1.x.
|
|
||||||
"""
|
"""
|
||||||
def wrapper(*args, **kwargs):
|
def wrapper(*args, **kwargs):
|
||||||
func_name = func.__name__
|
func_name = func.__name__
|
||||||
self_arg = args[0] if args and hasattr(args[0], func_name) and isinstance(args[0], object) else None
|
self_arg = args[0] if args and hasattr(args[0], func_name) and isinstance(args[0], object) else None
|
||||||
effective_func_name = f"{self_arg.__class__.__name__}.{func_name}" if self_arg else func_name
|
effective_func_name = f"{self_arg.__class__.__name__}.{func_name}" if self_arg else func_name
|
||||||
|
|
||||||
max_retries_config = getattr(Config, 'MAX_RETRIES', 3)
|
max_retries_config = getattr(Config, 'MAX_RETRIES', 3)
|
||||||
base_delay = getattr(Config, 'RETRY_DELAY', 5)
|
base_delay = getattr(Config, 'RETRY_DELAY', 5)
|
||||||
|
|
||||||
if max_retries_config <= 0:
|
|
||||||
try:
|
|
||||||
return func(*args, **kwargs)
|
|
||||||
except Exception as e:
|
|
||||||
decorator_logger.error(f"FEHLER bei '{effective_func_name}' (keine Retries konfiguriert). {type(e).__name__} - {str(e)[:150]}...")
|
|
||||||
# Wir fangen hier jetzt alle Fehler, da die spezifischen unten sind.
|
|
||||||
decorator_logger.exception("Details zum Fehler:")
|
|
||||||
raise e
|
|
||||||
|
|
||||||
for attempt in range(max_retries_config):
|
for attempt in range(max_retries_config):
|
||||||
try:
|
try:
|
||||||
if attempt > 0:
|
if attempt > 0:
|
||||||
decorator_logger.warning(f"Wiederhole Versuch {attempt + 1}/{max_retries_config} fuer '{effective_func_name}'...")
|
decorator_logger.warning(f"Wiederhole Versuch {attempt + 1}/{max_retries_config} fuer '{effective_func_name}'...")
|
||||||
return func(*args, **kwargs)
|
return func(*args, **kwargs)
|
||||||
|
except (gspread.exceptions.SpreadsheetNotFound, AuthenticationError, ValueError, InvalidRequestError) as e:
|
||||||
# Fehler, die NICHT wiederholt werden sollen
|
|
||||||
except (gspread.exceptions.SpreadsheetNotFound, AuthenticationError, ValueError, BadRequestError) as e:
|
|
||||||
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': Permanentes Problem erkannt. {type(e).__name__} - {str(e)[:150]}...")
|
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': Permanentes Problem erkannt. {type(e).__name__} - {str(e)[:150]}...")
|
||||||
decorator_logger.exception("Details:")
|
decorator_logger.exception("Details:")
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
# HTTP-Fehler, die NICHT wiederholt werden sollen
|
|
||||||
except requests.exceptions.HTTPError as e:
|
except requests.exceptions.HTTPError as e:
|
||||||
if hasattr(e, 'response') and e.response is not None:
|
if hasattr(e, 'response') and e.response is not None:
|
||||||
status_code = e.response.status_code
|
status_code = e.response.status_code
|
||||||
@@ -157,47 +87,26 @@ def retry_on_failure(func):
|
|||||||
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': HTTP Fehler {status_code} erhalten ({e.response.reason}). Nicht wiederholbar. {str(e)[:100]}...")
|
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': HTTP Fehler {status_code} erhalten ({e.response.reason}). Nicht wiederholbar. {str(e)[:100]}...")
|
||||||
decorator_logger.exception("Details:")
|
decorator_logger.exception("Details:")
|
||||||
raise e
|
raise e
|
||||||
# Wenn der HTTP-Fehler wiederholbar ist (z.B. 500), wird er unten gefangen
|
except (requests.exceptions.RequestException, gspread.exceptions.APIError, OpenAIError, wikipedia.exceptions.WikipediaException) as e:
|
||||||
pass
|
|
||||||
|
|
||||||
# Fehler, die wiederholt werden sollen (inkl. OpenAI-Fehler)
|
|
||||||
except (requests.exceptions.RequestException, gspread.exceptions.APIError, APIError, wikipedia.exceptions.WikipediaException) as e:
|
|
||||||
error_msg = str(e)
|
error_msg = str(e)
|
||||||
error_type = type(e).__name__
|
error_type = type(e).__name__
|
||||||
|
|
||||||
if attempt < max_retries_config - 1:
|
if attempt < max_retries_config - 1:
|
||||||
wait_time = base_delay * (2 ** attempt) + random.uniform(0, 1)
|
wait_time = base_delay * (2 ** attempt) + random.uniform(0, 1)
|
||||||
|
|
||||||
# Spezifisches Logging für OpenAI-Fehler
|
|
||||||
if isinstance(e, RateLimitError):
|
if isinstance(e, RateLimitError):
|
||||||
decorator_logger.warning(f"🚦 OPENAI RATE LIMIT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
decorator_logger.warning(f"🚦 RATE LIMIT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
elif isinstance(e, Timeout):
|
elif isinstance(e, (Timeout, ServiceUnavailableError)):
|
||||||
decorator_logger.warning(f"⏰ TIMEOUT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
decorator_logger.warning(f"⏰ TIMEOUT/UNAVAILABLE ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
elif isinstance(e, APIError): # Fängt alle anderen wiederholbaren OpenAI-Fehler
|
|
||||||
decorator_logger.warning(f"🤖 OPENAI API FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
|
||||||
|
|
||||||
# Spezifisches Logging für andere Bibliotheken
|
|
||||||
elif isinstance(e, gspread.exceptions.APIError) and hasattr(e, 'response') and e.response is not None and e.response.status_code == 429:
|
|
||||||
decorator_logger.warning(f"🚦 GSPREAD RATE LIMIT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
|
||||||
elif isinstance(e, requests.exceptions.RequestException):
|
|
||||||
decorator_logger.warning(f"🌐 NETZWERKFEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
|
||||||
elif isinstance(e, wikipedia.exceptions.WikipediaException):
|
|
||||||
decorator_logger.warning(f"📚 WIKIPEDIA FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
|
||||||
else:
|
else:
|
||||||
decorator_logger.warning(f"♻️ WIEDERHOLBARER FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
decorator_logger.warning(f"♻️ WIEDERHOLBARER FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
|
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
else:
|
else:
|
||||||
decorator_logger.error(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}' nach {max_retries_config} Versuchen.")
|
decorator_logger.error(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}' nach {max_retries_config} Versuchen.")
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
decorator_logger.critical(f"💥 UNERWARTETER FEHLER ({type(e).__name__}) bei '{effective_func_name}'. KEIN RETRY VERSUCHT.")
|
decorator_logger.critical(f"💥 UNERWARTETER FEHLER ({type(e).__name__}) bei '{effective_func_name}'. KEIN RETRY VERSUCHT.")
|
||||||
decorator_logger.exception("Details zum unerwarteten Fehler:")
|
decorator_logger.exception("Details zum unerwarteten Fehler:")
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
raise RuntimeError(f"Retry decorator logic error: Loop completed unexpectedly for {effective_func_name}. This should not happen.")
|
raise RuntimeError(f"Retry decorator logic error: Loop completed unexpectedly for {effective_func_name}. This should not happen.")
|
||||||
|
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user