manual revoke
This commit is contained in:
139
helpers.py
139
helpers.py
@@ -1,3 +1,5 @@
|
|||||||
|
# --- START OF FILE helpers.py (Part 1/10) ---
|
||||||
|
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
helpers.py
|
helpers.py
|
||||||
@@ -6,6 +8,9 @@ Sammlung von globalen, wiederverwendbaren Hilfsfunktionen für das Projekt
|
|||||||
"Automatisierte Unternehmensbewertung". Enthält Decorators, Text-Normalisierung,
|
"Automatisierte Unternehmensbewertung". Enthält Decorators, Text-Normalisierung,
|
||||||
API-Wrapper und andere Dienstprogramme.
|
API-Wrapper und andere Dienstprogramme.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
ALLOWED_TARGET_BRANCHES = []
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# 1. IMPORTS
|
# 1. IMPORTS
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
@@ -42,6 +47,7 @@ except ImportError:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
import gender_guesser.detector as gender
|
import gender_guesser.detector as gender
|
||||||
|
# Initialisieren Sie den Detector einmal global
|
||||||
gender_detector = gender.Detector()
|
gender_detector = gender.Detector()
|
||||||
logging.info("gender_guesser.Detector initialisiert.")
|
logging.info("gender_guesser.Detector initialisiert.")
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@@ -53,6 +59,9 @@ except Exception as e:
|
|||||||
gender_detector = None
|
gender_detector = None
|
||||||
logging.warning(f"Fehler bei Initialisierung von gender_guesser: {e}. Geschlechtserkennung deaktiviert.")
|
logging.warning(f"Fehler bei Initialisierung von gender_guesser: {e}. Geschlechtserkennung deaktiviert.")
|
||||||
|
|
||||||
|
# Import der Config-Klasse und Konstanten
|
||||||
|
from config import Config, BRANCH_MAPPING_FILE, URL_CHECK_MARKER, USER_AGENTS
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# 2. RETRY DECORATOR
|
# 2. RETRY DECORATOR
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
@@ -67,46 +76,74 @@ def retry_on_failure(func):
|
|||||||
func_name = func.__name__
|
func_name = func.__name__
|
||||||
self_arg = args[0] if args and hasattr(args[0], func_name) and isinstance(args[0], object) else None
|
self_arg = args[0] if args and hasattr(args[0], func_name) and isinstance(args[0], object) else None
|
||||||
effective_func_name = f"{self_arg.__class__.__name__}.{func_name}" if self_arg else func_name
|
effective_func_name = f"{self_arg.__class__.__name__}.{func_name}" if self_arg else func_name
|
||||||
|
|
||||||
max_retries_config = getattr(Config, 'MAX_RETRIES', 3)
|
max_retries_config = getattr(Config, 'MAX_RETRIES', 3)
|
||||||
base_delay = getattr(Config, 'RETRY_DELAY', 5)
|
base_delay = getattr(Config, 'RETRY_DELAY', 5)
|
||||||
|
|
||||||
|
if max_retries_config <= 0:
|
||||||
|
try:
|
||||||
|
return func(*args, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
decorator_logger.error(f"FEHLER bei '{effective_func_name}' (keine Retries konfiguriert). {type(e).__name__} - {str(e)[:150]}...")
|
||||||
|
if not isinstance(e, (requests.exceptions.RequestException, gspread.exceptions.APIError, OpenAIError, wikipedia.exceptions.WikipediaException)):
|
||||||
|
decorator_logger.exception("Details zum Fehler:")
|
||||||
|
raise e
|
||||||
|
|
||||||
for attempt in range(max_retries_config):
|
for attempt in range(max_retries_config):
|
||||||
try:
|
try:
|
||||||
if attempt > 0:
|
if attempt > 0:
|
||||||
decorator_logger.warning(f"Wiederhole Versuch {attempt + 1}/{max_retries_config} fuer '{effective_func_name}'...")
|
decorator_logger.warning(f"Wiederhole Versuch {attempt + 1}/{max_retries_config} fuer '{effective_func_name}'...")
|
||||||
return func(*args, **kwargs)
|
return func(*args, **kwargs)
|
||||||
except (gspread.exceptions.SpreadsheetNotFound, AuthenticationError, ValueError, InvalidRequestError) as e:
|
|
||||||
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': Permanentes Problem erkannt. {type(e).__name__} - {str(e)[:150]}...")
|
except (gspread.exceptions.SpreadsheetNotFound, AuthenticationError, ValueError) as e:
|
||||||
decorator_logger.exception("Details:")
|
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': Permanentes Problem erkannt. {type(e).__name__} - {str(e)[:150]}...")
|
||||||
raise e
|
decorator_logger.exception("Details:")
|
||||||
|
raise e
|
||||||
|
|
||||||
except requests.exceptions.HTTPError as e:
|
except requests.exceptions.HTTPError as e:
|
||||||
if hasattr(e, 'response') and e.response is not None:
|
if hasattr(e, 'response') and e.response is not None:
|
||||||
status_code = e.response.status_code
|
status_code = e.response.status_code
|
||||||
non_retryable_status_codes = [404, 400, 401, 403]
|
non_retryable_status_codes = [404, 400, 401, 403]
|
||||||
if status_code in non_retryable_status_codes:
|
if status_code in non_retryable_status_codes:
|
||||||
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': HTTP Fehler {status_code} erhalten ({e.response.reason}). Nicht wiederholbar. {str(e)[:100]}...")
|
decorator_logger.critical(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}': HTTP Fehler {status_code} erhalten ({e.response.reason}). Nicht wiederholbar. {str(e)[:100]}...")
|
||||||
decorator_logger.exception("Details:")
|
decorator_logger.exception("Details:")
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
except (requests.exceptions.RequestException, gspread.exceptions.APIError, OpenAIError, wikipedia.exceptions.WikipediaException) as e:
|
except (requests.exceptions.RequestException, gspread.exceptions.APIError, OpenAIError, wikipedia.exceptions.WikipediaException) as e:
|
||||||
error_msg = str(e)
|
error_msg = str(e)
|
||||||
error_type = type(e).__name__
|
error_type = type(e).__name__
|
||||||
if attempt < max_retries_config - 1:
|
|
||||||
wait_time = base_delay * (2 ** attempt) + random.uniform(0, 1)
|
if attempt < max_retries_config - 1:
|
||||||
if isinstance(e, RateLimitError):
|
wait_time = base_delay * (2 ** attempt) + random.uniform(0, 1)
|
||||||
decorator_logger.warning(f"🚦 RATE LIMIT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
if isinstance(e, RateLimitError):
|
||||||
elif isinstance(e, (Timeout, ServiceUnavailableError)):
|
decorator_logger.warning(f"🚦 RATE LIMIT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
decorator_logger.warning(f"⏰ TIMEOUT/UNAVAILABLE ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
elif isinstance(e, Timeout) and isinstance(e, OpenAIError):
|
||||||
else:
|
decorator_logger.warning(f"⏰ OPENAI TIMEOUT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
decorator_logger.warning(f"♻️ WIEDERHOLBARER FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
elif isinstance(e, gspread.exceptions.APIError) and hasattr(e, 'response') and e.response is not None and e.response.status_code == 429:
|
||||||
time.sleep(wait_time)
|
decorator_logger.warning(f"🚦 GSPREAD RATE LIMIT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
else:
|
elif isinstance(e, requests.exceptions.Timeout):
|
||||||
decorator_logger.error(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}' nach {max_retries_config} Versuchen.")
|
decorator_logger.warning(f"⏰ REQUESTS TIMEOUT ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
raise e
|
elif isinstance(e, requests.exceptions.RequestException):
|
||||||
|
decorator_logger.warning(f"🌐 NETZWERKFEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
|
elif isinstance(e, OpenAIError):
|
||||||
|
decorator_logger.warning(f"🤖 OPENAI FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
|
elif isinstance(e, wikipedia.exceptions.WikipediaException):
|
||||||
|
decorator_logger.warning(f"📚 WIKIPEDIA FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
|
else:
|
||||||
|
decorator_logger.warning(f"♻️ WIEDERHOLBARER FEHLER ({error_type}) bei '{effective_func_name}' (Versuch {attempt+1}/{max_retries_config}). {error_msg[:150]}... Warte {wait_time:.2f}s...")
|
||||||
|
|
||||||
|
time.sleep(wait_time)
|
||||||
|
else:
|
||||||
|
decorator_logger.error(f"❌ ENDGUELTIGER FEHLER bei '{effective_func_name}' nach {max_retries_config} Versuchen.")
|
||||||
|
raise e
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
decorator_logger.critical(f"💥 UNERWARTETER FEHLER ({type(e).__name__}) bei '{effective_func_name}'. KEIN RETRY VERSUCHT.")
|
decorator_logger.critical(f"💥 UNERWARTETER FEHLER ({type(e).__name__}) bei '{effective_func_name}'. KEIN RETRY VERSUCHT.")
|
||||||
decorator_logger.exception("Details zum unerwarteten Fehler:")
|
decorator_logger.exception("Details zum unerwarteten Fehler:")
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
raise RuntimeError(f"Retry decorator logic error: Loop completed unexpectedly for {effective_func_name}. This should not happen.")
|
raise RuntimeError(f"Retry decorator logic error: Loop completed unexpectedly for {effective_func_name}. This should not happen.")
|
||||||
|
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
@@ -678,12 +715,21 @@ def initialize_target_schema():
|
|||||||
def call_openai_chat(prompt, temperature=0.3, model=None):
|
def call_openai_chat(prompt, temperature=0.3, model=None):
|
||||||
"""
|
"""
|
||||||
Zentrale Funktion fuer OpenAI Chat API Aufrufe.
|
Zentrale Funktion fuer OpenAI Chat API Aufrufe.
|
||||||
Kompatibel mit openai v0.x und v1.x.
|
Wird von anderen globalen Helfern oder DataProcessor Methoden aufgerufen.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt (str): Der Prompt-Text an die API.
|
||||||
|
temperature (float, optional): Die Temperatur fuer die Textgenerierung. Defaults to 0.3.
|
||||||
|
model (str, optional): Das zu verwendende OpenAI Modell. Defaults to Config.TOKEN_MODEL.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: Der bereinigte Antwortstring von der API.
|
||||||
|
Wirft Exception bei API-Fehlern nach Retries.
|
||||||
"""
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
if not Config.API_KEYS.get('openai'):
|
if not Config.API_KEYS.get('openai'):
|
||||||
logger.error("Fehler: OpenAI API Key nicht konfiguriert.")
|
logger.error("Fehler: OpenAI API Key nicht konfiguriert.")
|
||||||
raise AuthenticationError("OpenAI API Key nicht konfiguriert.") # Funktioniert in beiden Versionen
|
raise openai.error.AuthenticationError("OpenAI API Key nicht konfiguriert.")
|
||||||
|
|
||||||
if not prompt or not isinstance(prompt, str) or not prompt.strip():
|
if not prompt or not isinstance(prompt, str) or not prompt.strip():
|
||||||
logger.error("Fehler: Leerer Prompt fuer OpenAI.")
|
logger.error("Fehler: Leerer Prompt fuer OpenAI.")
|
||||||
@@ -692,33 +738,32 @@ def call_openai_chat(prompt, temperature=0.3, model=None):
|
|||||||
current_model = model if model else getattr(Config, 'TOKEN_MODEL', 'gpt-3.5-turbo')
|
current_model = model if model else getattr(Config, 'TOKEN_MODEL', 'gpt-3.5-turbo')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if IS_OPENAI_V1:
|
# Optional: Token-Zählung für Debugging
|
||||||
# Code für die neue v1.x Bibliothek
|
# prompt_tokens = token_count(prompt, model=current_model)
|
||||||
client = openai.OpenAI(api_key=Config.API_KEYS.get('openai'))
|
# logger.debug(f"Sende Prompt an OpenAI ({current_model}, geschaetzt {prompt_tokens} Tokens)...")
|
||||||
response = client.chat.completions.create(
|
|
||||||
model=current_model,
|
response = openai.ChatCompletion.create(
|
||||||
messages=[{"role": "user", "content": prompt}],
|
model=current_model,
|
||||||
temperature=temperature
|
messages=[{"role": "user", "content": prompt}],
|
||||||
)
|
temperature=temperature
|
||||||
result = response.choices[0].message.content.strip() if response.choices and response.choices[0].message else ""
|
)
|
||||||
else:
|
|
||||||
# Code für die alte v0.x Bibliothek
|
if not response or not hasattr(response, 'choices') or not response.choices:
|
||||||
response = openai.ChatCompletion.create(
|
logger.error(f"OpenAI Call erfolgreich, aber keine Choices in der Antwort erhalten. Response: {str(response)[:200]}...")
|
||||||
api_key=Config.API_KEYS.get('openai'), # explizit übergeben
|
raise openai.error.APIError("Keine Choices in OpenAI Antwort erhalten.")
|
||||||
model=current_model,
|
|
||||||
messages=[{"role": "user", "content": prompt}],
|
result = response.choices[0].message.content.strip() if hasattr(response.choices[0], 'message') and hasattr(response.choices[0].message, 'content') else ""
|
||||||
temperature=temperature
|
|
||||||
)
|
|
||||||
result = response.choices[0].message.content.strip() if response.choices and response.choices[0].message else ""
|
|
||||||
|
|
||||||
if not result:
|
if not result:
|
||||||
logger.warning(f"OpenAI Call erfolgreich, erhielt aber leeren Inhalt in der Antwort.")
|
logger.warning(f"OpenAI Call erfolgreich, erhielt aber leeren Inhalt in der Antwort. Prompt Anfang: {prompt[:100]}...")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise e # Wird vom @retry_on_failure Decorator gefangen
|
# Wird vom @retry_on_failure Decorator gefangen und behandelt.
|
||||||
|
# Wir heben die Exception erneut auf, damit der Decorator sie sehen kann.
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
def summarize_website_content(raw_text):
|
def summarize_website_content(raw_text):
|
||||||
|
|||||||
Reference in New Issue
Block a user