From 4504cc6d6f37924be4ba6fe91cf0afd7abc3bf58 Mon Sep 17 00:00:00 2001 From: Floke Date: Sun, 30 Mar 2025 11:15:10 +0000 Subject: [PATCH] Update brancheneinstufung.py --- brancheneinstufung.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/brancheneinstufung.py b/brancheneinstufung.py index 576795b1..97e84262 100644 --- a/brancheneinstufung.py +++ b/brancheneinstufung.py @@ -1,4 +1,4 @@ -# Neue Version mit Wikipedia-Validierung, GPT-Schutz und Antwortlogging +# Neue Version mit Wikipedia-Validierung, GPT-Schutz und Antwortlogging inkl. Retry import os import time @@ -12,6 +12,7 @@ from bs4 import BeautifulSoup import requests from oauth2client.service_account import ServiceAccountCredentials from datetime import datetime +from openai.error import OpenAIError # === KONFIGURATION === EXCEL = "Bestandsfirmen.xlsx" @@ -98,7 +99,6 @@ system_prompt = { ) } -# === WIKIPEDIA FUNKTION === WHITELIST_KATEGORIEN = ["Unternehmen", "Hersteller", "Produktion", "Industrie", "Maschinenbau", "Technik", "Dienstleistungsunternehmen"] def get_wikipedia_data(name, website_hint=""): @@ -156,27 +156,38 @@ def get_wikipedia_data(name, website_hint=""): continue return "", "k.A.", "k.A." 
def classify_company(row, wikipedia_url="", *, max_attempts=3, retry_delay=10):
    """Classify one company row via GPT and return the parsed answer fields.

    A user prompt is built from selected columns of ``row`` plus the
    Wikipedia link and sent, together with the module-level ``system_prompt``,
    to the chat-completions endpoint. The first reply line that contains a
    semicolon and does not start with "wikipedia-branche" is split on ";"
    into the result fields. Every call appends one row to ``LOG_CSV``.

    Args:
        row: Indexable record. Indices 0, 1, 2, 4 and 5 are interpolated into
            the prompt; index 0 is also written to the log.
        wikipedia_url: Wikipedia link appended to the prompt (may be empty).
        max_attempts: Number of API attempts before giving up.
        retry_delay: Seconds to wait between two consecutive attempts.

    Returns:
        A list of exactly 8 strings. If the API fails on every attempt, or
        the reply does not yield exactly 8 fields, every entry is "k.A.".
    """
    expected_fields = 8
    fallback = ["k.A."] * expected_fields

    user_prompt = {
        "role": "user",
        "content": (
            f"{row[0]};{row[1]};{row[2]};{row[4]};{row[5]}\n"
            f"Wikipedia-Link: {wikipedia_url}"
        ),
    }

    for attempt in range(1, max_attempts + 1):
        try:
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[system_prompt, user_prompt],
                temperature=0,
            )
        # Use the top-level exception class: the `openai.error` module is not
        # available in the client versions that provide `openai.chat`.
        except openai.OpenAIError as e:
            print(f"⚠️ GPT-Fehler bei Versuch {attempt}: {e}")
            # No point in waiting after the last attempt; we give up anyway.
            if attempt < max_attempts:
                time.sleep(retry_delay)
            continue
        # `content` may be None (e.g. filtered/empty reply); treat it as "".
        full_text = (response.choices[0].message.content or "").strip()
        break
    else:
        # Loop finished without `break`: every attempt raised.
        print(f"❌ GPT {max_attempts}x fehlgeschlagen – setze alles auf 'k.A.'")
        full_text = ";".join(fallback)

    # Pick the first data line, skipping a possible header line.
    csv_line = next(
        (
            line
            for line in full_text.splitlines()
            if ";" in line and not line.lower().startswith("wikipedia-branche")
        ),
        "",
    )
    parts = [value.strip().strip('"') for value in csv_line.split(";")]

    # Missing line or wrong field count -> fall back to all "k.A.".
    if len(parts) != expected_fields:
        parts = list(fallback)

    # Log timestamp, first row column, parsed fields and the raw reply.
    with open(LOG_CSV, "a", newline="", encoding="utf-8") as log:
        writer = csv.writer(log, delimiter=";")
        writer.writerow(
            [datetime.now().strftime("%Y-%m-%d %H:%M:%S"), row[0], *parts, full_text]
        )

    return parts


# === PROCESSING ===