Update brancheneinstufung.py
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# Neue Version mit Wikipedia-Validierung, GPT-Schutz und Antwortlogging
|
||||
# Neue Version mit Wikipedia-Validierung, GPT-Schutz und Antwortlogging inkl. Retry
|
||||
|
||||
import os
|
||||
import time
|
||||
@@ -12,6 +12,7 @@ from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from oauth2client.service_account import ServiceAccountCredentials
|
||||
from datetime import datetime
|
||||
from openai.error import OpenAIError
|
||||
|
||||
# === KONFIGURATION ===
|
||||
EXCEL = "Bestandsfirmen.xlsx"
|
||||
@@ -98,7 +99,6 @@ system_prompt = {
|
||||
)
|
||||
}
|
||||
|
||||
# === WIKIPEDIA FUNKTION ===
|
||||
# Keywords (German) naming company-like categories.
# NOTE(review): presumably matched against Wikipedia article categories when
# validating a hit in get_wikipedia_data -- the usage is outside this view; confirm.
WHITELIST_KATEGORIEN = ["Unternehmen", "Hersteller", "Produktion", "Industrie", "Maschinenbau", "Technik", "Dienstleistungsunternehmen"]
|
||||
|
||||
def get_wikipedia_data(name, website_hint=""):
|
||||
@@ -156,27 +156,38 @@ def get_wikipedia_data(name, website_hint=""):
|
||||
continue
|
||||
return "", "k.A.", "k.A."
|
||||
|
||||
# === GPT BEWERTUNG ===
|
||||
def classify_company(row, wikipedia_url="", *, max_attempts=3, retry_delay=10):
    """Classify one company row with the chat model and log the raw answer.

    The prompt sent to the model consists of ``row[0]``, ``row[1]``,
    ``row[2]``, ``row[4]`` and ``row[5]`` joined by ``;`` followed by the
    Wikipedia link. The first answer line that contains a ``;`` and is not
    the ``Wikipedia-Branche`` header line is taken as the result line and
    split into fields.

    Args:
        row: Indexable record of the company; indices 0, 1, 2, 4 and 5 are
            read (index 3 is intentionally not part of the prompt in the
            original code).
        wikipedia_url: Link passed to the model as additional context.
        max_attempts: How often the API request is tried before giving up.
        retry_delay: Seconds to wait between two failed attempts.

    Returns:
        A list of exactly 8 strings. Every entry is ``"k.A."`` when all
        requests failed or the answer did not contain exactly 8 fields.

    Side effects:
        Appends one row (timestamp, ``row[0]``, the 8 fields, raw answer)
        to the ``LOG_CSV`` file and prints warnings to stdout.
    """
    # NOTE(review): the file imports OpenAIError from ``openai.error`` while
    # the request below uses the ``openai.chat.completions`` interface; these
    # appear to belong to different major versions of the openai package --
    # confirm against the installed version.
    expected_fields = 8
    fallback = ["k.A."] * expected_fields

    user_prompt = {
        "role": "user",
        "content": f"{row[0]};{row[1]};{row[2]};{row[4]};{row[5]}\nWikipedia-Link: {wikipedia_url}"
    }

    for attempt in range(max_attempts):
        try:
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[system_prompt, user_prompt],
                temperature=0
            )
        except OpenAIError as e:
            print(f"⚠️ GPT-Fehler bei Versuch {attempt+1}: {e}")
            # Only wait when another attempt follows; sleeping after the
            # last failure would just delay the fallback.
            if attempt + 1 < max_attempts:
                time.sleep(retry_delay)
            continue
        # The message content may be None; treat that like an empty answer
        # instead of failing on .strip().
        full_text = (response.choices[0].message.content or "").strip()
        break
    else:
        # Reached only when no attempt succeeded (loop ended without break).
        print(f"❌ GPT {max_attempts}x fehlgeschlagen – setze alles auf 'k.A.'")
        full_text = ";".join(fallback)

    # Pick the first data line; a repeated header line is skipped even when
    # the model indents it.
    csv_line = next(
        (
            line
            for line in full_text.splitlines()
            if ";" in line and not line.lstrip().lower().startswith("wikipedia-branche")
        ),
        "",
    )
    if csv_line:
        parts = [value.strip().strip('"') for value in csv_line.split(";")]
    else:
        parts = list(fallback)

    if len(parts) != expected_fields:
        print("⚠️ Antwort unvollständig → Setze alles auf 'k.A.'")
        parts = list(fallback)

    # Keep the raw answer next to the parsed fields so that odd model
    # replies can be inspected later.
    with open(LOG_CSV, "a", newline="", encoding="utf-8") as log:
        writer = csv.writer(log, delimiter=";")
        writer.writerow([datetime.now().strftime("%Y-%m-%d %H:%M:%S"), row[0], *parts, full_text])

    return parts
|
||||
|
||||
# === VERARBEITUNG ===
|
||||
|
||||
Reference in New Issue
Block a user