Fix: SyntaxError im User-Prompt behoben – echter Zeilenumbruch im f-String durch \n ersetzt

- ersetzt den echten Zeilenumbruch innerhalb des f-Strings durch die Escape-Sequenz `\n`
- der Prompt wird nun korrekt an GPT übergeben
This commit is contained in:
2025-03-30 08:17:43 +00:00
parent a2b7389c30
commit dbc7e05cd9

View File

@@ -96,6 +96,32 @@ system_prompt = {
)
}
# === GPT BEWERTUNG ===
def classify_company(row, wikipedia_url=""):
    """Ask the chat model to classify one row and log the raw answer.

    The prompt is built from cells 0, 1, 2, 4 and 5 of ``row`` (joined with
    ``;``) followed by a ``Wikipedia-Link`` line. The first line of the answer
    that contains ``;`` and does not start with ``wikipedia-branche`` is taken
    as the result line and split on ``;``.

    Args:
        row: Sequence of cell values. Rows with fewer than six cells are
            padded with empty strings instead of raising ``IndexError``.
        wikipedia_url: URL appended to the prompt; may be empty.

    Returns:
        A list of exactly eight strings. If the answer does not yield eight
        fields, every entry is ``"k.A."``.

    Side effects:
        Appends one ``;``-delimited row (first cell, the eight fields, the
        full model answer) to the file named by ``LOG_CSV``.
    """
    # Pad short rows so missing trailing cells become empty strings.
    cells = list(row) + [""] * max(0, 6 - len(row))

    user_prompt = {
        "role": "user",
        "content": (
            f"{cells[0]};{cells[1]};{cells[2]};{cells[4]};{cells[5]}\n"
            f"Wikipedia-Link: {wikipedia_url}"
        ),
    }
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_prompt, user_prompt],
        temperature=0,
    )

    # The message content may be None; treat that as an empty answer so the
    # fallback below applies instead of an AttributeError on .strip().
    full_text = (response.choices[0].message.content or "").strip()

    # First data line: contains a semicolon and is not the header line.
    csv_line = next(
        (
            line
            for line in full_text.splitlines()
            if ";" in line and not line.lower().startswith("wikipedia-branche")
        ),
        "",
    )
    parts = (
        [value.strip().strip('"') for value in csv_line.split(";")]
        if csv_line
        else []
    )

    if len(parts) != 8:
        print("⚠️ Antwort unvollständig → Setze alles auf 'k.A.'")
        parts = ["k.A."] * 8

    # Keep the raw answer next to the parsed fields for later inspection.
    with open(LOG_CSV, "a", newline="", encoding="utf-8") as log:
        writer = csv.writer(log, delimiter=";")
        writer.writerow([cells[0], *parts, full_text])

    return parts
# === WIKIPEDIA DATEN LADEN ===
# Positivliste für Wikipedia-Kategorien, die auf Unternehmen hinweisen können
WHITELIST_KATEGORIEN = [
@@ -120,7 +146,6 @@ def get_wikipedia_data(name, website_hint=""):
for title in results:
try:
page = wikipedia.page(title)
# Titelprüfung verbessern
if any(x in page.title.lower() for x in ["krankenkasse", "versicherung"]):
continue
url = page.url
@@ -131,7 +156,6 @@ def get_wikipedia_data(name, website_hint=""):
continue
if name.lower().split()[0] not in page.title.lower():
continue
url = page.url
soup = BeautifulSoup(html, 'html.parser')
infobox = soup.find("table", class_=["infobox", "infobox vcard"])
if not infobox:
@@ -165,33 +189,6 @@ def get_wikipedia_data(name, website_hint=""):
continue
return "", "k.A.", "k.A."
# === GPT BEWERTUNG ===
def classify_company(row, wikipedia_url=""):
    """Ask the chat model to classify one row and log the raw answer.

    The prompt is built from cells 0, 1, 2, 4 and 5 of ``row`` (joined with
    ``;``) followed by a ``Wikipedia-Link`` line. The first line of the answer
    that contains ``;`` and does not start with ``wikipedia-branche`` is taken
    as the result line and split on ``;``.

    Args:
        row: Sequence of cell values. Rows with fewer than six cells are
            padded with empty strings instead of raising ``IndexError``.
        wikipedia_url: URL appended to the prompt; may be empty.

    Returns:
        A list of exactly eight strings. If the answer does not yield eight
        fields, every entry is ``"k.A."``.

    Side effects:
        Appends one ``;``-delimited row (first cell, the eight fields, the
        full model answer) to the file named by ``LOG_CSV``.
    """
    # Pad short rows so missing trailing cells become empty strings.
    cells = list(row) + [""] * max(0, 6 - len(row))

    user_prompt = {
        "role": "user",
        "content": (
            # The line break must be the escape sequence \n: a literal newline
            # inside a single-quoted f-string is a SyntaxError.
            f"{cells[0]};{cells[1]};{cells[2]};{cells[4]};{cells[5]}\n"
            f"Wikipedia-Link: {wikipedia_url}"
        ),
    }
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_prompt, user_prompt],
        temperature=0,
    )

    # The message content may be None; treat that as an empty answer so the
    # fallback below applies instead of an AttributeError on .strip().
    full_text = (response.choices[0].message.content or "").strip()

    # First data line: contains a semicolon and is not the header line.
    csv_line = next(
        (
            line
            for line in full_text.splitlines()
            if ";" in line and not line.lower().startswith("wikipedia-branche")
        ),
        "",
    )
    parts = (
        [value.strip().strip('"') for value in csv_line.split(";")]
        if csv_line
        else []
    )

    if len(parts) != 8:
        print("⚠️ Antwort unvollständig → Setze alles auf 'k.A.'")
        parts = ["k.A."] * 8

    # Keep the raw answer next to the parsed fields for later inspection.
    with open(LOG_CSV, "a", newline="", encoding="utf-8") as log:
        writer = csv.writer(log, delimiter=";")
        writer.writerow([cells[0], *parts, full_text])

    return parts
# === VERARBEITUNG ===
for i in range(start, min(start + DURCHLÄUFE, len(sheet_values))):
row = sheet_values[i]