Fix: SyntaxError im User-Prompt behoben – echter Zeilenumbruch im f-String durch `\n` ersetzt

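- ersetzt den echten Zeilenumbruch innerhalb des f-Strings durch die Escape-Sequenz `\n`
- Prompt wird nun korrekt an GPT übergeben
- verschiebt `classify_company` vor den Abschnitt „WIKIPEDIA DATEN LADEN“ und entfernt in `get_wikipedia_data` die wiederholte Zuweisung `url = page.url` sowie einen Kommentar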
2025-03-30 08:17:43 +00:00
parent a2b7389c30
commit dbc7e05cd9
1 changed file with 26 additions and 29 deletions
--- a/brancheneinstufung.py
+++ b/brancheneinstufung.py
@@ -96,6 +96,32 @@ system_prompt = {
    )
 }

+# === GPT BEWERTUNG ===
+def classify_company(row, wikipedia_url=""):
+    user_prompt = {
+        "role": "user",
+        "content": (
+            f"{row[0]};{row[1]};{row[2]};{row[4]};{row[5]}\n"
+            f"Wikipedia-Link: {wikipedia_url}"
+        )
+    }
+    response = openai.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=[system_prompt, user_prompt],
+        temperature=0
+    )
+    full_text = response.choices[0].message.content.strip()
+    lines = full_text.splitlines()
+    csv_line = next((l for l in lines if ";" in l and not l.lower().startswith("wikipedia-branche")), "")
+    parts = [v.strip().strip('"') for v in csv_line.split(";")] if csv_line else []
+    if len(parts) != 8:
+        print("⚠️  Antwort unvollständig → Setze alles auf 'k.A.'")
+        parts = ["k.A."] * 8
+    with open(LOG_CSV, "a", newline="", encoding="utf-8") as log:
+        writer = csv.writer(log, delimiter=";")
+        writer.writerow([row[0], *parts, full_text])
+    return parts
+
 # === WIKIPEDIA DATEN LADEN ===
 # Positivliste für Wikipedia-Kategorien, die auf Unternehmen hinweisen können
 WHITELIST_KATEGORIEN = [
@@ -120,7 +146,6 @@ def get_wikipedia_data(name, website_hint=""):
        for title in results:
            try:
                page = wikipedia.page(title)
-                # Titelprüfung verbessern
                if any(x in page.title.lower() for x in ["krankenkasse", "versicherung"]):
                    continue
                url = page.url
@@ -131,7 +156,6 @@ def get_wikipedia_data(name, website_hint=""):
                        continue
                if name.lower().split()[0] not in page.title.lower():
                    continue
-                url = page.url
                soup = BeautifulSoup(html, 'html.parser')
                infobox = soup.find("table", class_=["infobox", "infobox vcard"])
                if not infobox:
@@ -165,33 +189,6 @@ def get_wikipedia_data(name, website_hint=""):
                continue
    return "", "k.A.", "k.A."

-# === GPT BEWERTUNG ===
-def classify_company(row, wikipedia_url=""):
-    user_prompt = {
-        "role": "user",
-        "content": (
-        f"{row[0]};{row[1]};{row[2]};{row[4]};{row[5]}
-"
-        f"Wikipedia-Link: {wikipedia_url}"
-    )
-    }
-    response = openai.chat.completions.create(
-        model="gpt-3.5-turbo",
-        messages=[system_prompt, user_prompt],
-        temperature=0
-    )
-    full_text = response.choices[0].message.content.strip()
-    lines = full_text.splitlines()
-    csv_line = next((l for l in lines if ";" in l and not l.lower().startswith("wikipedia-branche")), "")
-    parts = [v.strip().strip('"') for v in csv_line.split(";")] if csv_line else []
-    if len(parts) != 8:
-        print("⚠️  Antwort unvollständig → Setze alles auf 'k.A.'")
-        parts = ["k.A."] * 8
-    with open(LOG_CSV, "a", newline="", encoding="utf-8") as log:
-        writer = csv.writer(log, delimiter=";")
-        writer.writerow([row[0], *parts, full_text])
-    return parts
-
 # === VERARBEITUNG ===
 for i in range(start, min(start + DURCHLÄUFE, len(sheet_values))):
    row = sheet_values[i]