Refactor: Wikipedia-Analyse vollständig an GPT übergeben
- Nur noch die Wikipedia-URL wird lokal ermittelt.
- Die URL wird im Prompt an GPT übergeben (Zeile: "Wikipedia-Link: …").
- GPT soll Umsatz und Branche eigenständig aus dem Artikel extrahieren.
- HTML-Parsing und lokale Extraktion entfallen vollständig.
This commit is contained in:
@@ -166,10 +166,11 @@ def get_wikipedia_data(name, website_hint=""):
|
|||||||
return "", "k.A.", "k.A."
|
return "", "k.A.", "k.A."
|
||||||
|
|
||||||
# === GPT BEWERTUNG ===
|
# === GPT BEWERTUNG ===
|
||||||
def classify_company(row):
|
def classify_company(row, wikipedia_url=""):
|
||||||
user_prompt = {
|
user_prompt = {
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": f"{row[0]};{row[1]};{row[2]};{row[4]};{row[5]}"
|
"content": f"{row[0]};{row[1]};{row[2]};{row[4]};{row[5]}
|
||||||
|
Wikipedia-Link: {wikipedia_url}"
|
||||||
}
|
}
|
||||||
response = openai.chat.completions.create(
|
response = openai.chat.completions.create(
|
||||||
model="gpt-3.5-turbo",
|
model="gpt-3.5-turbo",
|
||||||
@@ -193,11 +194,11 @@ for i in range(start, min(start + DURCHLÄUFE, len(sheet_values))):
|
|||||||
row = sheet_values[i]
|
row = sheet_values[i]
|
||||||
print(f"[{time.strftime('%H:%M:%S')}] Verarbeite Zeile {i+1}: {row[0]}")
|
print(f"[{time.strftime('%H:%M:%S')}] Verarbeite Zeile {i+1}: {row[0]}")
|
||||||
|
|
||||||
url, wiki_branche, umsatz = get_wikipedia_data(row[0], row[1])
|
url, _, _ = get_wikipedia_data(row[0], row[1])
|
||||||
wiki, linkedin, umsatz_chat, new_cat, reason, fsm, techniker, techniker_reason = classify_company(row)
|
wiki, linkedin, umsatz_chat, new_cat, reason, fsm, techniker, techniker_reason = classify_company(row, wikipedia_url=url)
|
||||||
|
|
||||||
wiki_final = wiki_branche
|
wiki_final = wiki
|
||||||
umsatz_final = umsatz if umsatz != "k.A." else umsatz_chat
|
umsatz_final = umsatz_chat
|
||||||
|
|
||||||
values = [
|
values = [
|
||||||
wiki_final,
|
wiki_final,
|
||||||
|
|||||||
Reference in New Issue
Block a user