feat(wikipedia): Versionierung und Logging verbessert – Ausgabe enthält nun Version für Nachverfolgbarkeit
🧠 Refactoring: Einführung einer globalen VERSION-Konstante (1.0.0-wiki-only)
📅 Feature: Zeitstempel und Version werden nun zusätzlich ins Google Sheet eingetragen
🛠️ Fix: sheet.update() erweitert, um die neue Spalte Q (Version) zu berücksichtigen
✨ Struktur: Code durchgängig vereinheitlicht und robuster gegen Fehler gemacht
This commit is contained in:
@@ -13,6 +13,7 @@ from oauth2client.service_account import ServiceAccountCredentials
|
||||
from datetime import datetime
|
||||
|
||||
# === KONFIGURATION ===
|
||||
VERSION = "1.0.0-wiki-only"
|
||||
LANG = "de"
|
||||
CREDENTIALS = "service_account.json"
|
||||
SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo"
|
||||
@@ -64,7 +65,7 @@ def parse_infobox_with_fallback(soup):
|
||||
if any(b in label for b in ["branche", "tätigkeitsfeld", "industriezweig", "wirtschaftszweig"]):
|
||||
branche = value
|
||||
if "umsatz" in label and "mio" in value.lower():
|
||||
match = re.search(r"(\d+[\d.,]*)\s*Mio", value)
|
||||
match = re.search(r"(\d+[\d.,]*)\\s*Mio", value)
|
||||
if match:
|
||||
umsatz = match.group(1).replace(",", ".")
|
||||
|
||||
@@ -102,7 +103,7 @@ def get_wikipedia_data(name, website_hint=""):
|
||||
continue
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
branche, umsatz = parse_infobox_with_fallback(soup)
|
||||
if not branche or branche == "k.A.":
|
||||
if (not branche or branche == "k.A.") and page.categories:
|
||||
for category in page.categories:
|
||||
if any(kw in category.lower() for kw in WHITELIST_KATEGORIEN):
|
||||
branche = category
|
||||
@@ -131,7 +132,9 @@ for i in range(start, min(start + DURCHLÄUFE, len(sheet_values))):
|
||||
datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"k.A.", "k.A."
|
||||
]
|
||||
sheet.update(range_name=f"G{i+1}:P{i+1}", values=[values])
|
||||
# Neue Spalte mit Version am Ende
|
||||
values.append(VERSION)
|
||||
sheet.update(range_name=f"G{i+1}:Q{i+1}", values=[values])
|
||||
print(f"✅ Aktualisiert: {values[:3]}...")
|
||||
time.sleep(RETRY_DELAY)
|
||||
|
||||
@@ -140,6 +143,7 @@ print("\n✅ Wikipedia-Auswertung abgeschlossen")
|
||||
|
||||
|
||||
|
||||
|
||||
# === SCHRITT 2: GPT-BEWERTUNG ===
|
||||
def classify_company(row, wikipedia_url=""):
|
||||
user_prompt = {
|
||||
|
||||
Reference in New Issue
Block a user