Erste Version
This commit is contained in:
BIN
@eaDir/brancheneinstufung - Kopie.py@SynoEAStream
Normal file
BIN
@eaDir/brancheneinstufung - Kopie.py@SynoEAStream
Normal file
Binary file not shown.
BIN
@eaDir/service_account.json@SynoEAStream
Normal file
BIN
@eaDir/service_account.json@SynoEAStream
Normal file
Binary file not shown.
BIN
Bestandsfirmen.xlsx
Normal file
BIN
Bestandsfirmen.xlsx
Normal file
Binary file not shown.
202
brancheneinstufung - Kopie.py
Normal file
202
brancheneinstufung - Kopie.py
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
import gspread
|
||||||
|
import openai
|
||||||
|
import wikipedia
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
from oauth2client.service_account import ServiceAccountCredentials
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# === CONFIG ===
EXCEL = "Bestandsfirmen.xlsx"  # workbook with the company list
SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo"
CREDENTIALS = "service_account.json"  # service-account key file used for Google auth
CHUNK = 10  # NOTE(review): not referenced anywhere in the visible script
LANG = "de"  # language passed to wikipedia.set_lang()

# === AUTHENTICATION ===
scope = ["https://www.googleapis.com/auth/spreadsheets"]
creds = ServiceAccountCredentials.from_json_keyfile_name(CREDENTIALS, scope)
sheet = gspread.authorize(creds).open_by_url(SHEET_URL).sheet1

# Load the OpenAI API key from an external file instead of hard-coding it.
# The encoding is pinned so the result does not depend on the platform default.
with open("api_key.txt", "r", encoding="utf-8") as f:
    openai.api_key = f.read().strip()

# === LOAD DATA ===
df = pd.read_excel(EXCEL)
for col in ["Wikipedia-URL", "Wikipedia-Branche", "LinkedIn-Branche", "Umsatz (Mio €)",
            "Empfohlene Neueinstufung", "Begründung Neueinstufung", "FSM-Relevanz", "Letzte Prüfung",
            "Techniker-Einschätzung (Auto)", "Techniker-Einschätzung (Begründung)", "Techniker-Einschätzung (Manuell)"]:
    if col not in df.columns:
        df[col] = ""
    else:
        # Empty Excel cells arrive as NaN (often in a float column). The rest
        # of the script treats "" as "not filled yet" and writes strings into
        # these columns, so normalise to object dtype with "" for missing.
        df[col] = df[col].astype(object).where(df[col].notna(), "")
|
||||||
|
|
||||||
|
# === START AT THE FIRST EMPTY ROW IN COLUMN 'Letzte Prüfung' (column N) ===
sheet_values = sheet.get_all_values()
# Column N is list index 13; rows shorter than that count as empty.
# sheet_values[0] is the header row and is skipped.
filled_n = [row[13] if len(row) > 13 else '' for row in sheet_values[1:]]
# `start` is the 1-based position of the first unchecked *data* row, so that
# `start - 1` is its DataFrame index (as the main loop uses it) and
# `start + 1` is its sheet row (as printed below). The previous `i + 1`
# shifted this by one and skipped the first unchecked company.
start = next(
    (
        i
        for i, v in enumerate(filled_n, start=1)
        if not str(v).strip() or str(v).strip().lower() == 'nan'
    ),
    len(filled_n) + 1,  # everything filled: point one past the last data row
)
print(f"Starte bei Zeile {start+1} (erste leere Zeile in Spalte N)")
|
||||||
|
|
||||||
|
# === DETERMINE THE NUMBER OF QUERIES ===
try:
    limit = int(input("Wieviele Firmen sollen analysiert werden? (z.B. 1000): ").strip())
except (ValueError, EOFError):
    # Only a non-numeric answer or a closed stdin falls back to "all remaining
    # companies". Ctrl-C is no longer swallowed and aborts the script instead
    # of silently starting a full (paid) run.
    print("Ungültige Eingabe, verwende alle verbleibenden Firmen.")
    limit = len(df) - (start - 1)

wikipedia.set_lang(LANG)
|
||||||
|
|
||||||
|
# === SYSTEM PROMPT ===
# Target categories in the order in which they are numbered in the prompt.
_CATEGORIES = (
    "Hersteller / Produzenten > Maschinenbau",
    "Hersteller / Produzenten > Automobil",
    "Hersteller / Produzenten > Anlagenbau",
    "Hersteller / Produzenten > Medizintechnik",
    "Hersteller / Produzenten > Chemie & Pharma",
    "Hersteller / Produzenten > Elektrotechnik",
    "Hersteller / Produzenten > Lebensmittelproduktion",
    "Hersteller / Produzenten > IT / Telekommunikation",
    "Hersteller / Produzenten > Bürotechnik",
    "Hersteller / Produzenten > Automaten (Vending, Slot)",
    "Hersteller / Produzenten > Gebäudetechnik Heizung, Lüftung, Klima",
    "Hersteller / Produzenten > Gebäudetechnik Allgemein",
    "Hersteller / Produzenten > Schädlingsbekämpfung",
    "Hersteller / Produzenten > Fertigung",
    "Hersteller / Produzenten > Braune & Weiße Ware",
    "Versorger > Stadtwerk",
    "Versorger > Verteilnetzbetreiber",
    "Versorger > Telekommunikation",
    "Dienstleister > Messdienstleister",
    "Dienstleister > Facility Management",
    "Dienstleister > Healthcare/Pflegedienste",
    "Dienstleister > Servicedienstleister / Reparatur ohne Produktion",
    "Handel & Logistik > Auslieferdienste",
    "Handel & Logistik > Energie (Brennstoffe)",
    "Handel & Logistik > Großhandel",
    "Handel & Logistik > Einzelhandel",
    "Handel & Logistik > Logistik Sonstige",
    "Sonstige > Unternehmensberatung (old)",
    "Sonstige > Sonstige",
    "Sonstige > Agrar, Pellets (old)",
    "Sonstige > Sonstiger Service (old)",
    "Sonstige > IT Beratung",
    "Sonstige > Engineering",
    "Baubranche > Baustoffhandel",
    "Baubranche > Baustoffindustrie",
    "Baubranche > Logistiker Baustoffe",
    "Baubranche > Bauunternehmen",
    "Gutachter / Versicherungen > Versicherungsgutachten",
    "Gutachter / Versicherungen > Technische Gutachter",
    "Gutachter / Versicherungen > Medizinische Gutachten",
)

# One "<number>. <category>\n" line per category, numbered from 1.
_numbered_categories = "".join(
    f"{number}. {label}\n" for number, label in enumerate(_CATEGORIES, start=1)
)

SYSTEM_PROMPT = (
    "Du bist ein Klassifizierungs-Experte für Unternehmensbranchen. "
    "Ordne jedes Unternehmen genau einer der folgenden Kategorien zu (nur eine):\n\n"
    + _numbered_categories
    + "\n"  # blank line between the category list and the answer format
    + "Antwortformat: Wikipedia-Branche; LinkedIn-Branche; Umsatz (Mio €); Empfohlene Neueinstufung; Begründung; FSM-Relevanz; Techniker-Einschätzung (Auto); Techniker-Einschätzung (Begründung)"
)

# Chat message that carries the system prompt on every classification request.
system_prompt = {"role": "system", "content": SYSTEM_PROMPT}
|
||||||
|
|
||||||
|
# === WIKIPEDIA LOOKUP ===
|
||||||
|
def get_wikipedia_data(firmenname, timeout=10):
    """Look up a company on Wikipedia and scrape industry and revenue.

    The full company name is tried first, then only its first two words.
    For the first term that resolves to a page, the page HTML is fetched and
    the rows of the ``infobox`` table whose header contains "Branche" or
    "Umsatz" are read. If no industry is found there, the first page
    category is used instead.

    Args:
        firmenname: Company name to search for.
        timeout: Seconds to wait for the page download before giving up on
            the current search term.

    Returns:
        A tuple ``(url, branche, umsatz)`` of strings; ``("", "", "")`` when
        no search term produced a usable page.
    """
    if not isinstance(firmenname, str):
        # e.g. NaN from an empty spreadsheet cell: nothing to search for.
        return "", "", ""

    candidates = (firmenname.strip(), " ".join(firmenname.split()[:2]))
    # dict.fromkeys de-duplicates while keeping order, so a one- or two-word
    # name is not looked up twice; empty terms are dropped.
    suchbegriffe = [term for term in dict.fromkeys(candidates) if term]

    for suchbegriff in suchbegriffe:
        try:
            page = wikipedia.page(suchbegriff, auto_suggest=False)
            url = page.url
            # Without a timeout a stalled connection would hang the whole run.
            html = requests.get(url, timeout=timeout).text
            soup = BeautifulSoup(html, 'html.parser')
            infobox = soup.find("table", {"class": "infobox"})
            branche = ""
            umsatz = ""
            if infobox:
                for row in infobox.find_all("tr"):
                    header = row.find("th")
                    data = row.find("td")
                    if not header or not data:
                        continue
                    if "Branche" in header.text:
                        branche = data.text.strip()
                    if "Umsatz" in header.text:
                        umsatz = data.text.strip()
            if not branche:
                # Fall back to the first page category as a rough industry hint.
                cats = page.categories
                branche = cats[0] if cats else ""
            return url, branche, umsatz
        except Exception:
            # Best-effort lookup: any failure for this term just moves on to
            # the next one. `Exception` (not a bare except) keeps Ctrl-C and
            # SystemExit working.
            continue
    return "", "", ""
|
||||||
|
|
||||||
|
# === KLASSIFIZIERUNG ===
|
||||||
|
def classify_company(row):
    """Ask the chat model to classify one company.

    Builds a user message from the row's description, current classification
    and website, sends it together with the module-level ``system_prompt`` to
    ``gpt-4`` (temperature 0) and splits the answer on ``;`` into eight
    fields.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with the keys
            'Beschreibung des Unternehmens', 'Aktuelle Einstufung', 'Website'
            and 'Firmenname'.

    Returns:
        A list of exactly eight strings. Empty or missing fields are "k.A.";
        if the request or the parsing fails, all eight are "k.A.".
    """
    def _text(value):
        # NaN is truthy, so `value or ''` would put the literal "nan" into
        # the prompt for empty spreadsheet cells.
        if value is None or pd.isna(value) or not value:
            return ""
        return value

    content = (
        f"Beschreibung: {_text(row['Beschreibung des Unternehmens'])}\n"
        f"Einstufung: {_text(row['Aktuelle Einstufung'])}\n"
        f"Website: {_text(row['Website'])}"
    )
    try:
        resp = openai.chat.completions.create(
            model="gpt-4",
            messages=[system_prompt, {"role": "user", "content": content}],
            temperature=0
        )
        result = resp.choices[0].message.content.strip()
        # maxsplit=7: any further ';' stays inside the eighth field.
        # Stripping quotes before the emptiness test turns `""` into "k.A."
        # as well.
        parts = [v.strip().strip('"') or "k.A." for v in result.split(";", 7)]
        # Pad short answers so callers can always unpack eight values.
        parts.extend(["k.A."] * (8 - len(parts)))
        return parts
    except Exception as e:
        print(f"⚠️ Fehler bei Zeile: {row['Firmenname']} → {e}")
        return ["k.A."] * 8
|
||||||
|
|
||||||
|
# === LOOP ===
def _is_blank(value):
    """Return True for None, NaN/NaT and empty or whitespace-only strings."""
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    return bool(pd.isna(value))


# Column order must match the sheet range written below (starting at column G).
_OUTPUT_COLUMNS = [
    "Wikipedia-Branche", "LinkedIn-Branche", "Umsatz (Mio €)",
    "Empfohlene Neueinstufung", "Begründung Neueinstufung",
    "FSM-Relevanz", "Wikipedia-URL", "Letzte Prüfung",
    "Techniker-Einschätzung (Auto)", "Techniker-Einschätzung (Begründung)"
]

count = 0
for df_idx in range(start - 1, len(df)):
    if count >= limit:
        break
    row = df.iloc[df_idx]
    # Skip companies that already carry a check timestamp. NaN counts as
    # "not checked"; str(NaN) == "nan" would otherwise look like a value.
    if not _is_blank(row.get("Letzte Prüfung", "")):
        continue

    print(f"[{time.strftime('%H:%M:%S')}] Verarbeite Zeile {df_idx+1}: {row['Firmenname']}")
    count += 1

    url, wiki_branche, umsatz = get_wikipedia_data(row['Firmenname'])
    wiki_branche = wiki_branche.strip('"')
    df.at[df_idx, "Wikipedia-URL"] = url or "k.A."
    df.at[df_idx, "Wikipedia-Branche"] = wiki_branche or "k.A."
    if _is_blank(df.at[df_idx, "Umsatz (Mio €)"]):
        df.at[df_idx, "Umsatz (Mio €)"] = umsatz or "k.A."

    wiki, linkedin, umsatz_chat, new_cat, reason, fsm_relevant, techniker, techniker_reason = classify_company(row)
    # classify_company reports unknown fields as "k.A.", which is truthy, so
    # a plain `wiki or wiki_branche` never reached the scraped value.
    model_wiki = "" if wiki == "k.A." else wiki
    df.at[df_idx, "Wikipedia-Branche"] = model_wiki or wiki_branche or "k.A."
    df.at[df_idx, "LinkedIn-Branche"] = linkedin or "k.A."
    current_umsatz = df.at[df_idx, "Umsatz (Mio €)"]
    if _is_blank(current_umsatz) or current_umsatz == "k.A.":
        df.at[df_idx, "Umsatz (Mio €)"] = umsatz_chat or "k.A."
    df.at[df_idx, "Empfohlene Neueinstufung"] = new_cat or "k.A."

    raw_cat = row.get("Aktuelle Einstufung")
    current_cat = "" if _is_blank(raw_cat) else str(raw_cat).strip().strip('"')
    # A justification is only stored when the recommendation differs from the
    # current classification.
    if new_cat != current_cat:
        df.at[df_idx, "Begründung Neueinstufung"] = reason or "k.A."
    else:
        df.at[df_idx, "Begründung Neueinstufung"] = ""

    df.at[df_idx, "FSM-Relevanz"] = fsm_relevant or "k.A."
    df.at[df_idx, "Techniker-Einschätzung (Auto)"] = techniker or "k.A."
    df.at[df_idx, "Techniker-Einschätzung (Begründung)"] = techniker_reason or "k.A."

    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    df.at[df_idx, "Letzte Prüfung"] = now

    # DataFrame index 0 is sheet row 2 (row 1 holds the header).
    sheet.update(
        values=[df.loc[df_idx, _OUTPUT_COLUMNS].tolist()],
        range_name=f"G{df_idx+2}:Q{df_idx+2}"
    )

    # Fixed pause between companies; presumably to stay below API rate limits.
    time.sleep(5)

print("✅ Fertig!")
|
||||||
212
brancheneinstufung.py
Normal file
212
brancheneinstufung.py
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
import gspread
|
||||||
|
import openai
|
||||||
|
import wikipedia
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
from oauth2client.service_account import ServiceAccountCredentials
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# === CONFIG ===
EXCEL = "Bestandsfirmen.xlsx"  # workbook with the company list
SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo"
CREDENTIALS = "service_account.json"  # service-account key file used for Google auth
CHUNK = 10  # NOTE(review): not referenced anywhere in the visible script
LANG = "de"  # language passed to wikipedia.set_lang()

# === AUTHENTICATION ===
scope = ["https://www.googleapis.com/auth/spreadsheets"]
creds = ServiceAccountCredentials.from_json_keyfile_name(CREDENTIALS, scope)
sheet = gspread.authorize(creds).open_by_url(SHEET_URL).sheet1

# Load the OpenAI API key from an external file instead of hard-coding it.
# The encoding is pinned so the result does not depend on the platform default.
with open("api_key.txt", "r", encoding="utf-8") as f:
    openai.api_key = f.read().strip()

# === LOAD DATA ===
df = pd.read_excel(EXCEL)
for col in ["Wikipedia-URL", "Wikipedia-Branche", "LinkedIn-Branche", "Umsatz (Mio €)",
            "Empfohlene Neueinstufung", "Begründung Neueinstufung", "FSM-Relevanz", "Letzte Prüfung",
            "Techniker-Einschätzung (Auto)", "Techniker-Einschätzung (Begründung)", "Techniker-Einschätzung (Manuell)"]:
    if col not in df.columns:
        df[col] = ""
    else:
        # Empty Excel cells arrive as NaN (often in a float column). The rest
        # of the script treats "" as "not filled yet" and writes strings into
        # these columns, so normalise to object dtype with "" for missing.
        df[col] = df[col].astype(object).where(df[col].notna(), "")
|
||||||
|
|
||||||
|
# === START AT THE FIRST EMPTY ROW IN COLUMN 'Letzte Prüfung' (column N) ===
sheet_values = sheet.get_all_values()
# Column N is list index 13; rows shorter than that count as empty.
# sheet_values[0] is the header row and is skipped.
filled_n = [row[13] if len(row) > 13 else '' for row in sheet_values[1:]]
# `start` is the 1-based position of the first unchecked *data* row, so that
# `start - 1` is its DataFrame index (as the main loop uses it) and
# `start + 1` is its sheet row (as printed below). The previous `i + 1`
# shifted this by one and skipped the first unchecked company.
start = next(
    (
        i
        for i, v in enumerate(filled_n, start=1)
        if not str(v).strip() or str(v).strip().lower() == 'nan'
    ),
    len(filled_n) + 1,  # everything filled: point one past the last data row
)
print(f"Starte bei Zeile {start+1} (erste leere Zeile in Spalte N)")

# Wikipedia industry -> recommended target category, filled by the main loop.
mapping_dict = {}
wikipedia.set_lang(LANG)
|
||||||
|
|
||||||
|
# === PREPARE TRANSLATION TABLE ===
def _get_or_create_worksheet(spreadsheet, title):
    """Return the worksheet called *title*, adding a 100x3 one if it is missing."""
    try:
        return spreadsheet.worksheet(title)
    except gspread.exceptions.WorksheetNotFound:
        return spreadsheet.add_worksheet(title=title, rows="100", cols="3")


sheet_trans_title = "Branchen-Mapping"
sheet_trans = _get_or_create_worksheet(sheet.spreadsheet, sheet_trans_title)

# Every run starts from an empty mapping sheet that holds only the header row.
sheet_trans.clear()
_mapping_header = [["Wikipedia-Branche", "Ziel-Branchenschema"]]
sheet_trans.update(range_name="A1:B1", values=_mapping_header)
|
||||||
|
|
||||||
|
# === INDUSTRY SCHEMA ===
# Target schema grouped by top-level category; flattened below into
# "<group> > <sub-category>" strings in exactly this order.
_BRANCH_GROUPS = {
    "Hersteller / Produzenten": (
        "Maschinenbau",
        "Automobil",
        "Anlagenbau",
        "Medizintechnik",
        "Chemie & Pharma",
        "Elektrotechnik",
        "Lebensmittelproduktion",
        "IT / Telekommunikation",
        "Bürotechnik",
        "Automaten (Vending, Slot)",
        "Gebäudetechnik Heizung, Lüftung, Klima",
        "Gebäudetechnik Allgemein",
        "Schädlingsbekämpfung",
        "Fertigung",
        "Braune & Weiße Ware",
    ),
    "Versorger": (
        "Stadtwerk",
        "Verteilnetzbetreiber",
        "Telekommunikation",
    ),
    "Dienstleister": (
        "Messdienstleister",
        "Facility Management",
        "Healthcare/Pflegedienste",
        "Servicedienstleister / Reparatur ohne Produktion",
    ),
    "Handel & Logistik": (
        "Auslieferdienste",
        "Energie (Brennstoffe)",
        "Großhandel",
        "Einzelhandel",
        "Logistik Sonstige",
    ),
    "Sonstige": (
        "Unternehmensberatung (old)",
        "Sonstige",
        "Agrar, Pellets (old)",
        "Sonstiger Service (old)",
        "IT Beratung",
        "Engineering",
    ),
    "Baubranche": (
        "Baustoffhandel",
        "Baustoffindustrie",
        "Logistiker Baustoffe",
        "Bauunternehmen",
    ),
    "Gutachter / Versicherungen": (
        "Versicherungsgutachten",
        "Technische Gutachter",
        "Medizinische Gutachten",
    ),
}

branches = [
    f"{group} > {sub_category}"
    for group, sub_categories in _BRANCH_GROUPS.items()
    for sub_category in sub_categories
]

# System message: a fixed introduction followed by one schema entry per line.
_schema_intro = "Du bist ein Experte für Brancheneinstufung und FSM-Potenzialbewertung. Nutze das folgende Ziel‑Branchenschema als Referenz:\n\n"
system_prompt = {
    "role": "system",
    "content": _schema_intro + "\n".join(branches),
}
|
||||||
|
|
||||||
|
# === WIKIPEDIA LOOKUP ===
|
||||||
|
def get_wikipedia_data(firmenname, timeout=10):
    """Look up a company on Wikipedia and scrape industry and revenue.

    The full company name is tried first, then only its first two words.
    For the first term that resolves to a page, the page HTML is fetched and
    the rows of the ``infobox`` table whose header contains "Branche" or
    "Umsatz" are read. If no industry is found there, the first page
    category is used instead.

    Args:
        firmenname: Company name to search for.
        timeout: Seconds to wait for the page download before giving up on
            the current search term.

    Returns:
        A tuple ``(url, branche, umsatz)`` of strings; ``("", "", "")`` when
        no search term produced a usable page.
    """
    if not isinstance(firmenname, str):
        # e.g. NaN from an empty spreadsheet cell: nothing to search for.
        return "", "", ""

    candidates = (firmenname.strip(), " ".join(firmenname.split()[:2]))
    # dict.fromkeys de-duplicates while keeping order, so a one- or two-word
    # name is not looked up twice; empty terms are dropped.
    suchbegriffe = [term for term in dict.fromkeys(candidates) if term]

    for suchbegriff in suchbegriffe:
        try:
            page = wikipedia.page(suchbegriff, auto_suggest=False)
            url = page.url
            # Without a timeout a stalled connection would hang the whole run.
            html = requests.get(url, timeout=timeout).text
            soup = BeautifulSoup(html, 'html.parser')
            infobox = soup.find("table", {"class": "infobox"})
            branche = ""
            umsatz = ""
            if infobox:
                for row in infobox.find_all("tr"):
                    header = row.find("th")
                    data = row.find("td")
                    if not header or not data:
                        continue
                    if "Branche" in header.text:
                        branche = data.text.strip()
                    if "Umsatz" in header.text:
                        umsatz = data.text.strip()
            if not branche:
                # Fall back to the first page category as a rough industry hint.
                cats = page.categories
                branche = cats[0] if cats else ""
            return url, branche, umsatz
        except Exception:
            # Best-effort lookup: any failure for this term just moves on to
            # the next one. `Exception` (not a bare except) keeps Ctrl-C and
            # SystemExit working.
            continue
    return "", "", ""
|
||||||
|
|
||||||
|
# === KLASSIFIZIERUNG ===
|
||||||
|
def classify_company(row):
    """Ask the chat model to classify one company.

    Builds a user message from the row's description, current classification,
    external industry description and website, sends it together with the
    module-level ``system_prompt`` to ``gpt-4`` (temperature 0) and splits the
    answer on ``;`` into eight fields.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with the keys
            'Beschreibung des Unternehmens', 'Aktuelle Einstufung',
            'Beschreibung der Branche Extern' and 'Website'.

    Returns:
        A list of exactly eight strings; fields missing from the answer are
        padded with "k.A.". Errors raised by the API call are not caught here.
    """
    def _text(value):
        # NaN is truthy, so `value or ''` would put the literal "nan" into
        # the prompt for empty spreadsheet cells.
        if value is None or pd.isna(value) or not value:
            return ""
        return value

    user_prompt = {
        "role": "user",
        "content": (
            "Bitte prüfe die vorliegenden Informationen zum Unternehmen. Gib die Antwort im CSV-Format zurück:\n"
            "Wikipedia-Branche; LinkedIn-Branche; Umsatz (Mio €); Empfohlene Neueinstufung; Begründung; FSM-Relevanz (Ja/Nein/k.A. mit Begründung); Techniker-Einschätzung (<50/>50/>100/>500); Techniker-Begründung\n\n"
            f"Beschreibung: {_text(row['Beschreibung des Unternehmens'])}\n"
            f"Aktuelle Einstufung: {_text(row['Aktuelle Einstufung'])}\n"
            f"Externe Branchenbeschreibung: {_text(row['Beschreibung der Branche Extern'])}\n"
            f"Website: {_text(row['Website'])}"
        )
    }
    resp = openai.chat.completions.create(
        model="gpt-4",
        messages=[system_prompt, user_prompt],
        temperature=0
    )
    result = resp.choices[0].message.content.strip()
    # maxsplit=7: any further ';' stays inside the eighth field.
    parts = [v.strip().strip('"') for v in result.split(";", 7)]
    # Pad short answers so callers can always unpack eight values.
    parts.extend(["k.A."] * (8 - len(parts)))
    return parts
|
||||||
|
|
||||||
|
# === LOOP ===
def _is_blank(value):
    """Return True for None, NaN/NaT and empty or whitespace-only strings."""
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    return bool(pd.isna(value))


# Column order must match the sheet range written below (starting at column G).
_OUTPUT_COLUMNS = [
    "Wikipedia-Branche", "LinkedIn-Branche", "Umsatz (Mio €)",
    "Empfohlene Neueinstufung", "Begründung Neueinstufung",
    "FSM-Relevanz", "Wikipedia-URL", "Letzte Prüfung",
    "Techniker-Einschätzung (Auto)", "Techniker-Einschätzung (Begründung)"
]

for df_idx in range(start - 1, len(df)):
    row = df.iloc[df_idx]
    # Skip companies that already carry a check timestamp. NaN counts as
    # "not checked"; str(NaN) == "nan" would otherwise look like a value.
    if not _is_blank(row.get("Letzte Prüfung", "")):
        continue

    print(f"[{time.strftime('%H:%M:%S')}] Verarbeite Zeile {df_idx+1}: {row['Firmenname']}")

    url, wiki_branche, umsatz = get_wikipedia_data(row['Firmenname'])
    df.at[df_idx, "Wikipedia-URL"] = url
    df.at[df_idx, "Wikipedia-Branche"] = wiki_branche.strip('"')
    if _is_blank(df.at[df_idx, "Umsatz (Mio €)"]):
        df.at[df_idx, "Umsatz (Mio €)"] = umsatz

    wiki, linkedin, umsatz_chat, new_cat, reason, fsm_relevant, techniker, techniker_reason = classify_company(row)
    # Prefer the model's value; fall back to the scraped one when it is empty.
    df.at[df_idx, "Wikipedia-Branche"] = wiki or wiki_branche
    df.at[df_idx, "LinkedIn-Branche"] = linkedin
    if _is_blank(df.at[df_idx, "Umsatz (Mio €)"]):
        df.at[df_idx, "Umsatz (Mio €)"] = umsatz_chat
    df.at[df_idx, "Empfohlene Neueinstufung"] = new_cat

    raw_cat = row.get("Aktuelle Einstufung")
    current_cat = "" if _is_blank(raw_cat) else str(raw_cat).strip().strip('"')
    # A justification is only stored when the recommendation differs from the
    # current classification.
    if new_cat != current_cat:
        df.at[df_idx, "Begründung Neueinstufung"] = reason
    else:
        df.at[df_idx, "Begründung Neueinstufung"] = ""

    df.at[df_idx, "FSM-Relevanz"] = fsm_relevant
    df.at[df_idx, "Techniker-Einschätzung (Auto)"] = techniker
    df.at[df_idx, "Techniker-Einschätzung (Begründung)"] = techniker_reason

    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    df.at[df_idx, "Letzte Prüfung"] = now

    # Record each Wikipedia industry once, with the first category it mapped to.
    key = df.at[df_idx, "Wikipedia-Branche"]
    val = df.at[df_idx, "Empfohlene Neueinstufung"]
    if key and val and key not in mapping_dict:
        mapping_dict[key] = val
        # Row 1 holds the header, so the n-th mapping goes to row n + 1.
        mapping_row = len(mapping_dict) + 1
        sheet_trans.update(range_name=f"A{mapping_row}:B{mapping_row}", values=[[key, val]])

    # DataFrame index 0 is sheet row 2 (row 1 holds the header).
    sheet.update(
        values=[df.loc[df_idx, _OUTPUT_COLUMNS].tolist()],
        range_name=f"G{df_idx+2}:Q{df_idx+2}"
    )

    # Fixed pause between companies; presumably to stay below API rate limits.
    time.sleep(5)

print("✅ Fertig!")
|
||||||
13
service_account.json
Normal file
13
service_account.json
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"type": "service_account",
|
||||||
|
"project_id": "kalender-400315",
|
||||||
|
"private_key_id": "e18c9ed28d358b7c1dd33767ae40d76646d09ee5",
|
||||||
|
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCon4+bfr+DL88y\n9kuyEzB0fu20N8gfz/8VPGqsLkY3fo0mklriXYS0Ofw8nlhXKUYQd/7HF1LQDbb0\n+4BZy5W6HjAzq4BxQM9X8q72oFqU20vtvVE3PGCFZ2yek2KaamMz1rV3tg0QJHBp\nD43tDXfhqsFcQtzQ5dLS06jagpw4oFQq37nTpaAHGfarA2gIz0EiKSFIusyqyMbY\nzit1wSJvC8yS14ucIa/xEthIP5IGln2nu8OKeil2X9Wlg5+y2gnkIwAaiFuJE6u6\nyyRnljK97wjxUoeq7JGj75TVvhW8/nPEJJGVt9Df2RG/mcsCGPCw1M+BK98wnLQM\nqf6DASGvAgMBAAECggEABnwwgTn0Mp7tmJepXJ1pUSmqibJVg4ez10S+E4YqFCtN\nUWzyit8u4eGTBl/OpPeE5tA+n7iRakgEv9JGYu22AfT0RKB1baWg89B1Ey4inbP7\nbRbJX1b2nVrXbhfcBHVQAGfjDcoWIYdNPXUoL7RDZo3zJtG5fV1BaPNT/KdmS7Uq\nzShY8QNTImwDvczzGo1UzZZYz+Wh1WafzvUXJrxZUrwLgZEUAHjwC46WyCVlKlVK\nWKuc1j/gpSm6vp5QtQwIEyWHIXdfxDN61Qizt3ujOSvEZwrt70/kEl9zqfILK2S4\nDuVXUfdg7RLSfoDll8RckVd6MvvKpAs2gQHN99vymQKBgQDiaei7wvTeoEh43OR9\nmtz9uRyTtGx4xfQGQZ62Tb5z6RtU2N+uEuT+bbQ3svP5yMfIqVV0NxZjUkA6A4H0\ndQwaKYcfLGXPDK3XOERh1R12MFcHmN3GXQ3htqPwHssv1XWkWtzF71DfO2prkQhQ\nsJMHQeLSdrMhJuBDFkVxzt+XtwKBgQC+qGvV29k3CsiJdA/kvdXj2Av0MCqaesyb\nDZ95pJUitSFzCwKf4TNL8lFwnp8aH2SAFMyjVfU6lW8x97kd7LrHVoEJZ8LZUFfp\ne2nJtr7zAzWhSEXvVyFTultu8f0yCRAaQSvxtNrxzK2qKSU/i5LG4UEn5/2UXIc3\nGP7vKAcVyQKBgFtY0X4XfQXo8vRYYpNOjMSND7uD+pOLghWYGfmBSkqnjNBho3Uu\nyFbiWehy+b4YRIpvzztREmZGBAWj7qV7J8PSZ2KrXfP4MJb5a5VxDy7k9+fnw+pJ\nFtWvA1tYgS+uVoVV52awVBeMeDYn0IwFgXxXF7VEEKdacPaAIMp25XznAoGAVwlP\n4Rsx+BymG4muT8ARyqewHVN3v3nbudToPoHRyzxg/sHB+va1kjAW2ZCwFR7LpGTa\nXp45Cvt4/ZiXG4AcbC+jTIXfv5Eb8Ox0CK+yUAHhE7WIp5BhGHX0k5tI+PSGM1YZ\nF4h2gvfrN8j8xhuTb9MZCXywmG/9EjwI9WiSg3ECgYEA2EX+2xAc3zpDaTdb7h4n\nBo6lzYU2JYSeNR/fBUMqZGkBEJ/NUpwihY8kvkuxr3CvfDEEjGj2xhH8qYfiO4Pp\nUVh/dPgjq4XAibXWtVsXpploCPqYFj3lBwqFbDHPkE0HG/oXxYHRFgVss8ZOdOp6\nteiDU5B/5caV7jNk+nluzMg=\n-----END PRIVATE KEY-----\n",
|
||||||
|
"client_email": "brancheneinstufung@kalender-400315.iam.gserviceaccount.com",
|
||||||
|
"client_id": "114291880309376322726",
|
||||||
|
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||||
|
"token_uri": "https://oauth2.googleapis.com/token",
|
||||||
|
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||||
|
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/brancheneinstufung%40kalender-400315.iam.gserviceaccount.com",
|
||||||
|
"universe_domain": "googleapis.com"
|
||||||
|
}
|
||||||
2691
update.log
Normal file
2691
update.log
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user