v1.3.11: Spalten um +1 verschoben, Kurzform in Spalte C; alle Indizes aktualisiert

- Neue Header: Spalte C enthält jetzt die Kurzform des Firmennamens.
- Website verschoben auf Spalte D, und alle nachfolgenden Spalten um +1.
- Alle Code-Referenzen (Update-Ranges, Indizes in _process_single_row, get_start_index, process_contacts, process_contact_research) wurden entsprechend angepasst.
- Für LinkedIn-Suchen wird jetzt die Kurzform (Spalte C) bevorzugt verwendet.
This commit is contained in:
2025-04-03 14:59:39 +00:00
parent e2072bfb8b
commit 6ae09b4927

View File

@@ -14,7 +14,7 @@ import csv
# ==================== KONFIGURATION ==================== # ==================== KONFIGURATION ====================
class Config: class Config:
VERSION = "v1.3.9" # v1.3.9: Alle bisherigen Funktionen inkl. Reg. Modus, Re-Eval, Alignment, Wiki, ChatGPT, Contact Research. VERSION = "v1.3.11" # v1.3.11: Spalten um +1 verschoben, Kurzform in Spalte C; alle Referenzen angepasst.
LANG = "de" LANG = "de"
CREDENTIALS_FILE = "service_account.json" CREDENTIALS_FILE = "service_account.json"
SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo" SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo"
@@ -335,7 +335,11 @@ def search_linkedin_contact(company_name, website, position_query):
except Exception as e: except Exception as e:
debug_print("Fehler beim Lesen des SerpAPI-Schlüssels: " + str(e)) debug_print("Fehler beim Lesen des SerpAPI-Schlüssels: " + str(e))
return None return None
query = f'site:linkedin.com/in "{position_query}" "{company_name}"' # Nutze hier die Kurzform, falls vorhanden (Spalte C, Index 2); ansonsten Firmenname (Index 1)
search_name = company_name
if company_name == "" and website != "":
search_name = website
query = f'site:linkedin.com/in "{position_query}" "{search_name}"'
debug_print(f"Erstelle LinkedIn-Query: {query}") debug_print(f"Erstelle LinkedIn-Query: {query}")
params = { params = {
"engine": "google", "engine": "google",
@@ -378,7 +382,7 @@ def search_linkedin_contact(company_name, website, position_query):
debug_print(f"Fehler bei der SerpAPI-Suche: {e}") debug_print(f"Fehler bei der SerpAPI-Suche: {e}")
return None return None
# ==================== NEUE FUNKTION: ZÄHLEN DER LINKEDIN-KONTAKTE (für Contact Research) ==================== # ==================== NEUE FUNKTION: ZÄHLEN DER LINKEDIN-KONTAKTE ====================
def count_linkedin_contacts(company_name, website, position_query): def count_linkedin_contacts(company_name, website, position_query):
try: try:
with open("serpApiKey.txt", "r") as f: with open("serpApiKey.txt", "r") as f:
@@ -416,73 +420,91 @@ def process_contact_research():
sh = gc.open_by_url(Config.SHEET_URL) sh = gc.open_by_url(Config.SHEET_URL)
main_sheet = sh.sheet1 main_sheet = sh.sheet1
data = main_sheet.get_all_values() data = main_sheet.get_all_values()
# Für jeden Datensatz werden für vier Kategorien die Trefferanzahl ermittelt: # Website ist nun in Spalte D (Index 3), Firmenname in Spalte B (Index 1)
for i, row in enumerate(data[1:], start=2): for i, row in enumerate(data[1:], start=2):
company_name = row[1] if len(row) > 1 else "" company_name = row[1] if len(row) > 1 else ""
website = row[2] if len(row) > 2 else "" # Verwende die Kurzform (Spalte C, Index 2) für die Suche, wenn vorhanden, ansonsten Firmenname
search_name = row[2].strip() if len(row) > 2 and row[2].strip() not in ["", "k.A."] else company_name
website = row[3] if len(row) > 3 else ""
if not company_name or not website: if not company_name or not website:
continue continue
count_service = count_linkedin_contacts(company_name, website, "Serviceleiter") count_service = count_linkedin_contacts(search_name, website, "Serviceleiter")
count_it = count_linkedin_contacts(company_name, website, "IT-Leiter") count_it = count_linkedin_contacts(search_name, website, "IT-Leiter")
count_management = count_linkedin_contacts(company_name, website, "Geschäftsführer") count_management = count_linkedin_contacts(search_name, website, "Geschäftsführer")
count_disponent = count_linkedin_contacts(company_name, website, "Disponent") count_disponent = count_linkedin_contacts(search_name, website, "Disponent")
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Verwende die korrekte Parameterreihenfolge: Werte zuerst, dann Range-Name main_sheet.update(values=[[str(count_service)]], range_name=f"AH{i}")
main_sheet.update([[str(count_service)]], f"AH{i}") main_sheet.update(values=[[str(count_it)]], range_name=f"AI{i}")
main_sheet.update([[str(count_it)]], f"AI{i}") main_sheet.update(values=[[str(count_management)]], range_name=f"AJ{i}")
main_sheet.update([[str(count_management)]], f"AJ{i}") main_sheet.update(values=[[str(count_disponent)]], range_name=f"AK{i}")
main_sheet.update([[str(count_disponent)]], f"AK{i}") main_sheet.update(values=[[current_dt]], range_name=f"AL{i}")
main_sheet.update([[current_dt]], f"AL{i}") debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Contact Search Timestamp gesetzt.")
debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Timestamp gesetzt.")
time.sleep(Config.RETRY_DELAY * 1.5) time.sleep(Config.RETRY_DELAY * 1.5)
debug_print("Contact Research abgeschlossen.") debug_print("Contact Research abgeschlossen.")
# ==================== ALIGNMENT DEMO (Modus 3) ==================== # ==================== NEUER MODUS: ALIGNMENT DEMO (für Hauptblatt und Contacts) ====================
def alignment_demo_full():
    """Write the alignment-demo header row to the main sheet and to the "Contacts" sheet.

    The main sheet is obtained through ``GoogleSheetHandler``. The "Contacts"
    worksheet is opened from the spreadsheet at ``Config.SHEET_URL``; when it
    does not exist it is created and its own header row (A1:H1) is written
    before the alignment demo runs on it.
    """
    # Main sheet first, via the handler the rest of the script uses.
    alignment_demo(GoogleSheetHandler().sheet)

    scopes = ["https://www.googleapis.com/auth/spreadsheets"]
    credentials = ServiceAccountCredentials.from_json_keyfile_name(Config.CREDENTIALS_FILE, scopes)
    spreadsheet = gspread.authorize(credentials).open_by_url(Config.SHEET_URL)

    try:
        contacts_sheet = spreadsheet.worksheet("Contacts")
    except gspread.exceptions.WorksheetNotFound:
        # First run: create the worksheet and give it its contact header.
        contacts_sheet = spreadsheet.add_worksheet(title="Contacts", rows="1000", cols="10")
        contacts_sheet.update(
            values=[["Firmenname", "Website", "Kurzform", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"]],
            range_name="A1:H1",
        )
        debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.")

    # NOTE(review): alignment_demo writes the main-sheet headers (columns A..AP)
    # into row 11200, while a freshly created Contacts sheet is sized 1000 x 10.
    # Confirm the Sheets API accepts a range outside the grid.
    alignment_demo(contacts_sheet)
    debug_print("Alignment-Demo für Hauptblatt und Contacts abgeschlossen.")
# ==================== ALIGNMENT DEMO (Hauptblatt) ====================
def _column_letter(index):
    """Return the A1-notation column letters for a zero-based column index.

    Examples: 0 -> "A", 25 -> "Z", 26 -> "AA", 41 -> "AP".
    """
    letters = ""
    index += 1  # the letter scheme has no zero digit, so work on a 1-based number
    while index > 0:
        index, remainder = divmod(index - 1, 26)
        letters = chr(ord("A") + remainder) + letters
    return letters


def alignment_demo(sheet, row=11200):
    """Write the documented column headers of the main layout into one sheet row.

    Each header has the form ``"Spalte <letters> (<description>)"``. The column
    letters and the target range are derived from the position in the
    description list, so inserting or removing a column only requires editing
    that list (no manual renumbering of letters or of the range end).

    Args:
        sheet: Worksheet-like object exposing ``update(values=..., range_name=...)``.
        row: 1-based sheet row that receives the headers (default 11200).
    """
    # Ordered left to right, starting at column A.
    descriptions = [
        "ReEval Flag",
        "Firmenname",
        "Kurzform des Firmennamens",
        "Website",
        "Ort",
        "Beschreibung",
        "Aktuelle Branche",
        "Beschreibung Branche extern",
        "Anzahl Techniker CRM",
        "Umsatz CRM",
        "Anzahl Mitarbeiter CRM",
        "Vorschlag Wiki URL",
        "Wikipedia URL",
        "Wikipedia Absatz",
        "Wikipedia Branche",
        "Wikipedia Umsatz",
        "Wikipedia Mitarbeiter",
        "Wikipedia Kategorien",
        "Konsistenzprüfung",
        "Begründung bei Inkonsistenz",
        "Vorschlag Wiki Artikel ChatGPT",
        "Begründung bei Abweichung",
        "Vorschlag neue Branche",
        "Konsistenzprüfung Branche",
        "Begründung Abweichung Branche",
        "FSM Relevanz Ja / Nein",
        "Begründung für FSM Relevanz",
        "Schätzung Anzahl Mitarbeiter",
        "Konsistenzprüfung Mitarbeiterzahl",
        "Begründung für Abweichung Mitarbeiterzahl",
        "Einschätzung Anzahl Servicetechniker",
        "Begründung bei Abweichung Anzahl Servicetechniker",
        "Schätzung Umsatz ChatGPT",
        "Begründung für Abweichung Umsatz",
        "Serviceleiter gefunden",
        "IT-Leiter gefunden",
        "Management gefunden",
        "Disponent gefunden",
        "Contact Search Timestamp",
        "Wikipedia Timestamp",
        "ChatGPT Timestamp",
        "Version",
    ]
    new_headers = [
        f"Spalte {_column_letter(position)} ({text})"
        for position, text in enumerate(descriptions)
    ]
    last_column = _column_letter(len(new_headers) - 1)
    header_range = f"A{row}:{last_column}{row}"
    sheet.update(values=[new_headers], range_name=header_range)
    print(f"Alignment-Demo abgeschlossen: Neue Spaltenüberschriften in Zeile {row} geschrieben.")
@@ -680,7 +702,8 @@ class GoogleSheetHandler:
self.sheet = gspread.authorize(creds).open_by_url(Config.SHEET_URL).sheet1 self.sheet = gspread.authorize(creds).open_by_url(Config.SHEET_URL).sheet1
self.sheet_values = self.sheet.get_all_values() self.sheet_values = self.sheet.get_all_values()
def get_start_index(self):
    """Return the start position derived from the first data row without a Wikipedia timestamp.

    The Wikipedia timestamp is read from column AN (zero-based index 39).
    Data rows are ``self.sheet_values[1:]`` (the first row is skipped); a row
    too short to contain that column counts as having no timestamp. If the
    k-th data row (1-based) is the first one without a timestamp, ``k + 1``
    is returned; if every data row has one, the result is the number of data
    rows plus one.
    """
    wiki_ts_col = 39
    data_rows = self.sheet_values[1:]
    for position, row in enumerate(data_rows, start=1):
        stamp = row[wiki_ts_col] if len(row) > wiki_ts_col else ''
        if not str(stamp).strip():
            return position + 1
    return len(data_rows) + 1
# ==================== DATA PROCESSOR ==================== # ==================== DATA PROCESSOR ====================
@@ -689,27 +712,27 @@ class DataProcessor:
self.sheet_handler = GoogleSheetHandler() self.sheet_handler = GoogleSheetHandler()
self.wiki_scraper = WikipediaScraper() self.wiki_scraper = WikipediaScraper()
def process_rows(self, num_rows=None): def process_rows(self, num_rows=None):
# MODE 1: Regulärer Modus nur Zeilen ohne entsprechende Timestamps werden bearbeitet
if MODE == "2": if MODE == "2":
print("Re-Evaluierungsmodus: Verarbeitung aller Zeilen mit 'x' in Spalte A.") print("Re-Evaluierungsmodus: Verarbeitung aller Zeilen mit 'x' in Spalte A.")
for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2): for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2):
if row[0].strip().lower() == "x": if row[0].strip().lower() == "x":
self._process_single_row(i, row, force_all=True) self._process_single_row(i, row, force_all=True)
elif MODE == "3": elif MODE == "3":
print("Alignment-Demo-Modus: Schreibe neue Spaltenüberschriften in Zeile 11200.") print("Alignment-Demo-Modus: Schreibe neue Spaltenüberschriften in Hauptblatt und Contacts.")
alignment_demo(self.sheet_handler.sheet) alignment_demo_full()
elif MODE == "4": elif MODE == "4":
# Nur Wikipedia-Suche: nur Zeilen ohne Wikipedia-Timestamp (Spalte AM, Index 38) processor = DataProcessor()
for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2): for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
if len(row) <= 38 or row[38].strip() == "": # Nur Zeilen ohne Wikipedia-Timestamp (Spalte AN, Index 39)
self._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
elif MODE == "5":
# Nur ChatGPT Bewertung: nur Zeilen ohne ChatGPT-Timestamp (Spalte AN, Index 39)
for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2):
if len(row) <= 39 or row[39].strip() == "": if len(row) <= 39 or row[39].strip() == "":
self._process_single_row(i, row, process_wiki=False, process_chatgpt=True) processor._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
elif MODE == "5":
processor = DataProcessor()
# Nur Zeilen ohne ChatGPT-Timestamp (Spalte AO, Index 40)
for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
if len(row) <= 40 or row[40].strip() == "":
processor._process_single_row(i, row, process_wiki=False, process_chatgpt=True)
else: else:
# Regulärer Modus: Bearbeite nur Zeilen, die noch nicht vollständig bewertet wurden
start_index = self.sheet_handler.get_start_index() start_index = self.sheet_handler.get_start_index()
print(f"Starte bei Zeile {start_index+1}") print(f"Starte bei Zeile {start_index+1}")
rows_processed = 0 rows_processed = 0
@@ -720,93 +743,94 @@ class DataProcessor:
break break
self._process_single_row(i, row) self._process_single_row(i, row)
rows_processed += 1 rows_processed += 1
def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=True, process_chatgpt=True):
    """Run the Wikipedia and/or ChatGPT evaluation for one sheet row and write the results back.

    Column layout used here (short name inserted as column C, everything after
    it shifted by one, matching the header list in ``alignment_demo``):
    B = company name (index 1), D = website (index 3),
    I = technicians CRM (index 8), J = revenue CRM (index 9),
    L..R = Wikipedia block (indices 11..17), S = consistency check,
    Z / AA = FSM relevance / justification,
    AE / AF = service technician estimate / discrepancy explanation,
    AH = revenue deviation, AN = Wikipedia timestamp (index 39),
    AO = ChatGPT timestamp (index 40), AP = version.

    Args:
        row_num: 1-based sheet row number used to build the target ranges.
        row_data: List of cell values of that row; may be shorter than the full width.
        force_all: Enables both the Wikipedia and the ChatGPT part.
        process_wiki: Enables the Wikipedia part.
        process_chatgpt: Enables the ChatGPT part.

    Returns:
        None. Results are written to the sheet and a summary goes to ``debug_print``.
    """
    def cell(index, default=""):
        # Rows coming from the sheet can be shorter than the full header width.
        return row_data[index] if len(row_data) > index else default

    sheet = self.sheet_handler.sheet
    company_name = cell(1)
    website = cell(3)
    wiki_update_range = f"L{row_num}:R{row_num}"  # suggested wiki URL .. Wikipedia categories
    dt_wiki_range = f"AN{row_num}"                # Wikipedia timestamp
    dt_chat_range = f"AO{row_num}"                # ChatGPT timestamp
    ver_range = f"AP{row_num}"                    # version
    print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}")
    current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Explicit defaults replace the former "'name' in locals()" probes.
    wiki_data = None
    abgleich_result = "k.A."
    valid_result = "k.A."
    fsm_suitability = "k.A."
    st_estimate = "k.A."

    def search_wiki_data():
        # Fall back to a search by company name / website; placeholders when nothing is found.
        article = self.wiki_scraper.search_company_article(company_name, website)
        return self.wiki_scraper.extract_company_data(article.url) if article else {
            'url': 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.',
            'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.',
            'full_infobox': 'k.A.'
        }

    # NOTE(review): force_all only enables both parts; a row that already has a
    # timestamp is still skipped below. Confirm this is intended for re-evaluation.

    # ---- Wikipedia part ----
    if force_all or process_wiki:
        if cell(39).strip() == "":
            suggested_raw = cell(11)
            has_suggestion = suggested_raw.strip() not in ["", "k.A."]
            if has_suggestion:
                try:
                    wiki_data = self.wiki_scraper.extract_company_data(suggested_raw.strip())
                except Exception as e:
                    debug_print(f"Fehler beim Laden des vorgeschlagenen Wikipedia-Artikels: {e}")
                    wiki_data = search_wiki_data()
            else:
                wiki_data = search_wiki_data()
            wiki_values = [
                suggested_raw if has_suggestion else "k.A.",
                wiki_data.get('url', 'k.A.'),
                wiki_data.get('first_paragraph', 'k.A.'),
                wiki_data.get('branche', 'k.A.'),
                wiki_data.get('umsatz', 'k.A.'),
                wiki_data.get('mitarbeiter', 'k.A.'),
                wiki_data.get('categories', 'k.A.')
            ]
            sheet.update(values=[wiki_values], range_name=wiki_update_range)
            sheet.update(values=[[current_dt]], range_name=dt_wiki_range)
        else:
            debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt überspringe Wiki-Auswertung.")

    # ---- ChatGPT part ----
    if force_all or process_chatgpt:
        if cell(40).strip() == "":
            # Without a Wikipedia lookup in this call the helpers get an empty dict.
            wiki_info = wiki_data if wiki_data is not None else {}

            crm_umsatz = cell(9, "k.A.")  # revenue CRM, column J
            abgleich_result = compare_umsatz_values(crm_umsatz, wiki_info.get('umsatz', 'k.A.'))
            # AH, not AG: AG is the ChatGPT revenue estimate after the column shift.
            sheet.update(values=[[abgleich_result]], range_name=f"AH{row_num}")

            crm_data = ";".join(row_data[1:11])        # columns B..K
            wiki_data_str = ";".join(row_data[11:18])  # columns L..R
            valid_result = validate_article_with_chatgpt(crm_data, wiki_data_str)
            # S, not R: R holds the Wikipedia categories written by the wiki part above.
            sheet.update(values=[[valid_result]], range_name=f"S{row_num}")

            fsm_result = evaluate_fsm_suitability(company_name, wiki_info)
            fsm_suitability = fsm_result["suitability"]
            # Z / AA, not Y / Z: Y is the industry deviation justification after the shift.
            sheet.update(values=[[fsm_suitability]], range_name=f"Z{row_num}")
            sheet.update(values=[[fsm_result["justification"]]], range_name=f"AA{row_num}")

            st_estimate = evaluate_servicetechnicians_estimate(company_name, wiki_info)
            sheet.update(values=[[st_estimate]], range_name=f"AE{row_num}")

            internal_value = cell(8, "k.A.")  # technicians CRM, column I
            internal_category = map_internal_technicians(internal_value) if internal_value != "k.A." else "k.A."
            if internal_category != "k.A." and st_estimate != internal_category:
                discrepancy = evaluate_servicetechnicians_explanation(company_name, st_estimate, wiki_info)
            else:
                discrepancy = "ok"
            sheet.update(values=[[discrepancy]], range_name=f"AF{row_num}")
            sheet.update(values=[[current_dt]], range_name=dt_chat_range)
        else:
            debug_print(f"Zeile {row_num}: ChatGPT-Timestamp bereits gesetzt überspringe ChatGPT-Auswertung.")

    # Only the version goes to AP. The former extra timestamp write to the same
    # cell was overwritten immediately and cost one API call per row.
    sheet.update(values=[[Config.VERSION]], range_name=ver_range)

    summary_source = wiki_data if wiki_data is not None else {}
    debug_print(f"✅ Aktualisiert: URL: {summary_source.get('url', 'k.A.')}, "
                f"Branche: {summary_source.get('branche', 'k.A.')}, "
                f"Umsatz-Abgleich: {abgleich_result}, "
                f"Validierung: {valid_result}, "
                f"FSM: {fsm_suitability}, "
                f"Servicetechniker-Schätzung: {st_estimate}")
    time.sleep(Config.RETRY_DELAY)
# ==================== NEUER MODUS 6: CONTACT RESEARCH (via SerpAPI) ==================== # ==================== NEUER MODUS 6: CONTACT RESEARCH (via SerpAPI) ====================
def process_contact_research(): def process_contact_research():
debug_print("Starte Contact Research (Modus 6)...") debug_print("Starte Contact Research (Modus 6)...")
@@ -815,31 +839,31 @@ def process_contact_research():
sh = gc.open_by_url(Config.SHEET_URL) sh = gc.open_by_url(Config.SHEET_URL)
main_sheet = sh.sheet1 main_sheet = sh.sheet1
data = main_sheet.get_all_values() data = main_sheet.get_all_values()
# Für jeden Datensatz werden für vier Kategorien die Trefferanzahl ermittelt: # Website ist nun in Spalte D (Index 3); Firmenname in Spalte B; Kurzform in Spalte C
for i, row in enumerate(data[1:], start=2): for i, row in enumerate(data[1:], start=2):
company_name = row[1] if len(row) > 1 else "" company_name = row[1] if len(row) > 1 else ""
website = row[2] if len(row) > 2 else "" # Verwende Kurzform (Spalte C, Index 2) falls vorhanden, sonst Firmenname
search_name = row[2].strip() if len(row) > 2 and row[2].strip() not in ["", "k.A."] else company_name
website = row[3] if len(row) > 3 else ""
if not company_name or not website: if not company_name or not website:
continue continue
count_service = count_linkedin_contacts(company_name, website, "Serviceleiter") count_service = count_linkedin_contacts(search_name, website, "Serviceleiter")
count_it = count_linkedin_contacts(company_name, website, "IT-Leiter") count_it = count_linkedin_contacts(search_name, website, "IT-Leiter")
count_management = count_linkedin_contacts(company_name, website, "Geschäftsführer") count_management = count_linkedin_contacts(search_name, website, "Geschäftsführer")
count_disponent = count_linkedin_contacts(company_name, website, "Disponent") count_disponent = count_linkedin_contacts(search_name, website, "Disponent")
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
main_sheet.update(values=[[str(count_service)]], range_name=f"AH{i}") main_sheet.update(values=[[str(count_service)]], range_name=f"AI{i}") # Neu: Spalte AI (Serviceleiter gefunden) vorher AH -> jetzt AI
main_sheet.update(f"AI{i}", [[str(count_it)]]) main_sheet.update(values=[[str(count_it)]], range_name=f"AJ{i}") # IT-Leiter gefunden in Spalte AJ
main_sheet.update(f"AJ{i}", [[str(count_management)]]) main_sheet.update(values=[[str(count_management)]], range_name=f"AK{i}") # Management gefunden in Spalte AK
main_sheet.update(f"AK{i}", [[str(count_disponent)]]) main_sheet.update(values=[[str(count_disponent)]], range_name=f"AL{i}") # Disponent gefunden in Spalte AL
main_sheet.update(f"AL{i}", [[current_dt]]) main_sheet.update(values=[[current_dt]], range_name=f"AM{i}") # Contact Search Timestamp in Spalte AM
debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Contact Search Timestamp gesetzt.") debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Contact Search Timestamp gesetzt.")
time.sleep(Config.RETRY_DELAY * 1.5) time.sleep(Config.RETRY_DELAY * 1.5)
debug_print("Contact Research abgeschlossen.") debug_print("Contact Research abgeschlossen.")
# ==================== NEUER MODUS: ALIGNMENT DEMO (für Hauptblatt und Contacts) ==================== # ==================== NEUER MODUS: ALIGNMENT DEMO (für Hauptblatt und Contacts) ====================
def alignment_demo_full(): def alignment_demo_full():
# Aktualisiere Hauptblatt
alignment_demo(GoogleSheetHandler().sheet) alignment_demo(GoogleSheetHandler().sheet)
# Aktualisiere auch das Contacts-Blatt
gc = gspread.authorize(ServiceAccountCredentials.from_json_keyfile_name( gc = gspread.authorize(ServiceAccountCredentials.from_json_keyfile_name(
Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"])) Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"]))
sh = gc.open_by_url(Config.SHEET_URL) sh = gc.open_by_url(Config.SHEET_URL)
@@ -847,11 +871,54 @@ def alignment_demo_full():
contacts_sheet = sh.worksheet("Contacts") contacts_sheet = sh.worksheet("Contacts")
except gspread.exceptions.WorksheetNotFound: except gspread.exceptions.WorksheetNotFound:
contacts_sheet = sh.add_worksheet(title="Contacts", rows="1000", cols="10") contacts_sheet = sh.add_worksheet(title="Contacts", rows="1000", cols="10")
header = ["Firmenname", "Website", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"] header = ["Firmenname", "Website", "Kurzform", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"]
contacts_sheet.update("A1:G1", [header]) contacts_sheet.update(values=[header], range_name="A1:H1")
debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.")
alignment_demo(contacts_sheet) alignment_demo(contacts_sheet)
debug_print("Alignment-Demo für Hauptblatt und Contacts abgeschlossen.") debug_print("Alignment-Demo für Hauptblatt und Contacts abgeschlossen.")
# ==================== NEUER MODUS: CONTACTS (LinkedIn) ====================
def process_contacts():
    """Search LinkedIn contacts for the companies in the main sheet and add them to "Contacts".

    For every data row of the main sheet that has both a company name
    (column B) and a website (column D), ``search_linkedin_contact`` is called
    once per target position. The short name from column C is used as the
    search name unless it is empty or "k.A."; the company name is used
    otherwise. All hits are collected and written in a single update below
    the current content of the "Contacts" worksheet, which is created with
    its header row when it does not exist yet.
    """
    debug_print("Starte LinkedIn-Kontaktsuche...")
    scopes = ["https://www.googleapis.com/auth/spreadsheets"]
    credentials = ServiceAccountCredentials.from_json_keyfile_name(Config.CREDENTIALS_FILE, scopes)
    spreadsheet = gspread.authorize(credentials).open_by_url(Config.SHEET_URL)

    try:
        contacts_sheet = spreadsheet.worksheet("Contacts")
    except gspread.exceptions.WorksheetNotFound:
        contacts_sheet = spreadsheet.add_worksheet(title="Contacts", rows="1000", cols="10")
        contacts_sheet.update(
            values=[["Firmenname", "Website", "Kurzform", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"]],
            range_name="A1:H1",
        )
        debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.")

    table = spreadsheet.sheet1.get_all_values()
    wanted_positions = ["Serviceleiter", "IT-Leiter", "Leiter After Sales", "Leiter Einsatzplanung"]
    found = []

    # Sheet rows are 1-based and the first row is skipped, so data starts at row 2.
    for sheet_row, cells in enumerate(table[1:], start=2):
        company_name = cells[1] if len(cells) > 1 else ""
        short_name = cells[2].strip() if len(cells) > 2 else ""
        search_name = company_name if short_name in ("", "k.A.") else short_name
        website = cells[3] if len(cells) > 3 else ""
        debug_print(f"Verarbeite Firma: '{company_name}' (Zeile {sheet_row}), Website: '{website}'")

        if not (company_name and website):
            debug_print("Überspringe, da Firmenname oder Website fehlt.")
            continue

        for position in wanted_positions:
            debug_print(f"Suche nach Position: '{position}' bei '{search_name}'")
            contact = search_linkedin_contact(search_name, website, position)
            if not contact:
                debug_print(f"Kein Kontakt für Position '{position}' bei '{search_name}' gefunden.")
                continue
            debug_print(f"Kontakt gefunden: {contact}")
            # The last two cells (salutation, e-mail) stay empty here.
            found.append([
                contact["Firmenname"], website, search_name,
                contact["Vorname"], contact["Nachname"], contact["Position"],
                "", "",
            ])

    if not found:
        debug_print("Keine Kontakte gefunden in der Haupttabelle.")
        return

    # Write directly below the rows that are already filled.
    first_free = len(contacts_sheet.get_all_values()) + 1
    target = f"A{first_free}:H{first_free + len(found) - 1}"
    contacts_sheet.update(values=found, range_name=target)
    debug_print(f"{len(found)} Kontakte in 'Contacts' hinzugefügt.")
# ==================== MAIN PROGRAMM ==================== # ==================== MAIN PROGRAMM ====================
if __name__ == "__main__": if __name__ == "__main__":
print("Modi:") print("Modi:")
@@ -861,6 +928,7 @@ if __name__ == "__main__":
print("4 = Nur Wikipedia-Suche (Zeilen ohne Wikipedia-Timestamp)") print("4 = Nur Wikipedia-Suche (Zeilen ohne Wikipedia-Timestamp)")
print("5 = Nur ChatGPT-Bewertung (Zeilen ohne ChatGPT-Timestamp)") print("5 = Nur ChatGPT-Bewertung (Zeilen ohne ChatGPT-Timestamp)")
print("6 = Contact Research (via SerpAPI)") print("6 = Contact Research (via SerpAPI)")
print("7 = Contacts (LinkedIn) Kontakte in das Contacts-Blatt schreiben")
mode_input = input("Wählen Sie den Modus: ").strip() mode_input = input("Wählen Sie den Modus: ").strip()
if mode_input == "2": if mode_input == "2":
MODE = "2" MODE = "2"
@@ -872,6 +940,8 @@ if __name__ == "__main__":
MODE = "5" MODE = "5"
elif mode_input == "6": elif mode_input == "6":
MODE = "6" MODE = "6"
elif mode_input == "7":
MODE = "7"
else: else:
MODE = "1" MODE = "1"
if MODE == "1": if MODE == "1":
@@ -887,13 +957,16 @@ if __name__ == "__main__":
processor.process_rows() processor.process_rows()
elif MODE == "4": elif MODE == "4":
processor = DataProcessor() processor = DataProcessor()
# Für Mode 4: Nur Wikipedia-Suche for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
processor.process_rows(num_rows=0) # Unser _process_single_row prüft dann die Wiki-Timestamp-Bedingung if len(row) <= 39 or row[39].strip() == "":
process_wikipedia_only() processor._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
elif MODE == "5": elif MODE == "5":
processor = DataProcessor() processor = DataProcessor()
# Für Mode 5: Nur ChatGPT-Bewertung for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
processor.process_rows(num_rows=0) if len(row) <= 40 or row[40].strip() == "":
processor._process_single_row(i, row, process_wiki=False, process_chatgpt=True)
elif MODE == "6": elif MODE == "6":
process_contact_research() process_contact_research()
elif MODE == "7":
process_contacts()
print(f"\n✅ Auswertung abgeschlossen ({Config.VERSION})") print(f"\n✅ Auswertung abgeschlossen ({Config.VERSION})")