v1.3.11: Spalten um +1 verschoben, Kurzform in Spalte C; alle Indizes aktualisiert

- Neue Header: Spalte C enthält jetzt die Kurzform des Firmennamens.
- Website nach Spalte D verschoben; alle nachfolgenden Spalten sind um +1 verschoben.
- Alle Code-Referenzen (Update-Ranges, Indizes in _process_single_row, get_start_index, process_contacts, process_contact_research) wurden entsprechend angepasst.
- Für LinkedIn-Suchen wird jetzt die Kurzform (Spalte C) bevorzugt verwendet.
This commit is contained in:
2025-04-03 14:59:39 +00:00
parent e2072bfb8b
commit 6ae09b4927

View File

@@ -14,7 +14,7 @@ import csv
# ==================== KONFIGURATION ==================== # ==================== KONFIGURATION ====================
class Config: class Config:
VERSION = "v1.3.9" # v1.3.9: Alle bisherigen Funktionen inkl. Reg. Modus, Re-Eval, Alignment, Wiki, ChatGPT, Contact Research. VERSION = "v1.3.11" # v1.3.11: Spalten um +1 verschoben, Kurzform in Spalte C; alle Referenzen angepasst.
LANG = "de" LANG = "de"
CREDENTIALS_FILE = "service_account.json" CREDENTIALS_FILE = "service_account.json"
SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo" SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo"
@@ -335,7 +335,11 @@ def search_linkedin_contact(company_name, website, position_query):
except Exception as e: except Exception as e:
debug_print("Fehler beim Lesen des SerpAPI-Schlüssels: " + str(e)) debug_print("Fehler beim Lesen des SerpAPI-Schlüssels: " + str(e))
return None return None
query = f'site:linkedin.com/in "{position_query}" "{company_name}"' # Nutze hier die Kurzform, falls vorhanden (Spalte C, Index 2); ansonsten Firmenname (Index 1)
search_name = company_name
if company_name == "" and website != "":
search_name = website
query = f'site:linkedin.com/in "{position_query}" "{search_name}"'
debug_print(f"Erstelle LinkedIn-Query: {query}") debug_print(f"Erstelle LinkedIn-Query: {query}")
params = { params = {
"engine": "google", "engine": "google",
@@ -378,7 +382,7 @@ def search_linkedin_contact(company_name, website, position_query):
debug_print(f"Fehler bei der SerpAPI-Suche: {e}") debug_print(f"Fehler bei der SerpAPI-Suche: {e}")
return None return None
# ==================== NEUE FUNKTION: ZÄHLEN DER LINKEDIN-KONTAKTE (für Contact Research) ==================== # ==================== NEUE FUNKTION: ZÄHLEN DER LINKEDIN-KONTAKTE ====================
def count_linkedin_contacts(company_name, website, position_query): def count_linkedin_contacts(company_name, website, position_query):
try: try:
with open("serpApiKey.txt", "r") as f: with open("serpApiKey.txt", "r") as f:
@@ -416,73 +420,91 @@ def process_contact_research():
sh = gc.open_by_url(Config.SHEET_URL) sh = gc.open_by_url(Config.SHEET_URL)
main_sheet = sh.sheet1 main_sheet = sh.sheet1
data = main_sheet.get_all_values() data = main_sheet.get_all_values()
# Für jeden Datensatz werden für vier Kategorien die Trefferanzahl ermittelt: # Website ist nun in Spalte D (Index 3), Firmenname in Spalte B (Index 1)
for i, row in enumerate(data[1:], start=2): for i, row in enumerate(data[1:], start=2):
company_name = row[1] if len(row) > 1 else "" company_name = row[1] if len(row) > 1 else ""
website = row[2] if len(row) > 2 else "" # Verwende die Kurzform (Spalte C, Index 2) für die Suche, wenn vorhanden, ansonsten Firmenname
search_name = row[2].strip() if len(row) > 2 and row[2].strip() not in ["", "k.A."] else company_name
website = row[3] if len(row) > 3 else ""
if not company_name or not website: if not company_name or not website:
continue continue
count_service = count_linkedin_contacts(company_name, website, "Serviceleiter") count_service = count_linkedin_contacts(search_name, website, "Serviceleiter")
count_it = count_linkedin_contacts(company_name, website, "IT-Leiter") count_it = count_linkedin_contacts(search_name, website, "IT-Leiter")
count_management = count_linkedin_contacts(company_name, website, "Geschäftsführer") count_management = count_linkedin_contacts(search_name, website, "Geschäftsführer")
count_disponent = count_linkedin_contacts(company_name, website, "Disponent") count_disponent = count_linkedin_contacts(search_name, website, "Disponent")
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Verwende die korrekte Parameterreihenfolge: Werte zuerst, dann Range-Name main_sheet.update(values=[[str(count_service)]], range_name=f"AH{i}")
main_sheet.update([[str(count_service)]], f"AH{i}") main_sheet.update(values=[[str(count_it)]], range_name=f"AI{i}")
main_sheet.update([[str(count_it)]], f"AI{i}") main_sheet.update(values=[[str(count_management)]], range_name=f"AJ{i}")
main_sheet.update([[str(count_management)]], f"AJ{i}") main_sheet.update(values=[[str(count_disponent)]], range_name=f"AK{i}")
main_sheet.update([[str(count_disponent)]], f"AK{i}") main_sheet.update(values=[[current_dt]], range_name=f"AL{i}")
main_sheet.update([[current_dt]], f"AL{i}") debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Contact Search Timestamp gesetzt.")
debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Timestamp gesetzt.")
time.sleep(Config.RETRY_DELAY * 1.5) time.sleep(Config.RETRY_DELAY * 1.5)
debug_print("Contact Research abgeschlossen.") debug_print("Contact Research abgeschlossen.")
# ==================== ALIGNMENT DEMO (Modus 3) ==================== # ==================== NEUER MODUS: ALIGNMENT DEMO (für Hauptblatt und Contacts) ====================
def alignment_demo_full():
alignment_demo(GoogleSheetHandler().sheet)
gc = gspread.authorize(ServiceAccountCredentials.from_json_keyfile_name(
Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"]))
sh = gc.open_by_url(Config.SHEET_URL)
try:
contacts_sheet = sh.worksheet("Contacts")
except gspread.exceptions.WorksheetNotFound:
contacts_sheet = sh.add_worksheet(title="Contacts", rows="1000", cols="10")
header = ["Firmenname", "Website", "Kurzform", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"]
contacts_sheet.update(values=[header], range_name="A1:H1")
debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.")
alignment_demo(contacts_sheet)
debug_print("Alignment-Demo für Hauptblatt und Contacts abgeschlossen.")
# ==================== ALIGNMENT DEMO (Hauptblatt) ====================
def alignment_demo(sheet): def alignment_demo(sheet):
new_headers = [ new_headers = [
"Spalte A (ReEval Flag)", "Spalte A (ReEval Flag)",
"Spalte B (Firmenname)", "Spalte B (Firmenname)",
"Spalte C (Website)", "Spalte C (Kurzform des Firmennamens)",
"Spalte D (Ort)", "Spalte D (Website)",
"Spalte E (Beschreibung)", "Spalte E (Ort)",
"Spalte F (Aktuelle Branche)", "Spalte F (Beschreibung)",
"Spalte G (Beschreibung Branche extern)", "Spalte G (Aktuelle Branche)",
"Spalte H (Anzahl Techniker CRM)", "Spalte H (Beschreibung Branche extern)",
"Spalte I (Umsatz CRM)", "Spalte I (Anzahl Techniker CRM)",
"Spalte J (Anzahl Mitarbeiter CRM)", "Spalte J (Umsatz CRM)",
"Spalte K (Vorschlag Wiki URL)", "Spalte K (Anzahl Mitarbeiter CRM)",
"Spalte L (Wikipedia URL)", "Spalte L (Vorschlag Wiki URL)",
"Spalte M (Wikipedia Absatz)", "Spalte M (Wikipedia URL)",
"Spalte N (Wikipedia Branche)", "Spalte N (Wikipedia Absatz)",
"Spalte O (Wikipedia Umsatz)", "Spalte O (Wikipedia Branche)",
"Spalte P (Wikipedia Mitarbeiter)", "Spalte P (Wikipedia Umsatz)",
"Spalte Q (Wikipedia Kategorien)", "Spalte Q (Wikipedia Mitarbeiter)",
"Spalte R (Konsistenzprüfung)", "Spalte R (Wikipedia Kategorien)",
"Spalte S (Begründung bei Inkonsistenz)", "Spalte S (Konsistenzprüfung)",
"Spalte T (Vorschlag Wiki Artikel ChatGPT)", "Spalte T (Begründung bei Inkonsistenz)",
"Spalte U (Begründung bei Abweichung)", "Spalte U (Vorschlag Wiki Artikel ChatGPT)",
"Spalte V (Vorschlag neue Branche)", "Spalte V (Begründung bei Abweichung)",
"Spalte W (Konsistenzprüfung Branche)", "Spalte W (Vorschlag neue Branche)",
"Spalte X (Begründung Abweichung Branche)", "Spalte X (Konsistenzprüfung Branche)",
"Spalte Y (FSM Relevanz Ja / Nein)", "Spalte Y (Begründung Abweichung Branche)",
"Spalte Z (Begründung für FSM Relevanz)", "Spalte Z (FSM Relevanz Ja / Nein)",
"Spalte AA (Schätzung Anzahl Mitarbeiter)", "Spalte AA (Begründung für FSM Relevanz)",
"Spalte AB (Konsistenzprüfung Mitarbeiterzahl)", "Spalte AB (Schätzung Anzahl Mitarbeiter)",
"Spalte AC (Begründung für Abweichung Mitarbeiterzahl)", "Spalte AC (Konsistenzprüfung Mitarbeiterzahl)",
"Spalte AD (Einschätzung Anzahl Servicetechniker)", "Spalte AD (Begründung für Abweichung Mitarbeiterzahl)",
"Spalte AE (Begründung bei Abweichung Anzahl Servicetechniker)", "Spalte AE (Einschätzung Anzahl Servicetechniker)",
"Spalte AF (Schätzung Umsatz ChatGPT)", "Spalte AF (Begründung bei Abweichung Anzahl Servicetechniker)",
"Spalte AG (Begründung für Abweichung Umsatz)", "Spalte AG (Schätzung Umsatz ChatGPT)",
"Spalte AH (Serviceleiter gefunden)", "Spalte AH (Begründung für Abweichung Umsatz)",
"Spalte AI (IT-Leiter gefunden)", "Spalte AI (Serviceleiter gefunden)",
"Spalte AJ (Management gefunden)", "Spalte AJ (IT-Leiter gefunden)",
"Spalte AK (Disponent gefunden)", "Spalte AK (Management gefunden)",
"Spalte AL (Contact Search Timestamp)", "Spalte AL (Disponent gefunden)",
"Spalte AM (Wikipedia Timestamp)", "Spalte AM (Contact Search Timestamp)",
"Spalte AN (ChatGPT Timestamp)", "Spalte AN (Wikipedia Timestamp)",
"Spalte AO (Version)" "Spalte AO (ChatGPT Timestamp)",
"Spalte AP (Version)"
] ]
header_range = "A11200:AO11200" header_range = "A11200:AP11200"
sheet.update(values=[new_headers], range_name=header_range) sheet.update(values=[new_headers], range_name=header_range)
print("Alignment-Demo abgeschlossen: Neue Spaltenüberschriften in Zeile 11200 geschrieben.") print("Alignment-Demo abgeschlossen: Neue Spaltenüberschriften in Zeile 11200 geschrieben.")
@@ -680,7 +702,8 @@ class GoogleSheetHandler:
self.sheet = gspread.authorize(creds).open_by_url(Config.SHEET_URL).sheet1 self.sheet = gspread.authorize(creds).open_by_url(Config.SHEET_URL).sheet1
self.sheet_values = self.sheet.get_all_values() self.sheet_values = self.sheet.get_all_values()
def get_start_index(self): def get_start_index(self):
filled_n = [row[38] if len(row) > 38 else '' for row in self.sheet_values[1:]] # Spalte AM = Wikipedia Timestamp # Wikipedia Timestamp ist jetzt in Spalte AN (Index 39)
filled_n = [row[39] if len(row) > 39 else '' for row in self.sheet_values[1:]]
return next((i + 1 for i, v in enumerate(filled_n, start=1) if not str(v).strip()), len(filled_n) + 1) return next((i + 1 for i, v in enumerate(filled_n, start=1) if not str(v).strip()), len(filled_n) + 1)
# ==================== DATA PROCESSOR ==================== # ==================== DATA PROCESSOR ====================
@@ -689,27 +712,27 @@ class DataProcessor:
self.sheet_handler = GoogleSheetHandler() self.sheet_handler = GoogleSheetHandler()
self.wiki_scraper = WikipediaScraper() self.wiki_scraper = WikipediaScraper()
def process_rows(self, num_rows=None): def process_rows(self, num_rows=None):
# MODE 1: Regulärer Modus nur Zeilen ohne entsprechende Timestamps werden bearbeitet
if MODE == "2": if MODE == "2":
print("Re-Evaluierungsmodus: Verarbeitung aller Zeilen mit 'x' in Spalte A.") print("Re-Evaluierungsmodus: Verarbeitung aller Zeilen mit 'x' in Spalte A.")
for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2): for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2):
if row[0].strip().lower() == "x": if row[0].strip().lower() == "x":
self._process_single_row(i, row, force_all=True) self._process_single_row(i, row, force_all=True)
elif MODE == "3": elif MODE == "3":
print("Alignment-Demo-Modus: Schreibe neue Spaltenüberschriften in Zeile 11200.") print("Alignment-Demo-Modus: Schreibe neue Spaltenüberschriften in Hauptblatt und Contacts.")
alignment_demo(self.sheet_handler.sheet) alignment_demo_full()
elif MODE == "4": elif MODE == "4":
# Nur Wikipedia-Suche: nur Zeilen ohne Wikipedia-Timestamp (Spalte AM, Index 38) processor = DataProcessor()
for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2): for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
if len(row) <= 38 or row[38].strip() == "": # Nur Zeilen ohne Wikipedia-Timestamp (Spalte AN, Index 39)
self._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
elif MODE == "5":
# Nur ChatGPT Bewertung: nur Zeilen ohne ChatGPT-Timestamp (Spalte AN, Index 39)
for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2):
if len(row) <= 39 or row[39].strip() == "": if len(row) <= 39 or row[39].strip() == "":
self._process_single_row(i, row, process_wiki=False, process_chatgpt=True) processor._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
elif MODE == "5":
processor = DataProcessor()
# Nur Zeilen ohne ChatGPT-Timestamp (Spalte AO, Index 40)
for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
if len(row) <= 40 or row[40].strip() == "":
processor._process_single_row(i, row, process_wiki=False, process_chatgpt=True)
else: else:
# Regulärer Modus: Bearbeite nur Zeilen, die noch nicht vollständig bewertet wurden
start_index = self.sheet_handler.get_start_index() start_index = self.sheet_handler.get_start_index()
print(f"Starte bei Zeile {start_index+1}") print(f"Starte bei Zeile {start_index+1}")
rows_processed = 0 rows_processed = 0
@@ -721,22 +744,21 @@ class DataProcessor:
self._process_single_row(i, row) self._process_single_row(i, row)
rows_processed += 1 rows_processed += 1
def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=True, process_chatgpt=True): def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=True, process_chatgpt=True):
# Spalte B: Firmenname, Spalte C: Kurzform, Spalte D: Website
company_name = row_data[1] if len(row_data) > 1 else "" company_name = row_data[1] if len(row_data) > 1 else ""
website = row_data[2] if len(row_data) > 2 else "" website = row_data[3] if len(row_data) > 3 else ""
wiki_update_range = f"K{row_num}:Q{row_num}" wiki_update_range = f"L{row_num}:R{row_num}" # Vorschlag Wiki URL bis Wikipedia Kategorien (Spalte L bis R)
dt_wiki_range = f"AM{row_num}" # Wikipedia Timestamp dt_wiki_range = f"AN{row_num}" # Wikipedia Timestamp (Spalte AN)
dt_chat_range = f"AN{row_num}" # ChatGPT Timestamp dt_chat_range = f"AO{row_num}" # ChatGPT Timestamp (Spalte AO)
ver_range = f"AO{row_num}" # Version ver_range = f"AP{row_num}" # Version (Spalte AP)
print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}") print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}")
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Wikipedia-Teil: Wird ausgeführt, wenn process_wiki True ist oder force_all aktiv ist. # Wikipedia-Teil
if force_all or process_wiki: if force_all or process_wiki:
# Hier zuerst prüfen, ob wir bereits einen Wiki-Timestamp haben (Spalte AM) if len(row_data) <= 39 or row_data[39].strip() == "":
if len(row_data) <= 38 or row_data[38].strip() == "": if len(row_data) > 11 and row_data[11].strip() not in ["", "k.A."]:
# Führe die Wikipedia-Auswertung durch wiki_url = row_data[11].strip()
if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."]:
wiki_url = row_data[10].strip()
try: try:
wiki_data = self.wiki_scraper.extract_company_data(wiki_url) wiki_data = self.wiki_scraper.extract_company_data(wiki_url)
except Exception as e: except Exception as e:
@@ -755,7 +777,7 @@ def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=T
'full_infobox': 'k.A.' 'full_infobox': 'k.A.'
} }
wiki_values = [ wiki_values = [
row_data[10] if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."] else "k.A.", row_data[11] if len(row_data) > 11 and row_data[11].strip() not in ["", "k.A."] else "k.A.",
wiki_data.get('url', 'k.A.'), wiki_data.get('url', 'k.A.'),
wiki_data.get('first_paragraph', 'k.A.'), wiki_data.get('first_paragraph', 'k.A.'),
wiki_data.get('branche', 'k.A.'), wiki_data.get('branche', 'k.A.'),
@@ -768,37 +790,39 @@ def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=T
else: else:
debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt überspringe Wiki-Auswertung.") debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt überspringe Wiki-Auswertung.")
# ChatGPT-Teil: Wird nur ausgeführt, wenn process_chatgpt True ist oder force_all aktiv ist. # ChatGPT-Teil
if force_all or process_chatgpt: if force_all or process_chatgpt:
# Hier prüfen, ob bereits ein ChatGPT-Timestamp in Spalte AN vorliegt if len(row_data) <= 40 or row_data[40].strip() == "":
if len(row_data) <= 39 or row_data[39].strip() == "": # Umsatz CRM ist nun in Spalte J (Index 9), Anzahl Mitarbeiter in Spalte K (Index 10)
crm_umsatz = row_data[8] if len(row_data) > 8 else "k.A." crm_umsatz = row_data[9] if len(row_data) > 9 else "k.A."
abgleich_result = compare_umsatz_values(crm_umsatz, wiki_data.get('umsatz', 'k.A.') if 'wiki_data' in locals() else "k.A.") abgleich_result = compare_umsatz_values(crm_umsatz, wiki_data.get('umsatz', 'k.A.') if 'wiki_data' in locals() else "k.A.")
self.sheet_handler.sheet.update(values=[[abgleich_result]], range_name=f"AG{row_num}") self.sheet_handler.sheet.update(values=[[abgleich_result]], range_name=f"AG{row_num}")
crm_data = ";".join(row_data[1:10]) # CRM-Daten: von Spalte B bis K (Indices 1 bis 10)
wiki_data_str = ";".join(row_data[11:17]) crm_data = ";".join(row_data[1:11])
# Wiki-Daten: von Spalte L bis R (Indices 11 bis 18)
wiki_data_str = ";".join(row_data[11:18])
valid_result = validate_article_with_chatgpt(crm_data, wiki_data_str) valid_result = validate_article_with_chatgpt(crm_data, wiki_data_str)
self.sheet_handler.sheet.update(values=[[valid_result]], range_name=f"R{row_num}") self.sheet_handler.sheet.update(values=[[valid_result]], range_name=f"R{row_num}")
fsm_result = evaluate_fsm_suitability(company_name, wiki_data if 'wiki_data' in locals() else {}) fsm_result = evaluate_fsm_suitability(company_name, wiki_data if 'wiki_data' in locals() else {})
self.sheet_handler.sheet.update(values=[[fsm_result["suitability"]]], range_name=f"Y{row_num}") self.sheet_handler.sheet.update(values=[[fsm_result["suitability"]]], range_name=f"Y{row_num}")
self.sheet_handler.sheet.update(values=[[fsm_result["justification"]]], range_name=f"Z{row_num}") self.sheet_handler.sheet.update(values=[[fsm_result["justification"]]], range_name=f"Z{row_num}")
st_estimate = evaluate_servicetechnicians_estimate(company_name, wiki_data if 'wiki_data' in locals() else {}) st_estimate = evaluate_servicetechnicians_estimate(company_name, wiki_data if 'wiki_data' in locals() else {})
self.sheet_handler.sheet.update(values=[[st_estimate]], range_name=f"AD{row_num}") self.sheet_handler.sheet.update(values=[[st_estimate]], range_name=f"AE{row_num}")
internal_value = row_data[7] if len(row_data) > 7 else "k.A." internal_value = row_data[8] if len(row_data) > 8 else "k.A." # Anzahl Techniker CRM in Spalte I (Index 8)
internal_category = map_internal_technicians(internal_value) if internal_value != "k.A." else "k.A." internal_category = map_internal_technicians(internal_value) if internal_value != "k.A." else "k.A."
if internal_category != "k.A." and st_estimate != internal_category: if internal_category != "k.A." and st_estimate != internal_category:
explanation = evaluate_servicetechnicians_explanation(company_name, st_estimate, wiki_data if 'wiki_data' in locals() else {}) explanation = evaluate_servicetechnicians_explanation(company_name, st_estimate, wiki_data if 'wiki_data' in locals() else {})
discrepancy = explanation discrepancy = explanation
else: else:
discrepancy = "ok" discrepancy = "ok"
self.sheet_handler.sheet.update(values=[[discrepancy]], range_name=f"AE{row_num}") self.sheet_handler.sheet.update(values=[[discrepancy]], range_name=f"AF{row_num}")
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AN{row_num}") self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_chat_range)
else: else:
debug_print(f"Zeile {row_num}: ChatGPT-Timestamp bereits gesetzt überspringe ChatGPT-Auswertung.") debug_print(f"Zeile {row_num}: ChatGPT-Timestamp bereits gesetzt überspringe ChatGPT-Auswertung.")
# Aktualisiere letzten Timestamp und Version (Spalte AO) # Aktualisiere letzten Timestamp und Version (Spalte AP)
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AO{row_num}") self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AP{row_num}")
self.sheet_handler.sheet.update(values=[[Config.VERSION]], range_name=f"AO{row_num}") self.sheet_handler.sheet.update(values=[[Config.VERSION]], range_name=f"AP{row_num}")
debug_print(f"✅ Aktualisiert: URL: {(wiki_data.get('url', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, " debug_print(f"✅ Aktualisiert: URL: {(wiki_data.get('url', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, "
f"Branche: {(wiki_data.get('branche', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, " f"Branche: {(wiki_data.get('branche', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, "
f"Umsatz-Abgleich: {abgleich_result if 'abgleich_result' in locals() else 'k.A.'}, " f"Umsatz-Abgleich: {abgleich_result if 'abgleich_result' in locals() else 'k.A.'}, "
@@ -815,31 +839,31 @@ def process_contact_research():
sh = gc.open_by_url(Config.SHEET_URL) sh = gc.open_by_url(Config.SHEET_URL)
main_sheet = sh.sheet1 main_sheet = sh.sheet1
data = main_sheet.get_all_values() data = main_sheet.get_all_values()
# Für jeden Datensatz werden für vier Kategorien die Trefferanzahl ermittelt: # Website ist nun in Spalte D (Index 3); Firmenname in Spalte B; Kurzform in Spalte C
for i, row in enumerate(data[1:], start=2): for i, row in enumerate(data[1:], start=2):
company_name = row[1] if len(row) > 1 else "" company_name = row[1] if len(row) > 1 else ""
website = row[2] if len(row) > 2 else "" # Verwende Kurzform (Spalte C, Index 2) falls vorhanden, sonst Firmenname
search_name = row[2].strip() if len(row) > 2 and row[2].strip() not in ["", "k.A."] else company_name
website = row[3] if len(row) > 3 else ""
if not company_name or not website: if not company_name or not website:
continue continue
count_service = count_linkedin_contacts(company_name, website, "Serviceleiter") count_service = count_linkedin_contacts(search_name, website, "Serviceleiter")
count_it = count_linkedin_contacts(company_name, website, "IT-Leiter") count_it = count_linkedin_contacts(search_name, website, "IT-Leiter")
count_management = count_linkedin_contacts(company_name, website, "Geschäftsführer") count_management = count_linkedin_contacts(search_name, website, "Geschäftsführer")
count_disponent = count_linkedin_contacts(company_name, website, "Disponent") count_disponent = count_linkedin_contacts(search_name, website, "Disponent")
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
main_sheet.update(values=[[str(count_service)]], range_name=f"AH{i}") main_sheet.update(values=[[str(count_service)]], range_name=f"AI{i}") # Neu: Spalte AI (Serviceleiter gefunden) vorher AH -> jetzt AI
main_sheet.update(f"AI{i}", [[str(count_it)]]) main_sheet.update(values=[[str(count_it)]], range_name=f"AJ{i}") # IT-Leiter gefunden in Spalte AJ
main_sheet.update(f"AJ{i}", [[str(count_management)]]) main_sheet.update(values=[[str(count_management)]], range_name=f"AK{i}") # Management gefunden in Spalte AK
main_sheet.update(f"AK{i}", [[str(count_disponent)]]) main_sheet.update(values=[[str(count_disponent)]], range_name=f"AL{i}") # Disponent gefunden in Spalte AL
main_sheet.update(f"AL{i}", [[current_dt]]) main_sheet.update(values=[[current_dt]], range_name=f"AM{i}") # Contact Search Timestamp in Spalte AM
debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Contact Search Timestamp gesetzt.") debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Contact Search Timestamp gesetzt.")
time.sleep(Config.RETRY_DELAY * 1.5) time.sleep(Config.RETRY_DELAY * 1.5)
debug_print("Contact Research abgeschlossen.") debug_print("Contact Research abgeschlossen.")
# ==================== NEUER MODUS: ALIGNMENT DEMO (für Hauptblatt und Contacts) ==================== # ==================== NEUER MODUS: ALIGNMENT DEMO (für Hauptblatt und Contacts) ====================
def alignment_demo_full(): def alignment_demo_full():
# Aktualisiere Hauptblatt
alignment_demo(GoogleSheetHandler().sheet) alignment_demo(GoogleSheetHandler().sheet)
# Aktualisiere auch das Contacts-Blatt
gc = gspread.authorize(ServiceAccountCredentials.from_json_keyfile_name( gc = gspread.authorize(ServiceAccountCredentials.from_json_keyfile_name(
Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"])) Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"]))
sh = gc.open_by_url(Config.SHEET_URL) sh = gc.open_by_url(Config.SHEET_URL)
@@ -847,11 +871,54 @@ def alignment_demo_full():
contacts_sheet = sh.worksheet("Contacts") contacts_sheet = sh.worksheet("Contacts")
except gspread.exceptions.WorksheetNotFound: except gspread.exceptions.WorksheetNotFound:
contacts_sheet = sh.add_worksheet(title="Contacts", rows="1000", cols="10") contacts_sheet = sh.add_worksheet(title="Contacts", rows="1000", cols="10")
header = ["Firmenname", "Website", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"] header = ["Firmenname", "Website", "Kurzform", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"]
contacts_sheet.update("A1:G1", [header]) contacts_sheet.update(values=[header], range_name="A1:H1")
debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.")
alignment_demo(contacts_sheet) alignment_demo(contacts_sheet)
debug_print("Alignment-Demo für Hauptblatt und Contacts abgeschlossen.") debug_print("Alignment-Demo für Hauptblatt und Contacts abgeschlossen.")
# ==================== NEUER MODUS: CONTACTS (LinkedIn) ====================
def process_contacts():
debug_print("Starte LinkedIn-Kontaktsuche...")
gc = gspread.authorize(ServiceAccountCredentials.from_json_keyfile_name(
Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"]))
sh = gc.open_by_url(Config.SHEET_URL)
try:
contacts_sheet = sh.worksheet("Contacts")
except gspread.exceptions.WorksheetNotFound:
contacts_sheet = sh.add_worksheet(title="Contacts", rows="1000", cols="10")
header = ["Firmenname", "Website", "Kurzform", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"]
contacts_sheet.update(values=[header], range_name="A1:H1")
debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.")
main_sheet = sh.sheet1
data = main_sheet.get_all_values()
positions = ["Serviceleiter", "IT-Leiter", "Leiter After Sales", "Leiter Einsatzplanung"]
new_rows = []
for idx, row in enumerate(data[1:], start=2):
# Firmenname in Spalte B (Index 1), Kurzform in Spalte C (Index 2), Website in Spalte D (Index 3)
company_name = row[1] if len(row) > 1 else ""
search_name = row[2].strip() if len(row) > 2 and row[2].strip() not in ["", "k.A."] else company_name
website = row[3] if len(row) > 3 else ""
debug_print(f"Verarbeite Firma: '{company_name}' (Zeile {idx}), Website: '{website}'")
if not company_name or not website:
debug_print("Überspringe, da Firmenname oder Website fehlt.")
continue
for pos in positions:
debug_print(f"Suche nach Position: '{pos}' bei '{search_name}'")
contact = search_linkedin_contact(search_name, website, pos)
if contact:
debug_print(f"Kontakt gefunden: {contact}")
new_rows.append([contact["Firmenname"], website, search_name, contact["Vorname"], contact["Nachname"], contact["Position"], "", ""])
else:
debug_print(f"Kein Kontakt für Position '{pos}' bei '{search_name}' gefunden.")
if new_rows:
last_row = len(contacts_sheet.get_all_values()) + 1
range_str = f"A{last_row}:H{last_row + len(new_rows) - 1}"
contacts_sheet.update(values=new_rows, range_name=range_str)
debug_print(f"{len(new_rows)} Kontakte in 'Contacts' hinzugefügt.")
else:
debug_print("Keine Kontakte gefunden in der Haupttabelle.")
# ==================== MAIN PROGRAMM ==================== # ==================== MAIN PROGRAMM ====================
if __name__ == "__main__": if __name__ == "__main__":
print("Modi:") print("Modi:")
@@ -861,6 +928,7 @@ if __name__ == "__main__":
print("4 = Nur Wikipedia-Suche (Zeilen ohne Wikipedia-Timestamp)") print("4 = Nur Wikipedia-Suche (Zeilen ohne Wikipedia-Timestamp)")
print("5 = Nur ChatGPT-Bewertung (Zeilen ohne ChatGPT-Timestamp)") print("5 = Nur ChatGPT-Bewertung (Zeilen ohne ChatGPT-Timestamp)")
print("6 = Contact Research (via SerpAPI)") print("6 = Contact Research (via SerpAPI)")
print("7 = Contacts (LinkedIn) Kontakte in das Contacts-Blatt schreiben")
mode_input = input("Wählen Sie den Modus: ").strip() mode_input = input("Wählen Sie den Modus: ").strip()
if mode_input == "2": if mode_input == "2":
MODE = "2" MODE = "2"
@@ -872,6 +940,8 @@ if __name__ == "__main__":
MODE = "5" MODE = "5"
elif mode_input == "6": elif mode_input == "6":
MODE = "6" MODE = "6"
elif mode_input == "7":
MODE = "7"
else: else:
MODE = "1" MODE = "1"
if MODE == "1": if MODE == "1":
@@ -887,13 +957,16 @@ if __name__ == "__main__":
processor.process_rows() processor.process_rows()
elif MODE == "4": elif MODE == "4":
processor = DataProcessor() processor = DataProcessor()
# Für Mode 4: Nur Wikipedia-Suche for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
processor.process_rows(num_rows=0) # Unser _process_single_row prüft dann die Wiki-Timestamp-Bedingung if len(row) <= 39 or row[39].strip() == "":
process_wikipedia_only() processor._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
elif MODE == "5": elif MODE == "5":
processor = DataProcessor() processor = DataProcessor()
# Für Mode 5: Nur ChatGPT-Bewertung for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
processor.process_rows(num_rows=0) if len(row) <= 40 or row[40].strip() == "":
processor._process_single_row(i, row, process_wiki=False, process_chatgpt=True)
elif MODE == "6": elif MODE == "6":
process_contact_research() process_contact_research()
elif MODE == "7":
process_contacts()
print(f"\n✅ Auswertung abgeschlossen ({Config.VERSION})") print(f"\n✅ Auswertung abgeschlossen ({Config.VERSION})")