v1.3.11: Spalten um +1 verschoben, Kurzform in Spalte C; alle Indizes aktualisiert

- Neue Header: Spalte C enthält jetzt die Kurzform des Firmennamens.
- Website verschoben auf Spalte D, und alle nachfolgenden Spalten um +1.
- Alle Code-Referenzen (Update-Ranges, Indizes in _process_single_row, get_start_index, process_contacts, process_contact_research) wurden entsprechend angepasst.
- Für LinkedIn-Suchen wird jetzt die Kurzform (Spalte C) bevorzugt verwendet.
This commit is contained in:
2025-04-03 14:59:39 +00:00
parent e2072bfb8b
commit 6ae09b4927

View File

@@ -14,7 +14,7 @@ import csv
# ==================== KONFIGURATION ====================
class Config:
VERSION = "v1.3.9" # v1.3.9: Alle bisherigen Funktionen inkl. Reg. Modus, Re-Eval, Alignment, Wiki, ChatGPT, Contact Research.
VERSION = "v1.3.11" # v1.3.11: Spalten um +1 verschoben, Kurzform in Spalte C; alle Referenzen angepasst.
LANG = "de"
CREDENTIALS_FILE = "service_account.json"
SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo"
@@ -335,7 +335,11 @@ def search_linkedin_contact(company_name, website, position_query):
except Exception as e:
debug_print("Fehler beim Lesen des SerpAPI-Schlüssels: " + str(e))
return None
query = f'site:linkedin.com/in "{position_query}" "{company_name}"'
# Nutze hier die Kurzform, falls vorhanden (Spalte C, Index 2); ansonsten Firmenname (Index 1)
search_name = company_name
if company_name == "" and website != "":
search_name = website
query = f'site:linkedin.com/in "{position_query}" "{search_name}"'
debug_print(f"Erstelle LinkedIn-Query: {query}")
params = {
"engine": "google",
@@ -378,7 +382,7 @@ def search_linkedin_contact(company_name, website, position_query):
debug_print(f"Fehler bei der SerpAPI-Suche: {e}")
return None
# ==================== NEUE FUNKTION: ZÄHLEN DER LINKEDIN-KONTAKTE (für Contact Research) ====================
# ==================== NEUE FUNKTION: ZÄHLEN DER LINKEDIN-KONTAKTE ====================
def count_linkedin_contacts(company_name, website, position_query):
try:
with open("serpApiKey.txt", "r") as f:
@@ -416,73 +420,91 @@ def process_contact_research():
sh = gc.open_by_url(Config.SHEET_URL)
main_sheet = sh.sheet1
data = main_sheet.get_all_values()
# Für jeden Datensatz werden für vier Kategorien die Trefferanzahl ermittelt:
# Website ist nun in Spalte D (Index 3), Firmenname in Spalte B (Index 1)
for i, row in enumerate(data[1:], start=2):
company_name = row[1] if len(row) > 1 else ""
website = row[2] if len(row) > 2 else ""
# Verwende die Kurzform (Spalte C, Index 2) für die Suche, wenn vorhanden, ansonsten Firmenname
search_name = row[2].strip() if len(row) > 2 and row[2].strip() not in ["", "k.A."] else company_name
website = row[3] if len(row) > 3 else ""
if not company_name or not website:
continue
count_service = count_linkedin_contacts(company_name, website, "Serviceleiter")
count_it = count_linkedin_contacts(company_name, website, "IT-Leiter")
count_management = count_linkedin_contacts(company_name, website, "Geschäftsführer")
count_disponent = count_linkedin_contacts(company_name, website, "Disponent")
count_service = count_linkedin_contacts(search_name, website, "Serviceleiter")
count_it = count_linkedin_contacts(search_name, website, "IT-Leiter")
count_management = count_linkedin_contacts(search_name, website, "Geschäftsführer")
count_disponent = count_linkedin_contacts(search_name, website, "Disponent")
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Verwende die korrekte Parameterreihenfolge: Werte zuerst, dann Range-Name
main_sheet.update([[str(count_service)]], f"AH{i}")
main_sheet.update([[str(count_it)]], f"AI{i}")
main_sheet.update([[str(count_management)]], f"AJ{i}")
main_sheet.update([[str(count_disponent)]], f"AK{i}")
main_sheet.update([[current_dt]], f"AL{i}")
debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Timestamp gesetzt.")
main_sheet.update(values=[[str(count_service)]], range_name=f"AH{i}")
main_sheet.update(values=[[str(count_it)]], range_name=f"AI{i}")
main_sheet.update(values=[[str(count_management)]], range_name=f"AJ{i}")
main_sheet.update(values=[[str(count_disponent)]], range_name=f"AK{i}")
main_sheet.update(values=[[current_dt]], range_name=f"AL{i}")
debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Contact Search Timestamp gesetzt.")
time.sleep(Config.RETRY_DELAY * 1.5)
debug_print("Contact Research abgeschlossen.")
# ==================== ALIGNMENT DEMO (Modus 3) ====================
# ==================== NEUER MODUS: ALIGNMENT DEMO (für Hauptblatt und Contacts) ====================
def alignment_demo_full():
    """Run the alignment demo on the main sheet and on the 'Contacts' worksheet.

    The main sheet is obtained from a fresh ``GoogleSheetHandler``. The
    'Contacts' worksheet is looked up in the spreadsheet at
    ``Config.SHEET_URL``; if it does not exist it is created (1000 rows,
    10 columns) and its header row A1:H1 is written before the demo runs on it.
    """
    # Main sheet first.
    alignment_demo(GoogleSheetHandler().sheet)

    # Separate authorised client for access to the 'Contacts' worksheet.
    scopes = ["https://www.googleapis.com/auth/spreadsheets"]
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        Config.CREDENTIALS_FILE, scopes)
    workbook = gspread.authorize(credentials).open_by_url(Config.SHEET_URL)

    try:
        contacts_sheet = workbook.worksheet("Contacts")
    except gspread.exceptions.WorksheetNotFound:
        # Worksheet is missing: create it and write its header row.
        contacts_sheet = workbook.add_worksheet(title="Contacts", rows="1000", cols="10")
        contacts_header = [
            "Firmenname", "Website", "Kurzform", "Vorname",
            "Nachname", "Position", "Anrede", "E-Mail",
        ]
        contacts_sheet.update(values=[contacts_header], range_name="A1:H1")
        debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.")

    alignment_demo(contacts_sheet)
    debug_print("Alignment-Demo für Hauptblatt und Contacts abgeschlossen.")
# ==================== ALIGNMENT DEMO (Hauptblatt) ====================
def alignment_demo(sheet):
new_headers = [
"Spalte A (ReEval Flag)",
"Spalte B (Firmenname)",
"Spalte C (Website)",
"Spalte D (Ort)",
"Spalte E (Beschreibung)",
"Spalte F (Aktuelle Branche)",
"Spalte G (Beschreibung Branche extern)",
"Spalte H (Anzahl Techniker CRM)",
"Spalte I (Umsatz CRM)",
"Spalte J (Anzahl Mitarbeiter CRM)",
"Spalte K (Vorschlag Wiki URL)",
"Spalte L (Wikipedia URL)",
"Spalte M (Wikipedia Absatz)",
"Spalte N (Wikipedia Branche)",
"Spalte O (Wikipedia Umsatz)",
"Spalte P (Wikipedia Mitarbeiter)",
"Spalte Q (Wikipedia Kategorien)",
"Spalte R (Konsistenzprüfung)",
"Spalte S (Begründung bei Inkonsistenz)",
"Spalte T (Vorschlag Wiki Artikel ChatGPT)",
"Spalte U (Begründung bei Abweichung)",
"Spalte V (Vorschlag neue Branche)",
"Spalte W (Konsistenzprüfung Branche)",
"Spalte X (Begründung Abweichung Branche)",
"Spalte Y (FSM Relevanz Ja / Nein)",
"Spalte Z (Begründung für FSM Relevanz)",
"Spalte AA (Schätzung Anzahl Mitarbeiter)",
"Spalte AB (Konsistenzprüfung Mitarbeiterzahl)",
"Spalte AC (Begründung für Abweichung Mitarbeiterzahl)",
"Spalte AD (Einschätzung Anzahl Servicetechniker)",
"Spalte AE (Begründung bei Abweichung Anzahl Servicetechniker)",
"Spalte AF (Schätzung Umsatz ChatGPT)",
"Spalte AG (Begründung für Abweichung Umsatz)",
"Spalte AH (Serviceleiter gefunden)",
"Spalte AI (IT-Leiter gefunden)",
"Spalte AJ (Management gefunden)",
"Spalte AK (Disponent gefunden)",
"Spalte AL (Contact Search Timestamp)",
"Spalte AM (Wikipedia Timestamp)",
"Spalte AN (ChatGPT Timestamp)",
"Spalte AO (Version)"
"Spalte C (Kurzform des Firmennamens)",
"Spalte D (Website)",
"Spalte E (Ort)",
"Spalte F (Beschreibung)",
"Spalte G (Aktuelle Branche)",
"Spalte H (Beschreibung Branche extern)",
"Spalte I (Anzahl Techniker CRM)",
"Spalte J (Umsatz CRM)",
"Spalte K (Anzahl Mitarbeiter CRM)",
"Spalte L (Vorschlag Wiki URL)",
"Spalte M (Wikipedia URL)",
"Spalte N (Wikipedia Absatz)",
"Spalte O (Wikipedia Branche)",
"Spalte P (Wikipedia Umsatz)",
"Spalte Q (Wikipedia Mitarbeiter)",
"Spalte R (Wikipedia Kategorien)",
"Spalte S (Konsistenzprüfung)",
"Spalte T (Begründung bei Inkonsistenz)",
"Spalte U (Vorschlag Wiki Artikel ChatGPT)",
"Spalte V (Begründung bei Abweichung)",
"Spalte W (Vorschlag neue Branche)",
"Spalte X (Konsistenzprüfung Branche)",
"Spalte Y (Begründung Abweichung Branche)",
"Spalte Z (FSM Relevanz Ja / Nein)",
"Spalte AA (Begründung für FSM Relevanz)",
"Spalte AB (Schätzung Anzahl Mitarbeiter)",
"Spalte AC (Konsistenzprüfung Mitarbeiterzahl)",
"Spalte AD (Begründung für Abweichung Mitarbeiterzahl)",
"Spalte AE (Einschätzung Anzahl Servicetechniker)",
"Spalte AF (Begründung bei Abweichung Anzahl Servicetechniker)",
"Spalte AG (Schätzung Umsatz ChatGPT)",
"Spalte AH (Begründung für Abweichung Umsatz)",
"Spalte AI (Serviceleiter gefunden)",
"Spalte AJ (IT-Leiter gefunden)",
"Spalte AK (Management gefunden)",
"Spalte AL (Disponent gefunden)",
"Spalte AM (Contact Search Timestamp)",
"Spalte AN (Wikipedia Timestamp)",
"Spalte AO (ChatGPT Timestamp)",
"Spalte AP (Version)"
]
header_range = "A11200:AO11200"
header_range = "A11200:AP11200"
sheet.update(values=[new_headers], range_name=header_range)
print("Alignment-Demo abgeschlossen: Neue Spaltenüberschriften in Zeile 11200 geschrieben.")
@@ -680,7 +702,8 @@ class GoogleSheetHandler:
self.sheet = gspread.authorize(creds).open_by_url(Config.SHEET_URL).sheet1
self.sheet_values = self.sheet.get_all_values()
def get_start_index(self):
filled_n = [row[38] if len(row) > 38 else '' for row in self.sheet_values[1:]] # Spalte AM = Wikipedia Timestamp
# Wikipedia Timestamp ist jetzt in Spalte AN (Index 39)
filled_n = [row[39] if len(row) > 39 else '' for row in self.sheet_values[1:]]
return next((i + 1 for i, v in enumerate(filled_n, start=1) if not str(v).strip()), len(filled_n) + 1)
# ==================== DATA PROCESSOR ====================
@@ -689,27 +712,27 @@ class DataProcessor:
self.sheet_handler = GoogleSheetHandler()
self.wiki_scraper = WikipediaScraper()
def process_rows(self, num_rows=None):
# MODE 1: Regulärer Modus nur Zeilen ohne entsprechende Timestamps werden bearbeitet
if MODE == "2":
print("Re-Evaluierungsmodus: Verarbeitung aller Zeilen mit 'x' in Spalte A.")
for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2):
if row[0].strip().lower() == "x":
self._process_single_row(i, row, force_all=True)
elif MODE == "3":
print("Alignment-Demo-Modus: Schreibe neue Spaltenüberschriften in Zeile 11200.")
alignment_demo(self.sheet_handler.sheet)
print("Alignment-Demo-Modus: Schreibe neue Spaltenüberschriften in Hauptblatt und Contacts.")
alignment_demo_full()
elif MODE == "4":
# Nur Wikipedia-Suche: nur Zeilen ohne Wikipedia-Timestamp (Spalte AM, Index 38)
for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2):
if len(row) <= 38 or row[38].strip() == "":
self._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
elif MODE == "5":
# Nur ChatGPT Bewertung: nur Zeilen ohne ChatGPT-Timestamp (Spalte AN, Index 39)
for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2):
processor = DataProcessor()
for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
# Nur Zeilen ohne Wikipedia-Timestamp (Spalte AN, Index 39)
if len(row) <= 39 or row[39].strip() == "":
self._process_single_row(i, row, process_wiki=False, process_chatgpt=True)
processor._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
elif MODE == "5":
processor = DataProcessor()
# Nur Zeilen ohne ChatGPT-Timestamp (Spalte AO, Index 40)
for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
if len(row) <= 40 or row[40].strip() == "":
processor._process_single_row(i, row, process_wiki=False, process_chatgpt=True)
else:
# Regulärer Modus: Bearbeite nur Zeilen, die noch nicht vollständig bewertet wurden
start_index = self.sheet_handler.get_start_index()
print(f"Starte bei Zeile {start_index+1}")
rows_processed = 0
@@ -720,93 +743,94 @@ class DataProcessor:
break
self._process_single_row(i, row)
rows_processed += 1
def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=True, process_chatgpt=True):
company_name = row_data[1] if len(row_data) > 1 else ""
website = row_data[2] if len(row_data) > 2 else ""
wiki_update_range = f"K{row_num}:Q{row_num}"
dt_wiki_range = f"AM{row_num}" # Wikipedia Timestamp
dt_chat_range = f"AN{row_num}" # ChatGPT Timestamp
ver_range = f"AO{row_num}" # Version
print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}")
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Wikipedia-Teil: Wird ausgeführt, wenn process_wiki True ist oder force_all aktiv ist.
if force_all or process_wiki:
# Hier zuerst prüfen, ob wir bereits einen Wiki-Timestamp haben (Spalte AM)
if len(row_data) <= 38 or row_data[38].strip() == "":
# Führe die Wikipedia-Auswertung durch
if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."]:
wiki_url = row_data[10].strip()
try:
wiki_data = self.wiki_scraper.extract_company_data(wiki_url)
except Exception as e:
debug_print(f"Fehler beim Laden des vorgeschlagenen Wikipedia-Artikels: {e}")
def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=True, process_chatgpt=True):
# Spalte B: Firmenname, Spalte C: Kurzform, Spalte D: Website
company_name = row_data[1] if len(row_data) > 1 else ""
website = row_data[3] if len(row_data) > 3 else ""
wiki_update_range = f"L{row_num}:R{row_num}" # Vorschlag Wiki URL bis Wikipedia Kategorien (Spalte L bis R)
dt_wiki_range = f"AN{row_num}" # Wikipedia Timestamp (Spalte AN)
dt_chat_range = f"AO{row_num}" # ChatGPT Timestamp (Spalte AO)
ver_range = f"AP{row_num}" # Version (Spalte AP)
print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}")
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Wikipedia-Teil
if force_all or process_wiki:
if len(row_data) <= 39 or row_data[39].strip() == "":
if len(row_data) > 11 and row_data[11].strip() not in ["", "k.A."]:
wiki_url = row_data[11].strip()
try:
wiki_data = self.wiki_scraper.extract_company_data(wiki_url)
except Exception as e:
debug_print(f"Fehler beim Laden des vorgeschlagenen Wikipedia-Artikels: {e}")
article = self.wiki_scraper.search_company_article(company_name, website)
wiki_data = self.wiki_scraper.extract_company_data(article.url) if article else {
'url': 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.',
'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.',
'full_infobox': 'k.A.'
}
else:
article = self.wiki_scraper.search_company_article(company_name, website)
wiki_data = self.wiki_scraper.extract_company_data(article.url) if article else {
'url': 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.',
'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.',
'full_infobox': 'k.A.'
}
wiki_values = [
row_data[11] if len(row_data) > 11 and row_data[11].strip() not in ["", "k.A."] else "k.A.",
wiki_data.get('url', 'k.A.'),
wiki_data.get('first_paragraph', 'k.A.'),
wiki_data.get('branche', 'k.A.'),
wiki_data.get('umsatz', 'k.A.'),
wiki_data.get('mitarbeiter', 'k.A.'),
wiki_data.get('categories', 'k.A.')
]
self.sheet_handler.sheet.update(values=[wiki_values], range_name=wiki_update_range)
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_wiki_range)
else:
article = self.wiki_scraper.search_company_article(company_name, website)
wiki_data = self.wiki_scraper.extract_company_data(article.url) if article else {
'url': 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.',
'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.',
'full_infobox': 'k.A.'
}
wiki_values = [
row_data[10] if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."] else "k.A.",
wiki_data.get('url', 'k.A.'),
wiki_data.get('first_paragraph', 'k.A.'),
wiki_data.get('branche', 'k.A.'),
wiki_data.get('umsatz', 'k.A.'),
wiki_data.get('mitarbeiter', 'k.A.'),
wiki_data.get('categories', 'k.A.')
]
self.sheet_handler.sheet.update(values=[wiki_values], range_name=wiki_update_range)
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_wiki_range)
else:
debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt überspringe Wiki-Auswertung.")
# ChatGPT-Teil: Wird nur ausgeführt, wenn process_chatgpt True ist oder force_all aktiv ist.
if force_all or process_chatgpt:
# Hier prüfen, ob bereits ein ChatGPT-Timestamp in Spalte AN vorliegt
if len(row_data) <= 39 or row_data[39].strip() == "":
crm_umsatz = row_data[8] if len(row_data) > 8 else "k.A."
abgleich_result = compare_umsatz_values(crm_umsatz, wiki_data.get('umsatz', 'k.A.') if 'wiki_data' in locals() else "k.A.")
self.sheet_handler.sheet.update(values=[[abgleich_result]], range_name=f"AG{row_num}")
crm_data = ";".join(row_data[1:10])
wiki_data_str = ";".join(row_data[11:17])
valid_result = validate_article_with_chatgpt(crm_data, wiki_data_str)
self.sheet_handler.sheet.update(values=[[valid_result]], range_name=f"R{row_num}")
fsm_result = evaluate_fsm_suitability(company_name, wiki_data if 'wiki_data' in locals() else {})
self.sheet_handler.sheet.update(values=[[fsm_result["suitability"]]], range_name=f"Y{row_num}")
self.sheet_handler.sheet.update(values=[[fsm_result["justification"]]], range_name=f"Z{row_num}")
st_estimate = evaluate_servicetechnicians_estimate(company_name, wiki_data if 'wiki_data' in locals() else {})
self.sheet_handler.sheet.update(values=[[st_estimate]], range_name=f"AD{row_num}")
internal_value = row_data[7] if len(row_data) > 7 else "k.A."
internal_category = map_internal_technicians(internal_value) if internal_value != "k.A." else "k.A."
if internal_category != "k.A." and st_estimate != internal_category:
explanation = evaluate_servicetechnicians_explanation(company_name, st_estimate, wiki_data if 'wiki_data' in locals() else {})
discrepancy = explanation
debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt überspringe Wiki-Auswertung.")
# ChatGPT-Teil
if force_all or process_chatgpt:
if len(row_data) <= 40 or row_data[40].strip() == "":
# Umsatz CRM ist nun in Spalte J (Index 9), Anzahl Mitarbeiter in Spalte K (Index 10)
crm_umsatz = row_data[9] if len(row_data) > 9 else "k.A."
abgleich_result = compare_umsatz_values(crm_umsatz, wiki_data.get('umsatz', 'k.A.') if 'wiki_data' in locals() else "k.A.")
self.sheet_handler.sheet.update(values=[[abgleich_result]], range_name=f"AG{row_num}")
# CRM-Daten: von Spalte B bis K (Indices 1 bis 10)
crm_data = ";".join(row_data[1:11])
# Wiki-Daten: von Spalte L bis R (Indices 11 bis 18)
wiki_data_str = ";".join(row_data[11:18])
valid_result = validate_article_with_chatgpt(crm_data, wiki_data_str)
self.sheet_handler.sheet.update(values=[[valid_result]], range_name=f"R{row_num}")
fsm_result = evaluate_fsm_suitability(company_name, wiki_data if 'wiki_data' in locals() else {})
self.sheet_handler.sheet.update(values=[[fsm_result["suitability"]]], range_name=f"Y{row_num}")
self.sheet_handler.sheet.update(values=[[fsm_result["justification"]]], range_name=f"Z{row_num}")
st_estimate = evaluate_servicetechnicians_estimate(company_name, wiki_data if 'wiki_data' in locals() else {})
self.sheet_handler.sheet.update(values=[[st_estimate]], range_name=f"AE{row_num}")
internal_value = row_data[8] if len(row_data) > 8 else "k.A." # Anzahl Techniker CRM in Spalte I (Index 8)
internal_category = map_internal_technicians(internal_value) if internal_value != "k.A." else "k.A."
if internal_category != "k.A." and st_estimate != internal_category:
explanation = evaluate_servicetechnicians_explanation(company_name, st_estimate, wiki_data if 'wiki_data' in locals() else {})
discrepancy = explanation
else:
discrepancy = "ok"
self.sheet_handler.sheet.update(values=[[discrepancy]], range_name=f"AF{row_num}")
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_chat_range)
else:
discrepancy = "ok"
self.sheet_handler.sheet.update(values=[[discrepancy]], range_name=f"AE{row_num}")
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AN{row_num}")
else:
debug_print(f"Zeile {row_num}: ChatGPT-Timestamp bereits gesetzt überspringe ChatGPT-Auswertung.")
# Aktualisiere letzten Timestamp und Version (Spalte AO)
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AO{row_num}")
self.sheet_handler.sheet.update(values=[[Config.VERSION]], range_name=f"AO{row_num}")
debug_print(f"✅ Aktualisiert: URL: {(wiki_data.get('url', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, "
f"Branche: {(wiki_data.get('branche', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, "
f"Umsatz-Abgleich: {abgleich_result if 'abgleich_result' in locals() else 'k.A.'}, "
f"Validierung: {valid_result if 'valid_result' in locals() else 'k.A.'}, "
f"FSM: {fsm_result['suitability'] if 'fsm_result' in locals() else 'k.A.'}, "
f"Servicetechniker-Schätzung: {st_estimate if 'st_estimate' in locals() else 'k.A.'}")
time.sleep(Config.RETRY_DELAY)
debug_print(f"Zeile {row_num}: ChatGPT-Timestamp bereits gesetzt überspringe ChatGPT-Auswertung.")
# Aktualisiere letzten Timestamp und Version (Spalte AP)
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AP{row_num}")
self.sheet_handler.sheet.update(values=[[Config.VERSION]], range_name=f"AP{row_num}")
debug_print(f"✅ Aktualisiert: URL: {(wiki_data.get('url', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, "
f"Branche: {(wiki_data.get('branche', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, "
f"Umsatz-Abgleich: {abgleich_result if 'abgleich_result' in locals() else 'k.A.'}, "
f"Validierung: {valid_result if 'valid_result' in locals() else 'k.A.'}, "
f"FSM: {fsm_result['suitability'] if 'fsm_result' in locals() else 'k.A.'}, "
f"Servicetechniker-Schätzung: {st_estimate if 'st_estimate' in locals() else 'k.A.'}")
time.sleep(Config.RETRY_DELAY)
# ==================== NEUER MODUS 6: CONTACT RESEARCH (via SerpAPI) ====================
def process_contact_research():
debug_print("Starte Contact Research (Modus 6)...")
@@ -815,31 +839,31 @@ def process_contact_research():
sh = gc.open_by_url(Config.SHEET_URL)
main_sheet = sh.sheet1
data = main_sheet.get_all_values()
# Für jeden Datensatz werden für vier Kategorien die Trefferanzahl ermittelt:
# Website ist nun in Spalte D (Index 3); Firmenname in Spalte B; Kurzform in Spalte C
for i, row in enumerate(data[1:], start=2):
company_name = row[1] if len(row) > 1 else ""
website = row[2] if len(row) > 2 else ""
# Verwende Kurzform (Spalte C, Index 2) falls vorhanden, sonst Firmenname
search_name = row[2].strip() if len(row) > 2 and row[2].strip() not in ["", "k.A."] else company_name
website = row[3] if len(row) > 3 else ""
if not company_name or not website:
continue
count_service = count_linkedin_contacts(company_name, website, "Serviceleiter")
count_it = count_linkedin_contacts(company_name, website, "IT-Leiter")
count_management = count_linkedin_contacts(company_name, website, "Geschäftsführer")
count_disponent = count_linkedin_contacts(company_name, website, "Disponent")
count_service = count_linkedin_contacts(search_name, website, "Serviceleiter")
count_it = count_linkedin_contacts(search_name, website, "IT-Leiter")
count_management = count_linkedin_contacts(search_name, website, "Geschäftsführer")
count_disponent = count_linkedin_contacts(search_name, website, "Disponent")
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
main_sheet.update(values=[[str(count_service)]], range_name=f"AH{i}")
main_sheet.update(f"AI{i}", [[str(count_it)]])
main_sheet.update(f"AJ{i}", [[str(count_management)]])
main_sheet.update(f"AK{i}", [[str(count_disponent)]])
main_sheet.update(f"AL{i}", [[current_dt]])
main_sheet.update(values=[[str(count_service)]], range_name=f"AI{i}") # Neu: Spalte AI (Serviceleiter gefunden) vorher AH -> jetzt AI
main_sheet.update(values=[[str(count_it)]], range_name=f"AJ{i}") # IT-Leiter gefunden in Spalte AJ
main_sheet.update(values=[[str(count_management)]], range_name=f"AK{i}") # Management gefunden in Spalte AK
main_sheet.update(values=[[str(count_disponent)]], range_name=f"AL{i}") # Disponent gefunden in Spalte AL
main_sheet.update(values=[[current_dt]], range_name=f"AM{i}") # Contact Search Timestamp in Spalte AM
debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} Contact Search Timestamp gesetzt.")
time.sleep(Config.RETRY_DELAY * 1.5)
debug_print("Contact Research abgeschlossen.")
# ==================== NEUER MODUS: ALIGNMENT DEMO (für Hauptblatt und Contacts) ====================
def alignment_demo_full():
# Aktualisiere Hauptblatt
alignment_demo(GoogleSheetHandler().sheet)
# Aktualisiere auch das Contacts-Blatt
gc = gspread.authorize(ServiceAccountCredentials.from_json_keyfile_name(
Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"]))
sh = gc.open_by_url(Config.SHEET_URL)
@@ -847,11 +871,54 @@ def alignment_demo_full():
contacts_sheet = sh.worksheet("Contacts")
except gspread.exceptions.WorksheetNotFound:
contacts_sheet = sh.add_worksheet(title="Contacts", rows="1000", cols="10")
header = ["Firmenname", "Website", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"]
contacts_sheet.update("A1:G1", [header])
header = ["Firmenname", "Website", "Kurzform", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"]
contacts_sheet.update(values=[header], range_name="A1:H1")
debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.")
alignment_demo(contacts_sheet)
debug_print("Alignment-Demo für Hauptblatt und Contacts abgeschlossen.")
# ==================== NEUER MODUS: CONTACTS (LinkedIn) ====================
def process_contacts():
    """Search LinkedIn contacts for every company and append them to 'Contacts'.

    Reads all rows of the main sheet (``sheet1``). Column B (index 1) is the
    company name, column C (index 2) the short name and column D (index 3)
    the website. The short name is used as search term unless it is empty or
    ``"k.A."``, in which case the full company name is used. Rows without a
    company name or website are skipped.

    For each remaining row, ``search_linkedin_contact`` is called once per
    position. Every hit becomes one 8-column row; the last two columns
    (Anrede, E-Mail) are left empty. All collected rows are written in a
    single update below the current content of the 'Contacts' worksheet,
    which is created with a header row if it does not exist yet.
    """
    debug_print("Starte LinkedIn-Kontaktsuche...")

    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"])
    client = gspread.authorize(credentials)
    workbook = client.open_by_url(Config.SHEET_URL)

    try:
        contacts_sheet = workbook.worksheet("Contacts")
    except gspread.exceptions.WorksheetNotFound:
        # Worksheet is missing: create it and write its header row.
        contacts_sheet = workbook.add_worksheet(title="Contacts", rows="1000", cols="10")
        contacts_header = [
            "Firmenname", "Website", "Kurzform", "Vorname",
            "Nachname", "Position", "Anrede", "E-Mail",
        ]
        contacts_sheet.update(values=[contacts_header], range_name="A1:H1")
        debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.")

    records = workbook.sheet1.get_all_values()
    wanted_positions = ("Serviceleiter", "IT-Leiter", "Leiter After Sales", "Leiter Einsatzplanung")
    found_rows = []

    # Skip the header row; line_no is the 1-based sheet row number.
    for line_no, record in enumerate(records[1:], start=2):
        company_name = record[1] if len(record) > 1 else ""
        short_name = record[2].strip() if len(record) > 2 else ""
        # Prefer the short name unless it is empty or the "k.A." placeholder.
        search_name = company_name if short_name in ("", "k.A.") else short_name
        website = record[3] if len(record) > 3 else ""

        debug_print(f"Verarbeite Firma: '{company_name}' (Zeile {line_no}), Website: '{website}'")
        if not (company_name and website):
            debug_print("Überspringe, da Firmenname oder Website fehlt.")
            continue

        for role in wanted_positions:
            debug_print(f"Suche nach Position: '{role}' bei '{search_name}'")
            contact = search_linkedin_contact(search_name, website, role)
            if not contact:
                debug_print(f"Kein Kontakt für Position '{role}' bei '{search_name}' gefunden.")
                continue
            debug_print(f"Kontakt gefunden: {contact}")
            found_rows.append([
                contact["Firmenname"],
                website,
                search_name,
                contact["Vorname"],
                contact["Nachname"],
                contact["Position"],
                "",
                "",
            ])

    if not found_rows:
        debug_print("Keine Kontakte gefunden in der Haupttabelle.")
        return

    # Write the new rows directly below the last row currently in use.
    first_free_row = len(contacts_sheet.get_all_values()) + 1
    target_range = f"A{first_free_row}:H{first_free_row + len(found_rows) - 1}"
    contacts_sheet.update(values=found_rows, range_name=target_range)
    debug_print(f"{len(found_rows)} Kontakte in 'Contacts' hinzugefügt.")
# ==================== MAIN PROGRAMM ====================
if __name__ == "__main__":
print("Modi:")
@@ -861,6 +928,7 @@ if __name__ == "__main__":
print("4 = Nur Wikipedia-Suche (Zeilen ohne Wikipedia-Timestamp)")
print("5 = Nur ChatGPT-Bewertung (Zeilen ohne ChatGPT-Timestamp)")
print("6 = Contact Research (via SerpAPI)")
print("7 = Contacts (LinkedIn) Kontakte in das Contacts-Blatt schreiben")
mode_input = input("Wählen Sie den Modus: ").strip()
if mode_input == "2":
MODE = "2"
@@ -872,6 +940,8 @@ if __name__ == "__main__":
MODE = "5"
elif mode_input == "6":
MODE = "6"
elif mode_input == "7":
MODE = "7"
else:
MODE = "1"
if MODE == "1":
@@ -887,13 +957,16 @@ if __name__ == "__main__":
processor.process_rows()
elif MODE == "4":
processor = DataProcessor()
# Für Mode 4: Nur Wikipedia-Suche
processor.process_rows(num_rows=0) # Unser _process_single_row prüft dann die Wiki-Timestamp-Bedingung
process_wikipedia_only()
for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
if len(row) <= 39 or row[39].strip() == "":
processor._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
elif MODE == "5":
processor = DataProcessor()
# Für Mode 5: Nur ChatGPT-Bewertung
processor.process_rows(num_rows=0)
for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2):
if len(row) <= 40 or row[40].strip() == "":
processor._process_single_row(i, row, process_wiki=False, process_chatgpt=True)
elif MODE == "6":
process_contact_research()
elif MODE == "7":
process_contacts()
print(f"\n✅ Auswertung abgeschlossen ({Config.VERSION})")