diff --git a/brancheneinstufung.py b/brancheneinstufung.py index eefce486..1e9ed13b 100644 --- a/brancheneinstufung.py +++ b/brancheneinstufung.py @@ -14,7 +14,7 @@ import csv # ==================== KONFIGURATION ==================== class Config: - VERSION = "v1.3.9" # v1.3.9: Alle bisherigen Funktionen inkl. Reg. Modus, Re-Eval, Alignment, Wiki, ChatGPT, Contact Research. + VERSION = "v1.3.11" # v1.3.11: columns shifted right by one, short name in column C; all references adjusted. LANG = "de" CREDENTIALS_FILE = "service_account.json" SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo" @@ -335,7 +335,11 @@ def search_linkedin_contact(company_name, website, position_query): except Exception as e: debug_print("Fehler beim Lesen des SerpAPI-Schlüssels: " + str(e)) return None - query = f'site:linkedin.com/in "{position_query}" "{company_name}"' + # Search with the name passed in (process_contacts passes the column C short name when present, otherwise the company name); fall back to the website only when that name is empty and a website is given + search_name = company_name + if company_name == "" and website != "": + search_name = website + query = f'site:linkedin.com/in "{position_query}" "{search_name}"' debug_print(f"Erstelle LinkedIn-Query: {query}") params = { "engine": "google", @@ -378,7 +382,7 @@ def search_linkedin_contact(company_name, website, position_query): debug_print(f"Fehler bei der SerpAPI-Suche: {e}") return None -# ==================== NEUE FUNKTION: ZÄHLEN DER LINKEDIN-KONTAKTE (für Contact Research) ==================== +# ==================== NEW FUNCTION: COUNT LINKEDIN CONTACTS ==================== def count_linkedin_contacts(company_name, website, position_query): try: with open("serpApiKey.txt", "r") as f: @@ -416,73 +420,91 @@ def process_contact_research(): sh = gc.open_by_url(Config.SHEET_URL) main_sheet = sh.sheet1 data = main_sheet.get_all_values() - # Für jeden Datensatz werden für vier Kategorien die Trefferanzahl ermittelt: + # Website is now in column D (index 3), company name in column B 
(Index 1) for i, row in enumerate(data[1:], start=2): company_name = row[1] if len(row) > 1 else "" - website = row[2] if len(row) > 2 else "" + # Use the short name (column C, index 2) for the search if present, otherwise the company name. NOTE(review): the sheet updates in this hunk still target AH to AL, while the later hunk for process_contact_research writes AI to AM; confirm which columns are intended + search_name = row[2].strip() if len(row) > 2 and row[2].strip() not in ["", "k.A."] else company_name + website = row[3] if len(row) > 3 else "" if not company_name or not website: continue - count_service = count_linkedin_contacts(company_name, website, "Serviceleiter") - count_it = count_linkedin_contacts(company_name, website, "IT-Leiter") - count_management = count_linkedin_contacts(company_name, website, "Geschäftsführer") - count_disponent = count_linkedin_contacts(company_name, website, "Disponent") + count_service = count_linkedin_contacts(search_name, website, "Serviceleiter") + count_it = count_linkedin_contacts(search_name, website, "IT-Leiter") + count_management = count_linkedin_contacts(search_name, website, "Geschäftsführer") + count_disponent = count_linkedin_contacts(search_name, website, "Disponent") current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - # Verwende die korrekte Parameterreihenfolge: Werte zuerst, dann Range-Name - main_sheet.update([[str(count_service)]], f"AH{i}") - main_sheet.update([[str(count_it)]], f"AI{i}") - main_sheet.update([[str(count_management)]], f"AJ{i}") - main_sheet.update([[str(count_disponent)]], f"AK{i}") - main_sheet.update([[current_dt]], f"AL{i}") - debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} – Timestamp gesetzt.") + main_sheet.update(values=[[str(count_service)]], range_name=f"AH{i}") + main_sheet.update(values=[[str(count_it)]], range_name=f"AI{i}") + main_sheet.update(values=[[str(count_management)]], range_name=f"AJ{i}") + main_sheet.update(values=[[str(count_disponent)]], range_name=f"AK{i}") + main_sheet.update(values=[[current_dt]], range_name=f"AL{i}") + 
debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} – Contact Search Timestamp gesetzt.") time.sleep(Config.RETRY_DELAY * 1.5) debug_print("Contact Research abgeschlossen.") -# ==================== ALIGNMENT DEMO (Modus 3) ==================== +# ==================== NEW MODE: ALIGNMENT DEMO (main sheet and Contacts) ==================== NOTE(review): alignment_demo writes the main sheet header row to A11200:AP11200, and this function also applies it to the Contacts sheet, which is created here with 1000 rows and 10 columns; confirm that is intended +def alignment_demo_full(): + alignment_demo(GoogleSheetHandler().sheet) + gc = gspread.authorize(ServiceAccountCredentials.from_json_keyfile_name( + Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"])) + sh = gc.open_by_url(Config.SHEET_URL) + try: + contacts_sheet = sh.worksheet("Contacts") + except gspread.exceptions.WorksheetNotFound: + contacts_sheet = sh.add_worksheet(title="Contacts", rows="1000", cols="10") + header = ["Firmenname", "Website", "Kurzform", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"] + contacts_sheet.update(values=[header], range_name="A1:H1") + debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.") + alignment_demo(contacts_sheet) + debug_print("Alignment-Demo für Hauptblatt und Contacts abgeschlossen.") + +# ==================== ALIGNMENT DEMO (main sheet) ==================== def alignment_demo(sheet): new_headers = [ "Spalte A (ReEval Flag)", "Spalte B (Firmenname)", - "Spalte C (Website)", - "Spalte D (Ort)", - "Spalte E (Beschreibung)", - "Spalte F (Aktuelle Branche)", - "Spalte G (Beschreibung Branche extern)", - "Spalte H (Anzahl Techniker CRM)", - "Spalte I (Umsatz CRM)", - "Spalte J (Anzahl Mitarbeiter CRM)", - "Spalte K (Vorschlag Wiki URL)", - "Spalte L (Wikipedia URL)", - "Spalte M (Wikipedia Absatz)", - "Spalte N (Wikipedia Branche)", - "Spalte O (Wikipedia Umsatz)", - "Spalte P (Wikipedia Mitarbeiter)", - "Spalte Q (Wikipedia Kategorien)", - "Spalte R (Konsistenzprüfung)", - "Spalte S (Begründung bei Inkonsistenz)", - "Spalte T (Vorschlag Wiki Artikel 
ChatGPT)", - "Spalte U (Begründung bei Abweichung)", - "Spalte V (Vorschlag neue Branche)", - "Spalte W (Konsistenzprüfung Branche)", - "Spalte X (Begründung Abweichung Branche)", - "Spalte Y (FSM Relevanz Ja / Nein)", - "Spalte Z (Begründung für FSM Relevanz)", - "Spalte AA (Schätzung Anzahl Mitarbeiter)", - "Spalte AB (Konsistenzprüfung Mitarbeiterzahl)", - "Spalte AC (Begründung für Abweichung Mitarbeiterzahl)", - "Spalte AD (Einschätzung Anzahl Servicetechniker)", - "Spalte AE (Begründung bei Abweichung Anzahl Servicetechniker)", - "Spalte AF (Schätzung Umsatz ChatGPT)", - "Spalte AG (Begründung für Abweichung Umsatz)", - "Spalte AH (Serviceleiter gefunden)", - "Spalte AI (IT-Leiter gefunden)", - "Spalte AJ (Management gefunden)", - "Spalte AK (Disponent gefunden)", - "Spalte AL (Contact Search Timestamp)", - "Spalte AM (Wikipedia Timestamp)", - "Spalte AN (ChatGPT Timestamp)", - "Spalte AO (Version)" + "Spalte C (Kurzform des Firmennamens)", + "Spalte D (Website)", + "Spalte E (Ort)", + "Spalte F (Beschreibung)", + "Spalte G (Aktuelle Branche)", + "Spalte H (Beschreibung Branche extern)", + "Spalte I (Anzahl Techniker CRM)", + "Spalte J (Umsatz CRM)", + "Spalte K (Anzahl Mitarbeiter CRM)", + "Spalte L (Vorschlag Wiki URL)", + "Spalte M (Wikipedia URL)", + "Spalte N (Wikipedia Absatz)", + "Spalte O (Wikipedia Branche)", + "Spalte P (Wikipedia Umsatz)", + "Spalte Q (Wikipedia Mitarbeiter)", + "Spalte R (Wikipedia Kategorien)", + "Spalte S (Konsistenzprüfung)", + "Spalte T (Begründung bei Inkonsistenz)", + "Spalte U (Vorschlag Wiki Artikel ChatGPT)", + "Spalte V (Begründung bei Abweichung)", + "Spalte W (Vorschlag neue Branche)", + "Spalte X (Konsistenzprüfung Branche)", + "Spalte Y (Begründung Abweichung Branche)", + "Spalte Z (FSM Relevanz Ja / Nein)", + "Spalte AA (Begründung für FSM Relevanz)", + "Spalte AB (Schätzung Anzahl Mitarbeiter)", + "Spalte AC (Konsistenzprüfung Mitarbeiterzahl)", + "Spalte AD (Begründung für Abweichung Mitarbeiterzahl)", + "Spalte 
AE (Einschätzung Anzahl Servicetechniker)", + "Spalte AF (Begründung bei Abweichung Anzahl Servicetechniker)", + "Spalte AG (Schätzung Umsatz ChatGPT)", + "Spalte AH (Begründung für Abweichung Umsatz)", + "Spalte AI (Serviceleiter gefunden)", + "Spalte AJ (IT-Leiter gefunden)", + "Spalte AK (Management gefunden)", + "Spalte AL (Disponent gefunden)", + "Spalte AM (Contact Search Timestamp)", + "Spalte AN (Wikipedia Timestamp)", + "Spalte AO (ChatGPT Timestamp)", + "Spalte AP (Version)" ] - header_range = "A11200:AO11200" + header_range = "A11200:AP11200" sheet.update(values=[new_headers], range_name=header_range) print("Alignment-Demo abgeschlossen: Neue Spaltenüberschriften in Zeile 11200 geschrieben.") @@ -680,7 +702,8 @@ class GoogleSheetHandler: self.sheet = gspread.authorize(creds).open_by_url(Config.SHEET_URL).sheet1 self.sheet_values = self.sheet.get_all_values() def get_start_index(self): - filled_n = [row[38] if len(row) > 38 else '' for row in self.sheet_values[1:]] # Spalte AM = Wikipedia Timestamp + # Wikipedia timestamp is now in column AN (index 39) + filled_n = [row[39] if len(row) > 39 else '' for row in self.sheet_values[1:]] return next((i + 1 for i, v in enumerate(filled_n, start=1) if not str(v).strip()), len(filled_n) + 1) # ==================== DATA PROCESSOR ==================== @@ -689,27 +712,27 @@ class DataProcessor: self.sheet_handler = GoogleSheetHandler() self.wiki_scraper = WikipediaScraper() def process_rows(self, num_rows=None): - # MODE 1: Regulärer Modus – nur Zeilen ohne entsprechende Timestamps werden bearbeitet if MODE == "2": print("Re-Evaluierungsmodus: Verarbeitung aller Zeilen mit 'x' in Spalte A.") for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2): if row[0].strip().lower() == "x": self._process_single_row(i, row, force_all=True) elif MODE == "3": - print("Alignment-Demo-Modus: Schreibe neue Spaltenüberschriften in Zeile 11200.") - alignment_demo(self.sheet_handler.sheet) + 
print("Alignment-Demo-Modus: Schreibe neue Spaltenüberschriften in Hauptblatt und Contacts.") + alignment_demo_full() elif MODE == "4": - # Nur Wikipedia-Suche: nur Zeilen ohne Wikipedia-Timestamp (Spalte AM, Index 38) - for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2): - if len(row) <= 38 or row[38].strip() == "": - self._process_single_row(i, row, process_wiki=True, process_chatgpt=False) - elif MODE == "5": - # Nur ChatGPT Bewertung: nur Zeilen ohne ChatGPT-Timestamp (Spalte AN, Index 39) - for i, row in enumerate(self.sheet_handler.sheet_values[1:], start=2): + processor = DataProcessor() + for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2): + # Only rows without a Wikipedia timestamp (column AN, index 39); NOTE(review): this branch builds a new DataProcessor instead of using self, confirm that is intended if len(row) <= 39 or row[39].strip() == "": - self._process_single_row(i, row, process_wiki=False, process_chatgpt=True) + processor._process_single_row(i, row, process_wiki=True, process_chatgpt=False) + elif MODE == "5": + processor = DataProcessor() + # Only rows without a ChatGPT timestamp (column AO, index 40) + for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2): + if len(row) <= 40 or row[40].strip() == "": + processor._process_single_row(i, row, process_wiki=False, process_chatgpt=True) else: - # Regulärer Modus: Bearbeite nur Zeilen, die noch nicht vollständig bewertet wurden start_index = self.sheet_handler.get_start_index() print(f"Starte bei Zeile {start_index+1}") rows_processed = 0 @@ -720,93 +743,94 @@ class DataProcessor: break self._process_single_row(i, row) rows_processed += 1 -def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=True, process_chatgpt=True): - company_name = row_data[1] if len(row_data) > 1 else "" - website = row_data[2] if len(row_data) > 2 else "" - wiki_update_range = f"K{row_num}:Q{row_num}" - dt_wiki_range = f"AM{row_num}" # Wikipedia Timestamp - dt_chat_range = f"AN{row_num}" # ChatGPT Timestamp - ver_range = f"AO{row_num}" # 
Version - print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}") - current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - - # Wikipedia-Teil: Wird ausgeführt, wenn process_wiki True ist oder force_all aktiv ist. - if force_all or process_wiki: - # Hier zuerst prüfen, ob wir bereits einen Wiki-Timestamp haben (Spalte AM) - if len(row_data) <= 38 or row_data[38].strip() == "": - # Führe die Wikipedia-Auswertung durch - if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."]: - wiki_url = row_data[10].strip() - try: - wiki_data = self.wiki_scraper.extract_company_data(wiki_url) - except Exception as e: - debug_print(f"Fehler beim Laden des vorgeschlagenen Wikipedia-Artikels: {e}") + def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=True, process_chatgpt=True): + # Column B: company name, column C: short name, column D: website + company_name = row_data[1] if len(row_data) > 1 else "" + website = row_data[3] if len(row_data) > 3 else "" + wiki_update_range = f"L{row_num}:R{row_num}" # suggested wiki URL through Wikipedia categories (columns L to R) + dt_wiki_range = f"AN{row_num}" # Wikipedia timestamp (column AN) + dt_chat_range = f"AO{row_num}" # ChatGPT timestamp (column AO) + ver_range = f"AP{row_num}" # version (column AP); NOTE(review): ver_range does not appear to be used by the updates later in this hunk + print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}") + current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + # Wikipedia part + if force_all or process_wiki: + if len(row_data) <= 39 or row_data[39].strip() == "": + if len(row_data) > 11 and row_data[11].strip() not in ["", "k.A."]: + wiki_url = row_data[11].strip() + try: + wiki_data = self.wiki_scraper.extract_company_data(wiki_url) + except Exception as e: + debug_print(f"Fehler beim Laden des vorgeschlagenen Wikipedia-Artikels: {e}") + article = self.wiki_scraper.search_company_article(company_name, website) + wiki_data = 
self.wiki_scraper.extract_company_data(article.url) if article else { + 'url': 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.', + 'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.', + 'full_infobox': 'k.A.' + } + else: article = self.wiki_scraper.search_company_article(company_name, website) wiki_data = self.wiki_scraper.extract_company_data(article.url) if article else { 'url': 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.', 'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.', 'full_infobox': 'k.A.' } + wiki_values = [ + row_data[11] if len(row_data) > 11 and row_data[11].strip() not in ["", "k.A."] else "k.A.", + wiki_data.get('url', 'k.A.'), + wiki_data.get('first_paragraph', 'k.A.'), + wiki_data.get('branche', 'k.A.'), + wiki_data.get('umsatz', 'k.A.'), + wiki_data.get('mitarbeiter', 'k.A.'), + wiki_data.get('categories', 'k.A.') + ] + self.sheet_handler.sheet.update(values=[wiki_values], range_name=wiki_update_range) + self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_wiki_range) else: - article = self.wiki_scraper.search_company_article(company_name, website) - wiki_data = self.wiki_scraper.extract_company_data(article.url) if article else { - 'url': 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.', - 'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.', - 'full_infobox': 'k.A.' 
- } - wiki_values = [ - row_data[10] if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."] else "k.A.", - wiki_data.get('url', 'k.A.'), - wiki_data.get('first_paragraph', 'k.A.'), - wiki_data.get('branche', 'k.A.'), - wiki_data.get('umsatz', 'k.A.'), - wiki_data.get('mitarbeiter', 'k.A.'), - wiki_data.get('categories', 'k.A.') - ] - self.sheet_handler.sheet.update(values=[wiki_values], range_name=wiki_update_range) - self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_wiki_range) - else: - debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt – überspringe Wiki-Auswertung.") - - # ChatGPT-Teil: Wird nur ausgeführt, wenn process_chatgpt True ist oder force_all aktiv ist. - if force_all or process_chatgpt: - # Hier prüfen, ob bereits ein ChatGPT-Timestamp in Spalte AN vorliegt - if len(row_data) <= 39 or row_data[39].strip() == "": - crm_umsatz = row_data[8] if len(row_data) > 8 else "k.A." - abgleich_result = compare_umsatz_values(crm_umsatz, wiki_data.get('umsatz', 'k.A.') if 'wiki_data' in locals() else "k.A.") - self.sheet_handler.sheet.update(values=[[abgleich_result]], range_name=f"AG{row_num}") - crm_data = ";".join(row_data[1:10]) - wiki_data_str = ";".join(row_data[11:17]) - valid_result = validate_article_with_chatgpt(crm_data, wiki_data_str) - self.sheet_handler.sheet.update(values=[[valid_result]], range_name=f"R{row_num}") - fsm_result = evaluate_fsm_suitability(company_name, wiki_data if 'wiki_data' in locals() else {}) - self.sheet_handler.sheet.update(values=[[fsm_result["suitability"]]], range_name=f"Y{row_num}") - self.sheet_handler.sheet.update(values=[[fsm_result["justification"]]], range_name=f"Z{row_num}") - st_estimate = evaluate_servicetechnicians_estimate(company_name, wiki_data if 'wiki_data' in locals() else {}) - self.sheet_handler.sheet.update(values=[[st_estimate]], range_name=f"AD{row_num}") - internal_value = row_data[7] if len(row_data) > 7 else "k.A." 
- internal_category = map_internal_technicians(internal_value) if internal_value != "k.A." else "k.A." - if internal_category != "k.A." and st_estimate != internal_category: - explanation = evaluate_servicetechnicians_explanation(company_name, st_estimate, wiki_data if 'wiki_data' in locals() else {}) - discrepancy = explanation + debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt – überspringe Wiki-Auswertung.") + + # ChatGPT part + if force_all or process_chatgpt: + if len(row_data) <= 40 or row_data[40].strip() == "": + # CRM revenue is now in column J (index 9), employee count in column K (index 10). NOTE(review): the updates below still write to AG, R, Y and Z, the same targets as the removed code, although the shifted header list in alignment_demo labels AH as the revenue deviation reason, S as the consistency check and Z/AA as the FSM fields; these look unshifted, confirm + crm_umsatz = row_data[9] if len(row_data) > 9 else "k.A." + abgleich_result = compare_umsatz_values(crm_umsatz, wiki_data.get('umsatz', 'k.A.') if 'wiki_data' in locals() else "k.A.") + self.sheet_handler.sheet.update(values=[[abgleich_result]], range_name=f"AG{row_num}") + # CRM data: columns B through K (indices 1 to 10) + crm_data = ";".join(row_data[1:11]) + # Wiki data: columns L through R (indices 11 to 17; the slice end 18 is exclusive) + wiki_data_str = ";".join(row_data[11:18]) + valid_result = validate_article_with_chatgpt(crm_data, wiki_data_str) + self.sheet_handler.sheet.update(values=[[valid_result]], range_name=f"R{row_num}") + fsm_result = evaluate_fsm_suitability(company_name, wiki_data if 'wiki_data' in locals() else {}) + self.sheet_handler.sheet.update(values=[[fsm_result["suitability"]]], range_name=f"Y{row_num}") + self.sheet_handler.sheet.update(values=[[fsm_result["justification"]]], range_name=f"Z{row_num}") + st_estimate = evaluate_servicetechnicians_estimate(company_name, wiki_data if 'wiki_data' in locals() else {}) + self.sheet_handler.sheet.update(values=[[st_estimate]], range_name=f"AE{row_num}") + internal_value = row_data[8] if len(row_data) > 8 else "k.A." # technician count (CRM) in column I (index 8) + internal_category = map_internal_technicians(internal_value) if internal_value != "k.A." else "k.A." + if internal_category != "k.A." 
and st_estimate != internal_category: + explanation = evaluate_servicetechnicians_explanation(company_name, st_estimate, wiki_data if 'wiki_data' in locals() else {}) + discrepancy = explanation + else: + discrepancy = "ok" + self.sheet_handler.sheet.update(values=[[discrepancy]], range_name=f"AF{row_num}") + self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_chat_range) else: - discrepancy = "ok" - self.sheet_handler.sheet.update(values=[[discrepancy]], range_name=f"AE{row_num}") - self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AN{row_num}") - else: - debug_print(f"Zeile {row_num}: ChatGPT-Timestamp bereits gesetzt – überspringe ChatGPT-Auswertung.") - - # Aktualisiere letzten Timestamp und Version (Spalte AO) - self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AO{row_num}") - self.sheet_handler.sheet.update(values=[[Config.VERSION]], range_name=f"AO{row_num}") - debug_print(f"✅ Aktualisiert: URL: {(wiki_data.get('url', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, " - f"Branche: {(wiki_data.get('branche', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, " - f"Umsatz-Abgleich: {abgleich_result if 'abgleich_result' in locals() else 'k.A.'}, " - f"Validierung: {valid_result if 'valid_result' in locals() else 'k.A.'}, " - f"FSM: {fsm_result['suitability'] if 'fsm_result' in locals() else 'k.A.'}, " - f"Servicetechniker-Schätzung: {st_estimate if 'st_estimate' in locals() else 'k.A.'}") - time.sleep(Config.RETRY_DELAY) - + debug_print(f"Zeile {row_num}: ChatGPT-Timestamp bereits gesetzt – überspringe ChatGPT-Auswertung.") + + # Write the last timestamp and the version (column AP); NOTE(review): both updates target the same AP cell, so the version string replaces the timestamp + self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AP{row_num}") + self.sheet_handler.sheet.update(values=[[Config.VERSION]], range_name=f"AP{row_num}") + debug_print(f"✅ Aktualisiert: URL: {(wiki_data.get('url', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, " + f"Branche: {(wiki_data.get('branche', 'k.A.') 
if 'wiki_data' in locals() else 'k.A.')}, " + f"Umsatz-Abgleich: {abgleich_result if 'abgleich_result' in locals() else 'k.A.'}, " + f"Validierung: {valid_result if 'valid_result' in locals() else 'k.A.'}, " + f"FSM: {fsm_result['suitability'] if 'fsm_result' in locals() else 'k.A.'}, " + f"Servicetechniker-Schätzung: {st_estimate if 'st_estimate' in locals() else 'k.A.'}") + time.sleep(Config.RETRY_DELAY) + # ==================== NEUER MODUS 6: CONTACT RESEARCH (via SerpAPI) ==================== def process_contact_research(): debug_print("Starte Contact Research (Modus 6)...") @@ -815,31 +839,31 @@ def process_contact_research(): sh = gc.open_by_url(Config.SHEET_URL) main_sheet = sh.sheet1 data = main_sheet.get_all_values() - # Für jeden Datensatz werden für vier Kategorien die Trefferanzahl ermittelt: + # Website is now in column D (index 3); company name in column B; short name in column C for i, row in enumerate(data[1:], start=2): company_name = row[1] if len(row) > 1 else "" - website = row[2] if len(row) > 2 else "" + # Use the short name (column C, index 2) if present, otherwise the company name + search_name = row[2].strip() if len(row) > 2 and row[2].strip() not in ["", "k.A."] else company_name + website = row[3] if len(row) > 3 else "" if not company_name or not website: continue - count_service = count_linkedin_contacts(company_name, website, "Serviceleiter") - count_it = count_linkedin_contacts(company_name, website, "IT-Leiter") - count_management = count_linkedin_contacts(company_name, website, "Geschäftsführer") - count_disponent = count_linkedin_contacts(company_name, website, "Disponent") + count_service = count_linkedin_contacts(search_name, website, "Serviceleiter") + count_it = count_linkedin_contacts(search_name, website, "IT-Leiter") + count_management = count_linkedin_contacts(search_name, website, "Geschäftsführer") + count_disponent = count_linkedin_contacts(search_name, website, "Disponent") current_dt = datetime.now().strftime("%Y-%m-%d 
%H:%M:%S") - main_sheet.update(values=[[str(count_service)]], range_name=f"AH{i}") - main_sheet.update(f"AI{i}", [[str(count_it)]]) - main_sheet.update(f"AJ{i}", [[str(count_management)]]) - main_sheet.update(f"AK{i}", [[str(count_disponent)]]) - main_sheet.update(f"AL{i}", [[current_dt]]) + main_sheet.update(values=[[str(count_service)]], range_name=f"AI{i}") # New: column AI (service manager found), previously AH + main_sheet.update(values=[[str(count_it)]], range_name=f"AJ{i}") # IT manager found, column AJ + main_sheet.update(values=[[str(count_management)]], range_name=f"AK{i}") # management found, column AK + main_sheet.update(values=[[str(count_disponent)]], range_name=f"AL{i}") # dispatcher found, column AL + main_sheet.update(values=[[current_dt]], range_name=f"AM{i}") # contact search timestamp, column AM debug_print(f"Zeile {i}: Serviceleiter {count_service}, IT-Leiter {count_it}, Management {count_management}, Disponent {count_disponent} – Contact Search Timestamp gesetzt.") time.sleep(Config.RETRY_DELAY * 1.5) debug_print("Contact Research abgeschlossen.") # ==================== NEUER MODUS: ALIGNMENT DEMO (für Hauptblatt und Contacts) ==================== def alignment_demo_full(): - # Aktualisiere Hauptblatt alignment_demo(GoogleSheetHandler().sheet) - # Aktualisiere auch das Contacts-Blatt gc = gspread.authorize(ServiceAccountCredentials.from_json_keyfile_name( Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"])) sh = gc.open_by_url(Config.SHEET_URL) @@ -847,11 +871,54 @@ def alignment_demo_full(): contacts_sheet = sh.worksheet("Contacts") except gspread.exceptions.WorksheetNotFound: contacts_sheet = sh.add_worksheet(title="Contacts", rows="1000", cols="10") - header = ["Firmenname", "Website", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"] - contacts_sheet.update("A1:G1", [header]) + header = ["Firmenname", "Website", "Kurzform", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"] + 
contacts_sheet.update(values=[header], range_name="A1:H1") + debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.") alignment_demo(contacts_sheet) debug_print("Alignment-Demo für Hauptblatt und Contacts abgeschlossen.") +# ==================== NEW MODE: CONTACTS (LinkedIn) ==================== +def process_contacts(): + debug_print("Starte LinkedIn-Kontaktsuche...") + gc = gspread.authorize(ServiceAccountCredentials.from_json_keyfile_name( + Config.CREDENTIALS_FILE, ["https://www.googleapis.com/auth/spreadsheets"])) + sh = gc.open_by_url(Config.SHEET_URL) + try: + contacts_sheet = sh.worksheet("Contacts") + except gspread.exceptions.WorksheetNotFound: + contacts_sheet = sh.add_worksheet(title="Contacts", rows="1000", cols="10") + header = ["Firmenname", "Website", "Kurzform", "Vorname", "Nachname", "Position", "Anrede", "E-Mail"] + contacts_sheet.update(values=[header], range_name="A1:H1") + debug_print("Neues Blatt 'Contacts' erstellt und Header eingetragen.") + main_sheet = sh.sheet1 + data = main_sheet.get_all_values() + positions = ["Serviceleiter", "IT-Leiter", "Leiter After Sales", "Leiter Einsatzplanung"] + new_rows = [] + for idx, row in enumerate(data[1:], start=2): + # Company name in column B (index 1), short name in column C (index 2), website in column D (index 3) + company_name = row[1] if len(row) > 1 else "" + search_name = row[2].strip() if len(row) > 2 and row[2].strip() not in ["", "k.A."] else company_name + website = row[3] if len(row) > 3 else "" + debug_print(f"Verarbeite Firma: '{company_name}' (Zeile {idx}), Website: '{website}'") + if not company_name or not website: + debug_print("Überspringe, da Firmenname oder Website fehlt.") + continue + for pos in positions: + debug_print(f"Suche nach Position: '{pos}' bei '{search_name}'") + contact = search_linkedin_contact(search_name, website, pos) + if contact: + debug_print(f"Kontakt gefunden: {contact}") + new_rows.append([contact["Firmenname"], website, search_name, 
contact["Vorname"], contact["Nachname"], contact["Position"], "", ""]) + else: + debug_print(f"Kein Kontakt für Position '{pos}' bei '{search_name}' gefunden.") + if new_rows: + last_row = len(contacts_sheet.get_all_values()) + 1 + range_str = f"A{last_row}:H{last_row + len(new_rows) - 1}" + contacts_sheet.update(values=new_rows, range_name=range_str) + debug_print(f"{len(new_rows)} Kontakte in 'Contacts' hinzugefügt.") + else: + debug_print("Keine Kontakte gefunden in der Haupttabelle.") + # ==================== MAIN PROGRAMM ==================== if __name__ == "__main__": print("Modi:") @@ -861,6 +928,7 @@ if __name__ == "__main__": print("4 = Nur Wikipedia-Suche (Zeilen ohne Wikipedia-Timestamp)") print("5 = Nur ChatGPT-Bewertung (Zeilen ohne ChatGPT-Timestamp)") print("6 = Contact Research (via SerpAPI)") + print("7 = Contacts (LinkedIn) – Kontakte in das Contacts-Blatt schreiben") mode_input = input("Wählen Sie den Modus: ").strip() if mode_input == "2": MODE = "2" @@ -872,6 +940,8 @@ if __name__ == "__main__": MODE = "5" elif mode_input == "6": MODE = "6" + elif mode_input == "7": + MODE = "7" else: MODE = "1" if MODE == "1": @@ -887,13 +957,16 @@ if __name__ == "__main__": processor.process_rows() elif MODE == "4": processor = DataProcessor() - # Für Mode 4: Nur Wikipedia-Suche - processor.process_rows(num_rows=0) # Unser _process_single_row prüft dann die Wiki-Timestamp-Bedingung - process_wikipedia_only() + for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2): + if len(row) <= 39 or row[39].strip() == "": + processor._process_single_row(i, row, process_wiki=True, process_chatgpt=False) elif MODE == "5": processor = DataProcessor() - # Für Mode 5: Nur ChatGPT-Bewertung - processor.process_rows(num_rows=0) + for i, row in enumerate(processor.sheet_handler.sheet_values[1:], start=2): + if len(row) <= 40 or row[40].strip() == "": + processor._process_single_row(i, row, process_wiki=False, process_chatgpt=True) elif MODE == "6": 
process_contact_research() + elif MODE == "7": + process_contacts() print(f"\n✅ Auswertung abgeschlossen ({Config.VERSION})")