diff --git a/brancheneinstufung.py b/brancheneinstufung.py index 4cfefc96..eefce486 100644 --- a/brancheneinstufung.py +++ b/brancheneinstufung.py @@ -720,21 +720,22 @@ class DataProcessor: break self._process_single_row(i, row) rows_processed += 1 - def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=True, process_chatgpt=True): - company_name = row_data[1] if len(row_data) > 1 else "" - website = row_data[2] if len(row_data) > 2 else "" - wiki_update_range = f"K{row_num}:Q{row_num}" - dt_wiki_range = f"AM{row_num}" # Wikipedia Timestamp - dt_chat_range = f"AN{row_num}" # ChatGPT Timestamp - dt_last_range = f"AN{row_num}" # Falls benötigt, hier können Sie noch "Timestamp letzte Prüfung" anpassen - ver_range = f"AO{row_num}" # Version - print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}") - current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - - # Wikipedia-Teil (Spalte AM) – nur ausführen, wenn kein Wiki-Timestamp vorhanden oder force_all True oder explizit process_wiki=True - wiki_data = None - if force_all or process_wiki or (len(row_data) <= 38 or row_data[38].strip() == ""): - if len(row_data) > 11 and row_data[10].strip() not in ["", "k.A."]: +def _process_single_row(self, row_num, row_data, force_all=False, process_wiki=True, process_chatgpt=True): + company_name = row_data[1] if len(row_data) > 1 else "" + website = row_data[2] if len(row_data) > 2 else "" + wiki_update_range = f"K{row_num}:Q{row_num}" + dt_wiki_range = f"AM{row_num}" # Wikipedia Timestamp + dt_chat_range = f"AN{row_num}" # ChatGPT Timestamp + ver_range = f"AO{row_num}" # Version + print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}") + current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + # Wikipedia-Teil: Wird ausgeführt, wenn process_wiki True ist oder force_all aktiv ist. + if force_all or process_wiki: + # Hier zuerst prüfen, ob wir bereits einen Wiki-Timestamp haben (Spalte AM) + if len(row_data) <= 38 or row_data[38].strip() == "": + # Führe die Wikipedia-Auswertung durch + if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."]: wiki_url = row_data[10].strip() try: wiki_data = self.wiki_scraper.extract_company_data(wiki_url) @@ -764,39 +765,47 @@ class DataProcessor: ] self.sheet_handler.sheet.update(values=[wiki_values], range_name=wiki_update_range) self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_wiki_range) - # ChatGPT-Bewertung (Spalte AN) – nur ausführen, wenn kein ChatGPT-Timestamp vorhanden oder force_all/ process_chatgpt True - if force_all or process_chatgpt or (len(row_data) <= 39 or row_data[39].strip() == ""): + else: + debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt – überspringe Wiki-Auswertung.") + + # ChatGPT-Teil: Wird nur ausgeführt, wenn process_chatgpt True ist oder force_all aktiv ist. + if force_all or process_chatgpt: + # Hier prüfen, ob bereits ein ChatGPT-Timestamp in Spalte AN vorliegt + if len(row_data) <= 39 or row_data[39].strip() == "": crm_umsatz = row_data[8] if len(row_data) > 8 else "k.A." - abgleich_result = compare_umsatz_values(crm_umsatz, wiki_data.get('umsatz', 'k.A.') if wiki_data else "k.A.") + abgleich_result = compare_umsatz_values(crm_umsatz, wiki_data.get('umsatz', 'k.A.') if 'wiki_data' in locals() else "k.A.") self.sheet_handler.sheet.update(values=[[abgleich_result]], range_name=f"AG{row_num}") crm_data = ";".join(row_data[1:10]) wiki_data_str = ";".join(row_data[11:17]) valid_result = validate_article_with_chatgpt(crm_data, wiki_data_str) self.sheet_handler.sheet.update(values=[[valid_result]], range_name=f"R{row_num}") - fsm_result = evaluate_fsm_suitability(company_name, wiki_data if wiki_data else {}) + fsm_result = evaluate_fsm_suitability(company_name, wiki_data if 'wiki_data' in locals() else {}) self.sheet_handler.sheet.update(values=[[fsm_result["suitability"]]], range_name=f"Y{row_num}") self.sheet_handler.sheet.update(values=[[fsm_result["justification"]]], range_name=f"Z{row_num}") - st_estimate = evaluate_servicetechnicians_estimate(company_name, wiki_data if wiki_data else {}) + st_estimate = evaluate_servicetechnicians_estimate(company_name, wiki_data if 'wiki_data' in locals() else {}) self.sheet_handler.sheet.update(values=[[st_estimate]], range_name=f"AD{row_num}") internal_value = row_data[7] if len(row_data) > 7 else "k.A." internal_category = map_internal_technicians(internal_value) if internal_value != "k.A." else "k.A." if internal_category != "k.A." and st_estimate != internal_category: - explanation = evaluate_servicetechnicians_explanation(company_name, st_estimate, wiki_data if wiki_data else {}) + explanation = evaluate_servicetechnicians_explanation(company_name, st_estimate, wiki_data if 'wiki_data' in locals() else {}) discrepancy = explanation else: discrepancy = "ok" self.sheet_handler.sheet.update(values=[[discrepancy]], range_name=f"AE{row_num}") - self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_chat_range) - # Letzten Timestamp und Version aktualisieren (Spalte AO) - self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AO{row_num}") - self.sheet_handler.sheet.update(values=[[Config.VERSION]], range_name=f"AO{row_num}") - debug_print(f"✅ Aktualisiert: URL: {(wiki_data.get('url', 'k.A.') if wiki_data else 'k.A.')}, " - f"Branche: {(wiki_data.get('branche', 'k.A.') if wiki_data else 'k.A.')}, " - f"Umsatz-Abgleich: {abgleich_result if 'abgleich_result' in locals() else 'k.A.'}, " - f"Validierung: {valid_result if 'valid_result' in locals() else 'k.A.'}, " - f"FSM: {fsm_result['suitability'] if 'fsm_result' in locals() else 'k.A.'}, " - f"Servicetechniker-Schätzung: {st_estimate if 'st_estimate' in locals() else 'k.A.'}") - time.sleep(Config.RETRY_DELAY) + self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AN{row_num}") + else: + debug_print(f"Zeile {row_num}: ChatGPT-Timestamp bereits gesetzt – überspringe ChatGPT-Auswertung.") + + # Aktualisiere letzten Timestamp und Version (Spalte AO) + self.sheet_handler.sheet.update(values=[[current_dt]], range_name=f"AO{row_num}") + self.sheet_handler.sheet.update(values=[[Config.VERSION]], range_name=f"AO{row_num}") + debug_print(f"✅ Aktualisiert: URL: {(wiki_data.get('url', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, " + f"Branche: {(wiki_data.get('branche', 'k.A.') if 'wiki_data' in locals() else 'k.A.')}, " + f"Umsatz-Abgleich: {abgleich_result if 'abgleich_result' in locals() else 'k.A.'}, " + f"Validierung: {valid_result if 'valid_result' in locals() else 'k.A.'}, " + f"FSM: {fsm_result['suitability'] if 'fsm_result' in locals() else 'k.A.'}, " + f"Servicetechniker-Schätzung: {st_estimate if 'st_estimate' in locals() else 'k.A.'}") + time.sleep(Config.RETRY_DELAY) # ==================== NEUER MODUS 6: CONTACT RESEARCH (via SerpAPI) ==================== def process_contact_research():