bugfix
This commit is contained in:
@@ -1174,34 +1174,29 @@ def _process_single_row(self, row_num, row_data, process_wiki=True, process_chat
|
|||||||
else:
|
else:
|
||||||
debug_print(f"Zeile {row_num}: Keine Website gefunden für {company_name}.")
|
debug_print(f"Zeile {row_num}: Keine Website gefunden für {company_name}.")
|
||||||
|
|
||||||
# Website-Scraping: Nur durchführen, wenn der Wikipedia‑Artikel (Spalte M) "k.A." ist.
|
# Unabhängig vom process_wiki-Flag: Führe Website-Scraping durch, sofern eine Website vorliegt.
|
||||||
website_raw = "k.A."
|
website_raw = "k.A."
|
||||||
website_summary = "k.A."
|
website_summary = "k.A."
|
||||||
# Hier: Falls Wiki URL (Spalte M) "k.A." ist, dann führe das Scraping durch.
|
if website_url.strip() != "" and website_url.strip().lower() != "k.a.":
|
||||||
wiki_url_cell = row_data[11].strip().lower() if len(row_data) > 11 else "k.a."
|
# Extrahiere den Rohtext der Website
|
||||||
if wiki_url_cell == "k.a." and website_url.strip() != "" and website_url.strip().lower() != "k.a.":
|
|
||||||
website_raw = get_website_raw(website_url)
|
website_raw = get_website_raw(website_url)
|
||||||
|
# Erstelle eine Zusammenfassung des Website-Contents
|
||||||
website_summary = summarize_website_content(website_raw)
|
website_summary = summarize_website_content(website_raw)
|
||||||
try:
|
try:
|
||||||
self.sheet_handler.sheet.update(values=[[website_raw]], range_name=f"AR{row_num}")
|
self.sheet_handler.sheet.update(values=[[website_raw]], range_name=f"AR{row_num}")
|
||||||
debug_print(f"Zeile {row_num}: Spalte AR (Website-Rohtext) erfolgreich aktualisiert.")
|
debug_print(f"Zeile {row_num}: Spalte AR Update erfolgreich.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
debug_print(f"Zeile {row_num}: Fehler beim Update von Spalte AR: {e}")
|
debug_print(f"Zeile {row_num}: Fehler beim Update von Spalte AR: {e}")
|
||||||
try:
|
try:
|
||||||
self.sheet_handler.sheet.update(values=[[website_summary]], range_name=f"AS{row_num}")
|
self.sheet_handler.sheet.update(values=[[website_summary]], range_name=f"AS{row_num}")
|
||||||
debug_print(f"Zeile {row_num}: Spalte AS (Website Zusammenfassung) erfolgreich aktualisiert.")
|
debug_print(f"Zeile {row_num}: Spalte AS Update erfolgreich.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
debug_print(f"Zeile {row_num}: Fehler beim Update von Spalte AS: {e}")
|
debug_print(f"Zeile {row_num}: Fehler beim Update von Spalte AS: {e}")
|
||||||
# Falls row_data noch nicht lang genug ist, erweitere die Liste
|
|
||||||
if len(row_data) < 45:
|
|
||||||
row_data.extend([""] * (45 - len(row_data)))
|
|
||||||
row_data[43] = website_raw # Spalte AR (Index 43)
|
|
||||||
row_data[44] = website_summary # Spalte AS (Index 44)
|
|
||||||
debug_print(f"Zeile {row_num}: Website-Daten gescrapt. Rohtext (Länge {len(website_raw)}): {website_raw[:100]}..., Zusammenfassung: {website_summary}")
|
debug_print(f"Zeile {row_num}: Website-Daten gescrapt. Rohtext (Länge {len(website_raw)}): {website_raw[:100]}..., Zusammenfassung: {website_summary}")
|
||||||
else:
|
else:
|
||||||
debug_print(f"Zeile {row_num}: Kein gültiger Trigger für Website-Scraping (Wiki URL != 'k.A.'), überspringe Website-Scraping.")
|
debug_print(f"Zeile {row_num}: Kein gültiger Website-URL vorhanden, Website-Scraping wird übersprungen.")
|
||||||
|
|
||||||
# --- Wikipedia-Verarbeitung (falls process_wiki True) ---
|
# Weiterer Verarbeitungsteil: Wikipedia-Verarbeitung (falls process_wiki True)
|
||||||
wiki_update_range = f"L{row_num}:R{row_num}"
|
wiki_update_range = f"L{row_num}:R{row_num}"
|
||||||
dt_wiki_range = f"AN{row_num}"
|
dt_wiki_range = f"AN{row_num}"
|
||||||
company_data = {}
|
company_data = {}
|
||||||
@@ -1227,16 +1222,19 @@ def _process_single_row(self, row_num, row_data, process_wiki=True, process_chat
|
|||||||
'full_infobox': 'k.A.'
|
'full_infobox': 'k.A.'
|
||||||
}
|
}
|
||||||
self.sheet_handler.sheet.update(values=[[
|
self.sheet_handler.sheet.update(values=[[
|
||||||
row_data[11] if len(row_data) > 11 and row_data[11].strip() not in ["", "k.A."] else "k.A."
|
row_data[11] if len(row_data) > 11 and row_data[11].strip() not in ["", "k.A."] else "k.A.",
|
||||||
, company_data.get('url', 'k.A.'), company_data.get('first_paragraph', 'k.A.'),
|
company_data.get('url', 'k.A.'),
|
||||||
company_data.get('branche', 'k.A.'), company_data.get('umsatz', 'k.A.'),
|
company_data.get('first_paragraph', 'k.A.'),
|
||||||
company_data.get('mitarbeiter', 'k.A.'), company_data.get('categories', 'k.A.')
|
company_data.get('branche', 'k.A.'),
|
||||||
|
company_data.get('umsatz', 'k.A.'),
|
||||||
|
company_data.get('mitarbeiter', 'k.A.'),
|
||||||
|
company_data.get('categories', 'k.A.')
|
||||||
]], range_name=wiki_update_range)
|
]], range_name=wiki_update_range)
|
||||||
self.sheet_handler.sheet.update(values=[[datetime.now().strftime("%Y-%m-%d %H:%M:%S")]], range_name=dt_wiki_range)
|
self.sheet_handler.sheet.update(values=[[datetime.now().strftime("%Y-%m-%d %H:%M:%S")]], range_name=dt_wiki_range)
|
||||||
else:
|
else:
|
||||||
debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt – überspringe Wiki-Auswertung.")
|
debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt – überspringe Wiki-Auswertung.")
|
||||||
|
|
||||||
# --- ChatGPT-Verarbeitung (Umsatz, FSM, Mitarbeiter, Branchenevaluierung) ---
|
# ChatGPT-Verarbeitung (Umsatz, FSM, Mitarbeiter, Branchenevaluierung)
|
||||||
dt_chat_range = f"AO{row_num}"
|
dt_chat_range = f"AO{row_num}"
|
||||||
ver_range = f"AP{row_num}"
|
ver_range = f"AP{row_num}"
|
||||||
if process_chatgpt:
|
if process_chatgpt:
|
||||||
@@ -1269,10 +1267,8 @@ def _process_single_row(self, row_num, row_data, process_wiki=True, process_chat
|
|||||||
self.sheet_handler.sheet.update(values=[[emp_consistency]], range_name=f"AC{row_num}")
|
self.sheet_handler.sheet.update(values=[[emp_consistency]], range_name=f"AC{row_num}")
|
||||||
revenue_result = evaluate_umsatz_chatgpt(company_name, company_data.get('umsatz', 'k.A.'))
|
revenue_result = evaluate_umsatz_chatgpt(company_name, company_data.get('umsatz', 'k.A.'))
|
||||||
self.sheet_handler.sheet.update(values=[[revenue_result]], range_name=f"AG{row_num}")
|
self.sheet_handler.sheet.update(values=[[revenue_result]], range_name=f"AG{row_num}")
|
||||||
wiki_tokens = token_count(str(company_data.get('first_paragraph', '')))
|
# Hier NICHT mehr neu einlesen! Verwende die bereits extrahierten Website-Daten.
|
||||||
chat_tokens = token_count(crm_data + wiki_data_str)
|
total_tokens = f"Wiki: {token_count(str(company_data.get('first_paragraph', '')))}, Chat: {token_count(crm_data + wiki_data_str)}, Emp: {token_count(str(emp_estimate))}"
|
||||||
emp_tokens = token_count(str(emp_estimate))
|
|
||||||
total_tokens = f"Wiki: {wiki_tokens}, Chat: {chat_tokens}, Emp: {emp_tokens}"
|
|
||||||
self.sheet_handler.sheet.update(values=[[total_tokens]], range_name=f"AQ{row_num}")
|
self.sheet_handler.sheet.update(values=[[total_tokens]], range_name=f"AQ{row_num}")
|
||||||
self.sheet_handler.sheet.update(values=[[datetime.now().strftime('%Y-%m-%d %H:%M:%S')]], range_name=dt_chat_range)
|
self.sheet_handler.sheet.update(values=[[datetime.now().strftime('%Y-%m-%d %H:%M:%S')]], range_name=dt_chat_range)
|
||||||
else:
|
else:
|
||||||
@@ -1290,6 +1286,7 @@ def _process_single_row(self, row_num, row_data, process_wiki=True, process_chat
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ==================== ALIGNMENT DEMO FÜR HAUPTBLATT UND CONTACTS ====================
|
# ==================== ALIGNMENT DEMO FÜR HAUPTBLATT UND CONTACTS ====================
|
||||||
def alignment_demo_full():
|
def alignment_demo_full():
|
||||||
alignment_demo(GoogleSheetHandler().sheet)
|
alignment_demo(GoogleSheetHandler().sheet)
|
||||||
|
|||||||
Reference in New Issue
Block a user