Bugfix
This commit is contained in:
@@ -581,122 +581,124 @@ class DataProcessor:
|
||||
self._process_single_row(i, row)
|
||||
rows_processed += 1
|
||||
def _process_single_row(self, row_num, row_data, process_wiki=True, process_chatgpt=True):
|
||||
total_tokens = 0
|
||||
company_name = row_data[1] if len(row_data) > 1 else ""
|
||||
website = row_data[3] if len(row_data) > 3 else ""
|
||||
# Wiki-Daten werden in Spalten L bis R abgelegt
|
||||
wiki_update_range = f"L{row_num}:R{row_num}"
|
||||
dt_wiki_range = f"AN{row_num}"
|
||||
dt_chat_range = f"AO{row_num}"
|
||||
ver_range = f"AP{row_num}"
|
||||
print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}")
|
||||
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
# Wiki-Verarbeitung
|
||||
if process_wiki:
|
||||
if len(row_data) <= 39 or row_data[39].strip() == "":
|
||||
if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."]:
|
||||
wiki_url = row_data[10].strip()
|
||||
try:
|
||||
company_data = self.wiki_scraper.extract_company_data(wiki_url)
|
||||
except Exception as e:
|
||||
debug_print(f"Fehler beim Laden des vorgeschlagenen Wikipedia-Artikels: {e}")
|
||||
article = self.wiki_scraper.search_company_article(company_name, website)
|
||||
company_data = self.wiki_scraper.extract_company_data(article.url) if article else {
|
||||
'url': 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.',
|
||||
'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.',
|
||||
'full_infobox': 'k.A.'
|
||||
}
|
||||
else:
|
||||
wiki_url = "k.A."
|
||||
total_tokens = 0
|
||||
company_name = row_data[1] if len(row_data) > 1 else ""
|
||||
website = row_data[3] if len(row_data) > 3 else ""
|
||||
# Default-Initialisierung für company_data, falls Wiki-Auswertung übersprungen wird
|
||||
company_data = {
|
||||
'url': 'k.A.',
|
||||
'first_paragraph': 'k.A.',
|
||||
'branche': 'k.A.',
|
||||
'umsatz': 'k.A.',
|
||||
'mitarbeiter': 'k.A.',
|
||||
'categories': 'k.A.',
|
||||
'full_infobox': 'k.A.'
|
||||
}
|
||||
# Wiki-Daten werden in Spalten L bis R abgelegt
|
||||
wiki_update_range = f"L{row_num}:R{row_num}"
|
||||
dt_wiki_range = f"AN{row_num}"
|
||||
dt_chat_range = f"AO{row_num}"
|
||||
ver_range = f"AP{row_num}"
|
||||
print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Verarbeite Zeile {row_num}: {company_name}")
|
||||
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
# Wiki-Verarbeitung
|
||||
if process_wiki:
|
||||
if len(row_data) <= 39 or row_data[39].strip() == "":
|
||||
if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."]:
|
||||
wiki_url = row_data[10].strip()
|
||||
try:
|
||||
company_data = self.wiki_scraper.extract_company_data(wiki_url)
|
||||
except Exception as e:
|
||||
debug_print(f"Fehler beim Laden des vorgeschlagenen Wikipedia-Artikels: {e}")
|
||||
article = self.wiki_scraper.search_company_article(company_name, website)
|
||||
company_data = self.wiki_scraper.extract_company_data(article.url) if article else {
|
||||
'url': 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.',
|
||||
'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.',
|
||||
'full_infobox': 'k.A.'
|
||||
}
|
||||
wiki_values = [
|
||||
row_data[10] if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."] else "k.A.",
|
||||
company_data.get('url', 'k.A.'),
|
||||
company_data.get('first_paragraph', 'k.A.'),
|
||||
company_data.get('branche', 'k.A.'),
|
||||
company_data.get('umsatz', 'k.A.'),
|
||||
company_data.get('mitarbeiter', 'k.A.'),
|
||||
company_data.get('categories', 'k.A.')
|
||||
]
|
||||
self.sheet_handler.sheet.update(values=[wiki_values], range_name=wiki_update_range)
|
||||
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_wiki_range)
|
||||
else:
|
||||
debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt – überspringe Wiki-Auswertung.")
|
||||
# ChatGPT-Verarbeitung
|
||||
if process_chatgpt:
|
||||
# Umsatzvergleich: Ergebnis in "Chat Wiki Konsistenzprüfung" (Spalte S)
|
||||
crm_umsatz = row_data[9] if len(row_data) > 9 else "k.A."
|
||||
consistency_result = compare_umsatz_values(crm_umsatz, company_data.get('umsatz', 'k.A.'))
|
||||
self.sheet_handler.sheet.update(values=[[consistency_result]], range_name=f"S{row_num}")
|
||||
# Validierung Wikipedia-Artikel: Ergebnis wird aufgeteilt in "Chat Begründung Wiki Inkonsistenz" (T) und
|
||||
# "Chat Vorschlag Wiki Artikel" (U)
|
||||
crm_data = ";".join(row_data[1:10])
|
||||
wiki_data_str = ";".join(row_data[11:18])
|
||||
prompt = ("Bitte überprüfe, ob die folgenden beiden Datensätze grundsätzlich zum gleichen Unternehmen gehören. "
|
||||
f"CRM-Daten: {crm_data} | Wikipedia-Daten: {wiki_data_str}")
|
||||
valid_result, tokens = safe_chatgpt_call(prompt, crm_data + " " + wiki_data_str, row_data[10] if len(row_data)>10 else "k.A.")
|
||||
wiki_url = "k.A."
|
||||
article = self.wiki_scraper.search_company_article(company_name, website)
|
||||
company_data = self.wiki_scraper.extract_company_data(article.url) if article else {
|
||||
'url': 'k.A.', 'first_paragraph': 'k.A.', 'branche': 'k.A.',
|
||||
'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.',
|
||||
'full_infobox': 'k.A.'
|
||||
}
|
||||
wiki_values = [
|
||||
row_data[10] if len(row_data) > 10 and row_data[10].strip() not in ["", "k.A."] else "k.A.",
|
||||
company_data.get('url', 'k.A.'),
|
||||
company_data.get('first_paragraph', 'k.A.'),
|
||||
company_data.get('branche', 'k.A.'),
|
||||
company_data.get('umsatz', 'k.A.'),
|
||||
company_data.get('mitarbeiter', 'k.A.'),
|
||||
company_data.get('categories', 'k.A.')
|
||||
]
|
||||
self.sheet_handler.sheet.update(values=[wiki_values], range_name=wiki_update_range)
|
||||
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_wiki_range)
|
||||
else:
|
||||
debug_print(f"Zeile {row_num}: Wikipedia-Timestamp bereits gesetzt – überspringe Wiki-Auswertung.")
|
||||
# ChatGPT-Verarbeitung
|
||||
if process_chatgpt:
|
||||
crm_umsatz = row_data[9] if len(row_data) > 9 else "k.A."
|
||||
consistency_result = compare_umsatz_values(crm_umsatz, company_data.get('umsatz', 'k.A.'))
|
||||
self.sheet_handler.sheet.update(values=[[consistency_result]], range_name=f"S{row_num}")
|
||||
crm_data = ";".join(row_data[1:10])
|
||||
wiki_data_str = ";".join(row_data[11:18])
|
||||
prompt = ("Bitte überprüfe, ob die folgenden beiden Datensätze grundsätzlich zum gleichen Unternehmen gehören. "
|
||||
f"CRM-Daten: {crm_data} | Wikipedia-Daten: {wiki_data_str}")
|
||||
valid_result, tokens = safe_chatgpt_call(prompt, crm_data + " " + wiki_data_str, row_data[10] if len(row_data) > 10 else "k.A.")
|
||||
total_tokens += tokens
|
||||
if valid_result.strip().upper() == "OK":
|
||||
wiki_consistency = "OK"
|
||||
wiki_article_suggestion = ""
|
||||
else:
|
||||
wiki_consistency = "X"
|
||||
wiki_article_suggestion = valid_result
|
||||
self.sheet_handler.sheet.update(values=[[wiki_consistency]], range_name=f"T{row_num}")
|
||||
self.sheet_handler.sheet.update(values=[[wiki_article_suggestion]], range_name=f"U{row_num}")
|
||||
prompt_fsm = f"Bitte bewerte, ob das Unternehmen '{company_name}' für den Einsatz einer Field Service Management Lösung geeignet ist. Antworte mit 'Ja' oder 'Nein' und begründe kurz."
|
||||
fsm_result, tokens = safe_chatgpt_call(prompt_fsm, company_name, row_data[10] if len(row_data) > 10 else "k.A.")
|
||||
total_tokens += tokens
|
||||
parts = fsm_result.split("-", 1)
|
||||
fsm_suitability = parts[0].strip() if parts else fsm_result
|
||||
fsm_justification = parts[1].strip() if len(parts) > 1 else ""
|
||||
self.sheet_handler.sheet.update(values=[[fsm_suitability]], range_name=f"Z{row_num}")
|
||||
self.sheet_handler.sheet.update(values=[[fsm_justification]], range_name=f"AA{row_num}")
|
||||
prompt_st = f"Bitte schätze die Anzahl der Servicetechniker für das Unternehmen '{company_name}' ein. Antwortoptionen: '<50 Techniker', '>100 Techniker', '>200 Techniker', '>500 Techniker'."
|
||||
st_estimate, tokens = safe_chatgpt_call(prompt_st, company_name, row_data[10] if len(row_data) > 10 else "k.A.")
|
||||
total_tokens += tokens
|
||||
self.sheet_handler.sheet.update(values=[[st_estimate]], range_name=f"AE{row_num}")
|
||||
internal_value = row_data[7] if len(row_data) > 7 else "k.A."
|
||||
internal_category = map_internal_technicians(internal_value) if internal_value != "k.A." else "k.A."
|
||||
if internal_category != "k.A." and st_estimate != internal_category:
|
||||
prompt_st_expl = f"Bitte erkläre, warum du für das Unternehmen '{company_name}' die Anzahl der Servicetechniker als '{st_estimate}' geschätzt hast."
|
||||
st_explanation, tokens = safe_chatgpt_call(prompt_st_expl, company_name, row_data[10] if len(row_data) > 10 else "k.A.")
|
||||
total_tokens += tokens
|
||||
if valid_result.strip().upper() == "OK":
|
||||
wiki_consistency = "OK"
|
||||
wiki_article_suggestion = ""
|
||||
else:
|
||||
wiki_consistency = "X"
|
||||
wiki_article_suggestion = valid_result
|
||||
self.sheet_handler.sheet.update(values=[[wiki_consistency]], range_name=f"T{row_num}")
|
||||
self.sheet_handler.sheet.update(values=[[wiki_article_suggestion]], range_name=f"U{row_num}")
|
||||
# FSM-Relevanz: Ergebnisse in "Chat Prüfung FSM Relevanz" (Z) und "Chat Begründung für FSM Relevanz" (AA)
|
||||
prompt_fsm = f"Bitte bewerte, ob das Unternehmen '{company_name}' für den Einsatz einer Field Service Management Lösung geeignet ist. Antworte mit 'Ja' oder 'Nein' und begründe kurz."
|
||||
fsm_result, tokens = safe_chatgpt_call(prompt_fsm, company_name, row_data[10] if len(row_data)>10 else "k.A.")
|
||||
total_tokens += tokens
|
||||
parts = fsm_result.split("-", 1)
|
||||
fsm_suitability = parts[0].strip() if parts else fsm_result
|
||||
fsm_justification = parts[1].strip() if len(parts) > 1 else ""
|
||||
self.sheet_handler.sheet.update(values=[[fsm_suitability]], range_name=f"Z{row_num}")
|
||||
self.sheet_handler.sheet.update(values=[[fsm_justification]], range_name=f"AA{row_num}")
|
||||
# Servicetechniker-Schätzung: Ergebnisse in "Chat Einschätzung Anzahl Servicetechniker" (AE) und "Chat Begründung Abweichung Servicetechniker" (AF)
|
||||
prompt_st = f"Bitte schätze die Anzahl der Servicetechniker für das Unternehmen '{company_name}' ein. Antwortoptionen: '<50 Techniker', '>100 Techniker', '>200 Techniker', '>500 Techniker'."
|
||||
st_estimate, tokens = safe_chatgpt_call(prompt_st, company_name, row_data[10] if len(row_data)>10 else "k.A.")
|
||||
total_tokens += tokens
|
||||
self.sheet_handler.sheet.update(values=[[st_estimate]], range_name=f"AE{row_num}")
|
||||
internal_value = row_data[7] if len(row_data) > 7 else "k.A."
|
||||
internal_category = map_internal_technicians(internal_value) if internal_value != "k.A." else "k.A."
|
||||
if internal_category != "k.A." and st_estimate != internal_category:
|
||||
prompt_st_expl = f"Bitte erkläre, warum du für das Unternehmen '{company_name}' die Anzahl der Servicetechniker als '{st_estimate}' geschätzt hast."
|
||||
st_explanation, tokens = safe_chatgpt_call(prompt_st_expl, company_name, row_data[10] if len(row_data)>10 else "k.A.")
|
||||
total_tokens += tokens
|
||||
technician_explanation = st_explanation
|
||||
else:
|
||||
technician_explanation = "ok"
|
||||
self.sheet_handler.sheet.update(values=[[technician_explanation]], range_name=f"AF{row_num}")
|
||||
# Mitarbeiter Konsistenzprüfung: Ergebnisse in "Chat Schätzung Anzahl Mitarbeiter" (AB)
|
||||
crm_mitarbeiter = row_data[10] if len(row_data) > 10 else "k.A."
|
||||
wiki_mitarbeiter = company_data.get('mitarbeiter', "k.A.")
|
||||
try:
|
||||
crm_emp = float(crm_mitarbeiter)
|
||||
wiki_emp = float(wiki_mitarbeiter)
|
||||
diff = abs(crm_emp - wiki_emp) / ((crm_emp + wiki_emp) / 2) * 100
|
||||
reason = "Beide Werte ähnlich" if diff < 30 else "Signifikante Abweichung"
|
||||
mitarbeiter_result = f"CRM: {crm_mitarbeiter}, Wikipedia: {wiki_mitarbeiter}, Differenz: {diff:.2f}%, Einschätzung: {reason}"
|
||||
except Exception as e:
|
||||
mitarbeiter_result = "k.A."
|
||||
self.sheet_handler.sheet.update(values=[[mitarbeiter_result]], range_name=f"AB{row_num}")
|
||||
# Umsatz-Schätzung: Ergebnisse in "Chat Schätzung Umsatz" (AG) und "Chat Begründung Abweichung Umsatz" (AH)
|
||||
prompt_umsatz = f"Bitte schätze den Jahresumsatz (in Mio. €) für das Unternehmen '{company_name}' ein basierend auf den Daten: CRM: {crm_umsatz}, Wikipedia: {company_data.get('umsatz', 'k.A.')}. Antworte nur mit der Zahl."
|
||||
umsatz_estimate, tokens = safe_chatgpt_call(prompt_umsatz, company_name, row_data[10] if len(row_data)>10 else "k.A.")
|
||||
total_tokens += tokens
|
||||
self.sheet_handler.sheet.update(values=[[umsatz_estimate]], range_name=f"AG{row_num}")
|
||||
# Gesamt-Tokens in Spalte "Tokens" (AQ) speichern
|
||||
self.sheet_handler.sheet.update(values=[[str(total_tokens)]], range_name=f"AQ{row_num}")
|
||||
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_chat_range)
|
||||
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=ver_range)
|
||||
self.sheet_handler.sheet.update(values=[[Config.VERSION]], range_name=ver_range)
|
||||
debug_print(f"Zeile {row_num} verifiziert: URL: {company_data.get('url', 'k.A.')}, Branche: {company_data.get('branche', 'k.A.')}")
|
||||
time.sleep(Config.RETRY_DELAY)
|
||||
technician_explanation = st_explanation
|
||||
else:
|
||||
technician_explanation = "ok"
|
||||
self.sheet_handler.sheet.update(values=[[technician_explanation]], range_name=f"AF{row_num}")
|
||||
crm_mitarbeiter = row_data[10] if len(row_data) > 10 else "k.A."
|
||||
wiki_mitarbeiter = company_data.get('mitarbeiter', "k.A.")
|
||||
try:
|
||||
crm_emp = float(crm_mitarbeiter)
|
||||
wiki_emp = float(wiki_mitarbeiter)
|
||||
diff = abs(crm_emp - wiki_emp) / ((crm_emp + wiki_emp) / 2) * 100
|
||||
reason = "Beide Werte ähnlich" if diff < 30 else "Signifikante Abweichung"
|
||||
mitarbeiter_result = f"CRM: {crm_mitarbeiter}, Wikipedia: {wiki_mitarbeiter}, Differenz: {diff:.2f}%, Einschätzung: {reason}"
|
||||
except Exception as e:
|
||||
mitarbeiter_result = "k.A."
|
||||
self.sheet_handler.sheet.update(values=[[mitarbeiter_result]], range_name=f"AB{row_num}")
|
||||
prompt_umsatz = f"Bitte schätze den Jahresumsatz (in Mio. €) für das Unternehmen '{company_name}' ein basierend auf den Daten: CRM: {crm_umsatz}, Wikipedia: {company_data.get('umsatz', 'k.A.')}. Antworte nur mit der Zahl."
|
||||
umsatz_estimate, tokens = safe_chatgpt_call(prompt_umsatz, company_name, row_data[10] if len(row_data) > 10 else "k.A.")
|
||||
total_tokens += tokens
|
||||
self.sheet_handler.sheet.update(values=[[umsatz_estimate]], range_name=f"AG{row_num}")
|
||||
self.sheet_handler.sheet.update(values=[[str(total_tokens)]], range_name=f"AQ{row_num}")
|
||||
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=dt_chat_range)
|
||||
self.sheet_handler.sheet.update(values=[[current_dt]], range_name=ver_range)
|
||||
self.sheet_handler.sheet.update(values=[[Config.VERSION]], range_name=ver_range)
|
||||
debug_print(f"Zeile {row_num} verifiziert: URL: {company_data.get('url', 'k.A.')}, Branche: {company_data.get('branche', 'k.A.')}")
|
||||
time.sleep(Config.RETRY_DELAY)
|
||||
|
||||
# ==================== NEUER MODUS: CONTACT RESEARCH (via SerpAPI) ====================
|
||||
def process_contact_research():
|
||||
|
||||
Reference in New Issue
Block a user