duplicate_checker.py aktualisiert
This commit is contained in:
@@ -426,15 +426,22 @@ def main(job_id=None):
|
||||
final_df = match_df.join(result_df)
|
||||
|
||||
# Bereinige die temporären Spalten für eine saubere Ausgabe
|
||||
cols_to_drop = ['normalized_name', 'normalized_domain', 'block_keys', 'Effektive Website', 'domain_use_flag']
|
||||
final_df = final_df.drop(columns=[col for col in cols_to_drop if col in final_df.columns])
|
||||
# KORREKTUR: 'block_key' statt 'block_keys'
|
||||
cols_to_drop = ['normalized_name', 'normalized_domain', 'block_key', 'Effektive Website', 'domain_use_flag']
|
||||
final_df = final_df.drop(columns=[col for col in cols_to_drop if col in final_df.columns], errors='ignore')
|
||||
|
||||
# NEU: Robuster Schreibprozess zur Vermeidung von Typ-Fehlern
|
||||
# 1. Alle Spalten explizit in String konvertieren, um Inkompatibilitäten mit der API (z.B. numpy-Typen) zu vermeiden.
|
||||
# 2. Die dabei aus NaN/None entstandenen Strings 'nan' und 'None' durch einen leeren String ersetzen.
|
||||
upload_df = final_df.astype(str).replace({'nan': '', 'None': ''})
|
||||
|
||||
# Konvertiere in Liste von Listen für den Upload
|
||||
data_to_write = [final_df.columns.tolist()] + final_df.fillna('').values.tolist()
|
||||
data_to_write = [upload_df.columns.tolist()] + upload_df.values.tolist()
|
||||
|
||||
logger.info(f"Versuche, {len(data_to_write) - 1} Ergebniszeilen in das Sheet '{MATCHING_SHEET_NAME}' zu schreiben...")
|
||||
|
||||
ok = sheet_handler.clear_and_write_data(MATCHING_SHEET_NAME, data_to_write)
|
||||
# KORREKTUR: 'sheet' statt 'sheet_handler' verwenden
|
||||
ok = sheet.clear_and_write_data(MATCHING_SHEET_NAME, data_to_write)
|
||||
|
||||
if ok:
|
||||
logger.info("Ergebnisse erfolgreich in das Google Sheet geschrieben.")
|
||||
@@ -443,9 +450,9 @@ def main(job_id=None):
|
||||
logger.error("Fehler beim Schreiben der Ergebnisse ins Google Sheet.")
|
||||
update_status(job_id, "Fehlgeschlagen", "Fehler beim Schreiben ins Google Sheet.")
|
||||
|
||||
|
||||
# Summary
|
||||
serp_counts = Counter((str(x).lower() for x in write_df.get('Serp Vertrauen', [])))
|
||||
# KORREKTUR: 'final_df' statt 'write_df' verwenden
|
||||
serp_counts = Counter((str(x).lower() for x in final_df.get('Serp Vertrauen', [])))
|
||||
logger.info("===== Summary =====")
|
||||
logger.info(f"Matches total: {metrics['matches_total']} | mit Domain: {metrics['matches_domain']} | mit Ort: {metrics['matches_with_loc']} | nur Name: {metrics['matches_name_only']}")
|
||||
logger.info(f"Serp Vertrauen: {dict(serp_counts)}")
|
||||
|
||||
Reference in New Issue
Block a user