bugfix
This commit is contained in:
@@ -4718,40 +4718,110 @@ class DataProcessor:
|
||||
# Diese Methoden führen eine spezifische Aufgabe aus und arbeiten oft über das gesamte Sheet
|
||||
# oder eine gefilterte Menge.
|
||||
|
||||
# process_serp_website_lookup Methode (früher process_serp_website_lookup_for_empty)
|
||||
def process_serp_website_lookup(self, limit=None): # <<< Methode in DataProcessor
|
||||
def process_serp_website_lookup(self, limit=None):
|
||||
"""
|
||||
Sucht fehlende Websites (Spalte D ist leer oder "k.A.") via SERP API
|
||||
(Google Search) und trägt gefundene URLs in Spalte D ein.
|
||||
und trägt gefundene URLs in Spalte D ein.
|
||||
|
||||
Args:
|
||||
limit (int, optional): Maximale Anzahl zu verarbeitender Zeilen. Defaults to None.
|
||||
limit (int, optional): Maximale Anzahl zu verarbeitender Zeilen.
|
||||
"""
|
||||
logging.info(f"Starte Modus: SERP API Website Lookup für leere Zellen in Spalte D. Limit: {limit if limit is not None else 'Unbegrenzt'}")
|
||||
if not self.sheet_handler.load_data(): return logging.error("FEHLER beim Laden der Daten.")
|
||||
data_rows = self.sheet_handler.get_data(); header_rows = 5;
|
||||
rows_processed_count = 0; updates = [];
|
||||
try: website_col_idx = COLUMN_MAP["CRM Website"]; name_col_idx = COLUMN_MAP["CRM Name"]; website_col_letter = self.sheet_handler._get_col_letter(website_col_idx + 1);
|
||||
except KeyError as e: logging.critical(f"FEHLER: Benötigte Spalte '{e}' fehlt."); return;
|
||||
except Exception as e: logging.critical(f"FEHLER beim Holen der Spaltenbuchstaben: {e}"); return;
|
||||
logging.info(
|
||||
f"Starte Modus: SERP API Website Lookup für leere Zellen in Spalte D. "
|
||||
f"Limit: {limit if limit is not None else 'Unbegrenzt'}"
|
||||
)
|
||||
if not self.sheet_handler.load_data():
|
||||
return logging.error("FEHLER beim Laden der Daten.")
|
||||
|
||||
data_rows = self.sheet_handler.get_data()
|
||||
header_rows = 5
|
||||
rows_processed_count = 0
|
||||
updates = []
|
||||
|
||||
# Spaltenindizes ermitteln
|
||||
try:
|
||||
website_col_idx = COLUMN_MAP["CRM Website"]
|
||||
name_col_idx = COLUMN_MAP["CRM Name"]
|
||||
except KeyError as e:
|
||||
logging.critical(f"FEHLER: Benötigte Spalte '{e.args[0]}' fehlt.")
|
||||
return
|
||||
except Exception as e:
|
||||
logging.critical(f"FEHLER beim Holen der Spaltenbuchstaben: {e}")
|
||||
return
|
||||
|
||||
website_col_letter = self.sheet_handler._get_col_letter(website_col_idx + 1)
|
||||
|
||||
# Durch alle Zeilen iterieren
|
||||
for i, row in enumerate(data_rows):
|
||||
row_num_in_sheet = i + header_rows + 1;
|
||||
if limit is not None and rows_processed_count >= limit: logging.info(f"Limit ({limit}) erreicht."); break;
|
||||
max_needed_idx = max(website_col_idx, name_col_idx); if len(row) <= max_needed_idx: logging.debug(f"Zeile {row_num_in_sheet}: Übersprungen (Zeile zu kurz)."); continue;
|
||||
current_website = row[website_col_idx] if len(row) > website_col_idx else "";
|
||||
if not current_website or str(current_website).strip().lower() == "k.a.":
|
||||
company_name = row[name_col_idx] if len(row) > name_col_idx else ""; if not company_name or str(company_name).strip() == "": logging.warning(f"Zeile {row_num_in_sheet}: Übersprungen (kein Firmenname)."); continue;
|
||||
logging.info(f"Zeile {row_num_in_sheet}: Suche Website für '{company_name}'...");
|
||||
new_website = serp_website_lookup(company_name); # Globale Funktion mit Retry
|
||||
rows_processed_count += 1;
|
||||
if new_website != "k.A.": updates.append({'range': f'{website_col_letter}{row_num_in_sheet}', 'values': [[new_website]]}); logging.info(f"Zeile {row_num_in_sheet}: Neue Website '{new_website}' gefunden.");
|
||||
else: logging.info(f"Zeile {row_num_in_sheet}: Keine Website gefunden.");
|
||||
time.sleep(getattr(Config, 'RETRY_DELAY', 5) * 0.3);
|
||||
row_num_in_sheet = i + header_rows + 1
|
||||
|
||||
if updates: logging.info(f"Sende Batch-Update für {len(updates)} Zellen ({rows_processed_count} Zeilen geprüft)..."); success = self.sheet_handler.batch_update_cells(updates); if success: logging.info(f"Batch-Update erfolgreich."); else: logging.error(f"FEHLER beim Batch-Update.");
|
||||
else: logging.info("Keine fehlenden Websites gefunden oder keine Updates nötig.");
|
||||
logging.info(f"Modus 'website_lookup' abgeschlossen. {rows_processed_count} Zeilen geprüft.")
|
||||
# Limit prüfen
|
||||
if limit is not None and rows_processed_count >= limit:
|
||||
logging.info(f"Limit ({limit}) erreicht.")
|
||||
break
|
||||
|
||||
# Zeile überspringen, wenn sie zu kurz ist
|
||||
max_needed_idx = max(website_col_idx, name_col_idx)
|
||||
if len(row) <= max_needed_idx:
|
||||
logging.debug(
|
||||
f"Zeile {row_num_in_sheet}: Übersprungen (Zeile zu kurz)."
|
||||
)
|
||||
continue
|
||||
|
||||
# Bestehende Website auslesen
|
||||
current_website = row[website_col_idx] if len(row) > website_col_idx else ""
|
||||
if not current_website or str(current_website).strip().lower() == "k.a.":
|
||||
# Firmenname prüfen
|
||||
company_name = row[name_col_idx] if len(row) > name_col_idx else ""
|
||||
if not company_name or not str(company_name).strip():
|
||||
logging.warning(
|
||||
f"Zeile {row_num_in_sheet}: Übersprungen (kein Firmenname)."
|
||||
)
|
||||
continue
|
||||
|
||||
# SERP-Abfrage
|
||||
logging.info(
|
||||
f"Zeile {row_num_in_sheet}: Suche Website für '{company_name}'..."
|
||||
)
|
||||
new_website = serp_website_lookup(company_name)
|
||||
rows_processed_count += 1
|
||||
|
||||
if new_website != "k.A.":
|
||||
updates.append({
|
||||
'range': f'{website_col_letter}{row_num_in_sheet}',
|
||||
'values': [[new_website]]
|
||||
})
|
||||
logging.info(
|
||||
f"Zeile {row_num_in_sheet}: Neue Website '{new_website}' gefunden."
|
||||
)
|
||||
else:
|
||||
logging.info(
|
||||
f"Zeile {row_num_in_sheet}: Keine Website gefunden."
|
||||
)
|
||||
|
||||
# Kurze Pause nach jedem Lookup
|
||||
delay = getattr(Config, "RETRY_DELAY", 5) * 0.3
|
||||
time.sleep(delay)
|
||||
|
||||
# Batch-Update abschicken, falls Änderungen vorliegen
|
||||
if updates:
|
||||
logging.info(
|
||||
f"Sende Batch-Update für {len(updates)} Zellen "
|
||||
f"({rows_processed_count} Zeilen geprüft)..."
|
||||
)
|
||||
success = self.sheet_handler.batch_update_cells(updates)
|
||||
if success:
|
||||
logging.info("Batch-Update erfolgreich.")
|
||||
else:
|
||||
logging.error("FEHLER beim Batch-Update.")
|
||||
else:
|
||||
logging.info(
|
||||
"Keine fehlenden Websites gefunden oder keine Updates nötig."
|
||||
)
|
||||
|
||||
logging.info(
|
||||
f"Modus 'website_lookup' abgeschlossen. {rows_processed_count} Zeilen geprüft."
|
||||
)
|
||||
|
||||
# process_find_wiki_serp Methode
|
||||
def process_find_wiki_serp(self, limit=None, min_employees=500, min_umsatz=200): # <<< Methode in DataProcessor
|
||||
|
||||
Reference in New Issue
Block a user