bugfix
This commit is contained in:
@@ -5215,53 +5215,129 @@ class DataProcessor:
|
||||
f"{cleared_suggestion_count} ungültige Vorschläge gelöscht/markiert."
|
||||
)
|
||||
|
||||
# process_website_details Methode (früher process_website_details_for_marked_rows)
|
||||
def process_website_details(self, limit=None):
    """
    EXPERIMENTAL: extract website details for rows flagged with 'x'.

    Reloads the sheet through ``self.sheet_handler``, walks every data row
    and, for each row whose 'ReEval Flag' cell equals 'x' (compared
    case-insensitively after stripping whitespace) and whose 'CRM Website'
    cell holds a usable URL, calls ``scrape_website_details(url)``. The
    stringified result is queued for the 'Website Details' column; when
    COLUMN_MAP has no such entry, the 'Website Rohtext' column is used as a
    fallback. All queued cells are sent in one batch update at the end.

    The 'x' flag itself is never written to, so flagged rows stay flagged.

    Args:
        limit (int, optional): Maximum number of rows to run the extraction
            for. ``None`` (the default) means no limit.

    Returns:
        None. Outcomes are reported through ``logging`` only; every abort
        path (load failure, missing column) also returns None.
    """
    logging.info(
        f"Starte Modus (EXPERIMENTELL): Website Detail Extraction "
        f"für Zeilen mit 'x' in Spalte A. "
        f"Limit: {limit if limit is not None else 'Unbegrenzt'}"
    )

    if not self.sheet_handler.load_data():
        logging.error("FEHLER beim Laden der Daten.")
        return

    data_rows = self.sheet_handler.get_data()
    # Offset used to turn a 0-based index into data_rows into a 1-based
    # sheet row number (row = index + header_rows + 1).
    header_rows = 5
    rows_processed_count = 0
    updates = []

    # Resolve the column indices and the A1-style letter of the target column.
    try:
        reeval_col_idx = COLUMN_MAP["ReEval Flag"]
        website_col_idx = COLUMN_MAP["CRM Website"]

        # Prefer the dedicated 'Website Details' column ...
        details_col_idx = COLUMN_MAP.get("Website Details")
        if details_col_idx is None:
            # ... and fall back to 'Website Rohtext' when it is not mapped.
            details_col_idx = COLUMN_MAP.get("Website Rohtext")
            if details_col_idx is None:
                logging.critical(
                    "FEHLER: Weder 'Website Details' noch 'Website Rohtext' "
                    "als Spalte vorhanden."
                )
                return
            logging.warning(
                "Keine Spalte 'Website Details' in COLUMN_MAP gefunden, "
                "nutze 'Website Rohtext' als Fallback."
            )

        # The index is passed on as index + 1 to obtain the column letter.
        details_col_letter = self.sheet_handler._get_col_letter(details_col_idx + 1)

    except KeyError as e:
        logging.critical(f"FEHLER: Benötigte Spalte '{e.args[0]}' fehlt.")
        return
    except Exception as e:
        logging.critical(f"FEHLER beim Holen der Spaltenbuchstaben: {e}")
        return

    for i, row in enumerate(data_rows):
        row_num_in_sheet = i + header_rows + 1

        # Stop as soon as the requested number of rows has been handled.
        if limit is not None and rows_processed_count >= limit:
            logging.info(f"Limit ({limit}) erreicht.")
            break

        # Only rows carrying 'x' in the ReEval flag column are considered;
        # rows too short to contain that column are skipped as well.
        if len(row) <= reeval_col_idx or str(row[reeval_col_idx]).strip().lower() != "x":
            continue

        # Fetch the website URL; a short row yields an empty URL.
        website_url = ""
        if len(row) > website_col_idx:
            website_url = str(row[website_col_idx]).strip()

        # Empty cells and the placeholder "k.A." do not count as a URL.
        if not website_url or website_url.lower() == "k.a.":
            logging.warning(
                f"Zeile {row_num_in_sheet}: Keine gültige Website-URL, überspringe."
            )
            continue

        logging.info(
            f"Zeile {row_num_in_sheet}: Extrahiere Website Details von '{website_url}'..."
        )
        # Counted before the extraction runs, so failed extractions still
        # consume part of the limit.
        rows_processed_count += 1

        # Extraction errors are written into the cell instead of aborting
        # the whole run.
        try:
            details = scrape_website_details(website_url)
        except NameError:
            logging.critical(
                "FEHLER: Funktion 'scrape_website_details' nicht definiert!"
            )
            details = "FEHLER: Funktion nicht definiert"
        except Exception as e_detail:
            logging.exception(
                f"Fehler bei scrape_website_details für {website_url}: {e_detail}"
            )
            details = f"FEHLER: {e_detail}"

        # Queue the cell update; everything is sent in one batch afterwards.
        updates.append({
            'range': f'{details_col_letter}{row_num_in_sheet}',
            'values': [[str(details)]],
        })

        # Short pause between rows to go easy on rate limits.
        time.sleep(getattr(Config, 'RETRY_DELAY', 5) * 0.2)

    # Send the batch update only when there is something to write.
    if updates:
        logging.info(
            f"Sende Batch-Update für {len(updates)} Zellen "
            f"({rows_processed_count} Zeilen geprüft)..."
        )
        success = self.sheet_handler.batch_update_cells(updates)
        if success:
            logging.info("Batch-Update erfolgreich.")
        else:
            logging.error("FEHLER beim Batch-Update.")
    else:
        logging.info("Keine 'x'-Zeilen gefunden für Detail-Extraktion.")

    logging.info(
        f"Modus 'website_details' abgeschlossen. "
        f"{rows_processed_count} Zeilen geprüft."
    )
|
||||
|
||||
# process_contact_research Methode
|
||||
def process_contact_research(self, limit=None): # <<< Methode in DataProcessor
|
||||
|
||||
Reference in New Issue
Block a user