diff --git a/dealfront_enrichment.py b/dealfront_enrichment.py
index f7e68f16..ecf3b7c4 100644
--- a/dealfront_enrichment.py
+++ b/dealfront_enrichment.py
@@ -113,50 +113,44 @@ class DealfrontScraper:
         return False
 
     def extract_current_page_results(self):
-        # Erst auf das erste Daten-Element warten, dann Puffer-Pause
+        # Implicit-Wait kurz absenken für schnellen Fallback bei fehlenden Elementen
+        self.driver.implicitly_wait(1)
+
+        # Erst auf das erste Daten-Element warten, dann optional kurzen Puffer
         first_row_locator = (By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
-        self.wait.until(
-            EC.visibility_of_element_located(first_row_locator)
-        )
-        time.sleep(2)
+        self.wait.until(EC.visibility_of_element_located(first_row_locator))
+        time.sleep(1)
 
         try:
             logger.info("Extrahiere Ergebnisse von der aktuellen Seite...")
             results = []
 
+            # Warten bis mindestens eine Daten-Zeile im DOM steht
             rows_selector = (By.CSS_SELECTOR, "table#t-result-table tbody tr[id]")
-            # Warte, bis mindestens eine Daten-Zeile im DOM ist (bis zu 15 Sekunden)
             self.wait.until(EC.presence_of_all_elements_located(rows_selector))
-            # Optionaler Minimal-Puffer für Rest-Rendering
-            time.sleep(1)
 
             rows = self.driver.find_elements(*rows_selector)
             logger.info(f"{len(rows)} Firmen-Datenzeilen zur Verarbeitung gefunden.")
-            self.driver.implicitly_wait(1)
 
             for i, row in enumerate(rows, 1):
-                # Name per find_elements (kein Exception-Overhead)
-                name_elems = row.find_elements(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
+                # Name per find_elements (vermeidet lange Exceptions)
+                name_elems = row.find_elements(By.CSS_SELECTOR, "td.sticky-column a")
                 if not name_elems:
                     logger.warning(f"Zeile {i}: Kein Name-Element gefunden. Überspringe.")
                     continue
                 name_elem = name_elems[0]
                 company_name = (name_elem.get_attribute("title") or name_elem.text).strip()
 
-                # Website per find_elements
+                # Website per find_elements aus dritter Spalte
                 web_elems = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3) a")
                 if web_elems:
                     website = web_elems[0].get_attribute("href").split("://", 1)[1].rstrip("/")
                 else:
-                    # no-link-Fallback: Text aus td
                     text_elems = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3)")
                     website = text_elems[0].text.strip() if text_elems else ""
 
                 results.append({'name': company_name, 'website': website})
 
-            # Implicit-Wait wiederherstellen (empfehlenswert 10 s)
-            self.driver.implicitly_wait(10)
-
             logger.info(f"Extraktion abgeschlossen. {len(results)} Firmen gefunden.")
             return results
 
@@ -164,6 +158,11 @@ class DealfrontScraper:
             logger.error(f"Schwerwiegender Fehler bei der Extraktion: {type(e).__name__}", exc_info=True)
             self._save_debug_artifacts()
             return []
+
+        finally:
+            # Implicit-Wait wiederherstellen (Standard 10 s)
+            self.driver.implicitly_wait(10)
+
 
     def close(self):
         if self.driver: