diff --git a/dealfront_enrichment.py b/dealfront_enrichment.py
index 7c2d6b89..55823185 100644
--- a/dealfront_enrichment.py
+++ b/dealfront_enrichment.py
@@ -73,37 +73,55 @@ class DealfrontScraper:
         time.sleep(1)
 
     def extract_current_page_results(self):
-        # kurzer Implicit-Wait für schnelles Fallback
+        """Extract company names and websites from the current result page.
+
+        The implicit wait is lowered while scraping so that lookups for
+        missing elements return quickly, and it is always restored in the
+        ``finally`` clause, even if one of the explicit waits raises.
+
+        Returns:
+            list[dict]: One ``{'name': ..., 'website': ...}`` entry per table
+            row that contains a company-name link; other rows are skipped.
+        """
+        # 1) Briefly lower the implicit wait so failed lookups return immediately
         self.driver.implicitly_wait(1)
-
-        # auf ≥1 Zeile warten
-        rows = self.wait.until(EC.presence_of_all_elements_located((
-            By.CSS_SELECTOR, "table#t-result-table tbody tr[id]"
-        )))
-
-        data = []
-        for row in rows:
-            # Name-Element
-            ne = row.find_elements(By.CSS_SELECTOR, "a.t-highlight-text.t-highlight-text-snippet")
-            if not ne:
-                continue
-            name = (ne[0].get_attribute("title") or ne[0].text).strip()
-
-            # Website-Element
-            we = row.find_elements(By.CSS_SELECTOR, "a.text-gray-400.t-highlight-text")
-            if we:
-                site = we[0].get_attribute("href").split("://")[-1].rstrip("/")
-            else:
-                # Fallback: Zellen-Text
-                txt = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3)")
-                site = txt[0].text.strip() if txt else ""
-
-            data.append({"name": name, "website": site})
-
-        # Implicit-Wait zurücksetzen
-        self.driver.implicitly_wait(10)
-        logger.info(f" Extrahiert: {len(data)} Zeilen")
-        return data
+        try:
+            # 2) Wait for the first company link to become visible (timeout is
+            #    whatever self.wait was built with; the original note says 15 s),
+            #    then allow a short buffer for the rest of the table to render
+            first = (By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
+            self.wait.until(EC.visibility_of_element_located(first))
+            time.sleep(1)
+
+            logger.info("Extrahiere Ergebnisse von der aktuellen Seite...")
+            results = []
+
+            # 3) Wait for at least one table row
+            rows_sel = (By.CSS_SELECTOR, "table#t-result-table tbody tr[id]")
+            self.wait.until(EC.presence_of_all_elements_located(rows_sel))
+            rows = self.driver.find_elements(*rows_sel)
+            logger.info(f"{len(rows)} Firmen-Zeilen gefunden.")
+
+            # 4) Row loop runs without any sleeps or implicit-wait changes
+            for i, row in enumerate(rows, 1):
+                # Company name via the proven selector
+                name_elems = row.find_elements(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
+                if not name_elems:
+                    logger.warning(f"Zeile {i}: Kein Name-Element gefunden. Überspringe.")
+                    continue
+                ne = name_elems[0]
+                company_name = (ne.get_attribute("title") or ne.text).strip()
+
+                # Website via the proven selector; empty string when the row has none
+                web_elems = row.find_elements(By.CSS_SELECTOR, "a.text-gray-400.t-highlight-text")
+                website = web_elems[0].text.strip() if web_elems else ""
+
+                results.append({'name': company_name, 'website': website})
+
+            logger.info(f"Extraktion abgeschlossen: {len(results)} Firmen.")
+            return results
+
+        finally:
+            # 5) Restore the implicit wait to the default (10 s here)
+            self.driver.implicitly_wait(10)
 
     def click_next_page(self):
         # Paginator-Buttons greifen