diff --git a/dealfront_enrichment.py b/dealfront_enrichment.py
index ab40026b..77c5b172 100644
--- a/dealfront_enrichment.py
+++ b/dealfront_enrichment.py
@@ -113,42 +113,72 @@ class DealfrontScraper:
             return False
 
     def extract_current_page_results(self):
-        # 1) Warten bis mindestens eine Ergebnis-Zeile sichtbar ist
-        rows_locator = (By.CSS_SELECTOR, "table#t-result-table tbody tr[id]")
-        WebDriverWait(self.driver, 20).until(
-            EC.visibility_of_element_located(rows_locator)
-        )
-
-        rows = self.driver.find_elements(*rows_locator)
-        logger.info(f"{len(rows)} Firmen-Zeilen gefunden.")
-
-        results = []
-        for i, row in enumerate(rows, 1):
-            # Name:
-            name_divs = row.find_elements(
-                By.CSS_SELECTOR,
-                "div.t-highlight-text.t-highlight-text-product"
-            )
-            if not name_divs:
-                logger.warning(f"Zeile {i}: Kein Name-Element gefunden. Überspringe.")
-                continue
-            name_elem = name_divs[0]
-            company_name = (name_elem.get_attribute("title") or name_elem.text).strip() # :contentReference[oaicite:0]{index=0}
-
-            # Website: externer Link (target="_blank") in der dritten Spalte
-            ext_links = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3) a[target='_blank']")
-            if ext_links:
-                href = ext_links[0].get_attribute("href")
-                website = href.replace("https://", "").replace("http://", "").rstrip("/")
-            else:
-                # Fallback: reiner Text in td[3]
-                cells = row.find_elements(By.TAG_NAME, "td")
-                website = cells[2].text.strip() if len(cells) >= 3 else ""
-
-            results.append({'name': company_name, 'website': website})
-
-        logger.info(f"Extraktion abgeschlossen: {len(results)} Firmen.")
-        return results
+        """Extract company names and websites from the current result page.
+
+        Waits for the first company link to become visible, then for the
+        result-table rows to be present, and reads one name/website pair
+        per row. Rows without a name element are skipped with a warning.
+
+        Returns:
+            list[dict]: One ``{'name': ..., 'website': ...}`` entry per
+            extracted row. An empty list if an error occurs after the
+            initial wait (the error is logged and debug artifacts saved).
+
+        Raises:
+            Exception: Whatever ``self.wait.until`` raises when the first
+            company link does not become visible; that initial wait is
+            not covered by the error handler.
+        """
+        # Lower the implicit wait so lookups for missing elements do not
+        # block for the full default timeout.
+        self.driver.implicitly_wait(1)
+        try:
+            # Wait for the initial data load (first company link visible).
+            first_row_locator = (By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
+            self.wait.until(EC.visibility_of_element_located(first_row_locator))
+            time.sleep(1)
+
+            try:
+                logger.info("Extrahiere Ergebnisse von der aktuellen Seite...")
+                results = []
+
+                # Wait until at least one table row is present.
+                rows_selector = (By.CSS_SELECTOR, "table#t-result-table tbody tr[id]")
+                self.wait.until(EC.presence_of_all_elements_located(rows_selector))
+
+                rows = self.driver.find_elements(*rows_selector)
+                logger.info(f"{len(rows)} Firmen-Zeilen gefunden.")
+
+                # No further explicit waits inside the loop.
+                for i, row in enumerate(rows, 1):
+                    name_elems = row.find_elements(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
+                    if not name_elems:
+                        logger.warning(f"Zeile {i}: Kein Name-Element gefunden. Überspringe.")
+                        continue
+                    name_elem = name_elems[0]
+                    # Prefer the title attribute; fall back to the visible text.
+                    company_name = (name_elem.get_attribute("title") or name_elem.text).strip()
+
+                    # The website link is optional; default to an empty string.
+                    web_elems = row.find_elements(By.CSS_SELECTOR, "a.text-gray-400.t-highlight-text")
+                    website = web_elems[0].text.strip() if web_elems else ""
+
+                    results.append({'name': company_name, 'website': website})
+
+                logger.info(f"Extraktion abgeschlossen: {len(results)} Firmen.")
+                return results
+
+            except Exception as e:
+                logger.error(f"Schwerwiegender Fehler bei der Extraktion: {type(e).__name__}", exc_info=True)
+                self._save_debug_artifacts()
+                return []
+
+        finally:
+            # Restore the implicit wait on every exit path, including when
+            # the initial wait above raises.
+            # NOTE(review): 10 s is assumed to be the driver's standard
+            # value -- confirm against the driver setup.
+            self.driver.implicitly_wait(10)
 
 
     def close(self):