From 483d21405fe73cb2c8a52865b51b06b3478a637c Mon Sep 17 00:00:00 2001 From: Floke Date: Tue, 8 Jul 2025 17:12:17 +0000 Subject: [PATCH] dealfront_enrichment.py aktualisiert --- dealfront_enrichment.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/dealfront_enrichment.py b/dealfront_enrichment.py index b752ac7e..f7e68f16 100644 --- a/dealfront_enrichment.py +++ b/dealfront_enrichment.py @@ -135,24 +135,26 @@ class DealfrontScraper: self.driver.implicitly_wait(1) for i, row in enumerate(rows, 1): - try: - # Firmennamen holen... - name_elem = row.find_element(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text") - company_name = (name_elem.get_attribute("title") or name_elem.text).strip() - - # Website aus der dritten Spalte: erst href, dann Text-Fallback - elems = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3) a") - if elems: - website = elems[0].get_attribute("href").split("://", 1)[1].rstrip("/") - else: - website = row.find_element(By.CSS_SELECTOR, "td:nth-of-type(3)").text.strip() - - results.append({'name': company_name, 'website': website}) - except NoSuchElementException: - logger.warning(f"Zeile {i}: Name oder Webseite nicht extrahierbar. Überspringe.") + # Name per find_elements (kein Exception-Overhead) + name_elems = row.find_elements(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text") + if not name_elems: + logger.warning(f"Zeile {i}: Kein Name-Element gefunden. Überspringe.") continue + name_elem = name_elems[0] + company_name = (name_elem.get_attribute("title") or name_elem.text).strip() - # Implicit-Wait wiederherstellen (z. B. 10 Sekunden) + # Website per find_elements + web_elems = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3) a") + if web_elems: + website = web_elems[0].get_attribute("href").split("://", 1)[1].rstrip("/") + else: + # no-link-Fallback: Text aus td + text_elems = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3)") + website = text_elems[0].text.strip() if text_elems else "" + + results.append({'name': company_name, 'website': website}) + + # Implicit-Wait wiederherstellen (empfehlenswert 10 s) self.driver.implicitly_wait(10) logger.info(f"Extraktion abgeschlossen. {len(results)} Firmen gefunden.")