From a2b60e2137510de9fe74aba0115949ee1af66520 Mon Sep 17 00:00:00 2001 From: Floke Date: Tue, 8 Jul 2025 17:52:35 +0000 Subject: [PATCH] dealfront_enrichment.py aktualisiert --- dealfront_enrichment.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dealfront_enrichment.py b/dealfront_enrichment.py index 4a89d990..ab40026b 100644 --- a/dealfront_enrichment.py +++ b/dealfront_enrichment.py @@ -113,7 +113,7 @@ class DealfrontScraper: return False def extract_current_page_results(self): - # 1) Warte auf erste Ergebniszeile (bis zu 20 Sek.) + # 1) Warten bis mindestens eine Ergebnis-Zeile sichtbar ist rows_locator = (By.CSS_SELECTOR, "table#t-result-table tbody tr[id]") WebDriverWait(self.driver, 20).until( EC.visibility_of_element_located(rows_locator) @@ -124,24 +124,24 @@ class DealfrontScraper: results = [] for i, row in enumerate(rows, 1): - # 2) Name: erstes Profil-Link () - name_links = row.find_elements( - By.XPATH, - ".//td[contains(@class,'sticky-column')]//a[contains(@href,'/h/company/')]" + # Name:
+ name_divs = row.find_elements( + By.CSS_SELECTOR, + "div.t-highlight-text.t-highlight-text-product" ) - if not name_links: - logger.warning(f"Zeile {i}: Kein Name-Link gefunden. Überspringe.") + if not name_divs: + logger.warning(f"Zeile {i}: Kein Name-Element gefunden. Überspringe.") continue - name_elem = name_links[0] - company_name = (name_elem.get_attribute("title") or name_elem.text).strip() + name_elem = name_divs[0] + company_name = (name_elem.get_attribute("title") or name_elem.text).strip() - # 3) Website: erster externer Link (target="_blank") - ext_links = row.find_elements(By.XPATH, ".//a[@target='_blank']") + # Website: externer Link (target="_blank") in der dritten Spalte + ext_links = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3) a[target='_blank']") if ext_links: href = ext_links[0].get_attribute("href") website = href.replace("https://", "").replace("http://", "").rstrip("/") else: - # Fallback: Text der 3. + # Fallback: reiner Text in td[3] cells = row.find_elements(By.TAG_NAME, "td") website = cells[2].text.strip() if len(cells) >= 3 else ""