diff --git a/dealfront_enrichment.py b/dealfront_enrichment.py index cf1ae3a5..4e47542b 100644 --- a/dealfront_enrichment.py +++ b/dealfront_enrichment.py @@ -133,8 +133,17 @@ class DealfrontScraper: logger.info(f"{len(rows)} Firmen-Datenzeilen zur Verarbeitung gefunden.") for i, row in enumerate(rows, 1): try: - company_name = row.find_element(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text").get_attribute("title").strip() - website = row.find_element(By.CSS_SELECTOR, "a.text-gray-400.t-highlight-text").text.strip() + # Name: try the title attribute first, then fall back to the element text + name_elem = row.find_element(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text") + company_name = (name_elem.get_attribute("title") or name_elem.text).strip() + + # Website: try the link in td[2] first, otherwise use the plain cell text + try: + website_elem = row.find_element(By.XPATH, ".//td[2]//a") + website = website_elem.text.strip() + except NoSuchElementException: + website = row.find_element(By.XPATH, ".//td[2]").text.strip() + + results.append({'name': company_name, 'website': website}) except NoSuchElementException: logger.warning(f"Zeile {i}: Name oder Webseite nicht extrahierbar. Überspringe.")