Erste funktionierende Version!

2025-07-08 15:48:44 +00:00
parent 9481297b33
commit 8ec5ce77f7
1 changed file with 11 additions and 2 deletions
--- a/dealfront_enrichment.py
+++ b/dealfront_enrichment.py
@@ -133,8 +133,17 @@ class DealfrontScraper:
            logger.info(f"{len(rows)} Firmen-Datenzeilen zur Verarbeitung gefunden.")
            for i, row in enumerate(rows, 1):
                try:
-                    company_name = row.find_element(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text").get_attribute("title").strip()
-                    website = row.find_element(By.CSS_SELECTOR, "a.text-gray-400.t-highlight-text").text.strip()
+                    # Name: erst title, dann Fallback auf Text
+                    name_elem = row.find_element(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
+                    company_name = (name_elem.get_attribute("title") or name_elem.text).strip()
+
+                    # Website: erst Link in td[2], sonst reiner Zellen-Text
+                    try:
+                        website_elem = row.find_element(By.XPATH, ".//td[2]//a")
+                        website = website_elem.text.strip()
+                    except NoSuchElementException:
+                        website = row.find_element(By.XPATH, ".//td[2]").text.strip()
+
                    results.append({'name': company_name, 'website': website})
                except NoSuchElementException:
                    logger.warning(f"Zeile {i}: Name oder Webseite nicht extrahierbar. Überspringe.")