Erste funktionierende Version!
This commit is contained in:
@@ -133,8 +133,17 @@ class DealfrontScraper:
|
||||
logger.info(f"{len(rows)} Firmen-Datenzeilen zur Verarbeitung gefunden.")
|
||||
for i, row in enumerate(rows, 1):
|
||||
try:
|
||||
company_name = row.find_element(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text").get_attribute("title").strip()
|
||||
website = row.find_element(By.CSS_SELECTOR, "a.text-gray-400.t-highlight-text").text.strip()
|
||||
# Name: erst title, dann Fallback auf Text
|
||||
name_elem = row.find_element(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
|
||||
company_name = (name_elem.get_attribute("title") or name_elem.text).strip()
|
||||
|
||||
# Website: erst Link in td[2], sonst reiner Zellen-Text
|
||||
try:
|
||||
website_elem = row.find_element(By.XPATH, ".//td[2]//a")
|
||||
website = website_elem.text.strip()
|
||||
except NoSuchElementException:
|
||||
website = row.find_element(By.XPATH, ".//td[2]").text.strip()
|
||||
|
||||
results.append({'name': company_name, 'website': website})
|
||||
except NoSuchElementException:
|
||||
logger.warning(f"Zeile {i}: Name oder Webseite nicht extrahierbar. Überspringe.")
|
||||
|
||||
Reference in New Issue
Block a user