Erste funktionierende Version!
This commit is contained in:
@@ -133,8 +133,17 @@ class DealfrontScraper:
         logger.info(f"{len(rows)} Firmen-Datenzeilen zur Verarbeitung gefunden.")
         for i, row in enumerate(rows, 1):
             try:
-                company_name = row.find_element(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text").get_attribute("title").strip()
-                website = row.find_element(By.CSS_SELECTOR, "a.text-gray-400.t-highlight-text").text.strip()
+                # Name: erst title, dann Fallback auf Text
+                name_elem = row.find_element(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
+                company_name = (name_elem.get_attribute("title") or name_elem.text).strip()
+
+                # Website: erst Link in td[2], sonst reiner Zellen-Text
+                try:
+                    website_elem = row.find_element(By.XPATH, ".//td[2]//a")
+                    website = website_elem.text.strip()
+                except NoSuchElementException:
+                    website = row.find_element(By.XPATH, ".//td[2]").text.strip()
+
                 results.append({'name': company_name, 'website': website})
             except NoSuchElementException:
                 logger.warning(f"Zeile {i}: Name oder Webseite nicht extrahierbar. Überspringe.")
Reference in New Issue
Block a user