navigation über die seiten

This commit is contained in:
2025-07-08 18:53:54 +00:00
parent b407612fed
commit 325285d0f5

View File

@@ -113,64 +113,94 @@ class DealfrontScraper:
return False
def extract_current_page_results(self):
    """Collect company name and website from every row of the current result page.

    The driver's implicit wait is lowered to 1 second while the rows are
    read and is always restored to 10 seconds afterwards, including when
    one of the explicit waits fails.

    Returns:
        A list of dicts with the keys ``'name'`` and ``'website'``, one per
        table row in which a name link was found. Rows without a name link
        are skipped with a warning.

    Raises:
        Whatever ``self.wait.until`` raises when the first company link or
        the table rows do not show up in time (presumably Selenium's
        ``TimeoutException`` -- ``self.wait`` is created outside this block).
    """
    name_css = ".sticky-column a.t-highlight-text"
    website_cell_css = "td:nth-of-type(3)"
    first_row_locator = (By.CSS_SELECTOR, name_css)
    rows_selector = (By.CSS_SELECTOR, "table#t-result-table tbody tr[id]")

    # 1) Lower the implicit wait so the per-row find_elements calls below
    #    do not block for the full default timeout when a cell has no match.
    self.driver.implicitly_wait(1)
    try:
        # 2) Wait for the first company link to become visible.
        self.wait.until(EC.visibility_of_element_located(first_row_locator))
        # NOTE(review): fixed pause kept from the original; presumably it
        # gives the table time to finish rendering -- confirm it is needed.
        time.sleep(1)

        logger.info("Extrahiere Ergebnisse von der aktuellen Seite...")
        results = []

        # 3) Wait until at least one table row is present, then fetch them all.
        self.wait.until(EC.presence_of_all_elements_located(rows_selector))
        rows = self.driver.find_elements(*rows_selector)
        logger.info(f"{len(rows)} Firmen-Zeilen gefunden.")

        # 4) Read the data using find_elements only, so a missing cell
        #    yields an empty list instead of raising.
        for i, row in enumerate(rows, 1):
            name_elems = row.find_elements(By.CSS_SELECTOR, name_css)
            if not name_elems:
                logger.warning(f"Zeile {i}: Kein Name-Element gefunden. Überspringe.")
                continue
            name_elem = name_elems[0]
            # Prefer the title attribute; fall back to the link text.
            company_name = (name_elem.get_attribute("title") or name_elem.text).strip()

            # Website: link text in the third column, otherwise the plain
            # text of that cell, otherwise an empty string.
            web_elems = row.find_elements(By.CSS_SELECTOR, f"{website_cell_css} a")
            if web_elems:
                website = web_elems[0].text.strip()
            else:
                cell = row.find_elements(By.CSS_SELECTOR, website_cell_css)
                website = cell[0].text.strip() if cell else ""

            results.append({'name': company_name, 'website': website})

        logger.info(f"Extraktion abgeschlossen: {len(results)} Firmen.")
        return results
    finally:
        # 5) Restore the implicit wait to its standard value.
        self.driver.implicitly_wait(10)
def click_next_page(self) -> bool:
    """Click the paginator's 'Next' button and wait for the active page to change.

    Returns:
        True once the button was clicked and the text of the active
        paginator button differs from its value before the click.
        False when the page has no paginator buttons at all, or when the
        last button is not enabled or carries ``disabled`` in its class.

    Raises:
        Propagates whatever ``WebDriverWait.until`` raises if the active
        page does not change within 10 seconds.
    """
    active_locator = (
        By.CSS_SELECTOR,
        "nav.eb-pagination a.eb-pagination-button.active",
    )

    # Grab all paginator buttons (Prev, page numbers, Next).
    buttons = self.driver.find_elements(By.CSS_SELECTOR, "nav.eb-pagination a.eb-pagination-button")
    if not buttons:
        # No paginator rendered (e.g. a single result page): nothing to click.
        return False
    next_btn = buttons[-1]  # the last button is 'Next'

    # get_attribute may return None when the element has no class attribute.
    if not next_btn.is_enabled() or "disabled" in (next_btn.get_attribute("class") or ""):
        return False

    # Remember the label of the currently active page.
    current = self.driver.find_element(*active_locator).text
    next_btn.click()

    # Wait until the active page label changes.
    WebDriverWait(self.driver, 10).until(
        lambda d: d.find_element(*active_locator).text != current
    )
    return True

def close(self):
    """Quit the WebDriver if one is set on this instance."""
    if self.driver:
        logger.info("Schließe den WebDriver.")
        self.driver.quit()
def run(self, search_name):
    """Collect the results of every page of a search.

    Calls ``login_and_find_list(search_name)`` first (login and loading the
    search), then reads the current page and keeps advancing with
    ``click_next_page`` until it reports that no further page is available.

    Args:
        search_name: Passed unchanged to ``login_and_find_list``.

    Returns:
        One list holding the results of all visited pages, in page order.
    """
    # 1) Log in and load the search.
    self.login_and_find_list(search_name)

    # 2) Read the first page, then every following page while the
    #    paginator can still move forward.
    collected = []
    collected.extend(self.extract_current_page_results())
    while self.click_next_page():
        collected.extend(self.extract_current_page_results())
    return collected
if __name__ == "__main__":
logger.info("Starte Dealfront Automatisierung - DEBUG-MODUS")
scraper = None
try:
scraper = DealfrontScraper()
scraper = DealfrontScraper(driver, wait)
results = scraper.run("Facility Management")
if not scraper.driver:
raise Exception("WebDriver konnte nicht initialisiert werden.")