Navigation über die Seiten

This commit is contained in:
2025-07-08 18:53:54 +00:00
parent d79187fce9
commit 0c5b471695

View File

@@ -113,64 +113,94 @@ class DealfrontScraper:
return False return False
def extract_current_page_results(self):
    """Collect company name and website from every row of the current result page.

    Returns:
        list[dict]: one ``{'name': ..., 'website': ...}`` dict per table row;
        rows without a name element are skipped with a warning.
    """
    # 1) Temporarily lower the implicit wait so the per-row find_elements()
    #    fallbacks below don't block for the full default timeout on rows
    #    that lack a website link.
    self.driver.implicitly_wait(1)
    try:
        # 2) Wait for the first company cell to become visible.
        first_row_locator = (By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
        self.wait.until(EC.visibility_of_element_located(first_row_locator))
        time.sleep(1)  # give the table a moment to finish rendering

        logger.info("Extrahiere Ergebnisse von der aktuellen Seite...")
        results = []

        # 3) Wait for at least one data row to be present.
        rows_selector = (By.CSS_SELECTOR, "table#t-result-table tbody tr[id]")
        self.wait.until(EC.presence_of_all_elements_located(rows_selector))
        rows = self.driver.find_elements(*rows_selector)
        logger.info(f"{len(rows)} Firmen-Zeilen gefunden.")

        # 4) Collect the data without further sleeps/exceptions.
        for i, row in enumerate(rows, 1):
            name_elems = row.find_elements(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
            if not name_elems:
                logger.warning(f"Zeile {i}: Kein Name-Element gefunden. Überspringe.")
                continue
            name_elem = name_elems[0]
            # Prefer the title attribute (full name); fall back to link text.
            company_name = (name_elem.get_attribute("title") or name_elem.text).strip()

            # Website: link text in the 3rd column; plain cell text as fallback.
            web_elems = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3) a")
            if web_elems:
                website = web_elems[0].text.strip()
            else:
                cell = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3)")
                website = cell[0].text.strip() if cell else ""

            results.append({'name': company_name, 'website': website})

        logger.info(f"Extraktion abgeschlossen: {len(results)} Firmen.")
        return results
    finally:
        # BUG FIX: the rewrite dropped the old try/finally, leaving the
        # implicit wait at 1s for the rest of the session. Restore the
        # previous default (10s, as in the original finally block) on
        # every exit path, including exceptions.
        self.driver.implicitly_wait(10)
def click_next_page(self) -> bool:
    """Click the paginator's 'Next' button.

    Returns:
        bool: True once the next page has become active; False when there
        is no further page (paginator absent, or Next button disabled).
    """
    # Grab all paginator buttons (Prev, page numbers, Next).
    buttons = self.driver.find_elements(
        By.CSS_SELECTOR, "nav.eb-pagination a.eb-pagination-button"
    )
    if not buttons:
        # BUG FIX: buttons[-1] raised IndexError when no paginator is
        # rendered (e.g. a single result page). Treat it as "no next page".
        return False
    next_btn = buttons[-1]  # the last button is 'Next'
    # BUG FIX: get_attribute("class") may return None, which made the
    # 'in' test raise TypeError — normalize to "" before checking.
    if not next_btn.is_enabled() or "disabled" in (next_btn.get_attribute("class") or ""):
        return False
    # Remember the currently active page label ...
    current = self.driver.find_element(
        By.CSS_SELECTOR,
        "nav.eb-pagination a.eb-pagination-button.active"
    ).text
    next_btn.click()
    # ... and wait until the active page label changes.
    WebDriverWait(self.driver, 10).until(
        lambda d: d.find_element(
            By.CSS_SELECTOR,
            "nav.eb-pagination a.eb-pagination-button.active"
        ).text != current
    )
    return True
def run(self, search_name):
    """Log in, open the saved search, and scrape every result page.

    Args:
        search_name: Name of the saved search/list to load.

    Returns:
        list[dict]: the accumulated results from all pages.
    """
    # 1) Login & load the search.
    self.login_and_find_list(search_name)
    # 2) Walk through all pages, accumulating the rows until the
    #    paginator reports that no next page exists.
    all_results = []
    has_next_page = True
    while has_next_page:
        all_results.extend(self.extract_current_page_results())
        has_next_page = self.click_next_page()
    return all_results
if __name__ == "__main__": if __name__ == "__main__":
logger.info("Starte Dealfront Automatisierung - DEBUG-MODUS") logger.info("Starte Dealfront Automatisierung - DEBUG-MODUS")
scraper = None scraper = None
try: try:
scraper = DealfrontScraper() scraper = DealfrontScraper(driver, wait)
results = scraper.run("Facility Management")
if not scraper.driver: if not scraper.driver:
raise Exception("WebDriver konnte nicht initialisiert werden.") raise Exception("WebDriver konnte nicht initialisiert werden.")