import os
import time
import json
import logging

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from config import TempConfig  # Project configuration class with paths and URLs

# Configure logging
template = '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=template)
logger = logging.getLogger(__name__)


class DealfrontScraper:
    """Headless-Chrome scraper that logs in, opens a predefined search and
    collects the name and website of every result row across all pages.

    All paths, URLs and timeouts are read from ``TempConfig``.
    """

    def __init__(self):
        """Start the Chrome WebDriver and load the login credentials.

        Raises:
            Exception: Whatever ``webdriver.Chrome`` or the credential
                loading raises; the error is logged and re-raised. If the
                credentials cannot be loaded, the already-started browser
                is shut down before the error propagates.
        """
        logger.info("Initialisiere den DealfrontScraper...")

        # Chrome options
        chrome_options = Options()
        # Content setting 2 for images; presumably used to block image
        # loading and speed up page loads.
        prefs = {"profile.managed_default_content_settings.images": 2}
        chrome_options.add_experimental_option("prefs", prefs)
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_options.add_argument("--window-size=1920,1080")

        # WebDriver service
        service = Service(executable_path=TempConfig.CHROMEDRIVER_PATH)
        try:
            self.driver = webdriver.Chrome(service=service, options=chrome_options)
        except Exception:
            logger.critical("WebDriver konnte nicht initialisiert werden.", exc_info=True)
            raise

        # Explicit wait shared by all page interactions
        self.wait = WebDriverWait(self.driver, TempConfig.DEFAULT_TIMEOUT)

        # Load credentials; do not leave a browser process behind if
        # this fails, because the caller never gets an instance to quit.
        try:
            self.username, self.password = self._load_credentials()
        except Exception:
            self.driver.quit()
            raise

        logger.info("WebDriver erfolgreich initialisiert.")

    def _load_credentials(self):
        """Read username and password from the JSON credentials file.

        Returns:
            A ``(username, password)`` tuple taken from the ``username``
            and ``password`` keys of the file.

        Raises:
            Exception: Any error while opening, parsing or indexing the
                file is logged and re-raised unchanged.
        """
        try:
            with open(TempConfig.DEALFRONT_CREDENTIALS_FILE, 'r', encoding='utf-8') as f:
                creds = json.load(f)
            return creds['username'], creds['password']
        except Exception as e:
            logger.error(f"Credentials-Datei konnte nicht geladen werden: {e}")
            raise

    @staticmethod
    def _xpath_literal(value):
        """Return *value* as an XPath 1.0 string literal.

        XPath 1.0 has no escape character, so a value containing both
        quote kinds must be assembled with ``concat()``.
        """
        if "'" not in value:
            return f"'{value}'"
        if '"' not in value:
            return f'"{value}"'
        pieces = [f"'{part}'" for part in value.split("'")]
        return "concat(" + ", \"'\", ".join(pieces) + ")"

    def login_and_find_list(self, search_name):
        """Log in, open the 'Prospects finden' tile and select a search.

        Args:
            search_name: Visible name of the predefined search to open.
                It is matched exactly (after whitespace normalisation)
                against the text of the list entries.
        """
        # Login flow
        logger.info(f"Navigiere zur Login-Seite: {TempConfig.DEALFRONT_LOGIN_URL}")
        self.driver.get(TempConfig.DEALFRONT_LOGIN_URL)
        self.wait.until(
            EC.visibility_of_element_located((By.NAME, 'email'))
        ).send_keys(self.username)
        self.driver.find_element(By.NAME, 'password').send_keys(self.password)
        self.driver.find_element(By.XPATH, "//button[normalize-space()='Log in']").click()
        logger.info("Login gesendet.")

        # Click the 'Prospects finden' tile
        tile = self.wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "a[data-test-target-product-tile='Prospects finden']")
            )
        )
        tile.click()
        logger.info("'Prospects finden' geklickt.")

        # Select the predefined search. The name is quoted via
        # _xpath_literal so that apostrophes in it cannot break the XPath.
        name_literal = self._xpath_literal(search_name)
        selector = (
            By.XPATH,
            f"//div[contains(@class,'truncate') and normalize-space()={name_literal}]",
        )
        item = self.wait.until(EC.element_to_be_clickable(selector))
        item.click()
        logger.info(f"Suche '{search_name}' geladen.")

    def extract_current_page_results(self):
        """Extract name and website of every row on the current result page.

        Returns:
            A list of ``{'name': ..., 'website': ...}`` dicts, one per row
            that has a name link. Rows without a name link are skipped
            with a warning.
        """
        # Short implicit wait so that look-ups of optional elements inside
        # the rows do not block for the full configured implicit wait.
        self.driver.implicitly_wait(1)
        try:
            # Wait for the first data element
            first = (By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
            self.wait.until(EC.visibility_of_element_located(first))

            logger.info("Extrahiere aktuelle Seite...")
            results = []

            # Wait for at least one table row
            rows_sel = (By.CSS_SELECTOR, "table#t-result-table tbody tr[id]")
            self.wait.until(EC.presence_of_all_elements_located(rows_sel))
            rows = self.driver.find_elements(*rows_sel)
            logger.info(f"{len(rows)} Zeilen gefunden.")

            # Extract name and website
            for i, row in enumerate(rows, 1):
                # Name
                name_el = row.find_elements(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
                if not name_el:
                    logger.warning(f"Zeile {i}: Kein Name gefunden. Überspringe.")
                    continue
                elem = name_el[0]
                # Prefer the title attribute, fall back to the visible text.
                name = (elem.get_attribute('title') or elem.text).strip()

                # Website: link text in the third cell, else the cell text
                web_el = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3) a")
                if web_el:
                    web = web_el[0].text.strip()
                else:
                    cell = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3)")
                    web = cell[0].text.strip() if cell else ''

                results.append({'name': name, 'website': web})

            logger.info(f"Extraktion abgeschlossen: {len(results)} Firmen.")
            return results
        finally:
            # Reset the implicit wait even when a wait above timed out,
            # so later calls are not stuck with the shortened value.
            self.driver.implicitly_wait(TempConfig.IMPLICIT_WAIT)

    def click_next_page(self) -> bool:
        """Advance to the next result page if there is one.

        Returns:
            ``True`` after the active pagination button has changed,
            ``False`` when there are no pagination buttons or the last
            button is disabled.
        """
        buttons = self.driver.find_elements(
            By.CSS_SELECTOR, "nav.eb-pagination a.eb-pagination-button"
        )
        if not buttons:
            return False

        # The last pagination button is treated as the "next" control.
        nxt = buttons[-1]
        if not nxt.is_enabled():
            return False
        # get_attribute returns None when the attribute is absent.
        if 'disabled' in (nxt.get_attribute('class') or ''):
            return False

        active_sel = "nav.eb-pagination a.eb-pagination-button.active"
        current = self.driver.find_element(By.CSS_SELECTOR, active_sel).text
        nxt.click()
        # The page change is detected by the active button's label changing.
        self.wait.until(
            lambda d: d.find_element(By.CSS_SELECTOR, active_sel).text != current
        )
        return True

    def run(self, search_name):
        """Run the full scrape for one predefined search.

        Args:
            search_name: Name of the predefined search to open.

        Returns:
            The combined list of result dicts from all pages.

        The browser is always shut down afterwards, also on errors.
        """
        try:
            self.login_and_find_list(search_name)
            all_data = []
            while True:
                page = self.extract_current_page_results()
                all_data.extend(page)
                if not self.click_next_page():
                    break
            return all_data
        finally:
            self.driver.quit()


if __name__ == '__main__':
    scraper = DealfrontScraper()
    data = scraper.run('Facility Management')
    for entry in data:
        print(entry)