From 30ab546f0fd35e2f80f636e3c600718bbeed16e5 Mon Sep 17 00:00:00 2001
From: Floke <floke.com@gmail.com>
Date: Tue, 8 Jul 2025 19:03:02 +0000
Subject: [PATCH] dealfront_enrichment.py: Selektoren konkretisiert, Debug-Helper entfernt

---
 dealfront_enrichment.py | 106 ++++++++++++++++++----------------------
 1 file changed, 48 insertions(+), 58 deletions(-)

diff --git a/dealfront_enrichment.py b/dealfront_enrichment.py
index cd4f214e..764792fe 100644
--- a/dealfront_enrichment.py
+++ b/dealfront_enrichment.py
@@ -4,37 +4,43 @@ import json
 import logging
 
 from selenium import webdriver
-from selenium.webdriver.chrome.options import Options, ChromeOptions
+from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 
-from config import TempConfig  # Import deiner Konfigurationsklasse
+from config import TempConfig  # Deine Konfigurationsklasse mit Pfaden und URLs
 
 # Logging konfigurieren
-LOG_FORMAT = '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
-logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
+template = '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
+logging.basicConfig(level=logging.INFO, format=template)
 logger = logging.getLogger(__name__)
 
 
 class DealfrontScraper:
     def __init__(self):
         logger.info("Initialisiere den DealfrontScraper...")
-        chrome_options = ChromeOptions()
+        # Chrome options: headless, no sandbox, image-content pref set, fixed 1920x1080 window
+        chrome_options = Options()
         prefs = {"profile.managed_default_content_settings.images": 2}
         chrome_options.add_experimental_option("prefs", prefs)
         chrome_options.add_argument("--headless")
         chrome_options.add_argument("--no-sandbox")
         chrome_options.add_argument("--disable-dev-shm-usage")
         chrome_options.add_argument("--window-size=1920,1080")
+
+        # WebDriver service using the chromedriver path from TempConfig.CHROMEDRIVER_PATH
         service = Service(executable_path=TempConfig.CHROMEDRIVER_PATH)
         try:
             self.driver = webdriver.Chrome(service=service, options=chrome_options)
         except Exception:
             logger.critical("WebDriver konnte nicht initialisiert werden.", exc_info=True)
             raise
+
+        # Explicit wait on this driver, timeout taken from TempConfig.DEFAULT_TIMEOUT
         self.wait = WebDriverWait(self.driver, TempConfig.DEFAULT_TIMEOUT)
+        # Load username and password via _load_credentials()
         self.username, self.password = self._load_credentials()
         logger.info("WebDriver erfolgreich initialisiert.")
 
@@ -42,76 +48,64 @@ class DealfrontScraper:
         try:
             with open(TempConfig.DEALFRONT_CREDENTIALS_FILE, 'r', encoding='utf-8') as f:
                 creds = json.load(f)
-            return creds.get('username'), creds.get('password')
+            return creds['username'], creds['password']
         except Exception as e:
             logger.error(f"Credentials-Datei konnte nicht geladen werden: {e}")
             raise
 
-    def _save_debug_artifacts(self):
-        try:
-            os.makedirs(TempConfig.OUTPUT_DIR, exist_ok=True)
-            ts = time.strftime("%Y%m%d-%H%M%S")
-            png = os.path.join(TempConfig.OUTPUT_DIR, f"error_{ts}.png")
-            html = os.path.join(TempConfig.OUTPUT_DIR, f"error_{ts}.html")
-            self.driver.save_screenshot(png)
-            logger.error(f"Screenshot '{png}' gespeichert.")
-            with open(html, 'w', encoding='utf-8') as f:
-                f.write(self.driver.page_source)
-            logger.error(f"HTML-Source '{html}' gespeichert.")
-        except Exception as e:
-            logger.error(f"Debug-Artefakte konnten nicht gespeichert werden: {e}")
-
     def login_and_find_list(self, search_name):
-        # Login
+        # Login flow: open the login page, fill in e-mail and password, click 'Log in'
         logger.info(f"Navigiere zur Login-Seite: {TempConfig.DEALFRONT_LOGIN_URL}")
         self.driver.get(TempConfig.DEALFRONT_LOGIN_URL)
         self.wait.until(EC.visibility_of_element_located((By.NAME, 'email'))).send_keys(self.username)
-        self.driver.find_element(By.CSS_SELECTOR, "input[type='password']").send_keys(self.password)
+        self.driver.find_element(By.NAME, 'password').send_keys(self.password)
         self.driver.find_element(By.XPATH, "//button[normalize-space()='Log in']").click()
         logger.info("Login gesendet.")
 
-        # 'Prospects finden'
+        # Wait until the 'Prospects finden' product tile is clickable, then click it
         tile = self.wait.until(
-            EC.element_to_be_clickable((By.CSS_SELECTOR, "a[data-test-target-product-tile]"))
+            EC.element_to_be_clickable((By.CSS_SELECTOR, "a[data-test-target-product-tile='Prospects finden']"))
         )
         tile.click()
         logger.info("'Prospects finden' geklickt.")
 
         # Vordefinierte Suche auswählen
-        sel = (By.XPATH, f"//div[contains(@class,'truncate') and normalize-space()='{search_name}']")
-        item = self.wait.until(EC.element_to_be_clickable(sel))
+        selector = (By.XPATH, f"//div[contains(@class,'truncate') and normalize-space()='{search_name}']")
+        item = self.wait.until(EC.element_to_be_clickable(selector))
         item.click()
         logger.info(f"Suche '{search_name}' geladen.")
 
     def extract_current_page_results(self):
-        # 1) Kurzer Implicit-Wait
+        # Use a short implicit wait (1) during extraction; it is reset before returning
         self.driver.implicitly_wait(1)
 
-        # 2) Warten auf erstes Daten-Element
-        first_locator = (By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
-        self.wait.until(EC.visibility_of_element_located(first_locator))
+        # Wait until the first name link (same selector as used per row below) is visible
+        first = (By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
+        self.wait.until(EC.visibility_of_element_located(first))
 
         logger.info("Extrahiere aktuelle Seite...")
         results = []
 
-        # 3) Auf mindestens eine Zeile warten
+        # Wait until at least one result-table row with an id attribute is present
         rows_sel = (By.CSS_SELECTOR, "table#t-result-table tbody tr[id]")
         self.wait.until(EC.presence_of_all_elements_located(rows_sel))
         rows = self.driver.find_elements(*rows_sel)
         logger.info(f"{len(rows)} Zeilen gefunden.")
 
-        # 4) Namen & Websites extrahieren
+        # Collect one {'name', 'website'} dict per row; rows without a name link are skipped
         for i, row in enumerate(rows, 1):
-            names = row.find_elements(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
-            if not names:
-                logger.warning(f"Zeile {i}: Kein Name gefunden.")
+            # Name: prefer the link's title attribute, fall back to its visible text
+            name_el = row.find_elements(By.CSS_SELECTOR, ".sticky-column a.t-highlight-text")
+            if not name_el:
+                logger.warning(f"Zeile {i}: Kein Name gefunden. Überspringe.")
                 continue
-            name_elem = names[0]
-            name = (name_elem.get_attribute('title') or name_elem.text).strip()
+            elem = name_el[0]
+            name = (elem.get_attribute('title') or elem.text).strip()
 
-            webs = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3) a")
-            if webs:
-                web = webs[0].text.strip()
+            # Website: link text in the third cell, else the cell's own text, else ''
+            web_el = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3) a")
+            if web_el:
+                web = web_el[0].text.strip()
             else:
                 cell = row.find_elements(By.CSS_SELECTOR, "td:nth-of-type(3)")
                 web = cell[0].text.strip() if cell else ''
@@ -119,38 +113,34 @@ class DealfrontScraper:
             results.append({'name': name, 'website': web})
 
         logger.info(f"Extraktion abgeschlossen: {len(results)} Firmen.")
-        # Implicit-Wait reset
+        # Restore the implicit wait to the configured TempConfig.IMPLICIT_WAIT
         self.driver.implicitly_wait(TempConfig.IMPLICIT_WAIT)
         return results
 
     def click_next_page(self) -> bool:
-        btns = self.driver.find_elements(By.CSS_SELECTOR, "nav.eb-pagination a.eb-pagination-button")
-        if not btns:
+        buttons = self.driver.find_elements(By.CSS_SELECTOR, "nav.eb-pagination a.eb-pagination-button")
+        if not buttons:
             return False
-        nxt = btns[-1]
+        nxt = buttons[-1]
         if not nxt.is_enabled() or 'disabled' in nxt.get_attribute('class'):
             return False
-        curr = self.driver.find_element(
-            By.CSS_SELECTOR, "nav.eb-pagination a.eb-pagination-button.active"
-        ).text
+        current = self.driver.find_element(By.CSS_SELECTOR,
+                                           "nav.eb-pagination a.eb-pagination-button.active").text
         nxt.click()
-        self.wait.until(
-            lambda d: d.find_element(
-                By.CSS_SELECTOR, "nav.eb-pagination a.eb-pagination-button.active"
-            ).text != curr
-        )
+        self.wait.until(lambda d: d.find_element(By.CSS_SELECTOR,
+                                                  "nav.eb-pagination a.eb-pagination-button.active").text != current)
         return True
 
     def run(self, search_name):
         try:
             self.login_and_find_list(search_name)
-            all_res = []
+            all_data = []
             while True:
-                page_res = self.extract_current_page_results()
-                all_res.extend(page_res)
+                page = self.extract_current_page_results()
+                all_data.extend(page)
                 if not self.click_next_page():
                     break
-            return all_res
+            return all_data
         finally:
             self.driver.quit()
 
@@ -158,5 +148,5 @@ class DealfrontScraper:
 if __name__ == '__main__':
     scraper = DealfrontScraper()
     data = scraper.run('Facility Management')
-    for d in data:
-        print(d)
+    for entry in data:
+        print(entry)