From 64980a85d3ccd11dcf70e8677c987f00a0a2644f Mon Sep 17 00:00:00 2001 From: Floke Date: Wed, 16 Jul 2025 15:20:26 +0000 Subject: [PATCH] scrape_fotograf.py aktualisiert --- scrape_fotograf.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/scrape_fotograf.py b/scrape_fotograf.py index 5f5cc159..8a173b0d 100644 --- a/scrape_fotograf.py +++ b/scrape_fotograf.py @@ -16,25 +16,24 @@ OUTPUT_DIR = 'output' OUTPUT_FILE = os.path.join(OUTPUT_DIR, 'nutzer_ohne_logins.csv') LOGIN_URL = 'https://app.fotograf.de/login/login' -# --- Selektoren (FINALE, KORREKTE VERSION) --- +# --- Selektoren (FINALE VERSION 2.0) --- SELECTORS = { "cookie_accept_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll", "login_user": "#login-email", "login_pass": "#login-password", "login_button": "#login-submit", "job_name": "h1", - # Album-Übersicht (basierend auf Ihrem XPath, der enthielt) + # Album-Übersicht (funktioniert) "album_overview_rows": "//table/tbody/tr", - "album_overview_link": ".//td[2]//a", # Link ist in der 2. Spalte - "album_overview_logins": ".//td[7]", # Logins sind in der 7. Spalte + "album_overview_link": ".//td[2]//a", - # Einzelpersonen-Ansicht (innerhalb eines Albums) - "person_rows": "//section[.//h3[contains(., 'Einzelfotos')]]//table/tbody/tr", - "person_vorname": ".//td[4]", - "person_logins": ".//td[6]", - "person_buyer_link": ".//td[7]//a", + # Einzelpersonen-Ansicht (basierend auf Ihrem finalen HTML) + "person_rows": "//div[contains(@class, 'border-legacy-silver-550') and .//span[text()='Logins']]", + "person_vorname": ".//span[text()='Vorname']/following-sibling::strong", + "person_logins": ".//span[text()='Logins']/following-sibling::strong", + "person_buyer_link": ".//a[contains(@data-qa-id, 'guest-access-banner-access-code')]", - # Käufer-Detailseite + # Käufer-Detailseite (funktioniert) "buyer_email": "//span[contains(., '@')]" } @@ -64,7 +63,6 @@ def setup_driver(): print(f"Fehler bei der Initialisierung des WebDrivers: {e}") return None -# HIER IST DIE FEHLENDE FUNKTION WIEDER EINGEFÜGT def load_all_credentials(): try: with open(CREDENTIALS_FILE, 'r') as f: @@ -127,8 +125,8 @@ def process_full_job(driver, job_url): album_link = row.find_element(By.XPATH, SELECTORS["album_overview_link"]).get_attribute('href') albums_to_visit.append({"name": album_name, "url": album_link}) except NoSuchElementException: - continue # Ignoriere Zeilen, die kein Album-Link haben (z.B. Team-Bilder) - print(f"Sammeln der Album-Links abgeschlossen.") + continue + print(f"{len(albums_to_visit)} gültige Album-Links gesammelt.") except TimeoutException: print("Konnte die Album-Liste nicht finden.") take_error_screenshot(driver, "album_overview_error") @@ -139,12 +137,14 @@ def process_full_job(driver, job_url): print(f"\n--- Betrete Album: {album['name']} ---") driver.get(album['url']) try: + # Warten, bis die Personen-DIVs geladen sind person_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"]))) print(f"{len(person_rows)} Personen in diesem Album gefunden.") for person_row in person_rows: try: - login_count = int(person_row.find_element(By.XPATH, SELECTORS["person_logins"]).text) + login_count_text = person_row.find_element(By.XPATH, SELECTORS["person_logins"]).text + login_count = int(login_count_text) if login_count == 0: vorname = person_row.find_element(By.XPATH, SELECTORS["person_vorname"]).text @@ -153,7 +153,7 @@ def process_full_job(driver, job_url): buyer_link_element = person_row.find_element(By.XPATH, SELECTORS["person_buyer_link"]) buyer_page_url = buyer_link_element.get_attribute('href') - current_window = driver.current_window_handle + # E-Mail in einem neuen Tab holen, um den Kontext nicht zu verlieren driver.execute_script("window.open(arguments[0]);", buyer_page_url) driver.switch_to.window(driver.window_handles[-1]) @@ -168,12 +168,12 @@ def process_full_job(driver, job_url): }) driver.close() - driver.switch_to.window(current_window) + driver.switch_to.window(driver.window_handles[0]) except (ValueError, NoSuchElementException): continue except TimeoutException: - print(f" Keine Personen-Tabelle im Album '{album['name']}' gefunden. Überspringe.") + print(f" Keine Personen-Daten im Album '{album['name']}' gefunden. Überspringe.") take_error_screenshot(driver, f"album_{album['name']}_error") continue