Letzte Version war super

Hat gut funktioniert, jetzt versuchen wir noch die Pagination zu ergänzen
This commit is contained in:
2025-07-16 18:51:29 +00:00
parent 6b1b2d441c
commit 265f32181d

View File

@@ -2,6 +2,7 @@ import json
import os import os
import time import time
import csv import csv
import math
from datetime import datetime from datetime import datetime
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
@@ -16,7 +17,7 @@ OUTPUT_DIR = 'output'
OUTPUT_FILE = os.path.join(OUTPUT_DIR, 'nutzer_ohne_logins.csv') OUTPUT_FILE = os.path.join(OUTPUT_DIR, 'nutzer_ohne_logins.csv')
LOGIN_URL = 'https://app.fotograf.de/login/login' LOGIN_URL = 'https://app.fotograf.de/login/login'
# --- Selektoren --- # --- Selektoren (FINALE, VOLLSTÄNDIGE VERSION) ---
SELECTORS = { SELECTORS = {
"cookie_accept_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll", "cookie_accept_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
"login_user": "#login-email", "login_user": "#login-email",
@@ -25,6 +26,8 @@ SELECTORS = {
"job_name": "h1", "job_name": "h1",
"album_overview_rows": "//table/tbody/tr", "album_overview_rows": "//table/tbody/tr",
"album_overview_link": ".//td[2]//a", "album_overview_link": ".//td[2]//a",
# NEU: Selector für die Gesamtzahl der Zugangscodes
"access_code_count": "//span[text()='Zugangscodes']/following-sibling::strong",
"person_rows": "//div[contains(@class, 'border-legacy-silver-550') and .//span[text()='Logins']]", "person_rows": "//div[contains(@class, 'border-legacy-silver-550') and .//span[text()='Logins']]",
"person_vorname": ".//span[text()='Vorname']/following-sibling::strong", "person_vorname": ".//span[text()='Vorname']/following-sibling::strong",
"person_logins": ".//span[text()='Logins']/following-sibling::strong", "person_logins": ".//span[text()='Logins']/following-sibling::strong",
@@ -100,7 +103,7 @@ def process_full_job(driver, job_url):
try: try:
driver.get(job_url) driver.get(job_url)
except InvalidArgumentException: except InvalidArgumentException:
print(f"!!! FEHLER: Die URL '{job_url}' wurde von Selenium als ungültig angesehen. Bitte prüfen Sie die Eingabe.") print(f"!!! FEHLER: Die URL '{job_url}' wurde von Selenium als ungültig angesehen.")
return [] return []
try: try:
@@ -137,59 +140,69 @@ def process_full_job(driver, job_url):
print(f"\n--- Betrete Album: {album['name']} ---") print(f"\n--- Betrete Album: {album['name']} ---")
driver.get(album['url']) driver.get(album['url'])
try: try:
num_persons = len(wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))) # NEU: Pagination-Logik
print(f"{num_persons} Personen in diesem Album gefunden.") total_codes_text = wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["access_code_count"]))).text
num_pages = math.ceil(int(total_codes_text) / 20)
print(f"Album hat {total_codes_text} Zugangscodes auf {num_pages} Seite(n).")
for i in range(num_persons): for page_num in range(1, num_pages + 1):
person_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"]))) current_page_url = album['url']
person_row = person_rows[i] if page_num > 1:
current_page_url += f"?page_guest_accesses={page_num}"
login_count_text = person_row.find_element(By.XPATH, SELECTORS["person_logins"]).text print(f" Verarbeite Seite {page_num}...")
if int(login_count_text) == 0: driver.get(current_page_url)
vorname = person_row.find_element(By.XPATH, SELECTORS["person_vorname"]).text
print(f" --> ERFOLG: '{vorname}' mit 0 Logins gefunden!")
access_code_page_url = person_row.find_element(By.XPATH, SELECTORS["person_access_code_link"]).get_attribute('href') num_persons = len(wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"]))))
driver.get(access_code_page_url) print(f" {num_persons} Personen auf dieser Seite gefunden.")
print(f" Navigiere zur Kommunikations-Seite für '{vorname}'...")
for attempt in range(3): for i in range(num_persons):
try: person_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))
wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["quick_login_url"]))) person_row = person_rows[i]
schnell_login_url = driver.find_element(By.XPATH, SELECTORS["quick_login_url"]).get_attribute('href')
potential_buyer_element = driver.find_element(By.XPATH, SELECTORS["potential_buyer_link"])
kaeufer_name = potential_buyer_element.text
print(f" Käufer: '{kaeufer_name}', Schnell-Login: GEFUNDEN") login_count_text = person_row.find_element(By.XPATH, SELECTORS["person_logins"]).text
potential_buyer_element.click() if int(login_count_text) == 0:
vorname = person_row.find_element(By.XPATH, SELECTORS["person_vorname"]).text
print(f" --> ERFOLG: '{vorname}' mit 0 Logins gefunden!")
print(f" Navigiere zur Käufer-Detailseite...") access_code_page_url = person_row.find_element(By.XPATH, SELECTORS["person_access_code_link"]).get_attribute('href')
email = wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["buyer_email"]))).text driver.get(access_code_page_url)
print(f" FINALE ERFOLG: E-Mail gefunden: {email}") print(f" Navigiere zur Kommunikations-Seite für '{vorname}'...")
final_results.append({ for attempt in range(3):
"Name des Kindes": vorname, try:
"Name Käufer": kaeufer_name, wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["quick_login_url"])))
"E-Mail-Adresse Käufer": email, schnell_login_url = driver.find_element(By.XPATH, SELECTORS["quick_login_url"]).get_attribute('href')
"Schnell Login URL": schnell_login_url potential_buyer_element = driver.find_element(By.XPATH, SELECTORS["potential_buyer_link"])
}) kaeufer_name = potential_buyer_element.text
break
except StaleElementReferenceException: print(f" Käufer: '{kaeufer_name}', Schnell-Login: GEFUNDEN")
print(f" Timing-Fehler (StaleElement), Versuch {attempt + 1}/3. Warte kurz...") potential_buyer_element.click()
time.sleep(1)
if attempt == 2:
print(" Fehler war persistent, überspringe diese Person.")
take_error_screenshot(driver, f"stale_error_{vorname}")
except TimeoutException: print(f" Navigiere zur Käufer-Detailseite...")
print(f" Timeout beim Warten auf Details für '{vorname}'. Überspringe.") email = wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["buyer_email"]))).text
take_error_screenshot(driver, f"timeout_error_{vorname}") print(f" FINALE ERFOLG: E-Mail gefunden: {email}")
break
print(f" Kehre zurück zur Album-Übersicht '{album['name']}'...") final_results.append({
driver.get(album['url']) "Name des Kindes": vorname,
wait.until(EC.presence_of_element_located((By.XPATH, SELECTORS["person_rows"]))) "Name Käufer": kaeufer_name,
"E-Mail-Adresse Käufer": email,
"Schnell Login URL": schnell_login_url
})
break
except StaleElementReferenceException:
print(f" Timing-Fehler, Versuch {attempt + 1}/3...")
time.sleep(1)
if attempt == 2: raise
except TimeoutException:
print(f" Timeout beim Warten auf Details für '{vorname}'.")
take_error_screenshot(driver, f"timeout_error_{vorname}")
break
print(f" Kehre zurück zur Album-Seite {page_num}...")
driver.get(current_page_url)
wait.until(EC.presence_of_element_located((By.XPATH, SELECTORS["person_rows"])))
except TimeoutException: except TimeoutException:
print(f" Keine Personen-Daten im Album '{album['name']}' gefunden. Überspringe.") print(f" Keine Personen-Daten im Album '{album['name']}' gefunden. Überspringe.")
take_error_screenshot(driver, f"album_{album['name']}_error") take_error_screenshot(driver, f"album_{album['name']}_error")
@@ -231,7 +244,6 @@ def main():
credentials = get_profile_choice() credentials = get_profile_choice()
if not credentials: return if not credentials: return
# GEÄNDERT: URL-Eingabe wird explizit von "Bracketed Paste" Codes bereinigt
job_url_raw = input("Bitte gib die URL des zu bearbeitenden Fotoauftrags ein (Einstellungs-Seite): ") job_url_raw = input("Bitte gib die URL des zu bearbeitenden Fotoauftrags ein (Einstellungs-Seite): ")
job_url_cleaned = job_url_raw.replace("\x1b[200~", "").replace("\x1b[201~", "") job_url_cleaned = job_url_raw.replace("\x1b[200~", "").replace("\x1b[201~", "")
job_url = job_url_cleaned.strip() job_url = job_url_cleaned.strip()