diff --git a/scrape_fotograf.py b/scrape_fotograf.py index 02f26f65..5f5cc159 100644 --- a/scrape_fotograf.py +++ b/scrape_fotograf.py @@ -39,7 +39,6 @@ SELECTORS = { } def take_error_screenshot(driver, error_name): - # ... (Funktion bleibt unverändert) ... os.makedirs(OUTPUT_DIR, exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"error_{error_name}_{timestamp}.png" @@ -51,7 +50,6 @@ def take_error_screenshot(driver, error_name): print(f"!!! Konnte keinen Screenshot speichern: {e}") def setup_driver(): - # ... (Funktion bleibt unverändert) ... print("Initialisiere Chrome WebDriver...") options = Options() options.add_argument('--headless') @@ -66,8 +64,15 @@ def setup_driver(): print(f"Fehler bei der Initialisierung des WebDrivers: {e}") return None +# HIER IST DIE FEHLENDE FUNKTION WIEDER EINGEFÜGT +def load_all_credentials(): + try: + with open(CREDENTIALS_FILE, 'r') as f: + return json.load(f) + except (FileNotFoundError, json.JSONDecodeError): + return None + def login(driver, username, password): - # ... (Funktion bleibt unverändert) ... print("Starte Login-Vorgang...") try: driver.get(LOGIN_URL) @@ -94,11 +99,9 @@ def login(driver, username, password): take_error_screenshot(driver, "login_error") return False -# Die finale, vollständige Logik def process_full_job(driver, job_url): wait = WebDriverWait(driver, 15) - # 1. Job-Namen holen print(f"\nVerarbeite Job-URL: {job_url}") driver.get(job_url) try: @@ -109,7 +112,6 @@ def process_full_job(driver, job_url): take_error_screenshot(driver, "job_name_not_found") return [] - # 2. Alle Album-Links von der Übersichtsseite sammeln job_id = job_url.split('/')[-1] albums_overview_url = f"https://app.fotograf.de/config_jobs_photos/index/{job_id}" print(f"Navigiere zur Alben-Übersicht: {albums_overview_url}") @@ -120,16 +122,18 @@ def process_full_job(driver, job_url): album_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["album_overview_rows"]))) print(f"{len(album_rows)} Alben in der Übersicht gefunden.") for row in album_rows: - album_name = row.find_element(By.XPATH, SELECTORS["album_overview_link"]).text - album_link = row.find_element(By.XPATH, SELECTORS["album_overview_link"]).get_attribute('href') - albums_to_visit.append({"name": album_name, "url": album_link}) + try: + album_name = row.find_element(By.XPATH, SELECTORS["album_overview_link"]).text + album_link = row.find_element(By.XPATH, SELECTORS["album_overview_link"]).get_attribute('href') + albums_to_visit.append({"name": album_name, "url": album_link}) + except NoSuchElementException: + continue # Ignoriere Zeilen, die kein Album-Link haben (z.B. Team-Bilder) print(f"Sammeln der Album-Links abgeschlossen.") except TimeoutException: print("Konnte die Album-Liste nicht finden.") take_error_screenshot(driver, "album_overview_error") return [] - # 3. Jedes Album besuchen und die Personen mit 0 Logins finden final_results = [] for album in albums_to_visit: print(f"\n--- Betrete Album: {album['name']} ---") @@ -149,8 +153,6 @@ def process_full_job(driver, job_url): buyer_link_element = person_row.find_element(By.XPATH, SELECTORS["person_buyer_link"]) buyer_page_url = buyer_link_element.get_attribute('href') - # Temporär eine neue Seite öffnen, um die E-Mail zu holen - # (man könnte auch die buyer_page_url speichern und später abarbeiten) current_window = driver.current_window_handle driver.execute_script("window.open(arguments[0]);", buyer_page_url) driver.switch_to.window(driver.window_handles[-1]) @@ -165,12 +167,10 @@ def process_full_job(driver, job_url): "Käufer E-Mail": email }) - # Tab schließen und zurückkehren driver.close() driver.switch_to.window(current_window) except (ValueError, NoSuchElementException): - # Ignoriere Zeilen, die nicht dem Format entsprechen continue except TimeoutException: print(f" Keine Personen-Tabelle im Album '{album['name']}' gefunden. Überspringe.") @@ -179,13 +179,11 @@ def process_full_job(driver, job_url): return final_results -# ... (Rest des Skripts: save_results_to_csv, get_profile_choice, etc. bleiben gleich) ... def save_results_to_csv(results): if not results: print("\nKeine Daten zum Speichern vorhanden.") return os.makedirs(OUTPUT_DIR, exist_ok=True) - # Feldnamen an die neue Struktur anpassen fieldnames = ["Auftragsname", "Album", "Kind Vorname", "Käufer E-Mail"] print(f"\nSpeichere {len(results)} Ergebnisse in '{OUTPUT_FILE}'...") with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as f: