From 856de778735e29a5cf441dd09423a45e855f9c5f Mon Sep 17 00:00:00 2001 From: Floke Date: Wed, 16 Jul 2025 19:07:44 +0000 Subject: [PATCH] scrape_fotograf.py aktualisiert --- scrape_fotograf.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scrape_fotograf.py b/scrape_fotograf.py index 7b7ef3d8..efef72cb 100644 --- a/scrape_fotograf.py +++ b/scrape_fotograf.py @@ -14,10 +14,10 @@ from selenium.common.exceptions import TimeoutException, NoSuchElementException, # --- Konfiguration & Konstanten --- CREDENTIALS_FILE = 'fotograf_credentials.json' OUTPUT_DIR = 'output' -OUTPUT_FILE = os.path.join(OUTPUT_DIR, 'nutzer_ohne_logins.csv') +OUTPUT_FILE = os.path.join(OUTPUT_DIR, 'nutzer_mit_wenig_logins.csv') # Dateiname angepasst LOGIN_URL = 'https://app.fotograf.de/login/login' -# --- Selektoren (FINALE, VOLLSTÄNDIGE VERSION) --- +# --- Selektoren (unverändert) --- SELECTORS = { "cookie_accept_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll", "login_user": "#login-email", @@ -26,7 +26,6 @@ SELECTORS = { "job_name": "h1", "album_overview_rows": "//table/tbody/tr", "album_overview_link": ".//td[2]//a", - # NEU: Selector für die Gesamtzahl der Zugangscodes "access_code_count": "//span[text()='Zugangscodes']/following-sibling::strong", "person_rows": "//div[contains(@class, 'border-legacy-silver-550') and .//span[text()='Logins']]", "person_vorname": ".//span[text()='Vorname']/following-sibling::strong", @@ -140,7 +139,6 @@ def process_full_job(driver, job_url): print(f"\n--- Betrete Album: {album['name']} ---") driver.get(album['url']) try: - # NEU: Pagination-Logik total_codes_text = wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["access_code_count"]))).text num_pages = math.ceil(int(total_codes_text) / 20) print(f"Album hat {total_codes_text} Zugangscodes auf {num_pages} Seite(n).") @@ -161,9 +159,11 @@ def process_full_job(driver, job_url): person_row = person_rows[i] login_count_text = person_row.find_element(By.XPATH, SELECTORS["person_logins"]).text - if int(login_count_text) == 0: + + # --- HIER IST DIE GEÄNDERTE LOGIK --- + if int(login_count_text) <= 1: vorname = person_row.find_element(By.XPATH, SELECTORS["person_vorname"]).text - print(f" --> ERFOLG: '{vorname}' mit 0 Logins gefunden!") + print(f" --> ERFOLG: '{vorname}' mit {login_count_text} Login(s) gefunden!") access_code_page_url = person_row.find_element(By.XPATH, SELECTORS["person_access_code_link"]).get_attribute('href') driver.get(access_code_page_url) @@ -240,7 +240,7 @@ def get_profile_choice(): except ValueError: print("Ungültige Eingabe.") def main(): - print("--- Fotograf.de Scraper für Nutzer ohne Logins (FINALE VERSION) ---") + print("--- Fotograf.de Scraper für Nutzer mit wenig Logins (0 oder 1) ---") credentials = get_profile_choice() if not credentials: return