scrape_fotograf.py aktualisiert
This commit is contained in:
@@ -95,7 +95,6 @@ def login(driver, username, password):
|
||||
take_error_screenshot(driver, "login_unexpected")
|
||||
return False
|
||||
|
||||
# GEÄNDERT: Mit detailliertem Debugging-Output
|
||||
def process_job(driver, job_url):
|
||||
print(f"\nVerarbeite Job-URL: {job_url}")
|
||||
job_id = job_url.split('/')[-1]
|
||||
@@ -121,21 +120,17 @@ def process_job(driver, job_url):
|
||||
album_rows = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, SELECTORS["album_rows"])))
|
||||
print(f"{len(album_rows)} Alben gefunden. Prüfe auf Logins...")
|
||||
|
||||
# NEU: Detailliertes Logging für jede Zeile
|
||||
for i, row in enumerate(album_rows):
|
||||
print(f"\n--- Analysiere Zeile {i+1} ---")
|
||||
try:
|
||||
# Gib einen Teil des HTMLs aus, um den Kontext zu sehen
|
||||
row_html = row.get_attribute('outerHTML')
|
||||
print(f"DEBUG (HTML-Ausschnitt): {row_html[:400]}...")
|
||||
|
||||
# Finde das Element mit der Login-Anzahl
|
||||
login_count_element = row.find_element(By.CSS_SELECTOR, SELECTORS["login_count"])
|
||||
login_count_text = login_count_element.text.strip() # .strip() entfernt Leerzeichen
|
||||
login_count_text = login_count_element.text.strip()
|
||||
|
||||
print(f"DEBUG (Gefundener Login-Text): '{login_count_text}'")
|
||||
|
||||
# Versuche, den Text in eine Zahl umzuwandeln
|
||||
if int(login_count_text) == 0:
|
||||
album_link_element = row.find_element(By.CSS_SELECTOR, SELECTORS["album_link"])
|
||||
child_name = album_link_element.text
|
||||
@@ -150,7 +145,6 @@ def process_job(driver, job_url):
|
||||
print(f" --> INFO: Album wird übersprungen (Logins > 0).")
|
||||
|
||||
except (NoSuchElementException, ValueError) as e:
|
||||
# Wenn wir einen Fehler haben (z.B. Text ist keine Zahl), loggen wir das.
|
||||
print(f" --> FEHLER: Konnte Zeile nicht verarbeiten. Grund: {e}")
|
||||
|
||||
except TimeoutException:
|
||||
@@ -161,11 +155,33 @@ def process_job(driver, job_url):
|
||||
results = []
|
||||
print(f"\nVerarbeite {len(albums_to_process)} Alben mit 0 Logins im Detail...")
|
||||
if not albums_to_process:
|
||||
return # Beenden, wenn keine Alben gefunden wurden
|
||||
# Hier verlassen wir die Funktion vorzeitig, wenn keine Alben zu verarbeiten sind
|
||||
pass # Placeholder
|
||||
|
||||
for album in albums_to_process:
|
||||
# ... (Rest der Funktion bleibt gleich) ...
|
||||
pass
|
||||
try:
|
||||
print(f" Rufe Detailseite für '{album['child_name']}' auf...")
|
||||
driver.get(album["album_detail_url"])
|
||||
buyer_link_element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, SELECTORS["buyer_link"])))
|
||||
buyer_name = buyer_link_element.text.replace('Käufer ', '').strip()
|
||||
buyer_page_url = buyer_link_element.get_attribute('href')
|
||||
print(f" Käufer gefunden: '{buyer_name}'. Rufe Käuferseite auf...")
|
||||
driver.get(buyer_page_url)
|
||||
buyer_email = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, SELECTORS["buyer_email"]))).text
|
||||
print(f" E-Mail gefunden: {buyer_email}")
|
||||
results.append({
|
||||
"Auftragsname": job_name,
|
||||
"Kind Vorname": album["child_name"],
|
||||
"Käufer Name": buyer_name,
|
||||
"Käufer E-Mail": buyer_email,
|
||||
})
|
||||
time.sleep(1)
|
||||
except TimeoutException:
|
||||
print(f" Fehler: Timeout bei '{album['child_name']}'.")
|
||||
take_error_screenshot(driver, f"detail_page_timeout_{album['child_name']}")
|
||||
except Exception as e:
|
||||
print(f" Unerwarteter Fehler bei '{album['child_name']}': {e}")
|
||||
take_error_screenshot(driver, f"detail_page_unexpected_{album['child_name']}")
|
||||
|
||||
return results
|
||||
|
||||
@@ -199,7 +215,8 @@ def get_profile_choice():
|
||||
|
||||
def main():
|
||||
print("--- Fotograf.de Scraper für Nutzer ohne Logins ---")
|
||||
credentials = get_pr_choice()
|
||||
# KORRIGIERTE ZEILE
|
||||
credentials = get_profile_choice()
|
||||
if not credentials: return
|
||||
job_url = input("Bitte gib die URL des zu bearbeitenden Fotoauftrags ein: ")
|
||||
if "fotograf.de/config_jobs_settings/index/" not in job_url:
|
||||
|
||||
Reference in New Issue
Block a user