import os
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from typing import List, Dict, Any, Optional
import time
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException, InvalidArgumentException
import re
import asyncio

# Load environment variables (credentials such as <ACCOUNT>_USER / <ACCOUNT>_PW
# are read from .env by the /api/jobs endpoint below).
load_dotenv()

app = FastAPI(title="Fotograf.de Scraper API")

# Configure CORS.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers per the CORS spec — adjust to the concrete frontend
# origin in production, as the comment below already suggests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Adjust this to your frontend origin in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# --- Configuration & Constants ---
LOGIN_URL = 'https://app.fotograf.de/login/login'

# --- Selectors from original scraper, expanded for dashboard jobs ---
# CSS selectors target the login page; XPath selectors target the dashboard
# jobs table and its per-row cells (relative ".//" paths are evaluated
# against a row element).
SELECTORS = {
    "cookie_accept_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
    "login_user": "#login-email",
    "login_pass": "#login-password",
    "login_button": "#login-submit",
    "dashboard_jobs_table_rows": "//table[contains(@class, 'table-legacy')]/tbody/tr",  # Assuming there's a table for jobs
    "job_row_name_link": ".//td[contains(@class, 'table-col-jobname')]//a",
    "job_row_status": ".//td[contains(@class, 'table-col-status')]//span",
    "job_row_date": ".//td[contains(@class, 'table-col-shootingDate')]",
    "job_row_shooting_type": ".//td[contains(@class, 'table-col-shootingType')]",
}

# --- Utility functions from original scraper ---
# (setup_driver, login, etc. will be adapted or moved into this file)
def setup_driver():
    """Create a headless Chrome WebDriver configured for the Docker image.

    Returns:
        A ``webdriver.Chrome`` instance on success, or ``None`` if the
        driver could not be initialized (the error is printed, not raised,
        so the caller must check for ``None``).
    """
    print("Initialisiere Chrome WebDriver...")
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--window-size=1920,1200')
    options.binary_location = '/usr/bin/google-chrome'  # Path to Chrome in Docker
    try:
        return webdriver.Chrome(options=options)
    except Exception as e:
        print(f"Fehler bei der Initialisierung des WebDrivers: {e}")
        return None


def login(driver, username, password):
    """Log in to fotograf.de with the given credentials.

    Accepts the cookie banner if one appears, fills in the login form and
    waits until the dashboard URL is reached.

    Args:
        driver: An initialized Selenium WebDriver.
        username: Login e-mail address.
        password: Login password.

    Returns:
        ``True`` on successful login, ``False`` otherwise.
    """
    print("Starte Login-Vorgang...")
    try:
        driver.get(LOGIN_URL)
        wait = WebDriverWait(driver, 10)
        # The cookie banner is optional; a missing banner is not an error.
        try:
            print("Suche nach Cookie-Banner...")
            wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, SELECTORS["cookie_accept_button"]))).click()
            print("Cookie-Banner akzeptiert.")
            time.sleep(1)
        except TimeoutException:
            print("Kein Cookie-Banner gefunden, fahre fort.")
        print("Fülle Anmeldeformular aus...")
        wait.until(EC.visibility_of_element_located(
            (By.CSS_SELECTOR, SELECTORS["login_user"]))).send_keys(username)
        driver.find_element(By.CSS_SELECTOR, SELECTORS["login_pass"]).send_keys(password)
        print("Klicke auf Login...")
        driver.find_element(By.CSS_SELECTOR, SELECTORS["login_button"]).click()
        print("Warte auf die nächste Seite...")
        # Reaching the dashboard URL is the success criterion for the login.
        wait.until(EC.url_contains('/config_dashboard/index'))
        print("Login erfolgreich!")
        return True
    except Exception as e:
        print(f"Login fehlgeschlagen. Grund: {e}")
        # take_error_screenshot(driver, "login_error")  # Removed for now, will re-add later if needed
        return False


# --- New function to get jobs from dashboard ---
def get_jobs_from_dashboard(driver) -> List[Dict[str, Any]]:
    """Scrape the job list from the fotograf.de dashboard.

    Navigates to the dashboard and parses every row of the jobs table.
    Rows that cannot be fully parsed are skipped with a warning; scraping
    failures never raise, they yield an empty (or partial) list instead.

    Args:
        driver: A logged-in Selenium WebDriver.

    Returns:
        A list of dicts with keys ``id``, ``name``, ``url``, ``status``,
        ``date`` and ``shooting_type``. ``id`` may be ``None`` when the
        job URL does not end in a numeric ID.
    """
    print("Navigiere zum Dashboard, um Aufträge abzurufen...")
    dashboard_url = "https://app.fotograf.de/config_dashboard/index"
    driver.get(dashboard_url)
    wait = WebDriverWait(driver, 20)  # Increased timeout for dashboard load
    jobs = []
    try:
        # Wait for the table rows to be present
        job_rows = wait.until(EC.presence_of_all_elements_located(
            (By.XPATH, SELECTORS["dashboard_jobs_table_rows"])))
        print(f"[{len(job_rows)}] Auftragszeilen auf dem Dashboard gefunden.")
        for row in job_rows:
            try:
                name_element = row.find_element(By.XPATH, SELECTORS["job_row_name_link"])
                job_name = name_element.text.strip()
                job_url = name_element.get_attribute('href')
                # Extract Job ID from URL. get_attribute() may return None,
                # which would make re.search raise TypeError — guard for it.
                job_id_match = re.search(r'/(\d+)$', job_url) if job_url else None
                job_id = job_id_match.group(1) if job_id_match else None
                status_element = row.find_element(By.XPATH, SELECTORS["job_row_status"])
                job_status = status_element.text.strip()
                date_element = row.find_element(By.XPATH, SELECTORS["job_row_date"])
                job_date = date_element.text.strip()
                type_element = row.find_element(By.XPATH, SELECTORS["job_row_shooting_type"])
                shooting_type = type_element.text.strip()
                jobs.append({
                    "id": job_id,
                    "name": job_name,
                    "url": job_url,
                    "status": job_status,
                    "date": job_date,
                    "shooting_type": shooting_type,
                })
            except NoSuchElementException as e:
                print(f"Warnung: Konnte nicht alle Elemente in einer Auftragszeile finden. Fehler: {e}")
            except Exception as e:
                print(f"Ein unerwarteter Fehler beim Parsen einer Auftragszeile: {e}")
    except TimeoutException:
        print("Timeout: Keine Auftrags-Tabelle oder -Zeilen auf dem Dashboard gefunden.")
    except Exception as e:
        print(f"Ein Fehler ist aufgetreten beim Abrufen der Aufträge vom Dashboard: {e}")
    return jobs


@app.get("/health")
async def health_check():
    """Liveness probe: always returns ``{"status": "ok"}``."""
    return {"status": "ok"}


@app.get("/api/jobs", response_model=List[Dict[str, Any]])
def get_jobs(account_type: str):
    """Log in with the credentials for *account_type* and return its jobs.

    Credentials are read from the environment variables
    ``<ACCOUNT_TYPE>_USER`` and ``<ACCOUNT_TYPE>_PW``.

    Declared as a plain (sync) ``def`` on purpose: the Selenium work blocks
    for a long time, and FastAPI runs sync path operations in its
    threadpool instead of blocking the event loop (the original ``async
    def`` froze the whole server for the duration of a scrape).

    Raises:
        HTTPException: 400 if credentials are missing, 500 if the
            WebDriver cannot start or an unexpected error occurs,
            401 if the login fails.
    """
    username_env_var = f"{account_type.upper()}_USER"
    password_env_var = f"{account_type.upper()}_PW"
    username = os.getenv(username_env_var)
    password = os.getenv(password_env_var)
    if not username or not password:
        raise HTTPException(status_code=400, detail=f"Credentials for {account_type} not found. Please set {username_env_var} and {password_env_var} in your .env file.")
    driver = None
    try:
        driver = setup_driver()
        if not driver:
            raise HTTPException(status_code=500, detail="Failed to initialize WebDriver.")
        if not login(driver, username, password):
            raise HTTPException(status_code=401, detail="Login failed. Please check credentials.")
        jobs = get_jobs_from_dashboard(driver)
        if not jobs:
            print("Keine Aufträge gefunden oder Fehler beim Abrufen vom Dashboard.")
            # Depending on desired behavior, might raise HTTPException or return empty list
            # For now, returning empty list if no jobs found but login was successful.
        return jobs
    except HTTPException:
        raise  # Re-raise HTTP exceptions with their original traceback
    except Exception as e:
        print(f"Ein unerwarteter Serverfehler ist aufgetreten: {e}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
    finally:
        # Always release the browser, even when the request fails.
        if driver:
            print("Schließe WebDriver.")
            driver.quit()

# Integrate other scraper functions (process_reminder_mode, process_statistics_mode) as new API endpoints later