186 lines
7.6 KiB
Python
186 lines
7.6 KiB
Python
import os
|
|
from dotenv import load_dotenv
|
|
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from typing import List, Dict, Any, Optional
|
|
import time
|
|
from datetime import datetime
|
|
from selenium import webdriver
|
|
from selenium.webdriver.chrome.options import Options
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException, InvalidArgumentException
|
|
import re
|
|
import asyncio
|
|
|
|
# Load environment variables (python-dotenv) before any os.getenv() lookup
# below or in the request handlers.
load_dotenv()

app = FastAPI(title="Fotograf.de Scraper API")

# Configure CORS.
# Allowed origins come from the CORS_ALLOW_ORIGINS environment variable as a
# comma-separated list (e.g. "https://a.example,https://b.example"). When the
# variable is unset or empty, every origin is allowed ("*"), which is the
# previous hard-coded behaviour.
_cors_allowed_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_allowed_origins,
    # NOTE(review): the FastAPI CORS documentation advises against combining
    # wildcard origins with allow_credentials=True. Kept as-is for backward
    # compatibility; set CORS_ALLOW_ORIGINS explicitly in production.
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
# --- Configuration & Constants ---

# Page that hosts the login form.
LOGIN_URL = 'https://app.fotograf.de/login/login'

# --- Selectors from original scraper, expanded for dashboard jobs ---
# The cookie/login entries are used as CSS selectors; the dashboard/job-row
# entries are used as XPath expressions (the job_row_* ones are evaluated
# relative to a single table row).
SELECTORS = {
    # Cookie consent dialog
    "cookie_accept_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",

    # Login form
    "login_user": "#login-email",
    "login_pass": "#login-password",
    "login_button": "#login-submit",

    # Dashboard job table -- assumes the jobs are rendered as a table.
    "dashboard_jobs_table_rows": "//table[contains(@class, 'table-legacy')]/tbody/tr",

    # Cells inside one job row
    "job_row_name_link": ".//td[contains(@class, 'table-col-jobname')]//a",
    "job_row_status": ".//td[contains(@class, 'table-col-status')]//span",
    "job_row_date": ".//td[contains(@class, 'table-col-shootingDate')]",
    "job_row_shooting_type": ".//td[contains(@class, 'table-col-shootingType')]",
}
|
|
|
|
# --- Utility functions adapted from the original scraper ---
# (setup_driver and login are defined below.)
|
|
|
|
def setup_driver():
    """Create a headless Chrome WebDriver.

    Returns:
        The started ``webdriver.Chrome`` instance, or ``None`` when Chrome
        could not be started (the error is printed, not raised).
    """
    print("Initialisiere Chrome WebDriver...")

    chrome_options = Options()
    for flag in (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--window-size=1920,1200',
    ):
        chrome_options.add_argument(flag)
    # Path to Chrome in Docker
    chrome_options.binary_location = '/usr/bin/google-chrome'

    try:
        return webdriver.Chrome(options=chrome_options)
    except Exception as e:
        # Best effort: callers check for None instead of handling an exception.
        print(f"Fehler bei der Initialisierung des WebDrivers: {e}")
        return None
|
|
|
|
def login(driver, username, password):
    """Log in through the form at ``LOGIN_URL``.

    Accepts the cookie banner when it becomes clickable within the wait
    timeout, fills in the credentials, submits the form and waits until the
    browser URL contains ``/config_dashboard/index``.

    Args:
        driver: WebDriver used to drive the browser.
        username: Value typed into the e-mail field.
        password: Value typed into the password field.

    Returns:
        ``True`` once the dashboard URL was reached, ``False`` if any step
        raised (the reason is printed).
    """
    print("Starte Login-Vorgang...")
    try:
        driver.get(LOGIN_URL)
        wait = WebDriverWait(driver, 10)

        # The cookie banner is optional: only a timeout while waiting for it
        # is tolerated, any other failure aborts the login.
        try:
            print("Suche nach Cookie-Banner...")
            cookie_locator = (By.CSS_SELECTOR, SELECTORS["cookie_accept_button"])
            accept_button = wait.until(EC.element_to_be_clickable(cookie_locator))
            accept_button.click()
            print("Cookie-Banner akzeptiert.")
            time.sleep(1)
        except TimeoutException:
            print("Kein Cookie-Banner gefunden, fahre fort.")

        print("Fülle Anmeldeformular aus...")
        user_locator = (By.CSS_SELECTOR, SELECTORS["login_user"])
        user_field = wait.until(EC.visibility_of_element_located(user_locator))
        user_field.send_keys(username)

        password_field = driver.find_element(By.CSS_SELECTOR, SELECTORS["login_pass"])
        password_field.send_keys(password)

        print("Klicke auf Login...")
        submit_button = driver.find_element(By.CSS_SELECTOR, SELECTORS["login_button"])
        submit_button.click()

        print("Warte auf die nächste Seite...")
        wait.until(EC.url_contains('/config_dashboard/index'))
    except Exception as e:
        print(f"Login fehlgeschlagen. Grund: {e}")
        return False

    print("Login erfolgreich!")
    return True
|
|
|
|
# --- New function to get jobs from dashboard ---
|
|
# Numeric job ID in the last path segment of a job link. A trailing slash,
# query string or fragment after the ID is tolerated.
_JOB_ID_PATTERN = re.compile(r'/(\d+)/?(?:[?#].*)?$')


def _extract_job_id(job_url: Optional[str]) -> Optional[str]:
    """Return the numeric job ID from *job_url*, or None if there is none."""
    if not job_url:
        # get_attribute('href') yields None for a link without an href.
        return None
    match = _JOB_ID_PATTERN.search(job_url)
    return match.group(1) if match else None


def _parse_job_row(row) -> Dict[str, Any]:
    """Read name, URL, ID, status, date and shooting type from one table row."""
    name_element = row.find_element(By.XPATH, SELECTORS["job_row_name_link"])
    job_name = name_element.text.strip()
    job_url = name_element.get_attribute('href')

    status_element = row.find_element(By.XPATH, SELECTORS["job_row_status"])
    date_element = row.find_element(By.XPATH, SELECTORS["job_row_date"])
    type_element = row.find_element(By.XPATH, SELECTORS["job_row_shooting_type"])

    return {
        "id": _extract_job_id(job_url),
        "name": job_name,
        "url": job_url,
        "status": status_element.text.strip(),
        "date": date_element.text.strip(),
        "shooting_type": type_element.text.strip(),
    }


def get_jobs_from_dashboard(driver) -> List[Dict[str, Any]]:
    """Scrape the job table on the dashboard page.

    Opens the dashboard, waits up to 20 seconds for the table rows and turns
    each row into a dict with the keys ``id``, ``name``, ``url``, ``status``,
    ``date`` and ``shooting_type``. ``id`` is ``None`` when the job link has
    no href or carries no numeric ID.

    Args:
        driver: WebDriver to use; the login must already have happened for
            the dashboard to be reachable.

    Returns:
        The parsed jobs. Rows that cannot be parsed are skipped with a printed
        warning; on timeout or any other page-level error the jobs collected
        so far (possibly none) are returned. Nothing is raised for scraping
        failures inside the wait/parse phase.
    """
    print("Navigiere zum Dashboard, um Aufträge abzurufen...")
    dashboard_url = "https://app.fotograf.de/config_dashboard/index"
    driver.get(dashboard_url)
    wait = WebDriverWait(driver, 20)  # Increased timeout for dashboard load

    jobs = []
    try:
        # Wait for the table rows to be present
        job_rows = wait.until(
            EC.presence_of_all_elements_located(
                (By.XPATH, SELECTORS["dashboard_jobs_table_rows"])
            )
        )
        print(f"[{len(job_rows)}] Auftragszeilen auf dem Dashboard gefunden.")

        for row in job_rows:
            # Per-row best effort: one broken row must not discard the rest.
            try:
                jobs.append(_parse_job_row(row))
            except NoSuchElementException as e:
                print(f"Warnung: Konnte nicht alle Elemente in einer Auftragszeile finden. Fehler: {e}")
            except Exception as e:
                print(f"Ein unerwarteter Fehler beim Parsen einer Auftragszeile: {e}")

    except TimeoutException:
        print("Timeout: Keine Auftrags-Tabelle oder -Zeilen auf dem Dashboard gefunden.")
    except Exception as e:
        print(f"Ein Fehler ist aufgetreten beim Abrufen der Aufträge vom Dashboard: {e}")

    return jobs
|
|
|
|
@app.get("/health")
async def health_check():
    """Health endpoint: always answers with ``{"status": "ok"}``."""
    payload = {"status": "ok"}
    return payload
|
|
|
|
def _scrape_jobs(username: str, password: str) -> List[Dict[str, Any]]:
    """Blocking helper: start a browser, log in, scrape the jobs, quit the browser.

    Raises:
        HTTPException: 500 if the WebDriver cannot be started, 401 if the
            login fails.
    """
    driver = setup_driver()
    if not driver:
        raise HTTPException(status_code=500, detail="Failed to initialize WebDriver.")

    try:
        if not login(driver, username, password):
            raise HTTPException(status_code=401, detail="Login failed. Please check credentials.")

        jobs = get_jobs_from_dashboard(driver)
        if not jobs:
            # An empty list is returned as-is: login worked, there was just
            # nothing to report (or scraping failed and was already logged).
            print("Keine Aufträge gefunden oder Fehler beim Abrufen vom Dashboard.")
        return jobs
    finally:
        # Always release the browser process, also on errors.
        print("Schließe WebDriver.")
        driver.quit()


@app.get("/api/jobs", response_model=List[Dict[str, Any]])
async def get_jobs(account_type: str):
    """Return the dashboard jobs of the account selected by *account_type*.

    Credentials are read from the environment variables
    ``<ACCOUNT_TYPE>_USER`` and ``<ACCOUNT_TYPE>_PW`` (account type
    upper-cased).

    Args:
        account_type: Query parameter naming the account whose credentials
            should be used.

    Returns:
        A list of job dicts as produced by ``get_jobs_from_dashboard``
        (possibly empty).

    Raises:
        HTTPException: 400 if the credentials are not configured, 401 if the
            login fails, 500 if the WebDriver cannot be started or an
            unexpected error occurs.
    """
    # NOTE(review): account_type comes straight from the client and selects
    # which environment variables are read and sent to the login form --
    # consider restricting it to a fixed set of known account types.
    env_prefix = account_type.upper()
    username_env_var = f"{env_prefix}_USER"
    password_env_var = f"{env_prefix}_PW"

    username = os.getenv(username_env_var)
    password = os.getenv(password_env_var)

    if not username or not password:
        raise HTTPException(status_code=400, detail=f"Credentials for {account_type} not found. Please set {username_env_var} and {password_env_var} in your .env file.")

    try:
        # Selenium calls block for many seconds. Running them in the default
        # executor keeps the event loop free for other requests.
        # NOTE: concurrent requests may now each start their own browser.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _scrape_jobs, username, password)
    except HTTPException:
        raise  # Re-raise HTTP exceptions unchanged
    except Exception as e:
        print(f"Ein unerwarteter Serverfehler ist aufgetreten: {e}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e
|
|
|
|
# Integrate other scraper functions (process_reminder_mode, process_statistics_mode) as new API endpoints later
|