feat(fotograf-de-scraper): initial setup with backend and frontend scaffold [32788f42]
This commit is contained in:
185
fotograf-de-scraper/backend/main.py
Normal file
185
fotograf-de-scraper/backend/main.py
Normal file
@@ -0,0 +1,185 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from typing import List, Dict, Any, Optional
|
||||
import time
|
||||
from datetime import datetime
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException, InvalidArgumentException
|
||||
import re
|
||||
import asyncio
|
||||
|
||||
# Load environment variables from a local .env file (account credentials
# such as <ACCOUNT>_USER / <ACCOUNT>_PW are read from the environment later).
load_dotenv()

app = FastAPI(title="Fotograf.de Scraper API")

# Configure CORS so a separately-served frontend can call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Adjust this to your frontend origin in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# --- Configuration & Constants ---
LOGIN_URL = 'https://app.fotograf.de/login/login'

# --- Selectors from original scraper, expanded for dashboard jobs ---
# Login-page entries are CSS selectors; "dashboard_jobs_table_rows" is an
# absolute XPath, and the ".//td..." entries are XPath expressions evaluated
# relative to a single dashboard table row.
SELECTORS = {
    "cookie_accept_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
    "login_user": "#login-email",
    "login_pass": "#login-password",
    "login_button": "#login-submit",
    "dashboard_jobs_table_rows": "//table[contains(@class, 'table-legacy')]/tbody/tr",  # Assuming there's a table for jobs
    "job_row_name_link": ".//td[contains(@class, 'table-col-jobname')]//a",
    "job_row_status": ".//td[contains(@class, 'table-col-status')]//span",
    "job_row_date": ".//td[contains(@class, 'table-col-shootingDate')]",
    "job_row_shooting_type": ".//td[contains(@class, 'table-col-shootingType')]",
}
|
||||
|
||||
# --- Utility functions from original scraper ---
|
||||
# (setup_driver, login, etc. will be adapted or moved into this file)
|
||||
|
||||
def setup_driver():
    """Create and return a headless Chrome WebDriver.

    The Chrome binary location can be overridden via the ``CHROME_BINARY``
    environment variable; it defaults to the path used inside the Docker
    image, so existing deployments keep working unchanged.

    Returns:
        A configured ``webdriver.Chrome`` instance, or ``None`` when
        initialisation fails (callers check for a falsy result).
    """
    print("Initialisiere Chrome WebDriver...")
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')            # required in containerised environments
    options.add_argument('--disable-dev-shm-usage')  # avoid /dev/shm exhaustion in Docker
    options.add_argument('--window-size=1920,1200')
    # Previously hard-coded to the Docker path; now overridable for local runs.
    options.binary_location = os.getenv('CHROME_BINARY', '/usr/bin/google-chrome')
    try:
        driver = webdriver.Chrome(options=options)
        return driver
    except Exception as e:
        print(f"Fehler bei der Initialisierung des WebDrivers: {e}")
        return None
|
||||
|
||||
def login(driver, username, password):
    """Log in to fotograf.de with the given credentials.

    Navigates to the login page, dismisses the cookie banner when present,
    submits the login form and waits for the dashboard URL.

    Args:
        driver: An initialised Selenium WebDriver.
        username: Account e-mail address.
        password: Account password.

    Returns:
        True on successful login, False otherwise.
    """
    print("Starte Login-Vorgang...")
    try:
        driver.get(LOGIN_URL)
        waiter = WebDriverWait(driver, 10)

        # The cookie banner does not always appear; its absence is not an error.
        try:
            print("Suche nach Cookie-Banner...")
            cookie_locator = (By.CSS_SELECTOR, SELECTORS["cookie_accept_button"])
            waiter.until(EC.element_to_be_clickable(cookie_locator)).click()
            print("Cookie-Banner akzeptiert.")
            time.sleep(1)
        except TimeoutException:
            print("Kein Cookie-Banner gefunden, fahre fort.")

        print("Fülle Anmeldeformular aus...")
        user_locator = (By.CSS_SELECTOR, SELECTORS["login_user"])
        user_field = waiter.until(EC.visibility_of_element_located(user_locator))
        user_field.send_keys(username)

        pass_field = driver.find_element(By.CSS_SELECTOR, SELECTORS["login_pass"])
        pass_field.send_keys(password)

        print("Klicke auf Login...")
        driver.find_element(By.CSS_SELECTOR, SELECTORS["login_button"]).click()

        print("Warte auf die nächste Seite...")
        # Successful login redirects to the dashboard.
        waiter.until(EC.url_contains('/config_dashboard/index'))
        print("Login erfolgreich!")
        return True
    except Exception as e:
        print(f"Login fehlgeschlagen. Grund: {e}")
        return False
|
||||
|
||||
# --- New function to get jobs from dashboard ---
|
||||
def _parse_job_row(row) -> Optional[Dict[str, Any]]:
    """Parse one dashboard table row into a job dict; return None on failure.

    The job id is extracted as the trailing numeric path segment of the
    job link's href (None when the URL does not end in digits).
    """
    try:
        name_element = row.find_element(By.XPATH, SELECTORS["job_row_name_link"])
        job_name = name_element.text.strip()
        job_url = name_element.get_attribute('href')

        # Extract Job ID from URL (last numeric path segment).
        job_id_match = re.search(r'/(\d+)$', job_url)
        job_id = job_id_match.group(1) if job_id_match else None

        job_status = row.find_element(By.XPATH, SELECTORS["job_row_status"]).text.strip()
        job_date = row.find_element(By.XPATH, SELECTORS["job_row_date"]).text.strip()
        shooting_type = row.find_element(By.XPATH, SELECTORS["job_row_shooting_type"]).text.strip()

        return {
            "id": job_id,
            "name": job_name,
            "url": job_url,
            "status": job_status,
            "date": job_date,
            "shooting_type": shooting_type,
        }
    except NoSuchElementException as e:
        print(f"Warnung: Konnte nicht alle Elemente in einer Auftragszeile finden. Fehler: {e}")
    except Exception as e:
        print(f"Ein unerwarteter Fehler beim Parsen einer Auftragszeile: {e}")
    return None


def get_jobs_from_dashboard(driver) -> List[Dict[str, Any]]:
    """Navigate to the dashboard and scrape the job list.

    Args:
        driver: A logged-in Selenium WebDriver.

    Returns:
        A list of job dicts (id, name, url, status, date, shooting_type).
        Rows that fail to parse are skipped; an empty list is returned when
        the table is missing or an error occurs.
    """
    print("Navigiere zum Dashboard, um Aufträge abzurufen...")
    dashboard_url = "https://app.fotograf.de/config_dashboard/index"
    driver.get(dashboard_url)
    wait = WebDriverWait(driver, 20)  # Increased timeout for dashboard load

    jobs: List[Dict[str, Any]] = []
    try:
        # Wait for the table rows to be present before scraping.
        job_rows = wait.until(
            EC.presence_of_all_elements_located((By.XPATH, SELECTORS["dashboard_jobs_table_rows"]))
        )
        print(f"[{len(job_rows)}] Auftragszeilen auf dem Dashboard gefunden.")

        for row in job_rows:
            job = _parse_job_row(row)
            if job is not None:
                jobs.append(job)
    except TimeoutException:
        print("Timeout: Keine Auftrags-Tabelle oder -Zeilen auf dem Dashboard gefunden.")
    except Exception as e:
        print(f"Ein Fehler ist aufgetreten beim Abrufen der Aufträge vom Dashboard: {e}")

    return jobs
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Liveness probe: reports that the API process is up."""
    payload = {"status": "ok"}
    return payload
|
||||
|
||||
@app.get("/api/jobs", response_model=List[Dict[str, Any]])
async def get_jobs(account_type: str):
    """Log in with the credentials for *account_type* and return its dashboard jobs.

    Credentials are read from the environment as ``<ACCOUNT_TYPE>_USER`` and
    ``<ACCOUNT_TYPE>_PW`` (upper-cased account type).

    Raises:
        HTTPException 400: credentials are not configured.
        HTTPException 401: login failed.
        HTTPException 500: WebDriver initialisation or an unexpected error.
    """
    username_env_var = f"{account_type.upper()}_USER"
    password_env_var = f"{account_type.upper()}_PW"

    username = os.getenv(username_env_var)
    password = os.getenv(password_env_var)

    if not username or not password:
        raise HTTPException(status_code=400, detail=f"Credentials for {account_type} not found. Please set {username_env_var} and {password_env_var} in your .env file.")

    def _scrape() -> List[Dict[str, Any]]:
        # Blocking Selenium work; executed in a worker thread below so the
        # event loop is not stalled for the duration of the scrape.
        driver = None
        try:
            driver = setup_driver()
            if not driver:
                raise HTTPException(status_code=500, detail="Failed to initialize WebDriver.")

            if not login(driver, username, password):
                raise HTTPException(status_code=401, detail="Login failed. Please check credentials.")

            jobs = get_jobs_from_dashboard(driver)
            if not jobs:
                # An empty list is a valid result when login succeeded but no
                # jobs exist; we log and return it rather than raising.
                print("Keine Aufträge gefunden oder Fehler beim Abrufen vom Dashboard.")
            return jobs
        finally:
            # Always release the browser, whatever happened above.
            if driver:
                print("Schließe WebDriver.")
                driver.quit()

    try:
        return await asyncio.to_thread(_scrape)
    except HTTPException:
        raise  # Re-raise HTTP exceptions unchanged
    except Exception as e:
        print(f"Ein unerwarteter Serverfehler ist aufgetreten: {e}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
# Integrate other scraper functions (process_reminder_mode, process_statistics_mode) as new API endpoints later
|
||||
Reference in New Issue
Block a user