[32788f42] feat: implement database persistence, modernized UI with Tailwind, and Calendly-integrated QR card generator for Fotograf.de scraper
This commit is contained in:
@@ -10,10 +10,14 @@ import tempfile
|
||||
import shutil
|
||||
import time
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse
|
||||
from typing import List, Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from database import get_db, Job as DBJob, engine, Base
|
||||
import math
|
||||
import uuid
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
@@ -35,6 +39,9 @@ logger = logging.getLogger("fotograf-scraper")
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Ensure DB is created
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
app = FastAPI(title="Fotograf.de Scraper & ERP API")
|
||||
|
||||
# Configure CORS
|
||||
@@ -61,6 +68,14 @@ SELECTORS = {
|
||||
"job_row_shooting_type": ".//td[count(//th[contains(., 'Typ')]/preceding-sibling::th) + 1]",
|
||||
"export_dropdown": "[data-qa-id='dropdown:export']",
|
||||
"export_csv_link": "button[data-qa-id='button:csv']",
|
||||
# --- Statistics Selectors ---
|
||||
"album_overview_rows": "//table/tbody/tr",
|
||||
"album_overview_link": ".//td[2]//a",
|
||||
"access_code_count": "//span[text()='Zugangscodes']/following-sibling::strong",
|
||||
"person_rows": "//div[contains(@class, 'border-legacy-silver-550') and .//span[text()='Logins']]",
|
||||
"person_all_photos": ".//div[@data-key]",
|
||||
"person_purchased_photos": ".//div[@data-key and .//img[@alt='Bestellungen mit diesem Foto']]",
|
||||
"person_access_card_photo": ".//div[@data-key and contains(@class, 'opacity-50')]",
|
||||
}
|
||||
|
||||
# --- PDF Generation Logic ---
|
||||
@@ -278,15 +293,214 @@ def get_jobs_list(driver) -> List[Dict[str, Any]]:
|
||||
|
||||
return jobs
|
||||
|
||||
# --- Background Task Engine ---
|
||||
|
||||
task_store: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
def process_statistics(task_id: str, job_id: str, account_type: str) -> None:
    """Background task: scrape per-album purchase statistics for one job.

    Logs into fotograf.de with the credentials for *account_type*, walks every
    album of the job, and for each person (child) counts total / purchased /
    access-card photos. Progress and the final result are communicated solely
    through the module-level ``task_store`` dict under *task_id* — this
    function returns nothing and raises nothing to the caller.

    Args:
        task_id: UUID key under which status/progress/result are published.
        job_id: fotograf.de job id whose albums are evaluated.
        account_type: env-var prefix; credentials come from
            ``{ACCOUNT_TYPE}_USER`` / ``{ACCOUNT_TYPE}_PW``.
    """
    logger.info(f"Task {task_id}: Starting statistics calculation for job {job_id}")
    # Publish initial state so pollers of /api/tasks/{task_id} see "running" immediately.
    task_store[task_id] = {"status": "running", "progress": "Initialisiere Browser...", "result": None}

    username = os.getenv(f"{account_type.upper()}_USER")
    password = os.getenv(f"{account_type.upper()}_PW")
    driver = None

    try:
        driver = setup_driver()
        if not driver or not login(driver, username, password):
            # NOTE: error states replace the whole dict and drop the "result" key.
            task_store[task_id] = {"status": "error", "progress": "Login fehlgeschlagen. Überprüfe die Zugangsdaten."}
            return

        task_store[task_id]["progress"] = f"Lade Alben-Übersicht für Auftrag..."

        albums_overview_url = f"https://app.fotograf.de/config_jobs_photos/index/{job_id}"
        logger.info(f"Navigating to albums: {albums_overview_url}")
        driver.get(albums_overview_url)
        wait = WebDriverWait(driver, 15)

        # Collect album names + URLs first, so navigation below does not
        # invalidate the row elements we iterate over.
        albums_to_visit = []
        try:
            album_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["album_overview_rows"])))
            for row in album_rows:
                try:
                    album_link = row.find_element(By.XPATH, SELECTORS["album_overview_link"])
                    albums_to_visit.append({"name": album_link.text, "url": album_link.get_attribute('href')})
                except NoSuchElementException:
                    # Rows without a link (e.g. header/placeholder rows) are skipped.
                    continue
        except TimeoutException:
            task_store[task_id] = {"status": "error", "progress": "Konnte die Album-Liste nicht finden."}
            return

        total_albums = len(albums_to_visit)
        task_store[task_id]["progress"] = f"{total_albums} Alben gefunden. Starte Auswertung..."

        statistics = []

        for index, album in enumerate(albums_to_visit):
            album_name = album['name']
            task_store[task_id]["progress"] = f"Bearbeite Album {index + 1}/{total_albums}: '{album_name}'..."
            driver.get(album['url'])

            try:
                # Access-code count drives pagination; 20 entries per page
                # (presumably the site's fixed page size — TODO confirm).
                total_codes_text = wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["access_code_count"]))).text
                num_pages = math.ceil(int(total_codes_text) / 20)

                total_children_in_album = 0
                children_with_purchase = 0
                children_with_all_purchased = 0

                for page_num in range(1, num_pages + 1):
                    task_store[task_id]["progress"] = f"Bearbeite Album {index + 1}/{total_albums}: '{album_name}' (Seite {page_num}/{num_pages})..."

                    # Page 1 is already loaded by the driver.get() above.
                    if page_num > 1:
                        driver.get(album['url'] + f"?page_guest_accesses={page_num}")

                    person_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))

                    for person_row in person_rows:
                        total_children_in_album += 1
                        try:
                            # The photo grid is the sibling element right after the person row.
                            photo_container = person_row.find_element(By.XPATH, "./following-sibling::div[1]")

                            num_total_photos = len(photo_container.find_elements(By.XPATH, SELECTORS["person_all_photos"]))
                            num_purchased_photos = len(photo_container.find_elements(By.XPATH, SELECTORS["person_purchased_photos"]))
                            num_access_cards = len(photo_container.find_elements(By.XPATH, SELECTORS["person_access_card_photo"]))

                            # Access-card photos are not purchasable, so exclude
                            # them when checking "bought everything".
                            buyable_photos = num_total_photos - num_access_cards

                            if num_purchased_photos > 0:
                                children_with_purchase += 1

                            if buyable_photos > 0 and buyable_photos == num_purchased_photos:
                                children_with_all_purchased += 1
                        except NoSuchElementException:
                            # Person without a photo container still counts
                            # toward total_children_in_album (incremented above).
                            continue

                statistics.append({
                    "Album": album_name,
                    "Kinder_insgesamt": total_children_in_album,
                    "Kinder_mit_Käufen": children_with_purchase,
                    "Kinder_Alle_Bilder_gekauft": children_with_all_purchased
                })

            except Exception as e:
                # Best-effort per album: one broken album must not abort the
                # whole evaluation; it is simply missing from the result.
                logger.error(f"Fehler bei Auswertung von Album '{album_name}': {e}")
                continue

        task_store[task_id] = {
            "status": "completed",
            "progress": "Auswertung erfolgreich abgeschlossen!",
            "result": statistics
        }

    except Exception as e:
        logger.exception(f"Unexpected error in task {task_id}")
        task_store[task_id] = {"status": "error", "progress": f"Unerwarteter Fehler: {str(e)}"}
    finally:
        # Always release the browser, even on failure paths.
        if driver:
            logger.debug(f"Task {task_id}: Closing driver.")
            driver.quit()
|
||||
from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks, UploadFile, File, Form
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
from database import get_db, Job as DBJob, engine, Base
|
||||
import math
|
||||
import uuid
|
||||
from qr_generator import get_calendly_events, overlay_text_on_pdf
|
||||
|
||||
# --- API Endpoints ---
|
||||
|
||||
@app.get("/api/calendly/events")
async def fetch_calendly_events(start_time: str, end_time: str, event_type_name: Optional[str] = None):
    """Debug endpoint: return the raw Calendly events in the given window.

    Requires the CALENDLY_TOKEN environment variable; responds 400 when it is
    missing and 500 when the Calendly lookup itself fails.
    """
    api_token = os.getenv("CALENDLY_TOKEN")
    if not api_token:
        raise HTTPException(status_code=400, detail="Calendly API token missing.")

    try:
        # Imported lazily so the module loads even if qr_generator is absent.
        from qr_generator import get_calendly_events_raw

        events = get_calendly_events_raw(api_token, start_time, end_time, event_type_name)
        return {"count": len(events), "events": events}
    except Exception as exc:
        logger.error(f"Error fetching Calendly events: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@app.post("/api/qr-cards/generate")
async def generate_qr_cards(
    start_time: str = Form(...),
    end_time: str = Form(...),
    event_type_name: str = Form(None),
    pdf_file: UploadFile = File(...)
):
    """Overlay Calendly appointment data onto an uploaded QR-card PDF.

    Fetches formatted event texts from Calendly for the given time window,
    writes them onto the uploaded template PDF, and streams the generated
    PDF back. Responds 400 when CALENDLY_TOKEN is unset, 404 when no
    matching events exist, 500 on any processing error.
    """
    logger.info(f"API Request: Generate QR cards from {start_time} to {end_time} for event type '{event_type_name}'")
    api_token = os.getenv("CALENDLY_TOKEN")
    if not api_token:
        raise HTTPException(status_code=400, detail="Calendly API token missing.")

    temp_dir = tempfile.gettempdir()
    base_pdf_path = os.path.join(temp_dir, f"upload_{uuid.uuid4()}.pdf")
    try:
        # Save uploaded PDF temporarily
        with open(base_pdf_path, "wb") as buffer:
            shutil.copyfileobj(pdf_file.file, buffer)

        # 1. Fetch formatted data from Calendly
        texts = get_calendly_events(api_token, start_time, end_time, event_type_name)
        if not texts:
            return JSONResponse(status_code=404, content={"message": "Keine passenden Termine gefunden."})

        # 2. Overlay text on blank PDF
        output_name = f"QR_Karten_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
        output_path = os.path.join(temp_dir, output_name)

        overlay_text_on_pdf(base_pdf_path, output_path, texts)

        # output_path is served by FileResponse; only the upload is temporary.
        return FileResponse(path=output_path, filename=output_name, media_type="application/pdf")

    except Exception as e:
        logger.error(f"Error generating QR cards: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # BUGFIX: the original only removed the uploaded temp file on the
        # success and 404 paths, leaking it whenever Calendly or the PDF
        # overlay raised. Clean up unconditionally here instead.
        if os.path.exists(base_pdf_path):
            os.remove(base_pdf_path)
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Liveness probe: always reports that the API process is up."""
    return dict(status="ok")
|
||||
|
||||
@app.get("/api/jobs", response_model=List[Dict[str, Any]])
|
||||
async def get_jobs(account_type: str):
|
||||
logger.info(f"API Request: GET /api/jobs for {account_type}")
|
||||
async def get_jobs(account_type: str, force_refresh: bool = False, db: Session = Depends(get_db)):
|
||||
logger.info(f"API Request: GET /api/jobs for {account_type} (force_refresh={force_refresh})")
|
||||
|
||||
# 1. Check database first if not forcing a refresh
|
||||
if not force_refresh:
|
||||
cached_jobs = db.query(DBJob).filter(DBJob.account_type == account_type).all()
|
||||
if cached_jobs:
|
||||
logger.info(f"Returning {len(cached_jobs)} cached jobs for {account_type}")
|
||||
return [
|
||||
{
|
||||
"id": job.id,
|
||||
"name": job.name,
|
||||
"url": job.url,
|
||||
"status": job.status,
|
||||
"date": job.date,
|
||||
"shooting_type": job.shooting_type,
|
||||
"last_updated": job.last_updated.isoformat() if job.last_updated else None
|
||||
}
|
||||
for job in cached_jobs
|
||||
]
|
||||
else:
|
||||
logger.info(f"No cached jobs found for {account_type}. Initiating scrape...")
|
||||
|
||||
# 2. Scrape from fotograf.de if forcing refresh or no cached jobs
|
||||
username = os.getenv(f"{account_type.upper()}_USER")
|
||||
password = os.getenv(f"{account_type.upper()}_PW")
|
||||
if not username or not password:
|
||||
@@ -298,12 +512,61 @@ async def get_jobs(account_type: str):
|
||||
driver = setup_driver()
|
||||
if not driver or not login(driver, username, password):
|
||||
raise HTTPException(status_code=401, detail="Login failed.")
|
||||
return get_jobs_list(driver)
|
||||
|
||||
scraped_jobs = get_jobs_list(driver)
|
||||
|
||||
# 3. Save to database
|
||||
if scraped_jobs:
|
||||
logger.info(f"Saving {len(scraped_jobs)} jobs to database for {account_type}...")
|
||||
# Clear old jobs for this account type
|
||||
db.query(DBJob).filter(DBJob.account_type == account_type).delete()
|
||||
|
||||
# Insert new jobs
|
||||
now = datetime.datetime.utcnow()
|
||||
for job_data in scraped_jobs:
|
||||
if job_data["id"]: # Ensure we have an ID
|
||||
new_job = DBJob(
|
||||
id=job_data["id"],
|
||||
name=job_data["name"],
|
||||
url=job_data["url"],
|
||||
status=job_data["status"],
|
||||
date=job_data["date"],
|
||||
shooting_type=job_data["shooting_type"],
|
||||
account_type=account_type,
|
||||
last_updated=now
|
||||
)
|
||||
db.add(new_job)
|
||||
|
||||
# Update dict for return value
|
||||
job_data["last_updated"] = now.isoformat()
|
||||
|
||||
db.commit()
|
||||
logger.info("Database updated successfully.")
|
||||
|
||||
return scraped_jobs
|
||||
except Exception as e:
|
||||
logger.error(f"Error during scraping or database save: {e}")
|
||||
db.rollback()
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
if driver:
|
||||
logger.debug("Closing driver.")
|
||||
driver.quit()
|
||||
|
||||
@app.get("/api/tasks/{task_id}")
async def get_task_status(task_id: str):
    """Return the stored state of a background task, or 404 if unknown."""
    logger.debug(f"API Request: Check task status for {task_id}")
    try:
        return task_store[task_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="Task nicht gefunden.")
|
||||
|
||||
@app.post("/api/jobs/{job_id}/statistics")
async def start_statistics(job_id: str, account_type: str, background_tasks: BackgroundTasks):
    """Launch the statistics scrape in the background and hand back a task id.

    Clients poll /api/tasks/{task_id} with the returned id for progress.
    """
    logger.info(f"API Request: Start statistics for job {job_id} ({account_type})")
    new_task_id = str(uuid.uuid4())
    background_tasks.add_task(process_statistics, new_task_id, job_id, account_type)
    return {"task_id": new_task_id}
|
||||
|
||||
@app.get("/api/jobs/{job_id}/generate-pdf")
|
||||
async def generate_pdf(job_id: str, account_type: str):
|
||||
logger.info(f"API Request: Generate PDF for job {job_id} ({account_type})")
|
||||
|
||||
Reference in New Issue
Block a user