[32788f42] Keine Zusammenfassung angegeben.

Keine Zusammenfassung angegeben.
This commit is contained in:
2026-04-08 08:21:54 +00:00
parent 5d28a34f02
commit 4baece46bb
4 changed files with 102 additions and 5 deletions

View File

@@ -1,6 +1,7 @@
import os
import logging
import datetime
from zoneinfo import ZoneInfo
import base64
import re
import pandas as pd
@@ -36,6 +37,34 @@ logging.basicConfig(
)
logger = logging.getLogger("fotograf-scraper")
# --- Global State for Last Generated File ---
# Module-level, in-memory record of the most recently generated export.
# It is never written to disk, so it only lives as long as the running
# process (i.e. as long as the container runs).
LATEST_FILE_STATE = dict(
    path=None,          # location of the stable copy of the file
    display_name=None,  # human-readable label for the file
    timestamp=None,     # formatted creation time string
    type=None,          # 'pdf' or 'csv'
)
def update_latest_file(file_path: str, display_name: str, file_type: str):
    """Remember ``file_path`` as the most recently generated file.

    The file is copied to ``/tmp/latest_result_<file_type>.<file_type>`` and
    the module-level ``LATEST_FILE_STATE`` is pointed at that copy, together
    with the display name, the file type and the current Europe/Berlin time
    formatted as ``HH:MM Uhr``.

    This is best-effort: any exception is logged and swallowed so that a
    tracking failure never propagates to the caller.

    Args:
        file_path: Path of the freshly generated file to copy.
        display_name: Human-readable label stored alongside the file.
        file_type: Used both as the type marker and as the file extension
            of the copy (callers in this file pass 'pdf' or 'csv').
    """
    try:
        # One fixed slot per file type under /tmp: a newer file of the same
        # type overwrites the previous copy.
        target = os.path.join("/tmp", f"latest_result_{file_type}.{file_type}")
        shutil.copy2(file_path, target)

        berlin_time = datetime.datetime.now(ZoneInfo("Europe/Berlin"))
        LATEST_FILE_STATE.update(
            {
                "path": target,
                "display_name": display_name,
                "timestamp": berlin_time.strftime("%H:%M Uhr"),
                "type": file_type,
            }
        )
        logger.info(f"Updated latest file state: {display_name}")
    except Exception as exc:
        logger.error(f"Failed to update latest file state: {exc}")
def get_berlin_now_str():
    """Return the current Europe/Berlin time formatted as ``DD.MM.YYYY HH:MM Uhr``."""
    berlin_tz = ZoneInfo("Europe/Berlin")
    current = datetime.datetime.now(berlin_tz)
    return current.strftime("%d.%m.%Y %H:%M Uhr")
def format_job_date(date_str: str) -> str:
import re
import datetime
@@ -171,7 +200,7 @@ def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_
env = Environment(loader=FileSystemLoader(template_dir))
template = env.get_template("school_list.html")
current_time = datetime.datetime.now().strftime("%d.%m.%Y %H:%M Uhr")
current_time = get_berlin_now_str()
logo_base64 = get_logo_base64()
render_context = {
@@ -191,6 +220,7 @@ def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_
html_out = template.render(render_context)
logger.info(f"Writing PDF to: {output_path}")
HTML(string=html_out).write_pdf(output_path)
update_latest_file(output_path, f"Teilnehmerliste {institution}", "pdf")
def generate_appointment_overview_pdf(raw_events: list, job_name: str, event_type_name: str, output_path: str):
from collections import defaultdict
@@ -315,7 +345,7 @@ def generate_appointment_overview_pdf(raw_events: list, job_name: str, event_typ
env = Environment(loader=FileSystemLoader(template_dir))
template = env.get_template("appointment_list.html")
current_time = datetime.datetime.now().strftime("%d.%m.%Y %H:%M Uhr")
current_time = get_berlin_now_str()
logo_base64 = get_logo_base64()
render_context = {
@@ -328,6 +358,7 @@ def generate_appointment_overview_pdf(raw_events: list, job_name: str, event_typ
html_out = template.render(render_context)
HTML(string=html_out).write_pdf(output_path)
update_latest_file(output_path, f"Terminübersicht {job_name}", "pdf")
# --- Selenium Scraper Functions ---
@@ -790,6 +821,9 @@ async def generate_qr_cards(
# Cleanup uploaded file
os.remove(base_pdf_path)
# Update latest file tracking
update_latest_file(output_path, f"QR-Karten ({event_type_name or 'Calendly'})", "pdf")
return FileResponse(path=output_path, filename=output_name, media_type="application/pdf")
except Exception as e:
@@ -853,6 +887,29 @@ async def generate_appointment_list(job_id: str, event_type_name: str, db: Sessi
logger.error(f"Error generating appointment overview pdf: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/jobs/latest-file")
async def get_latest_file_info():
    """Report whether a latest generated file is available for download.

    Returns ``{"has_file": False}`` when no file has been recorded or the
    recorded path no longer exists on disk; otherwise returns the stored
    display name, timestamp and type together with ``"has_file": True``.
    """
    stored_path = LATEST_FILE_STATE["path"]
    if stored_path and os.path.exists(stored_path):
        info = {"has_file": True}
        for key in ("display_name", "timestamp", "type"):
            info[key] = LATEST_FILE_STATE[key]
        return info
    return {"has_file": False}
@app.get("/api/jobs/download-latest")
async def download_latest_file():
    """Serve the most recently generated file as a download.

    Raises:
        HTTPException: 404 when no file has been recorded or the recorded
            path no longer exists on disk.
    """
    stored_path = LATEST_FILE_STATE["path"]
    if not (stored_path and os.path.exists(stored_path)):
        raise HTTPException(status_code=404, detail="Keine Datei gefunden.")

    file_type = LATEST_FILE_STATE["type"]
    # Anything that is not a PDF is served as CSV.
    if file_type == "pdf":
        mime_type = "application/pdf"
    else:
        mime_type = "text/csv"

    download_name = f"Letzte_Datei_{file_type}.{file_type}"
    return FileResponse(
        path=stored_path,
        filename=download_name,
        media_type=mime_type,
    )
@app.get("/health")
async def health_check():
    """Health endpoint: unconditionally reports status ``ok``."""
    payload = {"status": "ok"}
    return payload
@@ -968,7 +1025,11 @@ async def download_task_csv(task_id: str):
df = pd.DataFrame(result)
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
df.to_csv(temp_file.name, index=False, encoding='utf-8-sig')
return FileResponse(path=temp_file.name, filename=f"Supermailer_Liste_{task_id[:8]}.csv", media_type="text/csv")
filename = f"Supermailer_Liste_{task_id[:8]}.csv"
update_latest_file(temp_file.name, "Supermailer Liste", "csv")
return FileResponse(path=temp_file.name, filename=filename, media_type="text/csv")
except Exception as e:
logger.error(f"Export error: {e}")
raise HTTPException(status_code=500, detail="CSV Export fehlgeschlagen.")
@@ -1054,7 +1115,7 @@ async def generate_pdf(job_id: str, account_type: str, db: Session = Depends(get
if job_record and job_record.date:
final_date_info = format_job_date(job_record.date)
else:
final_date_info = datetime.datetime.now().strftime("%d.%m.%Y")
final_date_info = datetime.datetime.now(ZoneInfo("Europe/Berlin")).strftime("%d.%m.%Y")
generate_pdf_from_csv(
csv_path=csv_file,