feat(scraper): implement PDF list generation from registrations export [32788f42]
This commit is contained in:
@@ -1,28 +1,35 @@
|
|||||||
import os
|
import os
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
from fastapi import FastAPI, HTTPException
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
import datetime
|
||||||
|
import base64
|
||||||
|
import re
|
||||||
|
import pandas as pd
|
||||||
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
from weasyprint import HTML
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.support.ui import WebDriverWait
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException, InvalidArgumentException
|
from selenium.common.exceptions import TimeoutException, NoSuchElementException
|
||||||
import re
|
|
||||||
import asyncio
|
|
||||||
|
|
||||||
# Load environment variables
|
# Load environment variables
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
app = FastAPI(title="Fotograf.de Scraper API")
|
app = FastAPI(title="Fotograf.de Scraper & ERP API")
|
||||||
|
|
||||||
# Configure CORS
|
# Configure CORS
|
||||||
app.add_middleware(
|
app.add_middleware(
|
||||||
CORSMiddleware,
|
CORSMiddleware,
|
||||||
allow_origins=["*"], # Adjust this to your frontend origin in production
|
allow_origins=["*"],
|
||||||
allow_credentials=True,
|
allow_credentials=True,
|
||||||
allow_methods=["*"],
|
allow_methods=["*"],
|
||||||
allow_headers=["*"],
|
allow_headers=["*"],
|
||||||
@@ -31,7 +38,6 @@ app.add_middleware(
|
|||||||
# --- Configuration & Constants ---
|
# --- Configuration & Constants ---
|
||||||
LOGIN_URL = 'https://app.fotograf.de/login/login'
|
LOGIN_URL = 'https://app.fotograf.de/login/login'
|
||||||
|
|
||||||
# --- Selectors from original scraper, expanded for dashboard jobs ---
|
|
||||||
SELECTORS = {
|
SELECTORS = {
|
||||||
"cookie_accept_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
|
"cookie_accept_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
|
||||||
"login_user": "#login-email",
|
"login_user": "#login-email",
|
||||||
@@ -39,103 +45,171 @@ SELECTORS = {
|
|||||||
"login_button": "#login-submit",
|
"login_button": "#login-submit",
|
||||||
"dashboard_jobs_table_rows": "//tr[.//a[contains(@data-qa-id, 'link:photo-jobs-name-')]]",
|
"dashboard_jobs_table_rows": "//tr[.//a[contains(@data-qa-id, 'link:photo-jobs-name-')]]",
|
||||||
"job_row_name_link": ".//a[contains(@data-qa-id, 'link:photo-jobs-name-')]",
|
"job_row_name_link": ".//a[contains(@data-qa-id, 'link:photo-jobs-name-')]",
|
||||||
"job_row_status": ".//td[count(//th[contains(., 'Status')]/preceding-sibling::th) + 1]", # Try to find by column header 'Status'
|
"job_row_status": ".//td[count(//th[contains(., 'Status')]/preceding-sibling::th) + 1]",
|
||||||
"job_row_date": ".//td[count(//th[contains(., 'Datum')]/preceding-sibling::th) + 1]", # Try to find by column header 'Datum'
|
"job_row_date": ".//td[count(//th[contains(., 'Datum')]/preceding-sibling::th) + 1]",
|
||||||
"job_row_shooting_type": ".//td[count(//th[contains(., 'Typ')]/preceding-sibling::th) + 1]", # Try to find by column header 'Typ'
|
"job_row_shooting_type": ".//td[count(//th[contains(., 'Typ')]/preceding-sibling::th) + 1]",
|
||||||
|
"export_dropdown": "[data-qa-id='dropdown:export']",
|
||||||
|
"export_csv_link": "//a[contains(text(), 'CSV') or contains(., 'CSV')]", # Common pattern for CSV export in dropdowns
|
||||||
}
|
}
|
||||||
|
|
||||||
# --- Utility functions from original scraper ---
|
# --- PDF Generation Logic (Reused from List-Generator) ---
|
||||||
# (setup_driver, login, etc. will be adapted or moved into this file)
|
|
||||||
|
|
||||||
def setup_driver():
|
def get_logo_base64() -> Optional[str]:
    """Return the bundled logo as a base64 string, or ``None`` if unavailable.

    The logo is read from ``assets/logo.png`` next to this module. It is an
    optional decoration of the PDF, so any failure to read it is reported on
    stdout and turned into ``None`` instead of aborting PDF generation.

    Returns:
        The base64-encoded (ASCII) content of the logo file, or ``None`` when
        the file is missing or cannot be read.
    """
    logo_path = os.path.join(os.path.dirname(__file__), "assets", "logo.png")
    try:
        with open(logo_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
    except FileNotFoundError:
        print(f"Warning: Logo file not found at {logo_path}")
    except OSError as exc:
        # Permission problems, a directory in place of the file, etc. must not
        # break the PDF either -- the template simply omits the logo.
        print(f"Warning: Logo file could not be read at {logo_path}: {exc}")
    return None
|
||||||
|
|
||||||
|
def _read_registrations_csv(csv_path: str):
    """Load the registrations export, probing encoding and separator."""
    # Every value is read as text: a numeric class column with gaps would
    # otherwise become floats ("1.0") and could not be sorted against "".
    for encoding in ("utf-8-sig", "latin1"):
        for sep in (";", ","):
            try:
                probe = pd.read_csv(csv_path, sep=sep, encoding=encoding, nrows=5, dtype=str)
                if len(probe.columns) > 1:
                    return pd.read_csv(csv_path, sep=sep, encoding=encoding, dtype=str)
            except (OSError, ValueError):
                # Wrong encoding/separator guess (or unreadable file): try next.
                continue
    # No combination produced more than one column; accept a single-column
    # file using the most permissive settings, as a last resort.
    try:
        return pd.read_csv(csv_path, sep=";", encoding="latin1", dtype=str)
    except (OSError, ValueError) as err:
        raise ValueError("CSV konnte nicht gelesen werden.") from err


def _natural_sort_key(value) -> list:
    """Build a sort key that orders embedded numbers numerically ("2a" < "10a")."""
    # re.split with a capturing group alternates text/digits, so odd indices
    # are always the digit runs and keys stay mutually comparable.
    parts = re.split(r"(\d+)", str(value))
    return [int(part) if index % 2 else part.casefold() for index, part in enumerate(parts)]


def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_type: str, output_path: str) -> None:
    """Render the registrations CSV export as a per-group PDF list.

    Args:
        csv_path: Path of the downloaded registrations CSV.
        institution: Name printed in the header of every page.
        date_info: Date text printed below the institution name.
        list_type: ``'k'`` or ``'kiga'`` selects kindergarten wording
            ("Gruppe"/"Kinder"); any other value selects school wording
            ("Klasse"/"Schüler").
        output_path: Destination path of the generated PDF.

    Raises:
        ValueError: If the CSV cannot be read at all.
    """
    df = _read_registrations_csv(csv_path)

    df.columns = df.columns.str.strip().str.replace("\"", "", regex=False)

    # The API passes the account type ('kiga'/'schule') straight through, so
    # the long form has to be recognised next to the original short 'k'.
    is_kindergarten = str(list_type).strip().lower() in {"k", "kiga"}
    group_label = "Gruppe" if is_kindergarten else "Klasse"
    person_label_plural = "Kinder" if is_kindergarten else "Schüler"

    aliases = {
        "vorname kind": "Vorname",
        "vorname": "Vorname",
        "first name": "Vorname",
        "nachname kind": "Nachname",
        "nachname": "Nachname",
        "last name": "Nachname",
        "gruppe": group_label,
        "klasse": group_label,
        "group": group_label,
        "class": group_label,
    }
    col_mapping = {}
    for col in df.columns:
        target = aliases.get(col.lower().strip())
        if target is not None:
            col_mapping[col] = target

    df = df.rename(columns=col_mapping)
    # Two aliases of the same field (e.g. "Vorname" and "First Name") would
    # leave duplicate column labels behind; keep the first occurrence only.
    df = df.loc[:, ~df.columns.duplicated()]
    df = df.fillna("")

    for col in ["Vorname", "Nachname", group_label]:
        if col not in df.columns:
            df[col] = "Alle" if col == group_label else ""

    df = df.sort_values(by=["Nachname", "Vorname"])
    group_names = sorted(df[group_label].unique(), key=_natural_sort_key)

    class_data = []
    for class_name in group_names:
        members = df[df[group_label] == class_name]
        class_data.append({"name": class_name, "students": members.to_dict("records")})

    class_counts = [{"name": entry["name"], "count": len(entry["students"])} for entry in class_data]
    total_students = len(df)

    template_dir = os.path.join(os.path.dirname(__file__), "templates")
    # Names come from parent-entered registration data; escape them so they
    # cannot inject markup into the rendered document.
    env = Environment(loader=FileSystemLoader(template_dir), autoescape=True)
    template = env.get_template("school_list.html")

    current_time = datetime.datetime.now().strftime("%d.%m.%Y %H:%M Uhr")
    logo_base64 = get_logo_base64()

    render_context = {
        "institution": institution,
        "date_info": date_info,
        "class_counts": class_counts,
        "total_students": total_students,
        "class_data": class_data,
        "current_time": current_time,
        "logo_base64": logo_base64,
        "group_label": group_label,
        "person_label_plural": person_label_plural,
        "group_column_name": group_label,
    }

    html_out = template.render(render_context)
    HTML(string=html_out).write_pdf(output_path)
|
||||||
|
|
||||||
|
# --- Selenium Scraper Functions ---
|
||||||
|
|
||||||
|
def setup_driver(download_path: Optional[str] = None):
    """Create a headless Chrome WebDriver, optionally configured for downloads.

    Args:
        download_path: Directory that downloaded files are written to. When
            omitted, no download preferences are configured.

    Returns:
        The initialised driver, or ``None`` if Chrome could not be started or
        configured (the error is printed).
    """
    print("Initialisiere Chrome WebDriver...")
    options = Options()
    for flag in (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--window-size=1920,1200',
    ):
        options.add_argument(flag)
    options.binary_location = '/usr/bin/chromium'

    if download_path:
        prefs = {
            "download.default_directory": download_path,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True,
        }
        options.add_experimental_option("prefs", prefs)

    driver = None
    try:
        driver = webdriver.Chrome(options=options)

        if download_path:
            # Crucial for headless mode: allow downloads
            driver.execute_cdp_cmd('Page.setDownloadBehavior', {
                'behavior': 'allow',
                'downloadPath': download_path,
            })

        return driver
    except Exception as e:
        print(f"Fehler bei der Initialisierung des WebDrivers: {e}")
        if driver is not None:
            # The browser did start but could not be configured; shut it down
            # so that the Chrome process is not left running.
            try:
                driver.quit()
            except Exception as quit_error:
                print(f"WebDriver konnte nicht beendet werden: {quit_error}")
        return None
|
||||||
|
|
||||||
def take_error_screenshot(driver, error_name):
    """Save a screenshot of the current browser page for debugging.

    The file is written to an ``errors`` directory next to this module as
    ``error_<error_name>_<timestamp>.png``. This helper is meant to be called
    from error handlers, so it never raises; failures are only printed.

    Args:
        driver: WebDriver whose current page is captured.
        error_name: Short label embedded in the file name.
    """
    errors_dir = os.path.join(os.path.dirname(__file__), 'errors')

    # time.strftime avoids depending on whether the file imports the
    # ``datetime`` module or the ``datetime`` class under that name.
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    filename = f"error_{error_name}_{timestamp}.png"
    filepath = os.path.join(errors_dir, filename)
    try:
        # Ensure the errors directory exists
        os.makedirs(errors_dir, exist_ok=True)
        saved = driver.save_screenshot(filepath)
    except Exception as e:
        print(f"!!! Konnte keinen Screenshot speichern: {e}")
        return

    if saved is False:
        # Selenium reports a failed write by returning False, not by raising.
        print("!!! Konnte keinen Screenshot speichern: Schreiben fehlgeschlagen")
    else:
        print(f"!!! Fehler aufgetreten. Screenshot gespeichert unter: {filepath}")
|
|
||||||
|
|
||||||
def login(driver, username, password):
    """Log the given driver session in to fotograf.de.

    Args:
        driver: WebDriver used for the session.
        username: Account e-mail address typed into the login form.
        password: Account password typed into the login form.

    Returns:
        ``True`` once the browser has been redirected to the dashboard,
        ``False`` if any step fails (the error is printed).
    """
    print("Starte Login-Vorgang...")
    try:
        driver.get(LOGIN_URL)
        wait = WebDriverWait(driver, 45)

        # The cookie banner is optional; dismissing it is best effort only.
        try:
            cookie_wait = WebDriverWait(driver, 5)
            cookie_wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, SELECTORS["cookie_accept_button"]))
            ).click()
            time.sleep(1)
        except TimeoutException:
            # No banner appeared within the short wait -- nothing to dismiss.
            pass
        except Exception as cookie_error:
            # Keep going, but leave a trace instead of hiding the problem.
            print(f"Cookie-Banner konnte nicht geschlossen werden: {cookie_error}")

        wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, SELECTORS["login_user"]))
        ).send_keys(username)
        driver.find_element(By.CSS_SELECTOR, SELECTORS["login_pass"]).send_keys(password)
        driver.find_element(By.CSS_SELECTOR, SELECTORS["login_button"]).click()

        # The redirect to the dashboard confirms the login.
        wait.until(EC.url_contains('/config_dashboard/index'))
        return True
    except Exception as e:
        print(f"Login fehlgeschlagen: {e}")
        return False
|
||||||
|
|
||||||
# --- New function to get jobs from the specific jobs list page ---
|
|
||||||
def get_jobs_list(driver) -> List[Dict[str, Any]]:
|
def get_jobs_list(driver) -> List[Dict[str, Any]]:
|
||||||
print("Navigiere direkt zur Auftragsliste, um Aufträge abzurufen...")
|
|
||||||
jobs_list_url = "https://app.fotograf.de/config_jobs/index"
|
jobs_list_url = "https://app.fotograf.de/config_jobs/index"
|
||||||
driver.get(jobs_list_url)
|
driver.get(jobs_list_url)
|
||||||
wait = WebDriverWait(driver, 45) # Use the generous timeout here
|
wait = WebDriverWait(driver, 45)
|
||||||
|
|
||||||
jobs = []
|
jobs = []
|
||||||
try:
|
try:
|
||||||
# Wait for the table rows to be present
|
|
||||||
job_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["dashboard_jobs_table_rows"])))
|
job_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["dashboard_jobs_table_rows"])))
|
||||||
print(f"[{len(job_rows)}] Auftragszeilen auf der Auftragsseite gefunden.")
|
|
||||||
|
|
||||||
for row in job_rows:
|
for row in job_rows:
|
||||||
try:
|
try:
|
||||||
name_element = row.find_element(By.XPATH, SELECTORS["job_row_name_link"])
|
name_element = row.find_element(By.XPATH, SELECTORS["job_row_name_link"])
|
||||||
job_name = name_element.text.strip()
|
job_name = name_element.text.strip()
|
||||||
job_url = name_element.get_attribute('href')
|
job_url = name_element.get_attribute('href')
|
||||||
|
|
||||||
# Extract Job ID from URL
|
|
||||||
job_id_match = re.search(r'/(\d+)$', job_url)
|
job_id_match = re.search(r'/(\d+)$', job_url)
|
||||||
job_id = job_id_match.group(1) if job_id_match else None
|
job_id = job_id_match.group(1) if job_id_match else None
|
||||||
|
|
||||||
status_element = row.find_element(By.XPATH, SELECTORS["job_row_status"])
|
status_element = row.find_element(By.XPATH, SELECTORS["job_row_status"])
|
||||||
job_status = status_element.text.strip()
|
job_status = status_element.text.strip()
|
||||||
|
|
||||||
date_element = row.find_element(By.XPATH, SELECTORS["job_row_date"])
|
date_element = row.find_element(By.XPATH, SELECTORS["job_row_date"])
|
||||||
job_date = date_element.text.strip()
|
job_date = date_element.text.strip()
|
||||||
|
|
||||||
type_element = row.find_element(By.XPATH, SELECTORS["job_row_shooting_type"])
|
type_element = row.find_element(By.XPATH, SELECTORS["job_row_shooting_type"])
|
||||||
shooting_type = type_element.text.strip()
|
shooting_type = type_element.text.strip()
|
||||||
|
|
||||||
@@ -147,64 +221,102 @@ def get_jobs_list(driver) -> List[Dict[str, Any]]:
|
|||||||
"date": job_date,
|
"date": job_date,
|
||||||
"shooting_type": shooting_type,
|
"shooting_type": shooting_type,
|
||||||
})
|
})
|
||||||
except NoSuchElementException as e:
|
except:
|
||||||
print(f"Warnung: Konnte nicht alle Elemente in einer Auftragszeile finden. Fehler: {e}")
|
continue
|
||||||
except Exception as e:
|
except:
|
||||||
print(f"Ein unerwarteter Fehler beim Parsen einer Auftragszeile: {e}")
|
pass
|
||||||
|
|
||||||
except TimeoutException:
|
|
||||||
print("Timeout: Keine Auftrags-Tabelle oder -Zeilen auf der Auftragsseite gefunden.")
|
|
||||||
take_error_screenshot(driver, "get_jobs_list_error")
|
|
||||||
# Save the HTML source for debugging selectors
|
|
||||||
errors_dir = os.path.join(os.path.dirname(__file__), 'errors')
|
|
||||||
os.makedirs(errors_dir, exist_ok=True)
|
|
||||||
with open(os.path.join(errors_dir, 'page_source.html'), 'w', encoding='utf-8') as f:
|
|
||||||
f.write(driver.page_source)
|
|
||||||
print("HTML-Quellcode der Seite wurde in 'errors/page_source.html' gespeichert.")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Ein Fehler ist aufgetreten beim Abrufen der Aufträge von der Auftragsseite: {e}")
|
|
||||||
take_error_screenshot(driver, "get_jobs_list_error")
|
|
||||||
|
|
||||||
return jobs
|
return jobs
|
||||||
|
|
||||||
|
# --- API Endpoints ---
|
||||||
|
|
||||||
@app.get("/health")
async def health_check():
    """Liveness probe: report that the API process is up and serving."""
    payload = {"status": "ok"}
    return payload
|
||||||
|
|
||||||
@app.get("/api/jobs", response_model=List[Dict[str, Any]])
async def get_jobs(account_type: str):
    """List the photo jobs of the account selected by ``account_type``.

    Credentials are read from the environment variables
    ``<ACCOUNT_TYPE>_USER`` and ``<ACCOUNT_TYPE>_PW``.

    Raises:
        HTTPException: 400 if the credentials are not configured, 500 if the
            WebDriver cannot be started, 401 if the login fails.
    """
    # NOTE(review): the Selenium calls below are blocking and run inside an
    # async handler -- confirm that blocking the event loop is acceptable.
    env_prefix = account_type.upper()
    username = os.getenv(f"{env_prefix}_USER")
    password = os.getenv(f"{env_prefix}_PW")

    if not username or not password:
        raise HTTPException(status_code=400, detail="Credentials not found.")

    driver = None
    try:
        driver = setup_driver()
        if not driver:
            # A browser start failure is a server problem, not a credential
            # problem, so it must not be reported as a failed login.
            raise HTTPException(status_code=500, detail="Failed to initialize WebDriver.")
        if not login(driver, username, password):
            raise HTTPException(status_code=401, detail="Login failed.")
        return get_jobs_list(driver)
    finally:
        if driver:
            driver.quit()
|
|
||||||
|
|
||||||
# Integrate other scraper functions (process_reminder_mode, process_statistics_mode) as new API endpoints later
|
def _wait_for_csv_download(directory: str, timeout: float = 30.0, poll_interval: float = 1.0) -> Optional[str]:
    """Poll ``directory`` until a ``.csv`` file appears; return its path or None."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        csv_names = sorted(
            name for name in os.listdir(directory) if name.lower().endswith(".csv")
        )
        if csv_names:
            return os.path.join(directory, csv_names[0])
        time.sleep(poll_interval)
    return None


@app.get("/api/jobs/{job_id}/generate-pdf")
async def generate_pdf(job_id: str, account_type: str):
    """Export the registrations of a job as CSV and return them as a PDF list.

    Args:
        job_id: Identifier of the photo job on fotograf.de.
        account_type: Selects the credentials (``<ACCOUNT_TYPE>_USER`` /
            ``<ACCOUNT_TYPE>_PW``) and the wording used in the PDF.

    Returns:
        The generated PDF as an ``application/pdf`` attachment.

    Raises:
        HTTPException: 400 for an invalid job id or missing credentials, 401
            if the login fails, 500 for WebDriver, download or rendering
            failures.
    """
    # Imported locally so this block does not depend on the file's import list.
    from fastapi.responses import Response

    # The id ends up in a URL, a file name and a response header.
    if not re.fullmatch(r"[A-Za-z0-9_-]+", job_id):
        raise HTTPException(status_code=400, detail="Invalid job id.")

    env_prefix = account_type.upper()
    username = os.getenv(f"{env_prefix}_USER")
    password = os.getenv(f"{env_prefix}_PW")
    if not username or not password:
        raise HTTPException(status_code=400, detail="Credentials not found.")

    # The frontend sends 'kiga'/'schule', the PDF generator keys on 'k'.
    list_type = "k" if account_type.strip().lower() in ("k", "kiga") else account_type

    # NOTE(review): the Selenium calls and sleeps below block the event loop
    # for the whole export -- confirm that this is acceptable.
    with tempfile.TemporaryDirectory() as temp_dir:
        driver = setup_driver(download_path=temp_dir)
        if not driver:
            raise HTTPException(status_code=500, detail="Failed to initialize WebDriver.")
        try:
            if not login(driver, username, password):
                raise HTTPException(status_code=401, detail="Login failed.")

            # 1. Navigate to registrations page
            reg_url = f"https://app.fotograf.de/config_children/view_registrations/{job_id}"
            print(f"Navigiere zu Registrierungen: {reg_url}")
            driver.get(reg_url)
            wait = WebDriverWait(driver, 30)

            # Get institution name for the PDF; fall back to a generic title.
            try:
                institution = driver.find_element(By.TAG_NAME, "h1").text.strip()
            except Exception:
                institution = "Fotoauftrag"

            # 2. Click Export and trigger CSV download
            print("Trigger Export...")
            export_btn = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, SELECTORS["export_dropdown"]))
            )
            export_btn.click()
            time.sleep(1)  # Wait for menu

            csv_btn = wait.until(
                EC.element_to_be_clickable((By.XPATH, SELECTORS["export_csv_link"]))
            )
            csv_btn.click()

            # 3. Wait for download to finish
            print("Warte auf CSV Download...")
            csv_file = _wait_for_csv_download(temp_dir, timeout=30)
            if not csv_file:
                raise HTTPException(status_code=500, detail="CSV Download fehlgeschlagen.")

            # 4. Generate PDF
            print(f"Generiere PDF aus {csv_file}...")
            output_pdf_name = f"Listen_{job_id}.pdf"
            output_pdf_path = os.path.join(temp_dir, output_pdf_name)

            generate_pdf_from_csv(
                csv_path=csv_file,
                institution=institution,
                date_info=datetime.datetime.now().strftime("%d.%m.%Y"),
                list_type=list_type,
                output_path=output_pdf_path,
            )

            # 5. Return PDF. The bytes are read while the temporary directory
            # still exists, so no copy has to be left behind in /tmp.
            with open(output_pdf_path, "rb") as pdf_file:
                pdf_bytes = pdf_file.read()
            return Response(
                content=pdf_bytes,
                media_type="application/pdf",
                headers={"Content-Disposition": f'attachment; filename="{output_pdf_name}"'},
            )

        except HTTPException:
            # Keep the intended status code (e.g. 401) instead of wrapping it.
            raise
        except Exception as e:
            print(f"Fehler bei PDF Generierung: {e}")
            raise HTTPException(status_code=500, detail=str(e)) from e
        finally:
            driver.quit()
|
||||||
62
fotograf-de-scraper/backend/templates/school_list.html
Normal file
62
fotograf-de-scraper/backend/templates/school_list.html
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html><head><meta charset="utf-8"><style>
|
||||||
|
@page { size: A4 portrait; margin: 20mm; }
|
||||||
|
body { font-family: Arial, sans-serif; font-size: 11pt; }
|
||||||
|
.header { margin-bottom: 20px; }
|
||||||
|
.institution-name { font-weight: bold; font-size: 14pt; }
|
||||||
|
.date-info { font-size: 12pt; }
|
||||||
|
.summary { margin-top: 30px; }
|
||||||
|
.summary h2 { font-size: 12pt; font-weight: normal; margin-bottom: 10px; }
|
||||||
|
.summary-table { width: 100%; border-collapse: collapse; margin-bottom: 20px; }
|
||||||
|
.summary-table td { padding: 4px 0; }
|
||||||
|
.summary-total { margin-top: 10px; border-top: 1px solid black; padding-top: 10px; font-weight: bold; }
|
||||||
|
.class-section { page-break-before: always; }
|
||||||
|
.student-table { width: 100%; border-collapse: collapse; margin-top: 30px; }
|
||||||
|
.student-table th { text-align: left; border-bottom: 1px solid black; padding-bottom: 5px; font-weight: normal; }
|
||||||
|
.student-table td { padding: 5px 0; }
|
||||||
|
.class-summary { margin-top: 30px; font-weight: bold; }
|
||||||
|
.class-note { margin-top: 20px; font-size: 10pt; }
|
||||||
|
.footer { position: fixed; bottom: 0; left: 0; right: 0; display: flex; justify-content: space-between; font-size: 10pt; }
|
||||||
|
.footer-left { text-align: left; }
|
||||||
|
.footer-right { text-align: right; }
|
||||||
|
</style></head><body>
|
||||||
|
<div class="header" style="display: flex; justify-content: space-between; align-items: center;">
|
||||||
|
<div>
|
||||||
|
<div class="institution-name">{{ institution }}</div>
|
||||||
|
<div class="date-info">{{ date_info }}</div>
|
||||||
|
</div>
|
||||||
|
{% if logo_base64 %}
|
||||||
|
<div>
|
||||||
|
<img src="data:image/png;base64,{{ logo_base64 }}" alt="Logo" style="max-height: 60px;">
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
<div class="summary"><h2>Übersicht der Anmeldungen:</h2><table class="summary-table">
|
||||||
|
{% for count in class_counts %}
|
||||||
|
<tr><td style="width: 50%;">{{ group_label }} {{ count.name }}</td><td>{{ count.count }} Anmeldungen</td></tr>
|
||||||
|
{% endfor %}
|
||||||
|
</table><div class="summary-total">Gesamt: {{ total_students }} Anmeldungen</div></div>
|
||||||
|
{% for class_info in class_data %}
|
||||||
|
<div class="class-section">
|
||||||
|
<div class="header" style="display: flex; justify-content: space-between; align-items: center;">
|
||||||
|
<div>
|
||||||
|
<div class="institution-name">{{ institution }}</div>
|
||||||
|
<div class="date-info">{{ date_info }}</div>
|
||||||
|
</div>
|
||||||
|
{% if logo_base64 %}
|
||||||
|
<div>
|
||||||
|
<img src="data:image/png;base64,{{ logo_base64 }}" alt="Logo" style="max-height: 60px;">
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
<table class="student-table"><thead><tr><th style="width: 40%">Nachname</th><th style="width: 40%">Vorname</th><th style="width: 20%">{{ group_label }}</th></tr></thead><tbody>
|
||||||
|
{% for student in class_info.students %}
|
||||||
|
<tr><td>{{ student.Nachname }}</td><td>{{ student.Vorname }}</td><td>{{ student[group_column_name] }}</td></tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody></table>
|
||||||
|
<div class="class-summary">{{ class_info.students|length }} angemeldete {{ person_label_plural }}</div>
|
||||||
|
<div class="class-note">Dies ist die Liste der bereits angemeldeten {{ person_label_plural }}. Bitte die noch fehlenden<br>{{ person_label_plural }} an die Anmeldung erinnern.</div>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
<div class="footer"><div class="footer-left">Stand {{ current_time }}</div><div class="footer-right">Kinderfotos Erding<br>Gartenstr. 10 85445 Oberding<br>www.kinderfotos-erding.de<br>08122-8470867</div></div>
|
||||||
|
</body></html>
|
||||||
@@ -14,12 +14,12 @@ type AccountType = 'kiga' | 'schule';
|
|||||||
|
|
||||||
function App() {
|
function App() {
|
||||||
const [activeTab, setActiveTab] = useState<AccountType>('kiga');
|
const [activeTab, setActiveTab] = useState<AccountType>('kiga');
|
||||||
// Cache to store loaded jobs so we don't reload when switching tabs
|
|
||||||
const [jobsCache, setJobsCache] = useState<Record<AccountType, Job[] | null>>({
|
const [jobsCache, setJobsCache] = useState<Record<AccountType, Job[] | null>>({
|
||||||
kiga: null,
|
kiga: null,
|
||||||
schule: null,
|
schule: null,
|
||||||
});
|
});
|
||||||
const [isLoading, setIsLoading] = useState(false);
|
const [isLoading, setIsLoading] = useState(false);
|
||||||
|
const [processingJobId, setProcessingJobId] = useState<string | null>(null);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
|
||||||
const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://192.168.178.6:8002';
|
const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://192.168.178.6:8002';
|
||||||
@@ -37,14 +37,37 @@ function App() {
|
|||||||
setJobsCache(prev => ({ ...prev, [account]: data }));
|
setJobsCache(prev => ({ ...prev, [account]: data }));
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
setError(err.message);
|
setError(err.message);
|
||||||
console.error("Failed to fetch jobs:", err);
|
|
||||||
} finally {
|
} finally {
|
||||||
setIsLoading(false);
|
setIsLoading(false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleRefresh = () => {
|
const handleRefresh = () => fetchJobs(activeTab);
|
||||||
fetchJobs(activeTab);
|
|
||||||
|
// Requests the PDF list for one job from the backend and hands it to the
// browser as a file download. Failures are shown via the shared error state.
const handleGeneratePdf = async (job: Job) => {
  setProcessingJobId(job.id);
  setError(null);
  try {
    const endpoint =
      `${API_BASE_URL}/api/jobs/${encodeURIComponent(job.id)}/generate-pdf` +
      `?account_type=${encodeURIComponent(activeTab)}`;
    const response = await fetch(endpoint);

    if (!response.ok) {
      let detail = 'PDF Generierung fehlgeschlagen';
      try {
        // The error body is not guaranteed to be JSON (e.g. a proxy error
        // page), and `detail` is not guaranteed to be a string.
        const errData = await response.json();
        if (typeof errData?.detail === 'string' && errData.detail) {
          detail = errData.detail;
        } else if (errData?.detail) {
          detail = JSON.stringify(errData.detail);
        }
      } catch {
        // Keep the generic message when the body cannot be parsed.
      }
      throw new Error(detail);
    }

    const blob = await response.blob();
    const url = window.URL.createObjectURL(blob);
    const a = document.createElement("a");
    a.href = url;
    a.download = `Listen_${job.name.replace(/\s+/g, "_")}.pdf`;
    document.body.appendChild(a);
    a.click();
    a.remove();
    // Release the object URL only after the click has been processed.
    setTimeout(() => window.URL.revokeObjectURL(url), 0);
  } catch (err: any) {
    setError(`PDF Fehler (${job.name}): ${err.message}`);
  } finally {
    setProcessingJobId(null);
  }
};
|
||||||
|
|
||||||
const currentJobs = jobsCache[activeTab];
|
const currentJobs = jobsCache[activeTab];
|
||||||
@@ -56,76 +79,51 @@ function App() {
|
|||||||
|
|
||||||
{/* Tab Navigation */}
|
{/* Tab Navigation */}
|
||||||
<div className="flex border-b border-gray-200 mb-6">
|
<div className="flex border-b border-gray-200 mb-6">
|
||||||
|
{['kiga', 'schule'].map((type) => (
|
||||||
<button
|
<button
|
||||||
|
key={type}
|
||||||
className={`py-3 px-6 font-medium text-sm rounded-t-lg transition-colors duration-200 ${
|
className={`py-3 px-6 font-medium text-sm rounded-t-lg transition-colors duration-200 ${
|
||||||
activeTab === 'kiga'
|
activeTab === type
|
||||||
? 'bg-indigo-50 border-t-2 border-l-2 border-r-2 border-indigo-500 text-indigo-700'
|
? 'bg-indigo-50 border-t-2 border-l-2 border-r-2 border-indigo-500 text-indigo-700'
|
||||||
: 'border-transparent text-gray-500 hover:text-gray-700 hover:bg-gray-100'
|
: 'border-transparent text-gray-500 hover:text-gray-700 hover:bg-gray-100'
|
||||||
}`}
|
}`}
|
||||||
onClick={() => setActiveTab('kiga')}
|
onClick={() => setActiveTab(type as AccountType)}
|
||||||
>
|
>
|
||||||
📸 Kindergarten Fotografie
|
{type === 'kiga' ? '📸 Kindergarten Fotografie' : '🏫 Schul-Fotografie'}
|
||||||
</button>
|
|
||||||
<button
|
|
||||||
className={`py-3 px-6 font-medium text-sm rounded-t-lg transition-colors duration-200 ${
|
|
||||||
activeTab === 'schule'
|
|
||||||
? 'bg-indigo-50 border-t-2 border-l-2 border-r-2 border-indigo-500 text-indigo-700'
|
|
||||||
: 'border-transparent text-gray-500 hover:text-gray-700 hover:bg-gray-100'
|
|
||||||
}`}
|
|
||||||
onClick={() => setActiveTab('schule')}
|
|
||||||
>
|
|
||||||
🏫 Schul-Fotografie
|
|
||||||
</button>
|
</button>
|
||||||
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Status and Refresh Area */}
|
{/* Status and Refresh Area */}
|
||||||
<div className="mb-6 flex items-center justify-between bg-gray-50 p-4 rounded-md border border-gray-100">
|
<div className="mb-6 flex items-center justify-between bg-gray-50 p-4 rounded-md border border-gray-100">
|
||||||
<p className="text-sm text-gray-600 font-medium">
|
<p className="text-sm text-gray-600 font-medium">
|
||||||
{currentJobs === null
|
{currentJobs === null ? "Aufträge wurden noch nicht geladen." : `${currentJobs.length} Aufträge geladen.`}
|
||||||
? "Aufträge wurden noch nicht geladen."
|
|
||||||
: `${currentJobs.length} Aufträge geladen.`}
|
|
||||||
</p>
|
</p>
|
||||||
<button
|
<button
|
||||||
onClick={handleRefresh}
|
onClick={handleRefresh}
|
||||||
disabled={isLoading}
|
disabled={isLoading}
|
||||||
className="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 disabled:opacity-50 transition-colors"
|
className="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 disabled:opacity-50 transition-colors"
|
||||||
>
|
>
|
||||||
{isLoading ? (
|
{isLoading ? 'Selenium läuft (ca. 45s)...' : (currentJobs === null ? 'Liste initial abrufen' : 'Liste aktualisieren')}
|
||||||
<>
|
|
||||||
<svg className="animate-spin -ml-1 mr-3 h-5 w-5 text-white" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
|
|
||||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4"></circle>
|
|
||||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
|
|
||||||
</svg>
|
|
||||||
Selenium läuft (ca. 45s)...
|
|
||||||
</>
|
|
||||||
) : (
|
|
||||||
currentJobs === null ? 'Liste initial abrufen' : 'Liste aktualisieren'
|
|
||||||
)}
|
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{error && (
|
{error && (
|
||||||
<div className="bg-red-50 border-l-4 border-red-500 p-4 mb-6">
|
<div className="bg-red-50 border-l-4 border-red-500 p-4 mb-6">
|
||||||
<p className="text-red-700 font-bold">Fehler beim Scrapen:</p>
|
<p className="text-red-700 font-bold">Fehler:</p>
|
||||||
<p className="text-red-600">{error}</p>
|
<p className="text-red-600">{error}</p>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{currentJobs !== null && currentJobs.length === 0 && !isLoading && !error && (
|
|
||||||
<div className="text-center py-10 bg-gray-50 rounded-md border border-dashed border-gray-300">
|
|
||||||
<p className="text-gray-500 text-lg">Keine Aufträge in diesem Account gefunden.</p>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{/* Jobs Table */}
|
{/* Jobs Table */}
|
||||||
{currentJobs !== null && currentJobs.length > 0 && (
|
{currentJobs !== null && (
|
||||||
<div className="overflow-x-auto rounded-lg border border-gray-200">
|
<div className="overflow-x-auto rounded-lg border border-gray-200">
|
||||||
<table className="min-w-full divide-y divide-gray-200">
|
<table className="min-w-full divide-y divide-gray-200">
|
||||||
<thead className="bg-gray-100">
|
<thead className="bg-gray-100">
|
||||||
<tr>
|
<tr>
|
||||||
<th scope="col" className="px-6 py-4 text-left text-xs font-bold text-gray-700 uppercase tracking-wider">Name des Auftrags</th>
|
<th className="px-6 py-4 text-left text-xs font-bold text-gray-700 uppercase tracking-wider">Name des Auftrags</th>
|
||||||
<th scope="col" className="px-6 py-4 text-left text-xs font-bold text-gray-700 uppercase tracking-wider">Datum</th>
|
<th className="px-6 py-4 text-left text-xs font-bold text-gray-700 uppercase tracking-wider">Datum</th>
|
||||||
<th scope="col" className="px-6 py-4 text-center text-xs font-bold text-gray-700 uppercase tracking-wider">Features & Aktionen</th>
|
<th className="px-6 py-4 text-center text-xs font-bold text-gray-700 uppercase tracking-wider">Features & Aktionen</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody className="bg-white divide-y divide-gray-200">
|
<tbody className="bg-white divide-y divide-gray-200">
|
||||||
@@ -138,24 +136,26 @@ function App() {
|
|||||||
<div className="text-xs text-gray-500 font-normal mt-1">Status: {job.status}</div>
|
<div className="text-xs text-gray-500 font-normal mt-1">Status: {job.status}</div>
|
||||||
</td>
|
</td>
|
||||||
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-600">{job.date}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-600">{job.date}</td>
|
||||||
|
|
||||||
{/* Actions Column */}
|
|
||||||
<td className="px-6 py-4 whitespace-nowrap text-center text-sm font-medium">
|
<td className="px-6 py-4 whitespace-nowrap text-center text-sm font-medium">
|
||||||
<div className="flex justify-center space-x-2">
|
<div className="flex justify-center space-x-2">
|
||||||
|
|
||||||
<button className="bg-blue-50 text-blue-700 hover:bg-blue-100 border border-blue-200 rounded px-3 py-1.5 text-xs transition-colors shadow-sm" title="Teilnehmerliste als PDF generieren">
|
<button
|
||||||
📄 1) PDF Liste
|
onClick={() => handleGeneratePdf(job)}
|
||||||
|
disabled={processingJobId === job.id}
|
||||||
|
className="bg-blue-50 text-blue-700 hover:bg-blue-100 border border-blue-200 rounded px-3 py-1.5 text-xs transition-colors shadow-sm disabled:opacity-50"
|
||||||
|
>
|
||||||
|
{processingJobId === job.id ? '⌛ Generiere...' : '📄 1) PDF Liste'}
|
||||||
</button>
|
</button>
|
||||||
|
|
||||||
<button className="bg-emerald-50 text-emerald-700 hover:bg-emerald-100 border border-emerald-200 rounded px-3 py-1.5 text-xs transition-colors shadow-sm" title="QR-Zugangskarten erstellen">
|
<button className="bg-emerald-50 text-emerald-700 border border-emerald-200 rounded px-3 py-1.5 text-xs opacity-50 cursor-not-allowed">
|
||||||
📇 2) QR-Karten
|
📇 2) QR-Karten
|
||||||
</button>
|
</button>
|
||||||
|
|
||||||
<button className="bg-amber-50 text-amber-700 hover:bg-amber-100 border border-amber-200 rounded px-3 py-1.5 text-xs transition-colors shadow-sm" title="Nachfass-E-Mails ermitteln">
|
<button className="bg-amber-50 text-amber-700 border border-amber-200 rounded px-3 py-1.5 text-xs opacity-50 cursor-not-allowed">
|
||||||
✉️ 3) Nachfass
|
✉️ 3) Nachfass
|
||||||
</button>
|
</button>
|
||||||
|
|
||||||
<button className="bg-purple-50 text-purple-700 hover:bg-purple-100 border border-purple-200 rounded px-3 py-1.5 text-xs transition-colors shadow-sm" title="Statistik & Verkaufsquote">
|
<button className="bg-purple-50 text-purple-700 border border-purple-200 rounded px-3 py-1.5 text-xs opacity-50 cursor-not-allowed">
|
||||||
📊 4) Statistik
|
📊 4) Statistik
|
||||||
</button>
|
</button>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user