chore(backend): enable verbose DEBUG logging for troubleshooting [32788f42]

This commit is contained in:
2026-03-20 18:44:50 +00:00
parent 5c69c44ed3
commit 39c3a59744
2 changed files with 107 additions and 44 deletions

View File

@@ -38,8 +38,11 @@ RUN pip install --no-cache-dir -r requirements.txt
# Copy the application code
COPY . .
# Create directory for error screenshots
RUN mkdir -p /app/errors && chmod 777 /app/errors
# Expose the port FastAPI will run on
EXPOSE 8000
# Command to run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
# Command to run the application with DEBUG logging
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--log-level", "debug"]

View File

@@ -1,10 +1,5 @@
import os
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from typing import List, Dict, Any, Optional
import time
import logging
import datetime
import base64
import re
@@ -13,6 +8,12 @@ from jinja2 import Environment, FileSystemLoader
from weasyprint import HTML
import tempfile
import shutil
import time
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from typing import List, Dict, Any, Optional
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
@@ -21,6 +22,16 @@ from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
# --- Logging Configuration ---
# Verbose DEBUG output for the application, emitted through a single
# StreamHandler with timestamp, logger name and level on every record.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("fotograf-scraper")

# With the root logger at DEBUG, third-party libraries inherit that level.
# Selenium's remote connection logs WebDriver request bodies at DEBUG, which
# would include the text passed to send_keys() (i.e. the login credentials),
# and urllib3 logs every HTTP round trip to the driver. Cap both at INFO so
# troubleshooting logs stay readable and do not contain passwords.
for _noisy_logger in ("selenium.webdriver.remote.remote_connection", "urllib3"):
    logging.getLogger(_noisy_logger).setLevel(logging.INFO)
# Load environment variables
load_dotenv()
@@ -49,38 +60,46 @@ SELECTORS = {
"job_row_date": ".//td[count(//th[contains(., 'Datum')]/preceding-sibling::th) + 1]",
"job_row_shooting_type": ".//td[count(//th[contains(., 'Typ')]/preceding-sibling::th) + 1]",
"export_dropdown": "[data-qa-id='dropdown:export']",
"export_csv_link": "//a[contains(text(), 'CSV') or contains(., 'CSV')]", # Common pattern for CSV export in dropdowns
"export_csv_link": "//a[contains(text(), 'CSV') or contains(., 'CSV')]",
}
# --- PDF Generation Logic (Reused from List-Generator) ---
# --- PDF Generation Logic ---
def get_logo_base64() -> Optional[str]:
    """Return the logo image as a base64-encoded string.

    The logo is read from ``assets/logo.png`` next to this module.

    Returns:
        The base64 text (UTF-8 decoded) of the file's bytes, or None when the
        file is missing or cannot be read. The logo is treated as optional, so
        read problems are logged as warnings instead of being raised.
    """
    logo_path = os.path.join(os.path.dirname(__file__), "assets", "logo.png")
    logger.debug(f"Loading logo from: {logo_path}")
    try:
        with open(logo_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except FileNotFoundError:
        logger.warning(f"Logo file not found at {logo_path}")
        return None
    except OSError as e:
        # Unreadable logo (permissions, path is a directory, ...): handle it
        # like a missing logo rather than failing the caller.
        logger.warning(f"Could not read logo file at {logo_path}: {e}")
        return None
def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_type: str, output_path: str):
logger.info(f"Generating PDF for {institution} from {csv_path}")
df = None
# Try different separators
for sep in [";", ","]:
for sep in [';', ',']:
try:
logger.debug(f"Trying CSV separator: '{sep}'")
test_df = pd.read_csv(csv_path, sep=sep, encoding="utf-8-sig", nrows=5)
if len(test_df.columns) > 1:
df = pd.read_csv(csv_path, sep=sep, encoding="utf-8-sig")
logger.debug(f"Successfully read CSV with separator '{sep}'")
break
except Exception:
except Exception as e:
logger.debug(f"Failed to read with separator '{sep}': {e}")
continue
if df is None:
logger.error("Could not read CSV with standard separators.")
try:
df = pd.read_csv(csv_path, sep=";", encoding="latin1")
logger.info("Fallback to latin1 encoding successful.")
except:
raise Exception("CSV konnte nicht gelesen werden.")
df.columns = df.columns.str.strip().str.replace("\"", "")
logger.debug(f"CSV Columns: {list(df.columns)}")
group_label = "Gruppe" if list_type == 'k' else "Klasse"
person_label_plural = "Kinder" if list_type == 'k' else "Schüler"
@@ -100,6 +119,7 @@ def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_
for col in ["Vorname", "Nachname", group_label]:
if col not in df.columns:
logger.warning(f"Column '{col}' not found in CSV, using default values.")
df[col] = "Alle" if col == group_label else ""
df = df.sort_values(by=[group_label, "Nachname", "Vorname"])
@@ -113,6 +133,7 @@ def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_
total_students = len(df)
template_dir = os.path.join(os.path.dirname(__file__), "templates")
logger.debug(f"Using template directory: {template_dir}")
env = Environment(loader=FileSystemLoader(template_dir))
template = env.get_template("school_list.html")
@@ -132,13 +153,27 @@ def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_
"group_column_name": group_label
}
logger.debug("Rendering HTML template...")
html_out = template.render(render_context)
logger.info(f"Writing PDF to: {output_path}")
HTML(string=html_out).write_pdf(output_path)
# --- Selenium Scraper Functions ---
def take_error_screenshot(driver, error_name):
    """Save a PNG screenshot of the current browser state for debugging.

    The file is written to an ``errors`` directory next to this module and is
    named ``error_<error_name>_<YYYYmmdd_HHMMSS>.png``. This helper is called
    from error paths, so it is strictly best-effort: every failure is logged
    and swallowed so it cannot mask the error that triggered the screenshot.

    Args:
        driver: Selenium WebDriver whose current page is captured. May be
            None (e.g. when driver initialization failed); nothing is saved
            in that case.
        error_name: Short label embedded in the file name.
    """
    if driver is None:
        logger.error("!!! Could not save screenshot: no WebDriver available")
        return

    errors_dir = os.path.join(os.path.dirname(__file__), 'errors')
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"error_{error_name}_{timestamp}.png"
    filepath = os.path.join(errors_dir, filename)
    try:
        # Directory creation is inside the try block so that a permission
        # problem here is logged instead of raised from an error handler.
        os.makedirs(errors_dir, exist_ok=True)
        # save_screenshot() signals an I/O failure by returning False rather
        # than raising, so the return value must be checked before claiming
        # success.
        if driver.save_screenshot(filepath):
            logger.error(f"!!! Error screenshot saved to: {filepath}")
        else:
            logger.error(f"!!! Could not save screenshot: writing {filepath} failed")
    except Exception as e:
        logger.error(f"!!! Could not save screenshot: {e}")
def setup_driver(download_path: str = None):
print("Initialisiere Chrome WebDriver...")
logger.info("Initializing Chrome WebDriver...")
options = Options()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
@@ -147,6 +182,7 @@ def setup_driver(download_path: str = None):
options.binary_location = '/usr/bin/chromium'
if download_path:
logger.debug(f"Configuring download path: {download_path}")
prefs = {
"download.default_directory": download_path,
"download.prompt_for_download": False,
@@ -157,48 +193,57 @@ def setup_driver(download_path: str = None):
try:
driver = webdriver.Chrome(options=options)
if download_path:
# Crucial for headless mode: Allow downloads
logger.debug("Allowing downloads in headless mode via CDP...")
driver.execute_cdp_cmd('Page.setDownloadBehavior', {
'behavior': 'allow',
'downloadPath': download_path
})
return driver
except Exception as e:
print(f"Fehler bei der Initialisierung des WebDrivers: {e}")
logger.error(f"Failed to initialize WebDriver: {e}")
return None
def login(driver, username, password):
    """Log in through the browser and wait for the dashboard to load.

    Opens LOGIN_URL, accepts the cookie banner if one shows up, fills in the
    credentials, submits the form and waits until the browser URL contains
    ``/config_dashboard/index``.

    Args:
        driver: Selenium WebDriver used to drive the login page.
        username: Account name typed into the login form (also logged).
        password: Password typed into the login form (not logged here).

    Returns:
        True when the dashboard URL was reached within the wait time,
        False on any failure. Failures are logged and an error screenshot is
        attempted; no exception is propagated to the caller.
    """
    logger.info(f"Starting login process for user: {username}")
    try:
        driver.get(LOGIN_URL)
        wait = WebDriverWait(driver, 30)

        # The cookie banner is optional: give it a short, separate timeout and
        # carry on if it does not appear or cannot be clicked.
        try:
            logger.debug("Checking for cookie banner...")
            cookie_wait = WebDriverWait(driver, 5)
            cookie_wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, SELECTORS["cookie_accept_button"]))
            ).click()
            time.sleep(1)  # brief pause after dismissing the banner
            logger.info("Cookie banner accepted.")
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # are not swallowed while waiting for the banner.
            logger.debug("No cookie banner found.")

        logger.debug("Entering credentials...")
        wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, SELECTORS["login_user"]))
        ).send_keys(username)
        driver.find_element(By.CSS_SELECTOR, SELECTORS["login_pass"]).send_keys(password)

        logger.info("Clicking login button...")
        driver.find_element(By.CSS_SELECTOR, SELECTORS["login_button"]).click()

        logger.info("Waiting for dashboard redirect...")
        wait.until(EC.url_contains('/config_dashboard/index'))
        logger.info("Login successful!")
        return True
    except Exception as e:
        logger.error(f"Login failed: {e}")
        take_error_screenshot(driver, "login_error")
        return False
def get_jobs_list(driver) -> List[Dict[str, Any]]:
jobs_list_url = "https://app.fotograf.de/config_jobs/index"
logger.info(f"Navigating to jobs list: {jobs_list_url}")
driver.get(jobs_list_url)
wait = WebDriverWait(driver, 45)
wait = WebDriverWait(driver, 30)
jobs = []
try:
logger.debug("Waiting for job rows to appear...")
job_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["dashboard_jobs_table_rows"])))
logger.info(f"Found {len(job_rows)} job rows.")
for row in job_rows:
try:
name_element = row.find_element(By.XPATH, SELECTORS["job_row_name_link"])
@@ -206,6 +251,9 @@ def get_jobs_list(driver) -> List[Dict[str, Any]]:
job_url = name_element.get_attribute('href')
job_id_match = re.search(r'/(\d+)$', job_url)
job_id = job_id_match.group(1) if job_id_match else None
logger.debug(f"Parsing job: {job_name} (ID: {job_id})")
status_element = row.find_element(By.XPATH, SELECTORS["job_row_status"])
job_status = status_element.text.strip()
date_element = row.find_element(By.XPATH, SELECTORS["job_row_date"])
@@ -221,10 +269,13 @@ def get_jobs_list(driver) -> List[Dict[str, Any]]:
"date": job_date,
"shooting_type": shooting_type,
})
except:
except Exception as e:
logger.warning(f"Error parsing single job row: {e}")
continue
except:
pass
except Exception as e:
logger.error(f"Error retrieving job list: {e}")
take_error_screenshot(driver, "job_list_error")
return jobs
# --- API Endpoints ---
@@ -235,9 +286,11 @@ async def health_check():
@app.get("/api/jobs", response_model=List[Dict[str, Any]])
async def get_jobs(account_type: str):
logger.info(f"API Request: GET /api/jobs for {account_type}")
username = os.getenv(f"{account_type.upper()}_USER")
password = os.getenv(f"{account_type.upper()}_PW")
if not username or not password:
logger.error(f"Credentials for {account_type} not found in .env")
raise HTTPException(status_code=400, detail="Credentials not found.")
driver = None
@@ -247,43 +300,47 @@ async def get_jobs(account_type: str):
raise HTTPException(status_code=401, detail="Login failed.")
return get_jobs_list(driver)
finally:
if driver: driver.quit()
if driver:
logger.debug("Closing driver.")
driver.quit()
@app.get("/api/jobs/{job_id}/generate-pdf")
async def generate_pdf(job_id: str, account_type: str):
logger.info(f"API Request: Generate PDF for job {job_id} ({account_type})")
username = os.getenv(f"{account_type.upper()}_USER")
password = os.getenv(f"{account_type.upper()}_PW")
with tempfile.TemporaryDirectory() as temp_dir:
logger.debug(f"Using temp directory for download: {temp_dir}")
driver = setup_driver(download_path=temp_dir)
try:
if not login(driver, username, password):
raise HTTPException(status_code=401, detail="Login failed.")
# 1. Navigate to registrations page
reg_url = f"https://app.fotograf.de/config_children/view_registrations/{job_id}"
print(f"Navigiere zu Registrierungen: {reg_url}")
logger.info(f"Navigating to registrations page: {reg_url}")
driver.get(reg_url)
wait = WebDriverWait(driver, 30)
# Get Institution Name for PDF
try:
institution = driver.find_element(By.TAG_NAME, "h1").text.strip()
logger.debug(f"Detected institution name: {institution}")
except:
institution = "Fotoauftrag"
# 2. Click Export and trigger CSV download
print("Trigger Export...")
logger.info("Triggering CSV Export...")
export_btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, SELECTORS["export_dropdown"])))
export_btn.click()
time.sleep(1) # Wait for menu
logger.debug("Export dropdown clicked, waiting for menu items...")
time.sleep(1)
csv_btn = wait.until(EC.element_to_be_clickable((By.XPATH, SELECTORS["export_csv_link"])))
csv_btn.click()
logger.info("CSV Export link clicked.")
# 3. Wait for download to finish
print("Warte auf CSV Download...")
timeout = 30
# Wait for file to appear
logger.debug("Waiting for CSV file in download directory...")
timeout = 45
start_time = time.time()
csv_file = None
while time.time() - start_time < timeout:
@@ -291,14 +348,15 @@ async def generate_pdf(job_id: str, account_type: str):
csv_files = [f for f in files if f.endswith('.csv')]
if csv_files:
csv_file = os.path.join(temp_dir, csv_files[0])
logger.info(f"Download complete: {csv_file}")
break
time.sleep(1)
if not csv_file:
logger.error(f"Download timed out after {timeout} seconds.")
take_error_screenshot(driver, "download_timeout")
raise HTTPException(status_code=500, detail="CSV Download fehlgeschlagen.")
# 4. Generate PDF
print(f"Generiere PDF aus {csv_file}...")
output_pdf_name = f"Listen_{job_id}.pdf"
output_pdf_path = os.path.join(temp_dir, output_pdf_name)
@@ -306,17 +364,19 @@ async def generate_pdf(job_id: str, account_type: str):
csv_path=csv_file,
institution=institution,
date_info=datetime.datetime.now().strftime("%d.%m.%Y"),
list_type=account_type, # 'k' or 'schule'
list_type=account_type,
output_path=output_pdf_path
)
# 5. Return PDF
final_storage = os.path.join("/tmp", output_pdf_name)
logger.info(f"PDF successfully generated. Copying to {final_storage}")
shutil.copy(output_pdf_path, final_storage)
return FileResponse(path=final_storage, filename=output_pdf_name, media_type="application/pdf")
except Exception as e:
print(f"Fehler bei PDF Generierung: {e}")
logger.exception("Unexpected error during PDF generation")
raise HTTPException(status_code=500, detail=str(e))
finally:
if driver: driver.quit()
if driver:
logger.debug("Closing driver.")
driver.quit()