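chore(backend): enable verbose DEBUG logging for troubleshooting [32788f42]

Replaces print() calls with the logging module (root level DEBUG), runs uvicorn with --log-level debug, and saves a browser screenshot to an errors/ directory when login, job-list retrieval, or the CSV download fails; the Docker image now creates a world-writable /app/errors. Note that this commit also changes runtime behavior beyond logging: the login and job-list waits drop from 45 s to 30 s, the CSV download timeout rises from 30 s to 45 s, and the 1 s sleep after accepting the cookie banner is removed.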

2026-03-20 18:44:50 +00:00
parent 5c69c44ed3
commit 39c3a59744
2 changed files with 107 additions and 44 deletions
--- a/fotograf-de-scraper/backend/Dockerfile
+++ b/fotograf-de-scraper/backend/Dockerfile
@@ -38,8 +38,11 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy the application code
 COPY . .

+# Create directory for error screenshots
+RUN mkdir -p /app/errors && chmod 777 /app/errors
+
 # Expose the port FastAPI will run on
 EXPOSE 8000

-# Command to run the application
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+# Command to run the application with DEBUG logging
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--log-level", "debug"]
--- a/fotograf-de-scraper/backend/main.py
+++ b/fotograf-de-scraper/backend/main.py
@@ -1,10 +1,5 @@
 import os
-from dotenv import load_dotenv
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import FileResponse
-from typing import List, Dict, Any, Optional
-import time
+import logging
 import datetime
 import base64
 import re
@@ -13,6 +8,12 @@ from jinja2 import Environment, FileSystemLoader
 from weasyprint import HTML
 import tempfile
 import shutil
+import time
+from dotenv import load_dotenv
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from typing import List, Dict, Any, Optional

 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
@@ -21,6 +22,16 @@ from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.common.exceptions import TimeoutException, NoSuchElementException

+# --- Logging Configuration ---
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger("fotograf-scraper")
+
 # Load environment variables
 load_dotenv()

@@ -49,38 +60,46 @@ SELECTORS = {
    "job_row_date": ".//td[count(//th[contains(., 'Datum')]/preceding-sibling::th) + 1]",
    "job_row_shooting_type": ".//td[count(//th[contains(., 'Typ')]/preceding-sibling::th) + 1]",
    "export_dropdown": "[data-qa-id='dropdown:export']",
-    "export_csv_link": "//a[contains(text(), 'CSV') or contains(., 'CSV')]", # Common pattern for CSV export in dropdowns
+    "export_csv_link": "//a[contains(text(), 'CSV') or contains(., 'CSV')]",
 }

-# --- PDF Generation Logic (Reused from List-Generator) ---
+# --- PDF Generation Logic ---

 def get_logo_base64():
    logo_path = os.path.join(os.path.dirname(__file__), "assets", "logo.png")
+    logger.debug(f"Loading logo from: {logo_path}")
    try:
        with open(logo_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except FileNotFoundError:
-        print(f"Warning: Logo file not found at {logo_path}")
+        logger.warning(f"Logo file not found at {logo_path}")
        return None

 def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_type: str, output_path: str):
+    logger.info(f"Generating PDF for {institution} from {csv_path}")
    df = None
-    # Try different separators
-    for sep in [";", ","]:
+    for sep in [';', ',']:
        try:
+            logger.debug(f"Trying CSV separator: '{sep}'")
            test_df = pd.read_csv(csv_path, sep=sep, encoding="utf-8-sig", nrows=5)
            if len(test_df.columns) > 1:
                df = pd.read_csv(csv_path, sep=sep, encoding="utf-8-sig")
+                logger.debug(f"Successfully read CSV with separator '{sep}'")
                break
-        except Exception:
+        except Exception as e:
+            logger.debug(f"Failed to read with separator '{sep}': {e}")
            continue
+    
    if df is None:
+        logger.error("Could not read CSV with standard separators.")
        try:
            df = pd.read_csv(csv_path, sep=";", encoding="latin1")
+            logger.info("Fallback to latin1 encoding successful.")
        except:
            raise Exception("CSV konnte nicht gelesen werden.")

    df.columns = df.columns.str.strip().str.replace("\"", "")
+    logger.debug(f"CSV Columns: {list(df.columns)}")
    
    group_label = "Gruppe" if list_type == 'k' else "Klasse"
    person_label_plural = "Kinder" if list_type == 'k' else "Schüler"
@@ -100,6 +119,7 @@ def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_
    
    for col in ["Vorname", "Nachname", group_label]:
        if col not in df.columns:
+            logger.warning(f"Column '{col}' not found in CSV, using default values.")
            df[col] = "Alle" if col == group_label else ""

    df = df.sort_values(by=[group_label, "Nachname", "Vorname"])
@@ -113,6 +133,7 @@ def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_
    total_students = len(df)
    
    template_dir = os.path.join(os.path.dirname(__file__), "templates")
+    logger.debug(f"Using template directory: {template_dir}")
    env = Environment(loader=FileSystemLoader(template_dir))
    template = env.get_template("school_list.html")
    
@@ -132,13 +153,27 @@ def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_
        "group_column_name": group_label
    }

+    logger.debug("Rendering HTML template...")
    html_out = template.render(render_context)
+    logger.info(f"Writing PDF to: {output_path}")
    HTML(string=html_out).write_pdf(output_path)

 # --- Selenium Scraper Functions ---

+def take_error_screenshot(driver, error_name):
+    errors_dir = os.path.join(os.path.dirname(__file__), 'errors')
+    os.makedirs(errors_dir, exist_ok=True)
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = f"error_{error_name}_{timestamp}.png"
+    filepath = os.path.join(errors_dir, filename)
+    try:
+        driver.save_screenshot(filepath)
+        logger.error(f"!!! Error screenshot saved to: {filepath}")
+    except Exception as e:
+        logger.error(f"!!! Could not save screenshot: {e}")
+
 def setup_driver(download_path: str = None):
-    print("Initialisiere Chrome WebDriver...")
+    logger.info("Initializing Chrome WebDriver...")
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
@@ -147,6 +182,7 @@ def setup_driver(download_path: str = None):
    options.binary_location = '/usr/bin/chromium'

    if download_path:
+        logger.debug(f"Configuring download path: {download_path}")
        prefs = {
            "download.default_directory": download_path,
            "download.prompt_for_download": False,
@@ -157,48 +193,57 @@ def setup_driver(download_path: str = None):

    try:
        driver = webdriver.Chrome(options=options)
-        
        if download_path:
-            # Crucial for headless mode: Allow downloads
+            logger.debug("Allowing downloads in headless mode via CDP...")
            driver.execute_cdp_cmd('Page.setDownloadBehavior', {
                'behavior': 'allow',
                'downloadPath': download_path
            })
-            
        return driver
    except Exception as e:
-        print(f"Fehler bei der Initialisierung des WebDrivers: {e}")
+        logger.error(f"Failed to initialize WebDriver: {e}")
        return None

 def login(driver, username, password):
-    print("Starte Login-Vorgang...")
+    logger.info(f"Starting login process for user: {username}")
    try:
        driver.get(LOGIN_URL)
-        wait = WebDriverWait(driver, 45)
+        wait = WebDriverWait(driver, 30)
        try:
+            logger.debug("Checking for cookie banner...")
            cookie_wait = WebDriverWait(driver, 5)
            cookie_wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, SELECTORS["cookie_accept_button"]))).click()
-            time.sleep(1)
+            logger.info("Cookie banner accepted.")
        except:
-            pass
+            logger.debug("No cookie banner found.")

+        logger.debug("Entering credentials...")
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, SELECTORS["login_user"]))).send_keys(username)
        driver.find_element(By.CSS_SELECTOR, SELECTORS["login_pass"]).send_keys(password)
+        
+        logger.info("Clicking login button...")
        driver.find_element(By.CSS_SELECTOR, SELECTORS["login_button"]).click()

+        logger.info("Waiting for dashboard redirect...")
        wait.until(EC.url_contains('/config_dashboard/index'))
+        logger.info("Login successful!")
        return True
    except Exception as e:
-        print(f"Login fehlgeschlagen: {e}")
+        logger.error(f"Login failed: {e}")
+        take_error_screenshot(driver, "login_error")
        return False

 def get_jobs_list(driver) -> List[Dict[str, Any]]:
    jobs_list_url = "https://app.fotograf.de/config_jobs/index"
+    logger.info(f"Navigating to jobs list: {jobs_list_url}")
    driver.get(jobs_list_url)
-    wait = WebDriverWait(driver, 45)
+    wait = WebDriverWait(driver, 30)
    jobs = []
    try:
+        logger.debug("Waiting for job rows to appear...")
        job_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["dashboard_jobs_table_rows"])))
+        logger.info(f"Found {len(job_rows)} job rows.")
+        
        for row in job_rows:
            try:
                name_element = row.find_element(By.XPATH, SELECTORS["job_row_name_link"])
@@ -206,6 +251,9 @@ def get_jobs_list(driver) -> List[Dict[str, Any]]:
                job_url = name_element.get_attribute('href')
                job_id_match = re.search(r'/(\d+)$', job_url)
                job_id = job_id_match.group(1) if job_id_match else None
+                
+                logger.debug(f"Parsing job: {job_name} (ID: {job_id})")
+                
                status_element = row.find_element(By.XPATH, SELECTORS["job_row_status"])
                job_status = status_element.text.strip()
                date_element = row.find_element(By.XPATH, SELECTORS["job_row_date"])
@@ -221,10 +269,13 @@ def get_jobs_list(driver) -> List[Dict[str, Any]]:
                    "date": job_date,
                    "shooting_type": shooting_type,
                })
-            except:
+            except Exception as e:
+                logger.warning(f"Error parsing single job row: {e}")
                continue
-    except:
-        pass
+    except Exception as e:
+        logger.error(f"Error retrieving job list: {e}")
+        take_error_screenshot(driver, "job_list_error")
+    
    return jobs

 # --- API Endpoints ---
@@ -235,9 +286,11 @@ async def health_check():

@app.get("/api/jobs", response_model=List[Dict[str, Any]])
 async def get_jobs(account_type: str):
+    logger.info(f"API Request: GET /api/jobs for {account_type}")
    username = os.getenv(f"{account_type.upper()}_USER")
    password = os.getenv(f"{account_type.upper()}_PW")
    if not username or not password:
+        logger.error(f"Credentials for {account_type} not found in .env")
        raise HTTPException(status_code=400, detail="Credentials not found.")

    driver = None
@@ -247,43 +300,47 @@ async def get_jobs(account_type: str):
            raise HTTPException(status_code=401, detail="Login failed.")
        return get_jobs_list(driver)
    finally:
-        if driver: driver.quit()
+        if driver: 
+            logger.debug("Closing driver.")
+            driver.quit()

@app.get("/api/jobs/{job_id}/generate-pdf")
 async def generate_pdf(job_id: str, account_type: str):
+    logger.info(f"API Request: Generate PDF for job {job_id} ({account_type})")
    username = os.getenv(f"{account_type.upper()}_USER")
    password = os.getenv(f"{account_type.upper()}_PW")
    
    with tempfile.TemporaryDirectory() as temp_dir:
+        logger.debug(f"Using temp directory for download: {temp_dir}")
        driver = setup_driver(download_path=temp_dir)
        try:
            if not login(driver, username, password):
                raise HTTPException(status_code=401, detail="Login failed.")

-            # 1. Navigate to registrations page
            reg_url = f"https://app.fotograf.de/config_children/view_registrations/{job_id}"
-            print(f"Navigiere zu Registrierungen: {reg_url}")
+            logger.info(f"Navigating to registrations page: {reg_url}")
            driver.get(reg_url)
            wait = WebDriverWait(driver, 30)

-            # Get Institution Name for PDF
            try:
                institution = driver.find_element(By.TAG_NAME, "h1").text.strip()
+                logger.debug(f"Detected institution name: {institution}")
            except:
                institution = "Fotoauftrag"

-            # 2. Click Export and trigger CSV download
-            print("Trigger Export...")
+            logger.info("Triggering CSV Export...")
            export_btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, SELECTORS["export_dropdown"])))
            export_btn.click()
-            time.sleep(1) # Wait for menu
+            logger.debug("Export dropdown clicked, waiting for menu items...")
+            time.sleep(1)
            
            csv_btn = wait.until(EC.element_to_be_clickable((By.XPATH, SELECTORS["export_csv_link"])))
            csv_btn.click()
+            logger.info("CSV Export link clicked.")
            
-            # 3. Wait for download to finish
-            print("Warte auf CSV Download...")
-            timeout = 30
+            # Wait for file to appear
+            logger.debug("Waiting for CSV file in download directory...")
+            timeout = 45
            start_time = time.time()
            csv_file = None
            while time.time() - start_time < timeout:
@@ -291,14 +348,15 @@ async def generate_pdf(job_id: str, account_type: str):
                csv_files = [f for f in files if f.endswith('.csv')]
                if csv_files:
                    csv_file = os.path.join(temp_dir, csv_files[0])
+                    logger.info(f"Download complete: {csv_file}")
                    break
                time.sleep(1)
            
            if not csv_file:
+                logger.error(f"Download timed out after {timeout} seconds.")
+                take_error_screenshot(driver, "download_timeout")
                raise HTTPException(status_code=500, detail="CSV Download fehlgeschlagen.")

-            # 4. Generate PDF
-            print(f"Generiere PDF aus {csv_file}...")
            output_pdf_name = f"Listen_{job_id}.pdf"
            output_pdf_path = os.path.join(temp_dir, output_pdf_name)
            
@@ -306,17 +364,19 @@ async def generate_pdf(job_id: str, account_type: str):
                csv_path=csv_file,
                institution=institution,
                date_info=datetime.datetime.now().strftime("%d.%m.%Y"),
-                list_type=account_type, # 'k' or 'schule'
+                list_type=account_type,
                output_path=output_pdf_path
            )

-            # 5. Return PDF
            final_storage = os.path.join("/tmp", output_pdf_name)
+            logger.info(f"PDF successfully generated. Copying to {final_storage}")
            shutil.copy(output_pdf_path, final_storage)
            return FileResponse(path=final_storage, filename=output_pdf_name, media_type="application/pdf")

        except Exception as e:
-            print(f"Fehler bei PDF Generierung: {e}")
+            logger.exception("Unexpected error during PDF generation")
            raise HTTPException(status_code=500, detail=str(e))
        finally:
-            if driver: driver.quit()
+            if driver: 
+                logger.debug("Closing driver.")
+                driver.quit()