From 446211e9cb30be626784135b63d500dfe17b9471 Mon Sep 17 00:00:00 2001 From: Floke Date: Fri, 20 Mar 2026 20:23:00 +0000 Subject: [PATCH] feat(scraper): PDF generation is now fully functional [32788f42] --- fotograf-de-scraper/backend/main.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fotograf-de-scraper/backend/main.py b/fotograf-de-scraper/backend/main.py index ae064e8b..50848a50 100644 --- a/fotograf-de-scraper/backend/main.py +++ b/fotograf-de-scraper/backend/main.py @@ -78,7 +78,7 @@ def get_logo_base64(): def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_type: str, output_path: str): logger.info(f"Generating PDF for {institution} from {csv_path}") df = None - for sep in [';', ',']: + for sep in [";", ","]: try: logger.debug(f"Trying CSV separator: '{sep}'") test_df = pd.read_csv(csv_path, sep=sep, encoding="utf-8-sig", nrows=5) @@ -337,11 +337,11 @@ async def generate_pdf(job_id: str, account_type: str): # Wait for the export button to become present on the new tab logger.info("Waiting for Export Dropdown...") - export_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_dropdown"])))) + export_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_dropdown"]))) # Scroll to it and click via JS to avoid obscuring elements driver.execute_script("arguments[0].scrollIntoView(true);", export_btn) - time.sleep(1) # small pause after scroll + time.sleep(1) logger.info("Clicking Export Dropdown...") driver.execute_script("arguments[0].click();", export_btn) @@ -349,11 +349,13 @@ async def generate_pdf(job_id: str, account_type: str): time.sleep(2) try: - csv_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_csv_link"])))) + csv_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_csv_link"]))) logger.info("CSV Export button found. Clicking...") driver.execute_script("arguments[0].click();", csv_btn) except TimeoutException: - + logger.error("CSV Button not found after clicking dropdown.") + take_error_screenshot(driver, "csv_button_missing") + raise HTTPException(status_code=500, detail="CSV Export Button konnte nicht gefunden werden.") # Wait for file to appear logger.debug("Waiting for CSV file in download directory...") @@ -390,10 +392,12 @@ async def generate_pdf(job_id: str, account_type: str): shutil.copy(output_pdf_path, final_storage) return FileResponse(path=final_storage, filename=output_pdf_name, media_type="application/pdf") + except HTTPException as he: + raise he except Exception as e: logger.exception("Unexpected error during PDF generation") raise HTTPException(status_code=500, detail=str(e)) finally: if driver: logger.debug("Closing driver.") - driver.quit() + driver.quit() \ No newline at end of file