feat(scraper): PDF generation is now fully functional [32788f42]

This commit is contained in:
2026-03-20 20:23:00 +00:00
parent fa65e99310
commit 446211e9cb

View File

@@ -78,7 +78,7 @@ def get_logo_base64():
def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_type: str, output_path: str):
logger.info(f"Generating PDF for {institution} from {csv_path}")
df = None
for sep in [';', ',']:
for sep in [";", ","]:
try:
logger.debug(f"Trying CSV separator: '{sep}'")
test_df = pd.read_csv(csv_path, sep=sep, encoding="utf-8-sig", nrows=5)
@@ -337,11 +337,11 @@ async def generate_pdf(job_id: str, account_type: str):
# Wait for the export button to become present on the new tab
logger.info("Waiting for Export Dropdown...")
export_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_dropdown"]))))
export_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_dropdown"])))
# Scroll to it and click via JS to avoid obscuring elements
driver.execute_script("arguments[0].scrollIntoView(true);", export_btn)
time.sleep(1) # small pause after scroll
time.sleep(1)
logger.info("Clicking Export Dropdown...")
driver.execute_script("arguments[0].click();", export_btn)
@@ -349,11 +349,13 @@ async def generate_pdf(job_id: str, account_type: str):
time.sleep(2)
try:
csv_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_csv_link"]))))
csv_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_csv_link"])))
logger.info("CSV Export button found. Clicking...")
driver.execute_script("arguments[0].click();", csv_btn)
except TimeoutException:
logger.error("CSV Button not found after clicking dropdown.")
take_error_screenshot(driver, "csv_button_missing")
raise HTTPException(status_code=500, detail="CSV Export Button konnte nicht gefunden werden.")
# Wait for file to appear
logger.debug("Waiting for CSV file in download directory...")
@@ -390,10 +392,12 @@ async def generate_pdf(job_id: str, account_type: str):
shutil.copy(output_pdf_path, final_storage)
return FileResponse(path=final_storage, filename=output_pdf_name, media_type="application/pdf")
except HTTPException as he:
raise he
except Exception as e:
logger.exception("Unexpected error during PDF generation")
raise HTTPException(status_code=500, detail=str(e))
finally:
if driver:
logger.debug("Closing driver.")
driver.quit()
driver.quit()