feat(scraper): PDF generation is now fully functional [32788f42]
This commit is contained in:
@@ -78,7 +78,7 @@ def get_logo_base64():
|
||||
def generate_pdf_from_csv(csv_path: str, institution: str, date_info: str, list_type: str, output_path: str):
|
||||
logger.info(f"Generating PDF for {institution} from {csv_path}")
|
||||
df = None
|
||||
for sep in [';', ',']:
|
||||
for sep in [";", ","]:
|
||||
try:
|
||||
logger.debug(f"Trying CSV separator: '{sep}'")
|
||||
test_df = pd.read_csv(csv_path, sep=sep, encoding="utf-8-sig", nrows=5)
|
||||
@@ -337,11 +337,11 @@ async def generate_pdf(job_id: str, account_type: str):
|
||||
|
||||
# Wait for the export button to become present on the new tab
|
||||
logger.info("Waiting for Export Dropdown...")
|
||||
export_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_dropdown"]))))
|
||||
export_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_dropdown"])))
|
||||
|
||||
# Scroll to it and click via JS to avoid obscuring elements
|
||||
driver.execute_script("arguments[0].scrollIntoView(true);", export_btn)
|
||||
time.sleep(1) # small pause after scroll
|
||||
time.sleep(1)
|
||||
logger.info("Clicking Export Dropdown...")
|
||||
driver.execute_script("arguments[0].click();", export_btn)
|
||||
|
||||
@@ -349,11 +349,13 @@ async def generate_pdf(job_id: str, account_type: str):
|
||||
time.sleep(2)
|
||||
|
||||
try:
|
||||
csv_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_csv_link"]))))
|
||||
csv_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_csv_link"])))
|
||||
logger.info("CSV Export button found. Clicking...")
|
||||
driver.execute_script("arguments[0].click();", csv_btn)
|
||||
except TimeoutException:
|
||||
|
||||
logger.error("CSV Button not found after clicking dropdown.")
|
||||
take_error_screenshot(driver, "csv_button_missing")
|
||||
raise HTTPException(status_code=500, detail="CSV Export Button konnte nicht gefunden werden.")
|
||||
|
||||
# Wait for file to appear
|
||||
logger.debug("Waiting for CSV file in download directory...")
|
||||
@@ -390,10 +392,12 @@ async def generate_pdf(job_id: str, account_type: str):
|
||||
shutil.copy(output_pdf_path, final_storage)
|
||||
return FileResponse(path=final_storage, filename=output_pdf_name, media_type="application/pdf")
|
||||
|
||||
except HTTPException as he:
|
||||
raise he
|
||||
except Exception as e:
|
||||
logger.exception("Unexpected error during PDF generation")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
if driver:
|
||||
logger.debug("Closing driver.")
|
||||
driver.quit()
|
||||
driver.quit()
|
||||
Reference in New Issue
Block a user