# Lower-cased header spellings accepted for each canonical column name.
_COLUMN_ALIASES = {
    "Vorname": ("vorname kind", "vorname", "first name"),
    "Nachname": ("nachname kind", "nachname", "last name"),
    "Klasse": ("gruppe", "klasse", "group", "class"),
}


def _read_students_csv(path: str) -> "pd.DataFrame":
    """Read the students CSV, probing the encoding and the field separator.

    UTF-8 (with optional BOM) is tried before latin1, and ``;`` before ``,``.
    The first parse that yields more than one column wins. If an encoding
    decodes cleanly but never yields more than one column, the file is treated
    as a genuine single-column CSV and that parse is returned.

    Raises:
        FileNotFoundError / OSError: propagated unchanged from pandas.
        UnicodeDecodeError, pandas.errors.ParserError,
        pandas.errors.EmptyDataError: the last such error, when no
        encoding/separator combination could be parsed at all.
    """
    last_error = None
    for encoding in ("utf-8-sig", "latin1"):
        single_column = None
        for sep in (";", ","):
            try:
                frame = pd.read_csv(path, sep=sep, encoding=encoding)
            except (
                UnicodeDecodeError,
                pd.errors.ParserError,
                pd.errors.EmptyDataError,
            ) as err:
                # Only "wrong guess" errors are retried; anything else
                # (missing file, permissions, ...) surfaces immediately.
                last_error = err
                continue
            if len(frame.columns) > 1:
                return frame
            if single_column is None:
                single_column = frame
        if single_column is not None:
            # The bytes decoded fine, so retrying with latin1 could only
            # produce the same structure with mangled characters.
            return single_column
    raise last_error


def _canonicalise_columns(df: "pd.DataFrame") -> "pd.DataFrame":
    """Clean header names and map known aliases onto Vorname/Nachname/Klasse.

    Quotes are removed before whitespace is stripped so that padding inside a
    quoted header does not survive. Each canonical name is assigned at most
    once: a column already carrying the canonical name takes precedence,
    otherwise the first matching alias is renamed. Further aliases keep their
    original name, which avoids duplicate column labels.
    """
    df = df.copy()
    df.columns = (
        df.columns.astype(str).str.replace('"', "", regex=False).str.strip()
    )

    alias_to_target = {
        alias: target
        for target, aliases in _COLUMN_ALIASES.items()
        for alias in aliases
    }
    claimed = {target for target in _COLUMN_ALIASES if target in df.columns}
    rename = {}
    for col in df.columns:
        target = alias_to_target.get(col.lower().strip())
        if target is None or target in claimed:
            continue
        claimed.add(target)
        rename[col] = target
    return df.rename(columns=rename)


def generate_school_pdf(institution: str, date_info: str, list_type: str, students_csv_path: str, families_csv_path: str = None, output_dir: str = "/tmp") -> str:
    """Render the per-class student lists of a school into a PDF file.

    The students CSV is loaded, its columns are normalised to ``Vorname``,
    ``Nachname`` and ``Klasse`` (missing ones are added, ``Klasse`` defaulting
    to ``"Alle"``), rows are sorted and grouped by class, and the result is
    rendered through the ``school_list.html`` template and written as a PDF.

    Args:
        institution: Name passed to the template and used in the file name.
        date_info: Passed to the template as ``date_info``.
        list_type: Label embedded in the output file name.
        students_csv_path: Path of the students CSV (``;`` or ``,`` separated,
            UTF-8 or latin1).
        families_csv_path: Accepted for interface compatibility; not used by
            this function.
        output_dir: Directory the PDF is written to; created if missing.

    Returns:
        The path of the written PDF file.

    Raises:
        FileNotFoundError: if ``students_csv_path`` does not exist.
        Parsing/decoding errors from pandas when the CSV cannot be read.
    """
    import logging  # local import: the module's import block is not part of this block

    df = _read_students_csv(students_csv_path)
    df = _canonicalise_columns(df)
    logging.getLogger(__name__).info("Detected columns: %s", list(df.columns))

    df = df.fillna("")
    for col, default in (("Vorname", ""), ("Nachname", ""), ("Klasse", "Alle")):
        if col not in df.columns:
            df[col] = default

    df = df.sort_values(by=["Klasse", "Nachname", "Vorname"])

    # Build both template structures in a single pass over the groups.
    class_data = []
    class_counts = []
    for class_name, group in df.groupby("Klasse"):
        students = group.to_dict("records")
        class_data.append({"name": class_name, "students": students})
        class_counts.append({"name": class_name, "count": len(students)})
    total_students = len(df)

    template_dir = os.path.join(os.path.dirname(__file__), "..", "templates")
    # CSV cells are external input; escape them so they cannot inject markup
    # (or resource URLs) into the HTML handed to WeasyPrint.
    env = Environment(loader=FileSystemLoader(template_dir), autoescape=True)
    template = env.get_template("school_list.html")

    # One timestamp for both the footer text and the file name.
    now = datetime.datetime.now()
    current_time = now.strftime("%d.%m.%Y %H:%M Uhr")
    time_str = now.strftime("%Y-%m-%d_%H-%M")

    html_out = template.render(
        institution=institution,
        date_info=date_info,
        class_counts=class_counts,
        total_students=total_students,
        class_data=class_data,
        current_time=current_time,
    )

    # Keep path separators out of the file name so it stays inside output_dir.
    clean_inst = institution.replace(" ", "_").replace("/", "-").replace("\\", "-")
    clean_type = list_type.replace("/", "-").replace("\\", "-")
    output_filename = f"Listen_{clean_inst}_{clean_type}_{time_str}.pdf"

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, output_filename)
    HTML(string=html_out).write_pdf(output_path)
    return output_path