[32788f42] fix(list-generator): improve CSV parsing with auto-separator detection and robust column mapping

This commit is contained in:
2026-03-18 20:14:29 +00:00
parent ef74aeefe0
commit 56fea34fc5

View File

@@ -5,48 +5,56 @@ from weasyprint import HTML
import datetime
# Header aliases (compared lower-cased and stripped) -> canonical column name.
_COLUMN_ALIASES = {
    "vorname kind": "Vorname",
    "vorname": "Vorname",
    "first name": "Vorname",
    "nachname kind": "Nachname",
    "nachname": "Nachname",
    "last name": "Nachname",
    "gruppe": "Klasse",
    "klasse": "Klasse",
    "group": "Klasse",
    "class": "Klasse",
}


def _read_students_csv(path: str) -> pd.DataFrame:
    """Read the students CSV, auto-detecting the separator and the encoding.

    Tries UTF-8 (with optional BOM) before latin1 and ``;`` before ``,``.
    The first combination that yields more than one column wins. If no
    combination yields several columns, the first successful parse is
    returned, so a genuine single-column file still loads.
    """
    single_column = None
    for encoding in ("utf-8-sig", "latin1"):
        for sep in (";", ","):
            try:
                frame = pd.read_csv(path, sep=sep, encoding=encoding)
            except (UnicodeDecodeError, pd.errors.ParserError):
                # Wrong encoding or separator guess: try the next combination.
                continue
            if len(frame.columns) > 1:
                return frame
            if single_column is None:
                single_column = frame
    if single_column is not None:
        return single_column
    # Every attempt failed; repeat the most permissive read so the caller
    # sees the underlying pandas error instead of a silent None.
    return pd.read_csv(path, sep=";", encoding="latin1")


def _normalize_student_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Map header aliases to ``Vorname``/``Nachname``/``Klasse`` and fill gaps."""
    df.columns = df.columns.astype(str).str.strip().str.replace('"', "", regex=False)
    print(f"Detected columns: {list(df.columns)}")

    renames = {}
    for col in df.columns:
        target = _COLUMN_ALIASES.get(col.lower().strip())
        if target is not None:
            renames[col] = target
    df = df.rename(columns=renames)

    # Two source headers may alias to the same canonical name; keep the first
    # so sorting and grouping see one unambiguous column per label.
    df = df.loc[:, ~df.columns.duplicated()].copy()
    df = df.fillna("")

    for col, default in (("Vorname", ""), ("Nachname", ""), ("Klasse", "Alle")):
        if col not in df.columns:
            df[col] = default
    return df


def generate_school_pdf(institution: str, date_info: str, list_type: str, students_csv_path: str, families_csv_path: str = None, output_dir: str = "/tmp") -> str:
    """Render the per-class student lists of an institution to a PDF file.

    Args:
        institution: Institution name; passed to the template and used
            (with spaces and slashes replaced) in the output filename.
        date_info: Free-form date text passed to the template.
        list_type: List type label; becomes part of the output filename.
        students_csv_path: Path of the students CSV. The separator (``;`` or
            ``,``) and the encoding (UTF-8 or latin1) are detected.
        families_csv_path: Accepted for interface compatibility; not read by
            this function.
        output_dir: Directory the PDF is written to.

    Returns:
        The path of the written PDF file.

    Raises:
        FileNotFoundError: If ``students_csv_path`` does not exist.
        Exception: pandas parsing errors when the CSV cannot be read with any
            supported separator/encoding combination.
    """
    df = _normalize_student_columns(_read_students_csv(students_csv_path))
    df = df.sort_values(by=["Klasse", "Nachname", "Vorname"])

    class_data = []
    class_counts = []
    for class_name, group in df.groupby("Klasse"):
        class_data.append({"name": class_name, "students": group.to_dict("records")})
        class_counts.append({"name": class_name, "count": len(group)})
    total_students = len(df)

    template_dir = os.path.join(os.path.dirname(__file__), "..", "templates")
    # Values come from user-supplied CSV/text, so escape them when rendering.
    env = Environment(loader=FileSystemLoader(template_dir), autoescape=True)
    template = env.get_template("school_list.html")

    # One timestamp for both the printed time and the filename.
    now = datetime.datetime.now()
    html_out = template.render(
        institution=institution,
        date_info=date_info,
        class_counts=class_counts,
        total_students=total_students,
        class_data=class_data,
        current_time=now.strftime("%d.%m.%Y %H:%M Uhr"),
    )

    clean_inst = institution.replace(" ", "_").replace("/", "-")
    # Keep a slash in list_type from redirecting the file into a subdirectory.
    clean_type = list_type.replace("/", "-")
    # Format the time outside the f-string: escaped quotes inside a
    # replacement field are a syntax error.
    time_str = now.strftime("%Y-%m-%d_%H-%M")
    output_filename = f"Listen_{clean_inst}_{clean_type}_{time_str}.pdf"
    output_path = os.path.join(output_dir, output_filename)
    HTML(string=html_out).write_pdf(output_path)
    return output_path