[32788f42] fix(list-generator): improve CSV parsing with auto-separator detection and robust column mapping
This commit is contained in:
@@ -5,48 +5,56 @@ from weasyprint import HTML
|
||||
import datetime
|
||||
|
||||
def generate_school_pdf(institution: str, date_info: str, list_type: str, students_csv_path: str, families_csv_path: str = None, output_dir: str = "/tmp") -> str:
    """Render the student list of an institution to a PDF file.

    The students CSV is loaded with automatic separator/encoding detection,
    its header is mapped onto the canonical columns ``Vorname``, ``Nachname``
    and ``Klasse``, and the rows are grouped by ``Klasse``. The result is
    rendered through the ``school_list.html`` Jinja template and written as
    a PDF via WeasyPrint.

    Args:
        institution: Name shown in the document; also part of the file name
            (spaces become ``_``, slashes become ``-``).
        date_info: Passed to the template unchanged.
        list_type: Inserted verbatim into the output file name.
        students_csv_path: Path of the CSV file holding the student rows.
        families_csv_path: Accepted for interface compatibility; not read by
            this function.
        output_dir: Directory that receives the generated PDF.

    Returns:
        The path of the written PDF file.

    Raises:
        OSError: If the CSV file cannot be opened.
        pandas.errors.ParserError: If the CSV cannot be parsed with any of
            the tried separator/encoding combinations.
    """
    df = _read_students_csv(students_csv_path)
    print(f"Detected columns: {list(df.columns)}")

    df = _map_student_columns(df)
    df = df.fillna("")

    # Guarantee the three columns the template and the grouping rely on.
    for col in ["Vorname", "Nachname", "Klasse"]:
        if col not in df.columns:
            df[col] = "Alle" if col == "Klasse" else ""

    df = df.sort_values(by=["Klasse", "Nachname", "Vorname"])
    grouped = df.groupby("Klasse")
    class_data = [
        {"name": class_name, "students": group.to_dict("records")}
        for class_name, group in grouped
    ]
    class_counts = [
        {"name": class_name, "count": len(group)}
        for class_name, group in grouped
    ]
    total_students = len(df)

    template_dir = os.path.join(os.path.dirname(__file__), "..", "templates")
    env = Environment(loader=FileSystemLoader(template_dir))
    template = env.get_template("school_list.html")

    # Take the timestamp once so the printed time and the file name agree.
    now = datetime.datetime.now()
    html_out = template.render(
        institution=institution,
        date_info=date_info,
        class_counts=class_counts,
        total_students=total_students,
        class_data=class_data,
        current_time=now.strftime("%d.%m.%Y %H:%M Uhr"),
    )

    clean_inst = institution.replace(" ", "_").replace("/", "-")
    # Format the timestamp outside the f-string: escaped quotes inside a
    # replacement field are a syntax error.
    time_str = now.strftime("%Y-%m-%d_%H-%M")
    output_filename = f"Listen_{clean_inst}_{list_type}_{time_str}.pdf"
    output_path = os.path.join(output_dir, output_filename)
    HTML(string=html_out).write_pdf(output_path)
    return output_path


def _read_students_csv(path: str):
    """Load the students CSV, detecting separator and encoding, and clean its header."""
    single_column = None
    for encoding in ("utf-8-sig", "latin1"):
        for sep in (";", ","):
            try:
                frame = pd.read_csv(path, sep=sep, encoding=encoding)
            except (UnicodeDecodeError, pd.errors.ParserError):
                # Wrong guess for this file; try the next combination.
                continue
            if len(frame.columns) > 1:
                return _clean_header(frame)
            # A single column may just mean the wrong separator; keep the
            # first such parse in case nothing better turns up.
            if single_column is None:
                single_column = frame

    if single_column is None:
        # No combination parsed: repeat the historical last-resort read so
        # the caller sees the underlying pandas error.
        single_column = pd.read_csv(path, sep=";", encoding="latin1")
    return _clean_header(single_column)


def _clean_header(frame):
    """Strip surrounding whitespace and double quotes from the column names."""
    frame.columns = frame.columns.str.strip().str.replace("\"", "")
    return frame


def _map_student_columns(frame):
    """Rename known header aliases to ``Vorname``/``Nachname``/``Klasse`` without creating duplicates."""
    aliases = {
        "vorname kind": "Vorname",
        "vorname": "Vorname",
        "first name": "Vorname",
        "nachname kind": "Nachname",
        "nachname": "Nachname",
        "last name": "Nachname",
        "gruppe": "Klasse",
        "klasse": "Klasse",
        "group": "Klasse",
        "class": "Klasse",
    }
    # Canonical names that already exist must not be produced a second time,
    # otherwise sorting and grouping fail on duplicate column labels.
    taken = {col for col in frame.columns if col in aliases.values()}
    col_mapping = {}
    for col in frame.columns:
        target = aliases.get(col.lower().strip())
        if target is None or col == target or target in taken:
            continue
        col_mapping[col] = target
        taken.add(target)
    return frame.rename(columns=col_mapping)
|
||||
|
||||
Reference in New Issue
Block a user