[32788f42] fix(list-generator): improve CSV parsing with auto-separator detection and robust column mapping
This commit is contained in:
@@ -5,48 +5,56 @@ from weasyprint import HTML
|
|||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
def generate_school_pdf(institution: str, date_info: str, list_type: str, students_csv_path: str, families_csv_path: str = None, output_dir: str = "/tmp") -> str:
|
def generate_school_pdf(institution: str, date_info: str, list_type: str, students_csv_path: str, families_csv_path: str = None, output_dir: str = "/tmp") -> str:
|
||||||
try:
|
# Try different separators and encodings
|
||||||
df = pd.read_csv(students_csv_path, sep=";", encoding="utf-8-sig")
|
df = None
|
||||||
except Exception:
|
for sep in [";", ","]:
|
||||||
|
try:
|
||||||
|
test_df = pd.read_csv(students_csv_path, sep=sep, encoding="utf-8-sig", nrows=5)
|
||||||
|
if len(test_df.columns) > 1:
|
||||||
|
df = pd.read_csv(students_csv_path, sep=sep, encoding="utf-8-sig")
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if df is None:
|
||||||
df = pd.read_csv(students_csv_path, sep=";", encoding="latin1")
|
df = pd.read_csv(students_csv_path, sep=";", encoding="latin1")
|
||||||
df.columns = df.columns.str.strip()
|
|
||||||
|
df.columns = df.columns.str.strip().str.replace("\"", "")
|
||||||
|
print(f"Detected columns: {list(df.columns)}")
|
||||||
|
|
||||||
col_mapping = {}
|
col_mapping = {}
|
||||||
for col in df.columns:
|
for col in df.columns:
|
||||||
lower_col = col.lower()
|
lower_col = col.lower().strip()
|
||||||
if lower_col in ["vorname kind", "vorname"]:
|
if lower_col in ["vorname kind", "vorname", "first name"]:
|
||||||
col_mapping[col] = "Vorname"
|
col_mapping[col] = "Vorname"
|
||||||
elif lower_col in ["nachname kind", "nachname"]:
|
elif lower_col in ["nachname kind", "nachname", "last name"]:
|
||||||
col_mapping[col] = "Nachname"
|
col_mapping[col] = "Nachname"
|
||||||
elif lower_col in ["gruppe", "klasse"]:
|
elif lower_col in ["gruppe", "klasse", "group", "class"]:
|
||||||
col_mapping[col] = "Klasse"
|
col_mapping[col] = "Klasse"
|
||||||
|
|
||||||
df = df.rename(columns=col_mapping)
|
df = df.rename(columns=col_mapping)
|
||||||
df = df.fillna("")
|
df = df.fillna("")
|
||||||
if "Vorname" not in df.columns: df["Vorname"] = ""
|
|
||||||
if "Nachname" not in df.columns: df["Nachname"] = ""
|
for col in ["Vorname", "Nachname", "Klasse"]:
|
||||||
if "Klasse" not in df.columns: df["Klasse"] = "Alle"
|
if col not in df.columns:
|
||||||
|
df[col] = "Alle" if col == "Klasse" else ""
|
||||||
|
|
||||||
df = df.sort_values(by=["Klasse", "Nachname", "Vorname"])
|
df = df.sort_values(by=["Klasse", "Nachname", "Vorname"])
|
||||||
grouped = df.groupby("Klasse")
|
grouped = df.groupby("Klasse")
|
||||||
class_data = []
|
class_data = []
|
||||||
for class_name, group in grouped:
|
for class_name, group in grouped:
|
||||||
students = group.to_dict("records")
|
class_data.append({"name": class_name, "students": group.to_dict("records")})
|
||||||
class_data.append({"name": class_name, "students": students})
|
|
||||||
class_counts = [{"name": c, "count": len(g)} for c, g in grouped]
|
class_counts = [{"name": c, "count": len(g)} for c, g in grouped]
|
||||||
total_students = sum(c["count"] for c in class_counts)
|
total_students = len(df)
|
||||||
|
|
||||||
template_dir = os.path.join(os.path.dirname(__file__), "..", "templates")
|
template_dir = os.path.join(os.path.dirname(__file__), "..", "templates")
|
||||||
env = Environment(loader=FileSystemLoader(template_dir))
|
env = Environment(loader=FileSystemLoader(template_dir))
|
||||||
template = env.get_template("school_list.html")
|
template = env.get_template("school_list.html")
|
||||||
current_time = datetime.datetime.now().strftime("%d.%m.%Y %H:%M Uhr")
|
current_time = datetime.datetime.now().strftime("%d.%m.%Y %H:%M Uhr")
|
||||||
html_out = template.render(
|
html_out = template.render(institution=institution, date_info=date_info, class_counts=class_counts, total_students=total_students, class_data=class_data, current_time=current_time)
|
||||||
institution=institution,
|
|
||||||
date_info=date_info,
|
|
||||||
class_counts=class_counts,
|
|
||||||
total_students=total_students,
|
|
||||||
class_data=class_data,
|
|
||||||
current_time=current_time
|
|
||||||
)
|
|
||||||
clean_inst = institution.replace(" ", "_").replace("/", "-")
|
clean_inst = institution.replace(" ", "_").replace("/", "-")
|
||||||
time_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
|
output_filename = f"Listen_{clean_inst}_{list_type}_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')}.pdf"
|
||||||
output_filename = f"Listen_{clean_inst}_{list_type}_{time_str}.pdf"
|
|
||||||
output_path = os.path.join(output_dir, output_filename)
|
output_path = os.path.join(output_dir, output_filename)
|
||||||
HTML(string=html_out).write_pdf(output_path)
|
HTML(string=html_out).write_pdf(output_path)
|
||||||
return output_path
|
return output_path
|
||||||
|
|||||||
Reference in New Issue
Block a user