import sys
import os
import csv
import argparse
from datetime import datetime

# Setup Environment: make the project root importable so `backend` resolves
# when this script is run directly.
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from backend.database import SessionLocal, RawJobTitle, init_db, engine, Base

# Lower-case header names that identify the job-title column.
_TITLE_HEADERS = ('funktion', 'jobtitle', 'title', 'position', 'rolle')


def _sample_has_header(sample: str, delimiter: str) -> bool:
    """Return True when the CSV sample appears to start with a header row.

    Uses ``csv.Sniffer`` first. The sniffer raises ``csv.Error`` when it
    cannot work out a delimiter (typical for single-column files), so in that
    case the first row is parsed with the caller's delimiter and checked
    against the known title-column names instead.
    """
    try:
        return csv.Sniffer().has_header(sample)
    except csv.Error:
        first_row = next(csv.reader(sample.splitlines(), delimiter=delimiter), [])
        return any(cell.strip().lower() in _TITLE_HEADERS for cell in first_row)


def import_titles(file_path: str, delimiter: str = ';') -> bool:
    """Import job titles from a CSV file into the RawJobTitle table.

    Each non-empty value of the title column either creates a new
    ``RawJobTitle`` row with ``count=1`` or increments ``count`` (and refreshes
    ``updated_at``) on the existing row with the same title. Work is committed
    every 100 processed rows and once more at the end.

    Args:
        file_path: Path of the CSV file to read (decoded as UTF-8, BOM allowed).
        delimiter: Field delimiter used by the file.

    Returns:
        True if the import finished without error, False if an exception was
        caught (the pending transaction is rolled back in that case; batches
        committed earlier stay committed).
    """
    print(f"šŸš€ Starting Import from {file_path}...")

    # Ensure Table Exists
    RawJobTitle.__table__.create(bind=engine, checkfirst=True)

    db = SessionLocal()
    total_rows = 0
    new_titles = 0
    updated_titles = 0
    succeeded = False

    try:
        # utf-8-sig handles BOM from Excel; newline='' lets the csv module do
        # its own newline handling (needed for quoted multi-line fields).
        with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
            # Try to detect header
            sample = f.read(1024)
            has_header = _sample_has_header(sample, delimiter)
            f.seek(0)

            reader = csv.reader(f, delimiter=delimiter)
            col_idx = 0

            if has_header:
                headers = next(reader)
                print(f"ā„¹ļø Header detected: {headers}")
                # Try to find the right column index; falls back to column 0
                # when no known header name is present.
                for i, h in enumerate(headers):
                    if h.strip().lower() in _TITLE_HEADERS:
                        col_idx = i
                        print(f" -> Using column '{h}' (Index {i})")
                        break
            else:
                print("ā„¹ļø No header detected, using first column.")

            # Process Rows
            for row in reader:
                # Skip blank lines and rows too short to hold the title column.
                if not row or len(row) <= col_idx:
                    continue

                raw_title = row[col_idx].strip()
                if not raw_title:
                    continue  # Skip empty

                total_rows += 1

                # Check existence
                existing = (
                    db.query(RawJobTitle)
                    .filter(RawJobTitle.title == raw_title)
                    .first()
                )

                if existing:
                    existing.count += 1
                    existing.updated_at = datetime.utcnow()
                    updated_titles += 1
                else:
                    db.add(RawJobTitle(title=raw_title, count=1))
                    # Flush so the lookup above sees this row if the same
                    # title repeats before the next commit, even when the
                    # session has autoflush disabled (SessionLocal's
                    # configuration is not visible here).
                    db.flush()
                    new_titles += 1

                if total_rows % 100 == 0:
                    db.commit()
                    print(f" Processed {total_rows} rows...", end='\r')

        db.commit()
        succeeded = True

    except Exception as e:
        # Top-level boundary of the CLI: report, undo the open transaction
        # and signal failure through the return value.
        print(f"\nāŒ Error: {e}")
        db.rollback()
    finally:
        db.close()

    if succeeded:
        print("\nāœ… Import Complete.")
    # Counters reflect rows read; after a failure the last uncommitted batch
    # is included in them even though it was rolled back.
    print(f" Total Processed: {total_rows}")
    print(f" New Unique Titles: {new_titles}")
    print(f" Updated Frequencies: {updated_titles}")

    return succeeded


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Import Job Titles from CSV")
    parser.add_argument("file", help="Path to CSV file")
    parser.add_argument("--delimiter", default=";", help="CSV Delimiter (default: ';')")

    args = parser.parse_args()

    if not os.path.exists(args.file):
        print(f"āŒ File not found: {args.file}")
        sys.exit(1)

    # Propagate import failure to the shell via a non-zero exit status.
    if not import_titles(args.file, args.delimiter):
        sys.exit(1)