"""Import raw job titles from a CSV file into the RawJobTitle table.

Usage: run this script with the path to a CSV file as its only argument.
The first column of every row is treated as a job title; identical titles
are counted and the per-title count is written to the database.
"""
import argparse
import csv
import logging
import os
import sys
from collections import Counter

# Make the parent of this script's directory importable (the 'backend'
# directory, per the original comment) so the project imports below resolve.
# This must run before those imports.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from database import SessionLocal, RawJobTitle
from lib.logging_setup import setup_logging

setup_logging()
logger = logging.getLogger(__name__)


def _count_job_titles(file_path: str) -> Counter:
    """Return a Counter mapping each stripped, non-empty title to its frequency.

    Only the first column of each row is read. Empty rows and rows whose
    first column is blank after stripping are ignored.
    """
    counts = Counter()
    # newline='' is what the csv module requires so that line breaks inside
    # quoted fields are parsed correctly; 'utf-8-sig' drops a leading BOM
    # (str.strip() does not), and still reads BOM-less UTF-8 files unchanged.
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.reader(f):
            if not row:
                continue
            title = row[0].strip()
            if title:
                counts[title] += 1
    return counts


def _upsert_title_counts(db, job_title_counts) -> tuple:
    """Add missing titles and overwrite changed counts; return (added, updated).

    Nothing is committed here; the caller owns the transaction.
    """
    added = 0
    updated = 0
    for title, count in job_title_counts.items():
        existing = db.query(RawJobTitle).filter(RawJobTitle.title == title).first()
        if existing is None:
            db.add(RawJobTitle(title=title, count=count, source="csv_import", is_mapped=False))
            added += 1
        elif existing.count != count:
            # The stored count is replaced by the count from this file,
            # not incremented by it.
            existing.count = count
            updated += 1
        # Existing row with an identical count: nothing to do.
    return added, updated


def import_job_titles_from_csv(file_path: str) -> None:
    """Load job titles from ``file_path`` and store their counts in RawJobTitle.

    Args:
        file_path: Path to a CSV file with one job title in the first column
            of each row.

    Any exception raised while reading the file or writing to the database
    is logged with its traceback and the session is rolled back; it is not
    re-raised, so callers (including the command-line entry point) do not
    see the failure other than through the log. The session is always closed.
    """
    db = SessionLocal()
    try:
        logger.info("Starting import of job titles from %s", file_path)

        job_title_counts = _count_job_titles(file_path)
        total_rows = sum(job_title_counts.values())
        logger.info(
            "Read %d total job title entries. Found %d unique titles.",
            total_rows,
            len(job_title_counts),
        )

        added_count, updated_count = _upsert_title_counts(db, job_title_counts)
        db.commit()
        logger.info(
            "Import complete. Added %d new unique titles, updated %d existing titles.",
            added_count,
            updated_count,
        )
    except Exception as e:
        logger.exception("Error during job title import: %s", e)
        db.rollback()
    finally:
        db.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Import job titles from a CSV file into the RawJobTitle database table.")
    parser.add_argument("file_path", type=str, help="Path to the CSV file containing job titles.")
    args = parser.parse_args()

    import_job_titles_from_csv(args.file_path)