66 lines
2.3 KiB
Python
66 lines
2.3 KiB
Python
import sys
|
|
import os
|
|
import csv
|
|
from collections import Counter
|
|
import argparse
|
|
|
|
# Add the 'backend' directory to the path
# (the parent of this file's directory is placed first on sys.path; the
# project imports below are presumably resolved through that entry, so they
# must stay after this line -- TODO confirm against the package layout).
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from database import SessionLocal, RawJobTitle
from lib.logging_setup import setup_logging
import logging

# Run the project's logging setup before creating this module's logger.
setup_logging()
logger = logging.getLogger(__name__)
|
|
|
|
def _read_job_title_counts(file_path: str) -> Counter:
    """Tally how often each non-blank title occurs in the first CSV column.

    Args:
        file_path: Path to a CSV file whose first column holds job titles.

    Returns:
        A Counter mapping each whitespace-stripped title to its number of
        occurrences. Empty rows and rows whose first cell is blank are skipped.
    """
    counts = Counter()
    # newline='' is what the csv module requires so that it can handle line
    # endings itself (quoted fields containing line breaks parse correctly).
    # 'utf-8-sig' reads ordinary UTF-8 and additionally drops a leading BOM,
    # which would otherwise stay glued to the first title (str.strip() does
    # not remove U+FEFF) and create a bogus distinct title.
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
        # Assuming the CSV contains only job titles, one per row
        for row in csv.reader(f):
            if not row:
                continue
            title = row[0].strip()
            if title:
                counts[title] += 1
    return counts


def import_job_titles_from_csv(file_path: str):
    """Import job title frequencies from a CSV file into RawJobTitle rows.

    The first column of every row is treated as a job title. Titles are
    stripped of surrounding whitespace and counted; then, for each unique
    title:

    * if a RawJobTitle with that title exists and its count differs, the
      count is overwritten with the frequency found in the CSV (not added);
    * if it exists with the same count, it is left untouched;
    * otherwise a new RawJobTitle is added with source="csv_import" and
      is_mapped=False.

    All changes are committed in a single transaction at the end.

    Args:
        file_path: Path to the CSV file containing job titles.

    Returns:
        None. Any exception (unreadable file, database error, ...) is logged
        with its traceback and the transaction is rolled back; the exception
        is not re-raised. The session is always closed.
    """
    db = SessionLocal()
    try:
        logger.info(f"Starting import of job titles from {file_path}")

        # Use Counter to get frequencies directly from the CSV
        job_title_counts = _read_job_title_counts(file_path)
        # Every counted entry is one accepted CSV row, so the sum of the
        # frequencies is the number of title entries read.
        total_rows = sum(job_title_counts.values())

        logger.info(f"Read {total_rows} total job title entries. Found {len(job_title_counts)} unique titles.")

        added_count = 0
        updated_count = 0

        for title, count in job_title_counts.items():
            existing_title = db.query(RawJobTitle).filter(RawJobTitle.title == title).first()
            if existing_title:
                if existing_title.count != count:
                    existing_title.count = count
                    updated_count += 1
                # If it exists and count is the same, do nothing.
            else:
                new_title = RawJobTitle(title=title, count=count, source="csv_import", is_mapped=False)
                db.add(new_title)
                added_count += 1

        db.commit()
        logger.info(f"Import complete. Added {added_count} new unique titles, updated {updated_count} existing titles.")

    except Exception as e:
        # Top-level boundary of the import: report the failure and discard
        # any partially staged changes instead of propagating.
        logger.error(f"Error during job title import: {e}", exc_info=True)
        db.rollback()
    finally:
        db.close()
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the CSV path,
    # and hands it to the importer.
    cli = argparse.ArgumentParser(
        description="Import job titles from a CSV file into the RawJobTitle database table.",
    )
    cli.add_argument(
        "file_path",
        type=str,
        help="Path to the CSV file containing job titles.",
    )
    csv_path = cli.parse_args().file_path

    import_job_titles_from_csv(csv_path)