96 lines
3.1 KiB
Python
96 lines
3.1 KiB
Python
import sys
|
||
import os
|
||
import csv
|
||
import argparse
|
||
from datetime import datetime
|
||
|
||
# Setup Environment
|
||
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
|
||
|
||
from backend.database import SessionLocal, RawJobTitle, init_db, engine, Base
|
||
|
||
def import_titles(file_path: str, delimiter: str = ';'):
    """Import job titles from a CSV file into the ``RawJobTitle`` table.

    Every non-empty value found in the title column is counted: a title that
    is not stored yet is inserted with ``count=1``; a title that already
    exists gets its ``count`` incremented and ``updated_at`` refreshed.
    Changes are committed every 100 processed rows and once more at the end.

    Args:
        file_path: Path of the CSV file. It is read as UTF-8 and a leading
            BOM is tolerated.
        delimiter: Field separator used by the file.

    Returns:
        None. Progress and the final summary are printed to stdout. Errors
        are printed as well (not re-raised); the uncommitted part of the
        import is rolled back and no success summary is shown.
    """
    print(f"🚀 Starting Import from {file_path}...")

    # Ensure the target table exists before querying it.
    RawJobTitle.__table__.create(bind=engine, checkfirst=True)

    db = SessionLocal()
    total_rows = 0
    new_titles = 0
    updated_titles = 0

    # Titles already handled in this run, mapped to their ORM object.
    # NOTE(review): if SessionLocal is configured with autoflush disabled, the
    # per-row query below would not see rows that were added but not flushed
    # yet; resolving repeated titles from this cache works in either case.
    seen = {}

    try:
        # utf-8-sig strips the BOM written by Excel; newline='' is what the
        # csv module asks for so that quoted line breaks are parsed correctly.
        with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
            has_header = _has_header_row(f.read(1024), delimiter)
            f.seek(0)

            reader = csv.reader(f, delimiter=delimiter)

            col_idx = 0
            if has_header:
                headers = next(reader, [])
                print(f"ℹ️ Header detected: {headers}")
                # Use the first column whose name is a known title header;
                # otherwise stay on the first column.
                for i, h in enumerate(headers):
                    if h.strip().lower() in _TITLE_COLUMN_NAMES:
                        col_idx = i
                        print(f" -> Using column '{h}' (Index {i})")
                        break
            else:
                print("ℹ️ No header detected, using first column.")

            for row in reader:
                # Also skips empty rows, whose length is 0.
                if len(row) <= col_idx:
                    continue

                raw_title = row[col_idx].strip()
                if not raw_title:
                    continue

                total_rows += 1

                record = seen.get(raw_title)
                if record is None:
                    record = (
                        db.query(RawJobTitle)
                        .filter(RawJobTitle.title == raw_title)
                        .first()
                    )

                if record is None:
                    record = RawJobTitle(title=raw_title, count=1)
                    db.add(record)
                    new_titles += 1
                else:
                    record.count += 1
                    record.updated_at = datetime.utcnow()
                    updated_titles += 1
                seen[raw_title] = record

                # Commit in batches so a late failure does not lose everything.
                if total_rows % 100 == 0:
                    db.commit()
                    print(f" Processed {total_rows} rows...", end='\r')

        db.commit()
    except Exception as e:
        print(f"\n❌ Error: {e}")
        db.rollback()
        # Do not claim success: only the batches committed before the error
        # are stored.
        print(" Import aborted; changes since the last commit were rolled back.")
    else:
        print(f"\n✅ Import Complete.")
        print(f" Total Processed: {total_rows}")
        print(f" New Unique Titles: {new_titles}")
        print(f" Updated Frequencies: {updated_titles}")
    finally:
        db.close()


# Lower-case header names that identify the job-title column.
_TITLE_COLUMN_NAMES = ('funktion', 'jobtitle', 'title', 'position', 'rolle')


def _has_header_row(sample: str, delimiter: str) -> bool:
    """Return True when the first row of *sample* looks like a header row."""
    try:
        return csv.Sniffer().has_header(sample)
    except csv.Error:
        # The sniffer gives up when it cannot infer a dialect (for example on
        # single-column or empty input). Fall back to a direct check: the
        # first row is a header if one of its cells is a known column name.
        lines = sample.splitlines()
        if not lines:
            return False
        cells = next(csv.reader(lines[:1], delimiter=delimiter), [])
        return any(cell.strip().lower() in _TITLE_COLUMN_NAMES for cell in cells)
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: parse the CSV path and the optional
    # delimiter, then run the import if the path exists.
    cli = argparse.ArgumentParser(description="Import Job Titles from CSV")
    cli.add_argument("file", help="Path to CSV file")
    cli.add_argument(
        "--delimiter",
        default=";",
        help="CSV Delimiter (default: ';')",
    )
    options = cli.parse_args()

    if os.path.exists(options.file):
        import_titles(options.file, options.delimiter)
    else:
        # Report the missing path and exit with a non-zero status.
        print(f"❌ File not found: {options.file}")
        sys.exit(1)
|