From 76d801c1d673912e77be37528a5b3587cc830f4d Mon Sep 17 00:00:00 2001 From: Floke Date: Tue, 20 Jan 2026 15:38:20 +0000 Subject: [PATCH] feat(explorer): add database migration script for industries schema --- MIGRATION_PLAN.md | 20 +++++ .../backend/scripts/migrate_db.py | 73 +++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 company-explorer/backend/scripts/migrate_db.py diff --git a/MIGRATION_PLAN.md b/MIGRATION_PLAN.md index 374cd794..f2c29609 100644 --- a/MIGRATION_PLAN.md +++ b/MIGRATION_PLAN.md @@ -332,4 +332,24 @@ Das System nutzt Notion als zentrales Steuerungselement für strategische Defini * **Notion Token:** Muss in `/app/notion_token.txt` (Container-Pfad) hinterlegt sein. * **DB-Mapping:** Die Zuordnung erfolgt primär über die `notion_id`, sekundär über den Namen, um Dubletten bei der Migration zu vermeiden. +## 10. Database Migration (v0.6.1 -> v0.6.2) + +Wenn die `industries`-Tabelle in einer bestehenden Datenbank aktualisiert werden muss (z.B. um neue Felder aus Notion zu unterstützen), darf die Datenbankdatei **nicht** gelöscht werden. Stattdessen muss das Migrations-Skript ausgeführt werden. + +**Prozess:** + +1. **Sicherstellen, dass die Zieldatenbank vorhanden ist:** Die `companies_v3_fixed_2.db` muss im `company-explorer`-Verzeichnis liegen. +2. **Migration ausführen:** Dieser Befehl fügt die fehlenden Spalten hinzu, ohne Daten zu löschen. + ```bash + docker exec -it company-explorer python3 backend/scripts/migrate_db.py + ``` +3. **Container neu starten:** Damit der Server das neue Schema erkennt. + ```bash + docker-compose restart company-explorer + ``` +4. **Notion-Sync ausführen:** Um die neuen Spalten mit Daten zu befüllen. 
+    ```bash
+    docker exec -it company-explorer python3 backend/scripts/sync_notion_industries.py
+    ```
+
diff --git a/company-explorer/backend/scripts/migrate_db.py b/company-explorer/backend/scripts/migrate_db.py
new file mode 100644
index 00000000..a6dec99f
--- /dev/null
+++ b/company-explorer/backend/scripts/migrate_db.py
@@ -0,0 +1,109 @@
+"""Add the v0.6.2 columns to the 'industries' table of an existing SQLite DB.
+
+Usage: python3 backend/scripts/migrate_db.py
+The process exits with status 1 if the database file or table is missing or a
+statement fails, and with status 0 otherwise.
+"""
+
+import sqlite3
+import sys
+import os
+import logging
+
+# Add parent path to import config
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
+from backend.config import settings
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# Extract DB file path from SQLAlchemy URL
+DB_FILE = settings.DATABASE_URL.replace("sqlite:///", "")
+
+
+def get_db_connection():
+    """Open and return a new sqlite3 connection to DB_FILE."""
+    return sqlite3.connect(DB_FILE)
+
+
+def get_table_columns(cursor, table_name):
+    """Return the column names of ``table_name``.
+
+    ``table_name`` is interpolated into the PRAGMA statement unescaped, so it
+    must be a trusted identifier. SQLite returns no rows for an unknown
+    table, in which case the result is an empty list.
+    """
+    cursor.execute(f"PRAGMA table_info({table_name})")
+    # Rows are (cid, name, type, notnull, dflt_value, pk); index 1 is the name.
+    return [row[1] for row in cursor.fetchall()]
+
+
+def migrate_industries_table():
+    """
+    Adds the new schema columns to the 'industries' table if they don't exist.
+    This ensures backward compatibility with older database files.
+
+    Errors are logged rather than raised. Depending on the sqlite3 transaction
+    mode, columns added before a failing statement may already be persisted;
+    re-running is safe because existing columns are skipped.
+
+    Returns:
+        True if every column is present afterwards, False if the table is
+        missing or a statement failed.
+    """
+    logger.info("Connecting to database at %s to run migrations...", DB_FILE)
+    conn = get_db_connection()
+
+    try:
+        cursor = conn.cursor()
+        logger.info("Checking 'industries' table schema...")
+        columns = get_table_columns(cursor, "industries")
+        logger.info("Found existing columns: %s", columns)
+
+        if not columns:
+            # PRAGMA table_info yields no rows for a missing table; report it
+            # directly instead of failing on the first ALTER TABLE.
+            logger.error("Table 'industries' not found in %s. Nothing to migrate.", DB_FILE)
+            return False
+
+        # SQLite matches column names case-insensitively, so compare lowercased.
+        existing = {name.lower() for name in columns}
+
+        migrations_to_run = {
+            "metric_type": "TEXT",
+            "scraper_search_term": "TEXT",
+            "standardization_logic": "TEXT",
+            "proxy_factor": "FLOAT",
+            # min_requirement, whale_threshold, scraper_keywords already exist from v0.6.0
+        }
+
+        for col, col_type in migrations_to_run.items():
+            if col.lower() in existing:
+                logger.info("Column '%s' already exists. Skipping.", col)
+                continue
+            logger.info("Adding column '%s' (%s) to 'industries' table...", col, col_type)
+            cursor.execute(f"ALTER TABLE industries ADD COLUMN {col} {col_type}")
+
+        # Old columns are deliberately left in place (safer than dropping them).
+        # TODO: convert proxy_factor to FLOAT if an older database created it as
+        # TEXT; that needs more than ADD COLUMN and is not handled yet.
+
+        conn.commit()
+        logger.info("Migrations for 'industries' table completed successfully.")
+        return True
+
+    except Exception as e:
+        # Boundary of a maintenance script: log with traceback and report failure.
+        logger.error(f"An error occurred during migration: {e}", exc_info=True)
+        conn.rollback()
+        return False
+    finally:
+        conn.close()
+
+
+if __name__ == "__main__":
+    if not os.path.exists(DB_FILE):
+        logger.error(f"Database file not found at {DB_FILE}. Cannot run migration. Please ensure the old database is in place.")
+        sys.exit(1)
+    # A non-zero exit status lets the caller detect a failed migration.
+    sys.exit(0 if migrate_industries_table() else 1)