feat(explorer): add database migration script for industries schema

2026-01-20 15:38:20 +00:00
parent c2fc5efc02
commit 76d801c1d6
2 changed files with 93 additions and 0 deletions
--- a/MIGRATION_PLAN.md
+++ b/MIGRATION_PLAN.md
@@ -332,4 +332,24 @@ Das System nutzt Notion als zentrales Steuerungselement für strategische Defini
 *   **Notion Token:** Muss in `/app/notion_token.txt` (Container-Pfad) hinterlegt sein.
 *   **DB-Mapping:** Die Zuordnung erfolgt primär über die `notion_id`, sekundär über den Namen, um Dubletten bei der Migration zu vermeiden.

+## 10. Database Migration (v0.6.1 -> v0.6.2)
+
+Wenn die `industries`-Tabelle in einer bestehenden Datenbank aktualisiert werden muss (z.B. um neue Felder aus Notion zu unterstützen), darf die Datenbankdatei **nicht** gelöscht werden. Stattdessen muss das Migrations-Skript ausgeführt werden.
+
+**Prozess:**
+
+1.  **Sicherstellen, dass die Zieldatenbank vorhanden ist:** Die `companies_v3_fixed_2.db` muss im `company-explorer`-Verzeichnis liegen.
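+2.  **Migration ausführen:** Dieser Befehl fügt der `industries`-Tabelle die fehlenden Spalten (`metric_type`, `scraper_search_term`, `standardization_logic`, `proxy_factor`) hinzu, ohne Daten zu löschen. Bereits vorhandene Spalten werden übersprungen, das Skript kann daher gefahrlos mehrfach ausgeführt werden.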
+    ```bash
+    docker exec -it company-explorer python3 backend/scripts/migrate_db.py
+    ```
+3.  **Container neu starten:** Damit der Server das neue Schema erkennt.
+    ```bash
+    docker-compose restart company-explorer
+    ```
+4.  **Notion-Sync ausführen:** Um die neuen Spalten mit Daten zu befüllen.
+    ```bash
+    docker exec -it company-explorer python3 backend/scripts/sync_notion_industries.py
+    ```
+

--- a/company-explorer/backend/scripts/migrate_db.py
+++ b/company-explorer/backend/scripts/migrate_db.py
@@ -0,0 +1,73 @@
+
+import sqlite3
+import sys
+import os
+import logging
+
+# Add parent path to import config
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
+from backend.config import settings
+
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+)
+logger = logging.getLogger(__name__)
+
+# settings.DATABASE_URL is a SQLAlchemy URL; sqlite3.connect() needs the bare
+# file path, so the "sqlite:///" scheme marker is removed from it.
+_SQLITE_URL_PREFIX = "sqlite:///"
+DB_FILE = settings.DATABASE_URL.replace(_SQLITE_URL_PREFIX, "")
+
+def get_db_connection():
+    """Open and return a new sqlite3 connection to the file named by DB_FILE."""
+    connection = sqlite3.connect(DB_FILE)
+    return connection
+
+def get_table_columns(cursor, table_name):
+    """
+    Return the column names of a table, in declaration order.
+
+    Args:
+        cursor: An open sqlite3 cursor.
+        table_name: Name of the table to inspect.
+
+    Returns:
+        A list of column names. The list is empty when the table does not
+        exist, because PRAGMA table_info then yields no rows.
+    """
+    # PRAGMA arguments cannot be bound as parameters, so the name is embedded
+    # as a double-quoted identifier (inner quotes doubled). This keeps names
+    # with spaces, quotes or reserved words from breaking the statement.
+    quoted_name = '"' + table_name.replace('"', '""') + '"'
+    cursor.execute(f"PRAGMA table_info({quoted_name})")
+    # Each result row is (cid, name, type, notnull, dflt_value, pk).
+    return [row[1] for row in cursor.fetchall()]
+
+def migrate_industries_table():
+    """
+    Add the newer schema columns to the 'industries' table if they are missing.
+
+    Columns that already exist are skipped and no rows are modified, so the
+    migration can be run repeatedly against the same database file.
+
+    Returns:
+        True when the table has all expected columns afterwards. False when
+        the 'industries' table does not exist or an error occurred; the error
+        is logged rather than raised.
+    """
+    logger.info(f"Connecting to database at {DB_FILE} to run migrations...")
+    conn = get_db_connection()
+
+    try:
+        cursor = conn.cursor()
+
+        logger.info("Checking 'industries' table schema...")
+        columns = get_table_columns(cursor, "industries")
+        logger.info(f"Found existing columns: {columns}")
+
+        # An empty column list means the table itself is missing. Report that
+        # directly instead of letting the first ALTER TABLE fail.
+        if not columns:
+            logger.error("Table 'industries' does not exist in the database. Nothing to migrate.")
+            return False
+
+        # Column name -> SQLite type of every column this migration adds.
+        # min_requirement, whale_threshold, scraper_keywords already exist from v0.6.0
+        migrations_to_run = {
+            "metric_type": "TEXT",
+            "scraper_search_term": "TEXT",
+            "standardization_logic": "TEXT",
+            "proxy_factor": "FLOAT",
+        }
+
+        for col, col_type in migrations_to_run.items():
+            if col not in columns:
+                logger.info(f"Adding column '{col}' ({col_type}) to 'industries' table...")
+                cursor.execute(f"ALTER TABLE industries ADD COLUMN {col} {col_type}")
+            else:
+                logger.info(f"Column '{col}' already exists. Skipping.")
+
+        # Obsolete columns are deliberately left in place (safer than dropping).
+        # TODO: convert proxy_factor to FLOAT where an older database created
+        # it as TEXT; this needs more than ADD COLUMN and is not handled yet.
+
+        conn.commit()
+        logger.info("Migrations for 'industries' table completed successfully.")
+        return True
+
+    except Exception as e:
+        # Top-level boundary of the migration: log with traceback and report
+        # the failure through the return value.
+        logger.error(f"An error occurred during migration: {e}", exc_info=True)
+        conn.rollback()
+        return False
+    finally:
+        conn.close()
+
+
+if __name__ == "__main__":
+    # Exit with a non-zero status on failure so that shell callers
+    # (e.g. `docker exec`, CI jobs) can detect that the migration did not run.
+    if not os.path.exists(DB_FILE):
+        logger.error(f"Database file not found at {DB_FILE}. Cannot run migration. Please ensure the old database is in place.")
+        sys.exit(1)
+
+    # Only an explicit False result counts as a failure; a None result is
+    # treated as success.
+    if migrate_industries_table() is False:
+        sys.exit(1)