feat(explorer): add database migration script for industries schema

This commit is contained in:
2026-01-20 15:38:20 +00:00
parent c2fc5efc02
commit 76d801c1d6
2 changed files with 93 additions and 0 deletions

View File

@@ -332,4 +332,24 @@ Das System nutzt Notion als zentrales Steuerungselement für strategische Defini
* **Notion Token:** Muss in `/app/notion_token.txt` (Container-Pfad) hinterlegt sein.
* **DB-Mapping:** Die Zuordnung erfolgt primär über die `notion_id`, sekundär über den Namen, um Dubletten bei der Migration zu vermeiden.
## 10. Database Migration (v0.6.1 -> v0.6.2)
Wenn die `industries`-Tabelle in einer bestehenden Datenbank aktualisiert werden muss (z.B. um neue Felder aus Notion zu unterstützen), darf die Datenbankdatei **nicht** gelöscht werden. Stattdessen muss das Migrations-Skript ausgeführt werden.
**Prozess:**
1. **Sicherstellen, dass die Zieldatenbank vorhanden ist:** Die `companies_v3_fixed_2.db` muss im `company-explorer`-Verzeichnis liegen.
2. **Migration ausführen:** Dieser Befehl fügt die fehlenden Spalten hinzu, ohne Daten zu löschen.
```bash
docker exec -it company-explorer python3 backend/scripts/migrate_db.py
```
3. **Container neu starten:** Damit der Server das neue Schema erkennt.
```bash
docker-compose restart company-explorer
```
4. **Notion-Sync ausführen:** Um die neuen Spalten mit Daten zu befüllen.
```bash
docker exec -it company-explorer python3 backend/scripts/sync_notion_industries.py
```

View File

@@ -0,0 +1,73 @@
import sqlite3
import sys
import os
import logging
# Add the project root (two directories above this script) to sys.path so that `backend.config` can be imported
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
from backend.config import settings
# Configure logging for this script: INFO level, each record prefixed with
# a timestamp and the level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Extract DB file path from SQLAlchemy URL by removing the "sqlite:///" prefix.
# NOTE(review): assumes settings.DATABASE_URL is a SQLite URL; a URL with any
# other scheme would be used unchanged as a file path -- confirm.
DB_FILE = settings.DATABASE_URL.replace("sqlite:///", "")
def get_db_connection():
    """Open and return a new sqlite3 connection to the file named by DB_FILE."""
    connection = sqlite3.connect(DB_FILE)
    return connection
def get_table_columns(cursor, table_name):
    """Return the column names of a table, in the order SQLite reports them.

    Args:
        cursor: An open sqlite3 cursor on the target database.
        table_name: Name of the table to inspect.

    Returns:
        A list of column names; empty when the table does not exist.
    """
    # PRAGMA arguments cannot be bound as SQL parameters, so the name is
    # embedded as a double-quoted identifier (embedded quotes doubled). This
    # keeps names containing spaces, quotes or reserved words from breaking
    # the statement.
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    cursor.execute(f"PRAGMA table_info({quoted_name})")
    # Each row is (cid, name, type, notnull, dflt_value, pk); index 1 is the name.
    return [row[1] for row in cursor.fetchall()]
def migrate_industries_table():
    """
    Adds the new schema columns to the 'industries' table if they don't exist.
    This ensures backward compatibility with older database files.

    The function is idempotent: columns that are already present are skipped.
    All ALTER TABLE statements run inside one explicit transaction, so a
    failure part-way through leaves the table unchanged.

    Returns:
        bool: True when the table has all expected columns after the call,
        False when the table is missing or an error occurred (the error is
        logged; no exception is propagated to the caller).
    """
    logger.info(f"Connecting to database at {DB_FILE} to run migrations...")
    conn = get_db_connection()

    try:
        cursor = conn.cursor()

        logger.info("Checking 'industries' table schema...")
        columns = get_table_columns(cursor, "industries")
        logger.info(f"Found existing columns: {columns}")

        # An empty column list means the table does not exist; ALTER TABLE
        # would only fail later with a less helpful message.
        if not columns:
            logger.error("Table 'industries' does not exist in the database. Cannot run migration.")
            return False

        # SQLite treats column names case-insensitively, so compare that way
        # to avoid a "duplicate column name" error on differently-cased names.
        existing = {name.lower() for name in columns}

        migrations_to_run = {
            "metric_type": "TEXT",
            "scraper_search_term": "TEXT",
            "standardization_logic": "TEXT",
            "proxy_factor": "FLOAT",
            # min_requirement, whale_threshold, scraper_keywords already exist from v0.6.0
        }

        # The sqlite3 module does not open a transaction for DDL on its own,
        # so start one explicitly; otherwise rollback() below could not undo
        # a partially applied migration.
        if not conn.in_transaction:
            cursor.execute("BEGIN")

        for col, col_type in migrations_to_run.items():
            if col.lower() not in existing:
                logger.info(f"Adding column '{col}' ({col_type}) to 'industries' table...")
                cursor.execute(f"ALTER TABLE industries ADD COLUMN {col} {col_type}")
            else:
                logger.info(f"Column '{col}' already exists. Skipping.")

        # Old columns are intentionally left in place (safer than dropping).
        # Changing the declared type of an existing column (e.g. proxy_factor
        # previously created as TEXT) is NOT handled here; this migration
        # only adds missing columns.

        conn.commit()
        logger.info("Migrations for 'industries' table completed successfully.")
        return True

    except Exception as e:
        logger.error(f"An error occurred during migration: {e}", exc_info=True)
        conn.rollback()
        return False
    finally:
        conn.close()
if __name__ == "__main__":
    # Exit with a non-zero status on failure so that callers such as
    # `docker exec` or a CI step can detect that the migration did not run.
    if not os.path.exists(DB_FILE):
        logger.error(f"Database file not found at {DB_FILE}. Cannot run migration. Please ensure the old database is in place.")
        sys.exit(1)
    # Only an explicit False result is treated as failure; a None result
    # leaves the exit status at 0.
    if migrate_industries_table() is False:
        sys.exit(1)