train_model.py aktualisiert

2025-09-24 14:17:29 +00:00
parent 3ef2465cdc
commit 894d6f50df
1 changed files with 18 additions and 9 deletions
--- a/train_model.py
+++ b/train_model.py
@@ -11,6 +11,8 @@ from collections import Counter
 import logging
 import sys
 import os
+import treelite
+import treelite_runtime

 # Importiere deine bestehenden Helfer
 from google_sheet_handler import GoogleSheetHandler
@@ -187,17 +189,24 @@ if __name__ == "__main__":
    log.info("\n" + classification_report(y_test, y_pred, zero_division=0))
    
    try:
-        log.info(f"Speichere Modell in '{MODEL_OUTPUT_FILE}'...")
+        # Speichern des Standard-Modells
        model.save_model(MODEL_OUTPUT_FILE)
-        log.info("...erfolgreich.")
+        logging.info(f"Modell in '{MODEL_OUTPUT_FILE}' erfolgreich gespeichert.")
        
-        log.info(f"Speichere Wortgewichte in '{TERM_WEIGHTS_OUTPUT_FILE}'...")
-        joblib.dump(term_weights, TERM_WEIGHTS_OUTPUT_FILE)
-        log.info("...erfolgreich.")
+        # NEW: compile the model into a native shared library via Treelite (export_lib invokes gcc)
+        TREELITE_MODEL_FILE = 'xgb_model.treelite'
+        treelite_model = treelite.Model.from_xgboost(model)
+        treelite_model.export_lib(
+            toolchain='gcc',
+            libpath=TREELITE_MODEL_FILE,
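+            params={'parallel_comp': 4}, # split the generated code into 4 source files so they can be compiled in parallel (fixed value, not the detected CPU core count)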
+            verbose=True
+        )
+        logging.info(f"Leichtgewichtiges Modell in '{TREELITE_MODEL_FILE}' erfolgreich gespeichert.")

-        log.info(f"Speichere CRM-Daten in '{CRM_PREDICTION_FILE}'...")
+        joblib.dump(term_weights, TERM_WEIGHTS_OUTPUT_FILE)
+        logging.info(f"Wortgewichte in '{TERM_WEIGHTS_OUTPUT_FILE}' erfolgreich gespeichert.")
        crm_df.to_pickle(CRM_PREDICTION_FILE)
-        log.info("...erfolgreich.")
-        log.info("Alle Dateien wurden erfolgreich erstellt.")
+        logging.info(f"CRM-Daten in '{CRM_PREDICTION_FILE}' erfolgreich gespeichert.")
    except Exception as e:
-        log.critical(f"FEHLER BEIM SPEICHERN DER DATEIEN: {e}")
+        logging.critical(f"FEHLER BEIM SPEICHERN DER DATEIEN: {e}")