data_processor.py aktualisiert
This commit is contained in:
@@ -5055,36 +5055,29 @@ class DataProcessor:
|
||||
umsatz_pro_ma = (umsatz_val / ma_val) if pd.notna(umsatz_val) and pd.notna(ma_val) and ma_val > 0 else np.nan
|
||||
|
||||
# 4. Branchen-Gruppen-Feature (entscheidende Korrektur)
|
||||
# Nutze die KI-Branche als Input
|
||||
branche_ki_val = self._get_cell_value_safe(row_data, "Chat Vorschlag Branche")
|
||||
|
||||
# Erstelle das Mapping von Detail-Branche zu Gruppe
|
||||
branch_group_map = {branch_name: details.get('gruppe', 'Sonstige') for branch_name, details in Config.BRANCH_GROUP_MAPPING.items()}
|
||||
|
||||
# Führe das Mapping durch
|
||||
branchen_gruppe = branch_group_map.get(branche_ki_val, 'Sonstige')
|
||||
|
||||
# 5. DataFrame mit allen möglichen Features erstellen (wie im Training)
|
||||
single_row_data = {
|
||||
# 5. DataFrame mit allen möglichen Features erstellen
|
||||
data_for_prediction = {
|
||||
'Log_Finaler_Umsatz_ML': log_umsatz,
|
||||
'Log_Finaler_Mitarbeiter_ML': log_ma,
|
||||
'Umsatz_pro_MA_ML': umsatz_pro_ma,
|
||||
'is_part_of_group': is_group
|
||||
'is_part_of_group': is_group,
|
||||
}
|
||||
|
||||
# Füge die One-Hot-Encoded Branchen-Gruppen hinzu
|
||||
for expected_col in self._expected_features:
|
||||
if expected_col.startswith('Gruppe_'):
|
||||
# Extrahiere den Gruppennamen aus dem Spaltennamen
|
||||
gruppe_name = expected_col.replace('Gruppe_', '')
|
||||
single_row_data[expected_col] = 1 if gruppe_name == branchen_gruppe else 0
|
||||
for expected_feature in self._expected_features:
|
||||
if expected_feature.startswith('Gruppe_'):
|
||||
gruppe_name_from_column = expected_feature.replace('Gruppe_', '')
|
||||
data_for_prediction[expected_feature] = 1 if gruppe_name_from_column == branchen_gruppe else 0
|
||||
|
||||
# Erstelle den finalen DataFrame in der korrekten Spaltenreihenfolge
|
||||
df_for_prediction = pd.DataFrame([single_row_data], columns=self._expected_features)
|
||||
df_processed = pd.DataFrame([data_for_prediction], columns=self._expected_features)
|
||||
|
||||
# 6. Vorhersage mit der Pipeline durchführen
|
||||
# Die Pipeline kümmert sich um die Imputation
|
||||
prediction_proba = self.model.predict_proba(df_for_prediction)
|
||||
# 6. Vorhersage durchführen
|
||||
# self.model ist die komplette Pipeline, die die Imputation intern durchführt
|
||||
prediction_proba = self.model.predict_proba(df_processed)
|
||||
predicted_bucket_label = self.model.classes_[np.argmax(prediction_proba[0])]
|
||||
|
||||
self.logger.debug(f" -> ML Vorhersage Ergebnis: '{predicted_bucket_label}'")
|
||||
@@ -5094,19 +5087,6 @@ class DataProcessor:
|
||||
self.logger.exception(f"FEHLER bei der ML-Vorhersage für Zeile ({company_name[:50]}...): {e_predict}")
|
||||
return f"FEHLER Schaetzung: {str(e_predict)[:100]}..."
|
||||
|
||||
prediction_proba = self.model.predict_proba(df_imputed_array)
|
||||
predicted_bucket_label = self.model.classes_[
|
||||
np.argmax(prediction_proba[0])]
|
||||
|
||||
self.logger.debug(
|
||||
f" -> ML Vorhersage Ergebnis: '{predicted_bucket_label}'")
|
||||
return predicted_bucket_label
|
||||
|
||||
except Exception as e_predict:
|
||||
self.logger.exception(
|
||||
f"FEHLER bei der ML-Vorhersage für Zeile ({company_name[:50]}...): {e_predict}")
|
||||
return f"FEHLER Schaetzung: {str(e_predict)[:100]}..."
|
||||
|
||||
def _load_ml_model(self, model_path, imputer_path):
|
||||
"""
|
||||
Laedt das trainierte ML-Modell, den Imputer und die Feature-Liste.
|
||||
|
||||
Reference in New Issue
Block a user