data_processor.py aktualisiert
This commit is contained in:
@@ -5075,45 +5075,78 @@ class DataProcessor:
|
|||||||
ma_for_pred) and ma_for_pred > 0:
|
ma_for_pred) and ma_for_pred > 0:
|
||||||
umsatz_pro_ma_val = umsatz_for_pred / ma_for_pred
|
umsatz_pro_ma_val = umsatz_for_pred / ma_for_pred
|
||||||
|
|
||||||
# 4. Branchen-Feature holen
|
def _predict_technician_bucket(self, row_data):
|
||||||
# Wichtig: Hier die gleiche Branchenspalte wie im Training
|
"""
|
||||||
# verwenden!
|
Führt eine Vorhersage des Servicetechniker-Buckets für eine einzelne Zeile durch.
|
||||||
branche_val_str = self._get_cell_value_safe(
|
Die Feature-Erstellung ist exakt auf den Trainingsprozess abgestimmt.
|
||||||
row_data, "CRM Branche")
|
"""
|
||||||
|
company_name = self._get_cell_value_safe(row_data, 'CRM Name').strip()
|
||||||
|
self.logger.debug(f"Versuche ML-Schaetzung fuer Zeile ({company_name[:50]}...)")
|
||||||
|
|
||||||
# DataFrame mit einer Zeile und den internen Namen (wie in
|
if not self.is_setup_complete or self.model is None or self.imputer is None or self._expected_features is None:
|
||||||
# prepare_data_for_modeling) erstellen
|
self.logger.error("ML-Artefakte (Modell/Imputer/Features) nicht initialisiert. Überspringe Vorhersage.")
|
||||||
single_row_dict = {
|
return "FEHLER Schaetzung (Setup fehlt)"
|
||||||
'Log_Finaler_Umsatz_ML': [log_umsatz_val],
|
|
||||||
'Log_Finaler_Mitarbeiter_ML': [log_ma_val],
|
try:
|
||||||
'Umsatz_pro_MA_ML': [umsatz_pro_ma_val],
|
# === Feature Erstellung (exakt wie im Training) ===
|
||||||
'is_part_of_group': [is_group_val],
|
|
||||||
'branche_crm': [
|
|
||||||
str(branche_val_str).strip() if branche_val_str else 'Unbekannt']}
|
|
||||||
df_single_row = pd.DataFrame.from_dict(single_row_dict)
|
|
||||||
|
|
||||||
# One-Hot Encoding
|
# 1. Numerische Werte holen
|
||||||
df_encoded = pd.get_dummies(
|
umsatz_val = get_numeric_filter_value(self._get_cell_value_safe(row_data, "Finaler Umsatz (Wiki>CRM)"), is_umsatz=True)
|
||||||
df_single_row,
|
ma_val = get_numeric_filter_value(self._get_cell_value_safe(row_data, "Finaler Mitarbeiter (Wiki>CRM)"), is_umsatz=False)
|
||||||
columns=['branche_crm'],
|
|
||||||
prefix='Branche',
|
umsatz_val = np.nan if umsatz_val == 0 else umsatz_val
|
||||||
dummy_na=False)
|
ma_val = np.nan if ma_val == 0 else ma_val
|
||||||
|
|
||||||
# Angleichung an die im Training verwendeten Features
|
# 2. 'is_part_of_group' Feature
|
||||||
# Erstelle einen DataFrame mit einer Zeile und den erwarteten
|
parent_d = self._get_cell_value_safe(row_data, "Parent Account Name").strip().lower()
|
||||||
# Spalten
|
parent_o = self._get_cell_value_safe(row_data, "System Vorschlag Parent Account").strip().lower()
|
||||||
data_for_df_processed = {col: [0]
|
parent_p = self._get_cell_value_safe(row_data, "Parent Vorschlag Status").strip().lower()
|
||||||
for col in self._expected_features}
|
is_group = 1 if (parent_d and parent_d != 'k.a.') or (parent_o and parent_o != 'k.a.' and parent_p == 'x') else 0
|
||||||
for col in self._expected_features:
|
|
||||||
if col in df_encoded.columns:
|
|
||||||
data_for_df_processed[col] = [df_encoded[col].iloc[0]]
|
|
||||||
|
|
||||||
df_processed = pd.DataFrame(
|
# 3. Ratio & Log Features
|
||||||
data_for_df_processed,
|
log_umsatz = np.log1p(umsatz_val) if pd.notna(umsatz_val) else np.nan
|
||||||
columns=self._expected_features)
|
log_ma = np.log1p(ma_val) if pd.notna(ma_val) else np.nan
|
||||||
|
umsatz_pro_ma = (umsatz_val / ma_val) if pd.notna(umsatz_val) and pd.notna(ma_val) and ma_val > 0 else np.nan
|
||||||
|
|
||||||
# Imputation und Vorhersage
|
# 4. Branchen-Gruppen-Feature (entscheidende Korrektur)
|
||||||
df_imputed_array = self.imputer.transform(df_processed)
|
# Nutze die KI-Branche als Input
|
||||||
|
branche_ki_val = self._get_cell_value_safe(row_data, "Chat Vorschlag Branche")
|
||||||
|
|
||||||
|
# Erstelle das Mapping von Detail-Branche zu Gruppe
|
||||||
|
branch_group_map = {branch_name: details.get('gruppe', 'Sonstige') for branch_name, details in Config.BRANCH_GROUP_MAPPING.items()}
|
||||||
|
|
||||||
|
# Führe das Mapping durch
|
||||||
|
branchen_gruppe = branch_group_map.get(branche_ki_val, 'Sonstige')
|
||||||
|
|
||||||
|
# 5. DataFrame mit allen möglichen Features erstellen (wie im Training)
|
||||||
|
single_row_data = {
|
||||||
|
'Log_Finaler_Umsatz_ML': log_umsatz,
|
||||||
|
'Log_Finaler_Mitarbeiter_ML': log_ma,
|
||||||
|
'Umsatz_pro_MA_ML': umsatz_pro_ma,
|
||||||
|
'is_part_of_group': is_group
|
||||||
|
}
|
||||||
|
|
||||||
|
# Füge die One-Hot-Encoded Branchen-Gruppen hinzu
|
||||||
|
for expected_col in self._expected_features:
|
||||||
|
if expected_col.startswith('Gruppe_'):
|
||||||
|
# Extrahiere den Gruppennamen aus dem Spaltennamen
|
||||||
|
gruppe_name = expected_col.replace('Gruppe_', '')
|
||||||
|
single_row_data[expected_col] = 1 if gruppe_name == branchen_gruppe else 0
|
||||||
|
|
||||||
|
# Erstelle den finalen DataFrame in der korrekten Spaltenreihenfolge
|
||||||
|
df_for_prediction = pd.DataFrame([single_row_data], columns=self._expected_features)
|
||||||
|
|
||||||
|
# 6. Vorhersage mit der Pipeline durchführen
|
||||||
|
# Die Pipeline kümmert sich um die Imputation
|
||||||
|
prediction_proba = self.model.predict_proba(df_for_prediction)
|
||||||
|
predicted_bucket_label = self.model.classes_[np.argmax(prediction_proba[0])]
|
||||||
|
|
||||||
|
self.logger.debug(f" -> ML Vorhersage Ergebnis: '{predicted_bucket_label}'")
|
||||||
|
return predicted_bucket_label
|
||||||
|
|
||||||
|
except Exception as e_predict:
|
||||||
|
self.logger.exception(f"FEHLER bei der ML-Vorhersage für Zeile ({company_name[:50]}...): {e_predict}")
|
||||||
|
return f"FEHLER Schaetzung: {str(e_predict)[:100]}..."
|
||||||
|
|
||||||
prediction_proba = self.model.predict_proba(df_imputed_array)
|
prediction_proba = self.model.predict_proba(df_imputed_array)
|
||||||
predicted_bucket_label = self.model.classes_[
|
predicted_bucket_label = self.model.classes_[
|
||||||
|
|||||||
Reference in New Issue
Block a user