bugfix
This commit is contained in:
@@ -104,6 +104,8 @@ class Config:
|
||||
logger = logging.getLogger(cls.__name__)
|
||||
logger.info("Lade API-Schluessel...")
|
||||
cls.API_KEYS['openai'] = cls._load_key_from_file(cls.API_KEY_FILE)
|
||||
cls.API_KEYS['serpapi'] = cls._load_key_from_file(cls.SERP_API_KEY_FILE)
|
||||
cls.API_KEYS['genderize'] = cls._load_key_from_file(cls.GENDERIZE_API_KEY_FILE)
|
||||
if cls.API_KEYS.get('openai'):
|
||||
openai.api_key = cls.API_KEYS['openai']
|
||||
logger.info("OpenAI API Key erfolgreich geladen.")
|
||||
@@ -129,8 +131,8 @@ class Config:
|
||||
# 3. GLOBALE HILFSFUNKTIONEN
|
||||
# ==============================================================================
|
||||
|
||||
# Prompt fragments shared module-wide. Both start empty and are populated
# by load_target_schema(), which declares them `global` and assigns them.
TARGET_SCHEMA_STRING = ""
FOCUS_BRANCHES_PROMPT_PART = ""
|
||||
|
||||
def normalize_for_mapping(text):
|
||||
if not isinstance(text, str): return ""
|
||||
@@ -151,13 +153,17 @@ def load_branch_mapping(file_path=Config.BRANCH_MAPPING_FILE):
|
||||
if not all(col in df_mapping.columns for col in expected_cols):
|
||||
logger.error(f"FEHLER: Spalten {expected_cols} in '{file_path}' nicht gefunden. Gefunden: {list(df_mapping.columns)}")
|
||||
return {}
|
||||
df_mapping['normalized_keys'] = df_mapping['Branch'].apply(normalize_for_mapping)
|
||||
if df_mapping['normalized_keys'].duplicated().any():
|
||||
duplicates = df_mapping[df_mapping['normalized_keys'].duplicated()]['normalized_keys']
|
||||
logger.warning(f"WARNUNG: Duplikate in normalisierten Branchen-Keys: {list(duplicates)}")
|
||||
branch_map_dict = pd.Series(
|
||||
df_mapping['Branch Group'].str.strip().values,
|
||||
index=df_mapping['Branch'].apply(normalize_for_mapping)
|
||||
index=df_mapping['normalized_keys']
|
||||
).to_dict()
|
||||
logger.info(f"Branchen-Mapping aus '{file_path}' erfolgreich geladen ({len(branch_map_dict)} Einträge).")
|
||||
return branch_map_dict
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.error(f"FATALER FEHLER beim Laden der Branchen-Mapping-Datei '{file_path}':\n{traceback.format_exc()}")
|
||||
return {}
|
||||
|
||||
@@ -165,39 +171,68 @@ def load_target_schema(csv_filepath=Config.SCHEMA_FILE):
|
||||
logger = logging.getLogger(__name__)
|
||||
global TARGET_SCHEMA_STRING, FOCUS_BRANCHES_PROMPT_PART
|
||||
logger.info(f"Lade Ziel-Schema und Fokus-Branchen aus '{csv_filepath}'...")
|
||||
ziel_schema, fokus_branchen = [], []
|
||||
if not os.path.exists(csv_filepath):
|
||||
logger.error(f"DATEI NICHT GEFUNDEN: '{os.path.abspath(csv_filepath)}'.")
|
||||
return [], []
|
||||
try:
|
||||
allowed_branches_set, focus_branches_set = set(), set()
|
||||
with open(csv_filepath, "r", encoding="utf-8-sig") as f:
|
||||
reader = csv.reader(f, delimiter=';')
|
||||
next(reader) # Header überspringen
|
||||
next(reader)
|
||||
for row in reader:
|
||||
if row and len(row) >= 1 and row[0].strip():
|
||||
target_branch = row[0].strip()
|
||||
ziel_schema.append(target_branch)
|
||||
allowed_branches_set.add(target_branch)
|
||||
if len(row) >= 2 and row[1].strip().upper() in ["X", "FOKUS", "JA", "TRUE", "1"]:
|
||||
fokus_branchen.append(target_branch)
|
||||
ALLOWED_TARGET_BRANCHES = sorted(list(set(ziel_schema)), key=str.lower)
|
||||
FOCUS_TARGET_BRANCHES = sorted(list(set(fokus_branchen)), key=str.lower)
|
||||
focus_branches_set.add(target_branch)
|
||||
ALLOWED_TARGET_BRANCHES = sorted(list(allowed_branches_set))
|
||||
FOCUS_TARGET_BRANCHES = sorted(list(focus_branches_set))
|
||||
logger.info(f"Ziel-Schema geladen: {len(ALLOWED_TARGET_BRANCHES)} Branchen, davon {len(FOCUS_TARGET_BRANCHES)} Fokusbranchen.")
|
||||
if ALLOWED_TARGET_BRANCHES:
|
||||
# ... (Logik zum Erstellen von TARGET_SCHEMA_STRING und FOCUS_BRANCHES_PROMPT_PART) ...
|
||||
pass # Platzhalter, Ihre Logik hier war in Ordnung
|
||||
# Hier Ihre Logik zum Erstellen der Prompt-Strings
|
||||
schema_lines = ["..."] # Platzhalter für Ihre Logik
|
||||
TARGET_SCHEMA_STRING = "\n".join(schema_lines)
|
||||
if FOCUS_TARGET_BRANCHES:
|
||||
focus_prompt_lines = ["..."] # Platzhalter
|
||||
FOCUS_BRANCHES_PROMPT_PART = "\n".join(focus_prompt_lines)
|
||||
else:
|
||||
FOCUS_BRANCHES_PROMPT_PART = ""
|
||||
return ALLOWED_TARGET_BRANCHES, FOCUS_TARGET_BRANCHES
|
||||
except Exception:
|
||||
logger.error(f"FEHLER beim Laden der Schema-Datei '{csv_filepath}':\n{traceback.format_exc()}")
|
||||
return [], []
|
||||
|
||||
def parse_arguments():
    """Build the command-line parser and return the parsed arguments.

    The ``--mode`` help text is assembled from a category -> modes table so
    that ``--help`` lists every mode grouped under its category heading.

    Returns:
        argparse.Namespace: The result of ``parser.parse_args()``, carrying
        ``mode``, ``limit``, ``start_sheet_row``, ``end_sheet_row``, ``steps``,
        ``min_umsatz``, ``min_employees``, ``model_out``, ``imputer_out`` and
        ``patterns_out``.
    """
    # RawTextHelpFormatter keeps the hand-formatted line breaks of the
    # multi-line --mode help text instead of re-wrapping it.
    parser = argparse.ArgumentParser(
        description=f"Unternehmensbewertung {Config.VERSION}",
        formatter_class=argparse.RawTextHelpFormatter,
    )

    mode_categories = {
        "Sequentielle Verarbeitung (Zeilenweise)": ["full_run"],
        "Re-Evaluate Markierte Zeilen (Spalte A='x')": ["reeval"],
        "Einzelne Dienstprogramme / Suchen": [
            "find_wiki_serp", "website_lookup", "check_urls", "contacts", "update_wiki_suggestions",
            "wiki_reextract_missing_an", "website_details", "train_technician_model", "alignment",
            "reparatur_sitz", "plausi_check_data", "branch_eval", "suggest_parents", "analyze_ml_by_branch",
        ],
        "Kombinierte Laeufe (Vordefiniert)": ["combined_all"],
    }
    # Flat list of every known mode.
    # NOTE(review): this list is not passed as `choices=` below, so --mode
    # currently accepts any string; confirm whether validation happens in the
    # caller before wiring it in here.
    valid_modes = [mode for modes in mode_categories.values() for mode in modes]

    # Collect the help fragments and join once instead of repeated `+=`.
    help_parts = ["Betriebsmodus. Waehlen Sie einen der folgenden:\n"]
    for category, modes in mode_categories.items():
        help_parts.append(f"\n{category}:\n")
        help_parts.extend(f" - {mode}\n" for mode in modes)
    mode_help_text = "".join(help_parts)

    parser.add_argument("--mode", type=str, help=mode_help_text)
    parser.add_argument("--limit", type=int, help="Maximale Anzahl zu verarbeitender Zeilen.", default=None)
    parser.add_argument("--start_sheet_row", type=int, help="Startzeile im Sheet (1-basiert).", default=None)
    parser.add_argument("--end_sheet_row", type=int, help="Endzeile im Sheet (1-basiert).", default=None)

    # By default every single-row step is enabled (comma-separated list).
    valid_single_row_steps = ['wiki', 'chat', 'web', 'ml_predict']
    default_steps_arg = ','.join(valid_single_row_steps)
    parser.add_argument("--steps", type=str, help=f"Schritte im 'reeval'/'full_run' Modus. Möglich: {valid_single_row_steps}", default=default_steps_arg)

    # The default is the configured threshold divided by 1,000,000, matching
    # the "MIO €" unit named in the help text.
    parser.add_argument("--min_umsatz", type=float, help="Mindestumsatz in MIO € für find_wiki_serp.", default=Config.PLAUSI_UMSATZ_MIN_SCHWELLE_FUER_MA_CHECK / 1_000_000)
    parser.add_argument("--min_employees", type=int, help="Mindestmitarbeiterzahl für find_wiki_serp.", default=500)

    parser.add_argument("--model_out", type=str, default=Config.MODEL_FILE, help="Pfad für trainiertes Modell.")
    parser.add_argument("--imputer_out", type=str, default=Config.IMPUTER_FILE, help="Pfad für Imputer.")
    parser.add_argument("--patterns_out", type=str, default=Config.PATTERNS_FILE_JSON, help="Pfad für Feature-Patterns.")
    # ... all other arguments (elided in the original)

    return parser.parse_args()
|
||||
|
||||
# --- Globale Spalten-Mapping (WICHTIG: MUSS ZU IHREM SHEET PASSEN!) ---
|
||||
|
||||
Reference in New Issue
Block a user