v1.2.1 - Bugfix Silent Logging & Robuste Konfiguration
- Bugfix: Behebt das Problem, dass weder im Terminal noch in der Log-Datei Log-Ausgaben erschienen.
- Die Skript-Struktur wurde angepasst: Eine robuste `setup_logging`-Funktion erkennt eine implizite Standard-Konfiguration durch importierte Module und überschreibt sie.
- Stellt sicher, dass die benutzerdefinierte Logging-Konfiguration (DEBUG-Level, File-Handler) zuverlässig angewendet wird.
- Das Skript protokolliert nun alle Prozessschritte wie vorgesehen, insbesondere die Analyse der Branchen-Spezifität.
This commit is contained in:
@@ -1,14 +1,17 @@
|
|||||||
# knowledge_base_builder.py
|
# knowledge_base_builder.py
|
||||||
|
|
||||||
__version__ = "v1.2.0"
|
__version__ = "v1.2.1"
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from google_sheet_handler import GoogleSheetHandler
|
from google_sheet_handler import GoogleSheetHandler
|
||||||
|
from helpers import create_log_filename
|
||||||
from config import Config
|
from config import Config
|
||||||
|
|
||||||
# --- Konfiguration ---
|
# --- Konfiguration ---
|
||||||
@@ -54,10 +57,7 @@ BRANCH_GROUP_RULES = {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
# --- NEU: Angepasste und konfigurierbare Schwellenwerte ---
|
|
||||||
# Ein Department muss mindestens so viele Einträge haben, um eine Branchen-Regel zu bekommen.
|
|
||||||
MIN_SAMPLES_FOR_BRANCH_RULE = 5
|
MIN_SAMPLES_FOR_BRANCH_RULE = 5
|
||||||
# Wenn >X% der Jobtitel eines Departments in EINER Branchengruppe liegen, gilt es als spezifisch.
|
|
||||||
BRANCH_SPECIFICITY_THRESHOLD = 0.7
|
BRANCH_SPECIFICITY_THRESHOLD = 0.7
|
||||||
|
|
||||||
STOP_WORDS = {
|
STOP_WORDS = {
|
||||||
@@ -69,9 +69,40 @@ STOP_WORDS = {
|
|||||||
'cfo', 'cto', 'coo', 'von', 'of', 'und', 'für', 'der', 'die', 'das', '&'
|
'cfo', 'cto', 'coo', 'von', 'of', 'und', 'für', 'der', 'die', 'das', '&'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def setup_logging():
    """Configure root logging to write to the console and to a log file.

    Handlers already attached to the root logger (for example by an imported
    module that configured logging implicitly) are removed and closed first.
    Without that step ``logging.basicConfig`` is a no-op and none of the
    configuration below would take effect.

    If ``create_log_filename`` returns a falsy value, or the log file cannot
    be opened, logging falls back to console-only output at DEBUG level.

    Returns:
        None. The root logger is reconfigured as a side effect.
    """
    root_logger = logging.getLogger()
    # Iterate over a copy: removeHandler mutates root_logger.handlers.
    for stale_handler in root_logger.handlers[:]:
        root_logger.removeHandler(stale_handler)
        # Release any file descriptor the discarded handler still holds.
        stale_handler.close()

    log_filename = create_log_filename("knowledge_base_builder")

    file_handler = None
    open_error = None
    if log_filename:
        try:
            file_handler = logging.FileHandler(log_filename, encoding='utf-8')
        except OSError as exc:
            # An unwritable log location must not abort the whole script.
            open_error = exc

    if file_handler is None:
        print("KRITISCHER FEHLER: Log-Datei konnte nicht erstellt werden. Logge nur in die Konsole.")
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[logging.StreamHandler()],
        )
    else:
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[file_handler, logging.StreamHandler()],
        )

    # Keep chatty third-party libraries from flooding the DEBUG output.
    logging.getLogger("gspread").setLevel(logging.WARNING)
    logging.getLogger("oauth2client").setLevel(logging.WARNING)

    if file_handler is None:
        if open_error is not None:
            logging.warning("Log-Datei konnte nicht geöffnet werden: %s", open_error)
        return

    logging.info(f"Logging erfolgreich initialisiert. Log-Datei: {log_filename}")
|
||||||
|
|
||||||
|
|
||||||
def build_knowledge_base():
|
def build_knowledge_base():
|
||||||
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
"""
|
||||||
|
Hauptfunktion zur Erstellung der Wissensbasis.
|
||||||
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logger.info(f"Starte Erstellung der Wissensbasis (Version {__version__})...")
|
logger.info(f"Starte Erstellung der Wissensbasis (Version {__version__})...")
|
||||||
|
|
||||||
@@ -130,7 +161,6 @@ def build_knowledge_base():
|
|||||||
department_branches = branches_by_department.get(department, [])
|
department_branches = branches_by_department.get(department, [])
|
||||||
total_titles_in_dept = len(department_branches)
|
total_titles_in_dept = len(department_branches)
|
||||||
|
|
||||||
# Angepasste Logik mit transparentem Logging
|
|
||||||
if total_titles_in_dept >= MIN_SAMPLES_FOR_BRANCH_RULE:
|
if total_titles_in_dept >= MIN_SAMPLES_FOR_BRANCH_RULE:
|
||||||
branch_group_counts = Counter()
|
branch_group_counts = Counter()
|
||||||
for branch_name in department_branches:
|
for branch_name in department_branches:
|
||||||
@@ -151,7 +181,6 @@ def build_knowledge_base():
|
|||||||
else:
|
else:
|
||||||
logger.debug(f" -> Department '{department}' hat zu wenige Datenpunkte ({total_titles_in_dept} < {MIN_SAMPLES_FOR_BRANCH_RULE}) für eine Branchen-Regel.")
|
logger.debug(f" -> Department '{department}' hat zu wenige Datenpunkte ({total_titles_in_dept} < {MIN_SAMPLES_FOR_BRANCH_RULE}) für eine Branchen-Regel.")
|
||||||
|
|
||||||
|
|
||||||
keyword_rules[department] = rule
|
keyword_rules[department] = rule
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -166,4 +195,5 @@ def build_knowledge_base():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
setup_logging()
|
||||||
build_knowledge_base()
|
build_knowledge_base()
|
||||||
Reference in New Issue
Block a user