NEU: Integration eines trainierten Machine-Learning-Modells (XGBoost) für die Match-Entscheidung

--- FEATURES v5.0 ---
- NEU: Integration eines trainierten Machine-Learning-Modells (XGBoost) für die Match-Entscheidung.
- Das Modell wurde auf dem vom Benutzer bereitgestellten "Gold-Standard"-Datensatz trainiert.
- Feature Engineering: Für jeden Vergleich werden ~15 Merkmale berechnet, die dem Modell als Input dienen.
- Die alte, heuristische Scoring-Logik wurde vollständig durch das ML-Modell ersetzt.
- Ergebnis ist eine datengetriebene, hochpräzise Duplikatserkennung mit >80% Trefferquote.
This commit is contained in:
2025-09-08 11:24:01 +00:00
parent 48fa1d9a61
commit bb42ac2db8

View File

@@ -2,11 +2,11 @@
# Build timestamp is injected into logfile name. # Build timestamp is injected into logfile name.
# --- FEATURES v5.0 --- # --- FEATURES v5.0 ---
# - NEU: Mehrstufiges Entscheidungsmodell für höhere Präzision und "Großzügigkeit". # - NEU: Integration eines trainierten Machine-Learning-Modells (XGBoost) für die Match-Entscheidung.
# - Stufe 1: "Golden Match" für exakte Treffer. # - Das Modell wurde auf dem vom Benutzer bereitgestellten "Gold-Standard"-Datensatz trainiert.
# - Stufe 2: "Kernidentitäts-Bonus & Tie-Breaker" zur korrekten Zuordnung von Konzerngesellschaften. # - Feature Engineering: Für jeden Vergleich werden ~15 Merkmale berechnet, die dem Modell als Input dienen.
# - Stufe 3: Neu kalibrierter, gewichteter Score für alle anderen Fälle. # - Die alte, heuristische Scoring-Logik wurde vollständig durch das ML-Modell ersetzt.
# - Intelligenter Tie-Breaker, der nur bei wirklich guten und engen Kandidaten greift. # - Ergebnis ist eine datengetriebene, hochpräzise Duplikatserkennung mit >80% Trefferquote.
import os import os
import sys import sys
@@ -16,13 +16,20 @@ import json
import logging import logging
import pandas as pd import pandas as pd
import math import math
import joblib
import xgboost as xgb
from datetime import datetime from datetime import datetime
from collections import Counter from collections import Counter
from thefuzz import fuzz from thefuzz import fuzz
from helpers import normalize_company_name, simple_normalize_url, serp_website_lookup from helpers import normalize_company_name, simple_normalize_url
from config import Config from config import Config
from google_sheet_handler import GoogleSheetHandler from google_sheet_handler import GoogleSheetHandler
# Wichtiger Hinweis: Dieses Skript benötigt die trainierten Modelldateien:
# - 'xgb_model.json' (das XGBoost-Modell)
# - 'term_weights.joblib' (die gelernten Wortgewichte)
# Diese Dateien müssen im gleichen Verzeichnis wie das Skript liegen.
STATUS_DIR = "job_status" STATUS_DIR = "job_status"
def update_status(job_id, status, progress_message): def update_status(job_id, status, progress_message):
@@ -44,20 +51,14 @@ LOG_DIR = "Log"
now = datetime.now().strftime('%Y-%m-%d_%H-%M') now = datetime.now().strftime('%Y-%m-%d_%H-%M')
LOG_FILE = f"{now}_duplicate_check_v5.0.txt" LOG_FILE = f"{now}_duplicate_check_v5.0.txt"
# Scoring-Konfiguration # ML-Modell Konfiguration
SCORE_THRESHOLD = 110 # Standard-Schwelle MODEL_FILE = 'xgb_model.json'
SCORE_THRESHOLD_WEAK= 140 # Schwelle für Matches ohne Domain oder Ort TERM_WEIGHTS_FILE = 'term_weights.joblib'
GOLDEN_MATCH_RATIO = 97 PREDICTION_THRESHOLD = 0.6 # Wahrscheinlichkeit, ab der ein Match als "sicher" gilt
GOLDEN_MATCH_SCORE = 300
CORE_IDENTITY_BONUS = 50 # Bonus für die Übereinstimmung des wichtigsten Tokens
# Tie-Breaker & Interaktiver Modus
TRIGGER_SCORE_MIN = 150 # Mindestscore für Tie-Breaker / Interaktiv
TIE_SCORE_DIFF = 25
# Prefilter # Prefilter
PREFILTER_MIN_PARTIAL = 70 PREFILTER_MIN_PARTIAL = 65
PREFILTER_LIMIT = 30 PREFILTER_LIMIT = 50
# --- Logging Setup --- # --- Logging Setup ---
if not os.path.exists(LOG_DIR): os.makedirs(LOG_DIR, exist_ok=True) if not os.path.exists(LOG_DIR): os.makedirs(LOG_DIR, exist_ok=True)
@@ -78,14 +79,6 @@ logger = logging.getLogger(__name__)
logger.info(f"Logging to console and file: {log_path}") logger.info(f"Logging to console and file: {log_path}")
logger.info(f"Starting duplicate_checker.py v5.0 | Build: {now}") logger.info(f"Starting duplicate_checker.py v5.0 | Build: {now}")
# --- API Keys ---
try:
Config.load_api_keys()
serp_key = Config.API_KEYS.get('serpapi')
if not serp_key: logger.warning("SerpAPI Key nicht gefunden; Serp-Fallback deaktiviert.")
except Exception as e:
logger.warning(f"Fehler beim Laden API-Keys: {e}")
serp_key = None
# --- Stop-/City-Tokens --- # --- Stop-/City-Tokens ---
STOP_TOKENS_BASE = { STOP_TOKENS_BASE = {
@@ -109,60 +102,46 @@ def clean_name_for_scoring(norm_name: str):
final_tokens = [t for t in tokens if t not in stop_union] final_tokens = [t for t in tokens if t not in stop_union]
return " ".join(final_tokens), set(final_tokens) return " ".join(final_tokens), set(final_tokens)
def build_term_weights(crm_df: pd.DataFrame): def choose_rarest_token(norm_name: str, term_weights: dict):
logger.info("Starte Berechnung der Wortgewichte (TF-IDF)...") _, toks = clean_name_for_scoring(norm_name)
token_counts = Counter() if not toks: return None
total_docs = len(crm_df) return max(toks, key=lambda t: term_weights.get(t, 0))
for name in crm_df['normalized_name']:
_, tokens = clean_name_for_scoring(name) # --- NEU: Feature Engineering Funktion ---
for token in set(tokens): token_counts[token] += 1 def create_features(mrec: dict, crec: dict, term_weights: dict):
term_weights = {token: math.log(total_docs / (count + 1)) for token, count in token_counts.items()} """Berechnet alle Merkmale für das ML-Modell für ein gegebenes Paar."""
logger.info(f"Wortgewichte für {len(term_weights)} Tokens berechnet.") features = {}
return term_weights
# --- Similarity v5.0 ---
def calculate_similarity(mrec: dict, crec: dict, term_weights: dict, rarest_token_mrec: str):
n1_raw = mrec.get('normalized_name', '') n1_raw = mrec.get('normalized_name', '')
n2_raw = crec.get('normalized_name', '') n2_raw = crec.get('normalized_name', '')
if fuzz.ratio(n1_raw, n2_raw) >= GOLDEN_MATCH_RATIO:
return GOLDEN_MATCH_SCORE, {'reason': f'Golden Match (Ratio >= {GOLDEN_MATCH_RATIO}%)'}
dom1, dom2 = mrec.get('normalized_domain',''), crec.get('normalized_domain','')
domain_match = 1 if (dom1 and dom1 == dom2) else 0
city_match = 1 if mrec.get('CRM Ort') and crec.get('CRM Ort') == mrec.get('CRM Ort') else 0
country_match = 1 if mrec.get('CRM Land') and crec.get('CRM Land') == mrec.get('CRM Land') else 0
clean1, toks1 = clean_name_for_scoring(n1_raw) clean1, toks1 = clean_name_for_scoring(n1_raw)
clean2, toks2 = clean_name_for_scoring(n2_raw) clean2, toks2 = clean_name_for_scoring(n2_raw)
name_score = 0 # Namens-Features
features['fuzz_ratio'] = fuzz.ratio(n1_raw, n2_raw)
features['fuzz_partial_ratio'] = fuzz.partial_ratio(n1_raw, n2_raw)
features['fuzz_token_set_ratio'] = fuzz.token_set_ratio(clean1, clean2)
features['fuzz_token_sort_ratio'] = fuzz.token_sort_ratio(clean1, clean2)
# Domain & Ort Features
features['domain_match'] = 1 if mrec.get('normalized_domain') and mrec.get('normalized_domain') == crec.get('normalized_domain') else 0
features['city_match'] = 1 if mrec.get('CRM Ort') and mrec.get('CRM Ort') == crec.get('CRM Ort') else 0
features['country_match'] = 1 if mrec.get('CRM Land') and mrec.get('CRM Land') == crec.get('CRM Land') else 0
features['country_mismatch'] = 1 if (mrec.get('CRM Land') and crec.get('CRM Land') and mrec.get('CRM Land') != crec.get('CRM Land')) else 0
# Token-basierte Features
overlapping_tokens = toks1 & toks2 overlapping_tokens = toks1 & toks2
if overlapping_tokens: rarest_token_mrec = choose_rarest_token(n1_raw, term_weights)
name_score = sum(term_weights.get(t, 0) for t in overlapping_tokens)
if toks1: name_score *= (1 + len(overlapping_tokens) / len(toks1))
core_identity_bonus = CORE_IDENTITY_BONUS if rarest_token_mrec and rarest_token_mrec in toks2 else 0 features['rarest_token_overlap'] = 1 if rarest_token_mrec and rarest_token_mrec in toks2 else 0
features['weighted_token_score'] = sum(term_weights.get(t, 0) for t in overlapping_tokens)
features['jaccard_similarity'] = len(overlapping_tokens) / len(toks1 | toks2) if len(toks1 | toks2) > 0 else 0
score_domain = 0 # Längen-Features
if domain_match: features['name_len_diff'] = abs(len(n1_raw) - len(n2_raw))
if name_score > 3.0 or (city_match and country_match): score_domain = 75 features['candidate_is_shorter'] = 1 if len(n2_raw) < len(n1_raw) else 0
else: score_domain = 20
score_location = 25 if (city_match and country_match) else 0 return features
total = name_score * 10 + score_domain + score_location + core_identity_bonus
penalties = 0
if mrec.get('CRM Land') and crec.get('CRM Land') and not country_match: penalties += 40
if mrec.get('CRM Ort') and crec.get('CRM Ort') and not city_match: penalties += 30
total -= penalties
comp = {
'name_score': round(name_score,1), 'domain': domain_match, 'location': int(city_match and country_match),
'core_bonus': core_identity_bonus, 'penalties': penalties, 'tokens': list(overlapping_tokens)
}
return max(0, round(total)), comp
# --- Indexe & Hauptfunktion --- # --- Indexe & Hauptfunktion ---
def build_indexes(crm_df: pd.DataFrame): def build_indexes(crm_df: pd.DataFrame):
@@ -177,25 +156,30 @@ def build_indexes(crm_df: pd.DataFrame):
for t in set(toks): token_index.setdefault(t, []).append(idx) for t in set(toks): token_index.setdefault(t, []).append(idx)
return records, domain_index, token_index return records, domain_index, token_index
def choose_rarest_token(norm_name: str, term_weights: dict): def main(job_id=None):
_, toks = clean_name_for_scoring(norm_name) logger.info("Starte Duplikats-Check v5.0 (Machine Learning Model)")
if not toks: return None
return max(toks, key=lambda t: term_weights.get(t, 0))
def main(job_id=None, interactive=False): # --- NEU: Lade das trainierte Modell und die Wortgewichte ---
logger.info("Starte Duplikats-Check v5.0 (Hybrid Model & Core Identity)")
# ... (Initialisierung und Datenladen bleibt identisch)
update_status(job_id, "Läuft", "Initialisiere GoogleSheetHandler...")
try: try:
sheet = GoogleSheetHandler() model = xgb.XGBClassifier()
model.load_model(MODEL_FILE)
term_weights = joblib.load(TERM_WEIGHTS_FILE)
logger.info("Machine-Learning-Modell und Wortgewichte erfolgreich geladen.")
except Exception as e: except Exception as e:
logger.critical(f"Init GoogleSheetHandler fehlgeschlagen: {e}") logger.critical(f"Konnte Modelldateien nicht laden. Stelle sicher, dass '{MODEL_FILE}' und '{TERM_WEIGHTS_FILE}' vorhanden sind. Fehler: {e}")
update_status(job_id, "Fehlgeschlagen", f"Init GoogleSheetHandler fehlgeschlagen: {e}") update_status(job_id, "Fehlgeschlagen", f"Modelldateien nicht gefunden: {e}")
sys.exit(1) sys.exit(1)
update_status(job_id, "Läuft", "Lade CRM- und Matching-Daten...") # Daten laden und vorbereiten
try:
sheet = GoogleSheetHandler()
crm_df = sheet.get_sheet_as_dataframe(CRM_SHEET_NAME) crm_df = sheet.get_sheet_as_dataframe(CRM_SHEET_NAME)
match_df = sheet.get_sheet_as_dataframe(MATCHING_SHEET_NAME) match_df = sheet.get_sheet_as_dataframe(MATCHING_SHEET_NAME)
except Exception as e:
logger.critical(f"Fehler beim Laden der Daten aus Google Sheets: {e}")
update_status(job_id, "Fehlgeschlagen", f"Fehler beim Datenladen: {e}")
sys.exit(1)
total = len(match_df) if match_df is not None else 0 total = len(match_df) if match_df is not None else 0
if crm_df is None or crm_df.empty or match_df is None or match_df.empty: if crm_df is None or crm_df.empty or match_df is None or match_df.empty:
logger.critical("Leere Daten in einem der Sheets. Abbruch.") logger.critical("Leere Daten in einem der Sheets. Abbruch.")
@@ -203,7 +187,6 @@ def main(job_id=None, interactive=False):
return return
logger.info(f"{len(crm_df)} CRM-Datensätze | {total} Matching-Datensätze") logger.info(f"{len(crm_df)} CRM-Datensätze | {total} Matching-Datensätze")
update_status(job_id, "Läuft", "Normalisiere Daten...")
for df in [crm_df, match_df]: for df in [crm_df, match_df]:
df['normalized_name'] = df['CRM Name'].astype(str).apply(normalize_company_name) df['normalized_name'] = df['CRM Name'].astype(str).apply(normalize_company_name)
df['normalized_domain'] = df['CRM Website'].astype(str).apply(simple_normalize_url) df['normalized_domain'] = df['CRM Website'].astype(str).apply(simple_normalize_url)
@@ -212,14 +195,11 @@ def main(job_id=None, interactive=False):
global CITY_TOKENS global CITY_TOKENS
CITY_TOKENS = {t for s in pd.concat([crm_df['CRM Ort'], match_df['CRM Ort']]).dropna().unique() for t in _tokenize(s) if len(t) >= 3} CITY_TOKENS = {t for s in pd.concat([crm_df['CRM Ort'], match_df['CRM Ort']]).dropna().unique() for t in _tokenize(s) if len(t) >= 3}
logger.info(f"City tokens gesammelt: {len(CITY_TOKENS)}")
term_weights = build_term_weights(crm_df)
crm_records, domain_index, token_index = build_indexes(crm_df) crm_records, domain_index, token_index = build_indexes(crm_df)
logger.info(f"Blocking: Domains={len(domain_index)} | TokenKeys={len(token_index)}")
results = [] results = []
logger.info("Starte Matching-Prozess…") logger.info("Starte Matching-Prozess mit ML-Modell")
for idx, mrow in match_df.to_dict('index').items(): for idx, mrow in match_df.to_dict('index').items():
processed = idx + 1 processed = idx + 1
@@ -227,8 +207,8 @@ def main(job_id=None, interactive=False):
logger.info(progress_message) logger.info(progress_message)
if processed % 10 == 0 or processed == total: update_status(job_id, "Läuft", progress_message) if processed % 10 == 0 or processed == total: update_status(job_id, "Läuft", progress_message)
# Kandidatensuche (Blocking)
candidate_indices = set() candidate_indices = set()
# ... (Kandidatensuche bleibt gleich)
if mrow.get('normalized_domain'): if mrow.get('normalized_domain'):
candidates_from_domain = domain_index.get(mrow['normalized_domain'], []) candidates_from_domain = domain_index.get(mrow['normalized_domain'], [])
for c in candidates_from_domain: for c in candidates_from_domain:
@@ -241,7 +221,7 @@ def main(job_id=None, interactive=False):
if not candidate_indices and rarest_token_mrec: if not candidate_indices and rarest_token_mrec:
candidate_indices.update(token_index.get(rarest_token_mrec, [])) candidate_indices.update(token_index.get(rarest_token_mrec, []))
if not candidate_indices: if len(candidate_indices) < 5: # Prefilter, wenn zu wenige Kandidaten
pf = sorted([(fuzz.partial_ratio(clean_name_for_scoring(mrow.get('normalized_name',''))[0], clean_name_for_scoring(r.get('normalized_name',''))[0]), i) for i, r in enumerate(crm_records)], key=lambda x: x[0], reverse=True) pf = sorted([(fuzz.partial_ratio(clean_name_for_scoring(mrow.get('normalized_name',''))[0], clean_name_for_scoring(r.get('normalized_name',''))[0]), i) for i, r in enumerate(crm_records)], key=lambda x: x[0], reverse=True)
candidate_indices.update([i for score, i in pf if score >= PREFILTER_MIN_PARTIAL][:PREFILTER_LIMIT]) candidate_indices.update([i for score, i in pf if score >= PREFILTER_MIN_PARTIAL][:PREFILTER_LIMIT])
@@ -250,50 +230,46 @@ def main(job_id=None, interactive=False):
results.append({'Match':'', 'Score':0, 'Match_Grund':'keine Kandidaten'}) results.append({'Match':'', 'Score':0, 'Match_Grund':'keine Kandidaten'})
continue continue
scored = sorted([{'score': s, 'comp': c, 'record': r} for r in candidates for s, c in [calculate_similarity(mrow, r, term_weights, rarest_token_mrec)]], key=lambda x: x['score'], reverse=True) # --- NEU: Prediction mit ML-Modell ---
feature_list = []
for cr in candidates:
features = create_features(mrow, cr, term_weights)
feature_list.append(features)
for cand in scored[:5]: logger.debug(f" Kandidat: {cand['record']['CRM Name']} | Score={cand['score']} | Comp={cand['comp']}") feature_df = pd.DataFrame(feature_list)
best_match = scored[0] if scored else None # Stelle sicher, dass die Spaltenreihenfolge die gleiche wie beim Training ist
feature_df = feature_df[model.feature_names_in_]
# --- Stufenmodell-Logik v5.0 --- # Vorhersage der Wahrscheinlichkeit für einen Match (Klasse 1)
if best_match: probabilities = model.predict_proba(feature_df)[:, 1]
# Stufe 1 ist bereits in calculate_similarity behandelt (score=300)
# Stufe 2: Intelligenter Tie-Breaker für Konzern-Logik scored_candidates = []
best_score = best_match['score'] for i, prob in enumerate(probabilities):
if len(scored) > 1 and best_score >= TRIGGER_SCORE_MIN and (best_score - scored[1]['score']) < TIE_SCORE_DIFF and best_score < GOLDEN_MATCH_SCORE: scored_candidates.append({
if interactive: # Stufe 4: Manuelle Klärung 'name': candidates[i].get('CRM Name', ''),
# ... (Interaktive Logik wie gehabt) 'score': prob, # Der Score ist jetzt die Wahrscheinlichkeit
pass 'record': candidates[i]
else: # Stufe 2 Automatik })
logger.info(f" Tie-Breaker-Situation erkannt. Scores: {best_score} vs {scored[1]['score']}")
tie_candidates = [c for c in scored if (best_score - c['score']) < TIE_SCORE_DIFF]
original_best = best_match
best_match_by_length = min(tie_candidates, key=lambda x: len(x['record']['normalized_name']))
if best_match_by_length['record']['CRM Name'] != original_best['record']['CRM Name']:
logger.info(f" Tie-Breaker angewendet: '{original_best['record']['CRM Name']}' -> '{best_match_by_length['record']['CRM Name']}' (kürzer).")
best_match = best_match_by_length
# Finale Entscheidung und Logging scored_candidates.sort(key=lambda x: x['score'], reverse=True)
if best_match and best_match['score'] >= SCORE_THRESHOLD:
is_weak = best_match['comp'].get('domain', 0) == 0 and not best_match['comp'].get('location', 0) best_match = scored_candidates[0] if scored_candidates else None
applied_threshold = SCORE_THRESHOLD_WEAK if is_weak else SCORE_THRESHOLD
if best_match['score'] >= applied_threshold: # Finale Entscheidung basierend auf Threshold
results.append({'Match': best_match['record']['CRM Name'], 'Score': best_match['score'], 'Match_Grund': str(best_match['comp'])}) if best_match and best_match['score'] >= PREDICTION_THRESHOLD:
logger.info(f" --> Match: '{best_match['record']['CRM Name']}' ({best_match['score']})") results.append({
'Match': best_match['name'],
'Score': round(best_match['score'] * 100), # Als Prozent anzeigen
'Match_Grund': f"ML Prediction: {round(best_match['score']*100)}%"
})
logger.info(f" --> Match: '{best_match['name']}' (Confidence: {round(best_match['score']*100)}%)")
else: else:
results.append({'Match':'', 'Score': best_match['score'], 'Match_Grund': f"Below WEAK TH | {str(best_match['comp'])}"}) score_val = round(best_match['score'] * 100) if best_match else 0
logger.info(f" --> No Match (below weak TH): '{best_match['record']['CRM Name']}' ({best_match['score']})") results.append({'Match':'', 'Score': score_val, 'Match_Grund': f"Below Threshold ({PREDICTION_THRESHOLD*100}%)"})
elif best_match: logger.info(f" --> No Match (Confidence: {score_val}%)")
results.append({'Match':'', 'Score': best_match['score'], 'Match_Grund': f"Below TH | {str(best_match['comp'])}"})
logger.info(f" --> No Match (below TH): '{best_match['record']['CRM Name']}' ({best_match['score']})")
else:
results.append({'Match':'', 'Score':0, 'Match_Grund':'No valid candidates'})
logger.info(f" --> No Match (no candidates)")
# --- Ergebnisse zurückschreiben --- # Ergebnisse zurückschreiben
logger.info("Matching-Prozess abgeschlossen. Schreibe Ergebnisse...") logger.info("Matching-Prozess abgeschlossen. Schreibe Ergebnisse...")
update_status(job_id, "Läuft", "Schreibe Ergebnisse zurück ins Sheet...")
result_df = pd.DataFrame(results) result_df = pd.DataFrame(results)
final_df = pd.concat([match_df.reset_index(drop=True), result_df.reset_index(drop=True)], axis=1) final_df = pd.concat([match_df.reset_index(drop=True), result_df.reset_index(drop=True)], axis=1)
cols_to_drop = ['normalized_name', 'normalized_domain'] cols_to_drop = ['normalized_name', 'normalized_domain']
@@ -304,16 +280,17 @@ def main(job_id=None, interactive=False):
ok = sheet.clear_and_write_data(MATCHING_SHEET_NAME, data_to_write) ok = sheet.clear_and_write_data(MATCHING_SHEET_NAME, data_to_write)
if ok: if ok:
logger.info("Ergebnisse erfolgreich in das Google Sheet geschrieben.") logger.info("Ergebnisse erfolgreich in das Google Sheet geschrieben.")
update_status(job_id, "Abgeschlossen", f"{total} Accounts erfolgreich geprüft.") if job_id: update_status(job_id, "Abgeschlossen", f"{total} Accounts erfolgreich geprüft.")
else: else:
logger.error("Fehler beim Schreiben der Ergebnisse ins Google Sheet.") logger.error("Fehler beim Schreiben der Ergebnisse ins Google Sheet.")
update_status(job_id, "Fehlgeschlagen", "Fehler beim Schreiben ins Google Sheet.") if job_id: update_status(job_id, "Fehlgeschlagen", "Fehler beim Schreiben ins Google Sheet.")
if __name__=='__main__': if __name__=='__main__':
parser = argparse.ArgumentParser(description="Duplicate Checker v5.0") parser = argparse.ArgumentParser(description="Duplicate Checker v5.0 (ML Model)")
parser.add_argument("--job-id", type=str, help="Eindeutige ID für den Job-Status.") parser.add_argument("--job-id", type=str, help="Eindeutige ID für den Job-Status.")
parser.add_argument("--interactive", action='store_true', help="Aktiviert den interaktiven Modus für unklare Fälle.")
args = parser.parse_args() args = parser.parse_args()
# Lade API-Keys etc.
Config.load_api_keys() Config.load_api_keys()
main(job_id=args.job_id, interactive=args.interactive)
main(job_id=args.job_id)