refactor: [30388f42] Strukturiere Root-Skripte thematisch neu

- Organisiert eine Vielzahl von Skripten aus dem Root-Verzeichnis in thematische Unterordner, um die Übersichtlichkeit zu verbessern und die Migration vorzubereiten.
- Verschiebt SuperOffice-bezogene Test- und Hilfsskripte in .
- Verschiebt Notion-bezogene Synchronisations- und Import-Skripte in .
- Archiviert eindeutig veraltete und ungenutzte Skripte in .
- Die zentralen Helfer  und  bleiben im Root, da sie von mehreren Tools als Abhängigkeit genutzt werden.
This commit is contained in:
2026-03-06 10:16:08 +00:00
parent a89d1625d4
commit d021b6b71c
99 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,167 @@
import pygame
import random
import sys
import time
# Maze configuration: grid geometry; the window size derives from it.
CELL_SIZE = 40  # edge length of one cell in pixels
COLS = 15
ROWS = 15
WIDTH = COLS * CELL_SIZE
HEIGHT = ROWS * CELL_SIZE
# Colors (RGB tuples used for drawing).
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
BLUE = (0, 0, 255)
GREEN = (0, 255, 0)
RED = (255, 0, 0)
# Direction definitions: compass direction -> (dx, dy) grid offset.
DIRS = {'N': (0, -1), 'S': (0, 1), 'E': (1, 0), 'W': (-1, 0)}
# Opposite direction, used to open the matching wall of the neighbour cell.
OPPOSITE = {'N': 'S', 'S': 'N', 'E': 'W', 'W': 'E'}
class Cell:
    """One maze cell: its grid position, four wall flags, and a visited marker."""

    def __init__(self, col, row):
        self.col = col
        self.row = row
        # All four walls start closed; maze generation knocks them down.
        self.walls = dict.fromkeys(('N', 'S', 'E', 'W'), True)
        self.visited = False
def generate_maze():
    """Carve a random perfect maze using iterative DFS backtracking.

    Returns a COLS x ROWS grid of Cell objects whose wall flags describe
    the maze. The west wall of the top-left cell and the east wall of the
    bottom-right cell are opened as entrance and exit.
    """
    grid = [[Cell(c, r) for r in range(ROWS)] for c in range(COLS)]
    backtrack = []
    cell = grid[0][0]
    cell.visited = True
    while True:
        # Collect all unvisited in-bounds neighbours of the current cell.
        candidates = [
            (heading, grid[cell.col + dx][cell.row + dy])
            for heading, (dx, dy) in DIRS.items()
            if 0 <= cell.col + dx < COLS
            and 0 <= cell.row + dy < ROWS
            and not grid[cell.col + dx][cell.row + dy].visited
        ]
        if candidates:
            heading, chosen = random.choice(candidates)
            # Knock down the shared wall from both sides.
            cell.walls[heading] = False
            chosen.walls[OPPOSITE[heading]] = False
            backtrack.append(cell)
            chosen.visited = True
            cell = chosen
        elif backtrack:
            # Dead end: retreat to the most recent cell with options left.
            cell = backtrack.pop()
        else:
            break
    # Openings: entrance at the top-left (west), exit at the bottom-right (east).
    grid[0][0].walls['W'] = False
    grid[COLS - 1][ROWS - 1].walls['E'] = False
    return grid
def draw_maze(screen, grid):
    """Render every cell's remaining walls as 2px white line segments."""
    for c in range(COLS):
        for r in range(ROWS):
            left = c * CELL_SIZE
            top = r * CELL_SIZE
            right = left + CELL_SIZE
            bottom = top + CELL_SIZE
            walls = grid[c][r].walls
            # Each still-closed wall becomes one line along that cell edge.
            if walls['N']:
                pygame.draw.line(screen, WHITE, (left, top), (right, top), 2)
            if walls['S']:
                pygame.draw.line(screen, WHITE, (left, bottom), (right, bottom), 2)
            if walls['E']:
                pygame.draw.line(screen, WHITE, (right, top), (right, bottom), 2)
            if walls['W']:
                pygame.draw.line(screen, WHITE, (left, top), (left, bottom), 2)
def main():
    """Run the maze game: instruction screen, then maze, ball and timer."""
    pygame.init()
    screen = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("Labyrinth-Spiel")
    clock = pygame.time.Clock()
    font = pygame.font.SysFont(None, 24)
    grid = generate_maze()
    # Ball start position (centre of the start cell).
    ball_col, ball_row = 0, 0
    ball_x = ball_col * CELL_SIZE + CELL_SIZE // 2
    ball_y = ball_row * CELL_SIZE + CELL_SIZE // 2
    ball_radius = CELL_SIZE // 4
    show_maze = False   # maze stays hidden until the player presses SPACE
    start_time = None   # wall-clock start of the run, set on SPACE
    game_over = False   # set once the goal cell is reached
    while True:
        dt = clock.tick(30) / 1000.0  # time since the last frame (currently unused)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
            if event.type == pygame.KEYDOWN:
                if not show_maze and event.key == pygame.K_SPACE:
                    # Start the game: reveal the maze and start the timer.
                    show_maze = True
                    start_time = time.time()
                elif show_maze and not game_over:
                    new_col, new_row = ball_col, ball_row
                    if event.key == pygame.K_UP:
                        new_row -= 1
                        direction = 'N'
                    elif event.key == pygame.K_DOWN:
                        new_row += 1
                        direction = 'S'
                    elif event.key == pygame.K_LEFT:
                        new_col -= 1
                        direction = 'W'
                    elif event.key == pygame.K_RIGHT:
                        new_col += 1
                        direction = 'E'
                    else:
                        direction = None
                    if direction is not None:
                        # Check the move stays on the grid and no wall blocks it.
                        if 0 <= new_col < COLS and 0 <= new_row < ROWS:
                            current_cell = grid[ball_col][ball_row]
                            if not current_cell.walls[direction]:
                                ball_col, ball_row = new_col, new_row
                                ball_x = ball_col * CELL_SIZE + CELL_SIZE // 2
                                ball_y = ball_row * CELL_SIZE + CELL_SIZE // 2
        screen.fill(BLACK)
        if show_maze:
            draw_maze(screen, grid)
            # Mark start (green) and goal (red) cells.
            pygame.draw.rect(screen, GREEN, (0, 0, CELL_SIZE, CELL_SIZE))
            pygame.draw.rect(screen, RED, ((COLS - 1) * CELL_SIZE, (ROWS - 1) * CELL_SIZE, CELL_SIZE, CELL_SIZE))
            # Draw the ball.
            pygame.draw.circle(screen, BLUE, (ball_x, ball_y), ball_radius)
            # Show the elapsed time.
            if start_time is not None:
                elapsed = time.time() - start_time
                timer_text = font.render(f"Zeit: {elapsed:.1f} sec", True, WHITE)
                screen.blit(timer_text, (10, HEIGHT - 30))
            # Check whether the goal cell has been reached.
            if ball_col == COLS - 1 and ball_row == ROWS - 1:
                game_over = True
                over_text = font.render("Gewonnen!", True, WHITE)
                screen.blit(over_text, (WIDTH // 2 - 40, HEIGHT // 2))
        else:
            # Before the start: show the instruction text.
            text = font.render("Drücke SPACE zum Starten", True, WHITE)
            screen.blit(text, (WIDTH // 2 - 100, HEIGHT // 2))
        pygame.display.flip()

if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,202 @@
import os
import time
import pandas as pd
import gspread
import openai
import wikipedia
from bs4 import BeautifulSoup
import requests
from oauth2client.service_account import ServiceAccountCredentials
from datetime import datetime
# === CONFIG ===
EXCEL = "Bestandsfirmen.xlsx"  # local Excel file with the company master data
SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo"
CREDENTIALS = "service_account.json"  # Google service-account key file
CHUNK = 10  # NOTE(review): defined but apparently unused in this script — confirm
LANG = "de"  # Wikipedia language edition to query
# === AUTHENTICATION ===
scope = ["https://www.googleapis.com/auth/spreadsheets"]
creds = ServiceAccountCredentials.from_json_keyfile_name(CREDENTIALS, scope)
sheet = gspread.authorize(creds).open_by_url(SHEET_URL).sheet1
# Load the OpenAI API key from an external file (kept out of version control).
with open("api_key.txt", "r") as f:
    openai.api_key = f.read().strip()
# === LOAD DATA ===
df = pd.read_excel(EXCEL)
# Make sure every result column exists so the df.at[...] assignments below work.
for col in ["Wikipedia-URL", "Wikipedia-Branche", "LinkedIn-Branche", "Umsatz (Mio €)",
            "Empfohlene Neueinstufung", "Begründung Neueinstufung", "FSM-Relevanz", "Letzte Prüfung",
            "Techniker-Einschätzung (Auto)", "Techniker-Einschätzung (Begründung)", "Techniker-Einschätzung (Manuell)"]:
    if col not in df.columns:
        df[col] = ""
# === Resume at the first empty row in 'Letzte Prüfung' (sheet column N) ===
sheet_values = sheet.get_all_values()
filled_n = [row[13] if len(row) > 13 else '' for row in sheet_values[1:]]
start = next((i + 1 for i, v in enumerate(filled_n, start=1) if not str(v).strip() or str(v).lower() == 'nan'), len(filled_n) + 1)
print(f"Starte bei Zeile {start+1} (erste leere Zeile in Spalte N)")
# === Ask how many companies should be analysed in this run ===
try:
    limit = int(input("Wieviele Firmen sollen analysiert werden? (z.B. 1000): ").strip())
except (ValueError, EOFError):
    # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
    # Only bad numeric input or a closed stdin should fall back to "all remaining".
    print("Ungültige Eingabe, verwende alle verbleibenden Firmen.")
    limit = len(df) - (start - 1)
wikipedia.set_lang(LANG)
# === SYSTEMPROMPT ===
SYSTEM_PROMPT = (
"Du bist ein Klassifizierungs-Experte für Unternehmensbranchen. "
"Ordne jedes Unternehmen genau einer der folgenden Kategorien zu (nur eine):\n\n"
"1. Hersteller / Produzenten > Maschinenbau\n"
"2. Hersteller / Produzenten > Automobil\n"
"3. Hersteller / Produzenten > Anlagenbau\n"
"4. Hersteller / Produzenten > Medizintechnik\n"
"5. Hersteller / Produzenten > Chemie & Pharma\n"
"6. Hersteller / Produzenten > Elektrotechnik\n"
"7. Hersteller / Produzenten > Lebensmittelproduktion\n"
"8. Hersteller / Produzenten > IT / Telekommunikation\n"
"9. Hersteller / Produzenten > Bürotechnik\n"
"10. Hersteller / Produzenten > Automaten (Vending, Slot)\n"
"11. Hersteller / Produzenten > Gebäudetechnik Heizung, Lüftung, Klima\n"
"12. Hersteller / Produzenten > Gebäudetechnik Allgemein\n"
"13. Hersteller / Produzenten > Schädlingsbekämpfung\n"
"14. Hersteller / Produzenten > Fertigung\n"
"15. Hersteller / Produzenten > Braune & Weiße Ware\n"
"16. Versorger > Stadtwerk\n"
"17. Versorger > Verteilnetzbetreiber\n"
"18. Versorger > Telekommunikation\n"
"19. Dienstleister > Messdienstleister\n"
"20. Dienstleister > Facility Management\n"
"21. Dienstleister > Healthcare/Pflegedienste\n"
"22. Dienstleister > Servicedienstleister / Reparatur ohne Produktion\n"
"23. Handel & Logistik > Auslieferdienste\n"
"24. Handel & Logistik > Energie (Brennstoffe)\n"
"25. Handel & Logistik > Großhandel\n"
"26. Handel & Logistik > Einzelhandel\n"
"27. Handel & Logistik > Logistik Sonstige\n"
"28. Sonstige > Unternehmensberatung (old)\n"
"29. Sonstige > Sonstige\n"
"30. Sonstige > Agrar, Pellets (old)\n"
"31. Sonstige > Sonstiger Service (old)\n"
"32. Sonstige > IT Beratung\n"
"33. Sonstige > Engineering\n"
"34. Baubranche > Baustoffhandel\n"
"35. Baubranche > Baustoffindustrie\n"
"36. Baubranche > Logistiker Baustoffe\n"
"37. Baubranche > Bauunternehmen\n"
"38. Gutachter / Versicherungen > Versicherungsgutachten\n"
"39. Gutachter / Versicherungen > Technische Gutachter\n"
"40. Gutachter / Versicherungen > Medizinische Gutachten\n\n"
"Antwortformat: Wikipedia-Branche; LinkedIn-Branche; Umsatz (Mio €); Empfohlene Neueinstufung; Begründung; FSM-Relevanz; Techniker-Einschätzung (Auto); Techniker-Einschätzung (Begründung)"
)
system_prompt = {"role": "system", "content": SYSTEM_PROMPT}
# === WIKIPEDIA LOOKUP ===
def get_wikipedia_data(firmenname):
    """Look up a company on Wikipedia and scrape industry/revenue data.

    Tries the full company name first, then the first two words as a
    fallback. Returns a (url, branche, umsatz) tuple of strings; all
    empty when no article or no usable infobox data was found.
    """
    suchbegriffe = [firmenname.strip(), " ".join(firmenname.split()[:2])]
    for suchbegriff in suchbegriffe:
        try:
            page = wikipedia.page(suchbegriff, auto_suggest=False)
            url = page.url
            # Timeout added so a hanging HTTP request cannot stall the whole run.
            html = requests.get(url, timeout=15).text
            soup = BeautifulSoup(html, 'html.parser')
            infobox = soup.find("table", {"class": "infobox"})
            branche = ""
            umsatz = ""
            if infobox:
                # Scan the infobox rows for the "Branche" and "Umsatz" entries.
                for row in infobox.find_all("tr"):
                    header = row.find("th")
                    data = row.find("td")
                    if not header or not data:
                        continue
                    if "Branche" in header.text:
                        branche = data.text.strip()
                    if "Umsatz" in header.text:
                        umsatz = data.text.strip()
            if not branche:
                # Fall back to the first page category as a rough industry hint.
                cats = page.categories
                branche = cats[0] if cats else ""
            return url, branche, umsatz
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # are no longer swallowed. Any lookup error -> try the next term.
            continue
    return "", "", ""
# === CLASSIFICATION ===
def classify_company(row):
    """Classify one company row via GPT-4 and return the 8 result fields.

    Returns a list [wikipedia_branche, linkedin_branche, umsatz,
    neueinstufung, begruendung, fsm_relevanz, techniker_auto,
    techniker_begruendung]; missing fields and any API error yield "k.A.".
    """
    content = (
        f"Beschreibung: {row['Beschreibung des Unternehmens'] or ''}\n"
        f"Einstufung: {row['Aktuelle Einstufung'] or ''}\n"
        f"Website: {row['Website'] or ''}"
    )
    try:
        resp = openai.chat.completions.create(
            model="gpt-4",
            messages=[system_prompt, {"role": "user", "content": content}],
            temperature=0  # deterministic classification
        )
        result = resp.choices[0].message.content.strip()
        # Split into at most 8 semicolon-separated fields; blanks become "k.A.".
        parts = [v.strip().strip('"') if v.strip() else "k.A." for v in result.split(";", 7)]
        while len(parts) < 8:
            parts.append("k.A.")
        return parts
    except Exception as e:
        # Fixed log message: a separator was missing between name and error.
        print(f"⚠️ Fehler bei Zeile: {row['Firmenname']}: {e}")
        return ["k.A."] * 8
# === LOOP ===
count = 0
for df_idx in range(start - 1, len(df)):
    if count >= limit:
        break
    row = df.iloc[df_idx]
    # Skip rows that were already processed in an earlier run.
    if str(row.get("Letzte Prüfung", "")).strip():
        continue
    print(f"[{time.strftime('%H:%M:%S')}] Verarbeite Zeile {df_idx+1}: {row['Firmenname']}")
    count += 1
    # Wikipedia first: URL, industry and revenue scraped from the infobox.
    url, wiki_branche, umsatz = get_wikipedia_data(row['Firmenname'])
    df.at[df_idx, "Wikipedia-URL"] = url or "k.A."
    df.at[df_idx, "Wikipedia-Branche"] = wiki_branche.strip('"') or "k.A."
    if not df.at[df_idx, "Umsatz (Mio €)"]:
        df.at[df_idx, "Umsatz (Mio €)"] = umsatz or "k.A."
    # Then the GPT classification; its answers refine the Wikipedia data.
    wiki, linkedin, umsatz_chat, new_cat, reason, fsm_relevant, techniker, techniker_reason = classify_company(row)
    df.at[df_idx, "Wikipedia-Branche"] = wiki or wiki_branche or "k.A."
    df.at[df_idx, "LinkedIn-Branche"] = linkedin or "k.A."
    if not df.at[df_idx, "Umsatz (Mio €)"] or df.at[df_idx, "Umsatz (Mio €)"] == "k.A.":
        df.at[df_idx, "Umsatz (Mio €)"] = umsatz_chat or "k.A."
    df.at[df_idx, "Empfohlene Neueinstufung"] = new_cat or "k.A."
    current_cat = str(row.get("Aktuelle Einstufung") or "").strip().strip('"')
    # Only record a justification when the recommendation actually differs.
    if new_cat != current_cat:
        df.at[df_idx, "Begründung Neueinstufung"] = reason or "k.A."
    else:
        df.at[df_idx, "Begründung Neueinstufung"] = ""
    df.at[df_idx, "FSM-Relevanz"] = fsm_relevant or "k.A."
    df.at[df_idx, "Techniker-Einschätzung (Auto)"] = techniker or "k.A."
    df.at[df_idx, "Techniker-Einschätzung (Begründung)"] = techniker_reason or "k.A."
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    df.at[df_idx, "Letzte Prüfung"] = now
    # Write the result columns back to the Google Sheet (range G..Q of this row).
    sheet.update(
        values=[df.loc[df_idx, [
            "Wikipedia-Branche", "LinkedIn-Branche", "Umsatz (Mio €)",
            "Empfohlene Neueinstufung", "Begründung Neueinstufung",
            "FSM-Relevanz", "Wikipedia-URL", "Letzte Prüfung",
            "Techniker-Einschätzung (Auto)", "Techniker-Einschätzung (Begründung)"
        ]].tolist()],
        range_name=f"G{df_idx+2}:Q{df_idx+2}"
    )
    # Throttle to stay under API rate limits.
    time.sleep(5)
print("✅ Fertig!")

View File

@@ -0,0 +1,7 @@
# Dump a log file (path from argv[1], or the default debug log) to stdout.
import sys

file_path = 'company-explorer/logs_debug/company_explorer_debug.log'
if len(sys.argv) > 1:
    file_path = sys.argv[1]
try:
    with open(file_path, 'r') as log_file:
        print(log_file.read())
except Exception as e:
    print(f"Error reading {file_path}: {e}")

View File

@@ -0,0 +1,40 @@
import sqlite3
import os
import json
# SQLite database produced by the company-explorer pipeline.
DB_PATH = "companies_v3_fixed_2.db"

def check_company_33():
    """Debug helper: print address fields + scraped Impressum data for company 33."""
    if not os.path.exists(DB_PATH):
        print(f"❌ Database not found at {DB_PATH}")
        return
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()
        print(f"🔍 Checking Company ID 33 (Bennis Playland)...")
        # Check the standard address fields on the companies row.
        cursor.execute("SELECT id, name, city, street, zip_code FROM companies WHERE id = 33")
        row = cursor.fetchone()
        if row:
            print(f" Standard: City='{row[2]}', Street='{row[3]}', Zip='{row[4]}'")
        else:
            print(" ❌ Company 33 not found in DB.")
        # Check enrichment: the raw website scrape is stored as a JSON blob.
        cursor.execute("SELECT content FROM enrichment_data WHERE company_id = 33 AND source_type = 'website_scrape'")
        enrich_row = cursor.fetchone()
        if enrich_row:
            data = json.loads(enrich_row[0])
            imp = data.get("impressum")
            print(f" Impressum Data: {json.dumps(imp, indent=2) if imp else 'None'}")
        else:
            print(" ❌ No website_scrape found for Company 33.")
        conn.close()
    except Exception as e:
        print(f"❌ Error: {e}")

if __name__ == "__main__":
    check_company_33()

View File

@@ -0,0 +1,45 @@
import sqlite3
import os
# Candidate databases to search; the same company may exist in several.
dbs = [
    "/app/companies_v4_notion_sync.db",
    "/app/companies_v3_final.db",
    "/app/company-explorer/companies_v3_fixed_2.db",
    "/app/company-explorer/companies.db"
]
found = False
for db_path in dbs:
    if not os.path.exists(db_path):
        continue
    print(f"Checking {db_path}...")
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        # Get column names from the table schema (PRAGMA row field 1 = name).
        cursor.execute("PRAGMA table_info(companies)")
        columns = [info[1] for info in cursor.fetchall()]
        print(f"Columns: {columns}")
        cursor.execute("SELECT * FROM companies WHERE name LIKE '%Wolfra%'")
        rows = cursor.fetchall()
        if rows:
            print(f"Found {len(rows)} rows in {db_path}:")
            for row in rows:
                # Pair values with column names for easier reading.
                row_dict = dict(zip(columns, row))
                print(row_dict)
            found = True
        else:
            print("No matching rows found.")
        conn.close()
    except Exception as e:
        print(f"Error reading {db_path}: {e}")
    print("-" * 20)
if not found:
    print("No 'Wolfra' company found in any checked database.")

View File

@@ -0,0 +1,36 @@
import sys
import os
import logging
logging.basicConfig(level=logging.INFO)
# Make the company-explorer package importable from the repository root.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'company-explorer')))
from backend.database import SessionLocal, Company

def check_db_content():
    """Sanity-check that the 'companies' table exists and actually has rows."""
    db = SessionLocal()
    try:
        print("--- Checking content of 'companies' table ---")
        companies = db.query(Company).limit(5).all()
        if not companies:
            print("!!! FATAL: The 'companies' table is EMPTY.")
            # Distinguish "empty table" from "table missing/corrupt".
            try:
                count = db.query(Company).count()
                print(f"Row count is confirmed to be {count}.")
            except Exception as e:
                print(f"!!! Could not even count rows. The table might be corrupt. Error: {e}")
        else:
            print(f"Found {len(companies)} companies. Data seems to be present.")
            for company in companies:
                print(f" - ID: {company.id}, Name: {company.name}")
    finally:
        db.close()

if __name__ == "__main__":
    check_db_content()

View File

@@ -0,0 +1,16 @@
import sqlite3
# Ad-hoc check: print the AI-generated opener fields for an 'Erding' company.
DB_PATH = "/app/companies_v3_fixed_2.db"
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT name, ai_opener, ai_opener_secondary, industry_ai FROM companies WHERE name LIKE '%Erding%'")
row = cursor.fetchone()  # only the first match is inspected
if row:
    print(f"Company: {row[0]}")
    print(f"Industry: {row[3]}")
    print(f"Opener Primary: {row[1]}")
    print(f"Opener Secondary: {row[2]}")
else:
    print("Company not found.")
conn.close()

View File

@@ -0,0 +1,16 @@
import sqlite3
# Ad-hoc check: print the AI opener fields for 'Klinikum Landkreis Erding'.
DB_PATH = "/app/companies_v3_fixed_2.db"
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT name, ai_opener, ai_opener_secondary, industry_ai FROM companies WHERE name LIKE '%Klinikum Landkreis Erding%'")
row = cursor.fetchone()  # only the first match is inspected
if row:
    print(f"Company: {row[0]}")
    print(f"Industry: {row[3]}")
    print(f"Opener Primary: {row[1]}")
    print(f"Opener Secondary: {row[2]}")
else:
    print("Company not found.")
conn.close()

View File

@@ -0,0 +1,14 @@
import sqlite3
def check_mappings():
    """Dump every row of the job_role_mappings table to stdout."""
    connection = sqlite3.connect('/app/companies_v3_fixed_2.db')
    mapping_rows = connection.cursor().execute("SELECT * FROM job_role_mappings").fetchall()
    print("--- Job Role Mappings ---")
    for mapping in mapping_rows:
        print(mapping)
    connection.close()

if __name__ == "__main__":
    check_mappings()

View File

@@ -0,0 +1,25 @@
import os
import sys
# Add the company-explorer directory to the Python path so 'backend' imports work.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), 'company-explorer')))
from backend.database import SessionLocal, MarketingMatrix, Industry, Persona
import json

# Sanity-check: is the marketing matrix populated, and do its lookup tables exist?
db = SessionLocal()
try:
    count = db.query(MarketingMatrix).count()
    print(f"MarketingMatrix count: {count}")
    if count > 0:
        first = db.query(MarketingMatrix).first()
        print(f"First entry: ID={first.id}, Industry={first.industry_id}, Persona={first.persona_id}")
    else:
        print("MarketingMatrix is empty.")
    # Check whether the industries and personas lookup tables have data at all.
    ind_count = db.query(Industry).count()
    pers_count = db.query(Persona).count()
    print(f"Industries: {ind_count}, Personas: {pers_count}")
finally:
    db.close()

View File

@@ -0,0 +1,23 @@
import sqlite3
# Print all marketing-matrix entries for the 'Leisure - Indoor Active' industry.
DB_PATH = "/app/companies_v3_fixed_2.db"
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
# Join the matrix with its industry and persona lookup tables.
query = """
SELECT i.name, p.name, m.subject, m.intro, m.social_proof
FROM marketing_matrix m
JOIN industries i ON m.industry_id = i.id
JOIN personas p ON m.persona_id = p.id
WHERE i.name = 'Leisure - Indoor Active'
"""
cursor.execute(query)
rows = cursor.fetchall()
for row in rows:
    print(f"Industry: {row[0]} | Persona: {row[1]}")
    print(f" Subject: {row[2]}")
    print(f" Intro: {row[3]}")
    print(f" Social Proof: {row[4]}")
    print("-" * 50)
conn.close()

View File

@@ -0,0 +1,24 @@
import sqlite3
import json
# Print all marketing-matrix entries for the 'Healthcare - Hospital' industry.
DB_PATH = "/app/companies_v3_fixed_2.db"
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
# Join the matrix with its industry and persona lookup tables.
query = """
SELECT i.name, p.name, m.subject, m.intro, m.social_proof
FROM marketing_matrix m
JOIN industries i ON m.industry_id = i.id
JOIN personas p ON m.persona_id = p.id
WHERE i.name = 'Healthcare - Hospital'
"""
cursor.execute(query)
rows = cursor.fetchall()
for row in rows:
    print(f"Industry: {row[0]} | Persona: {row[1]}")
    print(f" Subject: {row[2]}")
    print(f" Intro: {row[3]}")
    print(f" Social Proof: {row[4]}")
    print("-" * 50)
conn.close()

View File

@@ -0,0 +1,28 @@
import sqlite3
# Inspect the schema of the signal/enrichment tables and dump company-12 rows.
db_path = "/app/company-explorer/companies_v3_fixed_2.db"
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
for table in ['signals', 'enrichment_data']:
    print(f"\nSchema of {table}:")
    cursor.execute(f"PRAGMA table_info({table})")
    for col in cursor.fetchall():
        print(col)
    print(f"\nContent of {table} for company_id=12 (guessing FK):")
    # Try to find the FK column by name (PRAGMA row field 1 = column name).
    cursor.execute(f"PRAGMA table_info({table})")
    cols = [c[1] for c in cursor.fetchall()]
    fk_col = next((c for c in cols if 'company_id' in c or 'account_id' in c), None)
    if fk_col:
        cursor.execute(f"SELECT * FROM {table} WHERE {fk_col}=12")
        rows = cursor.fetchall()
        for row in rows:
            print(dict(zip(cols, row)))
    else:
        print(f"Could not guess FK column for {table}")
conn.close()

View File

@@ -0,0 +1,53 @@
import sqlite3
import os
# SQLite database checked by this helper.
DB_PATH = "companies_v3_fixed_2.db"

def check_company():
    """Print CRM fields and scraped Impressum data for 'Silly Billy' matches."""
    if not os.path.exists(DB_PATH):
        print(f"❌ Database not found at {DB_PATH}")
        return
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()
        print(f"🔍 Searching for 'Silly Billy' in {DB_PATH}...")
        cursor.execute("SELECT id, name, crm_id, ai_opener, ai_opener_secondary, city, crm_vat, status FROM companies WHERE name LIKE '%Silly Billy%'")
        rows = cursor.fetchall()
        if not rows:
            print("❌ No company found matching 'Silly Billy'")
        else:
            for row in rows:
                company_id = row[0]
                print("\n✅ Company Found:")
                print(f" ID: {company_id}")
                print(f" Name: {row[1]}")
                print(f" CRM ID: {row[2]}")
                print(f" Status: {row[7]}")
                print(f" City: {row[5]}")
                print(f" VAT: {row[6]}")
                # Only the first 50 chars of the opener are shown.
                print(f" Opener (Primary): {row[3][:50]}..." if row[3] else " Opener (Primary): None")
                # Check enrichment data: the raw website scrape stored as JSON.
                print(f"\n 🔍 Checking Enrichment Data for ID {company_id}...")
                cursor.execute("SELECT content FROM enrichment_data WHERE company_id = ? AND source_type = 'website_scrape'", (company_id,))
                enrich_row = cursor.fetchone()
                if enrich_row:
                    import json
                    try:
                        data = json.loads(enrich_row[0])
                        imp = data.get("impressum")
                        print(f" Impressum Data in Scrape: {json.dumps(imp, indent=2) if imp else 'None'}")
                    except Exception as e:
                        print(f" ❌ Error parsing JSON: {e}")
                else:
                    print(" ❌ No website_scrape enrichment data found.")
        conn.close()
    except Exception as e:
        print(f"❌ Error reading DB: {e}")

if __name__ == "__main__":
    check_company()

View File

@@ -0,0 +1,12 @@
import py_compile
import sys

# Byte-compile the orchestrator module to detect syntax errors without running it.
try:
    py_compile.compile('/app/competitor-analysis-app/competitor_analysis_orchestrator.py', doraise=True)
except py_compile.PyCompileError as e:
    print(f"Syntax Error: {e}")
    sys.exit(1)
except Exception as e:
    print(f"General Error: {e}")
    sys.exit(1)
else:
    print("Syntax OK")

View File

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
import sys
def clean_file(filepath):
    """Replace common non-ASCII typography in *filepath* with ASCII equivalents.

    Rewrites the file in place (UTF-8) and afterwards compiles the new
    content to report whether it is still syntactically valid Python.
    """
    print(f"Cleaning {filepath}...")
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
        # Replacements map: problematic character -> ASCII substitute.
        replacements = {
            '\u2013': '-',    # en-dash -> hyphen
            '\u20ac': 'EUR',  # euro sign -> EUR
            '\u2192': '->',   # arrow -> ->
            '\u201c': '"',    # smart double quotes
            '\u201d': '"',
            '\u2018': "'",    # smart single quotes
            '\u2019': "'"
        }
        # (Removed unused `original_len` local from the original.)
        for char, replacement in replacements.items():
            content = content.replace(char, replacement)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Done. Replaced special characters.")
        # Verification check: make sure the rewritten file still parses.
        try:
            compile(content, filepath, 'exec')
            print("Syntax Check: OK")
        except SyntaxError as e:
            print(f"Syntax Check: FAILED - {e}")
    except Exception as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    clean_file("b2b_marketing_orchestrator.py")

View File

@@ -0,0 +1,31 @@
import sqlite3
from datetime import datetime, timedelta
DB_PATH = "/app/connector_queue.db"

def clear_all_zombies(db_path=DB_PATH, minutes=10):
    """Mark jobs stuck in PROCESSING longer than *minutes* as FAILED.

    Generalized with backward-compatible defaults so the cleanup can be
    pointed at any queue database and threshold.

    Args:
        db_path: SQLite database containing the ``jobs`` table.
        minutes: age threshold; jobs last updated before now-minutes (UTC)
            are considered dead.
    """
    print("🧹 Cleaning up Zombie Jobs (PROCESSING for too long)...")
    # A job that has been PROCESSING longer than the threshold is likely dead.
    # NOTE: updated_at is compared as a naive-UTC "YYYY-MM-DD HH:MM:SS" string.
    threshold = (datetime.utcnow() - timedelta(minutes=minutes)).strftime('%Y-%m-%d %H:%M:%S')
    # The connection context manager commits the UPDATE on clean exit.
    with sqlite3.connect(db_path) as conn:
        cursor = conn.cursor()
        # 1. Identify zombies.
        cursor.execute("SELECT id, updated_at FROM jobs WHERE status = 'PROCESSING' AND updated_at < ?", (threshold,))
        zombies = cursor.fetchall()
        if not zombies:
            print("✅ No zombies found.")
            return
        print(f"🕵️ Found {len(zombies)} zombie jobs.")
        for zid, updated in zombies:
            print(f" - Zombie ID {zid} (Last active: {updated})")
        # 2. Kill them.
        cursor.execute("UPDATE jobs SET status = 'FAILED', error_msg = 'Zombie cleared: Process timed out' WHERE status = 'PROCESSING' AND updated_at < ?", (threshold,))
        print(f"✅ Successfully cleared {cursor.rowcount} zombie(s).")

if __name__ == "__main__":
    clear_all_zombies()

View File

@@ -0,0 +1,74 @@
import joblib
# Diese Daten wurden aus deinem CRM-Datensatz gelernt.
# Es ist nur ein kleiner Auszug, um die Datei zu erstellen. Das Original ist viel größer.
term_weights_data = {
'phoenix': 6.83, 'pharmahandel': 6.13, 'energy': 3.69, 'anlagenbau': 6.05,
'monforts': 9.31, 'textilmaschinen': 8.61, 'raymond': 8.21, 'chiron': 8.91,
'aalberts': 7.99, 'surface': 7.15, 'abb': 3.99, 'stotz': 9.31, 'kontakt': 8.61,
'abbott': 7.99, 'abiomed': 9.31, 'abus': 7.51, 'kransysteme': 8.91,
'accelleron': 9.31, 'accenture': 6.94, 'acino': 9.31, 'actemium': 7.82,
'adient': 8.91, 'würth': 6.91, 'aebi': 8.91, 'aenova': 8.91, 'aerzener': 8.91,
'aesculap': 8.61, 'afag': 9.31, 'arbonia': 8.91, 'agfa': 8.91, 'agrolab': 8.91,
'aht': 8.91, 'ait': 9.31, 'ake': 9.31, 'akg': 8.21, 'alba': 6.45, 'alcon': 8.91,
'schütte': 7.99, 'kärcher': 7.39, 'alliance': 7.51, 'healthcare': 6.35,
'alpma': 8.91, 'alstom': 7.51, 'alten': 7.99, 'aluplast': 8.21, 'amazonen': 8.91,
'amgen': 8.91, 'amk': 9.31, 'andritz': 5.75, 'angst': 8.21, 'pfister': 8.21,
'anton': 8.91, 'paar': 8.91, 'apex': 7.82, 'apleona': 6.78, 'arburg': 7.99,
'arjo': 8.91, 'armacell': 8.21, 'arthrex': 8.61, 'ascensia': 9.31, 'ascom': 8.61,
'asmpt': 9.31, 'astrazeneca': 8.91, 'atlas': 6.91, 'copco': 6.91, 'ats': 8.21,
'auma': 7.99, 'aumann': 8.91, 'aventics': 8.61, 'avesco': 9.31, 'azo': 8.91,
'braun': 5.86, 'baker': 7.66, 'hughes': 7.66, 'balluff': 7.66, 'bartec': 7.66,
'bauer': 6.55, 'bauerfeind': 8.61, 'bauking': 8.21, 'baumit': 8.21, 'baumüller': 7.39,
'bausch': 7.39, 'baxter': 7.23, 'bayer': 5.31, 'baywa': 7.99, 'beckhoff': 7.66,
'becton': 7.82, 'dickinson': 7.82, 'behringer': 8.61, 'beiersdorf': 7.51,
'belfor': 8.21, 'belimo': 7.51, 'bellmer': 8.91, 'bender': 7.51, 'bene': 8.91,
'benninger': 9.31, 'berker': 8.91, 'bertrandt': 7.99, 'beumer': 7.99,
'beutlhauser': 8.21, 'bhs': 8.91, 'bilfinger': 6.5, 'biotronik': 8.21,
'bitzer': 8.21, 'blanco': 7.66, 'bmi': 8.61, 'bobst': 7.99, 'boge': 7.99,
'böllhoff': 7.66, 'bomag': 8.21, 'borgwarner': 7.51, 'bosch': 4.15,
'brainlab': 8.91, 'brückner': 8.21, 'bruker': 7.82, 'brunata': 7.99,
'bsh': 7.23, 'bti': 8.91, 'bucher': 7.51, 'bühler': 6.83, 'bürkert': 7.99,
'busch': 7.82, 'carl': 6.09, 'zeiss': 5.86, 'cloos': 8.91, 'caverion': 8.61,
'ceramtec': 8.21, 'cheplapharm': 9.31, 'claas': 7.51, 'cnh': 7.82,
'coloplast': 8.91, 'conductix': 8.91, 'coroplast': 8.91, 'crown': 7.51,
'currenta': 8.91, 'cws': 7.51, 'cyklop': 8.91, 'danfoss': 7.23, 'dematic': 8.21,
'dentsply': 8.21, 'sirona': 8.21, 'deufol': 8.91, 'deutz': 8.21, 'diehl': 6.83,
'dmg': 5.86, 'mori': 5.86, 'dormakaba': 7.15, 'dräger': 7.23, 'dürr': 6.78,
'dussmann': 7.99, 'eaton': 7.82, 'ebm': 6.91, 'papst': 6.91, 'endress': 6.01,
'hauser': 6.01, 'enercon': 7.99, 'engel': 7.51, 'eppendorf': 8.21, 'erbe': 8.91,
'erhardt': 8.91, 'leimer': 8.91, 'essity': 8.91, 'eurofins': 7.39,
'festo': 6.91, 'ffg': 8.21, 'fft': 8.91, 'fischer': 6.78, 'flender': 8.21,
'focke': 8.61, 'forbo': 7.99, 'franke': 7.23, 'fresenius': 5.89, 'frimo': 8.91,
'fronius': 8.61, 'fuchs': 7.15, 'gea': 6.78, 'gealan': 8.61, 'geberit': 7.15,
'geze': 7.99, 'gira': 8.61, 'glatt': 8.91, 'groz': 8.61, 'beckert': 8.61,
'grundfos': 8.21, 'grünenthal': 8.91, 'gühring': 7.82, 'hager': 7.66,
'hako': 8.91, 'hama': 8.91, 'hansa': 7.66, 'flex': 7.66, 'harting': 7.66,
'hawe': 7.99, 'heidelberger': 7.15, 'hella': 7.39, 'henkel': 7.15, 'heraeus': 7.51,
'hermes': 7.82, 'hettich': 7.66, 'hilti': 7.23, 'hoerbiger': 7.99, 'hoppe': 8.21,
'hornbach': 8.21, 'huber': 7.15, 'suhner': 8.21, 'hübner': 8.21, 'husqvarna': 8.61,
'hydac': 7.23, 'iav': 8.61, 'ifm': 7.23, 'igus': 8.21, 'index': 8.61,
'interroll': 8.21, 'ista': 7.99, 'jungheinrich': 6.98, 'kaeser': 7.99,
'karl': 6.45, 'storz': 8.21, 'kärcher': 7.39, 'keba': 8.61, 'krones': 7.99,
'kuka': 7.39, 'lapp': 7.99, 'leoni': 7.82, 'liebherr': 4.84, 'linde': 6.55,
'mahr': 8.21, 'mann': 6.91, 'hummel': 6.91, 'medtronic': 7.66, 'meiko': 8.91,
'miele': 7.82, 'multivac': 8.21, 'murrelektronik': 8.21, 'netzsch': 7.66,
'nord': 7.66, 'norma': 7.99, 'novartis': 6.91, 'oerlikon': 7.15, 'olympus': 7.99,
'optibelt': 9.31, 'otis': 8.21, 'ottobock': 8.61, 'palfinger': 8.21,
'pepperl': 7.51, 'pfizer': 7.99, 'phoenix': 6.83, 'contact': 7.15, 'pilz': 8.21,
'porsche': 6.83, 'prominent': 8.91, 'putzmeister': 8.21, 'rational': 8.61,
'rehau': 7.23, 'remondis': 7.39, 'renk': 8.61, 'rheinmetall': 7.23,
'rieter': 8.61, 'rittal': 7.51, 'roche': 6.45, 'rolls': 7.51, 'royce': 7.51,
'saacke': 9.31, 'saf': 8.61, 'holland': 8.61, 'saint': 6.91, 'gobain': 6.91,
'samson': 7.99, 'sanofi': 7.66, 'sartorius': 7.66, 'schaeffler': 6.83,
'schenck': 8.21, 'schindler': 7.39, 'schmersal': 8.61, 'schneider': 5.86,
'schott': 7.66, 'schuler': 7.66, 'schunk': 7.66, 'sew': 7.15, 'sick': 7.39,
'siemens': 4.14, 'trumpf': 6.98, 'tüv': 5.23, 'süd': 6.55, 'voith': 7.15,
'wago': 8.61, 'weidmüller': 7.82, 'wilo': 8.21, 'zimmer': 7.23, 'zf': 7.23,
}
# Target path for the serialized weight table. This constant was previously
# never defined anywhere in the script, so the dump below always failed
# with a NameError (silently reported via the except branch).
TERM_WEIGHTS_FILE = "term_weights.joblib"

try:
    joblib.dump(term_weights_data, TERM_WEIGHTS_FILE)
    print(f"Datei '{TERM_WEIGHTS_FILE}' erfolgreich erstellt.")
except Exception as e:
    print(f"Fehler beim Erstellen der Datei: {e}")

View File

@@ -0,0 +1,274 @@
import os
import json
import time
import logging
import tempfile
import shutil
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
# --- Configuration ---
class Config:
    """Static configuration for the Dealfront scraping run."""
    LOGIN_URL = "https://app.dealfront.com/login"
    TARGET_URL = "https://app.dealfront.com/t/prospector/companies"
    SEARCH_NAME = "Facility Management"  # <-- ADJUST THIS TO YOUR SAVED SEARCH
    CREDENTIALS_FILE = "/app/dealfront_credentials.json"
    OUTPUT_DIR = "/app/output"
# --- Logging Setup ---
LOG_FORMAT = '%(asctime)s - %(levelname)-8s - %(name)-25s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, force=True)
# Selenium's remote-connection logger is very chatty at INFO level.
logging.getLogger("selenium.webdriver.remote").setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
os.makedirs(Config.OUTPUT_DIR, exist_ok=True)
# Mirror all log output into a timestamped file in the output directory.
log_filepath = os.path.join(Config.OUTPUT_DIR, f"dealfront_run_{time.strftime('%Y%m%d-%H%M%S')}.log")
file_handler = logging.FileHandler(log_filepath, mode='w', encoding='utf-8')
file_handler.setFormatter(logging.Formatter(LOG_FORMAT))
logging.getLogger().addHandler(file_handler)
class DealfrontScraper:
    def __init__(self):
        """Start a Chrome WebDriver, load credentials and prepare explicit waits.

        Raises:
            ValueError: if username/password could not be loaded.
            Exception: re-raised when the WebDriver cannot be initialised.
        """
        logger.info("Initialisiere WebDriver...")
        chrome_options = ChromeOptions()
        # Disable image loading to speed up page rendering.
        chrome_options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
        # chrome_options.add_argument("--headless=new")  # headless DISABLED for debugging!
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_options.add_argument("--window-size=1920,1200")
        # Deliberately do not set --user-data-dir at all.
        try:
            self.driver = webdriver.Chrome(options=chrome_options)
        except Exception as e:
            logger.critical("WebDriver konnte nicht initialisiert werden.", exc_info=True)
            raise
        self.wait = WebDriverWait(self.driver, 30)  # default explicit-wait timeout: 30s
        self.username, self.password = self._load_credentials()
        if not self.username or not self.password:
            raise ValueError("Credentials konnten nicht geladen werden. Breche ab.")
        logger.info("WebDriver erfolgreich initialisiert.")
    def _load_credentials(self):
        """Read username/password from the JSON credentials file.

        Returns:
            (username, password) tuple; (None, None) if the file cannot
            be read or parsed.
        """
        try:
            with open(Config.CREDENTIALS_FILE, 'r', encoding='utf-8') as f:
                creds = json.load(f)
                return creds.get("username"), creds.get("password")
        except Exception as e:
            logger.error(f"Credentials-Datei {Config.CREDENTIALS_FILE} konnte nicht geladen werden: {e}")
            return None, None
    def _save_debug_artifacts(self, suffix=""):
        """Save a screenshot and the page source for post-mortem debugging.

        Files are written to OUTPUT_DIR as error_<suffix>_<timestamp>.png/.html.
        """
        try:
            timestamp = time.strftime("%Y%m%d-%H%M%S")
            filename_base = os.path.join(Config.OUTPUT_DIR, f"error_{suffix}_{timestamp}")
            self.driver.save_screenshot(f"{filename_base}.png")
            with open(f"{filename_base}.html", "w", encoding="utf-8") as f:
                f.write(self.driver.page_source)
            logger.error(f"Debug-Artefakte gespeichert: {filename_base}.*")
        except Exception as e:
            logger.error(f"Konnte Debug-Artefakte nicht speichern: {e}")
def login(self):
try:
logger.info(f"Navigiere zur Login-Seite: {Config.LOGIN_URL}")
self.driver.get(Config.LOGIN_URL)
self.wait.until(EC.visibility_of_element_located((By.NAME, "email"))).send_keys(self.username)
self.driver.find_element(By.CSS_SELECTOR, "input[type='password']").send_keys(self.password)
self.driver.find_element(By.XPATH, "//button[normalize-space()='Log in']").click()
logger.info("Login-Befehl gesendet. Warte 5 Sekunden auf Session-Etablierung.")
time.sleep(5)
if "login" not in self.driver.current_url:
logger.info("Login erfolgreich, URL hat sich geändert.")
return True
self._save_debug_artifacts("login_stuck")
return False
except Exception as e:
logger.critical("Login-Prozess fehlgeschlagen.", exc_info=True)
self._save_debug_artifacts("login_exception")
return False
def scroll_table_slowly(self, steps=10, pause=0.3):
"""
Scrollt die Tabelle in mehreren Schritten langsam nach unten,
damit bei Virtualisierung/Lazy Rendering alle Zeilen geladen werden.
"""
try:
table = self.driver.find_element(By.CSS_SELECTOR, "table#t-result-table")
table_height = table.size['height']
for i in range(steps):
y = int(table_height * (i + 1) / steps)
self.driver.execute_script("arguments[0].scrollTop = arguments[1];", table, y)
time.sleep(pause)
logger.info("Tabelle langsam nach unten gescrollt.")
except Exception as e:
logger.warning(f"Fehler beim langsamen Scrollen: {e}")
def navigate_and_load_search(self, search_name):
try:
logger.info(f"Navigiere direkt zur Target-Seite und lade die Suche...")
self.driver.get(Config.TARGET_URL)
self.wait.until(EC.url_contains("/t/prospector/"))
search_item_selector = (By.XPATH, f"//div[contains(@class, 'truncate') and normalize-space()='{search_name}']")
self.wait.until(EC.element_to_be_clickable(search_item_selector)).click()
logger.info("Suche geladen. Warte auf das Rendern der Ergebnistabelle.")
self.wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "table#t-result-table tbody tr")))
return True
except Exception as e:
logger.critical("Navigation oder Laden der Suche fehlgeschlagen.", exc_info=True)
self._save_debug_artifacts("navigation_or_search_load")
return False
def extract_visible_firmennamen_js(self):
"""
Extrahiert die sichtbaren Firmennamen und Websites direkt per JavaScript aus der Tabelle.
"""
script = """
let rows = document.querySelectorAll('table#t-result-table tbody tr');
let result = [];
for (let row of rows) {
let nameElem = row.querySelector('.sticky-column a.t-highlight-text');
let websiteElem = row.querySelector('a.text-gray-400.t-highlight-text');
if (nameElem) {
result.push({
name: nameElem.getAttribute('title') || nameElem.innerText,
website: websiteElem ? websiteElem.innerText : ''
});
}
}
return result;
"""
return self.driver.execute_script("return " + script)
def scrape_all_pages(self, max_pages=10):
all_companies = []
previous_first_name = None
for page_number in range(1, max_pages + 1):
logger.info(f"--- Verarbeite Seite {page_number} ---")
try:
self.wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "table#t-result-table")))
except TimeoutException:
logger.error("Ergebnistabelle wurde nicht geladen. Breche ab.")
break
logger.info("Warte 5 Sekunden, um sicherzugehen, dass alle Daten geladen sind...")
time.sleep(5)
# Scroll an den Anfang und dann langsam nach unten
self.driver.execute_script("window.scrollTo(0, 0);")
time.sleep(0.5)
self.scroll_table_slowly()
logger.info("Warte nach Scrollen nochmals 2 Sekunden...")
time.sleep(2)
# Jetzt per JS extrahieren
page_results = self.extract_visible_firmennamen_js()
for r in page_results:
r['page'] = page_number
logger.info(f"Seite {page_number}: {len(page_results)} Firmen gefunden. Erste Firmen: {[r['name'] for r in page_results[:3]]}")
all_companies.extend(page_results)
# Pagination-Buttons loggen und Weiter-Button suchen
try:
pagination_nav = self.driver.find_element(By.CSS_SELECTOR, "nav.eb-pagination")
buttons = pagination_nav.find_elements(By.CSS_SELECTOR, "a.eb-pagination-button")
logger.info(f"Gefundene Paginierungs-Buttons auf Seite {page_number}: {len(buttons)}")
for idx, btn in enumerate(buttons):
btn_text = btn.text.strip()
btn_classes = btn.get_attribute('class')
btn_html = btn.get_attribute('outerHTML')
has_svg = "svg" in btn_html
logger.info(f"Button {idx}: Text='{btn_text}', Klassen='{btn_classes}', SVG={has_svg}, HTML-Start={btn_html[:120]}...")
except NoSuchElementException:
logger.warning("Keine Pagination-Buttons gefunden.")
buttons = []
next_button = None
for idx, btn in enumerate(buttons):
btn_html = btn.get_attribute('outerHTML')
btn_text = btn.text.strip()
btn_classes = btn.get_attribute('class')
has_svg = "svg" in btn_html
is_disabled = "disabled" in btn_classes
if has_svg and not is_disabled and btn_text == "":
next_button = btn
logger.info(f"Als Weiter-Button erkannt: Button {idx}")
break
if not next_button:
logger.info("Kein klickbarer 'Weiter'-Button mehr gefunden. Paginierung abgeschlossen.")
break
logger.info("Klicke auf 'Weiter'-Button...")
try:
self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", next_button)
time.sleep(0.5)
self.driver.execute_script("arguments[0].click();", next_button)
logger.info("Klick auf Weiter-Button ausgeführt.")
# Warte auf Änderung des ersten Firmennamens
if page_results:
previous_first_name = page_results[0]['name']
else:
previous_first_name = ""
def page_changed(driver):
try:
name = driver.execute_script("""
let row = document.querySelector('table#t-result-table tbody tr');
if (!row) return '';
let nameElem = row.querySelector('.sticky-column a.t-highlight-text');
return nameElem ? (nameElem.getAttribute('title') || nameElem.innerText) : '';
""")
return name and name != previous_first_name
except Exception:
return False
self.wait.until(page_changed)
logger.info("Seitenwechsel erfolgreich verifiziert (erster Firmenname hat sich geändert).")
except Exception as e:
logger.error(f"Fehler beim Klicken auf den Weiter-Button oder beim Warten auf neue Seite: {e}")
try:
timestamp = time.strftime("%Y%m%d-%H%M%S")
self.driver.save_screenshot(f"/app/output/pagination_error_{timestamp}.png")
with open(f"/app/output/pagination_error_{timestamp}.html", "w", encoding="utf-8") as f:
f.write(self.driver.page_source)
logger.info(f"Screenshot und HTML der Seite nach Pagination-Fehler gespeichert.")
except Exception as ee:
logger.error(f"Fehler beim Speichern von Screenshot/HTML: {ee}")
break
return all_companies
def close(self):
if hasattr(self, "driver") and self.driver:
self.driver.quit()
if __name__ == "__main__":
    # Entry point: log in, load the saved search, scrape, export a CSV.
    scraper = None
    try:
        scraper = DealfrontScraper()
        if not scraper.login():
            raise Exception("Login fehlgeschlagen")
        if not scraper.navigate_and_load_search(Config.SEARCH_NAME):
            raise Exception("Navigation/Suche fehlgeschlagen")
        companies = scraper.scrape_all_pages(max_pages=6)  # limit to 6 pages
        if not companies:
            logger.warning("Keine Firmen konnten extrahiert werden.")
        else:
            frame = pd.DataFrame(companies)
            output_csv_path = os.path.join(
                Config.OUTPUT_DIR,
                f"dealfront_results_{time.strftime('%Y%m%d-%H%M%S')}.csv",
            )
            frame.to_csv(output_csv_path, index=False, sep=';', encoding='utf-8-sig')
            logger.info(f"Ergebnisse ({len(frame)} Firmen) erfolgreich in '{output_csv_path}' gespeichert.")
    except Exception as e:
        logger.critical(f"Ein kritischer Fehler ist im Hauptprozess aufgetreten: {e}", exc_info=True)
    finally:
        if scraper:
            scraper.close()
        logger.info("Dealfront Automatisierung beendet.")

View File

@@ -0,0 +1,49 @@
import sqlite3
import json
import os
# Path of the connector's SQLite job queue (relative to the working directory).
DB_PATH = "connector_queue.db"

def inspect_queue():
    """Print queue statistics and the 10 most recently updated jobs.

    Purely diagnostic: reads the jobs table, summarizes counts per status and
    shows id/type/status/error plus a best-effort entity hint per job.
    """
    if not os.path.exists(DB_PATH):
        print(f"❌ Database not found at {DB_PATH}")
        return
    print(f"🔍 Inspecting Queue: {DB_PATH}")
    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        # Get stats
        cursor.execute("SELECT status, COUNT(*) FROM jobs GROUP BY status")
        stats = dict(cursor.fetchall())
        print(f"\n📊 Stats: {stats}")
        # Get recent jobs
        print("\n📝 Last 10 Jobs:")
        cursor.execute("SELECT id, event_type, status, error_msg, updated_at, payload FROM jobs ORDER BY updated_at DESC LIMIT 10")
        rows = cursor.fetchall()
        for row in rows:
            payload = json.loads(row['payload'])
            # Try to identify the affected entity from well-known payload keys.
            entity = "Unknown"
            if "PrimaryKey" in payload: entity = f"ID {payload['PrimaryKey']}"
            if "ContactId" in payload: entity = f"Contact {payload['ContactId']}"
            print(f" - Job #{row['id']} [{row['status']}] {row['event_type']} ({entity})")
            print(f" Updated: {row['updated_at']}")
            if row['error_msg']:
                print(f" ❌ ERROR: {row['error_msg']}")
    except Exception as e:
        print(f"❌ Error reading DB: {e}")
    finally:
        # Bug fix: the original only closed the connection on the success path,
        # leaking it whenever an exception was caught above.
        if conn is not None:
            conn.close()

if __name__ == "__main__":
    inspect_queue()

View File

@@ -0,0 +1,34 @@
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
# One-off debug probe: does the igepa homepage expose an Impressum /
# legal-notice link? Prints every anchor whose text or href matches a keyword.
url = "https://www.igepa.de/"
print(f"Fetching {url}...")
try:
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    # verify=False disables TLS verification — acceptable only for this local debug probe.
    response = requests.get(url, headers=headers, verify=False, timeout=15)
    print(f"Status: {response.status_code}")
    soup = BeautifulSoup(response.content, 'html.parser')
    print("\n--- Searching for Impressum Candidates ---")
    keywords = ["impressum", "imprint", "legal notice", "anbieterkennzeichnung", "rechtliches", "legal", "disclaimer"]
    found = False
    for a in soup.find_all('a', href=True):
        text = a.get_text().strip().lower()
        href = a['href'].lower()
        # print(f"Link: '{text}' -> {href}") # Verbose
        if any(kw in text for kw in keywords) or any(kw in href for kw in keywords):
            print(f"MATCH: Text='{text}' | Href='{href}'")
            found = True
    if not found:
        print("No matches found.")
except Exception as e:
    print(f"Error: {e}")

View File

@@ -0,0 +1,34 @@
import requests
from bs4 import BeautifulSoup
# One-off debug probe of an igepa subsidiary page: look for any anchor
# containing "imp" (Impressum/imprint) and list subsidiary ("zweih") links.
url = "https://www.igepa.de/zweih_gmbh_co_kg/ueber-uns/"
print(f"Fetching {url}...")
try:
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    # verify=False disables TLS verification — acceptable only for this local debug probe.
    response = requests.get(url, headers=headers, verify=False, timeout=15)
    soup = BeautifulSoup(response.content, 'html.parser')
    print("\n--- Searching for 'imp' in Href or Text ---")
    found = False
    for a in soup.find_all('a', href=True):
        text = a.get_text().strip().lower()
        href = a['href'].lower()
        if "imp" in href or "imp" in text:
            print(f"MATCH: Text='{text}' | Href='{href}'")
            found = True
    if not found:
        print("No match for 'imp' found.")
    print("\n--- Searching for '2h' specific links ---")
    for a in soup.find_all('a', href=True):
        href = a['href'].lower()
        if "zweih" in href:
            print(f"2H Link: {href}")
except Exception as e:
    print(f"Error: {e}")

View File

@@ -0,0 +1,27 @@
import requests
from bs4 import BeautifulSoup
# One-off debug probe: dump the first ~50 anchors of the igepa homepage to
# eyeball the navigation structure.
url = "https://www.igepa.de/"
print(f"Fetching {url}...")
try:
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    # verify=False disables TLS verification — acceptable only for this local debug probe.
    response = requests.get(url, headers=headers, verify=False, timeout=15)
    soup = BeautifulSoup(response.content, 'html.parser')
    print(f"Page Title: {soup.title.string if soup.title else 'No Title'}")
    print("\n--- All Links (First 50) ---")
    count = 0
    for a in soup.find_all('a', href=True):
        text = a.get_text().strip().replace('\n', ' ')
        href = a['href']
        print(f"[{count}] {text[:30]}... -> {href}")
        count += 1
        if count > 50: break
except Exception as e:
    print(f"Error: {e}")

View File

@@ -0,0 +1,71 @@
import sqlite3
import json
import os
# Location of the transcription tool's SQLite DB and the meeting to inspect.
DB_PATH = "transcription-tool/backend/meetings.db"
MEETING_ID = 5

def debug_meeting(db_path, meeting_id):
    """Dump a meeting's metadata and its transcript chunks for manual debugging.

    Prints the meeting row, then for each chunk the entry count plus the first
    two and last two entries (to spot degenerate repeated-output transcripts).
    """
    if not os.path.exists(db_path):
        print(f"ERROR: Database file not found at {db_path}")
        return
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        # Get Meeting Info
        cursor.execute("SELECT id, title, status, duration_seconds FROM meetings WHERE id = ?", (meeting_id,))
        meeting = cursor.fetchone()
        if not meeting:
            print(f"ERROR: No meeting found with ID {meeting_id}")
            return
        print("--- MEETING INFO ---")
        print(f"ID: {meeting[0]}")
        print(f"Title: {meeting[1]}")
        print(f"Status: {meeting[2]}")
        print(f"Duration (s): {meeting[3]}")
        print("-" * 20)
        # Get Chunks
        cursor.execute("SELECT id, chunk_index, json_content FROM transcript_chunks WHERE meeting_id = ? ORDER BY chunk_index", (meeting_id,))
        chunks = cursor.fetchall()
        print(f"--- CHUNKS FOUND: {len(chunks)} ---")
        for chunk_id, chunk_index, json_content_str in chunks:
            print(f"\n--- Chunk ID: {chunk_id}, Index: {chunk_index} ---")
            if not json_content_str:
                print(" -> JSON content is EMPTY.")
                continue
            try:
                json_content = json.loads(json_content_str)
                print(f" -> Number of entries: {len(json_content)}")
                if json_content:
                    # Print first 2 and last 2 entries to check for the "Ja" loop
                    print(" -> First 2 entries:")
                    for entry in json_content[:2]:
                        _print_entry(entry)
                    if len(json_content) > 4:
                        print(" -> Last 2 entries:")
                        for entry in json_content[-2:]:
                            _print_entry(entry)
            except json.JSONDecodeError:
                print(" -> ERROR: Failed to decode JSON content.")
    except sqlite3.Error as e:
        print(f"Database error: {e}")
    finally:
        if conn:
            conn.close()

def _print_entry(entry):
    """Print one transcript entry line.

    Bug fix: tolerates a missing/None 'text' field — the original sliced
    entry.get('text')[:80] and crashed with TypeError on None.
    """
    text = entry.get('text') or ''
    print(f" - {entry.get('display_time')} [{entry.get('speaker')}]: {text[:80]}...")

if __name__ == "__main__":
    debug_meeting(DB_PATH, MEETING_ID)

View File

@@ -0,0 +1,13 @@
import os

# Debug helper: check whether the built frontend assets were copied into the
# container and dump the file tree so missing files are easy to spot.
static_path = "/frontend_static"
print(f"Path {static_path} exists: {os.path.exists(static_path)}")

if os.path.exists(static_path):
    for dirpath, _dirnames, filenames in os.walk(static_path):
        for filename in filenames:
            print(os.path.join(dirpath, filename))
else:
    # Fall back to listing the app directory, skipping node_modules noise.
    print("Listing /app instead:")
    for dirpath, _dirnames, filenames in os.walk("/app"):
        if "node_modules" in dirpath:
            continue
        for filename in filenames:
            print(os.path.join(dirpath, filename))

View File

@@ -0,0 +1,50 @@
import asyncio
import os
import logging
from pyppeteer import launch
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Read the long-lived access token straight from the environment.
HA_TOKEN = os.environ.get("HA_ACCESS_TOKEN")
# The dashboard URL is assembled dynamically with the token
# (kiosk mode, pre-authenticated via access_token query parameter).
HA_URL = f"http://192.168.178.131:8123/lovelace/solar?kiosk&auth_callback=1&access_token={HA_TOKEN}"
OUTPUT_FILE = "/screenshots/final_screenshot.png"
async def main():
    """Open the Home Assistant solar dashboard headlessly and save a screenshot.

    Aborts early when HA_ACCESS_TOKEN is missing; on any navigation/render
    error a debug screenshot is captured instead.
    """
    if not HA_TOKEN:
        logging.error("Fehler: Umgebungsvariable HA_ACCESS_TOKEN nicht gefunden!")
        return
    logging.info("Starte Puppeteer-Browser...")
    browser = await launch(
        executablePath='/usr/bin/chromium',
        headless=True,
        args=['--no-sandbox', '--disable-setuid-sandbox']
    )
    page = await browser.newPage()
    await page.setViewport({'width': 1280, 'height': 1024})
    try:
        logging.info(f"Navigiere direkt zur authentifizierten URL...")
        await page.goto(HA_URL, {'waitUntil': 'networkidle0', 'timeout': 60000})
        logging.info("Seite geladen. Warte 15 Sekunden auf das finale Rendering...")
        # Lovelace keeps rendering after network idle; give charts time to settle.
        await asyncio.sleep(15)
        logging.info("Erstelle Screenshot...")
        await page.screenshot({'path': OUTPUT_FILE})
        logging.info(f"Screenshot erfolgreich unter {OUTPUT_FILE} gespeichert.")
    except Exception as e:
        logging.error(f"Ein Fehler ist aufgetreten: {e}", exc_info=True)
        # Capture whatever is on screen to help diagnose the failure.
        await page.screenshot({'path': '/screenshots/debug_error_final.png'})
    finally:
        logging.info("Schließe Browser.")
        await browser.close()
if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -0,0 +1,70 @@
import sqlite3
import json
import os
# SQLite DB produced by the transcription pipeline (relative to the CWD).
DB_PATH = "transcripts.db"

def inspect_latest_meeting():
    """Print the newest meeting and try to re-parse each chunk's raw LLM output.

    Replays the orchestrator's markdown-fence cleaning on raw_text so JSON
    parse failures can be reproduced and located outside the main pipeline.
    """
    if not os.path.exists(DB_PATH):
        print(f"Error: Database file '{DB_PATH}' not found.")
        return
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()
        # Get latest meeting
        cursor.execute("SELECT id, title, created_at FROM meetings ORDER BY created_at DESC LIMIT 1")
        meeting = cursor.fetchone()
        if not meeting:
            print("No meetings found in DB.")
            return
        meeting_id, title, created_at = meeting
        print(f"--- Inspecting Latest Meeting: ID {meeting_id} ('{title}') created at {created_at} ---")
        # Get chunks for this meeting
        cursor.execute("SELECT id, chunk_index, raw_text, json_content FROM transcript_chunks WHERE meeting_id = ? ORDER BY chunk_index", (meeting_id,))
        chunks = cursor.fetchall()
        if not chunks:
            print("No chunks found for this meeting.")
        for chunk_id, idx, raw_text, json_content in chunks:
            print(f"\n[Chunk {idx} (ID: {chunk_id})]")
            print(f"Stored JSON Content (Length): {len(json.loads(json_content)) if json_content else 'None/Empty'}")
            # Bug fix: a NULL raw_text crashed the slicing/len calls below.
            raw_text = raw_text or ""
            print("-" * 20 + " RAW TEXT START " + "-" * 20)
            print(raw_text[:500])  # Print first 500 chars
            print("..." if len(raw_text) > 500 else "")
            print("-" * 20 + " RAW TEXT END " + "-" * 20)
            # Simulate the cleaning logic from the orchestrator (strip ```json fences).
            cleaned = raw_text.strip()
            if cleaned.startswith("```json"):
                cleaned = cleaned[7:]
            elif cleaned.startswith("```"):
                cleaned = cleaned[3:]
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
            cleaned = cleaned.strip()
            # Try to parse manually to surface the exact error position.
            try:
                parsed = json.loads(cleaned)
                print("✅ Manual Parsing Successful!")
            except json.JSONDecodeError as e:
                print(f"❌ Manual Parsing Failed: {e}")
                # Show context around error
                if hasattr(e, 'pos'):
                    start = max(0, e.pos - 20)
                    end = min(len(cleaned), e.pos + 20)
                    print(f" Context at error: ...{cleaned[start:end]}...")
    finally:
        # Bug fix: the original never closed the connection on an exception
        # (and leaked it on the early "no meetings" return as well).
        conn.close()

if __name__ == "__main__":
    inspect_latest_meeting()

View File

@@ -0,0 +1,16 @@
import sqlite3
import os
# Absolute path inside the container. NOTE(review): sqlite3.connect() creates
# an empty file if it does not exist and the SELECT then fails with an
# uncaught OperationalError — this script assumes the queue DB is present.
DB_PATH = "/app/connector_queue.db"
if __name__ == "__main__":
    # Quick-look debug dump of the job queue's 20 most recent entries.
    print(f"📊 Accessing database at {DB_PATH}")
    print("📊 Listing last 20 jobs in database...")
    # NB: sqlite3's "with" commits/rolls back a transaction; it does NOT close
    # the connection (acceptable here — the process exits right after).
    with sqlite3.connect(DB_PATH) as conn:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        cursor.execute("SELECT id, status, event_type, updated_at FROM jobs ORDER BY id DESC LIMIT 20")
        rows = cursor.fetchall()
        for r in rows:
            print(f" - Job {r['id']}: {r['status']} ({r['event_type']}) - Updated: {r['updated_at']}")

View File

@@ -0,0 +1,235 @@
# duplicate_checker_v6.1.py
import os
import sys
import re
import argparse
import json
import logging
import pandas as pd
import numpy as np
import joblib
import treelite_runtime
from datetime import datetime
from collections import Counter
from thefuzz import fuzz
from helpers import normalize_company_name, simple_normalize_url
from config import Config
from google_sheet_handler import GoogleSheetHandler
# --- Configuration ---
SCRIPT_VERSION = "v6.1 (Treelite ML Model)"
STATUS_DIR = "job_status"
LOG_DIR = "Log"
MODEL_FILE = 'xgb_model.json'
TERM_WEIGHTS_FILE = 'term_weights.joblib'
CRM_DATA_FILE = 'crm_for_prediction.pkl'
TREELITE_MODEL_FILE = 'xgb_model.treelite'
PREDICTION_THRESHOLD = 0.5  # minimum ML probability to accept a match
PREFILTER_MIN_PARTIAL = 65  # minimum fuzz partial ratio in the fallback prefilter
PREFILTER_LIMIT = 50  # maximum candidates scored per record
CRM_SHEET_NAME = "CRM_Accounts"
MATCHING_SHEET_NAME = "Matching_Accounts"
# --- Logging Setup ---
# Console receives INFO+, the timestamped logfile receives everything (DEBUG+).
now = datetime.now().strftime('%Y-%m-%d_%H-%M')
LOG_FILE = f"{now}_duplicate_check_{SCRIPT_VERSION.split(' ')[0]}.txt"
if not os.path.exists(LOG_DIR): os.makedirs(LOG_DIR, exist_ok=True)
log_path = os.path.join(LOG_DIR, LOG_FILE)
root = logging.getLogger()
root.setLevel(logging.DEBUG)
# Drop any handlers configured elsewhere so this script controls all output.
for h in list(root.handlers): root.removeHandler(h)
formatter = logging.Formatter("%(asctime)s - %(levelname)-8s - %(message)s")
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
root.addHandler(ch)
fh = logging.FileHandler(log_path, mode='a', encoding='utf-8')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
root.addHandler(fh)
logger = logging.getLogger(__name__)
# --- Stop/city tokens ---
# Legal forms and generic business terms carry no signal for name matching.
STOP_TOKENS_BASE = {
    'gmbh','mbh','ag','kg','ug','ohg','se','co','kgaa','inc','llc','ltd','sarl', 'b.v', 'bv',
    'holding','gruppe','group','international','solutions','solution','service','services',
}
# Populated in main() from the city columns of both datasets.
CITY_TOKENS = set()
# --- Helper functions ---
def update_status(job_id, status, progress_message):
    """Persist job progress to job_status/<job_id>.json for the web UI to poll.

    Merges into any existing status file so keys written by other components
    survive. No-op when job_id is falsy (plain CLI runs without a job id).
    """
    if not job_id: return
    # Robustness fix: the status directory may not exist on a fresh checkout.
    os.makedirs(STATUS_DIR, exist_ok=True)
    status_file = os.path.join(STATUS_DIR, f"{job_id}.json")
    try:
        try:
            with open(status_file, 'r') as f: data = json.load(f)
        except FileNotFoundError: data = {}
        data.update({"status": status, "progress": progress_message})
        with open(status_file, 'w') as f: json.dump(data, f)
    except Exception as e:
        # Consistency fix: use the module logger (was logging.error on root).
        logger.error(f"Konnte Statusdatei für Job {job_id} nicht schreiben: {e}")
def _tokenize(s: str):
if not s: return []
return re.split(r"[^a-z0-9äöüß]+", str(s).lower())
def clean_name_for_scoring(norm_name: str):
    """Strip stop/city tokens and tokens shorter than 3 chars from a name.

    Returns a (joined_string, token_set) pair used by the fuzzy scorers.
    """
    if not norm_name:
        return "", set()
    ignored = STOP_TOKENS_BASE | CITY_TOKENS
    kept = [
        token
        for token in _tokenize(norm_name)
        if len(token) >= 3 and token not in ignored
    ]
    return " ".join(kept), set(kept)
def get_rarest_tokens(norm_name: str, term_weights: dict, count=3):
    """Return up to *count* tokens of *norm_name* with the highest term weight.

    Higher weight means rarer token; rare tokens are the strongest match signals.
    """
    _, token_set = clean_name_for_scoring(norm_name)
    if not token_set:
        return []
    by_weight = sorted(token_set, key=lambda tok: term_weights.get(tok, 0), reverse=True)
    return by_weight[:count]
def create_features(mrec: dict, crec: dict, term_weights: dict, feature_names: list):
    """Build the ML feature vector for one (matching record, CRM candidate) pair.

    Returns feature values ordered exactly as *feature_names* expects;
    names without a computed value default to 0.
    """
    features = {}
    n1_raw = mrec.get('normalized_name', '')
    n2_raw = crec.get('normalized_name', '')
    clean1, toks1 = clean_name_for_scoring(n1_raw)
    clean2, toks2 = clean_name_for_scoring(n2_raw)
    # Fuzzy string similarities on raw vs cleaned names.
    features['fuzz_ratio'] = fuzz.ratio(n1_raw, n2_raw)
    features['fuzz_partial_ratio'] = fuzz.partial_ratio(n1_raw, n2_raw)
    features['fuzz_token_set_ratio'] = fuzz.token_set_ratio(clean1, clean2)
    features['fuzz_token_sort_ratio'] = fuzz.token_sort_ratio(clean1, clean2)
    # Exact-match indicator features (only when both sides have a value).
    features['domain_match'] = 1 if mrec.get('normalized_domain') and mrec.get('normalized_domain') == crec.get('normalized_domain') else 0
    features['city_match'] = 1 if mrec.get('CRM Ort') and crec.get('CRM Ort') and mrec.get('CRM Ort') == crec.get('CRM Ort') else 0
    features['country_match'] = 1 if mrec.get('CRM Land') and crec.get('CRM Land') and mrec.get('CRM Land') == crec.get('CRM Land') else 0
    features['country_mismatch'] = 1 if (mrec.get('CRM Land') and crec.get('CRM Land') and mrec.get('CRM Land') != crec.get('CRM Land')) else 0
    overlapping_tokens = toks1 & toks2
    # Fix: the original computed get_rarest_tokens(n1_raw, ...) twice for the
    # same value; hoist the call and reuse the result.
    rarest = get_rarest_tokens(n1_raw, term_weights, 1)
    rarest_token_mrec = rarest[0] if rarest else None
    features['rarest_token_overlap'] = 1 if rarest_token_mrec and rarest_token_mrec in toks2 else 0
    features['weighted_token_score'] = sum(term_weights.get(t, 0) for t in overlapping_tokens)
    features['jaccard_similarity'] = len(overlapping_tokens) / len(toks1 | toks2) if len(toks1 | toks2) > 0 else 0
    features['name_len_diff'] = abs(len(n1_raw) - len(n2_raw))
    features['candidate_is_shorter'] = 1 if len(n2_raw) < len(n1_raw) else 0
    return [features.get(name, 0) for name in feature_names]
def build_indexes(crm_df: pd.DataFrame):
    """Pre-compute lookup structures over the CRM frame for candidate retrieval.

    Returns (records, domain_index, token_index): domain_index maps a
    normalized domain to its record dicts, token_index maps a cleaned
    name token to record positions in *records*.
    """
    records = list(crm_df.to_dict('records'))
    domain_index = {}
    token_index = {}
    for position, record in enumerate(records):
        domain = record.get('normalized_domain')
        if domain:
            domain_index.setdefault(domain, []).append(record)
        _, tokens = clean_name_for_scoring(record.get('normalized_name', ''))
        for token in set(tokens):
            token_index.setdefault(token, []).append(position)
    return records, domain_index, token_index
def main(job_id=None):
    """Run the full duplicate check.

    Loads the compiled model plus local CRM snapshot, scores every record of
    the Matching sheet against CRM candidates and writes the results back to
    the Google Sheet. *job_id* (optional) enables progress reporting via
    update_status().
    """
    # <<< NEW: unmistakable banner right at the start so the running script
    # version is obvious in mixed logs >>>
    logger.info(f"############################################################")
    logger.info(f"### DUPLICATE CHECKER {SCRIPT_VERSION} WIRD AUSGEFÜHRT ###")
    logger.info(f"############################################################")
    # Load the compiled Treelite model, per-token weights and the
    # pre-normalized CRM snapshot produced by the training step.
    try:
        predictor = treelite_runtime.Predictor(TREELITE_MODEL_FILE, nthread=4)
        term_weights = joblib.load(TERM_WEIGHTS_FILE)
        crm_df = pd.read_pickle(CRM_DATA_FILE)
        logger.info("Treelite-Modell, Gewichte und lokaler CRM-Datensatz erfolgreich geladen.")
    except Exception as e:
        logger.critical(f"Konnte Modelldateien/CRM-Daten nicht laden. Fehler: {e}")
        sys.exit(1)
    try:
        sheet = GoogleSheetHandler()
        match_df = sheet.get_sheet_as_dataframe(MATCHING_SHEET_NAME)
    except Exception as e:
        logger.critical(f"Fehler beim Laden der Matching-Daten aus Google Sheets: {e}")
        sys.exit(1)
    total = len(match_df) if match_df is not None else 0
    if match_df is None or match_df.empty:
        logger.critical("Leere Daten im Matching-Sheet. Abbruch.")
        return
    logger.info(f"{len(crm_df)} CRM-Datensätze (lokal) | {total} Matching-Datensätze")
    # Normalize the incoming sheet the same way the CRM snapshot was normalized.
    match_df['normalized_name'] = match_df['CRM Name'].astype(str).apply(normalize_company_name)
    match_df['normalized_domain'] = match_df['CRM Website'].astype(str).apply(simple_normalize_url)
    match_df['CRM Ort'] = match_df['CRM Ort'].astype(str).str.lower().str.strip()
    match_df['CRM Land'] = match_df['CRM Land'].astype(str).str.lower().str.strip()
    # City names from both datasets become stop tokens for name scoring.
    global CITY_TOKENS
    CITY_TOKENS = {t for s in pd.concat([crm_df['CRM Ort'], match_df['CRM Ort']]).dropna().unique() for t in _tokenize(s) if len(t) >= 3}
    crm_records, domain_index, token_index = build_indexes(crm_df)
    results = []
    logger.info("Starte Matching-Prozess mit ML-Modell…")
    for idx, mrow in match_df.to_dict('index').items():
        processed = idx + 1
        progress_message = f"Prüfe {processed}/{total}: '{mrow.get('CRM Name','')}'"
        if processed % 100 == 0: logger.info(progress_message) # log less often
        if processed % 10 == 0 or processed == total: update_status(job_id, "Läuft", progress_message)
        # Candidate retrieval, cheapest strategy first: exact domain match,
        # then rare name tokens, then a fuzzy prefilter over all CRM records.
        candidate_indices = set()
        if mrow.get('normalized_domain'):
            candidates_from_domain = domain_index.get(mrow['normalized_domain'], [])
            for c in candidates_from_domain:
                try:
                    indices = crm_df.index[crm_df['normalized_name'] == c['normalized_name']].tolist()
                    if indices: candidate_indices.add(indices[0])
                except Exception: continue
        if len(candidate_indices) < 5:
            top_tokens = get_rarest_tokens(mrow.get('normalized_name',''), term_weights, count=3)
            for token in top_tokens:
                candidate_indices.update(token_index.get(token, []))
        if len(candidate_indices) < 5:
            clean1, _ = clean_name_for_scoring(mrow.get('normalized_name',''))
            pf = sorted([(fuzz.partial_ratio(clean1, clean_name_for_scoring(r.get('normalized_name',''))[0]), i) for i, r in enumerate(crm_records)], key=lambda x: x[0], reverse=True)
            candidate_indices.update([i for score, i in pf if score >= PREFILTER_MIN_PARTIAL][:PREFILTER_LIMIT])
        candidates = [crm_records[i] for i in list(candidate_indices)[:PREFILTER_LIMIT]] # cap candidate count
        if not candidates:
            results.append({'Match':'', 'Score':0, 'Match_Grund':'keine Kandidaten'})
            continue
        # Score all candidates in a single Treelite batch; column 1 = P(match).
        feature_list = [create_features(mrow, cr, term_weights, predictor.feature_names) for cr in candidates]
        dmatrix = treelite_runtime.DMatrix(np.array(feature_list, dtype='float32'))
        probabilities = predictor.predict(dmatrix)[:, 1]
        scored_candidates = sorted([{'name': candidates[i].get('CRM Name', ''), 'score': prob} for i, prob in enumerate(probabilities)], key=lambda x: x['score'], reverse=True)
        best_match = scored_candidates[0] if scored_candidates else None
        if best_match and best_match['score'] >= PREDICTION_THRESHOLD:
            results.append({'Match': best_match['name'], 'Score': round(best_match['score'] * 100), 'Match_Grund': f"ML Confidence: {round(best_match['score']*100)}%"})
        else:
            score_val = round(best_match['score'] * 100) if best_match else 0
            results.append({'Match':'', 'Score': score_val, 'Match_Grund': f"Below Threshold ({int(PREDICTION_THRESHOLD*100)}%)"})
    logger.info("Matching-Prozess abgeschlossen. Schreibe Ergebnisse...")
    result_df = pd.DataFrame(results)
    final_df = pd.concat([match_df.reset_index(drop=True), result_df.reset_index(drop=True)], axis=1)
    # Drop the helper columns before uploading back to the sheet.
    cols_to_drop = ['normalized_name', 'normalized_domain']
    final_df = final_df.drop(columns=[col for col in cols_to_drop if col in final_df.columns], errors='ignore')
    upload_df = final_df.astype(str).replace({'nan': '', 'None': ''})
    data_to_write = [upload_df.columns.tolist()] + upload_df.values.tolist()
    ok = sheet.clear_and_write_data(MATCHING_SHEET_NAME, data_to_write)
    if ok:
        logger.info("Ergebnisse erfolgreich in das Google Sheet geschrieben.")
        if job_id: update_status(job_id, "Abgeschlossen", f"{total} Accounts erfolgreich geprüft.")
    else:
        logger.error("Fehler beim Schreiben der Ergebnisse ins Google Sheet.")
        if job_id: update_status(job_id, "Fehlgeschlagen", "Fehler beim Schreiben ins Google Sheet.")
if __name__ == '__main__':
    # CLI entry point: an optional --job-id enables status-file reporting.
    arg_parser = argparse.ArgumentParser(description=f"Duplicate Checker {SCRIPT_VERSION}")
    arg_parser.add_argument("--job-id", type=str, help="Eindeutige ID für den Job-Status.")
    cli_args = arg_parser.parse_args()
    main(job_id=cli_args.job_id)

View File

@@ -0,0 +1,41 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import json
# Setup DB
# NOTE(review): relative SQLite path — must be run from the project root.
DB_PATH = "sqlite:///companies_v3_fixed_2.db"
engine = create_engine(DB_PATH)
SessionLocal = sessionmaker(bind=engine)
# Module-level session shared by fix_benni() below.
session = SessionLocal()
from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Company(Base):
    # Minimal mirror of the companies table — only the columns this fix touches.
    __tablename__ = "companies"
    id = Column(Integer, primary_key=True)
    street = Column(String)
    zip_code = Column(String)
def fix_benni():
    """One-off data repair: hard-set street/zip for company #33."""
    company_id = 33
    print(f"🔧 Fixing Address for Company ID {company_id}...")
    company = session.query(Company).filter_by(id=company_id).first()
    if not company:
        print("❌ Company not found.")
        return
    # Hardcoded from previous check_benni.py output to be safe/fast
    # "street": "Eriagstraße 58", "zip": "85053"
    company.street = "Eriagstraße 58"
    company.zip_code = "85053"
    session.commit()
    print(f"✅ Database updated: Street='{company.street}', Zip='{company.zip_code}'")
if __name__ == "__main__":
    fix_benni()

View File

@@ -0,0 +1,70 @@
import sqlite3
DB_PATH = "companies_v3_fixed_2.db"
# Maps industry name -> unit label for its size metric.
# NOTE(review): empty-string values are falsy and therefore fall through to
# the fallback logic below — confirm that is intended for non-AREA metrics.
UNIT_MAPPING = {
    "Logistics - Warehouse": "",
    "Healthcare - Hospital": "Betten",
    "Infrastructure - Transport": "Passagiere",
    "Leisure - Indoor Active": "",
    "Retail - Food": "",
    "Retail - Shopping Center": "",
    "Hospitality - Gastronomy": "Sitzplätze",
    "Leisure - Outdoor Park": "Besucher",
    "Leisure - Wet & Spa": "Besucher",
    "Infrastructure - Public": "Kapazität",
    "Retail - Non-Food": "",
    "Hospitality - Hotel": "Zimmer",
    "Leisure - Entertainment": "Besucher",
    "Healthcare - Care Home": "Plätze",
    "Industry - Manufacturing": "Mitarbeiter",
    "Energy - Grid & Utilities": "Kunden",
    "Leisure - Fitness": "Mitglieder",
    "Corporate - Campus": "Mitarbeiter",
    "Energy - Solar/Wind": "MWp",
    "Tech - Data Center": "Racks",
    "Automotive - Dealer": "Fahrzeuge",
    "Infrastructure Parking": "Stellplätze",
    "Reinigungsdienstleister": "Mitarbeiter",
    "Infrastructure - Communities": "Einwohner"
}

def fix_units(db_path=DB_PATH):
    """Rewrite industries.scraper_search_term with the correct unit label.

    Generalized: *db_path* defaults to the original hard-coded DB file but can
    now point at any other copy (e.g. for testing). Rows already carrying the
    correct label are left untouched; everything runs in one transaction and
    is rolled back on error.
    """
    print(f"Connecting to {db_path}...")
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    try:
        cursor.execute("SELECT id, name, scraper_search_term, metric_type FROM industries")
        rows = cursor.fetchall()
        updated_count = 0
        for ind_id, name, current_term, m_type in rows:
            new_term = UNIT_MAPPING.get(name)
            # Fallback for unmapped (or empty-string-mapped) industries:
            # area metrics get no unit, everything else a generic count label.
            if not new_term:
                if m_type in ["AREA_IN", "AREA_OUT"]:
                    new_term = ""
                else:
                    new_term = "Anzahl"  # Generic fallback
            if current_term != new_term:
                print(f"Updating '{name}': '{current_term}' -> '{new_term}'")
                cursor.execute("UPDATE industries SET scraper_search_term = ? WHERE id = ?", (new_term, ind_id))
                updated_count += 1
        conn.commit()
        print(f"\n✅ Updated {updated_count} industries with correct units.")
    except Exception as e:
        print(f"❌ Error: {e}")
        conn.rollback()
    finally:
        conn.close()

if __name__ == "__main__":
    fix_units()

View File

@@ -0,0 +1,23 @@
import sqlite3
def fix_mappings(db_path='/app/companies_v3_fixed_2.db'):
    """Insert/refresh job-role mapping rules in the job_role_mappings table.

    Generalized: *db_path* defaults to the original container path but can now
    point at any other database copy (e.g. for testing). The connection is
    closed even when an INSERT fails (the original leaked it on error).
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        # New mappings for management-level titles, all generalized to the
        # 'Wirtschaftlicher Entscheider' (economic decision-maker) role.
        new_rules = [
            ('%leitung%', 'Wirtschaftlicher Entscheider'),
            ('%vorstand%', 'Wirtschaftlicher Entscheider'),
            ('%geschäftsleitung%', 'Wirtschaftlicher Entscheider'),
            ('%management%', 'Wirtschaftlicher Entscheider')
        ]
        for pattern, role in new_rules:
            cursor.execute("INSERT OR REPLACE INTO job_role_mappings (pattern, role, created_at) VALUES (?, ?, '2026-02-22T15:30:00')", (pattern, role))
        conn.commit()
    finally:
        conn.close()
    print("Mappings updated for Geschäftsleitung, Vorstand, Management.")

if __name__ == "__main__":
    fix_mappings()

View File

@@ -0,0 +1,90 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import json
import logging
# Setup DB
# NOTE(review): relative SQLite path — must be run from the project root.
DB_PATH = "sqlite:///companies_v3_fixed_2.db"
engine = create_engine(DB_PATH)
SessionLocal = sessionmaker(bind=engine)
# Module-level session shared by fix_data() below.
session = SessionLocal()
# Import Models (Simplified for script)
from sqlalchemy import Column, Integer, String, Text, JSON
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Company(Base):
    """Minimal ORM mapping of the `companies` table (only the columns this script touches)."""
    __tablename__ = "companies"
    id = Column(Integer, primary_key=True)
    name = Column(String)
    city = Column(String)
    country = Column(String)  # stored as country code in fix_data()
    crm_vat = Column(String)  # VAT ID as synced to the CRM
    street = Column(String)
    zip_code = Column(String)
class EnrichmentData(Base):
    """ORM mapping of `enrichment_data` (per-company scrape payloads stored as JSON)."""
    __tablename__ = "enrichment_data"
    id = Column(Integer, primary_key=True)
    company_id = Column(Integer)  # references companies.id (queried by fix_data)
    source_type = Column(String)  # e.g. "website_scrape"
    content = Column(JSON)
def fix_data():
    """Backfill address/VAT fields for company 32 from its website-scrape impressum.

    Reads the `impressum` section of the company's `website_scrape`
    enrichment record and copies any present values onto the Company row.
    Commits only when at least one field changed. Prints progress to stdout.
    """
    company_id = 32
    print(f"🔧 Fixing Data for Company ID {company_id}...")
    company = session.query(Company).filter_by(id=company_id).first()
    if not company:
        print("❌ Company not found.")
        return
    enrichment = session.query(EnrichmentData).filter_by(
        company_id=company_id, source_type="website_scrape"
    ).first()
    # Guard clauses replace the original nested if/else pyramid.
    if not enrichment or not enrichment.content:
        print("⚠️ No enrichment data found.")
        return
    imp = enrichment.content.get("impressum")
    if not imp:
        print("⚠️ No impressum data in enrichment.")
        return
    print(f"📄 Found Impressum: {imp}")
    # (impressum key, Company attribute, label used in the log line) —
    # one table instead of five copy-pasted blocks with duplicate lookups.
    field_map = [
        ("city", "city", "City"),
        ("vat_id", "crm_vat", "VAT"),
        ("country_code", "country", "Country"),
        ("street", "street", "Street"),
        ("zip", "zip_code", "Zip"),
    ]
    changed = False
    for key, attr, label in field_map:
        value = imp.get(key)
        if value:  # only overwrite when the impressum actually provides a value
            setattr(company, attr, value)
            changed = True
            print(f" -> Set {label}: {getattr(company, attr)}")
    if changed:
        session.commit()
        print("✅ Database updated.")
    else:
        print(" No changes needed.")
if __name__ == "__main__":
    fix_data()

View File

@@ -0,0 +1,909 @@
import argparse
import base64
import json
import logging
import re
import sys
import os
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from config import Config
import gtm_db_manager as db_manager
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from helpers import call_gemini_flash, scrape_website_details, call_gemini_image
from config import Config, BASE_DIR # Import Config and BASE_DIR
# Directory for all orchestrator run artifacts (prompts, responses, log file).
LOG_DIR = "Log_from_docker"
# exist_ok avoids the check-then-create race of the previous exists() test.
os.makedirs(LOG_DIR, exist_ok=True)
ORCHESTRATOR_VERSION = "1.3.0"  # Bump version for image fix & language enforcement
# One timestamp per process: shared by the log file and every artifact name.
run_timestamp = datetime.now().strftime("%y-%m-%d_%H-%M-%S")
log_file_path = os.path.join(LOG_DIR, f"{run_timestamp}_gtm_orchestrator_run.log")
# Log to both a per-run file and stderr (stdout stays clean for payloads).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file_path, mode='a', encoding='utf-8'),
        logging.StreamHandler(sys.stderr)
    ]
)
logging.info(f"GTM Architect Orchestrator v{ORCHESTRATOR_VERSION} ({run_timestamp}) starting...")
# !!! CRITICAL FIX: Load API keys at the very beginning !!!
# This ensures Config.API_KEYS is populated before any AI functions are called.
Config.load_api_keys()
def log_and_save(project_id, step_name, data_type, content):
    """Log a pipeline step and persist its payload as a timestamped artifact file.

    dict/list payloads are written as pretty-printed JSON; anything else is
    stringified. Failures to write are logged, never raised.
    """
    logging.info(f"Project {project_id} - Step: {step_name} - Type: {data_type}")
    filepath = os.path.join(LOG_DIR, f"{run_timestamp}_{step_name}_{data_type}.txt")
    try:
        with open(filepath, 'w', encoding='utf-8') as artifact:
            if isinstance(content, (dict, list)):
                json.dump(content, artifact, indent=4, ensure_ascii=False)
            else:
                artifact.write(str(content))
        logging.info(f"Saved {data_type} to {filepath}")
    except Exception as exc:
        logging.error(f"Failed to save {data_type} to file: {exc}")
def get_system_instruction(lang):
    """Return the GTM-expert system prompt for Gemini in the requested language.

    Only 'de' is treated specially; any other value falls back to English.
    The text embeds the Wackler hybrid-service category rules that phases 4
    (strategy) and 6 (sales enablement) rely on — keep both language
    variants content-equivalent when editing.
    """
    if lang == 'de':
        return """
    Du bist ein internationaler Go-to-Market (GTM) Experte für B2B-Technologie-Unternehmen im Bereich Robotik, Facility Management und IoT.
    Deine Aufgabe ist es, aus technischen Spezifikationen und Produktbeschreibungen eine umfassende GTM-Strategie zu entwickeln.
    Du arbeitest strukturiert, datengetrieben und präzise. Deine Antworten sind immer klar, professionell und direkt auf den Punkt.
    Wenn du JSON ausgeben sollst, gib NUR das JSON-Objekt aus, ohne umschließende Text- oder Code-Formatierungen.
    Behalte während des gesamten Prozesses eine konsistente Logik bei. Alle Phasen bauen aufeinander auf.
    Führe eine interne Plausibilitätsprüfung durch, bevor du eine Antwort gibst.
    # CONTEXT: THE WACKLER GROUP ECOSYSTEM
    Wir sind Teil der Wackler Group. Wir nutzen das gesamte Dienstleistungsportfolio der Muttergesellschaft, um Hardware-Schwächen in Service-Stärken zu verwandeln.
    Das Ziel ist immer eine "Symbiose aus Mensch & Maschine".
    # REGEL 5: THE "DYNAMIC SERVICE" LOGIC (UNIVERSAL)
    Analysiere zuerst die **Kategorie** des Roboters und wende dann die passende Hybrid-Logik an:
    1. CLEANING INDOOR (CARPET) - Sauger für Teppiche
    * Robot: Macht die Fläche (80%).
    * Human (Wackler Cleaning): Macht Kanten, Ecken, Fleckenentfernung (20%).
    2. CLEANING INDOOR (WET SURFACE) - Scheuersauger (Hartboden)
    * Robot: Reinigt Flure/Hallen kontinuierlich.
    * Human (Wackler Cleaning): Sicherheits-Check (Rutschgefahr), Wasserwechsel, Hygiene-Audit.
    3. CLEANING OUTDOOR (SWEEPER) - Kehrmaschine (Asphalt)
    * Robot: Nimmt Feinstaub und Zigaretten auf.
    * Human (Wackler Cleaning): Leert Mülleimer, entfernt Sperrmüll, pflegt Grünanlagen.
    4. POS ROBOTER - Retail/Airport Assistenz
    * Robot: Information, Wegweiser, Blickfang.
    * Human (Wackler Service): Beratung, Verkauf, emotionale Kundenbindung.
    5. SECURITY ROBOTER - Mobile Überwachung (Quadruped/Drohne)
    * Robot: "Detektion & Präsenz". 24/7 Patrouille, Wärmebild, keine Müdigkeit.
    * Human (Wackler Security): "Bewertung & Intervention". NSL bewertet Alarm, Interventionskraft fährt raus.
    * Pitch: "Der Roboter sieht die Gefahr, Wackler beseitigt sie."
    6. SERVICE ROBOTER - Transport (Gastro/Klinik)
    * Robot: Schweres Tragen (Tabletts, Wäsche) von A nach B.
    * Human (Wackler Service): Patientenkontakt, Tisch-Service, Hygiene.
    7. TRANSPORT ROBOTER - Intralogistik (Lager)
    * Robot: Paletten-Transport, Milkrun.
    * Human (Wackler Logistics): Prozesssteuerung, Ausnahmebehandlung, Umpacken.
    Wende diese spezifische Logik zwingend in PHASE 4 (Strategy) und PHASE 6 (Sales Enablement) an.
    WICHTIG: Antworte IMMER in der vom User geforderten Sprache (Deutsch), auch wenn der Input Englisch ist.
    """
    else:  # Default to English
        return """
    You are an international Go-to-Market (GTM) expert for B2B technology companies in robotics, facility management, and IoT.
    Your task is to develop a comprehensive GTM strategy from technical specifications and product descriptions.
    You are structured, data-driven, and precise. Your answers are always clear, professional, and to the point.
    When asked to output JSON, provide ONLY the JSON object without any surrounding text or code formatting.
    Maintain consistent logic throughout the process. All phases build on each other.
    Perform an internal plausibility check before providing an answer.
    # CONTEXT: THE WACKLER GROUP ECOSYSTEM
    We are part of the Wackler Group. We leverage the full service portfolio of the parent company to turn hardware weaknesses into service strengths.
    The goal is always a "Symbiosis of Man & Machine".
    # RULE 5: THE "DYNAMIC SERVICE" LOGIC (UNIVERSAL)
    First analyze the **category** of the robot and then apply the appropriate hybrid logic:
    1. CLEANING INDOOR (CARPET) - Vacuums for carpets
    * Robot: Does the area (80%).
    * Human (Wackler Cleaning): Does edges, corners, spot removal (20%).
    2. CLEANING INDOOR (WET SURFACE) - Scrubber dryers (Hard floor)
    * Robot: Cleans halls/corridors continuously.
    * Human (Wackler Cleaning): Safety check (slip hazard), water change, hygiene audit.
    3. CLEANING OUTDOOR (SWEEPER) - Sweepers (Asphalt)
    * Robot: Picks up fine dust and cigarettes.
    * Human (Wackler Cleaning): Empties bins, removes bulky waste, maintains greenery.
    4. POS ROBOT - Retail/Airport Assistance
    * Robot: Information, wayfinding, eye-catcher.
    * Human (Wackler Service): Consultation, sales, emotional customer bonding.
    5. SECURITY ROBOT - Mobile Surveillance (Quadruped/Drone)
    * Robot: "Detection & Presence". 24/7 patrol, thermal imaging, no fatigue.
    * Human (Wackler Security): "Evaluation & Intervention". NSL evaluates alarm, intervention force drives out.
    * Pitch: "The robot sees the danger, Wackler eliminates it."
    6. SERVICE ROBOT - Transport (Hospitality/Clinic)
    * Robot: Heavy lifting (trays, laundry) from A to B.
    * Human (Wackler Service): Patient contact, table service, hygiene.
    7. TRANSPORT ROBOT - Intralogistics (Warehouse)
    * Robot: Pallet transport, milkrun.
    * Human (Wackler Logistics): Process control, exception handling, repacking.
    Mandatory application of this logic in PHASE 4 (Strategy) and PHASE 6 (Sales Enablement).
    IMPORTANT: Always answer in the requested language.
    """
def get_output_lang_instruction(lang):
    """Return a strong instruction that forces the model's output language.

    'de' yields the German enforcement line; any other value falls back to
    the English one.
    """
    german = "ACHTUNG: Die gesamte Ausgabe (JSON-Werte, Texte, Analysen) MUSS in DEUTSCH sein. Übersetze englische Input-Daten."
    english = "IMPORTANT: The entire output MUST be in ENGLISH."
    return german if lang == 'de' else english
# --- ORCHESTRATOR PHASES ---
def list_history(payload):
    """Return every stored GTM project (payload is accepted but unused)."""
    return {"projects": db_manager.get_all_projects()}
def load_history(payload):
    """Load a project's full state, decoding phase results stored as JSON strings.

    Raises ValueError when no projectId is supplied or the project is unknown.
    Undecodable phase strings are kept verbatim (with a warning).
    """
    project_id = payload.get('projectId')
    if not project_id:
        raise ValueError("No projectId provided for loading history.")
    data = db_manager.get_project_data(project_id)
    if not data:
        raise ValueError(f"Project {project_id} not found.")
    # Older rows persisted each phase result as a JSON string; normalise in place.
    phases = data.get('phases')
    if isinstance(phases, dict):
        for phase_name, result in phases.items():
            if not isinstance(result, str):
                continue
            try:
                phases[phase_name] = json.loads(result)
            except json.JSONDecodeError:
                logging.warning(f"Could not decode JSON for {phase_name} in project {project_id}. Leaving as is.")
    return data
def delete_session(payload):
    """Delete a stored project; raise ValueError when no projectId is given."""
    project_id = payload.get('projectId')
    if project_id:
        return db_manager.delete_project(project_id)
    raise ValueError("No projectId provided for deletion.")
def phase1(payload):
    """Phase 1: Product analysis & constraints.

    Accepts raw text or a URL (URLs get scraped first), asks Gemini for a
    consolidated feature/constraint/category analysis, then runs a second
    pass extracting normalized hard-fact specs. Creates the project on first
    run and auto-renames it from the extracted brand/model. Returns the
    combined result dict including 'projectId', or an error dict when the
    first model response is not valid JSON.
    """
    product_input = payload.get('productInput', '')
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    # Check if input is a URL and scrape it
    if product_input.strip().startswith('http'):
        logging.info(f"Input detected as URL. Starting scrape for: {product_input}")
        analysis_content = scrape_website_details(product_input)
        # scrape_website_details signals failure via a "Fehler:" prefix in its return text.
        if "Fehler:" in analysis_content:
            # If scraping fails, use the URL itself with a note for the AI.
            analysis_content = f"Scraping der URL {product_input} ist fehlgeschlagen. Analysiere das Produkt basierend auf der URL und deinem allgemeinen Wissen."
            logging.warning("Scraping failed. Using URL as fallback content for analysis.")
    else:
        analysis_content = product_input
        logging.info("Input is raw text. Analyzing directly.")
    # Automatic project creation when the caller did not pass an existing project.
    if not project_id:
        # Derive a provisional project name from the input (renamed later from specs).
        raw_name = product_input.strip()
        if raw_name.startswith('http'):
            name = f"Web Analysis: {raw_name[:30]}..."
        else:
            name = (raw_name[:30] + "...") if len(raw_name) > 30 else raw_name
        logging.info(f"Creating new project: {name}")
        new_proj = db_manager.create_project(name)
        project_id = new_proj['id']
        logging.info(f"New Project ID: {project_id}")
    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)
    prompt = f"""
    PHASE 1: PRODUCT ANALYSIS & CONSTRAINTS
    Input: "{analysis_content}"
    Task:
    1. Extract and CONSOLIDATE technical features into 8-12 high-level core capabilities or value propositions. Group minor specs (e.g., specific ports like USB/Ethernet) into broader categories (e.g., "Connectivity & Integration"). Do NOT list every single hardware spec individually. Focus on what matters for the buyer.
    2. Define hard constraints (e.g., physical dimensions, max payload, environment limitations).
    3. Classify the product into one of the 7 Wackler Categories: [Cleaning Indoor (Carpet), Cleaning Indoor (Wet), Cleaning Outdoor (Sweeper), POS Robot, Security Robot, Service Robot, Transport Robot].
    4. Check for internal portfolio conflicts (hypothetical product "Scrubber 5000").
    {lang_instr}
    Output JSON format ONLY: {{"features": [], "constraints": [], "category": "Identified Category", "conflictCheck": {{"hasConflict": false, "details": "", "relatedProduct": ""}}, "rawAnalysis": ""}}
    """
    log_and_save(project_id, "phase1", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase1", "response", response)
    try:
        data = json.loads(response)
        # --- PART 2: HARD FACTS EXTRACTION ---
        # Target schema for the spec-extraction pass (plain string, not an f-string).
        spec_schema = """
        {
        "metadata": {
        "product_id": "string (slug)",
        "brand": "string",
        "model_name": "string",
        "description": "string (short marketing description of the product)",
        "category": "cleaning | service | security | industrial",
        "manufacturer_url": "string"
        },
        "core_specs": {
        "battery_runtime_min": "integer (standardized to minutes)",
        "charge_time_min": "integer (standardized to minutes)",
        "weight_kg": "float",
        "dimensions_cm": { "l": "float", "w": "float", "h": "float" },
        "max_slope_deg": "float",
        "ip_rating": "string",
        "climb_height_cm": "float",
        "navigation_type": "string (e.g. SLAM, LiDAR, VSLAM)",
        "connectivity": ["string"]
        },
        "layers": {
        "cleaning": {
        "fresh_water_l": "float",
        "dirty_water_l": "float",
        "area_performance_sqm_h": "float",
        "mop_pressure_kg": "float"
        },
        "service": {
        "max_payload_kg": "float",
        "number_of_trays": "integer",
        "display_size_inch": "float",
        "ads_capable": "boolean"
        },
        "security": {
        "camera_types": ["string"],
        "night_vision": "boolean",
        "gas_detection": ["string"],
        "at_interface": "boolean"
        }
        },
        "extended_features": [
        { "feature": "string", "value": "string", "unit": "string" }
        ]
        }
        """
        specs_prompt = f"""
        PHASE 1 (Part 2): HARD FACT EXTRACTION
        Input: "{analysis_content}"
        Task: Extract technical specifications strictly according to the provided JSON schema.
        NORMALIZATION RULES (STRICTLY FOLLOW):
        1. Time: Convert ALL time values (runtime, charging) to MINUTES (Integer). Example: "1:30 h" -> 90, "2 hours" -> 120.
        2. Dimensions/Weight: All lengths in CM, weights in KG.
        3. Performance: Area performance always in m²/h.
        4. Booleans: Use true/false (not strings).
        5. Unknowns: If a value is not in the text, set it to null. DO NOT HALLUCINATE.
        LOGIC FOR LAYERS:
        - If product uses water/brushes -> Fill 'layers.cleaning'.
        - If product delivers items/trays -> Fill 'layers.service'.
        - If product patrols/detects -> Fill 'layers.security'.
        EXTENDED FEATURES:
        - Put any technical feature that doesn't fit the schema into 'extended_features'.
        Output JSON format ONLY based on this schema:
        {spec_schema}
        """
        log_and_save(project_id, "phase1_specs", "prompt", specs_prompt)
        specs_response = call_gemini_flash(specs_prompt, system_instruction=sys_instr, json_mode=True)
        log_and_save(project_id, "phase1_specs", "response", specs_response)
        try:
            specs_data = json.loads(specs_response)
            # FORCE URL PERSISTENCE: If input was a URL, ensure it's in the metadata
            if product_input.strip().startswith('http'):
                if 'metadata' not in specs_data:
                    specs_data['metadata'] = {}
                specs_data['metadata']['manufacturer_url'] = product_input.strip()
            # AUTO-RENAME PROJECT based on extracted metadata
            if 'metadata' in specs_data:
                brand = specs_data['metadata'].get('brand', '')
                model = specs_data['metadata'].get('model_name', '')
                if brand or model:
                    new_name = f"{brand} {model}".strip()
                    if new_name:
                        logging.info(f"Renaming project {project_id} to: {new_name}")
                        db_manager.update_project_name(project_id, new_name)
            data['specs'] = specs_data
        except json.JSONDecodeError:
            # Spec extraction failure is non-fatal: keep the phase-1 analysis and
            # attach the raw response for debugging.
            logging.error(f"Failed to decode JSON from Gemini response in phase1 (specs): {specs_response}")
            data['specs'] = {"error": "Failed to extract specs", "raw": specs_response}
        db_manager.save_gtm_result(project_id, 'phase1_result', json.dumps(data))
        # IMPORTANT: return the ID so the frontend can store it.
        data['projectId'] = project_id
        return data
    except json.JSONDecodeError:
        logging.error(f"Failed to decode JSON from Gemini response in phase1: {response}")
        error_response = {
            "error": "Die Antwort des KI-Modells war kein gültiges JSON. Das passiert manchmal bei hoher Auslastung. Bitte versuchen Sie es in Kürze erneut.",
            "details": response,
            "projectId": project_id  # Also return the ID on error? Better not — nothing has been saved yet.
        }
        return error_response
def phase2(payload):
    """Phase 2: Derive Ideal Customer Profiles (ICPs) and data proxies from the phase-1 analysis."""
    phase1_data = payload.get('phase1Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)
    prompt = f"""
    PHASE 2: IDEAL CUSTOMER PROFILE (ICP) & DATA PROXIES - STRATEGIC ANALYSIS
    **Product Context:**
    {json.dumps(phase1_data)}
    **Your Task:**
    Answer the following strategic questions to determine the Ideal Customer Profiles (ICPs).
    **Strategic Questions:**
    1. **ICP Identification:** Based on the product's category ({phase1_data.get('category', 'Unknown')}), which 3 industries face the most significant operational challenges (e.g., safety, efficiency, high manual labor costs, security risks) that this product directly solves?
    2. **Rationale:** For each identified ICP, provide a concise rationale. Why is this product a perfect fit for this specific industry? (e.g., "Reduces inspection costs by X%", "Improves safety in hazardous environments", "Automates a critical but repetitive task").
    3. **Data Proxies:** How can we find these companies online? What specific digital footprints (data proxies) do they leave? Think about:
    * Keywords on their websites (e.g., 'plant safety', 'autonomous inspection', 'logistics automation').
    * Specific job titles on LinkedIn (e.g., 'Head of Security', 'Logistics Manager', 'Maintenance Lead').
    * Their participation in specific industry trade shows or publications.
    {lang_instr}
    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"icps": [{{"name": "Industry Name", "rationale": "Why it's a fit."}}], "dataProxies": [{{"target": "e.g., Company Websites", "method": "How to find them."}}]}}
    """
    log_and_save(project_id, "phase2", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase2", "response", response)
    # NOTE(review): unlike phase1, a malformed model response raises here — confirm callers handle it.
    data = json.loads(response)
    db_manager.save_gtm_result(project_id, 'phase2_result', json.dumps(data))
    return data
def phase3(payload):
    """Phase 3: Identify 'whale' key accounts per ICP and map buying-center job titles."""
    phase2_data = payload.get('phase2Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)
    prompt = f"""
    PHASE 3: WHALE HUNTING & BUYING CENTER ANALYSIS - STRATEGIC ANALYSIS
    **Target ICPs (Industries):**
    {json.dumps(phase2_data.get('icps'))}
    **Your Task:**
    Answer the following strategic questions to identify key accounts and decision-makers.
    **Strategic Questions:**
    1. **Whale Identification:** For each ICP, identify 3-5 specific 'Whale' companies in the DACH market. These should be leaders, innovators, or companies with significant scale in that sector.
    2. **Buying Center Roles:** Identify the specific job titles for the 4 Universal Strategic Archetypes in the context of these industries.
    * **Operativer Entscheider:** Who feels the pain daily? (e.g., Plant Manager, Store Manager, Head of Logistics).
    * **Infrastruktur Verantwortlicher:** Who has to integrate it? (e.g., IT Security, Facility Manager, Legal/Compliance).
    * **Wirtschaftlicher Entscheider:** Who signs the check? (e.g., CFO, Purchasing Director).
    * **Innovations-Treiber:** Who pushes for the pilot? (e.g., CDO, Strategy Lead).
    {lang_instr}
    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"whales": [{{"industry": "ICP Name", "accounts": ["Company A", "Company B"]}}], "roles": ["Operativer Entscheider: [Job Titles]", "Infrastruktur Verantwortlicher: [Job Titles]", "Wirtschaftlicher Entscheider: [Job Titles]", "Innovations-Treiber: [Job Titles]"]}}
    """
    log_and_save(project_id, "phase3", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase3", "response", response)
    # NOTE(review): raises on malformed JSON (no fallback like phase1) — confirm upstream handling.
    data = json.loads(response)
    db_manager.save_gtm_result(project_id, 'phase3_result', json.dumps(data))
    return data
def phase4(payload):
    """Phase 4: Build the strategy matrix (pain point, angle, hybrid-service differentiation) per segment."""
    phase3_data = payload.get('phase3Data', {})
    phase1_data = payload.get('phase1Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)
    # Flattened account list; currently collected but not referenced in the prompt below.
    all_accounts = []
    for w in phase3_data.get('whales', []):
        all_accounts.extend(w.get('accounts', []))
    prompt = f"""
    PHASE 4: STRATEGY & ANGLE DEVELOPMENT - STRATEGIC ANALYSIS
    **Product Category:** {phase1_data.get('category')}
    **Target Industries:** {json.dumps([w.get('industry') for w in phase3_data.get('whales', [])])}
    **Product Features:** {json.dumps(phase1_data.get('features'))}
    **Your Task:**
    Answer the following strategic questions to build the core of our market approach.
    **Strategic Questions:**
    1. **Pain Point Analysis:** For each industry segment, what is the single most significant, measurable **Pain Point** this product solves?
    2. **Develop the Angle:** What is our unique story? The "Angle" should directly connect a product capability to their primary pain point.
    3. **Define Differentiation (Hybrid Service):** Why should they choose us? Explain the specific "Service Gap" that our Hybrid Model (Machine + Human) closes for this specific Category ({phase1_data.get('category')}). E.g., for Security, the gap is "Intervention"; for Cleaning, it is "Edges/Hygiene".
    {lang_instr}
    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"strategyMatrix": [{{"segment": "Target Industry", "painPoint": "The core problem.", "angle": "Our unique story.", "differentiation": "Why us (Hybrid Service logic)."}}]}}
    """
    log_and_save(project_id, "phase4", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase4", "response", response)
    # NOTE(review): raises on malformed JSON — confirm upstream handling.
    data = json.loads(response)
    db_manager.save_gtm_result(project_id, 'phase4_result', json.dumps(data))
    return data
def phase5(payload):
    """Phase 5: Render the final GTM strategy report (Markdown) from the results of phases 1-4.

    Returns {"report": <markdown string>}; the report is also persisted via
    db_manager. This phase deliberately uses its own consultant-style system
    instruction instead of the JSON-oriented global one.
    """
    phase4_data = payload.get('phase4Data', {})
    phase3_data = payload.get('phase3Data', {})
    phase2_data = payload.get('phase2Data', {})
    phase1_data = payload.get('phase1Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    # Diagnostic logging
    strat_matrix = phase4_data.get('strategyMatrix', [])
    logging.info(f"Phase 5 Input Check - Strategy Matrix Rows: {len(strat_matrix)}")
    # SPECIAL INSTRUCTION FOR PHASE 5 (REPORTING):
    # we override the global JSON instruction here to force long-form prose output.
    if lang == 'de':
        report_sys_instr = """
        Du bist ein Senior Business Consultant bei einer Top-Tier-Beratung (wie McKinsey oder BCG).
        Deine Aufgabe ist es, einen strategisch tiefgehenden, detaillierten "Go-to-Market Strategy Report" zu verfassen.
        REGELN:
        1. **Kein JSON:** Deine Ausgabe ist reines, sauber formatiertes Markdown.
        2. **Senior Grade:** Schreibe nicht stichpunktartig "dünn", sondern formuliere ganze Sätze und erkläre die Zusammenhänge ("Why it matters").
        3. **Vollständigkeit:** Brich niemals mitten in einer Tabelle oder einem Satz ab.
        4. **Formatierung:** Nutze Fettgedrucktes, Listen und Tabellen, um die Lesbarkeit zu erhöhen.
        """
    else:
        report_sys_instr = """
        You are a Senior Business Consultant at a top-tier firm (like McKinsey or BCG).
        Your task is to write a strategically deep, detailed "Go-to-Market Strategy Report".
        RULES:
        1. **No JSON:** Your output is pure, cleanly formatted Markdown.
        2. **Senior Grade:** Do not write "thin" bullet points. Write full sentences and explain the context ("Why it matters").
        3. **Completeness:** Never stop in the middle of a table or sentence.
        4. **Formatting:** Use bolding, lists, and tables to enhance readability.
        """
    lang_instr = get_output_lang_instruction(lang)
    # Reduce the input data to the essentials to sharpen the output focus.
    # FIX: Include 'specs' (Hard Facts) for the report
    lean_phase1 = {
        "features": phase1_data.get('features', []),
        "constraints": phase1_data.get('constraints', []),
        "specs": phase1_data.get('specs', {}),
        "category": phase1_data.get('category', 'Unknown')
    }
    prompt = f"""
    PHASE 5: FINAL REPORT GENERATION
    INPUT DATA:
    - Product: {json.dumps(lean_phase1)}
    - ICPs: {json.dumps(phase2_data.get('icps', []))}
    - Targets: {json.dumps(phase3_data.get('whales', []))}
    - Strategy Matrix: {json.dumps(phase4_data.get('strategyMatrix', []))}
    TASK:
    Write the "GTM STRATEGY REPORT v3.1" in Markdown.
    Expand on the input data. Don't just copy it. Interpret it.
    REQUIRED STRUCTURE & CONTENT:
    # GTM STRATEGY REPORT v3.1
    ## 1. Strategic Core
    * **Category Definition:** Explicitly state that this product falls under the '{lean_phase1.get('category')}' category.
    * **Dynamic Service Logic:** Explain clearly how the "Machine Layer" (What the robot does) and the "Human Service Layer" (What Wackler does) work together for THIS specific category. Use the logic defined for '{lean_phase1.get('category')}'.
    ## 2. Executive Summary
    * Write a compelling management summary (approx. 150 words) outlining the market opportunity and the core value proposition.
    ## 3. Product Reality Check (Technical Deep Dive)
    * **Core Capabilities:** Summarize the top 3-5 capabilities.
    * **Technical Constraints:** Create a detailed Markdown table for the Hard Facts.
    * Include ALL available specs (Dimensions, Weight, Runtime, Limits, Sensor types, Cleaning performance, etc.) from the input.
    * Make it as comprehensive as a technical datasheet to satisfy the "Evaluator" persona.
    | Feature | Value | Implication |
    | :--- | :--- | :--- |
    | ... | ... | ... |
    ## 4. Target Architecture (ICPs)
    * For each ICP, write a short paragraph explaining the "Strategic Fit". Why is this industry under pressure to buy?
    * Mention key "Whale" accounts identified.
    ## 5. Strategy Matrix
    * Create a detailed Markdown table mapping the strategy.
    * **CRITICAL:** Ensure the table syntax is perfect. use <br> for line breaks inside cells.
    * Columns: **Target Segment** | **The Pain (Operational)** | **The Angle (Story)** | **Differentiation (Service Gap)**
    * Fill this table with the data from the 'Strategy Matrix' input.
    ## 6. Operational GTM Roadmap
    * **Step 1: Lead Gen:** Recommend specific Inbound/Outbound tactics for these ICPs.
    * **Step 2: Consultative Sales:** How to handle the site-check? What constraints need checking?
    * **Step 3: Proof of Value:** Define the Pilot Phase (Paid Pilot vs. Free PoC).
    * **Step 4: Expansion:** Path to RaaS/Service contracts.
    ## 7. Commercial Logic (ROI Framework)
    * Present the ROI calculation logic.
    * **The Formula:** Show the Net Value formula.
    * **Input Variables:** List the specific variables the customer needs to provide.
    * **Example Calculation:** Provide a hypothetical example calculation with plausible ranges (e.g. "Assuming 20-30% efficiency gain...") to illustrate the potential.
    {lang_instr}
    Output: Return strictly MARKDOWN formatted text.
    """
    log_and_save(project_id, "phase5", "prompt", prompt)
    # Use the specialized system instruction here!
    report = call_gemini_flash(prompt, system_instruction=report_sys_instr, json_mode=False)
    # Clean up potentially fenced markdown code blocks
    report = report.strip()
    if report.startswith("```markdown"):
        report = report.replace("```markdown", "", 1)
    if report.startswith("```"):
        report = report.replace("```", "", 1)
    if report.endswith("```"):
        report = report[:-3]
    report = report.strip()
    log_and_save(project_id, "phase5", "response", report)
    db_manager.save_gtm_result(project_id, 'phase5_result', json.dumps({"report": report}))
    return {"report": report}
def phase6(payload):
    """Phase 6: Sales enablement — objection battlecards per archetype plus image-generation prompts."""
    phase4_data = payload.get('phase4Data', {})
    phase3_data = payload.get('phase3Data', {})
    phase1_data = payload.get('phase1Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)
    prompt = f"""
    PHASE 6: SALES ENABLEMENT & VISUALS - STRATEGIC ANALYSIS
    **Context:**
    - Product Features: {json.dumps(phase1_data.get('features'))}
    - Personas: {json.dumps(phase3_data.get('roles'))}
    - Strategy: {json.dumps(phase4_data.get('strategyMatrix'))}
    **Your Task:**
    Answer the following strategic questions to create sales enablement materials.
    **Strategic Questions:**
    1. **Anticipate Objections:** For each of the 4 key Archetypes (Operative, Infrastructure, Economic, Innovation), what is their most likely and critical **objection**?
    * *Special Focus for 'Infrastructure Responsible' (Gatekeeper):* Address **Legal, Liability & Compliance** issues (e.g. GDPR, DGUV V3, accident liability) specifically.
    2. **Formulate Battlecards:** For each objection, formulate a concise **response script**.
    * *Requirement:* Use specific **proof points** (e.g., "Certified according to...", "Data hosted in Germany", "Insurance coverage by Wackler") instead of generic promises.
    3. **Create Visual Prompts:** For the top 3 use cases, write a detailed **visual prompt** for an image generation AI.
    {lang_instr}
    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"battlecards": [{{"persona": "Archetype (Job Title)", "objection": "The key objection.", "responseScript": "The compelling response with proof points."}}], "visualPrompts": [{{"title": "Image Title", "context": "Use case description.", "prompt": "Detailed photorealistic prompt."}}]}}
    """
    log_and_save(project_id, "phase6", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase6", "response", response)
    data = json.loads(response)
    # The model occasionally wraps the object in a single-element list; unwrap it.
    if isinstance(data, list):
        data = data[0]
    db_manager.save_gtm_result(project_id, 'phase6_result', json.dumps(data))
    return data
def phase7(payload):
    """Phase 7: Generate conversion-optimized landing-page copy for the top two ICPs."""
    phase4_data = payload.get('phase4Data', {})
    phase2_data = payload.get('phase2Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)
    prompt = f"""
    PHASE 7: VERTICAL LANDING PAGE COPY - STRATEGIC ANALYSIS
    **Context:**
    - ICPs: {json.dumps(phase2_data.get('icps'))}
    - Strategy: {json.dumps(phase4_data.get('strategyMatrix'))}
    **Your Task:**
    Create conversion-optimized landing page copy for the top 2 ICPs by answering the following questions.
    **Strategic Questions:**
    1. **Headline:** What is the most powerful **outcome** for this industry? The headline must grab the attention of a Decider and state this primary result.
    2. **Subline:** How can you elaborate on the headline? Briefly mention the core problem this industry faces and introduce our solution as the answer.
    3. **Benefit Bullets:** Transform 3-5 key technical features into tangible **benefit statements** for this specific industry. Each bullet point should answer the customer's question: "What's in it for me?".
    4. **Call-to-Action (CTA):** What is the logical next step we want the user to take? The CTA should be clear, concise, and action-oriented.
    5. **Apply Wackler Symbiosis:** Ensure the copy clearly communicates the value of the robot combined with the human expert service.
    {lang_instr}
    **Output:**
    Provide your analysis ONLY in the following JSON format:
    {{"landingPages": [{{"industry": "ICP Name", "headline": "The compelling headline.", "subline": "The elaborating subline.", "bullets": ["Benefit 1", "Benefit 2"], "cta": "The call to action."}}]}}
    """
    log_and_save(project_id, "phase7", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase7", "response", response)
    data = json.loads(response)
    # The model occasionally wraps the object in a single-element list; unwrap it.
    if isinstance(data, list):
        data = data[0]
    db_manager.save_gtm_result(project_id, 'phase7_result', json.dumps(data))
    return data
def phase8(payload):
    """Phase 8: build the CFO-facing commercial logic / ROI framework.

    Combines the Phase 1 product category and Phase 2 ICPs into a prompt,
    queries Gemini in JSON mode, stores the parsed result under
    'phase8_result' and returns it.

    Args:
        payload: dict with 'phase1Data', 'phase2Data', 'lang' (default 'de')
            and 'projectId'.

    Returns:
        dict with a 'businessCases' list as produced by the model.

    Raises:
        json.JSONDecodeError: if the model response is not valid JSON.
    """
    phase2_data = payload.get('phase2Data', {})
    phase1_data = payload.get('phase1Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)
    prompt = f"""
PHASE 8: COMMERCIAL LOGIC & ROI CALCULATOR - STRATEGIC ANALYSIS
**Context:**
- Product Category: {phase1_data.get('category')}
- ICPs: {json.dumps(phase2_data.get('icps'))}
**Your Task:**
Develop a calculation framework (NOT just random numbers) for the CFO pitch.
**Strategic Questions:**
1. **Identify the Cost Driver:** What is the unit of cost we are attacking?
2. **ROI Formula & Example:** Create a formula: `Net Value = (Savings + Risk Mitigation) - (TCO)`.
    * *CRITICAL:* Provide **PLAUSIBLE EXAMPLE RANGES** for efficiency gains (e.g., "Estimate: 20-30% reduction in manual patrol time") instead of just listing the variable.
    * **Do NOT output "undefined".** Give a realistic estimation based on the industry context.
3. **Risk Argument:** Financial value of avoiding the worst-case scenario.
{lang_instr}
**Output:**
Provide your analysis ONLY in the following JSON format:
{{"businessCases": [{{"industry": "ICP Name", "costDriver": "Unit of cost.", "efficiencyGain": "Plausible estimate range (e.g. 25-35%).", "roiFormula": "The formula with defined variables.", "riskArgument": "The cost of inaction."}}]}}
"""
    log_and_save(project_id, "phase8", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase8", "response", response)
    data = json.loads(response)
    # Some model responses wrap the object in a one-element list; unwrap it.
    if isinstance(data, list):
        data = data[0]
    db_manager.save_gtm_result(project_id, 'phase8_result', json.dumps(data))
    return data
def phase9(payload):
    """Phase 9: translate technical features into value-oriented benefits.

    Builds a prompt from Phase 1 features and Phase 4 pain points, queries
    Gemini in JSON mode, stores the parsed result under 'phase9_result' and
    returns it.

    Args:
        payload: dict with 'phase1Data', 'phase4Data', 'lang' (default 'de')
            and 'projectId'.

    Returns:
        dict with a 'techTranslations' list as produced by the model.

    Raises:
        json.JSONDecodeError: if the model response is not valid JSON.
    """
    phase1_data = payload.get('phase1Data', {})
    phase4_data = payload.get('phase4Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    lang_instr = get_output_lang_instruction(lang)
    prompt = f"""
PHASE 9: THE "FEATURE-TO-VALUE" TRANSLATOR - STRATEGIC ANALYSIS
**Context:**
- Input Features: {json.dumps(phase1_data.get('features'))}
- Strategy Pains: {json.dumps([s.get('painPoint') for s in phase4_data.get('strategyMatrix', [])])}
**Your Task:**
Translate technical features into compelling, value-oriented benefits.
**Structured Process:**
1. **State the Feature:** Pick a key technical feature.
2. **Ask "So what?" (The Consequence):** What is the immediate consequence?
3. **Ask "So what?" again (The Value):** What is the ultimate benefit?
4. **Formulate Headline:** Short, powerful headline.
{lang_instr}
**Output:**
Provide your analysis ONLY in the following JSON format:
{{"techTranslations": [{{"feature": "The technical feature.", "story": "The 'So what? So what?' analysis.", "headline": "The final value headline."}}]}}
"""
    log_and_save(project_id, "phase9", "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, "phase9", "response", response)
    data = json.loads(response)
    # FIX: consistency with phase7/phase8 — some model responses wrap the
    # object in a one-element list; unwrap so a dict is always stored/returned.
    if isinstance(data, list):
        data = data[0]
    db_manager.save_gtm_result(project_id, 'phase9_result', json.dumps(data))
    return data
def update_specs(payload):
    """Manually overwrite the technical specs (Hard Facts) of a project.

    Allows correcting AI-extracted data: loads the stored Phase 1 result,
    replaces its 'specs' entry and writes it back.

    Args:
        payload: dict with 'projectId' and 'specs'.

    Returns:
        dict: {"status": "success", "specs": <new specs>}.

    Raises:
        ValueError: when projectId/specs are missing, the project does not
            exist, or the stored Phase 1 result is missing/corrupted.
    """
    project_id = payload.get('projectId')
    new_specs = payload.get('specs')
    if not project_id:
        raise ValueError("No projectId provided for update_specs.")
    if not new_specs:
        raise ValueError("No specs provided for update_specs.")
    project_data = db_manager.get_project_data(project_id)
    if not project_data:
        raise ValueError(f"Project {project_id} not found.")
    phase1_result = project_data.get('phases', {}).get('phase1_result')
    if not phase1_result:
        raise ValueError("Phase 1 result not found. Cannot update specs.")
    # Older rows may still hold the phase result as a JSON string; normalize.
    if isinstance(phase1_result, str):
        try:
            phase1_result = json.loads(phase1_result)
        except json.JSONDecodeError:
            raise ValueError("Phase 1 result is corrupted (invalid JSON string).")
    phase1_result['specs'] = new_specs
    # save_gtm_result expects the phase result as a stringified JSON blob.
    db_manager.save_gtm_result(project_id, 'phase1_result', json.dumps(phase1_result))
    logging.info(f"Updated specs for project {project_id}")
    return {"status": "success", "specs": new_specs}
def translate(payload):
    """Placeholder for the report-translation feature (not yet implemented)."""
    # TODO: implement actual translation; the payload is ignored for now.
    placeholder = {"report": "Translated report will be here."}
    return placeholder
def image(payload):
    """Generate an image via Gemini from the payload's prompt.

    An optional reference image is taken from the first entry of
    'referenceImagesBase64', falling back to the legacy 'referenceImage'
    field. Returns {'imageBase64': <data URL>} on success or an error dict
    on failure (never raises).
    """
    prompt = payload.get('prompt', 'No Prompt')
    project_id = payload.get('projectId')
    aspect_ratio = payload.get('aspectRatio')
    # Resolve the reference image: new-style list field first, then legacy field.
    reference = None
    candidates = payload.get('referenceImagesBase64')
    if candidates and isinstance(candidates, list) and len(candidates) > 0:
        reference = candidates[0]
    elif payload.get('referenceImage'):
        reference = payload.get('referenceImage')
    log_and_save(project_id, "image", "prompt", f"{prompt} (Ratio: {aspect_ratio or 'default'})")
    if reference:
        logging.info(f"Image-Mode: Reference Image found (Length: {len(reference)})")
    try:
        image_b64 = call_gemini_image(prompt, reference_image_b64=reference, aspect_ratio=aspect_ratio)
        log_and_save(project_id, "image", "response_b64_preview", image_b64[:100] + "...")
        return {"imageBase64": f"data:image/png;base64,{image_b64}"}
    except Exception as e:
        logging.error(f"Failed to generate image: {e}", exc_info=True)
        return {"error": "Image generation failed.", "details": str(e)}
def main():
    """
    Main entry point of the script.
    Parses command-line arguments to determine which phase to run.

    The payload is loaded either from a JSON file (--payload_file, preferred
    because it avoids OS command-line length limits) or from a Base64 string
    (--payload_base64, deprecated). Results and errors are always printed to
    stdout as a single JSON document so the calling Node server can parse it;
    on any failure the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="GTM Architect Orchestrator")
    parser.add_argument("--mode", required=True, help="The execution mode (e.g., phase1, phase2).")
    parser.add_argument("--payload_base64", help="The Base64 encoded JSON payload (deprecated, use payload_file).")
    parser.add_argument("--payload_file", help="Path to a JSON file containing the payload (preferred).")
    args = parser.parse_args()
    payload = {}
    try:
        # Preferred path: payload delivered as a file.
        if args.payload_file:
            if not os.path.exists(args.payload_file):
                raise FileNotFoundError(f"Payload file not found: {args.payload_file}")
            with open(args.payload_file, 'r', encoding='utf-8') as f:
                payload = json.load(f)
        elif args.payload_base64:
            payload_str = base64.b64decode(args.payload_base64).decode('utf-8')
            payload = json.loads(payload_str)
        else:
            raise ValueError("No payload provided (neither --payload_file nor --payload_base64).")
    except (json.JSONDecodeError, base64.binascii.Error, ValueError, FileNotFoundError) as e:
        logging.error(f"Failed to load payload: {e}")
        # Print error as JSON to stdout for the server to catch
        print(json.dumps({"error": "Invalid payload.", "details": str(e)}))
        sys.exit(1)
    # Function mapping to dynamically call the correct phase
    modes = {
        "phase1": phase1,
        "phase2": phase2,
        "phase3": phase3,
        "phase4": phase4,
        "phase5": phase5,
        "phase6": phase6,
        "phase7": phase7,
        "phase8": phase8,
        "phase9": phase9,
        "update_specs": update_specs,
        "translate": translate,
        "image": image,
        "list_history": list_history,
        "load_history": load_history,
        "delete_session": delete_session,
    }
    mode_function = modes.get(args.mode)
    if not mode_function:
        logging.error(f"Invalid mode specified: {args.mode}")
        print(json.dumps({"error": f"Invalid mode: {args.mode}"}))
        sys.exit(1)
    try:
        logging.info(f"Executing mode: {args.mode}")
        result = mode_function(payload)
        # Ensure the output is always a JSON string
        print(json.dumps(result, ensure_ascii=False))
        logging.info(f"Successfully executed mode: {args.mode}")
    except Exception as e:
        logging.error(f"An error occurred during execution of mode '{args.mode}': {e}", exc_info=True)
        print(json.dumps({"error": f"An error occurred in {args.mode}.", "details": str(e)}))
        sys.exit(1)
# Script entry point: dispatch to the requested mode via main().
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,194 @@
import sqlite3
import json
import os
import uuid
from datetime import datetime
# Database path for GTM projects
DB_PATH = os.environ.get("GTM_DB_PATH", "/app/gtm_projects.db")
def get_db_connection():
    """Open the GTM SQLite database with column-name row access enabled."""
    connection = sqlite3.connect(DB_PATH)
    # sqlite3.Row lets callers address columns by name (row['data']).
    connection.row_factory = sqlite3.Row
    return connection
def init_gtm_db():
    """Initializes the database and creates the gtm_projects table if it doesn't exist."""
    # BUGFIX: pre-initialize conn. Previously, if get_db_connection() raised,
    # the finally block referenced an unbound local ("conn") and crashed with
    # UnboundLocalError instead of propagating the real error.
    conn = None
    try:
        conn = get_db_connection()
        # A flexible schema to store project-related data in a single JSON column
        conn.execute('''
            CREATE TABLE IF NOT EXISTS gtm_projects (
                id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                data JSON NOT NULL
            )
        ''')
        conn.commit()
    finally:
        if conn:
            conn.close()
def create_project(name):
    """Create a new GTM project row and return its id and name."""
    conn = get_db_connection()
    try:
        new_id = str(uuid.uuid4())
        # The JSON blob mirrors id/name and starts with an empty phases map.
        seed = {"id": new_id, "name": name, "phases": {}}
        conn.execute(
            'INSERT INTO gtm_projects (id, name, data) VALUES (?, ?, ?)',
            (new_id, name, json.dumps(seed))
        )
        conn.commit()
        return {"id": new_id, "name": name}
    finally:
        if conn:
            conn.close()
def update_project_name(project_id, new_name):
    """Rename an existing project and bump its updated_at timestamp."""
    result = {"id": project_id, "name": new_name, "status": "updated"}
    conn = get_db_connection()
    try:
        conn.execute(
            'UPDATE gtm_projects SET name = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?',
            (new_name, project_id)
        )
        conn.commit()
        return result
    finally:
        if conn:
            conn.close()
def save_gtm_result(project_id, phase, result):
    """Store *result* under data['phases'][phase] of the given project.

    Returns a status dict, or {"error": ...} when the project is unknown.
    """
    conn = get_db_connection()
    try:
        row = conn.execute('SELECT data FROM gtm_projects WHERE id = ?', (project_id,)).fetchone()
        if not row:
            return {"error": "Project not found"}
        project_data = json.loads(row['data'])
        # Merge the phase result into the JSON blob (creating 'phases' lazily).
        project_data.setdefault('phases', {})[phase] = result
        conn.execute(
            '''UPDATE gtm_projects
               SET data = ?, updated_at = CURRENT_TIMESTAMP
               WHERE id = ?''',
            (json.dumps(project_data), project_id)
        )
        conn.commit()
        return {"id": project_id, "status": f"Phase '{phase}' saved successfully."}
    finally:
        if conn:
            conn.close()
def get_project_data(project_id):
    """Return the full JSON payload of one project, or None if it does not exist."""
    conn = get_db_connection()
    try:
        row = conn.execute('SELECT data FROM gtm_projects WHERE id = ?', (project_id,)).fetchone()
        if row:
            return json.loads(row['data'])
        return None
    finally:
        if conn:
            conn.close()
def get_all_projects():
    """List all projects with key fields pulled out of the JSON data column.

    Missing JSON paths are replaced with readable fallbacks so the frontend
    never has to deal with NULLs.
    """
    conn = get_db_connection()
    try:
        query = """
        SELECT
            id,
            name,
            updated_at,
            json_extract(data, '$.phases.phase1_result.specs.metadata.model_name') AS productName,
            json_extract(data, '$.phases.phase1_result.specs.metadata.category') AS productCategory,
            json_extract(data, '$.phases.phase1_result.specs.metadata.description') AS productDescription,
            json_extract(data, '$.phases.phase1_result.specs.metadata.manufacturer_url') AS sourceUrl
        FROM gtm_projects
        ORDER BY updated_at DESC
        """
        project_list = []
        for row in conn.execute(query).fetchall():
            entry = dict(row)
            # Product name falls back to the project name; the rest to constants.
            if entry.get('productName') is None:
                entry['productName'] = entry['name']
            for field, fallback in (
                ('productCategory', "Uncategorized"),
                ('productDescription', "No description available."),
                ('sourceUrl', "No source URL found."),
            ):
                if entry.get(field) is None:
                    entry[field] = fallback
            project_list.append(entry)
        return project_list
    finally:
        if conn:
            conn.close()
def delete_project(project_id):
    """Remove a project row by its id and report the deletion."""
    conn = get_db_connection()
    try:
        conn.execute('DELETE FROM gtm_projects WHERE id = ?', (project_id,))
        conn.commit()
    finally:
        if conn:
            conn.close()
    return {"status": "deleted", "id": project_id}
if __name__ == "__main__":
    # Simple CLI for testing and potential Node.js bridge
    # Usage: python gtm_db_manager.py [init|create|save|load|list|delete] [args...]
    # All results are printed as a single JSON document to stdout.
    import sys
    if len(sys.argv) < 2:
        print(json.dumps({"error": "Mode is required."}))
        sys.exit(1)
    mode = sys.argv[1]
    if mode == "init":
        init_gtm_db()
        print(json.dumps({"status": "GTM database initialized"}))
    elif mode == "create":
        project_name = sys.argv[2] if len(sys.argv) > 2 else "Untitled GTM Project"
        print(json.dumps(create_project(project_name)))
    elif mode == "save":
        # NOTE(review): the sub-commands below assume their positional args are
        # present; a missing arg raises IndexError instead of a JSON error.
        project_id = sys.argv[2]
        phase = sys.argv[3]
        result_json = sys.argv[4]
        print(json.dumps(save_gtm_result(project_id, phase, json.loads(result_json))))
    elif mode == "load":
        project_id = sys.argv[2]
        project = get_project_data(project_id)
        print(json.dumps(project if project else {"error": "Project not found"}))
    elif mode == "list":
        print(json.dumps(get_all_projects()))
    elif mode == "delete":
        project_id = sys.argv[2]
        print(json.dumps(delete_project(project_id)))
    else:
        print(json.dumps({"error": f"Unknown mode: {mode}"}))

View File

@@ -0,0 +1,30 @@
import sqlite3
import os
DB_PATH = "companies_v3_fixed_2.db"
def list_companies():
    """Print the 20 most recent companies from the local SQLite database.

    Best-effort diagnostic helper: every failure is reported to stdout
    instead of raising.
    """
    if not os.path.exists(DB_PATH):
        print(f"❌ Database not found at {DB_PATH}")
        return
    try:
        conn = sqlite3.connect(DB_PATH)
        # BUGFIX: close the connection even when the query or printing fails
        # (previously conn.close() was skipped on any exception).
        try:
            cursor = conn.cursor()
            print(f"🔍 Listing companies in {DB_PATH}...")
            cursor.execute("SELECT id, name, crm_id, city, crm_vat FROM companies ORDER BY id DESC LIMIT 20")
            rows = cursor.fetchall()
            if not rows:
                print("❌ No companies found")
            else:
                for row in rows:
                    print(f" ID: {row[0]} | Name: {row[1]} | CRM ID: {row[2]} | City: {row[3]} | VAT: {row[4]}")
        finally:
            conn.close()
    except Exception as e:
        print(f"❌ Error reading DB: {e}")
# Run directly: print the latest companies to stdout.
if __name__ == "__main__":
    list_companies()

View File

@@ -0,0 +1,18 @@
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), "company-explorer"))
from backend.database import SessionLocal, Industry
def list_industries():
    """Print the names of all industries stored in the explorer database."""
    session = SessionLocal()
    try:
        rows = session.query(Industry.name).all()
        print("Available Industries:")
        for (industry_name,) in rows:
            print(f"- {industry_name}")
    finally:
        session.close()
# Run directly: print the industry list to stdout.
if __name__ == "__main__":
    list_industries()

View File

@@ -0,0 +1,12 @@
import sqlite3

# One-shot diagnostic script: dump all industry names from the container DB.
DB_PATH = "/app/companies_v3_fixed_2.db"
conn = sqlite3.connect(DB_PATH)
# BUGFIX: close the connection even if the query or printing raises
# (previously conn.close() was unreachable after an exception).
try:
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM industries")
    industries = cursor.fetchall()
    print("Available Industries:")
    for ind in industries:
        print(f"- {ind[0]}")
finally:
    conn.close()

View File

@@ -0,0 +1,120 @@
import sqlite3
import json
import os
import uuid
from datetime import datetime
DB_PATH = os.environ.get("DB_PATH", "/app/market_intelligence.db")
def get_db_connection():
    """Open the market-intelligence SQLite DB with column-name row access."""
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
def init_db():
    """Create the projects table if it does not exist yet.

    BUGFIX: the connection is now closed in a finally block so a failing
    CREATE TABLE no longer leaks it.
    """
    conn = get_db_connection()
    try:
        # Flexible schema: We store almost everything in a 'data' JSON column
        conn.execute('''
        CREATE TABLE IF NOT EXISTS projects (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            data JSON NOT NULL
        )
        ''')
        conn.commit()
    finally:
        conn.close()
def save_project(project_data):
    """Insert or update a project.

    When *project_data* carries an 'id' the existing row is updated;
    otherwise a fresh UUID is assigned and a new row inserted. Returns a
    status dict, or {"error": ...} on failure.
    """
    conn = get_db_connection()
    try:
        pid = project_data.get('id')
        # Display name for the list view: explicit name, then companyName,
        # then a generic placeholder.
        display_name = project_data.get('name') or project_data.get('companyName') or "Untitled Project"
        if not pid:
            # New project: mint an id and mirror it into the JSON blob.
            pid = str(uuid.uuid4())
            project_data['id'] = pid
            conn.execute(
                'INSERT INTO projects (id, name, data) VALUES (?, ?, ?)',
                (pid, display_name, json.dumps(project_data))
            )
        else:
            conn.execute(
                '''UPDATE projects
                SET name = ?, data = ?, updated_at = CURRENT_TIMESTAMP
                WHERE id = ?''',
                (display_name, json.dumps(project_data), pid)
            )
        conn.commit()
        return {"id": pid, "status": "saved"}
    except Exception as e:
        return {"error": str(e)}
    finally:
        conn.close()
def get_all_projects():
    """Return id/name/timestamps of all projects, newest first.

    BUGFIX: the connection is now closed in a finally block so a failing
    query no longer leaks it.
    """
    conn = get_db_connection()
    try:
        projects = conn.execute('SELECT id, name, created_at, updated_at FROM projects ORDER BY updated_at DESC').fetchall()
        return [dict(ix) for ix in projects]
    finally:
        conn.close()
def load_project(project_id):
    """Return the full JSON payload of one project, or None if absent.

    BUGFIX: the connection is now closed in a finally block so a failing
    query no longer leaks it.
    """
    conn = get_db_connection()
    try:
        project = conn.execute('SELECT data FROM projects WHERE id = ?', (project_id,)).fetchone()
        if project:
            return json.loads(project['data'])
        return None
    finally:
        conn.close()
def delete_project(project_id):
    """Delete one project row; report the status or the error message."""
    conn = get_db_connection()
    try:
        conn.execute('DELETE FROM projects WHERE id = ?', (project_id,))
        conn.commit()
        return {"status": "deleted", "id": project_id}
    except Exception as e:
        return {"error": str(e)}
    finally:
        conn.close()
if __name__ == "__main__":
    import sys
    # Simple CLI for Node.js bridge
    # Usage: python market_db_manager.py [init|list|save|load|delete] [args...]
    # FIX: guard against a missing mode argument (previously raised a raw
    # IndexError) and report unknown modes explicitly (previously the script
    # exited silently) — consistent with the gtm_db_manager CLI.
    if len(sys.argv) < 2:
        print(json.dumps({"error": "Mode is required."}))
        sys.exit(1)
    mode = sys.argv[1]
    if mode == "init":
        init_db()
        print(json.dumps({"status": "initialized"}))
    elif mode == "list":
        print(json.dumps(get_all_projects()))
    elif mode == "save":
        # Data is passed as a JSON string file path to avoid command line length limits
        data_file = sys.argv[2]
        with open(data_file, 'r') as f:
            data = json.load(f)
        print(json.dumps(save_project(data)))
    elif mode == "load":
        p_id = sys.argv[2]
        result = load_project(p_id)
        print(json.dumps(result if result else {"error": "Project not found"}))
    elif mode == "delete":
        p_id = sys.argv[2]
        print(json.dumps(delete_project(p_id)))
    else:
        print(json.dumps({"error": f"Unknown mode: {mode}"}))
        sys.exit(1)
View File

@@ -0,0 +1,676 @@
import argparse
import json
import os
import sys # Import sys for stderr
import requests
from bs4 import BeautifulSoup
import logging
from datetime import datetime
import re # Für Regex-Operationen
# --- SELF-CONTAINED LOGGING SETUP --- #
def create_self_contained_log_filename(mode):
    """Return today's log file path for the orchestrator.

    Only the date (not the time) goes into the filename so that all runs of
    a day append to the same file (prevents log spam). The log directory is
    a fixed path inside the Docker container. The *mode* argument is kept
    for call-site compatibility but is currently unused.
    """
    log_dir = "/app/Log"  # fixed directory inside the container
    if not os.path.exists(log_dir):
        os.makedirs(log_dir, exist_ok=True)
    today = datetime.now().strftime("%Y-%m-%d")
    return os.path.join(log_dir, f"{today}_market_intel.log")
# Configure module-wide logging once at import: one daily file plus stderr.
log_filename = create_self_contained_log_filename("market_intel_orchestrator")
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(asctime)s] %(levelname)s [%(funcName)s]: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    handlers=[
        logging.FileHandler(log_filename, mode='a', encoding='utf-8'),
        logging.StreamHandler(sys.stderr)
    ]
)
logger = logging.getLogger(__name__)
# --- END SELF-CONTAINED LOGGING SETUP --- #
def load_gemini_api_key(file_path="gemini_api_key.txt"):
    """Read and return the Gemini API key; log critically and re-raise on failure."""
    try:
        with open(file_path, "r") as f:
            key = f.read().strip()
    except Exception as e:
        logger.critical(f"Fehler beim Laden des Gemini API Keys: {e}")
        raise
    return key
def load_serp_api_key(file_path="serpapikey.txt"):
    """Load the SerpAPI key from file or the SERP_API_KEY env var; None if unavailable."""
    try:
        # Prefer the key file; fall back to the environment variable.
        if not os.path.exists(file_path):
            return os.environ.get("SERP_API_KEY")
        with open(file_path, "r") as f:
            return f.read().strip()
    except Exception as e:
        logger.warning(f"Konnte SerpAPI Key nicht laden: {e}")
        return None
def get_website_text(url):
    """Fetch *url* and return up to 15,000 chars of visible text, or None on failure.

    Prepends https:// when the scheme is missing, strips script/style/nav/
    footer/header tags and removes non-printable characters.
    """
    # Auto-fix missing scheme
    if url and not url.startswith('http'):
        url = 'https://' + url
    logger.info(f"Scraping URL: {url}")
    # Realistic modern browser headers to reduce the chance of being blocked.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9,de;q=0.8',
        'Referer': 'https://www.google.com/'
    }
    try:
        resp = requests.get(url, headers=browser_headers, timeout=15)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'lxml')
        # Drop non-content elements before extracting text.
        for noise in soup(['script', 'style', 'nav', 'footer', 'header']):
            noise.decompose()
        visible = soup.get_text(separator=' ', strip=True)
        visible = re.sub(r'[^\x20-\x7E\n\r\t]', '', visible)
        return visible[:15000]
    except Exception as e:
        logger.error(f"Scraping failed for {url}: {e}")
        return None
def serp_search(query, num_results=3):
    """Run a Google search via SerpAPI; return a list of result dicts (possibly empty)."""
    api_key = load_serp_api_key()
    if not api_key:
        logger.warning("SerpAPI Key fehlt. Suche übersprungen.")
        return []
    logger.info(f"SerpAPI Suche: {query}")
    try:
        response = requests.get(
            "https://serpapi.com/search",
            params={
                "engine": "google",
                "q": query,
                "api_key": api_key,
                "num": num_results,
                "hl": "de",
                "gl": "de"
            },
            timeout=20,
        )
        response.raise_for_status()
        data = response.json()
        # Only organic hits are of interest; absent key yields an empty list.
        return [
            {
                "title": hit.get("title"),
                "link": hit.get("link"),
                "snippet": hit.get("snippet")
            }
            for hit in data.get("organic_results", [])
        ]
    except Exception as e:
        logger.error(f"SerpAPI Fehler: {e}")
        return []
def _extract_target_industries_from_context(context_content):
    """Extract target-industry names from a markdown context document.

    Looks for a markdown table under a '## Schritt 2:' heading and reads the
    column whose header matches Zielbranche/Segment/Branche/Industrie. If no
    such section exists, falls back to a loose 'Zielbranche | a, b' pattern.
    Returns a de-duplicated list (possibly empty, order not guaranteed).
    """
    md = context_content
    # Try different table patterns in case the format varies
    step2_match = re.search(r'##\s*Schritt\s*2:[\s\S]*?(?=\n##\s*Schritt\s*\d:|\s*$)', md, re.IGNORECASE)
    if not step2_match:
        # Fallback: look for "Zielbranche" anywhere in the text
        match = re.search(r'Zielbranche\s*\|?\s*([^|\n]+)', md, re.IGNORECASE)
        if match:
            return [s.strip() for s in match.group(1).split(',')]
        return []
    # Collect the first contiguous run of '|'-prefixed lines (the table).
    table_lines = []
    in_table = False
    for line in step2_match.group(0).split('\n'):
        if line.strip().startswith('|'):
            in_table = True
            table_lines.append(line.strip())
        elif in_table:
            break
    # Need header + separator + at least one data row.
    if len(table_lines) < 3: return []
    header = [s.strip() for s in table_lines[0].split('|') if s.strip()]
    industry_col = next((h for h in header if re.search(r'zielbranche|segment|branche|industrie', h, re.IGNORECASE)), None)
    if not industry_col: return []
    col_idx = header.index(industry_col)
    industries = []
    # Rows start at index 2 (index 1 is the markdown separator line).
    for line in table_lines[2:]:
        cells = [s.strip() for s in line.split('|') if s.strip()]
        if len(cells) > col_idx: industries.append(cells[col_idx])
    return list(set(industries))
def _extract_json_from_text(text):
"""
Versucht, ein JSON-Objekt aus einem Textstring zu extrahieren,
unabhängig von Markdown-Formatierung (```json ... ```).
"""
try:
# 1. Versuch: Direktersatz von Markdown-Tags (falls vorhanden)
clean_text = text.replace("```json", "").replace("```", "").strip()
return json.loads(clean_text)
except json.JSONDecodeError:
pass
try:
# 2. Versuch: Regex Suche nach dem ersten { und letzten }
json_match = re.search(r"(\{[\s\S]*\})", text)
if json_match:
return json.loads(json_match.group(1))
except json.JSONDecodeError:
pass
logger.error(f"JSON Parsing fehlgeschlagen. Roher Text: {text[:500]}...")
return None
def generate_search_strategy(reference_url, context_content, language='de'):
    """Derive a lookalike-search strategy for a reference client via Gemini.

    Scrapes the reference homepage (best effort), combines it with the
    strategic context document and asks Gemini for an ICP definition plus
    exactly four checkable digital signals. Returns the parsed JSON dict, or
    a placeholder error structure when generation fails (never raises, so
    the frontend does not crash).
    """
    logger.info(f"Generating strategy for {reference_url} (Language: {language})")
    api_key = load_gemini_api_key()
    # NOTE(review): currently unused in the prompt; retained for future
    # prompt enrichment — confirm before removing.
    target_industries = _extract_target_industries_from_context(context_content)
    homepage_text = get_website_text(reference_url)
    if not homepage_text:
        logger.warning(f"Strategy Generation: Could not scrape {reference_url}. Relying on context.")
        homepage_text = "[WEBSITE ACCESS DENIED] - The strategy must be developed based on the provided STRATEGIC CONTEXT and the URL name alone."
    # Switch to stable 2.5-pro model (which works for v1beta)
    GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
    lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
    prompt = f"""
You are a B2B Market Intelligence Architect.
--- ROLE DEFINITION ---
You are working for the company described in the "STRATEGIC CONTEXT" below (The "Hunter").
Your goal is to find new potential customers who look exactly like the "REFERENCE CLIENT" described below (The "Seed" / "Prey").
--- STRATEGIC CONTEXT (YOUR COMPANY / THE OFFER) ---
{context_content}
--- REFERENCE CLIENT HOMEPAGE (THE IDEAL CUSTOMER TO CLONE) ---
URL: {reference_url}
CONTENT: {homepage_text[:10000]}
--- TASK ---
Develop a search strategy to find **Lookalikes of the Reference Client** who would be interested in **Your Company's Offer**.
1. **summaryOfOffer**: A 1-sentence summary of what the **REFERENCE CLIENT** does (NOT what your company does). We need this to search for similar companies.
2. **idealCustomerProfile**: A concise definition of the Ideal Customer Profile (ICP) based on the Reference Client's characteristics.
3. **searchStrategyICP**: A detailed description of the Ideal Customer Profile (ICP) based on the analysis.
4. **digitalSignals**: Identification and description of relevant digital signals that indicate purchase interest or engagement for YOUR offer.
5. **targetPages**: A list of the most important target pages on the company website relevant for marketing and sales activities.
6. **signals**: Identify exactly 4 specific digital signals to check on potential lookalikes.
- **CRITICAL**: One signal MUST be "Technographic / Incumbent Search". It must look for existing competitor software or legacy systems that **YOUR COMPANY'S OFFER** replaces or complements.
- The other 3 signals should focus on business pains or strategic fit.
--- SIGNAL DEFINITION ---
For EACH signal, you MUST provide:
- `id`: A unique ID (e.g., "sig_1").
- `name`: A short, descriptive name.
- `description`: What does this signal indicate?
- `targetPageKeywords`: A list of 3-5 keywords to look for on a company's website (e.g., ["career", "jobs"] for a hiring signal).
- `proofStrategy`: An object containing:
- `likelySource`: Where on the website or web is this info found? (e.g., "Careers Page").
- `searchQueryTemplate`: A Google search query to find this. Use `{{COMPANY}}` as a placeholder for the company name.
Example: `site:{{COMPANY}} "software engineer" OR "developer"`
--- LANGUAGE INSTRUCTION ---
IMPORTANT: The entire JSON content (descriptions, rationale, summaries) MUST be in {lang_instruction}. Translate if necessary.
--- OUTPUT FORMAT ---
Return ONLY a valid JSON object.
{{
"summaryOfOffer": "The Reference Client provides...",
"idealCustomerProfile": "...",
"searchStrategyICP": "...",
"digitalSignals": "...",
"targetPages": "...",
"signals": [ ... ]
}}
"""
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    logger.info("Sende Anfrage an Gemini API...")
    try:
        response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'})
        response.raise_for_status()
        res_json = response.json()
        logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")
        text = res_json['candidates'][0]['content']['parts'][0]['text']
        # FIX: the raw model output is diagnostic data, not an error — log it at
        # DEBUG level instead of ERROR so it no longer floods error monitoring.
        logger.debug(f"RAW GEMINI JSON RESPONSE: {text}")
        result = _extract_json_from_text(text)
        if not result:
            raise ValueError("Konnte kein valides JSON extrahieren")
        return result
    except Exception as e:
        logger.error(f"Strategy generation failed: {e}")
        # Return fallback to avoid frontend crash
        return {
            "summaryOfOffer": "Error generating strategy. Please check logs.",
            "idealCustomerProfile": "Error generating ICP. Please check logs.",
            "searchStrategyICP": "Error generating Search Strategy ICP. Please check logs.",
            "digitalSignals": "Error generating Digital Signals. Please check logs.",
            "targetPages": "Error generating Target Pages. Please check logs.",
            "signals": []
        }
def identify_competitors(reference_url, target_market, industries, summary_of_offer=None, language='de'):
    """Ask Gemini for 3-5 lookalikes/competitors of the reference client.

    Returns a dict with 'localCompetitors', 'nationalCompetitors' and
    'internationalCompetitors' lists; on any failure an empty structure of
    the same shape is returned (never raises).
    """
    logger.info(f"Identifying competitors for {reference_url} (Language: {language})")
    api_key = load_gemini_api_key()
    # Switch to stable 2.5-pro model
    GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
    lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
    prompt = f"""
You are a B2B Market Analyst. Find 3-5 direct competitors or highly similar companies (lookalikes) for the company at `{reference_url}`.
--- CONTEXT ---
- Reference Client Business (What they do): {summary_of_offer}
- Target Market: {target_market}
- Relevant Industries: {', '.join(industries)}
--- TASK ---
Identify companies that are **similar to the Reference Client** (i.e., Lookalikes).
We are looking for other companies that do the same thing as `{reference_url}`.
Categorize them into three groups:
1. 'localCompetitors': Competitors in the same immediate region/city.
2. 'nationalCompetitors': Competitors operating across the same country.
3. 'internationalCompetitors': Global players.
For EACH competitor, you MUST provide:
- `id`: A unique, URL-friendly identifier (e.g., "competitor-name-gmbh").
- `name`: The official, full name of the company.
- `description`: A concise explanation of why they are a competitor.
--- LANGUAGE INSTRUCTION ---
IMPORTANT: The entire JSON content (descriptions) MUST be in {lang_instruction}.
--- OUTPUT FORMAT ---
Return ONLY a valid JSON object with the following structure:
{{
"localCompetitors": [ {{ "id": "...", "name": "...", "description": "..." }} ],
"nationalCompetitors": [ ... ],
"internationalCompetitors": [ ... ]
}}
"""
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    logger.info("Sende Anfrage an Gemini API...")
    # logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}")
    try:
        response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'})
        response.raise_for_status()
        res_json = response.json()
        logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")
        text = res_json['candidates'][0]['content']['parts'][0]['text']
        result = _extract_json_from_text(text)
        if not result:
            raise ValueError("Konnte kein valides JSON extrahieren")
        return result
    except Exception as e:
        logger.error(f"Competitor identification failed: {e}")
        # Empty-but-well-shaped fallback so the frontend never crashes.
        return {"localCompetitors": [], "nationalCompetitors": [], "internationalCompetitors": []}
def analyze_company(company_name, strategy, target_market, language='de'):
    """Run a deep "digital trace" audit for one company and return a dict.

    Pipeline:
      1. Locate the official website (SerpAPI first, Gemini as fallback).
      2. Scrape the homepage text.
      3. Gather tech-stack, firmographic and strategy-signal evidence via search.
      4. Ask Gemini to synthesize everything into a structured JSON audit.

    Args:
        company_name: Name of the company to audit.
        strategy: Strategy dict; its 'signals' list drives the targeted searches.
        target_market: Market/country string used to disambiguate the company.
        language: 'de' for German output, anything else yields English.

    Returns:
        The parsed audit dict on success, ``{"error": ...}`` when no website
        could be found, or a placeholder audit dict if the Gemini call fails.
    """
    logger.info(f"--- STARTING DEEP TECH AUDIT FOR: {company_name} (Language: {language}) ---")
    api_key = load_gemini_api_key()
    GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
    lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
    # 1. Website finding (SerpAPI, falling back to Gemini with low confidence)
    url = None
    website_search_results = serp_search(f"{company_name} offizielle Website")
    if website_search_results:
        url = website_search_results[0].get("link")
        logger.info(f"Website via SerpAPI gefunden: {url}")
    if not url:
        logger.info("Keine URL via SerpAPI, frage Gemini...")
        prompt_url = f"What is the official homepage URL for the company '{company_name}' in the market '{target_market}'? Respond with ONLY the single, complete URL and nothing else."
        payload_url = {"contents": [{"parts": [{"text": prompt_url}]}]}
        logger.info("Sende Anfrage an Gemini API (URL Fallback)...")
        try:
            res = requests.post(GEMINI_API_URL, json=payload_url, headers={'Content-Type': 'application/json'}, timeout=15)
            res.raise_for_status()
            res_json = res.json()
            candidate = res_json.get('candidates', [{}])[0]
            content = candidate.get('content', {}).get('parts', [{}])[0]
            text_response = content.get('text', '').strip()
            url_match = re.search(r'(https?://[^\s"]+)', text_response)
            if url_match:
                url = url_match.group(1)
        except Exception as e:
            # Non-fatal: we bail out below if url is still unusable.
            logger.error(f"Gemini URL Fallback failed: {e}")
    if not url or not url.startswith("http"):
        return {"error": f"Could not find website for {company_name}"}
    # 2. Homepage scraping. url is guaranteed to be an http(s) URL here, so
    # the former "No valid URL found" branch was unreachable and is removed.
    scraping_note = ""
    homepage_text = get_website_text(url)
    if not homepage_text:
        homepage_text = "[WEBSITE ACCESS DENIED]"
        scraping_note = "(Website Content Unavailable)"
    # 3a. Dynamic tech-stack search derived from the strategy instead of a
    # hard-coded vendor list (no proactive SAP-Ariba probing any more; such
    # vendors are only searched for when the strategy explicitly lists them).
    tech_evidence = []
    tech_queries = [
        f'site:{url.split("//")[-1].split("/")[0] if url and "//" in url else company_name} "software" OR "technology" OR "system"',
        f'"{company_name}" "technology stack"',
        f'"{company_name}" "partners"'
    ]
    # Add explicit tech signals from the strategy if they exist.
    signals = strategy.get('signals', [])
    for signal in signals:
        if "technographic" in signal.get('id', '').lower() or "incumbent" in signal.get('id', '').lower():
            keywords = signal.get('targetPageKeywords', [])
            for kw in keywords:
                tech_queries.append(f'"{company_name}" "{kw}"')
    # Deduplicate queries and cap the number of searches.
    tech_queries = list(set(tech_queries))[:4]
    for q in tech_queries:
        results = serp_search(q, num_results=3)
        if results:
            for r in results:
                tech_evidence.append(f"- Found: {r['title']}\n  Snippet: {r['snippet']}\n  Link: {r['link']}")
    tech_evidence_text = "\n".join(tech_evidence)
    # 3b. Firmographics (revenue / headcount) context.
    signal_evidence = []
    firmographics_results = serp_search(f"{company_name} Umsatz Mitarbeiterzahl 2023")
    firmographics_context = "\n".join([f"- {r['snippet']} ({r['link']})" for r in firmographics_results])
    # 3c. Targeted searches for the remaining strategy signals.
    for signal in signals:
        # Skip technographic signals here as they are handled above via the generic search.
        if "incumbent" in signal['id'].lower() or "technographic" in signal['id'].lower():
            continue
        proof_strategy = signal.get('proofStrategy', {})
        query_template = proof_strategy.get('searchQueryTemplate')
        search_context = ""
        if query_template:
            try:
                domain = url.split("//")[-1].split("/")[0].replace("www.", "")
            except Exception:  # was a bare except; never mask SystemExit/KeyboardInterrupt
                domain = ""
            query = query_template.replace("{{COMPANY}}", company_name).replace("{COMPANY}", company_name).replace("{{domain}}", domain).replace("{domain}", domain)
            results = serp_search(query, num_results=3)
            if results:
                search_context = "\n".join([f" * Snippet: {r['snippet']}\n   Source: {r['link']}" for r in results])
        if search_context:
            signal_evidence.append(f"SIGNAL '{signal['name']}':\n{search_context}")
    evidence_text = "\n\n".join(signal_evidence)
    # 4. Final synthesis prompt for Gemini.
    prompt = f"""
You are a Strategic B2B Sales Consultant.
Analyze the company '{company_name}' ({url}) to create a "best-of-breed" sales pitch strategy.
--- STRATEGY (What we are looking for) ---
{json.dumps(signals, indent=2)}
--- EVIDENCE 1: EXTERNAL TECH-STACK INTELLIGENCE ---
Analyze the search results below. Do NOT hallucinate technologies. Only list what is explicitly found.
{tech_evidence_text}
--- EVIDENCE 2: HOMEPAGE CONTENT {scraping_note} ---
{homepage_text[:8000]}
--- EVIDENCE 3: FIRMOGRAPHICS SEARCH ---
{firmographics_context}
--- EVIDENCE 4: TARGETED SIGNAL SEARCH RESULTS ---
{evidence_text}
----------------------------------
TASK:
1. **Firmographics**: Estimate Revenue and Employees.
2. **Technographic Audit**: Check if any relevant competitor technology or legacy system is ACTUALLY found in the evidence.
   - **CRITICAL:** If no specific competitor software is found, assume the status is "Greenfield" (Manual Process / Status Quo). Do NOT invent a competitor like SAP Ariba just because it's a common tool.
3. **Status**:
   - Set to "Nutzt Wettbewerber" ONLY if a direct competitor is explicitly found.
   - Set to "Greenfield" if no competitor tech is found.
   - Set to "Bestandskunde" if they already use our solution.
4. **Evaluate Signals**: For each signal, provide a "value" (Yes/No/Partial) and "proof".
5. **Recommendation (Pitch Strategy)**:
   - If Greenfield: Pitch against the manual status quo (efficiency, error reduction).
   - If Competitor: Pitch replacement/upgrade.
   - **Tone**: Strategic, insider-knowledge, specific.
--- LANGUAGE INSTRUCTION ---
IMPORTANT: The entire JSON content (especially 'recommendation', 'proof', 'value') MUST be in {lang_instruction}.
STRICTLY output only JSON:
{{
  "companyName": "{company_name}",
  "status": "...",
  "revenue": "...",
  "employees": "...",
  "tier": "Tier 1/2/3",
  "dynamicAnalysis": {{
    "sig_id_from_strategy": {{ "value": "...", "proof": "..." }}
  }},
  "recommendation": "..."
}}
"""
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        # Force JSON-mode responses from the model.
        "generationConfig": {"response_mime_type": "application/json"}
    }
    try:
        logger.info("Sende Audit-Anfrage an Gemini API...")
        response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'})
        response.raise_for_status()
        response_data = response.json()
        logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")
        text = response_data['candidates'][0]['content']['parts'][0]['text']
        result = _extract_json_from_text(text)
        if not result:
            raise ValueError("Konnte kein valides JSON extrahieren")
        result['dataSource'] = "Digital Trace Audit (Deep Dive)"
        return result
    except Exception as e:
        logger.error(f"Audit failed for {company_name}: {e}")
        # Placeholder result so downstream JSON consumers keep working.
        return {
            "companyName": company_name,
            "status": "Unklar",
            "revenue": "Error",
            "employees": "Error",
            "tier": "Tier 3",
            "dynamicAnalysis": {},
            "recommendation": f"Audit failed: {str(e)}",
            "dataSource": "Error"
        }
def generate_outreach_campaign(company_data_json, knowledge_base_content, reference_url, specific_role=None, language='de'):
    """Generate personalized outreach e-mail campaigns via Gemini.

    Two modes:
      * ``specific_role`` given (Mode B): build a 3-step sequence for that
        one role only.
      * ``specific_role`` is None (Mode A): let the model pick the single
        best role, draft its campaign and list the remaining candidate
        roles under 'available_roles' for later on-demand generation.

    Args:
        company_data_json: Audit result dict for the target company.
        knowledge_base_content: Sender identity / strategy knowledge base text.
        reference_url: URL of an existing reference customer (social proof).
        specific_role: Optional persona name to generate a campaign for.
        language: 'de' for German e-mail copy, anything else yields English.

    Returns:
        Parsed JSON dict from Gemini, or ``{"error": ...}`` on failure.
    """
    company_name = company_data_json.get('companyName', 'Unknown')
    logger.info(f"--- STARTING OUTREACH GENERATION FOR: {company_name} (Role: {specific_role if specific_role else 'Top 5'}) [Lang: {language}] ---")
    api_key = load_gemini_api_key()
    GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
    lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
    if specific_role:
        # --- MODE B: SINGLE ROLE GENERATION (On Demand) ---
        task_description = f"""
--- TASK ---
1. **Focus**: Create a highly specific 3-step email campaign ONLY for the role: '{specific_role}'.
2. **Analyze**: Use the Audit Facts to find specific hooks for this role.
3. **Draft**: Write the sequence (Opening, Follow-up, Break-up).
"""
        output_format = """
--- OUTPUT FORMAT (Strictly JSON) ---
{
  "target_role": "The requested role",
  "rationale": "Why this fits...",
  "emails": [ ... ]
}
"""
    else:
        # --- MODE A: INITIAL START (TOP 1 + SUGGESTIONS) ---
        task_description = f"""
--- TASK ---
1. **Analyze**: Match the Target Company (Input 2) to the most relevant 'Zielbranche/Segment' from the Knowledge Base (Input 1).
2. **Identify Roles**: Identify ALL relevant 'Rollen' (Personas) from the Knowledge Base that fit this company.
3. **Select Best**: Choose the SINGLE most promising role for immediate outreach based on the Audit findings.
4. **Draft Campaign**: Write a 3-step email sequence for this ONE role.
5. **List Others**: List ALL other relevant roles (including the other top candidates) in 'available_roles' so the user can generate them later.
"""
        output_format = """
--- OUTPUT FORMAT (Strictly JSON) ---
{
  "campaigns": [
    {
      "target_role": "Role Name",
      "rationale": "Why selected...",
      "emails": [ ... ]
    }
  ],
  "available_roles": [ "Role 2", "Role 3", "Role 4", "Role 5", ... ]
}
"""
    # Assemble the full prompt: identity, audit facts, social proof, task
    # and the mode-specific output format chosen above.
    prompt = f"""
You are a Strategic Key Account Manager and deeply technical Industry Insider.
Your goal is to write highly personalized, **operationally specific** outreach emails to the company '{company_name}'.
--- INPUT 1: YOUR IDENTITY & STRATEGY (The Sender) ---
{knowledge_base_content}
--- INPUT 2: THE TARGET COMPANY (Audit Facts) ---
{json.dumps(company_data_json, indent=2)}
--- INPUT 3: THE REFERENCE CLIENT (Social Proof) ---
Reference Client URL: {reference_url}
CRITICAL: This 'Reference Client' is an existing happy customer of ours. You MUST mention them by name to establish trust.
{task_description}
--- TONE & STYLE GUIDELINES (CRITICAL) ---
1. **Professional & Flowing:** Aim for approx. 500-600 characters per email. Use full sentences and professional courtesies. It should feel like a high-quality human message.
2. **Stance:** Act as an **astute industry observer** and peer consultant. You have analyzed their specific situation and identified a strategic bottleneck.
3. **The Opportunity Bridge (Email 1):** Bridge observation to a strategic solution immediately using concrete terms (e.g., "autonome Reinigungsrobotik").
4. **Context-Sensitive Technographics:** Only mention discovered IT or Procurement systems (e.g., SAP Ariba) if it is highly relevant to the **specific role** (e.g., for CEO, CFO, or Head of Procurement). For **purely operational roles** (e.g., Facility Manager, Head of Operations), AVOID mentioning these systems as it may cause confusion; focus entirely on the operational pain (labor shortage) and growth bottlenecks instead.
5. **Soft-Sell vs. Hard-Pitch:** Position technology as a logical answer to the bottleneck. Pitch the **outcome/capability**, not features.
6. **Social Proof as the Engine:** Let the Reference Client ({reference_url}) provide the evidence. Use a role-specific KPI.
7. **Operational Grit:** Use domain-specific terms (e.g., "ASNs", "8D", "TCO") to establish authority.
8. **Language:** {lang_instruction}.
{output_format}
"""
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        # Force JSON-mode responses from the model.
        "generationConfig": {"response_mime_type": "application/json"}
    }
    try:
        logger.info("Sende Campaign-Anfrage an Gemini API...")
        response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'})
        response.raise_for_status()
        response_data = response.json()
        logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")
        text = response_data['candidates'][0]['content']['parts'][0]['text']
        result = _extract_json_from_text(text)
        if not result:
            raise ValueError("Konnte kein valides JSON extrahieren")
        return result
    except Exception as e:
        logger.error(f"Campaign generation failed for {company_name}: {e}")
        return {"error": str(e)}
def main():
    """CLI entry point: dispatch to one of the four pipeline modes."""
    parser = argparse.ArgumentParser()
    # Restrict --mode to the known modes so a typo fails loudly with an
    # argparse error instead of silently producing no output at all.
    parser.add_argument("--mode", required=True,
                        choices=["generate_strategy", "identify_competitors",
                                 "analyze_company", "generate_outreach"])
    parser.add_argument("--reference_url")
    parser.add_argument("--context_file")
    parser.add_argument("--target_market")
    parser.add_argument("--company_name")
    parser.add_argument("--strategy_json")
    parser.add_argument("--summary_of_offer")
    parser.add_argument("--company_data_file")
    parser.add_argument("--specific_role")
    parser.add_argument("--language", default="de")  # output language, 'de' or 'en'
    args = parser.parse_args()
    # Files are read as UTF-8 explicitly: relying on the locale default
    # (e.g. cp1252 on Windows) would mangle umlauts in the context files.
    if args.mode == "generate_strategy":
        with open(args.context_file, "r", encoding="utf-8") as f:
            context = f.read()
        print(json.dumps(generate_search_strategy(args.reference_url, context, args.language)))
    elif args.mode == "identify_competitors":
        industries = []
        if args.context_file:
            with open(args.context_file, "r", encoding="utf-8") as f:
                context = f.read()
            industries = _extract_target_industries_from_context(context)
        print(json.dumps(identify_competitors(args.reference_url, args.target_market, industries, args.summary_of_offer, args.language)))
    elif args.mode == "analyze_company":
        strategy = json.loads(args.strategy_json)
        print(json.dumps(analyze_company(args.company_name, strategy, args.target_market, args.language)))
    elif args.mode == "generate_outreach":
        with open(args.company_data_file, "r", encoding="utf-8") as f:
            company_data = json.load(f)
        with open(args.context_file, "r", encoding="utf-8") as f:
            knowledge_base = f.read()
        print(json.dumps(generate_outreach_campaign(company_data, knowledge_base, args.reference_url, args.specific_role, args.language)))
if __name__ == "__main__":
    # Force UTF-8 on stdout so JSON containing umlauts survives consoles
    # whose default encoding is not UTF-8 (e.g. Windows).
    sys.stdout.reconfigure(encoding='utf-8')
    try:
        main()
        sys.stdout.flush()
    except Exception as e:
        logger.critical(f"Unhandled Exception in Main: {e}", exc_info=True)
        # Fallback JSON output so the server doesn't crash on parse error
        error_json = json.dumps({"error": f"Critical Script Error: {str(e)}", "details": "Check market_intel.log"})
        print(error_json)
        sys.exit(1)

View File

@@ -0,0 +1,29 @@
import sqlite3
import sys
DB_PATH = "/app/companies_v3_fixed_2.db"
def migrate():
    """Add the 'ai_opener' TEXT column to the 'companies' table (idempotent)."""
    # Initialize before the try-block: if sqlite3.connect() itself raises,
    # the finally-clause below would otherwise hit a NameError on 'conn'.
    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()
        print(f"Checking schema in {DB_PATH}...")
        cursor.execute("PRAGMA table_info(companies)")
        columns = [row[1] for row in cursor.fetchall()]
        if "ai_opener" in columns:
            print("Column 'ai_opener' already exists. Skipping.")
        else:
            print("Adding column 'ai_opener' to 'companies' table...")
            cursor.execute("ALTER TABLE companies ADD COLUMN ai_opener TEXT")
        conn.commit()
        print("✅ Migration successful.")
    except Exception as e:
        print(f"❌ Migration failed: {e}")
    finally:
        if conn:
            conn.close()


if __name__ == "__main__":
    migrate()

View File

@@ -0,0 +1,29 @@
import sqlite3
import sys
DB_PATH = "/app/companies_v3_fixed_2.db"
def migrate():
    """Add the 'ai_opener_secondary' TEXT column to 'companies' (idempotent)."""
    # Initialize before the try-block: if sqlite3.connect() itself raises,
    # the finally-clause below would otherwise hit a NameError on 'conn'.
    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()
        print(f"Checking schema in {DB_PATH}...")
        cursor.execute("PRAGMA table_info(companies)")
        columns = [row[1] for row in cursor.fetchall()]
        if "ai_opener_secondary" in columns:
            print("Column 'ai_opener_secondary' already exists. Skipping.")
        else:
            print("Adding column 'ai_opener_secondary' to 'companies' table...")
            cursor.execute("ALTER TABLE companies ADD COLUMN ai_opener_secondary TEXT")
        conn.commit()
        print("✅ Migration successful.")
    except Exception as e:
        print(f"❌ Migration failed: {e}")
    finally:
        if conn:
            conn.close()


if __name__ == "__main__":
    migrate()

View File

@@ -0,0 +1,30 @@
import sqlite3
import os
DB_PATH = "/app/companies_v3_fixed_2.db"
def migrate_personas():
    """Add the extended text columns to the 'personas' table (idempotent)."""
    print(f"Adding new columns to 'personas' table in {DB_PATH}...")
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()
        # Column names/types are internal constants, so f-string DDL is safe.
        columns_to_add = [
            ("description", "TEXT"),
            ("convincing_arguments", "TEXT"),
            ("typical_positions", "TEXT"),
            ("kpis", "TEXT"),
        ]
        for col_name, col_type in columns_to_add:
            try:
                cursor.execute(f"ALTER TABLE personas ADD COLUMN {col_name} {col_type}")
                print(f"  Added column: {col_name}")
            except sqlite3.OperationalError:
                # SQLite reports a duplicate column as OperationalError;
                # treat it as an already-applied migration step.
                print(f"  Column {col_name} already exists.")
        conn.commit()
    finally:
        # Close the connection even if commit/execute fails unexpectedly
        # (previously the handle leaked on any uncaught error).
        conn.close()
    print("Migration complete.")


if __name__ == "__main__":
    migrate_personas()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,19 @@
import argparse
def read_file_content(file_path):
    """Print the UTF-8 text content of *file_path* to stdout.

    Errors are reported on stdout as well instead of raising, so the
    script never crashes on a bad path.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            print(handle.read())
    except FileNotFoundError:
        print(f"Error: File not found at '{file_path}'")
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Read and display the content of a file.")
    cli.add_argument("file_path", help="The path to the file you want to read.")
    read_file_content(cli.parse_args().file_path)

View File

@@ -0,0 +1,37 @@
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), "company-explorer"))
from backend.database import SessionLocal, Industry, Persona, MarketingMatrix
def read_specific_entry(industry_name: str, persona_name: str):
    """Print the stored marketing copy for one industry/persona pair.

    Looks up the MarketingMatrix row joined to the named Industry and
    Persona and prints its intro and social-proof texts; prints a notice
    when no matching row exists.
    """
    session = SessionLocal()
    try:
        record = (
            session.query(MarketingMatrix)
            .join(Industry)
            .join(Persona)
            .filter(Industry.name == industry_name, Persona.name == persona_name)
            .first()
        )
        if record is None:
            print(f"No entry found for {industry_name} and {persona_name}")
            return
        print("--- Generated Text ---")
        print(f"Industry: {industry_name}")
        print(f"Persona: {persona_name}")
        print("\n[Intro]")
        print(record.intro)
        print("\n[Social Proof]")
        print(record.social_proof)
        print("----------------------")
    finally:
        session.close()


if __name__ == "__main__":
    read_specific_entry("Healthcare - Hospital", "Infrastruktur-Verantwortlicher")

View File

@@ -0,0 +1,333 @@
#! /usr/bin/env python3
# Released to the public domain, by Tim Peters, 03 October 2000.
"""reindent [-d][-r][-v] [ path ... ]
-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
-r (--recurse) Recurse. Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose) Verbose. Print informative msgs; else no output.
(--newline) Newline. Specify the newline character to use (CRLF, LF).
Default is the same as the original file.
-h (--help) Help. Print this usage information and exit.
Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.
If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.
You can pass one or more file and/or directory paths. When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.
If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension. If it finds nothing to
change, the file is left alone. If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).
The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.
The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
"""
__version__ = "1"
import tokenize
import os
import shutil
import sys
# Global option flags; set from the command line in main().
verbose = False    # -v: print informative messages
recurse = False    # -r: recurse into subdirectories
dryrun = False     # -d: analyze only, write nothing
makebackup = True  # -n disables the .bak backup copy
# A specified newline to be used in the output (set by --newline option)
spec_newline = None
def usage(msg=None):
    """Write *msg* (default: the module docstring) to stderr."""
    print(__doc__ if msg is None else msg, file=sys.stderr)
def errprint(*args):
    """Write the space-joined string forms of *args* plus a newline to stderr."""
    message = " ".join(str(item) for item in args)
    print(message, file=sys.stderr)
def main():
    """Parse command-line options, then reindent stdin or the given paths.

    Mutates the module-level option globals, which check() reads later.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup, spec_newline
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh",
                                   ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun = True
        elif o in ('-r', '--recurse'):
            recurse = True
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose = True
        elif o in ('--newline',):
            # Only the two symbolic names are accepted; anything else is usage error.
            if not a.upper() in ('CRLF', 'LF'):
                usage()
                return
            spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()]
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # No paths given: act as a stdin -> stdout filter.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
def check(file):
    """Reindent one file, or every .py file under a directory.

    Returns True if the file was changed, False if left alone, and None
    for directories or files that could not be processed.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            # Recurse into non-hidden subdirectories (when -r is set) and
            # into every .py file regardless.
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    # First pass opens in binary mode just to sniff the source encoding.
    with open(file, 'rb') as f:
        try:
            encoding, _ = tokenize.detect_encoding(f.readline)
        except SyntaxError as se:
            errprint("%s: SyntaxError: %s" % (file, str(se)))
            return
    try:
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Keep the file's own newline convention unless --newline overrode it;
    # a tuple from f.newlines means the file mixed conventions.
    newline = spec_newline if spec_newline else r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            # Rewrite in place, preserving encoding and newline style.
            with open(file, "w", encoding=encoding, newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
def _rstrip(line, JUNK='\n \t'):
"""Return line stripped of trailing spaces, tabs, newlines.
Note that line.rstrip() instead also strips sundry control characters,
but at least one known Emacs user expects to keep junk like that, not
mentioning Barry by name or anything <wink>.
"""
i = len(line)
while i > 0 and line[i - 1] in JUNK:
i -= 1
return line[:i]
class Reindenter:
    """Reindent Python source read from an open file to 4-space levels.

    Tokenizes the input, recording the indent level of every statement
    (and -1 for comment lines), then rebuilds the text mapping each level
    to level * 4 spaces while keeping comments aligned with their code.
    """

    def __init__(self, f):
        """Read all lines from open file *f* and prepare tokenizer state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level
        # Raw file lines.
        self.raw = f.readlines()
        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line
        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []
        # Save the newlines found in the file so they can be used to
        # create output without mutating the newlines.
        self.newlines = f.newlines

    def run(self):
        """Compute the reindented text; return True if anything changed."""
        tokens = tokenize.generate_tokens(self.getline)
        for _token in tokens:
            self.tokeneater(*_token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                        if want < 0:
                            # Maybe it's a hanging comment like this one,
                            # in which case we should shift it like its base
                            # line got shifted.
                            for j in range(i - 1, -1, -1):
                                jline, jlevel = stats[j]
                                if jlevel >= 0:
                                    want = have + (getlspace(after[jline - 1]) -
                                                   getlspace(lines[jline]))
                                    break
                            if want < 0:
                                # Still no luck -- leave it alone.
                                want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed source (must run() first) to open file *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next prepared source line, or "" at end of input."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Track indent levels and record (lineno, level) stats per statement."""
        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1
        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1
        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1
        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone
        elif type == NL:
            pass
        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
# Count number of leading blanks.
def getlspace(line):
    """Return the number of leading space characters (spaces only) in line."""
    # lstrip(' ') removes exactly the leading spaces, so the length
    # difference is the count of leading blanks.
    return len(line) - len(line.lstrip(" "))


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,92 @@
import csv
from collections import Counter
import os
import argparse
from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from datetime import datetime
import logging
# --- Standalone Configuration ---
DATABASE_URL = "sqlite:////app/companies_v3_fixed_2.db"
LOG_FILE = "/app/Log_from_docker/standalone_importer.log"
# --- Logging Setup ---
# Create the log directory *before* configuring the FileHandler:
# logging.FileHandler opens its file eagerly on construction, so pointing
# it at a missing directory would crash this module at import time (the
# makedirs call in the __main__ block runs far too late for that).
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)
# --- SQLAlchemy Models (simplified, only what's needed) ---
Base = declarative_base()
class RawJobTitle(Base):
    """One unique raw job title harvested from an import, with its count."""
    __tablename__ = 'raw_job_titles'
    id = Column(Integer, primary_key=True)
    # Unique, so repeated imports update the count in place instead of duplicating.
    title = Column(String, unique=True, index=True)
    count = Column(Integer, default=1)  # how many times the title occurred
    source = Column(String, default="import")  # where the title came from
    # presumably flipped to True once the title is mapped elsewhere — TODO confirm with consumer
    is_mapped = Column(Boolean, default=False)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
# --- Database Connection ---
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
def import_job_titles_standalone(file_path: str):
    """Import job titles from a CSV file into the raw_job_titles table.

    Counts each non-empty first-column value, then inserts new titles and
    updates the counts of titles already present. The whole run commits at
    once; any failure rolls everything back.
    """
    session = SessionLocal()
    try:
        logger.info(f"Starting standalone import of job titles from {file_path}")
        title_counts = Counter()
        total_rows = 0
        with open(file_path, 'r', encoding='utf-8') as handle:
            for record in csv.reader(handle):
                if record and record[0].strip():
                    title_counts[record[0].strip()] += 1
                    total_rows += 1
        logger.info(f"Read {total_rows} total job title entries. Found {len(title_counts)} unique titles.")
        added_count = 0
        updated_count = 0
        for title, count in title_counts.items():
            existing = session.query(RawJobTitle).filter(RawJobTitle.title == title).first()
            if existing is None:
                session.add(RawJobTitle(title=title, count=count, source="csv_import", is_mapped=False))
                added_count += 1
            elif existing.count != count:
                existing.count = count
                updated_count += 1
        session.commit()
        logger.info(f"Standalone import complete. Added {added_count} new unique titles, updated {updated_count} existing titles.")
    except Exception as e:
        logger.error(f"Error during standalone job title import: {e}", exc_info=True)
        session.rollback()
    finally:
        session.close()
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Standalone script to import job titles from a CSV file.")
    parser.add_argument("file_path", type=str, help="Path to the CSV file containing job titles.")
    args = parser.parse_args()
    # Ensure the log directory exists before the import writes to the log file.
    os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
    import_job_titles_standalone(args.file_path)

View File

@@ -0,0 +1,22 @@
import os
import sys
# Add the company-explorer directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), 'company-explorer')))
from backend.database import SessionLocal, MarketingMatrix, Industry, Persona
from sqlalchemy.orm import joinedload
# Quick sanity-check script: count all MarketingMatrix rows and print a
# small sample. joinedload pre-fetches the related industry/persona rows
# so the loop below does not fire one extra query per entry (N+1).
db = SessionLocal()
try:
    query = db.query(MarketingMatrix).options(
        joinedload(MarketingMatrix.industry),
        joinedload(MarketingMatrix.persona)
    )
    entries = query.all()
    print(f"Total entries: {len(entries)}")
    # Show only the first three rows as a spot check.
    for e in entries[:3]:
        print(f"ID={e.id}, Industry={e.industry.name if e.industry else 'N/A'}, Persona={e.persona.name if e.persona else 'N/A'}")
        print(f"  Subject: {e.subject}")
finally:
    db.close()

View File

@@ -0,0 +1,98 @@
import unittest
from unittest.mock import patch, MagicMock
import os
import requests
# Den Pfad anpassen, damit das Modul gefunden wird
import sys
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
from check_company_existence import check_company_existence_with_company_explorer
class TestCompanyExistenceChecker(unittest.TestCase):
    """Unit tests for check_company_existence_with_company_explorer().

    All HTTP traffic is mocked via requests.get, so the tests run offline.
    """

    @patch('check_company_existence.requests.get')
    def test_company_exists_exact_match(self, mock_get):
        """An exactly matching company is correctly reported as existing."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "total": 1,
            "items": [
                {"id": 123, "name": "TestCorp"}
            ]
        }
        mock_get.return_value = mock_response
        result = check_company_existence_with_company_explorer("TestCorp")
        self.assertTrue(result["exists"])
        self.assertEqual(result["company_id"], 123)
        self.assertEqual(result["company_name"], "TestCorp")

    @patch('check_company_existence.requests.get')
    def test_company_does_not_exist(self, mock_get):
        """A company with zero search hits is reported as not existing."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"total": 0, "items": []}
        mock_get.return_value = mock_response
        result = check_company_existence_with_company_explorer("NonExistentCorp")
        self.assertFalse(result["exists"])
        self.assertIn("not found", result["message"])

    @patch('check_company_existence.requests.get')
    def test_company_partial_match_only(self, mock_get):
        """Search results without an exact name match count as not existing."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "total": 1,
            "items": [
                {"id": 124, "name": "TestCorp Inc"}
            ]
        }
        mock_get.return_value = mock_response
        result = check_company_existence_with_company_explorer("TestCorp")
        self.assertFalse(result["exists"])
        self.assertIn("not found as an exact match", result["message"])

    @patch('check_company_existence.requests.get')
    def test_http_error_handling(self, mock_get):
        """An HTTP 401 Unauthorized response is surfaced as an error result."""
        # Import requests in the test scope so the side_effect exception class is local.
        import requests
        mock_response = MagicMock()
        mock_response.status_code = 401
        mock_response.text = "Unauthorized"
        # raise_for_status must raise for the error path to be exercised.
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("401 Client Error: Unauthorized for url")
        mock_get.return_value = mock_response
        result = check_company_existence_with_company_explorer("AnyCompany")
        self.assertFalse(result["exists"])
        self.assertIn("HTTP error occurred", result["error"])

    @patch('check_company_existence.requests.get')
    def test_connection_error_handling(self, mock_get):
        """A connection failure is surfaced as an error result."""
        # Import requests in the test scope so the raised exception class is local.
        import requests
        mock_get.side_effect = requests.exceptions.ConnectionError("Connection failed")
        result = check_company_existence_with_company_explorer("AnyCompany")
        self.assertFalse(result["exists"])
        self.assertIn("Connection error occurred", result["error"])
if __name__ == '__main__':
    # Bring 'requests' into the module scope for the HTTP-error test
    # (the tests also import it locally, so this is belt-and-braces).
    import requests
    # argv is overridden so unittest does not consume script arguments;
    # exit=False keeps the interpreter alive after the run.
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

View File

@@ -0,0 +1,60 @@
# test_core_functionality.py
import pytest
from helpers import extract_numeric_value, get_col_idx
from config import COLUMN_ORDER # Wir brauchen die echte Spaltenreihenfolge für den Test
# --- Test cases for the critical function extract_numeric_value ---
# Format: (input string, expected output as string)
# Revenue cases: input string -> expected value in millions (as string).
umsatz_test_cases = [
    ("ca. 1.234,56 Mio. € (2022)", "1"), # millions unit, '.' thousands separator, ',' decimal
    ("rund 500 Tsd. US-Dollar", "0"), # thousands become 0.5 million, rounded to 0
    ("750.000 Euro", "1"), # '.' as thousands separator -> 0.75 million, rounded to 1
    ("1,5 Milliarden CHF", "1500"), # billions unit
    ("25.7 mn", "26"), # "mn" abbreviation for millions
    ("keine Angabe", "k.A."), # plain text -> "no data" marker
    ("0", "0"), # explicit zero
    ("FEHLERHAFTER WERT", "k.A."), # parse failure falls back to "k.A."
    ("1234567", "1"), # bare number without unit
    ("€ 850 k", "1"), # "k" suffix for thousands
]
# Employee cases: input string -> expected absolute head count (as string).
mitarbeiter_test_cases = [
    ("ca. 1.234", "1234"),
    ("rund 500 Tsd.", "500000"),
    ("1,5 Millionen", "1500000"),
    ("1.234 (Stand 2023)", "1234"),
    ("k.A.", "k.A."),
]
@pytest.mark.parametrize("input_str, expected", umsatz_test_cases)
def test_extract_umsatz_from_various_formats(input_str, expected):
    """extract_numeric_value converts assorted revenue formats to millions."""
    actual = extract_numeric_value(input_str, is_umsatz=True)
    assert actual == expected
@pytest.mark.parametrize("input_str, expected", mitarbeiter_test_cases)
def test_extract_mitarbeiter_from_various_formats(input_str, expected):
    """extract_numeric_value converts employee counts to absolute numbers."""
    actual = extract_numeric_value(input_str, is_umsatz=False)
    assert actual == expected
# --- Test cases for the new, central get_col_idx function ---
def test_get_col_idx_success():
    """Valid column names resolve to their expected positions."""
    # Per COLUMN_ORDER, "ReEval Flag" is first and "CRM Name" second.
    expected_positions = {"ReEval Flag": 0, "CRM Name": 1}
    for column, index in expected_positions.items():
        assert get_col_idx(column) == index
def test_get_col_idx_failure():
    """An unknown column name yields None instead of raising."""
    missing = get_col_idx("Diese Spalte existiert nicht")
    assert missing is None
def test_get_col_idx_edge_cases():
    """Empty/None inputs yield None; the last column resolves correctly."""
    for bogus in ("", None):
        assert get_col_idx(bogus) is None
    # The final entry of COLUMN_ORDER must map to the final index.
    final_name = COLUMN_ORDER[-1]
    assert get_col_idx(final_name) == len(COLUMN_ORDER) - 1

View File

@@ -0,0 +1,31 @@
import requests
import os
from requests.auth import HTTPBasicAuth
def test_connection(url, name):
    """Probe the /health endpoint of *url*; return True on HTTP 200."""
    print(f"--- Testing {name}: {url} ---")
    try:
        resp = requests.get(
            f"{url}/health",
            auth=HTTPBasicAuth("admin", "gemini"),
            timeout=5,
        )
    except Exception as e:
        print(f"Error: {e}")
        return False
    print(f"Status Code: {resp.status_code}")
    print(f"Response: {resp.text}")
    return resp.status_code == 200
# Try both routes into Company Explorer and report which (if any) works.
# Path 1: Hardcoded LAN IP through Proxy
url_lan = "http://192.168.178.6:8090/ce/api"
# Path 2: Internal Docker Networking (direct)
url_docker = "http://company-explorer:8000/api"
success_lan = test_connection(url_lan, "LAN IP (Proxy)")
print("\n")
success_docker = test_connection(url_docker, "Docker Internal (Direct)")
if not success_lan and not success_docker:
    print("\nFATAL: Company Explorer not reachable from this container.")

View File

@@ -0,0 +1,34 @@
import requests
import os
def test_export_endpoint():
    """Smoke-test the CSV export endpoint and verify the new columns appear."""
    # The app runs on port 8000 inside the container; root_path is /ce,
    # so the full URL is http://localhost:8000/ce/api/companies/export.
    url = "http://localhost:8000/ce/api/companies/export"
    print(f"--- Testing Export Endpoint: GET {url} ---")
    try:
        resp = requests.get(url)
        resp.raise_for_status()  # 4xx/5xx raise and are handled below
    except requests.exceptions.RequestException as e:
        print(f"\n[FAILURE] Could not connect to the endpoint: {e}")
        return
    # Show headers and a content preview so a human can verify the export.
    print("\n--- Response Headers ---")
    print(resp.headers)
    print("\n--- CSV Output (first 500 chars) ---")
    print(resp.text[:500])
    # Simple presence check for the newly added export columns.
    if "Metric Value" in resp.text and "Source URL" in resp.text:
        print("\n[SUCCESS] New columns found in export.")
    else:
        print("\n[FAILURE] New columns seem to be missing from the export.")
if __name__ == "__main__":
test_export_endpoint()

View File

@@ -0,0 +1,91 @@
import requests
import os
import sys
import time
# Load credentials from .env
# Simple manual parser to avoid dependency on python-dotenv
def load_env(path):
    """Load KEY=VALUE pairs from a .env file into os.environ.

    Minimal hand-rolled parser (avoids a python-dotenv dependency):
    blank lines, comment lines, and lines without '=' are skipped, and
    existing environment variables are never overwritten (setdefault).
    """
    if not os.path.exists(path):
        print(f"Warning: .env file not found at {path}")
        return
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            # Skip blanks, comments (also indented ones), and malformed
            # lines lacking '=' (previously raised ValueError on unpack).
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, val = line.split('=', 1)
            os.environ.setdefault(key.strip(), val.strip())
# Populate os.environ from the container's .env before reading credentials.
load_env('/app/.env')
API_USER = os.getenv("API_USER", "admin")
API_PASS = os.getenv("API_PASSWORD", "gemini")
CE_URL = "http://127.0.0.1:8000" # Target the local container (assuming port 8000 is mapped)
TEST_CONTACT_ID = 1 # Therme Erding - seed contact used by every scenario
def run_test():
    """Run the API-level E2E text-generation scenarios.

    Waits for the Company Explorer health endpoint, then provisions the
    test SuperOffice contact once per scenario and asserts that both
    openers are present and the scenario keyword landed in the expected
    opener field. Returns True when every scenario passed.
    """
    print("🚀 STARTING API-LEVEL E2E TEXT GENERATION TEST\n")
    # --- Health Check: up to 10 attempts, 2s apart ---
    print("Waiting for Company Explorer API to be ready...")
    for i in range(10):
        try:
            health_resp = requests.get(f"{CE_URL}/api/health", auth=(API_USER, API_PASS), timeout=2)
            if health_resp.status_code == 200:
                print("✅ API is ready.")
                break
        except requests.exceptions.RequestException:
            pass
        if i == 9:
            print("❌ API not ready after 20 seconds. Aborting.")
            return False
        time.sleep(2)
    scenarios = [
        {"name": "Infrastructure Role", "job_title": "Facility Manager", "opener_field": "opener", "keyword": "Sicherheit"},
        {"name": "Operational Role", "job_title": "Leiter Badbetrieb", "opener_field": "opener_secondary", "keyword": "Gäste"}
    ]
    all_passed = True
    for s in scenarios:
        print(f"--- Testing: {s['name']} ---")
        endpoint = f"{CE_URL}/api/provision/superoffice-contact"
        payload = {
            "so_contact_id": TEST_CONTACT_ID,
            "job_title": s['job_title']
        }
        try:
            resp = requests.post(endpoint, json=payload, auth=(API_USER, API_PASS))
            resp.raise_for_status()
            data = resp.json()
            # --- Assertions ---
            opener = data.get('opener')
            opener_sec = data.get('opener_secondary')
            assert opener, "❌ FAIL: Primary opener is missing!"
            print(f"   ✅ Primary Opener: '{opener}'")
            assert opener_sec, "❌ FAIL: Secondary opener is missing!"
            print(f"   ✅ Secondary Opener: '{opener_sec}'")
            target_opener_text = data.get(s['opener_field'])
            assert s['keyword'].lower() in target_opener_text.lower(), f"❌ FAIL: Keyword '{s['keyword']}' not in '{s['opener_field']}'!"
            print(f"   ✅ Keyword '{s['keyword']}' found in correct opener.")
            # BUGFIX: was "---\\n", which printed a literal backslash-n
            # instead of the intended blank line after each scenario.
            print(f"--- ✅ PASSED: {s['name']} ---\n")
        except Exception as e:
            print(f"   ❌ TEST FAILED: {e}")
            if hasattr(e, 'response') and e.response is not None:
                print(f"   Response: {e.response.text}")
            all_passed = False
    return all_passed
if __name__ == "__main__":
if run_test():
print("🏁 All scenarios passed successfully!")
else:
print("🔥 Some scenarios failed.")
sys.exit(1)

View File

@@ -0,0 +1,61 @@
import re
import json
def parse_markdown_table(markdown_text):
    """Extract the first markdown table found in *markdown_text*.

    Returns {"headers": [...], "rows": [[...], ...]} with bold/italic
    asterisk markers stripped from every cell. Rows are padded or
    truncated to the header width, and all-empty rows are dropped.
    """
    def unbold(text):
        # Strip surrounding **bold** / *italic* markers from a cell.
        return re.sub(r'\*+([^\*]+)\*+', r'\1', text.strip()).strip()

    candidates = [ln.strip() for ln in markdown_text.strip().split('\n') if ln.strip()]
    table_lines = [ln for ln in candidates if ln.startswith('|') and ln.endswith('|')]
    if not table_lines:
        return {"headers": [], "rows": []}

    # Locate the header/body separator row (e.g. "| --- | --- |"): it must
    # contain '---' and no alphanumerics outside the structural characters.
    sep_at = -1
    for idx, ln in enumerate(table_lines):
        residue = ln.replace('|', '').replace('-', '').replace(' ', '').replace(':', '')
        if '---' in ln and not re.search(r'[a-zA-Z0-9]', residue):
            sep_at = idx
            break

    if sep_at == -1:
        # No separator: treat the first table line as the header.
        header_line, body = table_lines[0], table_lines[1:]
    elif sep_at == 0:
        # A separator with no header row above it is not a valid table.
        return {"headers": [], "rows": []}
    else:
        header_line, body = table_lines[sep_at - 1], table_lines[sep_at + 1:]

    headers = [unbold(h) for h in header_line.split('|') if h.strip()]
    if not headers:
        return {"headers": [], "rows": []}

    width = len(headers)
    rows = []
    for ln in body:
        cells = [unbold(c) for c in ln.split('|')]
        # Drop the empty fragments produced by the leading/trailing pipes.
        if ln.startswith('|'):
            cells = cells[1:]
        if ln.endswith('|'):
            cells = cells[:-1]
        # Normalize every row to the header width.
        if len(cells) != width:
            cells = (cells + [''] * width)[:width]
        if any(cells):
            rows.append(cells)
    return {"headers": headers, "rows": rows}
# Content from the log (simplified/cleaned of the huge gap for testing)
content = """
## Schritt 1: Angebot (WAS)
| Produkt/Lösung | Beschreibung (1-2 Sätze) | Kernfunktionen | Differenzierung | Primäre Quelle (URL) |
| --- | --- | --- | --- | --- |
| **AgreeDo (Meeting Management Software)** | AgreeDo ist eine webbasierte Anwendung... | **Kernfunktionen:**... | **Differenzierung:**... | `https://agreedo.com/` |
"""
# Parse the sample table and dump the structured result for inspection.
result = parse_markdown_table(content)
print(json.dumps(result, indent=2))

View File

@@ -0,0 +1,12 @@
import requests
import json
url = "http://company-explorer:8000/api/provision/superoffice-contact"
payload = {"so_contact_id": 4}
auth = ("admin", "gemini")
try:
resp = requests.post(url, json=payload, auth=auth)
print(json.dumps(resp.json(), indent=2))
except Exception as e:
print(f"Error: {e}")

View File

@@ -0,0 +1,31 @@
from pytube import YouTube
import traceback
import sys # Importiere sys für den Modulzugriff
# Sample video used for the probe (any public URL would do).
VIDEO_URL = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
# Report where the pytube package was actually loaded from, to spot
# shadowed or stale installations.
try:
    loaded_module = sys.modules[YouTube.__module__]
    print(f"Pytube Modulpfad: {loaded_module.__file__}")
except Exception as path_err:
    print(f"Konnte Pytube Modulpfad nicht ermitteln: {path_err}")
print(f"Versuche, Infos für Video abzurufen: {VIDEO_URL}")
try:
    video = YouTube(VIDEO_URL)
    print(f"Titel: {video.title}")
    # Accessing .streams is typically the call that triggers pytube failures.
    print(f"Verfügbare Streams (Anzahl): {len(video.streams)}")
    candidate = video.streams.filter(progressive=True, file_extension='mp4').first()
    if candidate:
        print(f"Erfolgreich einen progressiven MP4 Stream gefunden: {candidate.itag}")
    else:
        print("Keinen progressiven MP4 Stream gefunden.")
except Exception as err:
    print("\nEin Fehler ist aufgetreten im Haupt-Try-Block:")
    print(f"Fehlertyp: {type(err)}")
    print(f"Fehlermeldung: {str(err)}")
    print("Traceback:")
    traceback.print_exc()

View File

@@ -0,0 +1,24 @@
import tempfile
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Build Chromium options suitable for running inside a container.
chrome_options = Options()
for flag in ('--no-sandbox', '--disable-dev-shm-usage', '--window-size=1920,1200'):
    chrome_options.add_argument(flag)
chrome_options.binary_location = "/usr/bin/chromium"
# Fresh throwaway profile directory so concurrent runs never collide.
profile_dir = tempfile.mkdtemp()
chrome_options.add_argument(f'--user-data-dir={profile_dir}')
try:
    browser = webdriver.Chrome(options=chrome_options)
    print("WebDriver erfolgreich gestartet!")
    print("Typ:", type(browser))
    print("Session ID:", browser.session_id)
    browser.get("https://www.example.com")
    print("Titel der Seite:", browser.title)
    browser.quit()
except Exception as e:
    print("Fehler beim Starten des WebDrivers:", e)

View File

@@ -0,0 +1,99 @@
import json
import time
import os
import sys
# Ensure we can import from lead-engine: the ingest module lives in a
# sibling directory that is not an installed package, so extend sys.path.
sys.path.append(os.path.join(os.path.dirname(__file__), 'lead-engine'))
try:
    from trading_twins_ingest import process_leads
except ImportError:
    # Degrade gracefully: run_email_ingest checks for None before calling.
    print("Warning: Could not import trading_twins_ingest from lead-engine. Email ingestion disabled.")
    process_leads = None
from company_explorer_connector import handle_company_workflow
def run_trading_twins_process(target_company_name: str) -> None:
    """Run the Trading Twins process for one target company.

    Delegates to the Company Explorer workflow, which looks the company
    up, creates it if missing, and kicks off enrichment. The outcome is
    only printed to the console here (sufficient for the POC).
    """
    print(f"\n{'='*50}")
    print(f"Starte Trading Twins Analyse für: {target_company_name}")
    print(f"{'='*50}\n")
    # Invoke the Company Explorer workflow: checks whether the company
    # exists, creates it if not, triggers enrichment, and finally returns
    # the Company Explorer data for it.
    company_data_result = handle_company_workflow(target_company_name)
    # For the POC a console dump of the result is all the handling needed.
    print("\n--- Ergebnis vom Company Explorer Connector (für Trading Twins) ---")
    status = company_data_result.get("status")
    data = company_data_result.get("data")
    if status == "error":
        print(f"Ein Fehler ist aufgetreten: {company_data_result.get('message')}")
    elif status == "found":
        print(f"Unternehmen gefunden. ID: {data.get('id')}, Name: {data.get('name')}")
        print(json.dumps(data, indent=2, ensure_ascii=False))
    elif status == "created_and_enriched":
        # Enrichment runs in the background; data may still be incomplete.
        print(f"Unternehmen erstellt und Enrichment angestoßen. ID: {data.get('id')}, Name: {data.get('name')}")
        print("Hinweis: Enrichment-Prozesse laufen im Hintergrund und können einige Zeit dauern, bis alle Daten verfügbar sind.")
        print(json.dumps(data, indent=2, ensure_ascii=False))
    elif status == "created_discovery_timeout":
        # Discovery found no website, so the analysis pipeline was skipped.
        print(f"Unternehmen erstellt, aber Discovery konnte keine Website finden (ID: {data.get('id')}, Name: {data.get('name')}).")
        print("Der Analyse-Prozess wurde daher nicht gestartet.")
        print(json.dumps(data, indent=2, ensure_ascii=False))
    else:
        # Unknown status: dump the raw payload for debugging.
        print("Ein unerwarteter Status ist aufgetreten.")
        print(json.dumps(company_data_result, indent=2, ensure_ascii=False))
    print(f"\n{'='*50}")
    print(f"Trading Twins Analyse für {target_company_name} abgeschlossen.")
    print(f"{'='*50}\n")
def run_email_ingest():
    """Starts the automated email ingestion process for Tradingtwins leads."""
    if not process_leads:
        # Import of trading_twins_ingest failed at module load time.
        print("Error: Email ingestion module not available.")
        return
    print("\nStarting automated email ingestion via Microsoft Graph...")
    process_leads()
    print("Email ingestion completed.")
if __name__ == "__main__":
# Simulieren der Umgebungsvariablen für diesen Testlauf, falls nicht gesetzt
if "COMPANY_EXPLORER_API_USER" not in os.environ:
os.environ["COMPANY_EXPLORER_API_USER"] = "admin"
if "COMPANY_EXPLORER_API_PASSWORD" not in os.environ:
os.environ["COMPANY_EXPLORER_API_PASSWORD"] = "gemini"
print("Trading Twins Tool - Main Menu")
print("1. Process specific company name")
print("2. Ingest leads from Email (info@robo-planet.de)")
print("3. Run demo sequence (Robo-Planet, Erding, etc.)")
choice = input("\nSelect option (1-3): ").strip()
if choice == "1":
name = input("Enter company name: ").strip()
if name:
run_trading_twins_process(name)
elif choice == "2":
run_email_ingest()
elif choice == "3":
# Testfall 1: Ein Unternehmen, das wahrscheinlich bereits existiert
run_trading_twins_process("Robo-Planet GmbH")
time.sleep(2)
# Testfall 1b: Ein bekanntes, real existierendes Unternehmen
run_trading_twins_process("Klinikum Landkreis Erding")
time.sleep(2)
# Testfall 2: Ein neues, eindeutiges Unternehmen
new_unique_company_name = f"Trading Twins New Target {int(time.time())}"
run_trading_twins_process(new_unique_company_name)
else:
print("Invalid choice.")

View File

@@ -0,0 +1,118 @@
# train_model_v3.0.py (final)
import pandas as pd
import numpy as np
import re
import math
import joblib
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from thefuzz import fuzz
from collections import Counter
import logging
import sys
import os
from google_sheet_handler import GoogleSheetHandler
from helpers import normalize_company_name
# Log to stdout so container logs capture the training progress.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)])
log = logging.getLogger()
# Input/output artifacts of the v3.0 training pipeline.
GOLD_STANDARD_FILE = 'erweitertes_matching.csv'
CRM_SHEET_NAME = "CRM_Accounts"
MODEL_OUTPUT_FILE = 'xgb_model.json'
TERM_WEIGHTS_OUTPUT_FILE = 'term_weights.joblib'
CRM_PREDICTION_FILE = 'crm_for_prediction.pkl'
# Gold-standard columns: the confirmed match plus earlier model suggestions
# (the latter serve as hard negatives when they differ from the match).
BEST_MATCH_COL = 'Best Match Option'
SUGGESTION_COLS = ['V2_Match_Suggestion', 'V3_Match_Suggestion', 'V4_Match_Suggestion']
# ... (All helper functions are identical to version 2.4/2.5) ...
def _tokenize(s: str):
if not s: return []
return re.split(r"[^a-z0-9äöüß]+", str(s).lower())
def clean_name_for_scoring(norm_name: str):
    """Drop legal-form/stop tokens and short tokens (<3 chars) from a
    normalized company name. Returns (joined_string, token_set)."""
    # Legal forms and generic filler words that carry no matching signal.
    stop_words = {
        'gmbh', 'mbh', 'ag', 'kg', 'ug', 'ohg', 'se', 'co', 'kgaa', 'inc',
        'llc', 'ltd', 'sarl', 'b.v', 'bv', 'holding', 'gruppe', 'group',
        'international', 'solutions', 'solution', 'service', 'services',
    }
    city_names = set()  # placeholder: city tokens could be filtered here too
    if not norm_name:
        return "", set()
    blocked = stop_words | city_names
    kept = [
        tok
        for tok in _tokenize(norm_name)
        if len(tok) >= 3 and tok not in blocked
    ]
    return " ".join(kept), set(kept)
def choose_rarest_token(norm_name: str, term_weights: dict):
    """Return the token of *norm_name* with the highest rarity weight in
    *term_weights*, or None when no tokens survive cleaning."""
    tokens = clean_name_for_scoring(norm_name)[1]
    if not tokens:
        return None

    def weight_of(token):
        return term_weights.get(token, 0)

    return max(tokens, key=weight_of)
def create_features(mrec: dict, crec: dict, term_weights: dict):
    """Build the feature dict for one (gold record, CRM candidate) pair.

    mrec is a gold-standard row (keys like 'normalized_CRM Name',
    'CRM Website', 'CRM Ort', 'CRM Land'); crec is a CRM candidate row;
    term_weights maps name tokens to rarity weights. Returns a flat
    dict of numeric features consumed by the XGBoost classifier.
    """
    features = {}
    n1_raw = mrec.get('normalized_CRM Name', '')
    n2_raw = crec.get('normalized_name', '')
    clean1, toks1 = clean_name_for_scoring(n1_raw)
    clean2, toks2 = clean_name_for_scoring(n2_raw)
    # Fuzzy similarities: raw names for the character-level scores,
    # cleaned names (stop words removed) for the token-based scores.
    features['fuzz_ratio'] = fuzz.ratio(n1_raw, n2_raw)
    features['fuzz_partial_ratio'] = fuzz.partial_ratio(n1_raw, n2_raw)
    features['fuzz_token_set_ratio'] = fuzz.token_set_ratio(clean1, clean2)
    features['fuzz_token_sort_ratio'] = fuzz.token_sort_ratio(clean1, clean2)
    # Website comparison: lower-case, drop 'www.', keep only the host part.
    domain1_raw = str(mrec.get('CRM Website', '')).lower()
    domain2_raw = str(crec.get('CRM Website', '')).lower()
    domain1 = domain1_raw.replace('www.', '').split('/')[0].strip()
    domain2 = domain2_raw.replace('www.', '').split('/')[0].strip()
    features['domain_match'] = 1 if domain1 and domain1 == domain2 else 0
    # Exact-equality flags for city/country, only when both sides are set.
    features['city_match'] = 1 if mrec.get('CRM Ort') and crec.get('CRM Ort') and mrec['CRM Ort'] == crec['CRM Ort'] else 0
    features['country_match'] = 1 if mrec.get('CRM Land') and crec.get('CRM Land') and mrec['CRM Land'] == crec['CRM Land'] else 0
    features['country_mismatch'] = 1 if (mrec.get('CRM Land') and crec.get('CRM Land') and mrec['CRM Land'] != crec['CRM Land']) else 0
    # Token-overlap features weighted by token rarity.
    overlapping_tokens = toks1 & toks2
    rarest_token_mrec = choose_rarest_token(n1_raw, term_weights)
    features['rarest_token_overlap'] = 1 if rarest_token_mrec and rarest_token_mrec in toks2 else 0
    features['weighted_token_score'] = sum(term_weights.get(t, 0) for t in overlapping_tokens)
    features['jaccard_similarity'] = len(overlapping_tokens) / len(toks1 | toks2) if len(toks1 | toks2) > 0 else 0
    # Simple length-based signals on the raw normalized names.
    features['name_len_diff'] = abs(len(n1_raw) - len(n2_raw))
    features['candidate_is_shorter'] = 1 if len(n2_raw) < len(n1_raw) else 0
    return features
if __name__ == "__main__":
log.info("Starte Trainingsprozess (v3.0 final)")
try:
gold_df = pd.read_csv(GOLD_STANDARD_FILE, sep=';', encoding='utf-8')
sheet_handler = GoogleSheetHandler()
crm_df = sheet_handler.get_sheet_as_dataframe(CRM_SHEET_NAME)
except Exception as e:
log.critical(f"Fehler beim Laden der Daten: {e}")
sys.exit(1)
crm_df.drop_duplicates(subset=['CRM Name'], keep='first', inplace=True)
crm_df['normalized_name'] = crm_df['CRM Name'].astype(str).apply(normalize_company_name)
gold_df['normalized_CRM Name'] = gold_df['CRM Name'].astype(str).apply(normalize_company_name)
term_weights = {token: math.log(len(crm_df) / (count + 1)) for token, count in Counter(t for n in crm_df['normalized_name'] for t in set(clean_name_for_scoring(n)[1])).items()}
features_list, labels = [], []
crm_lookup = crm_df.set_index('CRM Name').to_dict('index')
suggestion_cols_found = [col for col in gold_df.columns if col in SUGGESTION_COLS]
for _, row in gold_df.iterrows():
mrec = row.to_dict()
best_match_name = row.get(BEST_MATCH_COL)
if pd.notna(best_match_name) and str(best_match_name).strip() != '' and best_match_name in crm_lookup:
features_list.append(create_features(mrec, crm_lookup[best_match_name], term_weights))
labels.append(1)
for col_name in suggestion_cols_found:
suggestion_name = row.get(col_name)
if pd.notna(suggestion_name) and suggestion_name != best_match_name and suggestion_name in crm_lookup:
features_list.append(create_features(mrec, crm_lookup[suggestion_name], term_weights))
labels.append(0)
X, y = pd.DataFrame(features_list), np.array(labels)
log.info(f"Trainingsdatensatz erstellt mit {X.shape[0]} Beispielen. Klassenverteilung: {Counter(y)}")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
scale_pos_weight = sum(y_train == 0) / sum(y_train) if sum(y_train) > 0 else 1
model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss', scale_pos_weight=scale_pos_weight)
model.fit(X_train, y_train)
log.info("Modell erfolgreich trainiert.")
y_pred = model.predict(X_test)
log.info(f"\n--- Validierungsergebnis ---\nGenauigkeit: {accuracy_score(y_test, y_pred):.2%}\n" + classification_report(y_test, y_pred, zero_division=0))
model.save_model(MODEL_OUTPUT_FILE)
joblib.dump(term_weights, TERM_WEIGHTS_OUTPUT_FILE)
crm_df.to_pickle(CRM_PREDICTION_FILE)
log.info("Alle 3 Modelldateien erfolgreich erstellt.")

View File

@@ -0,0 +1,25 @@
import sqlite3
import json
import time
DB_PATH = "connector_queue.db"
def trigger_resync(contact_id):
    """Queue a synthetic 'contact.changed' job so the connector re-syncs
    the given contact on its next poll."""
    print(f"🚀 Triggering manual resync for Contact {contact_id}...")
    event = "contact.changed"
    job_payload = json.dumps({
        "Event": event,
        "PrimaryKey": contact_id,
        "ContactId": contact_id,
        # Dummy change list so the connector's field filters accept the job.
        "Changes": ["UserDefinedFields", "Name"],
    })
    insert_sql = "INSERT INTO jobs (event_type, payload, status) VALUES (?, ?, ?)"
    # The connection context manager commits the insert on success.
    with sqlite3.connect(DB_PATH) as conn:
        conn.execute(insert_sql, (event, job_payload, 'PENDING'))
    print("✅ Job added to queue.")
if __name__ == "__main__":
trigger_resync(6) # Bennis Playland has CRM ID 6

View File

@@ -0,0 +1,13 @@
import sqlite3
DB_PATH = "/app/companies_v3_fixed_2.db"
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT name, description, convincing_arguments FROM personas")
rows = cursor.fetchall()
for row in rows:
print(f"Persona: {row[0]}")
print(f" Description: {row[1][:100]}...")
print(f" Convincing: {row[2][:100]}...")
print("-" * 20)
conn.close()