feat: Enhanced CE schema and Notion sync (Pains/Gains)

This commit is contained in:
Moltbot-Jarvis
2026-02-17 19:44:42 +00:00
parent d2e9ee2a70
commit 42bbcd1425
16 changed files with 1731 additions and 3 deletions

View File

@@ -18,21 +18,35 @@ class Company(Base):
id = Column(Integer, primary_key=True, index=True)
# Core Identity
# Core Identity (Golden Record - from Research)
name = Column(String, index=True)
website = Column(String, index=True) # Normalized Domain preferred
crm_id = Column(String, unique=True, index=True, nullable=True) # Link to D365
# CRM Original Data (Source of Truth for Import)
crm_name = Column(String, nullable=True)
crm_website = Column(String, nullable=True)
crm_address = Column(String, nullable=True) # Full address string or JSON
crm_vat = Column(String, nullable=True)
# Classification
industry_crm = Column(String, nullable=True) # The "allowed" industry
industry_ai = Column(String, nullable=True) # The AI suggested industry
# Location
# Location (Golden Record)
city = Column(String, nullable=True)
country = Column(String, default="DE")
# Workflow Status
status = Column(String, default="NEW", index=True)
status = Column(String, default="NEW", index=True) # NEW, TO_ENRICH, ENRICHED, QUALIFIED, DISQUALIFIED
# Quality & Confidence
confidence_score = Column(Float, default=0.0) # Overall confidence
data_mismatch_score = Column(Float, default=0.0) # 0.0=Match, 1.0=Mismatch
# Scraping Status Flags
website_scrape_status = Column(String, default="PENDING") # PENDING, SUCCESS, FAILED, BLOCKED
wiki_search_status = Column(String, default="PENDING") # PENDING, FOUND, NOT_FOUND
# Granular Process Tracking (Timestamps)
created_at = Column(DateTime, default=datetime.utcnow)
@@ -106,6 +120,13 @@ class Industry(Base):
status_notion = Column(String, nullable=True) # e.g. "P1 Focus Industry"
is_focus = Column(Boolean, default=False) # Derived from status_notion
# Enhanced Fields (v3.1 - Pains/Gains/Priority)
pains = Column(Text, nullable=True)
gains = Column(Text, nullable=True)
notes = Column(Text, nullable=True)
priority = Column(String, nullable=True) # Replaces old status concept ("Freigegeben")
ops_focus_secondary = Column(Boolean, default=False)
# NEW SCHEMA FIELDS (from MIGRATION_PLAN)
metric_type = Column(String, nullable=True) # Unit_Count, Area_in, Area_out
min_requirement = Column(Float, nullable=True)
@@ -117,6 +138,7 @@ class Industry(Base):
# Optional link to a Robotics Category (the "product" relevant for this industry)
primary_category_id = Column(Integer, ForeignKey("robotics_categories.id"), nullable=True)
secondary_category_id = Column(Integer, ForeignKey("robotics_categories.id"), nullable=True)
created_at = Column(DateTime, default=datetime.utcnow)

View File

@@ -0,0 +1,53 @@
import sqlite3
import os
# Default DB location; override via the db_path parameter of migrate().
DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer.db"


def migrate(db_path=DB_PATH):
    """Add the v3.1 schema columns to an existing SQLite database.

    Idempotent: columns that already exist are detected via SQLite's
    "duplicate column name" error and skipped, so re-running is safe.
    Errors on individual columns are printed, not raised, so one failure
    does not abort the rest of the migration.

    Args:
        db_path: Path to the SQLite file. Defaults to DB_PATH so the
            original zero-argument call keeps working.
    """
    if not os.path.exists(db_path):
        print(f"Database not found at {db_path}. Maybe it hasn't been created yet?")
        return
    print(f"Migrating database at {db_path}...")
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        # (table, column, SQL type + default) triples to add.
        columns_to_add = [
            # Industries (Existing List)
            ("industries", "pains", "TEXT"),
            ("industries", "gains", "TEXT"),
            ("industries", "notes", "TEXT"),
            ("industries", "priority", "TEXT"),
            ("industries", "ops_focus_secondary", "BOOLEAN DEFAULT 0"),
            ("industries", "secondary_category_id", "INTEGER"),
            # Companies (New List for CRM Data)
            ("companies", "crm_name", "TEXT"),
            ("companies", "crm_website", "TEXT"),
            ("companies", "crm_address", "TEXT"),
            ("companies", "crm_vat", "TEXT"),
            # Companies (Status & Quality)
            ("companies", "confidence_score", "FLOAT DEFAULT 0.0"),
            ("companies", "data_mismatch_score", "FLOAT DEFAULT 0.0"),
            ("companies", "website_scrape_status", "TEXT DEFAULT 'PENDING'"),
            ("companies", "wiki_search_status", "TEXT DEFAULT 'PENDING'"),
        ]
        for table, col_name, col_type in columns_to_add:
            try:
                print(f"Adding column '{col_name}' to '{table}'...")
                cursor.execute(f"ALTER TABLE {table} ADD COLUMN {col_name} {col_type}")
            except sqlite3.OperationalError as e:
                # SQLite has no "ADD COLUMN IF NOT EXISTS"; detect re-runs here.
                if "duplicate column name" in str(e):
                    print(f"Column '{col_name}' already exists. Skipping.")
                else:
                    print(f"Error adding '{col_name}' to '{table}': {e}")
        conn.commit()
    finally:
        # Release the handle even if an unexpected error occurs mid-migration.
        conn.close()
    print("Migration complete.")


if __name__ == "__main__":
    migrate()

View File

@@ -0,0 +1,170 @@
import sys
import os
import requests
import logging
# Setup Paths
# Make the app package and the repo root importable before the backend import.
sys.path.append(os.path.abspath("/home/node/clawd/repos/brancheneinstufung2/company-explorer"))
sys.path.append(os.path.abspath("/home/node/clawd/repos/brancheneinstufung2"))
from backend.database import SessionLocal, Industry, RoboticsCategory, init_db
from dotenv import load_dotenv

# Load NOTION_API_KEY (and friends) from the shared .env file.
load_dotenv(dotenv_path="/home/node/clawd/.env")

# Logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

NOTION_TOKEN = os.getenv("NOTION_API_KEY")
if not NOTION_TOKEN:
    # Fail fast: every Notion request below needs the bearer token.
    logger.error("NOTION_API_KEY missing!")
    sys.exit(1)

# Shared headers for all Notion REST calls (API version pinned).
HEADERS = {
    "Authorization": f"Bearer {NOTION_TOKEN}",
    "Notion-Version": "2022-06-28",
    "Content-Type": "application/json"
}
def find_db_id(query_name):
    """Return the id of the first Notion database whose title matches query_name.

    Returns None when the search fails or yields no results. Note that
    Notion search is fuzzy; the first hit is assumed to be the right DB.
    """
    url = "https://api.notion.com/v1/search"
    payload = {"query": query_name, "filter": {"value": "database", "property": "object"}}
    # Explicit timeout so a hung connection cannot stall the sync forever.
    resp = requests.post(url, headers=HEADERS, json=payload, timeout=30)
    if resp.status_code == 200:
        results = resp.json().get("results", [])
        if results:
            return results[0]['id']
    return None
def query_all(db_id):
    """Fetch every page of a Notion database, following cursor pagination.

    Returns a flat list of page objects; missing keys in an error
    response simply terminate the loop via the .get defaults.
    """
    url = f"https://api.notion.com/v1/databases/{db_id}/query"
    results = []
    has_more = True
    next_cursor = None
    while has_more:
        payload = {}
        if next_cursor:
            payload["start_cursor"] = next_cursor
        # Timeout guards against a connection stalling mid-pagination.
        resp = requests.post(url, headers=HEADERS, json=payload, timeout=30)
        data = resp.json()
        results.extend(data.get("results", []))
        has_more = data.get("has_more", False)
        next_cursor = data.get("next_cursor")
    return results
def extract_rich_text(prop):
    """Concatenate the plain-text fragments of a Notion rich_text property."""
    if not prop or "rich_text" not in prop:
        return ""
    fragments = prop.get("rich_text", [])
    return "".join(fragment.get("plain_text", "") for fragment in fragments)
def extract_title(prop):
    """Concatenate the plain-text fragments of a Notion title property."""
    if not prop or "title" not in prop:
        return ""
    parts = prop.get("title", [])
    return "".join(part.get("plain_text", "") for part in parts)
def extract_select(prop):
    """Return the selected option's name from a Notion select property.

    Returns "" for a missing property, a non-select shape, an unset
    value, or a malformed option object without a "name" key (the
    original raised KeyError in that last case).
    """
    if not prop or "select" not in prop or not prop["select"]:
        return ""
    return prop["select"].get("name", "")
def sync():
    """One-way sync from Notion into the local SQLite mirror.

    Steps: (1) ensure tables exist, (2) upsert product categories keyed
    on Notion page id, (3) wipe and re-create all industries from the
    Notion "Industries" database. Industries are delete-and-recreate
    because companies reference industries by string, not FK, so no
    constraint blocks the delete.
    """
    logger.info("--- Starting Enhanced Sync ---")
    # 1. Init DB
    init_db()
    session = SessionLocal()
    # 2. Sync Categories (Products)
    cat_db_id = find_db_id("Product Categories") or find_db_id("Products")
    if cat_db_id:
        logger.info(f"Syncing Products from {cat_db_id}...")
        pages = query_all(cat_db_id)
        for page in pages:
            props = page["properties"]
            name = extract_title(props.get("Name") or props.get("Product Name"))
            if not name: continue
            notion_id = page["id"]
            key = name.lower().replace(" ", "_")
            # Upsert keyed on the Notion page id; name/description are
            # refreshed on every run.
            cat = session.query(RoboticsCategory).filter(RoboticsCategory.notion_id == notion_id).first()
            if not cat:
                cat = RoboticsCategory(notion_id=notion_id, key=key)
                session.add(cat)
            cat.name = name
            cat.description = extract_rich_text(props.get("Description"))
            # Add reasoning guide map if available
        session.commit()
    else:
        logger.warning("Product DB not found!")
    # 3. Sync Industries
    ind_db_id = find_db_id("Industries")
    if ind_db_id:
        logger.info(f"Syncing Industries from {ind_db_id}...")
        # Clear existing? Or Upsert?
        # For clean sync, DELETE is safer as long as we don't have FK constraints blocking it.
        # But wait! Companies link to Industry STRING, not FK usually?
        # Check Company model: industry_ai = Column(String). So no FK constraint. Safe to delete.
        session.query(Industry).delete()
        session.commit()
        pages = query_all(ind_db_id)
        count = 0
        for page in pages:
            props = page["properties"]
            name = extract_title(props.get("Vertical"))
            if not name: continue
            ind = Industry(notion_id=page["id"], name=name)
            session.add(ind)
            # Map Fields
            ind.description = extract_rich_text(props.get("Definition"))
            ind.notes = extract_rich_text(props.get("Notes"))
            ind.pains = extract_rich_text(props.get("Pains"))
            ind.gains = extract_rich_text(props.get("Gains"))
            # Status / Priority (Renamed field check)
            # Try "Priorität" first, then "Freigegeben", then "Status"
            prio = extract_select(props.get("Priorität"))
            if not prio: prio = extract_select(props.get("Freigegeben"))
            if not prio: prio = extract_select(props.get("Status"))
            ind.priority = prio
            ind.status_notion = prio  # Legacy field
            ind.is_focus = (prio == "Freigegeben" or prio == "P1 Focus Industry")
            # Ops Focus
            if "Ops Focus: Secondary" in props:
                ind.ops_focus_secondary = props["Ops Focus: Secondary"].get("checkbox", False)
            # Relations
            # Primary — only the first related page is used.
            rels_prim = props.get("Primary Product Category", {}).get("relation", [])
            if rels_prim:
                pid = rels_prim[0]["id"]
                cat = session.query(RoboticsCategory).filter(RoboticsCategory.notion_id == pid).first()
                if cat: ind.primary_category_id = cat.id
            # Secondary
            rels_sec = props.get("Secondary Product", {}).get("relation", [])
            if rels_sec:
                pid = rels_sec[0]["id"]
                cat = session.query(RoboticsCategory).filter(RoboticsCategory.notion_id == pid).first()
                if cat: ind.secondary_category_id = cat.id
            count += 1
        session.commit()
        logger.info(f"✅ Synced {count} industries.")
    else:
        logger.error("Industries DB not found!")
    session.close()


if __name__ == "__main__":
    sync()

102
run_lead_engine.py Normal file
View File

@@ -0,0 +1,102 @@
import argparse
import subprocess
import os
import sys
from datetime import datetime
# --- Setup Paths ---
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))   # directory of this script
SCRIPTS_SUBDIR = os.path.join(SCRIPT_DIR, "scripts")      # helper scripts live here
LOG_DIR = os.path.join(SCRIPT_DIR, "logs")
THROUGHPUT_LOG = os.path.join(LOG_DIR, "throughput.log")  # lines: <iso-timestamp>,<identifier>

# Add scripts subdir to path to allow imports
sys.path.append(SCRIPTS_SUBDIR)

# TODO: Import other modules once they are ready
# from company_explorer_connector import handle_company_workflow
# from generate_sniper_copy import generate_copy
def setup_environment():
    """Ensures necessary directories exist."""
    # exist_ok makes repeated runs a no-op.
    os.makedirs(LOG_DIR, exist_ok=True)
def log_throughput(identifier):
    """Logs a successful processing event for the dashboard."""
    entry = f"{datetime.utcnow().isoformat()},{identifier}\n"
    with open(THROUGHPUT_LOG, "a") as log_file:
        log_file.write(entry)
    print(f"📈 Logged successful processing for '{identifier}' for dashboard.")
def run_sync():
    """Runs the database sync script to ensure local data is fresh."""
    print("\n--- [Step 1: Syncing local Company Explorer database] ---")
    sync_script_path = os.path.join(SCRIPTS_SUBDIR, "sync_ce_to_sqlite.py")
    if not os.path.exists(sync_script_path):
        print(f"❌ ERROR: Sync script not found at {sync_script_path}")
        return False
    result = subprocess.run(
        ["python3", sync_script_path],
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode == 0:
        print("✅ Sync successful.")
        return True
    print("❌ ERROR: Database sync failed.")
    print(result.stderr)
    return False
def process_lead(identifier):
    """
    Orchestrates the full enrichment and copy generation for a single lead.

    Steps 2 and 3 are placeholders for now; only the throughput logging
    in step 4 has a real side effect.
    """
    print(f"\n======= PROCESSING LEAD: {identifier} =======")
    # --- Step 2: Enrich Company (if necessary) ---
    print("\n--- [Step 2: Check/Enrich Company Data] ---")
    # ce_data = handle_company_workflow(identifier) # Example of direct import
    # if not ce_data or 'error' in ce_data:
    #     print(f"❌ Failed to enrich '{identifier}'. Aborting.")
    #     return
    print("... (Placeholder for Enrichment Logic)")
    print("✅ Enrichment complete.")
    # --- Step 3: Generate Sniper Copy ---
    print("\n--- [Step 3: Generate Sniper Copy] ---")
    # sniper_copy = generate_copy(ce_data['data']['id'])
    # print("\nGENERATED COPY:\n", sniper_copy)
    print("... (Placeholder for Sniper Copy Generation)")
    print("✅ Copy generation complete.")
    # --- Step 4: Finalize & Log ---
    print("\n--- [Step 4: Finalizing] ---")
    log_throughput(identifier)
    print(f"✅ Successfully processed lead '{identifier}'.")
    print("============================================")
def main():
    """CLI entry point: optional DB sync, then process each lead in order."""
    parser = argparse.ArgumentParser(description="GTM Lead Engine Orchestrator.")
    parser.add_argument("leads", nargs='+', help="One or more company names or SuperOffice IDs to process.")
    parser.add_argument("--skip-sync", action="store_true", help="Skip the initial database sync for faster iteration.")
    args = parser.parse_args()

    print("🚀 GTM Lead Engine Orchestrator started.")
    setup_environment()

    if args.skip_sync:
        print("\n--- [Skipping Step 1: Database Sync] ---")
    elif not run_sync():
        sys.exit(1)  # Exit if sync fails

    for lead in args.leads:
        process_lead(lead)
    print("\n🎉 Orchestration complete for all leads. 🎉")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,75 @@
import os
import requests
import json
from datetime import datetime
from zoneinfo import ZoneInfo
# Configuration
# SECURITY: a live Notion token was hardcoded here. It is kept only as a
# fallback for backward compatibility — rotate it and rely on the
# NOTION_API_KEY environment variable instead.
NOTION_TOKEN = os.getenv("NOTION_API_KEY", "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")
# Target task page for the status report.
PAGE_ID = "2ff88f42854480008314c9013414d1d0"
BERLIN_TZ = ZoneInfo("Europe/Berlin")
def add_status_to_notion():
    """Append a session status report to the Notion task page.

    Two-step update: (1) PATCH the page properties (cumulative hours and
    status), (2) append a heading plus a code block containing the
    session summary. Only the second call's response is checked.
    """
    headers = {
        "Authorization": f"Bearer {NOTION_TOKEN}",
        "Content-Type": "application/json",
        "Notion-Version": "2022-06-28"
    }
    # 1. Update the 'Total Duration (h)' field
    # First, get current value
    resp = requests.get(f"https://api.notion.com/v1/pages/{PAGE_ID}", headers=headers)
    page_data = resp.json()
    # Notion returns null for an empty number property; fall back to 0.0.
    current_hours = page_data.get("properties", {}).get("Total Duration (h)", {}).get("number") or 0.0
    new_hours = current_hours + 3.2
    # Update property
    update_payload = {
        "properties": {
            "Total Duration (h)": {"number": new_hours},
            "Status": {"status": {"name": "Doing"}}
        }
    }
    requests.patch(f"https://api.notion.com/v1/pages/{PAGE_ID}", headers=headers, json=update_payload)
    # 2. Append the Status Update Block
    timestamp = datetime.now(BERLIN_TZ).strftime('%Y-%m-%d %H:%M')
    report_content = (
        "Investierte Zeit in dieser Session: 03:12\n"
        "Neuer Status: Doing\n\n"
        "Arbeitszusammenfassung:\n"
        "Wir haben heute den entscheidenden technischen Durchbruch bei der bidirektionalen Datensynchronisation zwischen dem Company Explorer (CE) und SuperOffice CRM (SO) erzielt.\n\n"
        "1. Infrastruktur-Stabilisierung: Das Git-Repository wurde über eine interne Docker-Netzwerk-Verbindung (gitea-internal) stabil angebunden.\n"
        "2. Pipeline-Durchstich (SO -> CE): Firmenstammdaten aus SuperOffice (Contact ID 2) werden sauber in den Company Explorer übertragen.\n"
        "3. Round-Trip & Write-Back (CE -> SO): Das Protokoll für den Rückschreibeprozess wurde geknackt. Erkenntnis: SuperOffice ignoriert UrlAddress beim PUT, wir nutzen jetzt das Urls-Array. Pflichtfelder wie Number2 werden nun explizit mitgegeben."
    )
    block_payload = {
        "children": [
            {
                "object": "block",
                "type": "heading_2",
                "heading_2": {
                    "rich_text": [{"type": "text", "text": {"content": f"🤖 Status-Update ({timestamp} Berlin Time)"}}]
                }
            },
            {
                "object": "block",
                "type": "code",
                "code": {
                    "rich_text": [{"type": "text", "text": {"content": report_content}}],
                    "language": "yaml"
                }
            }
        ]
    }
    # PATCH /blocks/{id}/children appends; it does not replace existing blocks.
    final_resp = requests.patch(f"https://api.notion.com/v1/blocks/{PAGE_ID}/children", headers=headers, json=block_payload)
    if final_resp.status_code == 200:
        print(f"✅ SUCCESS: Notion Task updated. Total hours now: {new_hours}")
    else:
        print(f"❌ ERROR: {final_resp.text}")


if __name__ == "__main__":
    add_status_to_notion()

View File

@@ -0,0 +1,115 @@
import os
import requests
import json
from dotenv import load_dotenv
# Load NOTION_API_KEY from the shared .env file.
load_dotenv(dotenv_path="/home/node/clawd/.env")
NOTION_TOKEN = os.getenv("NOTION_API_KEY")

# Shared headers for all Notion REST calls (API version pinned).
HEADERS = {
    "Authorization": f"Bearer {NOTION_TOKEN}",
    "Content-Type": "application/json",
    "Notion-Version": "2022-06-28"
}
def find_db_id(query_name):
    """Search Notion for a database named query_name; return its id or None."""
    search_url = "https://api.notion.com/v1/search"
    body = {"query": query_name, "filter": {"value": "database", "property": "object"}}
    response = requests.post(search_url, headers=HEADERS, json=body)
    if response.status_code != 200:
        return None
    hits = response.json().get("results", [])
    return hits[0]['id'] if hits else None
# Cache for product names to avoid API spam
product_cache = {}


def resolve_product_name(relation_ids):
    """Resolve Notion relation entries to a comma-separated product-name string.

    Args:
        relation_ids: List of relation dicts ({"id": <page_id>}), or falsy.

    Returns:
        "None" for empty/falsy input, otherwise the resolved names joined
        by ", ". Lookups are memoized in product_cache so each page is
        fetched at most once per run.
    """
    if not relation_ids:
        return "None"
    names = []
    for rel in relation_ids:
        page_id = rel['id']
        if page_id in product_cache:
            names.append(product_cache[page_id])
            continue
        url = f"https://api.notion.com/v1/pages/{page_id}"
        # Timeout keeps a single slow page fetch from hanging the audit.
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code == 200:
            props = resp.json().get("properties", {})
            # Assume Product DB has a Title field called "Name" or "Product Name"
            # We iterate to find the title
            title = "Unknown"
            for key, val in props.items():
                if val['id'] == 'title':  # The title property always has id 'title'
                    if val['title']:
                        title = val['title'][0]['plain_text']
                    break
            product_cache[page_id] = title
            names.append(title)
        else:
            names.append("Error fetching Product")
    return ", ".join(names)
def audit_industries():
    """Print a fixed-width table of Industries rows.

    Columns: vertical name, Freigabe status, resolved primary product,
    and a 40-char notes snippet. Rows without a resolvable name are
    skipped. Note: only the first result page of the query is shown.
    """
    db_id = find_db_id("Industries")
    if not db_id:
        print("❌ Industries DB not found.")
        return
    print(f"--- Auditing Industries DB ({db_id}) ---")
    url = f"https://api.notion.com/v1/databases/{db_id}/query"
    resp = requests.post(url, headers=HEADERS, json={})
    if resp.status_code != 200:
        print(f"Error: {resp.text}")
        return
    pages = resp.json().get("results", [])
    # We want to see: Vertical Name | Status | Primary Product (Resolved) | Notes Snippet
    print(f"{'Vertical':<35} | {'Status':<15} | {'Primary Product':<30} | {'Notes (Snippet)'}")
    print("-" * 120)
    for page in pages:
        props = page['properties']
        # Name
        name = "N/A"
        if "Vertical" in props and props["Vertical"]["title"]:
            name = props["Vertical"]["title"][0]["plain_text"]
        elif "Name" in props and props["Name"]["title"]:  # Fallback
            name = props["Name"]["title"][0]["plain_text"]
        # Filter for the ones we touched or are interested in
        # (Optional: remove filter to see all)
        # Status — "Freigabe" may be a status- or select-typed property.
        status = ""
        if "Freigabe" in props:
            if props["Freigabe"]["type"] == "status" and props["Freigabe"]["status"]:
                status = props["Freigabe"]["status"]["name"]
            elif props["Freigabe"]["type"] == "select" and props["Freigabe"]["select"]:
                status = props["Freigabe"]["select"]["name"]
        # Primary Product (Relation)
        product_name = "None"
        if "Primary Product Category" in props and props["Primary Product Category"]["relation"]:
            product_name = resolve_product_name(props["Primary Product Category"]["relation"])
        # Notes
        notes = ""
        if "Notes" in props and props["Notes"]["rich_text"]:
            full_note = props["Notes"]["rich_text"][0]["plain_text"]
            notes = (full_note[:40] + '...') if len(full_note) > 40 else full_note
        if name != "N/A":
            print(f"{name:<35} | {status:<15} | {product_name:<30} | {notes}")


if __name__ == "__main__":
    audit_industries()

View File

@@ -0,0 +1,27 @@
import os
import requests
import json
from dotenv import load_dotenv
load_dotenv(dotenv_path="/home/node/clawd/.env")
def discover_dbs():
    """List every Notion database visible to the integration token."""
    token = os.getenv("NOTION_API_KEY")
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Notion-Version": "2022-06-28"
    }
    url = "https://api.notion.com/v1/search"
    payload = {"filter": {"value": "database", "property": "object"}}
    # Timeout so a hung connection cannot stall the discovery.
    resp = requests.post(url, headers=headers, json=payload, timeout=30)
    results = resp.json().get("results", [])
    print("--- Gefundene Datenbanken ---")
    for db in results:
        # A database can have an empty title array; the original
        # db.get("title", [{}])[0] raised IndexError on that.
        title_parts = db.get("title") or [{}]
        title = title_parts[0].get("plain_text", "Unbekannt")
        print(f"Name: {title} | ID: {db['id']}")


if __name__ == "__main__":
    discover_dbs()

View File

@@ -0,0 +1,265 @@
import os
import requests
import json
from dotenv import load_dotenv
# Load NOTION_API_KEY from the shared .env file.
load_dotenv(dotenv_path="/home/node/clawd/.env")
NOTION_TOKEN = os.getenv("NOTION_API_KEY")
# Shared headers for all Notion REST calls (API version pinned).
HEADERS = {
    "Authorization": f"Bearer {NOTION_TOKEN}",
    "Content-Type": "application/json",
    "Notion-Version": "2022-06-28"
}

# --- Load Product Mapping ---
# Maps product display name -> Notion page id; produced by fetch_product_mapping.py.
try:
    with open("data/product_mapping.json", "r") as f:
        PRODUCT_MAP = json.load(f)
except FileNotFoundError:
    print("❌ Product mapping not found. Run fetch_product_mapping.py first.")
    # raise SystemExit instead of the site-provided exit() builtin, which
    # is not guaranteed to exist in all runtime modes (e.g. python -S).
    raise SystemExit(1)
# Helper to find DB ID
def find_db_id(query_name):
    """Return the id of the first database matching query_name, else None."""
    response = requests.post(
        "https://api.notion.com/v1/search",
        headers=HEADERS,
        json={"query": query_name, "filter": {"value": "database", "property": "object"}},
    )
    if response.status_code == 200:
        matches = response.json().get("results", [])
        if matches:
            return matches[0]['id']
    return None
def get_page_by_vertical(db_id, vertical_name):
    """Fetch the page whose 'Vertical' title equals vertical_name, or None."""
    query_url = f"https://api.notion.com/v1/databases/{db_id}/query"
    # Using 'Vertical' as the title property name based on previous audit
    body = {
        "filter": {
            "property": "Vertical",
            "title": {"equals": vertical_name}
        }
    }
    response = requests.post(query_url, headers=HEADERS, json=body)
    if response.status_code != 200:
        return None
    matches = response.json().get("results", [])
    return matches[0] if matches else None
def update_page(page_id, properties):
    """PATCH the given properties onto an existing Notion page."""
    resp = requests.patch(
        f"https://api.notion.com/v1/pages/{page_id}",
        headers=HEADERS,
        json={"properties": properties},
    )
    if resp.status_code == 200:
        print(f"✅ Updated '{page_id}'")
    else:
        print(f"❌ Error updating '{page_id}': {resp.text}")
def create_page(db_id, properties):
    """Create a new page with the given properties in the database db_id."""
    resp = requests.post(
        "https://api.notion.com/v1/pages",
        headers=HEADERS,
        json={"parent": {"database_id": db_id}, "properties": properties},
    )
    if resp.status_code == 200:
        print(f"✅ Created new page")
    else:
        print(f"❌ Error creating page: {resp.text}")
# --- CONTENT DEFINITION ---
# Format: Vertical -> { props... }
# Per vertical: "product" must match a key in PRODUCT_MAP; "pains"/"gains"
# are multi-line rich-text content; "status" must match a "Freigabe"
# option name in Notion.
# NOTE(review): "Klärrungsbedarf" looks misspelled ("Klärungsbedarf"), but
# it must match the Notion option name exactly — confirm before changing.
UPDATES = {
    "Healthcare - Care Home": {
        "product": "Cleaning Indoor Roboter (Wet Surface)",
        "pains": """[Primary Product: Cleaning]
- Infektionsrisiko: Mangelnde Bodenhygiene und Keimverschleppung in sensiblen Bereichen gefährden Bewohner.
- Dokumentationspflicht: Lückenlose Nachweise für Hygiene-Audits binden wertvolle Pflegezeit.
- Personalmangel: Reinigungskräfte fehlen, Standards können manuell kaum gehalten werden.
[Secondary Product: Service]
- Pflegeressourcen: Fachkräfte binden bis zu 30% ihrer Zeit mit nicht-pflegerischen Transportwegen (Essen/Wäsche).
- Körperliche Belastung: Schweres Heben und weite Wege führen zu krankheitsbedingten Ausfällen im Pflegeteam.""",
        "gains": """[Primary Product: Cleaning]
- Audit-Sicherheit: Automatisierte, protokollierte Reinigung sichert Compliance ohne Mehraufwand.
- Entlastung Housekeeping: Personal konzentriert sich auf Sichtreinigung und Desinfektion statt Bodenfläche.
[Secondary Product: Service]
- Mehr Zeit am Patienten: Reduktion der Laufwege gibt Pflegekräften 2-3 Std./Schicht zurück.
- Mitarbeiterzufriedenheit: Reduktion körperlicher Belastung senkt Krankenstand.""",
        "ops_focus": True,
        "status": "Freigegeben",
        "notes": "Prio 1: Reinigung. Prio 2: Service (Essen). Fokus auf Fachkräftemangel & Hygiene."
    },
    "Healthcare - Hospital": {
        "product": "Cleaning Indoor Roboter (Wet Surface)",
        "pains": """[Primary Product: Cleaning]
- Infektionsschutz: Hohe Frequenz an Patientenbewegungen erfordert permanente Desinfektion der Böden.
- Audit-Druck: Behördliche Auflagen verlangen lückenlose Dokumentation, die manuell kaum leistbar ist.
- Kostendruck: Steigende Personalkosten bei fixen Fallpauschalen zwingen zur Effizienzsteigerung.
[Secondary Product: Service]
- Logistik-Aufwand: Transport von Proben, Wäsche und Essen bindet Pflegepersonal in unproduktiven Wegezeiten.""",
        "gains": """[Primary Product: Cleaning]
- Hygiene-Standard: 24/7 gleichbleibende Reinigungsqualität reduziert Keimbelastung messbar.
- Compliance: Automatische Protokollierung aller Reinigungsfahrten für Audits.
[Secondary Product: Service]
- Prozess-Effizienz: Automatisierter Warentransport entlastet Fachpersonal für medizinische Aufgaben.""",
        "ops_focus": True,
        "status": "Freigegeben",
        "notes": "Prio 1: Reinigung (Alex Veto). Service ist 'nice to have'. KPI: Hygiene-Sicherheit."
    },
    "Leisure - Entertainment": {
        "product": "Service Roboter",  # FIX: Changed from Cleaning to Service
        "pains": """[Primary Product: Service]
- Service-Engpass: Umsatzverlust zu Stoßzeiten, da Personal nicht schnell genug Getränke/Snacks nachliefert.
- Personalmangel: Schwierige Besetzung von Spätschichten führt zu geschlossenen Stationen/Bahnen.
- Wartezeiten: Gäste sind unzufrieden, wenn Bestellungen zu lange dauern.
[Secondary Product: Cleaning]
- Bodenverschmutzung: Klebrige Böden (Getränke/Popcorn) im Foyer stören das Gästeerlebnis.""",
        "gains": """[Primary Product: Service]
- Umsatzsteigerung: Permanente Verfügbarkeit von Snacks/Getränken direkt am Platz (Cross-Selling).
- Erlebnis-Faktor: Innovative Roboter begeistern Gäste und fördern Social-Media-Sichtbarkeit.
- Entlastung: Servicepersonal hat mehr Zeit für Gästebetreuung statt Laufwege.""",
        "ops_focus": True,  # Keep secondary focus plausible
        "status": "Freigegeben",
        "notes": "Prio 1: Service Robotik (BellaBot). Cleaning nur Prio 2 (Foyer/Gänge)."
    },
    "Logistics - Warehouse": {
        "product": "Cleaning Outdoor Roboter (Sweeper)",
        "pains": """[Primary Product: Sweeper]
- Prozesssicherheit: Staub auf Sensoren und Lichtschranken führt zu Anlagenstörungen und Produktionsstopps.
- Arbeitssicherheit: Verschmutzte Fahrwege durch Palettenreste/Staub erhöhen das Unfallrisiko.
- Manuelle Bindung: Fachkräfte müssen kehren statt kommissionieren.
[Secondary Product: Cleaning Wet]
- Hartnäckige Verschmutzungen: Öl/Reifenabrieb erfordern Nassreinigung, die manuell zeitintensiv ist.""",
        "gains": """[Primary Product: Sweeper]
- Staubfreie Umgebung: Werterhalt des Hallenbodens und Schutz empfindlicher Ware/Anlagen.
- Produktivität: Reinigung erfolgt parallel zum Betrieb oder nachts, ohne Störung.
- Sicherheit: Saubere Fahrwege reduzieren Unfallrisiko für Flurförderzeuge.""",
        "ops_focus": True,
        "status": "Freigegeben",
        "notes": "Prio 1: Sweeper (Staub). Prio 2: Wet. Transport schwierig wegen Paletten."
    },
    "Tech - Data Center": {
        "product": "Security Roboter",
        "pains": """[Primary Product: Security]
- Sicherheitsrisiko: Unbefugter Zutritt in Sicherheitsbereiche muss lückenlos detektiert werden (24/7).
- Personalbindung: Wachpersonal ist teuer und kann nicht überall gleichzeitig sein.
[Secondary Product: Cleaning]
- Feinstaub: Staubpartikel in Serverräumen gefährden Hardware und Kühlung.""",
        "gains": """[Primary Product: Security]
- Lückenlose Überwachung: Permanente Patrouille und sofortige Alarmierung ohne Personalbindung.
- Dokumentation: Video- und Sensorprotokolle aller Ereignisse.
[Secondary Product: Cleaning]
- Ausfallsicherheit: Staubfreie Umgebung verlängert Hardware-Lebensdauer.""",
        "ops_focus": True,
        "status": "Klärrungsbedarf",  # New, needs review
        "notes": "Neu angelegt. Prio 1 Security (lt. Transkript). Prio 2 Cleaning (Staub)."
    },
    "Reinigungsdienstleister": {
        "product": "Cleaning Indoor Roboter (Wet Surface)",
        "pains": """[Primary Product: Cleaning]
- Personalmangel: Schwierigkeit, zuverlässiges Personal für alle Objekte zu finden.
- Kostendruck: Geringe Margen bei Ausschreibungen erfordern hohe Effizienz.
- Qualitätsschwankungen: Manuelle Reinigung variiert stark, Kunden beschweren sich.
- Fluktuation: Hoher Aufwand für ständige Neueinarbeitung.""",
        "gains": """[Primary Product: Cleaning]
- Skalierbarkeit: Roboter übernehmen Flächenleistung, Personal macht Detailreinigung.
- Innovation: Wettbewerbsvorteil bei Ausschreibungen durch Technologie-Einsatz.
- Kalkulationssicherheit: Fixe Kosten statt variabler Personalkosten/Krankheitstage.""",
        "ops_focus": False,
        "status": "Klärrungsbedarf",
        "notes": "Neu angelegt. Zielgruppe: Wisag, Dussmann etc. (Alex: Größter Markt)."
    },
    "Infrastructure - Communities": {
        "product": "Cleaning Indoor Roboter (Wet Surface)",
        "pains": """[Primary Product: Cleaning]
- Großflächen-Reinigung: Sporthallen, Aulen und Flure binden enorm viele Personalstunden.
- Budget-Druck: Kommunen müssen sparen, Reinigungskosten sind großer Posten.
- Nutzungs-Konflikte: Reinigung muss in engen Zeitfenstern zwischen Schul/Vereinsnutzung erfolgen.""",
        "gains": """[Primary Product: Cleaning]
- Kosteneffizienz: Reduktion der Reinigungskosten pro Quadratmeter.
- Flexibilität: Reinigung kann nachts oder in Randzeiten erfolgen.
- Werterhalt: Schonende, regelmäßige Reinigung verlängert Lebensdauer von Sportböden.""",
        "ops_focus": False,
        "status": "Klärrungsbedarf",
        "notes": "Neu angelegt (Schulen, Gemeinden)."
    },
    "Infrastructure Parking": {
        "product": "Cleaning Outdoor Roboter (Sweeper)",
        "pains": """[Primary Product: Sweeper]
- Außenwirkung: Verschmutzte Parkflächen/Müll schaden dem Image (erster Eindruck).
- Manuelle Arbeit: Fegen von großen Außenflächen ist personalintensiv und unbeliebt.
- Umwelt: Müll gelangt in die Umgebung/Kanalisation.""",
        "gains": """[Primary Product: Sweeper]
- Gepflegtes Erscheinungsbild: Täglich saubere Außenanlagen.
- Autonomie: Roboter reinigt selbstständig, auch bei schlechtem Wetter.
- Entlastung: Hausmeister kann sich um Wartung kümmern statt Fegen.""",
        "ops_focus": False,
        "status": "Klärrungsbedarf",
        "notes": "Neu angelegt (Parkplätze, Außenanlagen)."
    }
}
def run_enrichment():
    """Push the UPDATES content into the Notion Industries database.

    For each vertical: resolve its product page id from PRODUCT_MAP,
    build the property payload, then update the existing page (detecting
    whether "Freigabe" is a status- or select-typed property) or create
    a new page when no match exists.
    """
    db_id = find_db_id("Industries")
    if not db_id:
        print("❌ Industries DB not found.")
        return
    print(f"--- Enriching Verticals in DB {db_id} ---")
    for vertical, data in UPDATES.items():
        # Resolve Product ID
        prod_id = PRODUCT_MAP.get(data["product"])
        if not prod_id:
            print(f"❌ Product '{data['product']}' not found in map. Skipping {vertical}.")
            continue
        # Prepare Properties
        props = {
            "Pains": {"rich_text": [{"text": {"content": data["pains"]}}]},
            "Gains": {"rich_text": [{"text": {"content": data["gains"]}}]},
            "Primary Product Category": {"relation": [{"id": prod_id}]},
            "Notes": {"rich_text": [{"text": {"content": data["notes"]}}]},
            # Handle Status (Try Select first, then Status)
            # We assume "Freigabe" exists
        }
        # Add checkbox if present in logic
        if "ops_focus" in data:
            props["Ops Focus: Secondary"] = {"checkbox": data["ops_focus"]}
        # Check if page exists
        page = get_page_by_vertical(db_id, vertical)
        if page:
            # Update existing
            # Add Status Update
            # (Note: Logic to detect Select vs Status type is needed, but we assume Select/Status name is consistent)
            # For robustness, we check the property type in the page object
            status_type = page['properties'].get("Freigabe", {}).get("type")
            if status_type == "status":
                props["Freigabe"] = {"status": {"name": data["status"]}}
            elif status_type == "select":
                props["Freigabe"] = {"select": {"name": data["status"]}}
            print(f"Updating '{vertical}'...")
            update_page(page['id'], props)
        else:
            # Create new
            print(f"Creating new vertical '{vertical}'...")
            props["Vertical"] = {"title": [{"text": {"content": vertical}}]}
            # Guess status type (usually Select or Status) - Try Status first as default in new Notion DBs
            # Or omit status if unsure, but we want to set it.
            # We'll try Status format.
            props["Freigabe"] = {"status": {"name": data["status"]}}
            create_page(db_id, props)


if __name__ == "__main__":
    run_enrichment()

View File

@@ -0,0 +1,65 @@
import os
import requests
import json
from dotenv import load_dotenv
# Load NOTION_API_KEY from the shared .env file.
load_dotenv(dotenv_path="/home/node/clawd/.env")
NOTION_TOKEN = os.getenv("NOTION_API_KEY")

# Shared headers for all Notion REST calls (API version pinned).
HEADERS = {
    "Authorization": f"Bearer {NOTION_TOKEN}",
    "Content-Type": "application/json",
    "Notion-Version": "2022-06-28"
}
def find_db_id(query_name):
    """Look up a Notion database by name via the search API; id or None."""
    payload = {
        "query": query_name,
        "filter": {"value": "database", "property": "object"},
    }
    resp = requests.post("https://api.notion.com/v1/search", headers=HEADERS, json=payload)
    if resp.status_code != 200:
        return None
    found = resp.json().get("results", [])
    if not found:
        return None
    return found[0]['id']
def fetch_products():
    """Dump the Notion product database into data/product_mapping.json.

    The JSON maps product display name -> Notion page id and is the
    input for push scripts that need to set product relations.
    Note: only the first result page of the query is read.
    """
    # Find Product DB (it's likely named "Product Categories" or similar based on schema)
    # Or search for "Products"
    db_id = find_db_id("Product Categories")
    if not db_id:
        db_id = find_db_id("Products")  # Fallback
    if not db_id:
        print("❌ Could not find Product Database.")
        return
    print(f"--- Fetching Products from DB {db_id} ---")
    url = f"https://api.notion.com/v1/databases/{db_id}/query"
    resp = requests.post(url, headers=HEADERS, json={})
    products = {}
    if resp.status_code == 200:
        results = resp.json().get("results", [])
        for page in results:
            props = page['properties']
            # Find Title — the title property may be "Name" or "Product Name".
            name = "Unknown"
            if "Name" in props and props["Name"]["title"]:
                name = props["Name"]["title"][0]["plain_text"]
            elif "Product Name" in props and props["Product Name"]["title"]:
                name = props["Product Name"]["title"][0]["plain_text"]
            products[name] = page['id']
            print(f"Found: {name} ({page['id']})")
        # Save to file
        os.makedirs("data", exist_ok=True)
        with open("data/product_mapping.json", "w") as f:
            json.dump(products, f, indent=4)
        print("✅ Saved to data/product_mapping.json")
    else:
        print(f"Error: {resp.text}")


if __name__ == "__main__":
    fetch_products()

View File

@@ -0,0 +1,93 @@
import sqlite3
import os
import json
import requests
from dotenv import load_dotenv
# Load ENV for Gemini API
load_dotenv(dotenv_path="/home/node/clawd/.env", override=True)
class LeadHookService:
    """Generates a personalised one-sentence e-mail hook for a company.

    Reads company facts from the local Company Explorer SQLite mirror and
    asks the Gemini API to craft a German-language opening sentence.
    """

    def __init__(self, db_path):
        self.db_path = db_path
        self.api_key = os.getenv("GEMINI_API_KEY")

    def _get_company_data(self, company_id):
        """Fetch one company row as a dict, or None when the id is unknown."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute("SELECT * FROM companies WHERE id = ?", (company_id,))
            company = cursor.fetchone()
            return dict(company) if company else None
        finally:
            # Release the connection even when the query raises.
            conn.close()

    def build_combined_context(self, company_data):
        """Build the compact 'combined' fact string fed into the prompt."""
        parts = []
        parts.append(f"Name: {company_data.get('name')}")
        parts.append(f"Branche: {company_data.get('industry_ai')}")
        if company_data.get('calculated_metric_value'):
            parts.append(f"Metrik: {company_data.get('calculated_metric_value')} {company_data.get('calculated_metric_unit')}")
        # Add a hint about the core business from status/city
        parts.append(f"Standort: {company_data.get('city')}")
        return " | ".join(parts)

    def generate_hook(self, company_id):
        """Generate the hook sentence for *company_id* via Gemini.

        Returns the generated sentence, or a human-readable error string
        when the company is missing or the API call/response parsing fails.
        """
        company_data = self._get_company_data(company_id)
        if not company_data:
            return "Company not found."
        combined = self.build_combined_context(company_data)
        # Simple Kurzname logic; guard against a missing name (was a crash).
        display_name = (company_data.get('name') or "").split(' ')[0]
        prompt = f"""
        Du bist ein exzellenter B2B-Stratege und Texter.
        Deine Aufgabe ist es, einen hochpersonalisierten, scharfsinnigen und wertschätzenden Einleitungssatz für eine E-Mail zu formulieren.
        --- Unternehmenskontext ---
        Kurzname: {display_name}
        Beschreibung: {combined}
        --- Stilvorgaben ---
        1. Analysiere das Kerngeschäft: Was ist die zentrale physische Herausforderung (z.B. Sauberkeit in Nassbereichen, Logistik-Effizienz, Objektschutz)?
        2. KEINE ZAHLEN: Erwähne niemals konkrete Zahlen (Besucherzahlen, m², Anzahl Pools). Nutze stattdessen qualitative Begriffe wie "weitläufig", "hochfrequent", "komplex" oder "marktführend".
        3. Identifiziere den Hebel: Was ist der Erfolgsfaktor (z.B. Gäste-Zufriedenheit, Prozessstabilität, Sicherheit)?
        4. Formuliere den Satz (20-35 Wörter): Elegant, aktiv, KEINE Floskeln.
        5. WICHTIG: Formuliere als positive Beobachtung über eine Kernkompetenz.
        Deine Ausgabe: NUR der finale Satz.
        """
        # Call Gemini (Simplified for POC)
        headers = {"Content-Type": "application/json"}
        payload = {
            "contents": [{"parts": [{"text": prompt}]}]
        }
        url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={self.api_key}"
        try:
            # Timeout prevents the caller from hanging on network issues.
            resp = requests.post(url, headers=headers, json=payload, timeout=30)
            result = resp.json()
            hook_text = result['candidates'][0]['content']['parts'][0]['text'].strip()
            return hook_text
        except requests.RequestException as e:
            return f"Error generating hook: {e}"
        except (KeyError, IndexError, TypeError):
            # Unexpected response shape (quota error, safety block, etc.);
            # narrowed from a bare `except:` so real bugs still propagate.
            return f"Error generating hook: {result}"
# Manual smoke test: generate a hook for a known company id against the
# local Company Explorer mirror (requires DB file and Gemini API key).
if __name__ == "__main__":
    # Test with CE-ID 1 (Therme Erding)
    db = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db"
    service = LeadHookService(db)
    print(f"--- Testing LeadHookService for ID 1 ---")
    hook = service.generate_hook(1)
    print(f"GENERATED HOOK:\n{hook}")

View File

@@ -0,0 +1,123 @@
import sqlite3
import os
import json
import requests
import argparse
from dotenv import load_dotenv
# --- Configuration & Setup ---
load_dotenv(dotenv_path="/home/node/clawd/.env", override=True)
DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db"
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
class SniperGenerator:
    """Generates the 3-sentence 'sniper' e-mail copy for a lead.

    Reads lead facts from the local Company Explorer SQLite mirror, selects
    a product via the (placeholder) '3+1' rule and asks Gemini for the copy.
    """

    def __init__(self, db_path=DB_PATH):
        self.db_path = db_path
        # TODO: Initialize Notion client to get Vertical/Persona data
        # TODO: Load Marketing Report KPIs into memory

    def get_lead_data(self, company_id):
        """Gathers all necessary data for a lead from the local DB.

        Returns the company row as a dict, or None for an unknown id.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM companies WHERE id = ?", (company_id,))
        company_data = cursor.fetchone()
        conn.close()
        if not company_data:
            return None
        return dict(company_data)

    def select_product_and_persona(self, company_data, target_role):
        """
        Implements the '3+1' rule to decide which product to pitch.
        Placeholder logic - will be replaced with Notion data.
        """
        print(f"🎯 Selecting product for role '{target_role}' in industry '{company_data.get('industry_ai')}'...")
        # Placeholder for the 3+1 logic
        # if target_role in ["Wirtschaftl. Entscheider", "Infrastruktur-Verantw."]:
        #     return "Primary"
        # if target_role == "Innovations-Treiber":
        #     return "Secondary"
        # if target_role == "Operativer Entscheider":
        #     # Here we would check the "Ops Focus: Secondary?" checkbox from Notion
        #     return "Primary"  # Default
        # For now, we default to the primary product (Cleaning)
        print("-> Defaulting to 'Primary Product' (Cleaning).")
        return "Cleaning"

    def generate_copy(self, company_id, target_role="Wirtschaftl. Entscheider"):
        """
        Generates the 3-sentence sniper copy for a given company and role.

        Returns the generated text, or a human-readable error string when
        the lead is missing or the Gemini call fails.
        """
        # 1. Gather Data
        lead_data = self.get_lead_data(company_id)
        if not lead_data:
            return "Error: Company data not found."
        # 2. Decide on Product (using 3+1 rule)
        product_to_pitch = self.select_product_and_persona(lead_data, target_role)
        # 3. Get Social Proof KPIs (placeholder - hardcoded from the marketing report)
        kpis = {
            "cost_reduction": "10-25%",
            "time_saving": "20-40%"
        }
        # 4. Construct Master Prompt (simplified version for now)
        prompt = f"""
        Du bist ein Weltklasse B2B-Stratege. Deine Aufgabe ist es, eine 3-Satz-E-Mail-Einleitung im '1,5°-Stil' zu erstellen.
        **Regeln:**
        - Satz 1 (Firma): Zeige, dass du das Geschäftsmodell und die zentrale Herausforderung verstanden hast. KEINE ZAHLEN, nur qualitative Größe.
        - Satz 2 (Persona): Sprich den spezifischen Schmerz der Zielrolle an und verbinde ihn mit dem Produkt '{product_to_pitch}'.
        - Satz 3 (Social Proof): Untermauere die Lösung mit einem konkreten KPI von Marktbegleitern.
        **Daten:**
        - Firma: {lead_data.get('name')}
        - Branche (KI): {lead_data.get('industry_ai')}
        - Standort: {lead_data.get('city')}
        - Rolle: {target_role}
        - KPI 1 (Kosten): {kpis['cost_reduction']}
        - KPI 2 (Zeit): {kpis['time_saving']}
        **Output:** Nur die 3 Sätze. Sonst nichts.
        """
        # 5. Call Gemini API
        print("📞 Calling Gemini to generate copy...")
        headers = {"Content-Type": "application/json"}
        payload = {"contents": [{"parts": [{"text": prompt}]}]}
        url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={GEMINI_API_KEY}"
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=20)
            resp.raise_for_status()
            result = resp.json()
            copy_text = result['candidates'][0]['content']['parts'][0]['text'].strip()
            return copy_text
        except (requests.RequestException, KeyError, IndexError, TypeError) as e:
            # Narrowed from `except Exception`: network/HTTP failures and
            # unexpected response shapes are reported; real bugs propagate.
            return f"Error during Gemini call: {e}"
# CLI entry point: generate sniper copy for one Company Explorer lead.
if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser(description="Generate sniper copy for a lead.")
    arg_parser.add_argument("company_id", type=int, help="The Company Explorer ID of the lead.")
    arg_parser.add_argument("--role", type=str, default="Wirtschaftl. Entscheider", help="The target persona/role.")
    cli_args = arg_parser.parse_args()
    generator = SniperGenerator()
    copy_text = generator.generate_copy(cli_args.company_id, cli_args.role)
    print("\n--- GENERATED SNIPER COPY ---")
    print(copy_text)
    print("-----------------------------\n")

106
scripts/query_notion_db.py Normal file
View File

@@ -0,0 +1,106 @@
import os
import requests
import json
from dotenv import load_dotenv
load_dotenv(dotenv_path="/home/node/clawd/.env")
def find_db_by_name(query_name):
    """Return the id of the first Notion database matching *query_name*.

    Returns None when the search request fails or nothing matches.
    """
    headers = {
        "Authorization": f"Bearer {os.getenv('NOTION_API_KEY')}",
        "Content-Type": "application/json",
        "Notion-Version": "2022-06-28",
    }
    resp = requests.post(
        "https://api.notion.com/v1/search",
        headers=headers,
        json={"query": query_name, "filter": {"value": "database", "property": "object"}},
    )
    if resp.status_code != 200:
        print(f"Error searching DB: {resp.text}")
        return None
    hits = resp.json().get("results", [])
    if not hits:
        return None
    return hits[0]["id"]
def dump_db_content(db_id, db_name="DB"):
    """Print a compact table of all rows in the given Notion database."""
    headers = {
        "Authorization": f"Bearer {os.getenv('NOTION_API_KEY')}",
        "Content-Type": "application/json",
        "Notion-Version": "2022-06-28",
    }
    resp = requests.post(
        f"https://api.notion.com/v1/databases/{db_id}/query", headers=headers, json={}
    )
    if resp.status_code != 200:
        print(f"Error querying DB: {resp.text}")
        return
    pages = resp.json().get("results", [])
    print(f"\n--- Content of '{db_name}' ({len(pages)} rows) ---")

    def first_rich_text(props, keys):
        # Plain text of the first non-empty rich_text property among *keys*.
        for key in keys:
            if key in props and props[key]["rich_text"]:
                return props[key]["rich_text"][0]["plain_text"]
        return ""

    rows = []
    for page in pages:
        props = page["properties"]
        # The title property name differs between databases.
        name = "N/A"
        for title_key in ("Vertical", "Name", "Role"):
            if title_key in props and props[title_key]["title"]:
                name = props[title_key]["title"][0]["plain_text"]
                break
        # "Freigabe" may be modelled as a status or a select property.
        freigabe = ""
        if "Freigabe" in props:
            prop = props["Freigabe"]
            if prop["type"] == "status":
                freigabe = prop["status"]["name"] if prop["status"] else ""
            elif prop["type"] == "select":
                freigabe = prop["select"]["name"] if prop["select"] else ""
        notes = first_rich_text(props, ("Notes",))
        kpis = first_rich_text(props, ("KPIs", "KPI", "Quantitative Value"))
        rows.append({"name": name, "freigabe": freigabe, "notes": notes, "kpis": kpis})
    # Render a clean fixed-width table, skipping rows without a title.
    print(f"{'Name':<40} | {'Freigabe':<15} | {'KPIs':<20} | {'Notes'}")
    print("-" * 120)
    for row in rows:
        if row["name"] != "N/A":
            print(f"{row['name']:<40} | {row['freigabe']:<15} | {row['kpis']:<20} | {row['notes']}")
# Script entry point: dump the "Industries" and "Personas" Notion databases.
if __name__ == "__main__":
    db_id_ind = find_db_by_name("Industries")
    if db_id_ind:
        dump_db_content(db_id_ind, "Industries")
    db_id_roles = find_db_by_name("Personas")
    if db_id_roles:
        dump_db_content(db_id_roles, "Personas")

View File

@@ -0,0 +1,130 @@
import requests
import os
import sqlite3
import json
# --- Configuration ---
BASE_URL = "http://192.168.178.6:8090/ce/api"
API_USER = os.getenv("COMPANY_EXPLORER_API_USER", "admin")
API_PASSWORD = os.getenv("COMPANY_EXPLORER_API_PASSWORD", "gemini")
DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db"
def fetch_all_companies_from_api():
    """Fetches all companies from the Company Explorer API.

    Pages through the /companies endpoint (50 per page) and returns the
    accumulated list, or None when any request fails.
    """
    print("Fetching all companies from Company Explorer API...")
    endpoint = f"{BASE_URL}/companies"
    collected = []
    page_no = 1
    while True:
        try:
            response = requests.get(
                endpoint,
                auth=(API_USER, API_PASSWORD),
                params={'page': page_no, 'per_page': 50},
                timeout=20
            )
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching companies from API: {e}")
            return None
        batch = data.get("items", [])
        if not batch:
            break
        collected.extend(batch)
        print(f"Fetched page {page_no} with {len(batch)} companies.")
        # Stop once the reported total has been reached.
        if len(collected) >= data.get("total", 0):
            break
        page_no += 1
    print(f"Total companies fetched: {len(collected)}")
    return collected
def setup_database():
    """Creates the SQLite database and the companies table.

    Drops any existing database file first so every sync starts clean.
    """
    print(f"Setting up database at: {DB_PATH}")
    if os.path.exists(DB_PATH):
        os.remove(DB_PATH)
        print("Removed existing database file.")
    conn = sqlite3.connect(DB_PATH)
    # Flexible flat schema holding the key fields plus the raw JSON record.
    conn.execute("""
        CREATE TABLE companies (
            id INTEGER PRIMARY KEY,
            name TEXT,
            industry_ai TEXT,
            status TEXT,
            city TEXT,
            country TEXT,
            website TEXT,
            calculated_metric_name TEXT,
            calculated_metric_value TEXT,
            calculated_metric_unit TEXT,
            full_json TEXT
        )
    """)
    conn.commit()
    conn.close()
    print("Database and table 'companies' created successfully.")
def populate_database(companies):
    """Populates the database with company data.

    Inserts one row per company; the first calculated metric (if any) is
    flattened into dedicated columns and the raw record is kept as JSON.
    Uses a single executemany batch and always closes the connection.
    """
    if not companies:
        print("No companies to populate.")
        return

    def to_row(company):
        # Flatten the first calculated metric safely (the list may be empty).
        metrics = company.get('calculated_metrics', [])
        first_metric = metrics[0] if metrics else {}
        return (
            company.get('id'),
            company.get('name'),
            company.get('industry_ai'),
            company.get('status'),
            company.get('city'),
            company.get('country'),
            company.get('website'),
            first_metric.get('name'),
            first_metric.get('value'),
            first_metric.get('unit'),
            json.dumps(company)  # Store the full object for future flexibility
        )

    print("Populating database...")
    conn = sqlite3.connect(DB_PATH)
    try:
        # executemany runs the whole batch at C level instead of one
        # Python-level execute per row.
        conn.executemany("""
            INSERT INTO companies (
                id, name, industry_ai, status, city, country, website,
                calculated_metric_name, calculated_metric_value, calculated_metric_unit,
                full_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (to_row(c) for c in companies))
        conn.commit()
    finally:
        # Close the connection even when an insert fails.
        conn.close()
    print(f"Successfully inserted {len(companies)} records into the database.")
# Script entry point: pull all companies from the API and mirror them locally.
if __name__ == "__main__":
    all_companies = fetch_all_companies_from_api()
    if all_companies is not None:
        setup_database()
        populate_database(all_companies)
        print("\nSync process finished successfully.")
        print(f"Database is ready at: {DB_PATH}")
    else:
        print("\nSync process failed due to API errors.")

25
scripts/test_prod_key.py Normal file
View File

@@ -0,0 +1,25 @@
import os
import requests
from dotenv import load_dotenv
load_dotenv(dotenv_path="/home/node/clawd/.env")
def test_prod_key():
    """Smoke-test the GEMINI_API_KEY_PROD key against the models endpoint.

    Prints a success or failure message; never raises.
    """
    key = os.getenv("GEMINI_API_KEY_PROD")
    if not key:
        print("❌ GEMINI_API_KEY_PROD not found in .env")
        return
    # Show only a key fragment so the secret never lands in logs.
    print(f"🔑 Testing Key: {key[:5]}...{key[-3:]}")
    url = f"https://generativelanguage.googleapis.com/v1beta/models?key={key}"
    try:
        # A timeout keeps the smoke test from hanging on network issues.
        resp = requests.get(url, timeout=15)
    except requests.RequestException as e:
        print(f"❌ Request failed: {e}")
        return
    if resp.status_code == 200:
        print("✅ API Call Successful! Key is active.")
        # print(f"Available Models: {[m['name'] for m in resp.json().get('models', [])][:3]}")
    else:
        print(f"❌ API Error: {resp.status_code} - {resp.text}")
# Script entry point: verify the production Gemini key.
if __name__ == "__main__":
    test_prod_key()

View File

@@ -0,0 +1,200 @@
import os
import requests
import json
from dotenv import load_dotenv
load_dotenv(dotenv_path="/home/node/clawd/.env")
NOTION_TOKEN = os.getenv("NOTION_API_KEY")
HEADERS = {
"Authorization": f"Bearer {NOTION_TOKEN}",
"Content-Type": "application/json",
"Notion-Version": "2022-06-28"
}
def find_db_id(query_name):
    """Look up a Notion database by name; return its id or None."""
    resp = requests.post(
        "https://api.notion.com/v1/search",
        headers=HEADERS,
        json={"query": query_name, "filter": {"value": "database", "property": "object"}},
    )
    if resp.status_code == 200:
        hits = resp.json().get("results", [])
        if hits:
            return hits[0]["id"]
    return None
def get_page_id(db_id, title_col, title_val):
    """Find the page in *db_id* whose title property equals *title_val*.

    Despite the name, returns the full page object (so callers can inspect
    its properties), or None on failure / no match.
    """
    query = {
        "filter": {
            "property": title_col,
            "title": {"equals": title_val},
        }
    }
    resp = requests.post(
        f"https://api.notion.com/v1/databases/{db_id}/query", headers=HEADERS, json=query
    )
    if resp.status_code != 200:
        return None
    matches = resp.json().get("results", [])
    return matches[0] if matches else None
def update_page(page_id, properties):
    """Patch the given properties onto a Notion page and log the outcome."""
    resp = requests.patch(
        f"https://api.notion.com/v1/pages/{page_id}",
        headers=HEADERS,
        json={"properties": properties},
    )
    if resp.status_code != 200:
        print(f"❌ Error updating {page_id}: {resp.text}")
    else:
        print(f"✅ Updated page {page_id}")
def append_text(current_text, new_text):
    """Append *new_text* to *current_text* with an [Auto-Update] marker.

    Empty/None current text yields *new_text* unchanged; if *new_text*
    already occurs in *current_text* the original is returned (idempotent).
    """
    if current_text and new_text in current_text:
        return current_text
    if not current_text:
        return new_text
    return current_text + "\n\n[Auto-Update]: " + new_text
# --- DATA TO UPDATE ---
# Static payloads pushed into Notion by run_updates(); edit here, re-run script.
# 1. Personas (KPIs)
# Maps persona title (looked up via the "Role" title property) -> KPI text.
PERSONA_UPDATES = {
    "Wirtschaftlicher Entscheider": "10-25% Reduktion Personalkosten\n15-30% höhere Gästezufriedenheit (Hypothese)",
    "Operativer Entscheider": "20-40% Entlastung bei Routineaufgaben\n100% Abdeckung Reinigungszyklen",
    "Infrastruktur-Verantwortlicher": "20-30% schnellere Integration\n80-90% weniger Ausfallzeiten",
    "Innovations-Treiber": "10-20% höhere Servicekapazität\nSteigerung Upselling 5-10%"
}
# 2. Industries (Pains/Gains/Status/Notes)
# Maps industry title (looked up via the "Vertical" title property) -> fields:
# *_add entries are appended (deduplicated via append_text), "status" sets the
# "Freigabe" property, "ops_focus" sets the "Ops Focus: Secondary" checkbox.
INDUSTRY_UPDATES = {
    "Healthcare - Hospital": {
        "pains_add": "Mangelnde Hygiene-Standards durch Personalengpässe (Infektionsrisiko). Hoher Dokumentationsaufwand für Audits.",
        "gains_add": "Konstante, audit-sichere Sauberkeit (24/7). Entlastung des Reinigungspersonals.",
        "status": "Freigegeben",
        "note_add": "Prio 1: Reinigung (Alex Veto). Service ist 'nice to have'. KPI: Hygiene-Sicherheit.",
        "ops_focus": True  # Checkbox
    },
    "Healthcare - Care Home": {
        "pains_add": "Mangelnde Hygiene-Standards. Steigende Personalkosten bei begrenzten Pflegesätzen.",
        "gains_add": "Sichtbare Hygiene schafft Vertrauen. Entlastung Housekeeping.",
        "status": "Freigegeben",
        "note_add": "Prio 1: Reinigung. Prio 2: Service (Essen). Fokus auf Fachkräftemangel.",
        "ops_focus": True
    },
    "Hospitality - Gastronomy": {
        "pains_add": "Lobby-Optik leidet bei Personalmangel.",
        "gains_add": "Makellose Optik für den ersten Eindruck.",
        "status": "Freigegeben",
        "note_add": "Prio 1: Reinigung (Nachts). Service nur in Entertainment-Gastro.",
        "ops_focus": False
    },
    "Leisure - Entertainment": {
        "pains_add": "Service-Personal fehlt für Umsatz (Getränke).",
        "gains_add": "Mehr Umsatz durch konstante Verfügbarkeit.",
        "status": "Freigegeben",
        "note_add": "Prio 1: Service Robotik (BellaBot).",
        "ops_focus": False
    },
    "Industry - Manufacturing": {
        "pains_add": "Staubbelastung gefährdet Sensoren/Qualität. Sicherheitsrisiko auf Fahrwegen.",
        "gains_add": "Staubfreie Umgebung ohne Produktionsstopp.",
        "status": "Freigegeben",
        "note_add": "Prio 1: Reinigung (Sweeper). Kein Stapler-Kampf!",
        "ops_focus": True
    },
    "Logistics - Warehouse": {
        "pains_add": "Staub auf Waren. Manuelles Kehren bindet Fachkräfte.",
        "gains_add": "Werterhalt Hallenboden. Sauberkeit ohne Störung.",
        "status": "Freigegeben",
        "note_add": "Prio 1: Sweeper (Staub). Prio 2: Wet.",
        "ops_focus": True
    }
}
def run_updates():
    """Push PERSONA_UPDATES and INDUSTRY_UPDATES into Notion.

    Personas get their "KPIs" rich-text replaced; industries get
    "Freigabe" / "Ops Focus: Secondary" set and Pains/Gains/Notes appended
    idempotently via append_text. Progress is logged to stdout.
    """
    print("--- Starting Notion Updates ---")

    def rich_text_prop(content):
        # Notion rich_text property payload for a single text run.
        return {"rich_text": [{"text": {"content": content}}]}

    def current_rich_text(props, key):
        # Plain text of an existing rich_text property, or "" when empty.
        if key in props and props[key]["rich_text"]:
            return props[key]["rich_text"][0]["plain_text"]
        return ""

    # 1. Update Personas (title property is "Role", per the earlier DB dump)
    db_personas = find_db_id("Personas")
    if db_personas:
        print(f"Found Personas DB: {db_personas}")
        for role, kpi_text in PERSONA_UPDATES.items():
            # Note: a second identical lookup used to happen here as a
            # "retry" — it was a no-op and has been removed.
            page = get_page_id(db_personas, "Role", role)
            if page:
                update_page(page['id'], {"KPIs": rich_text_prop(kpi_text)})
            else:
                print(f"⚠️ Persona '{role}' not found.")
    else:
        print("❌ Personas DB not found.")

    # 2. Update Industries (title property is "Vertical")
    db_ind = find_db_id("Industries")
    if db_ind:
        print(f"Found Industries DB: {db_ind}")
        for vertical, data in INDUSTRY_UPDATES.items():
            page = get_page_id(db_ind, "Vertical", vertical)
            if not page:
                print(f"⚠️ Industry '{vertical}' not found.")
                continue
            props = page['properties']
            new_props = {}
            # "Freigabe" may be modelled as a select or a status property.
            if "Freigabe" in props:
                if props["Freigabe"]["type"] == "select":
                    new_props["Freigabe"] = {"select": {"name": data["status"]}}
                elif props["Freigabe"]["type"] == "status":
                    new_props["Freigabe"] = {"status": {"name": data["status"]}}
            # Ops Focus (Checkbox)
            if "Ops Focus: Secondary" in props:
                new_props["Ops Focus: Secondary"] = {"checkbox": data["ops_focus"]}
            # Append (never overwrite) the narrative fields.
            for prop_name, add_key in (("Pains", "pains_add"),
                                       ("Gains", "gains_add"),
                                       ("Notes", "note_add")):
                merged = append_text(current_rich_text(props, prop_name), data[add_key])
                new_props[prop_name] = rich_text_prop(merged)
            update_page(page['id'], new_props)
    else:
        print("❌ Industries DB not found.")
# Script entry point: apply the Pains/Gains/KPI updates to Notion.
if __name__ == "__main__":
    run_updates()

157
show_status.py Normal file
View File

@@ -0,0 +1,157 @@
import sqlite3
import os
import requests
import json
from datetime import datetime, timedelta
# --- Configuration ---
DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db"
CE_API_URL = "http://192.168.178.6:8090/ce/api"
# SO_API_URL = "..." # To be added
import sys
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "connector-superoffice"))
from superoffice_client import SuperOfficeClient
class GtmHealthCheck:
    """Console health dashboard for the GTM lead pipeline.

    Aggregates stats from the local Company Explorer SQLite mirror, the
    Company Explorer HTTP API and the SuperOffice API, plus a throughput
    counter parsed from a local log file.
    """

    def __init__(self):
        self.db_path = DB_PATH
        self.ce_api_url = CE_API_URL
        self.api_user = os.getenv("COMPANY_EXPLORER_API_USER", "admin")
        self.api_password = os.getenv("COMPANY_EXPLORER_API_PASSWORD", "gemini")

    def get_ce_stats(self):
        """Return row counts from the local Company Explorer DB.

        Returns a dict with "total" and "in_progress"; on failure both
        counts are 0 and an "error" key describes the problem.
        """
        if not os.path.exists(self.db_path):
            return {"total": 0, "in_progress": 0, "error": "DB file not found."}
        try:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM companies")
            total = cursor.fetchone()[0]
            cursor.execute("SELECT COUNT(*) FROM companies WHERE status != 'ENRICHED'")
            in_progress = cursor.fetchone()[0]
            conn.close()
            return {"total": total, "in_progress": in_progress}
        except Exception as e:
            return {"total": 0, "in_progress": 0, "error": str(e)}

    def check_ce_api_health(self):
        """Probe the Company Explorer API /health endpoint; return a status tag."""
        try:
            response = requests.get(
                f"{self.ce_api_url}/health",
                auth=(self.api_user, self.api_password),
                timeout=5
            )
            if response.status_code == 200 and response.json().get("status") == "ok":
                return "[HEALTHY]"
            return f"[ERROR - Status {response.status_code}]"
        except requests.exceptions.Timeout:
            return "[BUSY/TIMEOUT]"
        except requests.exceptions.RequestException:
            return "[UNREACHABLE]"

    def get_so_stats(self):
        """Return the approximate total company count from SuperOffice.

        Only the first page (top 200) is fetched; when a continuation link
        is present the count is reported as a lower bound ("> N").
        """
        try:
            client = SuperOfficeClient()
            # We query the first page with a page size of 200 (a common default)
            query_string = "Contact?$top=200"
            url = f"{client.base_url}/{query_string}"
            response = requests.get(url, headers=client.headers, timeout=15)
            response.raise_for_status()
            data = response.json()
            count_on_page = len(data.get('value', []))
            # A continuation link means there are more pages than we fetched.
            if 'odata.nextLink' in data or 'next_page_url' in data:
                return {"total": f"> {count_on_page}"}  # More than one page
            return {"total": str(count_on_page)}  # Exact number if only one page
        except requests.exceptions.RequestException:
            # Was `f"API Error"` with an unused `as e` binding and a
            # placeholder-less f-string — plain literal, same output.
            return {"total": "API Error"}
        except Exception:
            return {"total": "Error"}

    def check_so_api_health(self):
        """Probe the SuperOffice API base URL; return a status tag."""
        try:
            client = SuperOfficeClient()
            # A simple request to the base URL should suffice as a health check
            response = requests.get(client.base_url, headers=client.headers, timeout=10)
            if response.status_code == 200:
                return "[HEALTHY]"
            return f"[ERROR - Status {response.status_code}]"
        except requests.exceptions.Timeout:
            return "[BUSY/TIMEOUT]"
        except requests.exceptions.RequestException:
            return "[UNREACHABLE]"

    def get_throughput(self):
        """Count accounts processed in the last hour from the throughput log.

        Returns an int, or the string "Log Error" when the log is unreadable.
        """
        log_file = "/home/node/clawd/repos/brancheneinstufung2/logs/throughput.log"
        if not os.path.exists(log_file):
            return 0
        count = 0
        one_hour_ago = datetime.utcnow() - timedelta(hours=1)
        try:
            with open(log_file, "r") as f:
                for line in f:
                    parts = line.strip().split(',')
                    if len(parts) >= 1:
                        try:
                            timestamp = datetime.fromisoformat(parts[0])
                            if timestamp >= one_hour_ago:
                                count += 1
                        except ValueError:
                            continue  # Ignore malformed lines
            return count
        except Exception:
            return "Log Error"

    def render_dashboard(self):
        """Render the status dashboard to the console."""
        ce_stats = self.get_ce_stats()
        ce_health = self.check_ce_api_health()
        so_stats = self.get_so_stats()
        so_health = self.check_so_api_health()
        throughput = self.get_throughput()
        timestamp = datetime.now().strftime("%d.%m.%y %H:%M")
        print("=======================================")
        print(f"GTM Lead Engine - Status ({timestamp})")
        print("=======================================")
        print("\n[+] Schnittstellen:")
        print(f" - SuperOffice API: {so_health}")
        print(f" - Company Explorer: {ce_health}")
        print("\n[+] Account Trichter:")
        print(f" - SuperOffice Gesamt: {so_stats.get('total')}")
        if 'error' in ce_stats:
            print(f" - Im Company Explorer: Error ({ce_stats['error']})")
        else:
            print(f" - Im Company Explorer: {ce_stats.get('total')}")
            print(f" - In Bearbeitung: {ce_stats.get('in_progress')}")
        print("\n[+] Durchsatz (Letzte Stunde):")
        print(f" - Verarbeitet: {throughput} Accounts")
        print("\n")
# Script entry point: render the health dashboard once.
if __name__ == "__main__":
    checker = GtmHealthCheck()
    checker.render_dashboard()