From f65df42f55497cdb6c8f2888164ee87bf6a6c893 Mon Sep 17 00:00:00 2001 From: Floke Date: Thu, 19 Feb 2026 20:59:04 +0000 Subject: [PATCH] [2ff88f42] multiplikation vorbereitet multiplikation vorbereitet --- .dev_session/SESSION_INFO | 2 +- company-explorer/backend/app.py | 26 ++- company-explorer/backend/database.py | 27 ++- .../backend/scripts/check_mappings.py | 22 +++ .../backend/scripts/generate_matrix.py | 162 ++++++++++++++++++ .../backend/scripts/seed_marketing_data.py | 123 +++++++++++++ .../backend/scripts/sync_notion_personas.py | 134 +++++++++++++++ .../scripts/sync_notion_to_ce_enhanced.py | 59 ++++++- .../backend/scripts/test_mapping_logic.py | 47 +++++ .../backend/scripts/upgrade_schema_v2.py | 33 ++++ connector-superoffice/README.md | 18 +- debug_notion_schema.py | 36 ++++ readme.md | 9 +- sync_archetypes_final.py | 161 +++++++++++++++++ sync_personas_to_notion.py | 150 ++++++++++++++++ 15 files changed, 982 insertions(+), 27 deletions(-) create mode 100644 company-explorer/backend/scripts/check_mappings.py create mode 100644 company-explorer/backend/scripts/generate_matrix.py create mode 100644 company-explorer/backend/scripts/seed_marketing_data.py create mode 100644 company-explorer/backend/scripts/sync_notion_personas.py create mode 100644 company-explorer/backend/scripts/test_mapping_logic.py create mode 100644 company-explorer/backend/scripts/upgrade_schema_v2.py create mode 100644 debug_notion_schema.py create mode 100644 sync_archetypes_final.py create mode 100644 sync_personas_to_notion.py diff --git a/.dev_session/SESSION_INFO b/.dev_session/SESSION_INFO index e95f7636..dde53f28 100644 --- a/.dev_session/SESSION_INFO +++ b/.dev_session/SESSION_INFO @@ -1 +1 @@ -{"task_id": "2ff88f42-8544-8000-8314-c9013414d1d0", "token": "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", "session_start_time": "2026-02-19T16:06:04.236614"} \ No newline at end of file +{"task_id": "2ff88f42-8544-8000-8314-c9013414d1d0", "token": 
"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", "session_start_time": "2026-02-19T20:58:39.825485"} \ No newline at end of file diff --git a/company-explorer/backend/app.py b/company-explorer/backend/app.py index 4c27d82b..19aac38e 100644 --- a/company-explorer/backend/app.py +++ b/company-explorer/backend/app.py @@ -32,7 +32,7 @@ setup_logging() import logging logger = logging.getLogger(__name__) -from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRoleMapping, ReportedMistake, MarketingMatrix +from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRoleMapping, ReportedMistake, MarketingMatrix, Persona from .services.deduplication import Deduplicator from .services.discovery import DiscoveryService from .services.scraping import ScraperService @@ -223,22 +223,18 @@ def provision_superoffice_contact( if vertical_name and role_name: industry_obj = db.query(Industry).filter(Industry.name == vertical_name).first() + persona_obj = db.query(Persona).filter(Persona.name == role_name).first() - if industry_obj: - # Find any mapping for this role to query the Matrix - # (Assuming Matrix is linked to *one* canonical mapping for this role string) - role_ids = [m.id for m in db.query(JobRoleMapping).filter(JobRoleMapping.role == role_name).all()] + if industry_obj and persona_obj: + matrix_entry = db.query(MarketingMatrix).filter( + MarketingMatrix.industry_id == industry_obj.id, + MarketingMatrix.persona_id == persona_obj.id + ).first() - if role_ids: - matrix_entry = db.query(MarketingMatrix).filter( - MarketingMatrix.industry_id == industry_obj.id, - MarketingMatrix.role_id.in_(role_ids) - ).first() - - if matrix_entry: - texts["subject"] = matrix_entry.subject - texts["intro"] = matrix_entry.intro - texts["social_proof"] = matrix_entry.social_proof + if matrix_entry: + texts["subject"] = matrix_entry.subject + texts["intro"] = matrix_entry.intro + 
texts["social_proof"] = matrix_entry.social_proof return ProvisioningResponse( status="success", diff --git a/company-explorer/backend/database.py b/company-explorer/backend/database.py index 2c82e456..dfea3933 100644 --- a/company-explorer/backend/database.py +++ b/company-explorer/backend/database.py @@ -162,6 +162,23 @@ class JobRoleMapping(Base): created_at = Column(DateTime, default=datetime.utcnow) +class Persona(Base): + """ + Represents a generalized persona/role (e.g. 'Geschäftsführer', 'IT-Leiter') + independent of the specific job title pattern. + Stores the strategic messaging components. + """ + __tablename__ = "personas" + + id = Column(Integer, primary_key=True, index=True) + name = Column(String, unique=True, index=True) # Matches the 'role' string in JobRoleMapping + + pains = Column(Text, nullable=True) # JSON list or multiline string + gains = Column(Text, nullable=True) # JSON list or multiline string + + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + class Signal(Base): """ @@ -254,8 +271,8 @@ class ReportedMistake(Base): class MarketingMatrix(Base): """ - Stores the static marketing texts for Industry x Role combinations. - Source: Notion (synced). + Stores the static marketing texts for Industry x Persona combinations. + Source: Generated via AI. 
""" __tablename__ = "marketing_matrix" @@ -263,7 +280,7 @@ class MarketingMatrix(Base): # The combination keys industry_id = Column(Integer, ForeignKey("industries.id"), nullable=False) - role_id = Column(Integer, ForeignKey("job_role_mappings.id"), nullable=False) + persona_id = Column(Integer, ForeignKey("personas.id"), nullable=False) # The Content subject = Column(Text, nullable=True) @@ -273,7 +290,7 @@ class MarketingMatrix(Base): updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) industry = relationship("Industry") - role = relationship("JobRoleMapping") + persona = relationship("Persona") # ============================================================================== @@ -329,4 +346,4 @@ def get_db(): try: yield db finally: - db.close() + db.close() \ No newline at end of file diff --git a/company-explorer/backend/scripts/check_mappings.py b/company-explorer/backend/scripts/check_mappings.py new file mode 100644 index 00000000..55657337 --- /dev/null +++ b/company-explorer/backend/scripts/check_mappings.py @@ -0,0 +1,22 @@ + +import sys +import os + +# Setup Environment +sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) + +from backend.database import SessionLocal, JobRoleMapping + +def check_mappings(): + db = SessionLocal() + count = db.query(JobRoleMapping).count() + print(f"Total JobRoleMappings: {count}") + + examples = db.query(JobRoleMapping).limit(5).all() + for ex in examples: + print(f" - {ex.pattern} -> {ex.role}") + + db.close() + +if __name__ == "__main__": + check_mappings() diff --git a/company-explorer/backend/scripts/generate_matrix.py b/company-explorer/backend/scripts/generate_matrix.py new file mode 100644 index 00000000..f49babff --- /dev/null +++ b/company-explorer/backend/scripts/generate_matrix.py @@ -0,0 +1,162 @@ + +import sys +import os +import json +import argparse +from typing import List + +# Setup Environment +sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) + 
+from backend.database import SessionLocal, Industry, Persona, MarketingMatrix + +# --- Configuration --- +MODEL = "gpt-4o" + +def generate_prompt(industry: Industry, persona: Persona) -> str: + """ + Builds the prompt for the AI to generate the marketing texts. + Combines Industry context with Persona specific pains/gains. + """ + + # Safely load JSON lists + try: + persona_pains = json.loads(persona.pains) if persona.pains else [] + persona_gains = json.loads(persona.gains) if persona.gains else [] + except: + persona_pains = [persona.pains] if persona.pains else [] + persona_gains = [persona.gains] if persona.gains else [] + + industry_pains = industry.pains if industry.pains else "Allgemeine Effizienzprobleme" + + prompt = f""" +Du bist ein erfahrener B2B-Copywriter für Robotik-Lösungen (Reinigung, Transport, Service). +Ziel: Erstelle personalisierte E-Mail-Textbausteine für einen Outreach. + +--- KONTEXT --- +ZIELBRANCHE: {industry.name} +BRANCHEN-KONTEXT: {industry.description or 'Keine spezifische Beschreibung'} +BRANCHEN-PAINS: {industry_pains} + +ZIELPERSON (ARCHETYP): {persona.name} +PERSÖNLICHE PAINS (Herausforderungen): +{chr(10).join(['- ' + p for p in persona_pains])} + +GEWÜNSCHTE GAINS (Ziele): +{chr(10).join(['- ' + g for g in persona_gains])} + +--- AUFGABE --- +Erstelle ein JSON-Objekt mit genau 3 Textbausteinen. +Tonalität: Professionell, lösungsorientiert, auf den Punkt. Keine Marketing-Floskeln ("Game Changer"). + +1. "subject": Betreffzeile (Max 6 Wörter). Muss neugierig machen und einen Pain adressieren. +2. "intro": Einleitungssatz (1-2 Sätze). Verbinde die Branchen-Herausforderung mit der persönlichen Rolle des Empfängers. Zeige Verständnis für seine Situation. +3. "social_proof": Ein Satz, der Vertrauen aufbaut. Nenne generische Erfolge (z.B. "Unternehmen in der {industry.name} senken so ihre Kosten um 15%"), da wir noch keine spezifischen Logos nennen dürfen. 
+ +--- FORMAT --- +{{ + "subject": "...", + "intro": "...", + "social_proof": "..." +}} +""" + return prompt + +def mock_openai_call(prompt: str): + """Simulates an API call for dry runs.""" + print(f"\n--- [MOCK] GENERATING PROMPT ---\n{prompt[:300]}...\n--------------------------------") + return { + "subject": "[MOCK] Effizienzsteigerung in der Produktion", + "intro": "[MOCK] Als Produktionsleiter wissen Sie, wie teuer Stillstand ist. Unsere Roboter helfen.", + "social_proof": "[MOCK] Ähnliche Betriebe sparten 20% Kosten." + } + +def real_openai_call(prompt: str): + # This would link to the actual OpenAI client + # For now, we keep it simple or import from a lib + import openai + from backend.config import settings + + if not settings.OPENAI_API_KEY: + raise ValueError("OPENAI_API_KEY not set") + + client = openai.OpenAI(api_key=settings.OPENAI_API_KEY) + response = client.chat.completions.create( + model=MODEL, + response_format={"type": "json_object"}, + messages=[{"role": "user", "content": prompt}], + temperature=0.7 + ) + return json.loads(response.choices[0].message.content) + +def run_matrix_generation(dry_run: bool = True, force: bool = False): + db = SessionLocal() + try: + industries = db.query(Industry).all() + personas = db.query(Persona).all() + + print(f"Found {len(industries)} Industries and {len(personas)} Personas.") + print(f"Mode: {'DRY RUN (No API calls, no DB writes)' if dry_run else 'LIVE'}") + + total_combinations = len(industries) * len(personas) + processed = 0 + + for ind in industries: + for pers in personas: + processed += 1 + print(f"[{processed}/{total_combinations}] Check: {ind.name} x {pers.name}") + + # Check existing + existing = db.query(MarketingMatrix).filter( + MarketingMatrix.industry_id == ind.id, + MarketingMatrix.persona_id == pers.id + ).first() + + if existing and not force: + print(f" -> Skipped (Already exists)") + continue + + # Generate + prompt = generate_prompt(ind, pers) + + if dry_run: + result = 
mock_openai_call(prompt) + else: + try: + result = real_openai_call(prompt) + except Exception as e: + print(f" -> API ERROR: {e}") + continue + + # Write to DB (only if not dry run) + if not dry_run: + if not existing: + new_entry = MarketingMatrix( + industry_id=ind.id, + persona_id=pers.id, + subject=result.get("subject"), + intro=result.get("intro"), + social_proof=result.get("social_proof") + ) + db.add(new_entry) + print(f" -> Created new entry.") + else: + existing.subject = result.get("subject") + existing.intro = result.get("intro") + existing.social_proof = result.get("social_proof") + print(f" -> Updated entry.") + + db.commit() + + except Exception as e: + print(f"Error: {e}") + finally: + db.close() + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--live", action="store_true", help="Actually call OpenAI and write to DB") + parser.add_argument("--force", action="store_true", help="Overwrite existing matrix entries") + args = parser.parse_args() + + run_matrix_generation(dry_run=not args.live, force=args.force) diff --git a/company-explorer/backend/scripts/seed_marketing_data.py b/company-explorer/backend/scripts/seed_marketing_data.py new file mode 100644 index 00000000..1ca1bfd8 --- /dev/null +++ b/company-explorer/backend/scripts/seed_marketing_data.py @@ -0,0 +1,123 @@ +import sys +import os +import json + +# Setup Environment to import backend modules +sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) +from backend.database import SessionLocal, Persona, JobRoleMapping + +def seed_archetypes(): + db = SessionLocal() + print("Seeding Strategic Archetypes (Pains & Gains)...") + + # --- 1. 
The 4 Strategic Archetypes --- + # Based on user input and synthesis of previous specific roles + archetypes = [ + { + "name": "Operativer Entscheider", + "pains": [ + "Personelle Unterbesetzung und hohe Fluktuation führen zu Überstunden und Qualitätsmängeln.", + "Manuelle, wiederkehrende Prozesse binden wertvolle Ressourcen und senken die Effizienz.", + "Sicherstellung gleichbleibend hoher Standards (Hygiene/Service) ist bei Personalmangel kaum möglich." + ], + "gains": [ + "Spürbare Entlastung des Teams von Routineaufgaben (20-40%).", + "Garantierte, gleichbleibend hohe Ausführungsqualität rund um die Uhr.", + "Stabilisierung der operativen Abläufe unabhängig von kurzfristigen Personalausfällen." + ] + }, + { + "name": "Infrastruktur-Verantwortlicher", + "pains": [ + "Integration neuer Systeme in bestehende Gebäude/IT ist oft komplex und risikobehaftet.", + "Sorge vor hohen Ausfallzeiten und aufwändiger Fehlerbehebung ohne internes Spezialwissen.", + "Unklare Wartungsaufwände und Schnittstellenprobleme (WLAN, Aufzüge, Türen)." + ], + "gains": [ + "Reibungslose, fachgerechte Integration in die bestehende Infrastruktur.", + "Maximale Betriebssicherheit durch proaktives Monitoring und schnelle Reaktionszeiten.", + "Volle Transparenz über Systemstatus und Wartungsbedarf." + ] + }, + { + "name": "Wirtschaftlicher Entscheider", + "pains": [ + "Steigende operative Kosten (Personal, Material) drücken auf die Margen.", + "Unklare Amortisation (ROI) und Risiko von Fehlinvestitionen bei neuen Technologien.", + "Intransparente Folgekosten (TCO) über die Lebensdauer der Anlagen." + ], + "gains": [ + "Nachweisbare Senkung der operativen Kosten (10-25%).", + "Transparente und planbare Kostenstruktur (TCO) ohne versteckte Überraschungen.", + "Schneller, messbarer Return on Investment durch Effizienzsteigerung." 
+ ] + }, + { + "name": "Innovations-Treiber", + "pains": [ + "Verlust der Wettbewerbsfähigkeit durch veraltete Prozesse und Kundenangebote.", + "Schwierigkeit, das Unternehmen als modernes, zukunftsorientiertes Brand zu positionieren.", + "Verpasste Chancen durch fehlende Datengrundlage für Optimierungen." + ], + "gains": [ + "Positionierung als Innovationsführer und Steigerung der Arbeitgeberattraktivität.", + "Nutzung modernster Technologie als sichtbares Differenzierungsmerkmal.", + "Gewinnung wertvoller Daten zur kontinuierlichen Prozessoptimierung." + ] + } + ] + + # Clear existing Personas to avoid mix-up with old granular ones + # (In production, we might want to be more careful, but here we want a clean slate for the new archetypes) + try: + db.query(Persona).delete() + db.commit() + print("Cleared old Personas.") + except Exception as e: + print(f"Warning clearing personas: {e}") + + for p_data in archetypes: + print(f"Creating Archetype: {p_data['name']}") + new_persona = Persona( + name=p_data["name"], + pains=json.dumps(p_data["pains"]), + gains=json.dumps(p_data["gains"]) + ) + db.add(new_persona) + + db.commit() + + # --- 2. 
Update JobRoleMappings to map to Archetypes --- + # We map the patterns to the new 4 Archetypes + + mapping_updates = [ + # Wirtschaftlicher Entscheider + {"role": "Wirtschaftlicher Entscheider", "patterns": ["%geschäftsführer%", "%ceo%", "%director%", "%einkauf%", "%procurement%", "%finance%", "%cfo%"]}, + + # Operativer Entscheider + {"role": "Operativer Entscheider", "patterns": ["%housekeeping%", "%hausdame%", "%hauswirtschaft%", "%reinigung%", "%restaurant%", "%f&b%", "%werksleiter%", "%produktionsleiter%", "%lager%", "%logistik%", "%operations%", "%coo%"]}, + + # Infrastruktur-Verantwortlicher + {"role": "Infrastruktur-Verantwortlicher", "patterns": ["%facility%", "%technik%", "%instandhaltung%", "%it-leiter%", "%cto%", "%admin%", "%building%"]}, + + # Innovations-Treiber + {"role": "Innovations-Treiber", "patterns": ["%innovation%", "%digital%", "%transformation%", "%business dev%", "%marketing%"]} + ] + + # Clear old mappings to prevent confusion + db.query(JobRoleMapping).delete() + db.commit() + print("Cleared old JobRoleMappings.") + + for group in mapping_updates: + role_name = group["role"] + for pattern in group["patterns"]: + print(f"Mapping '{pattern}' -> '{role_name}'") + db.add(JobRoleMapping(pattern=pattern, role=role_name)) + + db.commit() + print("Archetypes and Mappings Seeded Successfully.") + db.close() + +if __name__ == "__main__": + seed_archetypes() \ No newline at end of file diff --git a/company-explorer/backend/scripts/sync_notion_personas.py b/company-explorer/backend/scripts/sync_notion_personas.py new file mode 100644 index 00000000..a88d854f --- /dev/null +++ b/company-explorer/backend/scripts/sync_notion_personas.py @@ -0,0 +1,134 @@ +import sys +import os +import requests +import json +import logging + +# Add company-explorer to path (parent of backend) +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) + +from backend.database import SessionLocal, Persona, init_db +from backend.config import 
settings + +# Setup Logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +NOTION_TOKEN_FILE = "/app/notion_token.txt" +# Sector & Persona Master DB +PERSONAS_DB_ID = "2e288f42-8544-8113-b878-ec99c8a02a6b" + +VALID_ARCHETYPES = { + "Wirtschaftlicher Entscheider", + "Operativer Entscheider", + "Infrastruktur-Verantwortlicher", + "Innovations-Treiber" +} + +def load_notion_token(): + try: + with open(NOTION_TOKEN_FILE, "r") as f: + return f.read().strip() + except FileNotFoundError: + logger.error(f"Notion token file not found at {NOTION_TOKEN_FILE}") + sys.exit(1) + +def query_notion_db(token, db_id): + url = f"https://api.notion.com/v1/databases/{db_id}/query" + headers = { + "Authorization": f"Bearer {token}", + "Notion-Version": "2022-06-28", + "Content-Type": "application/json" + } + results = [] + has_more = True + next_cursor = None + + while has_more: + payload = {} + if next_cursor: + payload["start_cursor"] = next_cursor + + response = requests.post(url, headers=headers, json=payload) + if response.status_code != 200: + logger.error(f"Error querying Notion DB {db_id}: {response.text}") + break + + data = response.json() + results.extend(data.get("results", [])) + has_more = data.get("has_more", False) + next_cursor = data.get("next_cursor") + + return results + +def extract_title(prop): + if not prop: return "" + return "".join([t.get("plain_text", "") for t in prop.get("title", [])]) + +def extract_rich_text_to_list(prop): + """ + Extracts rich text and converts bullet points/newlines into a list of strings. 
+ """ + if not prop: return [] + full_text = "".join([t.get("plain_text", "") for t in prop.get("rich_text", [])]) + + # Split by newline and clean up bullets + lines = full_text.split('\n') + cleaned_lines = [] + for line in lines: + line = line.strip() + if not line: continue + if line.startswith("- "): + line = line[2:] + elif line.startswith("• "): + line = line[2:] + cleaned_lines.append(line) + + return cleaned_lines + +def sync_personas(token, session): + logger.info("Syncing Personas from Notion...") + + pages = query_notion_db(token, PERSONAS_DB_ID) + count = 0 + + for page in pages: + props = page.get("properties", {}) + name = extract_title(props.get("Name")) + + if name not in VALID_ARCHETYPES: + logger.debug(f"Skipping '{name}' (Not a target Archetype)") + continue + + logger.info(f"Processing Persona: {name}") + + pains_list = extract_rich_text_to_list(props.get("Pains")) + gains_list = extract_rich_text_to_list(props.get("Gains")) + + # Upsert Logic + persona = session.query(Persona).filter(Persona.name == name).first() + if not persona: + persona = Persona(name=name) + session.add(persona) + logger.info(f" -> Creating new entry") + else: + logger.info(f" -> Updating existing entry") + + persona.pains = json.dumps(pains_list, ensure_ascii=False) + persona.gains = json.dumps(gains_list, ensure_ascii=False) + + count += 1 + + session.commit() + logger.info(f"Sync complete. 
Updated {count} personas.") + +if __name__ == "__main__": + token = load_notion_token() + db = SessionLocal() + + try: + sync_personas(token, db) + except Exception as e: + logger.error(f"Sync failed: {e}", exc_info=True) + finally: + db.close() diff --git a/company-explorer/backend/scripts/sync_notion_to_ce_enhanced.py b/company-explorer/backend/scripts/sync_notion_to_ce_enhanced.py index e7b42410..513b44ed 100644 --- a/company-explorer/backend/scripts/sync_notion_to_ce_enhanced.py +++ b/company-explorer/backend/scripts/sync_notion_to_ce_enhanced.py @@ -7,7 +7,7 @@ import logging # /app/backend/scripts/sync.py -> /app sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) -from backend.database import SessionLocal, Industry, RoboticsCategory, init_db +from backend.database import SessionLocal, Industry, RoboticsCategory, Persona, init_db from dotenv import load_dotenv # Try loading from .env in root if exists @@ -76,6 +76,21 @@ def extract_number(prop): if not prop or "number" not in prop: return None return prop["number"] +def extract_rich_text_to_list(prop): + if not prop or "rich_text" not in prop: return [] + full_text = "".join([t.get("plain_text", "") for t in prop.get("rich_text", [])]) + lines = full_text.split('\n') + cleaned_lines = [] + for line in lines: + line = line.strip() + if not line: continue + if line.startswith("- "): + line = line[2:] + elif line.startswith("• "): + line = line[2:] + cleaned_lines.append(line) + return cleaned_lines + def sync(): logger.info("--- Starting Enhanced Sync ---") @@ -83,6 +98,48 @@ def sync(): init_db() session = SessionLocal() + # --- 4. 
Sync Personas (NEW) --- + # Sector & Persona Master ID + PERSONAS_DB_ID = "2e288f42-8544-8113-b878-ec99c8a02a6b" + VALID_ARCHETYPES = { + "Wirtschaftlicher Entscheider", + "Operativer Entscheider", + "Infrastruktur-Verantwortlicher", + "Innovations-Treiber" + } + + if PERSONAS_DB_ID: + logger.info(f"Syncing Personas from {PERSONAS_DB_ID}...") + pages = query_all(PERSONAS_DB_ID) + p_count = 0 + + # We assume Personas are cumulative, so we don't delete all first (safer for IDs) + # But we could if we wanted a clean slate. Upsert is better. + + for page in pages: + props = page["properties"] + name = extract_title(props.get("Name")) + + if name not in VALID_ARCHETYPES: + continue + + import json + pains_list = extract_rich_text_to_list(props.get("Pains")) + gains_list = extract_rich_text_to_list(props.get("Gains")) + + persona = session.query(Persona).filter(Persona.name == name).first() + if not persona: + persona = Persona(name=name) + session.add(persona) + + persona.pains = json.dumps(pains_list, ensure_ascii=False) + persona.gains = json.dumps(gains_list, ensure_ascii=False) + + p_count += 1 + + session.commit() + logger.info(f"✅ Synced {p_count} Personas.") + # 2. Sync Categories (Products) cat_db_id = find_db_id("Product Categories") or find_db_id("Products") if cat_db_id: diff --git a/company-explorer/backend/scripts/test_mapping_logic.py b/company-explorer/backend/scripts/test_mapping_logic.py new file mode 100644 index 00000000..5ae6f510 --- /dev/null +++ b/company-explorer/backend/scripts/test_mapping_logic.py @@ -0,0 +1,47 @@ + +import sys +import os + +# Setup Environment +sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) + +from backend.database import SessionLocal, JobRoleMapping, Persona + +def test_mapping(job_title): + db = SessionLocal() + print(f"\n--- Testing Mapping for '{job_title}' ---") + + # 1. 
Find Role Name via JobRoleMapping + role_name = None + mappings = db.query(JobRoleMapping).all() + for m in mappings: + pattern_clean = m.pattern.replace("%", "").lower() + if pattern_clean in job_title.lower(): + role_name = m.role + print(f" -> Matched Pattern: '{m.pattern}' => Role: '{role_name}'") + break + + if not role_name: + print(" -> No Pattern Matched.") + return + + # 2. Find Persona via Role Name + persona = db.query(Persona).filter(Persona.name == role_name).first() + if persona: + print(f" -> Found Persona ID: {persona.id} (Name: {persona.name})") + else: + print(f" -> ERROR: Persona '{role_name}' not found in DB!") + + db.close() + +if __name__ == "__main__": + test_titles = [ + "Leiter Hauswirtschaft", + "CTO", + "Geschäftsführer", + "Head of Marketing", + "Einkaufsleiter" + ] + + for t in test_titles: + test_mapping(t) diff --git a/company-explorer/backend/scripts/upgrade_schema_v2.py b/company-explorer/backend/scripts/upgrade_schema_v2.py new file mode 100644 index 00000000..7614d5e2 --- /dev/null +++ b/company-explorer/backend/scripts/upgrade_schema_v2.py @@ -0,0 +1,33 @@ + +import sys +import os + +# Add parent directory to path to allow import of backend.database +sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) + +# Import everything to ensure metadata is populated +from backend.database import engine, Base, Company, Contact, Industry, JobRoleMapping, Persona, Signal, EnrichmentData, RoboticsCategory, ImportLog, ReportedMistake, MarketingMatrix + +def migrate(): + print("Migrating Database Schema...") + + try: + # Hacky migration for MarketingMatrix: Drop if exists to enforce new schema + with engine.connect() as con: + print("Dropping old MarketingMatrix table to enforce schema change...") + try: + from sqlalchemy import text + con.execute(text("DROP TABLE IF EXISTS marketing_matrix")) + print("Dropped marketing_matrix.") + except Exception as e: + print(f"Could not drop marketing_matrix: {e}") + + except Exception as e: + 
print(f"Pre-migration cleanup error: {e}") + + # This creates 'personas' table AND re-creates 'marketing_matrix' + Base.metadata.create_all(bind=engine) + print("Migration complete. 'personas' table created and 'marketing_matrix' refreshed.") + +if __name__ == "__main__": + migrate() diff --git a/connector-superoffice/README.md b/connector-superoffice/README.md index 2c6d904a..cc2d6e7a 100644 --- a/connector-superoffice/README.md +++ b/connector-superoffice/README.md @@ -83,8 +83,18 @@ Der Connector ruft den Company Explorer auf und liefert dabei **Live-Daten** aus } ``` -## 5. Offene To-Dos (Roadmap) +## 5. Offene To-Dos (Roadmap für Produktionsfreigabe) -* [ ] **UDF-Mapping:** Aktuell sind die `ProgId`s (z.B. `SuperOffice:5`) im Code (`worker.py`) hartkodiert. Dies muss in eine Config ausgelagert werden. -* [ ] **Fehlerbehandlung:** Was passiert, wenn der Company Explorer "404 Not Found" meldet? (Aktuell: Log Warning & Skip). -* [ ] **Redis:** Bei sehr hoher Last (>100 Events/Sekunde) sollte die SQLite-Queue durch Redis ersetzt werden. \ No newline at end of file +Um den Connector für den stabilen Betrieb in der Produktivumgebung freizugeben, sind folgende Härtungsmaßnahmen erforderlich: + +* [ ] **Konfigurationshärtung (UDFs & Endpunkte):** + * Alle umgebungsspezifischen Werte (SuperOffice Base URL, Customer ID, **alle UDF ProgIDs** für Vertical, Subject, Intro, Social Proof, etc.) müssen aus dem Code entfernt und über Umgebungsvariablen (`.env`) konfigurierbar gemacht werden. Dies stellt sicher, dass derselbe Container ohne Code-Änderung in DEV und PROD läuft. +* [ ] **Werkzeug zur UDF-ID-Findung:** + * Erstellung eines Python-Skripts (`discover_fields.py`), das die SuperOffice API abfragt und alle verfügbaren UDFs mit ihren `ProgId`s auflistet. Dies vereinfacht die Erstkonfiguration in neuen Umgebungen. +* [ ] **Feiertags-Logik (Autarkie SuperOffice):** + * Erstellung einer dedizierten SuperOffice Y-Tabelle (`y_holidays`) zur Speicherung von Feiertagen. 
+ * Erstellung eines Python-Skripts (`import_holidays_to_so.py`) zur einmaligen und periodischen Befüllung dieser Tabelle. + * Anpassung des SuperOffice CRMScripts, um diese Tabelle vor dem Versand zu prüfen. +* [ ] **Webinterface (Settings -> Job Role Mapping):** Erweiterung des UI zur Darstellung und Verwaltung der neuen Persona-Archetypen und ihrer Mappings. Dies beinhaltet auch eine Überarbeitung der bestehenden Job-Titel-Mappungsansicht, um die Zuordnung zu den Archetypen zu verdeutlichen und ggf. zu editieren. +* [ ] **Skalierung (Optional/Zukunft):** + * Bei sehr hoher Last (>100 Events/Sekunde) sollte die interne SQLite-Queue durch eine performantere Lösung wie Redis ersetzt werden. \ No newline at end of file diff --git a/debug_notion_schema.py b/debug_notion_schema.py new file mode 100644 index 00000000..284253a0 --- /dev/null +++ b/debug_notion_schema.py @@ -0,0 +1,36 @@ + +import requests +import json + +# Notion Config +try: + with open("notion_token.txt", "r") as f: + NOTION_TOKEN = f.read().strip() +except FileNotFoundError: + print("Error: notion_token.txt not found.") + exit(1) + +NOTION_VERSION = "2022-06-28" +NOTION_API_BASE_URL = "https://api.notion.com/v1" +HEADERS = { + "Authorization": f"Bearer {NOTION_TOKEN}", + "Notion-Version": NOTION_VERSION, + "Content-Type": "application/json", +} + +# DB ID from import_product.py +DB_ID = "2e288f42-8544-8113-b878-ec99c8a02a6b" + +def get_db_properties(database_id): + url = f"{NOTION_API_BASE_URL}/databases/{database_id}" + try: + response = requests.get(url, headers=HEADERS) + response.raise_for_status() + return response.json().get("properties") + except Exception as e: + print(f"Error: {e}") + return None + +props = get_db_properties(DB_ID) +if props: + print(json.dumps(props, indent=2)) diff --git a/readme.md b/readme.md index 9e7c0db4..ddf6f4c1 100644 --- a/readme.md +++ b/readme.md @@ -63,7 +63,14 @@ VII. 
DIE STRATEGIE-SCHMIEDE (GTM Architect) ├── gtm-architect/ (React Frontend) └── server.cjs (Node.js API-Bridge) -VIII. DAS FUNDAMENT +VIII. MARKETING AUTOMATION CORE (Company Explorer) + └── Backend-Logik für hyper-personalisierte E-Mail-Texte (vertical x persona) + ├── database.py (Neue 'Persona' Tabelle, angepasste 'MarketingMatrix') + ├── scripts/seed_marketing_data.py (Befüllt 'Persona' mit Pains/Gains) + ├── scripts/sync_notion_personas.py (Synchronisiert Personas aus Notion) + └── scripts/generate_matrix.py (Generiert Texte für alle Vertical x Persona Kombinationen) + +IX. DAS FUNDAMENT └── config.py (Einstellungen & Konstanten für ALLE) ```
diff --git a/sync_archetypes_final.py b/sync_archetypes_final.py
new file mode 100644
index 00000000..685e50c8
--- /dev/null
+++ b/sync_archetypes_final.py
@@ -0,0 +1,183 @@
+
+import requests
+import json
+import os
+
+# --- Configuration ---
+try:
+    with open("notion_token.txt", "r") as f:
+        NOTION_TOKEN = f.read().strip()
+except FileNotFoundError:
+    print("Error: notion_token.txt not found.")
+    # exit() is injected by the site module and may be missing (python -S);
+    # raising SystemExit works in every interpreter.
+    raise SystemExit(1)
+
+NOTION_VERSION = "2022-06-28"
+NOTION_API_BASE_URL = "https://api.notion.com/v1"
+HEADERS = {
+    "Authorization": f"Bearer {NOTION_TOKEN}",
+    "Notion-Version": NOTION_VERSION,
+    "Content-Type": "application/json",
+}
+
+# DB: Personas / Roles
+DB_ID = "30588f42854480c38919e22d74d945ea"
+
+# requests never times out on its own; cap every call so a stalled
+# connection cannot hang the sync forever.
+REQUEST_TIMEOUT_SECONDS = 30
+
+# --- Data for Archetypes ---
+archetypes = [
+    {
+        "name": "Wirtschaftlicher Entscheider",
+        "pains": [
+            "Steigende Personalkosten im Reinigungs- und Servicebereich gefährden Profitabilität.",
+            "Fachkräftemangel und Schwierigkeiten bei der Stellenbesetzung.",
+            "Inkonsistente Qualitätsstandards schaden dem Ruf des Hauses.",
+            "Hoher Managementaufwand für manuelle operative Prozesse."
+        ],
+        "gains": [
+            "Reduktion operativer Personalkosten um 10-25%.",
+            "Deutliche Abnahme der Überstunden (bis zu 50%).",
+            "Sicherstellung konstant hoher Qualitätsstandards.",
+            "Erhöhung der operativen Effizienz durch präzise Datenanalysen."
+        ],
+        "kpis": "Betriebskosten pro Einheit, Gästezufriedenheit (NPS), Mitarbeiterfluktuation.",
+        "positions": "Direktor, Geschäftsführer, C-Level, Einkaufsleiter."
+    },
+    {
+        "name": "Operativer Entscheider",
+        "pains": [
+            "Team ist überlastet und gestresst (Gefahr hoher Fluktuation).",
+            "Zu viele manuelle Routineaufgaben wie Abräumen oder Materialtransport.",
+            "Mangelnde Personalverfügbarkeit in Stoßzeiten führt zu Engpässen."
+        ],
+        "gains": [
+            "Signifikante Entlastung des Personals von Routineaufgaben (20-40% Zeitgewinn).",
+            "Garantierte Reinigungszyklen unabhängig von Personalausfällen.",
+            "Mehr Zeit für wertschöpfende Aufgaben (Gästebetreuung, Upselling)."
+        ],
+        "kpis": "Zeitaufwand für Routineaufgaben, Abdeckungsrate der Zyklen, Servicegeschwindigkeit.",
+        "positions": "Leiter Housekeeping, F&B Manager, Restaurantleiter, Stationsleitung."
+    },
+    {
+        "name": "Infrastruktur-Verantwortlicher",
+        "pains": [
+            "Technische Komplexität der Integration in bestehende Infrastruktur (Aufzüge, WLAN).",
+            "Sorge vor hohen Ausfallzeiten und unplanmäßigen Wartungskosten.",
+            "Fehlendes internes Fachpersonal für die Wartung autonomer Systeme."
+        ],
+        "gains": [
+            "Reibungslose Integration (20-30% schnellere Implementierung).",
+            "Minimierung von Ausfallzeiten um 80-90% durch proaktives Monitoring.",
+            "Planbare Wartung und transparente Kosten durch feste SLAs."
+        ],
+        "kpis": "System-Uptime, Implementierungszeit, Wartungskosten (TCO).",
+        "positions": "Technischer Leiter, Facility Manager, IT-Leiter."
+    },
+    {
+        "name": "Innovations-Treiber",
+        "pains": [
+            "Verlust der Wettbewerbsfähigkeit durch veraltete Prozesse.",
+            "Schwierigkeit das Unternehmen als modernen Arbeitgeber zu positionieren.",
+            "Statische Informations- und Marketingflächen werden oft ignoriert."
+        ],
+        "gains": [
+            "Positionierung als Innovationsführer am Markt.",
+            "Steigerung der Kundeninteraktion um 20-30%.",
+            "Gewinnung wertvoller Daten zur kontinuierlichen Prozessoptimierung.",
+            "Erhöhte Attraktivität für junge, technikaffine Talente."
+        ],
+        "kpis": "Besucherinteraktionsrate, Anzahl Prozessinnovationen, Modernitäts-Sentiment.",
+        "positions": "Marketingleiter, Center Manager, CDO, Business Development."
+    }
+]
+
+# --- Helper Functions ---
+
+def format_rich_text(text):
+    """Wrap text in the payload shape used here for a Notion rich_text property."""
+    return {"rich_text": [{"type": "text", "text": {"content": text}}]}
+
+def format_title(text):
+    """Wrap text in the payload shape used here for a Notion title property."""
+    return {"title": [{"type": "text", "text": {"content": text}}]}
+
+def find_page(title):
+    """Return the first page in DB_ID whose "Role" title equals title, or None.
+
+    Raises requests.HTTPError (via raise_for_status) on an error status.
+    """
+    url = f"{NOTION_API_BASE_URL}/databases/{DB_ID}/query"
+    payload = {
+        "filter": {
+            "property": "Role",
+            "title": {"equals": title}
+        }
+    }
+    resp = requests.post(url, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
+    resp.raise_for_status()
+    results = resp.json().get("results")
+    return results[0] if results else None
+
+def create_page(properties):
+    """POST a new page with the given properties into DB_ID.
+
+    Raises requests.HTTPError (via raise_for_status) on an error status.
+    """
+    url = f"{NOTION_API_BASE_URL}/pages"
+    payload = {
+        "parent": {"database_id": DB_ID},
+        "properties": properties
+    }
+    resp = requests.post(url, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
+    resp.raise_for_status()
+    print("Created.")
+
+def update_page(page_id, properties):
+    """PATCH the given properties onto the existing page page_id.
+
+    Raises requests.HTTPError (via raise_for_status) on an error status.
+    """
+    url = f"{NOTION_API_BASE_URL}/pages/{page_id}"
+    payload = {"properties": properties}
+    resp = requests.patch(url, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
+    resp.raise_for_status()
+    print("Updated.")
+
+# --- Main Logic ---
+
+def main():
+    """Create or update one Notion page per archetype, matched by its name."""
+    print(f"Syncing {len(archetypes)} Personas to Notion DB {DB_ID}...")
+
+    for p in archetypes:
+        print(f"Processing '{p['name']}'...")
+
+        # Each list is flattened into one newline-separated "- item" string.
+        pains_text = "\n".join([f"- {item}" for item in p["pains"]])
+        gains_text = "\n".join([f"- {item}" for item in p["gains"]])
+
+        properties = {
+            "Role": format_title(p["name"]),
+            "Pains": format_rich_text(pains_text),
+            "Gains": format_rich_text(gains_text),
+            "KPIs": format_rich_text(p.get("kpis", "")),
+            "Typische Positionen": format_rich_text(p.get("positions", ""))
+        }
+
+        existing_page = find_page(p["name"])
+
+        if existing_page:
+            print(f" -> Found existing page {existing_page['id']}. Updating...")
+            update_page(existing_page["id"], properties)
+        else:
+            print(" -> Creating new page...")
+            create_page(properties)
+
+    print("Sync complete.")
+
+if __name__ == "__main__":
+    main()
diff --git a/sync_personas_to_notion.py b/sync_personas_to_notion.py
new file mode 100644
index 00000000..1fd5bdd9
--- /dev/null
+++ b/sync_personas_to_notion.py
@@ -0,0 +1,171 @@
+
+import requests
+import json
+
+# --- Configuration ---
+try:
+    with open("notion_token.txt", "r") as f:
+        NOTION_TOKEN = f.read().strip()
+except FileNotFoundError:
+    print("Error: notion_token.txt not found.")
+    # exit() is injected by the site module and may be missing (python -S);
+    # raising SystemExit works in every interpreter.
+    raise SystemExit(1)
+
+NOTION_VERSION = "2022-06-28"
+NOTION_API_BASE_URL = "https://api.notion.com/v1"
+HEADERS = {
+    "Authorization": f"Bearer {NOTION_TOKEN}",
+    "Notion-Version": NOTION_VERSION,
+    "Content-Type": "application/json",
+}
+
+# DB: Sector & Persona Master
+DB_ID = "2e288f42-8544-8113-b878-ec99c8a02a6b"
+
+# requests never times out on its own; cap every call so a stalled
+# connection cannot hang the sync forever.
+REQUEST_TIMEOUT_SECONDS = 30
+
+# --- Data ---
+archetypes = [
+    {
+        "name": "Wirtschaftlicher Entscheider",
+        "pains": [
+            "Steigende operative Personalkosten und Fachkräftemangel gefährden die Profitabilität.",
+            "Unklare Amortisation (ROI) und Risiko von Fehlinvestitionen bei neuen Technologien.",
+            "Intransparente Folgekosten (TCO) und schwierige Budgetplanung über die Lebensdauer."
+        ],
+        "gains": [
+            "Nachweisbare Senkung der operativen Kosten (10-25%) und schnelle Amortisation.",
+            "Sicherung der Wettbewerbsfähigkeit durch effizientere Kostenstrukturen.",
+            "Volle Transparenz und Planbarkeit durch klare Service-Modelle (SLAs)."
+        ]
+    },
+    {
+        "name": "Operativer Entscheider",
+        "pains": [
+            "Personelle Unterbesetzung führt zu Überstunden, Stress und Qualitätsmängeln.",
+            "Wiederkehrende Routineaufgaben binden wertvolle Fachkräfte-Ressourcen.",
+            "Schwierigkeit, gleichbleibend hohe Standards (Hygiene/Service) 24/7 zu garantieren."
+        ],
+        "gains": [
+            "Spürbare Entlastung des Teams von Routineaufgaben (20-40%).",
+            "Garantierte, gleichbleibend hohe Ausführungsqualität unabhängig von der Tagesform.",
+            "Stabilisierung der operativen Abläufe und Kompensation von Personalausfällen."
+        ]
+    },
+    {
+        "name": "Infrastruktur-Verantwortlicher",
+        "pains": [
+            "Sorge vor komplexer Integration in bestehende IT- und Gebäudeinfrastruktur (WLAN, Türen, Aufzüge).",
+            "Risiko von hohen Ausfallzeiten und aufwändiger Fehlerbehebung ohne internes Spezialwissen.",
+            "Unklare Wartungsaufwände und Angst vor 'Insel-Lösungen' ohne Schnittstellen."
+        ],
+        "gains": [
+            "Reibungslose, fachgerechte Integration durch Experten-Support (Plug & Play).",
+            "Maximale Betriebssicherheit durch proaktives Monitoring und schnelle Reaktionszeiten.",
+            "Zentrales Management und volle Transparenz über Systemstatus und Wartungsbedarf."
+        ]
+    },
+    {
+        "name": "Innovations-Treiber",
+        "pains": [
+            "Verlust der Attraktivität als moderner Arbeitgeber oder Dienstleister (Veraltetes Image).",
+            "Fehlende 'Wow-Effekte' in der Kundeninteraktion und mangelnde Differenzierung vom Wettbewerb.",
+            "Verpasste Chancen durch fehlende Datengrundlage für digitale Optimierungen."
+        ],
+        "gains": [
+            "Positionierung als Innovationsführer und Steigerung der Markenattraktivität.",
+            "Schaffung einzigartiger Kundenerlebnisse durch sichtbare High-Tech-Lösungen.",
+            "Gewinnung wertvoller Daten zur kontinuierlichen Prozessoptimierung und Digitalisierung."
+        ]
+    }
+]
+
+# --- Helper Functions ---
+
+def format_rich_text(text):
+    """Wrap text in the payload shape used here for a Notion rich_text property."""
+    return {"rich_text": [{"type": "text", "text": {"content": text}}]}
+
+def format_title(text):
+    """Wrap text in the payload shape used here for a Notion title property."""
+    return {"title": [{"type": "text", "text": {"content": text}}]}
+
+def find_page(title):
+    """Return the first page in DB_ID whose "Name" title equals title, or None.
+
+    Raises requests.HTTPError (via raise_for_status) on an error status.
+    """
+    url = f"{NOTION_API_BASE_URL}/databases/{DB_ID}/query"
+    payload = {
+        "filter": {
+            "property": "Name",
+            "title": {"equals": title}
+        }
+    }
+    resp = requests.post(url, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
+    resp.raise_for_status()
+    results = resp.json().get("results")
+    return results[0] if results else None
+
+def create_page(properties):
+    """POST a new page with the given properties into DB_ID.
+
+    Raises requests.HTTPError (via raise_for_status) on an error status.
+    """
+    url = f"{NOTION_API_BASE_URL}/pages"
+    payload = {
+        "parent": {"database_id": DB_ID},
+        "properties": properties
+    }
+    resp = requests.post(url, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
+    resp.raise_for_status()
+    print("Created.")
+
+def update_page(page_id, properties):
+    """PATCH the given properties onto the existing page page_id.
+
+    Raises requests.HTTPError (via raise_for_status) on an error status.
+    """
+    url = f"{NOTION_API_BASE_URL}/pages/{page_id}"
+    payload = {"properties": properties}
+    resp = requests.patch(url, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
+    resp.raise_for_status()
+    print("Updated.")
+
+# --- Main Sync Loop ---
+
+def main():
+    """Create or update one Notion page per archetype, matched by its name."""
+    print(f"Syncing {len(archetypes)} Personas to Notion DB {DB_ID}...")
+
+    for p in archetypes:
+        print(f"Processing '{p['name']}'...")
+
+        # Format Pains/Gains as lists with bullets for Notion Text field
+        pains_text = "\n".join([f"- {item}" for item in p["pains"]])
+        gains_text = "\n".join([f"- {item}" for item in p["gains"]])
+
+        properties = {
+            "Name": format_title(p["name"]),
+            "Pains": format_rich_text(pains_text),
+            "Gains": format_rich_text(gains_text),
+            # Optional: Add a tag to distinguish them from Sectors if needed?
+            # Currently just relying on Name uniqueness.
+        }
+
+        existing_page = find_page(p["name"])
+
+        if existing_page:
+            print(f" -> Found existing page {existing_page['id']}. Updating...")
+            update_page(existing_page["id"], properties)
+        else:
+            print(" -> Creating new page...")
+            create_page(properties)
+
+    print("Sync complete.")
+
+if __name__ == "__main__":
+    main()