"""Sync persona archetypes from a Notion database into the local DB.

Reads the Notion integration token from disk, pages through the
"Sector & Persona Master" Notion database, and upserts the rows whose
title matches one of the target archetypes into the ``Persona`` table.
"""

import sys
import os
import requests
import json
import logging

# Add company-explorer to path (parent of backend)
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))

from backend.database import SessionLocal, Persona, init_db
from backend.config import settings

# Setup Logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

NOTION_TOKEN_FILE = "/app/notion_token.txt"

# Sector & Persona Master DB
PERSONAS_DB_ID = "30588f42-8544-80c3-8919-e22d74d945ea"

# Only rows whose title matches one of these archetype names are synced;
# everything else in the Notion DB is skipped.
VALID_ARCHETYPES = {
    "Wirtschaftlicher Entscheider",
    "Operativer Entscheider",
    "Infrastruktur-Verantwortlicher",
    "Innovations-Treiber",
    "Influencer"
}

# Seconds to wait for each Notion API call. Without an explicit timeout,
# requests waits forever on a stalled connection and the sync hangs.
REQUEST_TIMEOUT = 30


def load_notion_token():
    """Return the Notion integration token read from ``NOTION_TOKEN_FILE``.

    Exits the process with status 1 if the token file is missing.
    """
    try:
        with open(NOTION_TOKEN_FILE, "r") as f:
            return f.read().strip()
    except FileNotFoundError:
        logger.error(f"Notion token file not found at {NOTION_TOKEN_FILE}")
        sys.exit(1)


def query_notion_db(token, db_id):
    """Query a Notion database and return all result pages.

    Follows Notion's cursor-based pagination until ``has_more`` is false.
    On a non-200 response the error is logged and whatever pages were
    collected so far are returned (best-effort, no exception raised).
    """
    url = f"https://api.notion.com/v1/databases/{db_id}/query"
    headers = {
        "Authorization": f"Bearer {token}",
        "Notion-Version": "2022-06-28",
        "Content-Type": "application/json"
    }
    results = []
    has_more = True
    next_cursor = None
    while has_more:
        payload = {}
        if next_cursor:
            payload["start_cursor"] = next_cursor
        # BUGFIX: pass an explicit timeout — requests otherwise blocks
        # indefinitely on a hung connection.
        response = requests.post(url, headers=headers, json=payload,
                                 timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            logger.error(f"Error querying Notion DB {db_id}: {response.text}")
            break
        data = response.json()
        results.extend(data.get("results", []))
        has_more = data.get("has_more", False)
        next_cursor = data.get("next_cursor")
    return results


def extract_title(prop):
    """Concatenate the plain text of a Notion ``title`` property ('' if absent)."""
    if not prop:
        return ""
    return "".join([t.get("plain_text", "") for t in prop.get("title", [])])


def extract_rich_text(prop):
    """Concatenate the plain text of a Notion ``rich_text`` property ('' if absent)."""
    if not prop:
        return ""
    return "".join([t.get("plain_text", "") for t in prop.get("rich_text", [])])


def extract_rich_text_to_list(prop):
    """Extract rich text and convert bullet points/newlines into a list of strings.

    Blank lines are dropped; leading "- " or "• " bullet markers are stripped.
    """
    if not prop:
        return []
    full_text = "".join([t.get("plain_text", "") for t in prop.get("rich_text", [])])
    # Split by newline and clean up bullets
    lines = full_text.split('\n')
    cleaned_lines = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        if line.startswith("- "):
            line = line[2:]
        elif line.startswith("• "):
            line = line[2:]
        cleaned_lines.append(line)
    return cleaned_lines


def sync_personas(token, session):
    """Upsert all valid persona archetypes from Notion into the local DB.

    Rows are matched on ``Persona.name``; existing rows are updated in
    place, missing ones are created. A single commit covers the batch.
    """
    logger.info("Syncing Personas from Notion...")
    pages = query_notion_db(token, PERSONAS_DB_ID)
    count = 0
    for page in pages:
        props = page.get("properties", {})
        # The title property is 'Role' in the new DB, not 'Name'
        name = extract_title(props.get("Role"))
        if name not in VALID_ARCHETYPES:
            logger.debug(f"Skipping '{name}' (Not a target Archetype)")
            continue
        logger.info(f"Processing Persona: {name}")
        pains_list = extract_rich_text_to_list(props.get("Pains"))
        gains_list = extract_rich_text_to_list(props.get("Gains"))
        description = extract_rich_text(props.get("Rollenbeschreibung"))
        convincing_arguments = extract_rich_text(props.get("Was ihn überzeugt"))
        typical_positions = extract_rich_text(props.get("Typische Positionen"))
        kpis = extract_rich_text(props.get("KPIs"))
        # Upsert Logic
        persona = session.query(Persona).filter(Persona.name == name).first()
        if not persona:
            persona = Persona(name=name)
            session.add(persona)
            logger.info(f" -> Creating new entry")
        else:
            logger.info(f" -> Updating existing entry")
        # Lists are stored as JSON text; keep non-ASCII (German) readable.
        persona.pains = json.dumps(pains_list, ensure_ascii=False)
        persona.gains = json.dumps(gains_list, ensure_ascii=False)
        persona.description = description
        persona.convincing_arguments = convincing_arguments
        persona.typical_positions = typical_positions
        persona.kpis = kpis
        count += 1
    session.commit()
    logger.info(f"Sync complete. Updated {count} personas.")


if __name__ == "__main__":
    token = load_notion_token()
    db = SessionLocal()
    try:
        sync_personas(token, db)
    except Exception as e:
        logger.error(f"Sync failed: {e}", exc_info=True)
    finally:
        db.close()