[30388f42] Infrastructure Hardening: Repaired CE/Connector DB schema, fixed frontend styling build, implemented robust echo shield in worker v2.1.1, and integrated Lead Engine into gateway.
This commit is contained in:
149
company-explorer/backend/scripts/sync_notion_personas.py
Normal file
149
company-explorer/backend/scripts/sync_notion_personas.py
Normal file
@@ -0,0 +1,149 @@
|
||||
import sys
|
||||
import os
|
||||
import requests
|
||||
import json
|
||||
import logging
|
||||
|
||||
# Add company-explorer to path (parent of backend)
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
|
||||
|
||||
from backend.database import SessionLocal, Persona, init_db
|
||||
from backend.config import settings
|
||||
|
||||
# Logging: configure the root logger once (INFO and above, timestamped lines)
# and expose a module-level logger for the functions below.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
|
||||
|
||||
# File from which load_notion_token() reads the Notion API token.
NOTION_TOKEN_FILE = "/app/notion_token.txt"

# Notion database ID of the "Sector & Persona Master DB".
PERSONAS_DB_ID = "30588f42-8544-80c3-8919-e22d74d945ea"

# Only Notion rows whose title exactly matches one of these names are synced;
# every other row is skipped by sync_personas().
VALID_ARCHETYPES = {
    "Wirtschaftlicher Entscheider",
    "Operativer Entscheider",
    "Infrastruktur-Verantwortlicher",
    "Innovations-Treiber",
    "Influencer",
}
|
||||
|
||||
def load_notion_token():
    """Read the Notion API token from NOTION_TOKEN_FILE.

    Returns:
        The file contents with leading/trailing whitespace removed.

    Exits:
        Logs an error and terminates the process with status 1 when the
        file does not exist or contains nothing but whitespace.
    """
    try:
        with open(NOTION_TOKEN_FILE, "r", encoding="utf-8") as f:
            token = f.read().strip()
    except FileNotFoundError:
        logger.error(f"Notion token file not found at {NOTION_TOKEN_FILE}")
        sys.exit(1)

    # An empty token would only fail later as an opaque API error;
    # stop here with a message that points at the real cause.
    if not token:
        logger.error(f"Notion token file at {NOTION_TOKEN_FILE} is empty")
        sys.exit(1)

    return token
|
||||
|
||||
def query_notion_db(token, db_id, timeout=30):
    """Fetch all pages of a Notion database via the paginated query endpoint.

    Args:
        token: Notion API token, sent as a Bearer credential.
        db_id: ID of the Notion database to query.
        timeout: Seconds to wait on each HTTP request before requests
            raises a timeout error.

    Returns:
        A list with the concatenated "results" entries of every page that
        was fetched. If a request returns a non-200 status, the error is
        logged and the entries collected so far (possibly none) are
        returned.

    Raises:
        requests.exceptions.RequestException: Propagated on connection
            failures or timeouts.
    """
    url = f"https://api.notion.com/v1/databases/{db_id}/query"
    headers = {
        "Authorization": f"Bearer {token}",
        "Notion-Version": "2022-06-28",
        "Content-Type": "application/json",
    }

    results = []
    payload = {}  # first request carries no cursor

    while True:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        if response.status_code != 200:
            logger.error(f"Error querying Notion DB {db_id}: {response.text}")
            break

        data = response.json()
        results.extend(data.get("results", []))

        next_cursor = data.get("next_cursor")
        # Stop when Notion reports no more pages. Also stop if a cursor is
        # missing despite has_more: re-posting without a cursor would fetch
        # the first page again and never terminate.
        if not data.get("has_more", False) or not next_cursor:
            break
        payload = {"start_cursor": next_cursor}

    return results
|
||||
|
||||
def extract_title(prop):
    """Return the plain text of a Notion title property.

    Concatenates the "plain_text" of every fragment under the property's
    "title" key. A missing/empty property yields an empty string.
    """
    if prop:
        fragments = prop.get("title", [])
        return "".join(fragment.get("plain_text", "") for fragment in fragments)
    return ""
|
||||
|
||||
def extract_rich_text(prop):
    """Return the plain text of a Notion rich-text property.

    Concatenates the "plain_text" of every fragment under the property's
    "rich_text" key. A missing/empty property yields an empty string.
    """
    if prop:
        fragments = prop.get("rich_text", [])
        return "".join(fragment.get("plain_text", "") for fragment in fragments)
    return ""
|
||||
|
||||
def extract_rich_text_to_list(prop):
    """Split a Notion rich-text property into a list of cleaned lines.

    The "plain_text" of all fragments under "rich_text" is concatenated and
    split on newlines. Each line is stripped; a leading "- " or "• " bullet
    marker is removed together with any extra whitespace after it. Blank
    lines and lines consisting only of a bullet marker are dropped.

    Args:
        prop: Notion property dict, or a falsy value when the property is
            absent.

    Returns:
        A list of non-empty strings, in their original order; an empty list
        when prop is falsy.
    """
    if not prop:
        return []

    full_text = "".join(t.get("plain_text", "") for t in prop.get("rich_text", []))

    cleaned_lines = []
    for raw_line in full_text.split("\n"):
        line = raw_line.strip()
        # A bullet with nothing after it carries no content ("- " has
        # already been reduced to "-" by the strip above).
        if line in ("-", "•"):
            continue
        if line.startswith(("- ", "• ")):
            # Re-strip so "•   text" does not keep its leading spaces.
            line = line[2:].strip()
        if line:
            cleaned_lines.append(line)

    return cleaned_lines
|
||||
|
||||
def sync_personas(token, session):
    """Upsert the target persona archetypes from Notion into the database.

    Queries the Notion database PERSONAS_DB_ID, skips every row whose
    'Role' title is not in VALID_ARCHETYPES, and creates or updates the
    Persona row with the matching name. 'Pains' and 'Gains' are stored as
    JSON-encoded lists of strings; the remaining fields are stored as plain
    text. All changes are committed in a single transaction at the end.

    Args:
        token: Notion API token passed through to query_notion_db().
        session: Database session used for the lookups and the commit.

    Raises:
        Exception: Any error raised while processing or committing is
            re-raised after the session has been rolled back.
    """
    logger.info("Syncing Personas from Notion...")

    pages = query_notion_db(token, PERSONAS_DB_ID)

    # Personas already handled in this run, keyed by name. If Notion holds
    # two rows for the same role, the second one must reuse the pending
    # object instead of inserting a duplicate (the DB lookup alone would
    # miss it when the session does not autoflush — not visible from here).
    synced = {}

    try:
        for page in pages:
            props = page.get("properties", {})
            # The title property is 'Role' in the new DB, not 'Name'
            name = extract_title(props.get("Role"))

            if name not in VALID_ARCHETYPES:
                logger.debug(f"Skipping '{name}' (Not a target Archetype)")
                continue

            logger.info(f"Processing Persona: {name}")

            pains_list = extract_rich_text_to_list(props.get("Pains"))
            gains_list = extract_rich_text_to_list(props.get("Gains"))

            description = extract_rich_text(props.get("Rollenbeschreibung"))
            convincing_arguments = extract_rich_text(props.get("Was ihn überzeugt"))
            typical_positions = extract_rich_text(props.get("Typische Positionen"))
            kpis = extract_rich_text(props.get("KPIs"))

            # Upsert Logic
            persona = synced.get(name)
            if persona is not None:
                logger.warning(f"  -> Duplicate Notion row for '{name}', overwriting earlier values")
            else:
                persona = session.query(Persona).filter(Persona.name == name).first()
                if persona is None:
                    persona = Persona(name=name)
                    session.add(persona)
                    logger.info("  -> Creating new entry")
                else:
                    logger.info("  -> Updating existing entry")
                synced[name] = persona

            # ensure_ascii=False keeps umlauts readable in the stored JSON.
            persona.pains = json.dumps(pains_list, ensure_ascii=False)
            persona.gains = json.dumps(gains_list, ensure_ascii=False)
            persona.description = description
            persona.convincing_arguments = convincing_arguments
            persona.typical_positions = typical_positions
            persona.kpis = kpis

        session.commit()
    except Exception:
        # Leave the caller's session usable and free of half-applied changes.
        session.rollback()
        raise

    logger.info(f"Sync complete. Updated {len(synced)} personas.")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: load the token, run one sync, always close the session.
    token = load_notion_token()
    db = SessionLocal()

    try:
        sync_personas(token, db)
    except Exception as e:
        logger.error(f"Sync failed: {e}", exc_info=True)
        # Report the failure to the caller (cron, CI, shell) via the exit
        # status; the finally clause below still runs before the exit.
        sys.exit(1)
    finally:
        db.close()
|
||||
Reference in New Issue
Block a user