Files
Brancheneinstufung2/company-explorer/backend/scripts/sync_notion_personas.py
Floke d64189ef5f [2ff88f42] multiplikation vorbereitet
multiplikation vorbereitet
2026-02-19 20:59:04 +00:00

135 lines
4.0 KiB
Python

import sys
import os
import requests
import json
import logging
# Add company-explorer to path (parent of backend)
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
from backend.database import SessionLocal, Persona, init_db
from backend.config import settings
# Setup Logging
# Module-wide logger: INFO level, timestamped "time - LEVEL - message" lines.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Location of the Notion integration token — /app suggests a container
# bind-mount; TODO confirm against the deployment config.
NOTION_TOKEN_FILE = "/app/notion_token.txt"
# Sector & Persona Master DB
PERSONAS_DB_ID = "2e288f42-8544-8113-b878-ec99c8a02a6b"
# Only pages whose Name matches one of these archetypes are synced;
# everything else in the Notion DB is skipped.
VALID_ARCHETYPES = {
    "Wirtschaftlicher Entscheider",
    "Operativer Entscheider",
    "Infrastruktur-Verantwortlicher",
    "Innovations-Treiber"
}
def load_notion_token():
    """Return the Notion API token read from NOTION_TOKEN_FILE.

    Exits the process with status 1 if the token file does not exist.
    """
    try:
        with open(NOTION_TOKEN_FILE, "r") as handle:
            raw = handle.read()
    except FileNotFoundError:
        logger.error(f"Notion token file not found at {NOTION_TOKEN_FILE}")
        sys.exit(1)
    return raw.strip()
def query_notion_db(token, db_id):
    """Fetch every page of a Notion database via the query endpoint.

    Follows Notion's cursor pagination (``start_cursor`` / ``has_more``)
    until all pages are collected. On a non-200 response the error is
    logged and the pages gathered so far are returned (best-effort).

    Args:
        token: Notion integration token (sent as a Bearer header).
        db_id: UUID of the Notion database to query.

    Returns:
        list: raw Notion page objects (possibly empty).
    """
    url = f"https://api.notion.com/v1/databases/{db_id}/query"
    headers = {
        "Authorization": f"Bearer {token}",
        "Notion-Version": "2022-06-28",
        "Content-Type": "application/json"
    }
    results = []
    has_more = True
    next_cursor = None
    while has_more:
        payload = {}
        if next_cursor:
            payload["start_cursor"] = next_cursor
        # timeout added: without it a stalled endpoint hangs the sync forever
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        if response.status_code != 200:
            logger.error(f"Error querying Notion DB {db_id}: {response.text}")
            break
        data = response.json()
        results.extend(data.get("results", []))
        has_more = data.get("has_more", False)
        next_cursor = data.get("next_cursor")
    return results
def extract_title(prop):
    """Concatenate the plain-text fragments of a Notion title property.

    Returns "" when the property is missing/empty.
    """
    if not prop:
        return ""
    fragments = [fragment.get("plain_text", "") for fragment in prop.get("title", [])]
    return "".join(fragments)
def extract_rich_text_to_list(prop):
    """Extract a Notion rich-text property as a list of cleaned lines.

    The full text is split on newlines; blank lines are dropped and a
    leading two-character bullet marker ("- " or "\u2022 ") is stripped
    from each remaining line.

    Bug fix: the original second check was ``line.startswith("")``,
    which is always True and chopped the first two characters off every
    non-"- " line. The bullet literal ("\u2022 ") was evidently lost in
    an encoding mangle; it is restored here.

    Args:
        prop: Notion property dict with a "rich_text" fragment list.

    Returns:
        list[str]: cleaned, non-empty lines (empty list for no prop).
    """
    if not prop:
        return []
    full_text = "".join(t.get("plain_text", "") for t in prop.get("rich_text", []))
    cleaned_lines = []
    for line in full_text.split('\n'):
        line = line.strip()
        if not line:
            continue
        # both markers are exactly two characters, hence the [2:] slice
        if line.startswith(("- ", "\u2022 ")):
            line = line[2:]
        cleaned_lines.append(line)
    return cleaned_lines
def sync_personas(token, session):
    """Pull persona pages from Notion and upsert the target archetypes.

    Pages whose title is not in VALID_ARCHETYPES are skipped. Pains and
    Gains are stored as JSON-encoded lists on the Persona row. A single
    commit is issued after the loop.
    """
    logger.info("Syncing Personas from Notion...")
    count = 0
    for page in query_notion_db(token, PERSONAS_DB_ID):
        props = page.get("properties", {})
        name = extract_title(props.get("Name"))
        if name not in VALID_ARCHETYPES:
            logger.debug(f"Skipping '{name}' (Not a target Archetype)")
            continue
        logger.info(f"Processing Persona: {name}")
        pains = extract_rich_text_to_list(props.get("Pains"))
        gains = extract_rich_text_to_list(props.get("Gains"))
        # Upsert: reuse the existing row when one matches by name.
        record = session.query(Persona).filter(Persona.name == name).first()
        if record is None:
            record = Persona(name=name)
            session.add(record)
            logger.info(" -> Creating new entry")
        else:
            logger.info(" -> Updating existing entry")
        record.pains = json.dumps(pains, ensure_ascii=False)
        record.gains = json.dumps(gains, ensure_ascii=False)
        count += 1
    session.commit()
    logger.info(f"Sync complete. Updated {count} personas.")
if __name__ == "__main__":
    # Entry point: load credentials, open a DB session, run the sync,
    # and always release the session — errors are logged, not re-raised.
    notion_token = load_notion_token()
    session = SessionLocal()
    try:
        sync_personas(notion_token, session)
    except Exception as exc:
        logger.error(f"Sync failed: {exc}", exc_info=True)
    finally:
        session.close()