Files
Brancheneinstufung2/import_single_competitor.py
2026-01-29 11:03:21 +00:00

320 lines
13 KiB
Python

import json
import requests
import sys
import argparse
import re
# --- CONFIGURATION ---
# Placeholder value; main() replaces it with the contents of notion_token.txt.
NOTION_TOKEN = ""  # Will be loaded from file

# HTTP headers sent with every Notion request. "Authorization" is built from
# the (still empty) token here and overwritten by main() once the token is read.
HEADERS = {
    "Authorization": "Bearer " + NOTION_TOKEN,
    "Content-Type": "application/json",
    "Notion-Version": "2022-06-28",
}

# --- DATABASE IDs ---
# IDs of the Notion databases this script reads from and writes to.
COMPANIES_DB_ID = "2e688f42-8544-8158-8673-d8b1e3eca5b5"
CANONICAL_PRODUCTS_DB_ID = "2f088f42-8544-81d5-bec7-d9189f3bacd4"
PORTFOLIO_DB_ID = "2e688f42-8544-81df-8fcc-f1d7f8745e00"
LANDMINES_DB_ID = "2e688f42-8544-81aa-94f8-d6242be4d0cd"
REFERENCES_DB_ID = "2e688f42-8544-81df-8d83-f4d7f57d8168"
INDUSTRIES_DB_ID = "2ec88f42-8544-8014-ab38-ea664b4c2b81"
# --- API HELPERS ---
def query_db(db_id, filter_payload=None):
    """Return every page of a Notion database, following pagination.

    Args:
        db_id: ID of the Notion database to query.
        filter_payload: Optional filter object included in each request body.

    Returns:
        A list holding the ``results`` entries of all response pages, or
        ``None`` as soon as a request answers with a non-200 status (the
        status code and the decoded response body are printed first).
    """
    endpoint = f"https://api.notion.com/v1/databases/{db_id}/query"
    collected = []
    cursor = None
    more_pages = True
    while more_pages:
        # Build the request body fresh for every page of results.
        body = {}
        if cursor:
            body["start_cursor"] = cursor
        if filter_payload:
            body["filter"] = filter_payload

        resp = requests.post(endpoint, headers=HEADERS, json=body)
        if resp.status_code != 200:
            print(f"Error querying DB {db_id}: {resp.status_code}")
            print(resp.json())
            return None

        chunk = resp.json()
        collected += chunk["results"]
        # Continue only while the API reports further pages.
        more_pages = bool(chunk.get("has_more"))
        if more_pages:
            cursor = chunk["next_cursor"]
    return collected
def create_page(db_id, properties):
    """Create a page in a Notion database.

    Args:
        db_id: ID of the database that becomes the page's parent.
        properties: Property payload for the new page.

    Returns:
        The decoded JSON response on HTTP 200; otherwise ``None`` after
        printing the status code and the decoded error body.
    """
    body = json.dumps({"parent": {"database_id": db_id}, "properties": properties})
    resp = requests.post("https://api.notion.com/v1/pages", headers=HEADERS, data=body)
    if resp.status_code != 200:
        print(f"Error creating page in DB {db_id}: {resp.status_code}")
        print(resp.json())
        return None
    return resp.json()
def update_page(page_id, properties):
    """Patch the properties of an existing Notion page.

    Args:
        page_id: ID of the page to update.
        properties: Property payload to apply.

    Returns:
        The decoded JSON response on HTTP 200; otherwise ``None`` after
        printing the status code and the decoded error body.
    """
    endpoint = f"https://api.notion.com/v1/pages/{page_id}"
    body = json.dumps({"properties": properties})
    resp = requests.patch(endpoint, headers=HEADERS, data=body)
    if resp.status_code != 200:
        print(f"Error updating page {page_id}: {resp.status_code}")
        print(resp.json())
        return None
    return resp.json()
# --- STATE AWARENESS HELPERS ---
def get_existing_items_map(db_id, name_property="Name"):
    """Build a ``{title: page id}`` lookup for all pages of a database.

    Args:
        db_id: ID of the database to read.
        name_property: Name of the title property that holds the item name.

    Returns:
        A dict mapping each non-empty, whitespace-stripped title to its page
        id. Pages without a usable title are left out; when several pages
        share a title, the last one seen wins.

    Exits the process via ``sys.exit`` if the database cannot be queried.
    """
    print(f"Fetching existing items from DB {db_id} to build cache...")
    pages = query_db(db_id)
    if pages is None:
        sys.exit(f"Could not fetch items from DB {db_id}. Aborting.")

    name_to_id = {}
    for page in pages:
        try:
            fragments = page["properties"][name_property].get("title", [])
            if not fragments:
                # Empty title: nothing to index for this page.
                continue
            # Only the first title fragment is used as the item name.
            name = fragments[0].get("text", {}).get("content", "").strip()
            if name:
                name_to_id[name] = page["id"]
        except (KeyError, IndexError):
            # Missing property or malformed page: skip it.
            continue

    print(f" - Found {len(name_to_id)} existing items.")
    return name_to_id
def get_existing_relations(db_id, relation_property_name, target_relation_id_prop_name):
    """Collect ``(name, first related id, first target id)`` triples from a DB.

    Args:
        db_id: ID of the database to read.
        relation_property_name: Name of the first relation property.
        target_relation_id_prop_name: Name of the second relation property.

    Returns:
        A set of 3-tuples ``(item_name, related_id, target_related_id)`` where
        the ids are the first entries of the two relation properties. Pages
        lacking a "Name" title, either property, or an entry in either
        relation are skipped.

    Exits the process via ``sys.exit`` if the database cannot be queried.
    """
    print(f"Fetching existing relations from DB {db_id}...")
    pages = query_db(db_id)
    if pages is None:
        sys.exit(f"Could not fetch relations from DB {db_id}. Aborting.")

    triples = set()
    for page in pages:
        try:
            props = page["properties"]
            name = props["Name"]["title"][0]["text"]["content"]
            first_ids = [
                rel["id"] for rel in props[relation_property_name].get("relation", [])
            ]
            second_ids = [
                rel["id"] for rel in props[target_relation_id_prop_name].get("relation", [])
            ]
        except (KeyError, IndexError):
            continue
        # Both relations must be populated for the page to count.
        if first_ids and second_ids:
            triples.add((name, first_ids[0], second_ids[0]))

    print(f" - Found {len(triples)} existing relations.")
    return triples
def inspect_database(db_id):
    """Print the name and type of every property of a Notion database.

    Args:
        db_id: ID of the database whose schema is fetched.

    Returns:
        None. On a non-200 response the status code and decoded body are
        printed; if the database reports no properties, a notice is printed.
    """
    print(f"🔍 Inspecting properties for database ID: {db_id}")
    resp = requests.get(f"https://api.notion.com/v1/databases/{db_id}", headers=HEADERS)
    if resp.status_code != 200:
        print(f"Error retrieving database properties: {resp.status_code}")
        print(resp.json())
        return

    schema = resp.json().get("properties", {})
    if not schema:
        print("No properties found for this database.")
        return

    print("\n--- Database Properties ---")
    for name, spec in schema.items():
        print(f"- Property Name: '{name}'")
        print(f" Type: {spec.get('type')}\n")
    print("---------------------------\n")
# --- MAIN LOGIC ---
def main():
    """Import one competitor from a JSON analysis file into Notion.

    Command line:
        --inspect DB_ID   Print the properties of a database and stop.
        --file PATH       JSON analysis file to read.
        --name NAME       Exact competitor name to import (needs --file).

    The Notion token is read from ``notion_token.txt`` in the working
    directory. The import then runs in phases: cache the current Notion
    state, locate the competitor in the JSON file, create the company page if
    missing, create missing industries and set the company's
    "Target Industries", and finally create landmines and references that do
    not exist yet for this competitor.

    Returns:
        None. Problems are reported on stdout and end the run early;
        unreadable databases abort the process via ``sys.exit``.
    """
    # HEADERS is mutated in place, so only NOTION_TOKEN needs rebinding.
    global NOTION_TOKEN

    # Parse arguments first so --help and usage errors work without a token file.
    parser = argparse.ArgumentParser(description="Import a single competitor from a JSON analysis file into Notion.")
    parser.add_argument('--file', help="Path to the JSON analysis file.")
    parser.add_argument('--name', help="Exact name of the competitor to import.")
    parser.add_argument('--inspect', help="Database ID to inspect.")
    args = parser.parse_args()
    if not args.inspect and not (args.file and args.name):
        # parser.error() prints the usage message and exits the process.
        parser.error("--file and --name are required.")

    try:
        with open("notion_token.txt", "r") as f:
            NOTION_TOKEN = f.read().strip()
    except FileNotFoundError:
        print("Error: `notion_token.txt` not found.")
        return
    HEADERS["Authorization"] = f"Bearer {NOTION_TOKEN}"

    if args.inspect:
        inspect_database(args.inspect)
        return

    # --- Phase 1: State Awareness ---
    print("\n--- Phase 1: Reading current state from Notion ---")
    companies_map = get_existing_items_map(COMPANIES_DB_ID)
    # Fetched as in the original flow; not consulted by the later phases.
    products_map = get_existing_items_map(CANONICAL_PRODUCTS_DB_ID)
    industries_map = get_existing_items_map(INDUSTRIES_DB_ID, name_property="Vertical")
    # For relations, a "<title>_<competitor id>" key is used to check existence.
    existing_landmines = _load_existing_relation_keys(LANDMINES_DB_ID, "Question", "landmines")
    existing_references = _load_existing_relation_keys(REFERENCES_DB_ID, "Customer", "references")

    json_file_path = args.file
    target_competitor_name = args.name

    # --- Phase 2: Processing JSON ---
    print(f"\n--- Phase 2: Processing local JSON file: {json_file_path} for {target_competitor_name} ---")
    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: `{json_file_path}` not found.")
        return
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from {json_file_path}: {e}")
        return

    # Locate the analysis entry; entries without competitor data are skipped
    # instead of raising KeyError.
    target_analysis = next(
        (
            analysis
            for analysis in data.get('analyses') or []
            if (analysis.get('competitor') or {}).get('name') == target_competitor_name
        ),
        None,
    )
    if not target_analysis:
        print(f"Error: Competitor '{target_competitor_name}' not found in 'analyses' list in {json_file_path}.")
        return

    # References live in a separate reference_analysis block.
    target_references_data = None
    for ref_block in data.get('reference_analysis') or []:
        if ref_block.get('competitor_name') == target_competitor_name:
            target_references_data = ref_block.get('references', [])
            break

    # Battlecards are matched the same tolerant way as references.
    target_battlecard = next(
        (
            bc
            for bc in data.get('battlecards') or []
            if bc.get('competitor_name') == target_competitor_name
        ),
        None,
    )

    print(f"\nProcessing target competitor: {target_competitor_name}")

    # --- Phase 3: "Upsert" Company ---
    if target_competitor_name not in companies_map:
        print(f" - Company '{target_competitor_name}' not found. Creating...")
        props = {"Name": {"title": [{"text": {"content": target_competitor_name}}]}}
        new_company = create_page(COMPANIES_DB_ID, props)
        if not new_company:
            print(f" - Failed to create company '{target_competitor_name}'. Halting.")
            return
        companies_map[target_competitor_name] = new_company["id"]
    company_id = companies_map[target_competitor_name]

    # --- Phase 4: Create and Link Target Industries ---
    print("\n--- Processing Target Industries ---")
    if INDUSTRIES_DB_ID:
        # `or []` also covers an explicit null in the JSON.
        industry_names = target_analysis.get('target_industries') or []
        # Page ids of industries that exist or were created; used only to
        # decide whether the company page needs an update at all.
        target_industry_relation_ids = []
        for industry_name in industry_names:
            if industry_name not in industries_map:
                print(f" - Industry '{industry_name}' not found in Notion DB. Creating...")
                props = {"Vertical": {"title": [{"text": {"content": industry_name}}]}}
                new_industry = create_page(INDUSTRIES_DB_ID, props)
                if new_industry:
                    industries_map[industry_name] = new_industry["id"]
                    target_industry_relation_ids.append({"id": new_industry["id"]})
            else:
                target_industry_relation_ids.append({"id": industries_map[industry_name]})
        if target_industry_relation_ids:
            print(f" - Linking company to {len(industry_names)} industries...")
            # Format for multi-select is a list of objects with names.
            multi_select_payload = [{"name": name} for name in industry_names]
            update_page(company_id, {"Target Industries": {"multi_select": multi_select_payload}})
    else:
        print(" - INDUSTRIES_DB_ID not set. Skipping.")

    # --- Phase 5: Import Landmines ---
    if target_battlecard and LANDMINES_DB_ID:
        print("\n--- Processing Landmines ---")
        for landmine in target_battlecard.get('landmine_questions') or []:
            unique_key = f"{landmine}_{company_id}"
            if unique_key in existing_landmines:
                print(f" - Landmine '{landmine}' already exists for this competitor. Skipping.")
                continue
            print(f" - Landmine '{landmine}' not found. Creating...")
            props = {
                "Question": {"title": [{"text": {"content": landmine}}]},
                "Related Competitor": {"relation": [{"id": company_id}]},
            }
            if create_page(LANDMINES_DB_ID, props):
                # Remember the key so duplicates within the file are skipped.
                existing_landmines.add(unique_key)

    # --- Phase 6: Import References ---
    if target_references_data and REFERENCES_DB_ID:
        print("\n--- Processing References ---")
        for ref in target_references_data:
            ref_name = ref.get("name", "Unknown Reference")
            unique_key = f"{ref_name}_{company_id}"
            if unique_key in existing_references:
                print(f" - Reference '{ref_name}' already exists for this competitor. Skipping.")
                continue
            print(f" - Reference '{ref_name}' not found. Creating...")
            # `or ""` guards against an explicit null snippet; the text is
            # cut to 2000 characters before being sent.
            quote = (ref.get("testimonial_snippet") or "")[:2000]
            props = {
                "Customer": {"title": [{"text": {"content": ref_name}}]},
                "Related Competitor": {"relation": [{"id": company_id}]},
                "Quote": {"rich_text": [{"text": {"content": quote}}]},
            }
            # Handle Industry as a select property.
            ref_industry_name = ref.get("industry")
            if ref_industry_name:
                props["Industry"] = {"select": {"name": ref_industry_name}}
            if create_page(REFERENCES_DB_ID, props):
                existing_references.add(unique_key)

    print("\n--- ✅ Import script finished ---")


def _load_existing_relation_keys(db_id, title_property, label):
    """Return the ``"<title>_<first related competitor id>"`` keys found in a DB.

    Pages that lack the title property, a title text fragment, or a
    "Related Competitor" relation entry are skipped. Exits the process via
    ``sys.exit`` when the database cannot be queried. ``label`` is only used
    in the printed summary line.
    """
    pages = query_db(db_id)
    if pages is None:
        sys.exit(f"Could not fetch items from DB {db_id}. Aborting.")
    keys = set()
    for page in pages:
        try:
            props = page["properties"]
            title = props[title_property]["title"][0]["text"]["content"]
            competitor_id = props["Related Competitor"]["relation"][0]["id"]
        except (KeyError, IndexError, TypeError):
            continue
        keys.add(f"{title}_{competitor_id}")
    print(f" - Found {len(keys)} existing {label}.")
    return keys
# Run the import only when this file is executed as a script.
if __name__ == "__main__":
    main()