"""Idempotent importer: sync competitor/product data from a local JSON
analysis file into three related Notion databases (companies, canonical
products, portfolio links), creating only the records that do not yet exist.
"""

import json
import sys

import requests

# --- CONFIGURATION ---
JSON_FILE = 'analysis_robo-planet.de-4.json'
NOTION_TOKEN = ""  # Will be loaded from file
HEADERS = {
    "Authorization": f"Bearer {NOTION_TOKEN}",
    "Content-Type": "application/json",
    "Notion-Version": "2022-06-28",
}

# --- DATABASE IDs ---
COMPANIES_DB_ID = "2e688f42-8544-8158-8673-d8b1e3eca5b5"
CANONICAL_PRODUCTS_DB_ID = "2f088f42-8544-81d5-bec7-d9189f3bacd4"
PORTFOLIO_DB_ID = "2e688f42-8544-81df-8fcc-f1d7f8745e00"
LANDMINES_DB_ID = ""  # Optional: Add if you want to re-import landmines
REFERENCES_DB_ID = ""  # Optional: Add if you want to re-import references

# Fail fast instead of hanging forever if the Notion API stalls.
REQUEST_TIMEOUT = 30


# --- API HELPERS ---
def query_db(db_id, filter_payload=None):
    """Retrieve all pages from a Notion database, with optional filter.

    Follows Notion's cursor pagination (`has_more` / `next_cursor`) until
    every page has been collected.

    Args:
        db_id: Notion database ID to query.
        filter_payload: Optional Notion filter object to include in the query.

    Returns:
        A list of page objects, or None if any request fails.
    """
    url = f"https://api.notion.com/v1/databases/{db_id}/query"
    all_pages = []
    start_cursor = None
    while True:
        payload = {}
        if start_cursor:
            payload["start_cursor"] = start_cursor
        if filter_payload:
            payload["filter"] = filter_payload
        response = requests.post(
            url, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            print(f"Error querying DB {db_id}: {response.status_code}")
            print(response.json())
            return None
        data = response.json()
        all_pages.extend(data["results"])
        if data.get("has_more"):
            start_cursor = data["next_cursor"]
        else:
            break
    return all_pages


def create_page(db_id, properties):
    """Create a new page in a Notion database.

    Args:
        db_id: Target database ID.
        properties: Notion properties payload for the new page.

    Returns:
        The created page object on success, None on failure (error is printed).
    """
    url = "https://api.notion.com/v1/pages"
    payload = {"parent": {"database_id": db_id}, "properties": properties}
    # Use `json=` (consistent with query_db) rather than manual json.dumps.
    response = requests.post(
        url, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT
    )
    if response.status_code == 200:
        return response.json()
    print(f"Error creating page in DB {db_id}: {response.status_code}")
    print(response.json())
    return None


# --- STATE AWARENESS HELPERS ---
def get_existing_items_map(db_id, name_property="Name"):
    """Fetch all items from a DB and return a map of {name: id}.

    Pages whose title property is missing or empty are skipped silently.
    Exits the program if the database cannot be queried at all.
    """
    print(f"Fetching existing items from DB {db_id} to build cache...")
    pages = query_db(db_id)
    if pages is None:
        sys.exit(f"Could not fetch items from DB {db_id}. Aborting.")
    item_map = {}
    for page in pages:
        try:
            item_name = page["properties"][name_property]["title"][0]["text"]["content"]
            item_map[item_name] = page["id"]
        except (KeyError, IndexError):
            # Untitled or malformed page — ignore it.
            continue
    print(f" - Found {len(item_map)} existing items.")
    return item_map


def get_existing_portfolio_links(db_id):
    """Fetch all portfolio links and return a set of (company_id, product_id) tuples.

    Exits the program if the database cannot be queried at all; pages with
    missing relations are skipped silently.
    """
    print(f"Fetching existing portfolio links from DB {db_id}...")
    pages = query_db(db_id)
    if pages is None:
        sys.exit(f"Could not fetch portfolio links from DB {db_id}. Aborting.")
    link_set = set()
    for page in pages:
        try:
            company_id = page["properties"]["Related Competitor"]["relation"][0]["id"]
            product_id = page["properties"]["Canonical Product"]["relation"][0]["id"]
            link_set.add((company_id, product_id))
        except (KeyError, IndexError):
            # Incomplete link row — ignore it.
            continue
    print(f" - Found {len(link_set)} existing portfolio links.")
    return link_set


# --- MAIN LOGIC ---
def main():
    """Run the import: load credentials, read current Notion state, then
    upsert companies, canonical products, and portfolio links from JSON_FILE.
    """
    global NOTION_TOKEN, HEADERS
    try:
        with open("notion_token.txt", "r") as f:
            NOTION_TOKEN = f.read().strip()
        # HEADERS was built before the token existed; patch it in place.
        HEADERS["Authorization"] = f"Bearer {NOTION_TOKEN}"
    except FileNotFoundError:
        print("Error: `notion_token.txt` not found.")
        return

    # --- Phase 1: State Awareness ---
    print("\n--- Phase 1: Reading current state from Notion ---")
    companies_map = get_existing_items_map(COMPANIES_DB_ID)
    products_map = get_existing_items_map(CANONICAL_PRODUCTS_DB_ID)
    portfolio_links = get_existing_portfolio_links(PORTFOLIO_DB_ID)

    # --- Phase 2: Processing JSON ---
    print("\n--- Phase 2: Processing local JSON file ---")
    try:
        with open(JSON_FILE, 'r') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: `{JSON_FILE}` not found.")
        return

    for analysis in data.get('analyses', []):
        competitor = analysis['competitor']
        competitor_name = competitor['name']
        print(f"\nProcessing competitor: {competitor_name}")

        # --- Phase 3: "Upsert" Company ---
        if competitor_name not in companies_map:
            print(f" - Company '{competitor_name}' not found. Creating...")
            props = {"Name": {"title": [{"text": {"content": competitor_name}}]}}
            new_company = create_page(COMPANIES_DB_ID, props)
            if new_company:
                companies_map[competitor_name] = new_company["id"]
            else:
                print(f" - Failed to create company '{competitor_name}'. Skipping.")
                continue
        company_id = companies_map[competitor_name]

        # --- Phase 4: "Upsert" Products and Portfolio Links ---
        for product in analysis.get('portfolio', []):
            product_name = product['product']

            # Upsert Canonical Product
            if product_name not in products_map:
                print(f" - Product '{product_name}' not found. Creating canonical product...")
                props = {"Name": {"title": [{"text": {"content": product_name}}]}}
                new_product = create_page(CANONICAL_PRODUCTS_DB_ID, props)
                if new_product:
                    products_map[product_name] = new_product["id"]
                else:
                    print(f" - Failed to create canonical product '{product_name}'. Skipping.")
                    continue
            product_id = products_map[product_name]

            # Check and create Portfolio Link
            if (company_id, product_id) not in portfolio_links:
                print(f" - Portfolio link for '{competitor_name}' -> '{product_name}' not found. Creating...")
                portfolio_props = {
                    "Product": {"title": [{"text": {"content": f"{competitor_name} - {product_name}"}}]},
                    "Related Competitor": {"relation": [{"id": company_id}]},
                    "Canonical Product": {"relation": [{"id": product_id}]}
                }
                new_portfolio_entry = create_page(PORTFOLIO_DB_ID, portfolio_props)
                if new_portfolio_entry:
                    # Add to cache to prevent re-creation in same run
                    portfolio_links.add((company_id, product_id))
            else:
                print(f" - Portfolio link for '{competitor_name}' -> '{product_name}' already exists. Skipping.")

    print("\n--- ✅ Import script finished ---")


if __name__ == "__main__":
    main()