feat: Enhanced CE schema and Notion sync (Pains/Gains)
This commit is contained in:
130
scripts/sync_ce_to_sqlite.py
Normal file
130
scripts/sync_ce_to_sqlite.py
Normal file
@@ -0,0 +1,130 @@
|
||||
import requests
|
||||
import os
|
||||
import sqlite3
|
||||
import json
|
||||
|
||||
# --- Configuration ---
# Root URL of the Company Explorer API; endpoint paths are appended to it.
BASE_URL = "http://192.168.178.6:8090/ce/api"

# Credentials passed as the `auth` pair on API requests. Each value is read
# from the environment and falls back to a built-in default when unset.
# NOTE(review): fallback credentials are embedded in the source - consider
# requiring the environment variables instead.
API_USER = os.environ.get("COMPANY_EXPLORER_API_USER", "admin")
API_PASSWORD = os.environ.get("COMPANY_EXPLORER_API_PASSWORD", "gemini")

# Location of the local SQLite file that receives the synced companies.
DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db"
|
||||
|
||||
def fetch_all_companies_from_api():
    """Fetch every company record from the Company Explorer API.

    Requests ``{BASE_URL}/companies`` page by page (50 records per page,
    authenticating with ``API_USER`` / ``API_PASSWORD``) and accumulates the
    ``"items"`` list found in each JSON response.

    Paging stops when a page contains no items, or when the response reports
    a ``"total"`` and at least that many records have been collected.

    Returns:
        list | None: All collected company objects, or ``None`` when a
        request fails (connection problem, timeout, HTTP error status) or
        the response body is not valid JSON.
    """
    print("Fetching all companies from Company Explorer API...")
    url = f"{BASE_URL}/companies"
    all_companies = []
    page = 1

    while True:
        try:
            response = requests.get(
                url,
                auth=(API_USER, API_PASSWORD),
                params={'page': page, 'per_page': 50},
                timeout=20,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers requests versions whose JSON decode error is
            # not a RequestException subclass.
            print(f"Error fetching companies from API: {e}")
            return None

        companies_on_page = data.get("items", [])
        if not companies_on_page:
            break

        all_companies.extend(companies_on_page)
        print(f"Fetched page {page} with {len(companies_on_page)} companies.")

        # Only trust "total" when the API actually reports it. Defaulting a
        # missing value to 0 would end the loop after the first page and
        # silently return a truncated result.
        total = data.get("total")
        if total is not None and len(all_companies) >= total:
            break

        page += 1

    print(f"Total companies fetched: {len(all_companies)}")
    return all_companies
|
||||
|
||||
def setup_database(db_path=None):
    """Create a fresh SQLite database containing an empty ``companies`` table.

    Any file already present at the target path is deleted first, so rows
    from an earlier run are discarded.

    Args:
        db_path: Path of the database file to create. When omitted, the
            module-level ``DB_PATH`` is used.

    Raises:
        OSError: If the existing file cannot be removed.
        sqlite3.Error: If the database or table cannot be created.
    """
    path = DB_PATH if db_path is None else db_path
    print(f"Setting up database at: {path}")
    if os.path.exists(path):
        os.remove(path)
        print("Removed existing database file.")

    conn = sqlite3.connect(path)
    try:
        # Define a flexible schema to hold the key fields
        conn.execute("""
            CREATE TABLE companies (
                id INTEGER PRIMARY KEY,
                name TEXT,
                industry_ai TEXT,
                status TEXT,
                city TEXT,
                country TEXT,
                website TEXT,
                calculated_metric_name TEXT,
                calculated_metric_value TEXT,
                calculated_metric_unit TEXT,
                full_json TEXT
            )
        """)
        conn.commit()
    finally:
        # Release the file handle even when table creation fails.
        conn.close()

    print("Database and table 'companies' created successfully.")
|
||||
|
||||
def _company_to_row(company):
    """Flatten one company object into the column tuple for ``companies``."""
    # Extract metrics safely: only the first calculated metric is stored in
    # dedicated columns; a missing or empty list yields NULLs.
    metrics = company.get('calculated_metrics', [])
    first_metric = metrics[0] if metrics else {}
    return (
        company.get('id'),
        company.get('name'),
        company.get('industry_ai'),
        company.get('status'),
        company.get('city'),
        company.get('country'),
        company.get('website'),
        first_metric.get('name'),
        first_metric.get('value'),
        first_metric.get('unit'),
        json.dumps(company),  # Store the full object for future flexibility
    )


def populate_database(companies, db_path=None):
    """Insert company records into the ``companies`` table.

    All rows are written in a single transaction: if any insert fails,
    nothing is committed and the connection is still closed.

    Args:
        companies: Iterable of company dicts as returned by the API. When it
            is empty or ``None`` nothing is written.
        db_path: Path of the SQLite database file. When omitted, the
            module-level ``DB_PATH`` is used.

    Raises:
        sqlite3.Error: If a row cannot be inserted (for example a duplicate
            ``id`` or a missing table).
    """
    if not companies:
        print("No companies to populate.")
        return

    print("Populating database...")
    path = DB_PATH if db_path is None else db_path
    rows = [_company_to_row(company) for company in companies]

    conn = sqlite3.connect(path)
    try:
        conn.executemany("""
            INSERT INTO companies (
                id, name, industry_ai, status, city, country, website,
                calculated_metric_name, calculated_metric_value, calculated_metric_unit,
                full_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, rows)
        conn.commit()
    finally:
        # Closing without a commit discards a partially written batch.
        conn.close()

    print(f"Successfully inserted {len(companies)} records into the database.")
|
||||
|
||||
if __name__ == "__main__":
    # Download first: the local database is only rebuilt once the API fetch
    # has completed without errors.
    all_companies = fetch_all_companies_from_api()
    if all_companies is None:
        print("\nSync process failed due to API errors.")
        # Exit non-zero so schedulers and shell callers can detect the failure.
        raise SystemExit(1)

    setup_database()
    populate_database(all_companies)
    print("\nSync process finished successfully.")
    print(f"Database is ready at: {DB_PATH}")
|
||||
Reference in New Issue
Block a user