"""Sync all companies from the Company Explorer API into a local SQLite file.

The script fetches every page of ``{BASE_URL}/companies``, recreates the
database at ``DB_PATH`` from scratch and inserts one row per company.
"""

import json
import os
import sqlite3
from contextlib import closing

import requests

# --- Configuration ---
BASE_URL = "http://192.168.178.6:8090/ce/api"
# NOTE(review): the fallbacks below are hard-coded default credentials; prefer
# always supplying the environment variables and consider dropping the defaults.
API_USER = os.getenv("COMPANY_EXPLORER_API_USER", "admin")
API_PASSWORD = os.getenv("COMPANY_EXPLORER_API_PASSWORD", "gemini")
DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db"

# Paging and timeout values used for every request to the companies endpoint.
_PAGE_SIZE = 50
_REQUEST_TIMEOUT_SECONDS = 20

_CREATE_TABLE_SQL = """
    CREATE TABLE companies (
        id INTEGER PRIMARY KEY,
        name TEXT,
        industry_ai TEXT,
        status TEXT,
        city TEXT,
        country TEXT,
        website TEXT,
        calculated_metric_name TEXT,
        calculated_metric_value TEXT,
        calculated_metric_unit TEXT,
        full_json TEXT
    )
"""

_INSERT_COMPANY_SQL = """
    INSERT INTO companies (
        id, name, industry_ai, status, city, country, website,
        calculated_metric_name, calculated_metric_value,
        calculated_metric_unit, full_json
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""


def fetch_all_companies_from_api():
    """Fetch all companies from the Company Explorer API, page by page.

    Pages are requested with ``page`` / ``per_page`` query parameters until a
    page has no ``items`` or, when the response reports a ``total``, until at
    least that many companies have been collected.

    Returns:
        A list with the ``items`` of every page, or ``None`` if a request
        fails, returns an HTTP error status, or its body is not valid JSON.
    """
    print("Fetching all companies from Company Explorer API...")
    url = f"{BASE_URL}/companies"
    all_companies = []
    page = 1

    while True:
        try:
            response = requests.get(
                url,
                auth=(API_USER, API_PASSWORD),
                params={'page': page, 'per_page': _PAGE_SIZE},
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on requests versions
            # whose JSONDecodeError is not a RequestException subclass.
            print(f"Error fetching companies from API: {e}")
            return None

        companies_on_page = data.get("items", [])
        if not companies_on_page:
            break

        all_companies.extend(companies_on_page)
        print(f"Fetched page {page} with {len(companies_on_page)} companies.")

        # Only trust "total" when the API actually sent it. Defaulting it to 0
        # would end the loop after the first page and silently truncate the
        # result; without a total we keep going until an empty page.
        total = data.get("total")
        if total is not None and len(all_companies) >= total:
            break

        page += 1

    print(f"Total companies fetched: {len(all_companies)}")
    return all_companies


def setup_database():
    """Create a fresh SQLite database at ``DB_PATH`` with a ``companies`` table.

    Any existing file at ``DB_PATH`` is deleted first, so previously stored
    data is discarded.
    """
    print(f"Setting up database at: {DB_PATH}")
    if os.path.exists(DB_PATH):
        os.remove(DB_PATH)
        print("Removed existing database file.")

    # closing() guarantees the connection is released even if the DDL fails.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cursor = conn.cursor()
        # Define a flexible schema to hold the key fields
        cursor.execute(_CREATE_TABLE_SQL)
        conn.commit()

    print("Database and table 'companies' created successfully.")


def _company_to_row(company):
    """Build the parameter tuple for one ``companies`` row from a company dict."""
    # Only the first calculated metric is stored in dedicated columns; a
    # missing, null or empty metrics list yields NULL for all three of them.
    metrics = company.get('calculated_metrics') or []
    first_metric = metrics[0] if metrics else {}
    return (
        company.get('id'),
        company.get('name'),
        company.get('industry_ai'),
        company.get('status'),
        company.get('city'),
        company.get('country'),
        company.get('website'),
        first_metric.get('name'),
        first_metric.get('value'),
        first_metric.get('unit'),
        json.dumps(company),  # Store the full object for future flexibility
    )


def populate_database(companies):
    """Insert the given companies into the ``companies`` table at ``DB_PATH``.

    Args:
        companies: Iterable of company dicts as returned by the API. If it is
            empty or ``None`` nothing is written.

    All rows are committed together; if an insert fails nothing is committed
    and the connection is still closed.
    """
    if not companies:
        print("No companies to populate.")
        return

    print("Populating database...")
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cursor = conn.cursor()
        cursor.executemany(
            _INSERT_COMPANY_SQL,
            (_company_to_row(company) for company in companies),
        )
        conn.commit()

    print(f"Successfully inserted {len(companies)} records into the database.")


def main():
    """Run the full sync and return a process exit status.

    Returns:
        0 when the database was rebuilt, 1 when fetching from the API failed
        (in which case the existing database file is left untouched).
    """
    all_companies = fetch_all_companies_from_api()
    if all_companies is None:
        print("\nSync process failed due to API errors.")
        return 1

    setup_database()
    populate_database(all_companies)
    print("\nSync process finished successfully.")
    print(f"Database is ready at: {DB_PATH}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())