"""Sync companies from the Company Explorer API into a local SQLite database."""

import requests
|
|
import os
|
|
import sqlite3
|
|
import json
|
|
|
|
# --- Configuration ---
# Root URL of the Company Explorer REST API.
BASE_URL = "http://192.168.178.6:8090/ce/api"

# HTTP basic-auth credentials. Each one is read from its environment
# variable and falls back to the literal default when the variable is unset.
# NOTE(review): the password fallback is a credential committed in source;
# consider requiring the environment variable instead.
API_USER = os.environ.get("COMPANY_EXPLORER_API_USER", "admin")
API_PASSWORD = os.environ.get("COMPANY_EXPLORER_API_PASSWORD", "gemini")

# Path of the local SQLite database file that the sync (re)creates.
DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db"
|
|
|
|
def fetch_all_companies_from_api():
    """Fetch every company from the Company Explorer API, page by page.

    Requests ``{BASE_URL}/companies`` with HTTP basic auth and 50 records
    per page, accumulating each response's ``"items"`` list. Paging stops
    when a page comes back empty, or when the response reports a ``"total"``
    and that many records have been collected.

    Returns:
        A list with the company objects of all pages, or ``None`` if a
        request fails, returns an error status, or the response body is not
        valid JSON.
    """
    print("Fetching all companies from Company Explorer API...")
    url = f"{BASE_URL}/companies"
    all_companies = []
    page = 1

    while True:
        # Keep the try body limited to the calls that can actually fail.
        try:
            response = requests.get(
                url,
                auth=(API_USER, API_PASSWORD),
                params={'page': page, 'per_page': 50},
                timeout=20,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            print(f"Error fetching companies from API: {e}")
            return None

        companies_on_page = data.get("items", [])
        if not companies_on_page:
            break

        all_companies.extend(companies_on_page)
        print(f"Fetched page {page} with {len(companies_on_page)} companies.")

        # Only trust "total" when the API actually sends it. Defaulting it
        # to 0 would end the loop after the first page and silently truncate
        # the result; without it we page until an empty page is returned.
        total = data.get("total")
        if total is not None and len(all_companies) >= total:
            break

        page += 1

    print(f"Total companies fetched: {len(all_companies)}")
    return all_companies
|
|
|
|
def setup_database():
    """Recreate the SQLite database at ``DB_PATH`` with an empty ``companies`` table.

    Any existing file at ``DB_PATH`` is deleted first, so every run starts
    from a fresh database. The connection is closed even if creating the
    table fails.

    Raises:
        OSError: If an existing database file cannot be removed.
        sqlite3.Error: If the database cannot be opened or the table
            cannot be created.
    """
    print(f"Setting up database at: {DB_PATH}")

    # Try the removal directly rather than checking exists() first; a
    # missing file simply means there is nothing to clean up.
    try:
        os.remove(DB_PATH)
    except FileNotFoundError:
        pass
    else:
        print("Removed existing database file.")

    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        # Define a flexible schema to hold the key fields
        cursor.execute("""
            CREATE TABLE companies (
                id INTEGER PRIMARY KEY,
                name TEXT,
                industry_ai TEXT,
                status TEXT,
                city TEXT,
                country TEXT,
                website TEXT,
                calculated_metric_name TEXT,
                calculated_metric_value TEXT,
                calculated_metric_unit TEXT,
                full_json TEXT
            )
        """)

        conn.commit()
    finally:
        # Release the file handle on both the success and the error path.
        conn.close()

    print("Database and table 'companies' created successfully.")
|
|
|
|
def _company_to_row(company):
    """Flatten one company dict into the column tuple used by the INSERT."""
    # Only the first calculated metric is stored in dedicated columns; the
    # complete object is still preserved in full_json.
    metrics = company.get('calculated_metrics') or []
    first_metric = metrics[0] if metrics else {}
    return (
        company.get('id'),
        company.get('name'),
        company.get('industry_ai'),
        company.get('status'),
        company.get('city'),
        company.get('country'),
        company.get('website'),
        first_metric.get('name'),
        first_metric.get('value'),
        first_metric.get('unit'),
        json.dumps(company),  # Store the full object for future flexibility
    )


def populate_database(companies):
    """Insert the given companies into the ``companies`` table at ``DB_PATH``.

    Args:
        companies: Iterable of company dicts as returned by the API. A falsy
            value (``None`` or an empty list) is reported and skipped
            without opening the database.

    All rows are written in one transaction: it is committed when every
    insert succeeds and rolled back if any insert raises. The connection is
    closed in both cases.

    Raises:
        sqlite3.Error: If the database cannot be opened or an insert fails.
    """
    if not companies:
        print("No companies to populate.")
        return

    print("Populating database...")
    conn = sqlite3.connect(DB_PATH)
    try:
        # "with conn" commits on success and rolls back on an exception.
        with conn:
            conn.executemany(
                """
                INSERT INTO companies (
                    id, name, industry_ai, status, city, country, website,
                    calculated_metric_name, calculated_metric_value, calculated_metric_unit,
                    full_json
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (_company_to_row(company) for company in companies),
            )
    finally:
        # The connection context manager does not close; do it explicitly.
        conn.close()

    print(f"Successfully inserted {len(companies)} records into the database.")
|
|
|
|
def main():
    """Run the full sync: fetch from the API, rebuild the database, insert rows.

    The existing database is only replaced after the API fetch succeeded,
    so a failed fetch leaves the previous database file untouched.

    Returns:
        0 when the sync completed, 1 when fetching from the API failed.
    """
    all_companies = fetch_all_companies_from_api()
    if all_companies is None:
        print("\nSync process failed due to API errors.")
        return 1

    setup_database()
    populate_database(all_companies)
    print("\nSync process finished successfully.")
    print(f"Database is ready at: {DB_PATH}")
    return 0


if __name__ == "__main__":
    # Propagate the result as the process exit status so that callers
    # (cron, shell scripts) can detect a failed sync.
    raise SystemExit(main())