"""Health check for the Company Explorer settings endpoints.

Loads API credentials from /app/.env, probes each configured settings
endpoint with HTTP basic auth, and reports whether every endpoint
returns HTTP 200 with a JSON list payload. Exits with status 1 when
any endpoint fails, so the script can gate CI / container healthchecks.
"""
import os
import sys

import requests


# --- Configuration ---
def load_env_manual(path):
    """Populate os.environ from a simple KEY=VALUE .env file.

    Already-set environment variables win (``setdefault``); blank lines
    and ``#`` comments are ignored. A missing file only produces a
    warning so the script still works in an already-configured shell.
    """
    if not os.path.exists(path):
        print(f"⚠️ Warning: .env file not found at {path}")
        return
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith('#') and '=' in line:
                key, val = line.split('=', 1)
                os.environ.setdefault(key.strip(), val.strip())


load_env_manual('/app/.env')

API_USER = os.getenv("API_USER")
API_PASS = os.getenv("API_PASSWORD")
CE_URL = "http://127.0.0.1:8000"

endpoints_to_check = {
    "Industries": "/api/industries",
    "Robotics Categories": "/api/robotics/categories",
    "Job Roles": "/api/job_roles",
}


def check_settings_endpoints():
    """Probe every configured endpoint; return True only if all are healthy."""
    print("=" * 60)
    print("🩺 Running Settings Endpoints Health Check...")
    print("=" * 60)

    all_ok = True
    for name, endpoint in endpoints_to_check.items():
        url = f"{CE_URL}{endpoint}"
        print(f"--- Checking {name} ({url}) ---")
        try:
            response = requests.get(url, auth=(API_USER, API_PASS), timeout=5)
            if response.status_code == 200:
                # FIX: a 200 with a non-JSON body (e.g. an HTML error page)
                # previously crashed on response.json(); treat it as a failure.
                try:
                    print(f" ✅ SUCCESS: Received {len(response.json())} items.")
                except ValueError:
                    print(" ❌ FAILURE: Status 200 but body is not valid JSON.")
                    all_ok = False
            else:
                print(f" ❌ FAILURE: Status {response.status_code}, Response: {response.text}")
                all_ok = False
        except requests.exceptions.RequestException as e:
            # Connection refused / DNS / timeout — server unreachable.
            print(f" ❌ FATAL: Connection error: {e}")
            all_ok = False

    return all_ok


if __name__ == "__main__":
    if check_settings_endpoints():
        print("\n✅ All settings endpoints are healthy.")
    else:
        print("\n🔥 One or more settings endpoints failed.")
        # FIX: use sys.exit — the bare exit() builtin comes from the site
        # module and is not guaranteed in all interpreter configurations.
        sys.exit(1)
{std_metric})") + print(f" Opener (Primary): {opener_primary}") + print(f" Opener (Secondary): {opener_secondary}") # Fetch Enrichment Data cursor.execute("SELECT source_type, content FROM enrichment_data WHERE company_id = ?", (cid,)) @@ -35,7 +37,7 @@ def inspect(name_part): content = json.loads(content_raw) if stype == "website_scrape": summary = content.get("summary", "") - raw = content.get("raw_text", "") + raw = content.get("text", "") print(f" > Summary: {summary[:150]}...") print(f" > Raw Length: {len(raw)}") if len(raw) > 500: diff --git a/company-explorer/backend/services/classification.py b/company-explorer/backend/services/classification.py index 7be378b9..7e77ee28 100644 --- a/company-explorer/backend/services/classification.py +++ b/company-explorer/backend/services/classification.py @@ -294,15 +294,18 @@ Gib NUR den finalen Satz aus. Keine Anführungszeichen. # 1. Load Definitions industries = self._load_industry_definitions(db) industry_defs = [{"name": i.name, "description": i.description} for i in industries] + logger.debug(f"Loaded {len(industries)} industry definitions.") # 2. Get Content (Website) website_content, _ = self._get_website_content_and_url(company) - if not website_content: - logger.warning(f"No website content for {company.name}. Skipping classification.") + if not website_content or len(website_content) < 100: + logger.warning(f"No or insufficient website content for {company.name} (Length: {len(website_content) if website_content else 0}). Skipping classification.") return company + logger.debug(f"Website content length for classification: {len(website_content)}") # 3. Classify Industry + logger.info(f"Running LLM classification prompt for {company.name}...") suggested_industry_name = self._run_llm_classification_prompt(website_content, company.name, industry_defs) logger.info(f"AI suggests industry: {suggested_industry_name}") @@ -311,32 +314,47 @@ Gib NUR den finalen Satz aus. Keine Anführungszeichen. 
if matched_industry: company.industry_ai = matched_industry.name + logger.info(f"Matched company to industry: {matched_industry.name}") # --- Generate PRIMARY Opener (Infrastructure/Cleaning) --- + logger.info(f"Generating PRIMARY opener for {company.name}...") op_prim = self._generate_marketing_opener( company.name, website_content, matched_industry.name, matched_industry.pains, "primary" ) if op_prim: company.ai_opener = op_prim - logger.info(f"Opener (Primary): {op_prim}") + logger.info(f"Opener (Primary) generated and set.") + else: + logger.warning(f"Failed to generate PRIMARY opener for {company.name}.") # --- Generate SECONDARY Opener (Service/Logistics) --- - # Only if relevant (could be optimized, but generating always is safer for "Dual Strategy") + logger.info(f"Generating SECONDARY opener for {company.name}...") op_sec = self._generate_marketing_opener( company.name, website_content, matched_industry.name, matched_industry.pains, "secondary" ) if op_sec: company.ai_opener_secondary = op_sec - logger.info(f"Opener (Secondary): {op_sec}") + logger.info(f"Opener (Secondary) generated and set.") + else: + logger.warning(f"Failed to generate SECONDARY opener for {company.name}.") else: company.industry_ai = "Others" - + logger.warning(f"No specific industry matched for {company.name}. Set to 'Others'.") + # 5. 
"""Queue a real enrichment analysis for one company via the CE API.

Loads API credentials from /app/.env, POSTs to /api/enrich/analyze for a
hard-coded company id, and reports whether the server accepted (queued)
the task. The process exit code reflects the outcome so automation can
detect failures.
"""
import os
import sys

import requests


# --- Configuration ---
def load_env_manual(path):
    """Populate os.environ from a simple KEY=VALUE .env file.

    Already-set environment variables win (``setdefault``); blank lines
    and ``#`` comments are ignored. A missing file only warns so the
    script still works in an already-configured shell.
    """
    if not os.path.exists(path):
        print(f"⚠️ Warning: .env file not found at {path}")
        return
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith('#') and '=' in line:
                key, val = line.split('=', 1)
                os.environ.setdefault(key.strip(), val.strip())


load_env_manual('/app/.env')

API_USER = os.getenv("API_USER")
API_PASS = os.getenv("API_PASSWORD")
CE_URL = "http://127.0.0.1:8000"
ANALYZE_ENDPOINT = f"{CE_URL}/api/enrich/analyze"
COMPANY_ID_TO_ANALYZE = 1  # Therme Erding


def trigger_analysis():
    """POST the analyze request; return True when the task was queued."""
    print("=" * 60)
    print(f"🚀 Triggering REAL analysis for Company ID: {COMPANY_ID_TO_ANALYZE}")
    print("=" * 60)

    payload = {"company_id": COMPANY_ID_TO_ANALYZE}

    try:
        response = requests.post(ANALYZE_ENDPOINT, json=payload, auth=(API_USER, API_PASS), timeout=10)
    except requests.exceptions.RequestException as e:
        print(f" ❌ FATAL: Could not connect to the server: {e}")
        return False

    # FIX: the original called response.json() unguarded — a 200 with a
    # non-JSON body would crash instead of being reported as a failure.
    queued = False
    if response.status_code == 200:
        try:
            queued = response.json().get("status") == "queued"
        except ValueError:
            queued = False

    if queued:
        print(" ✅ SUCCESS: Analysis task has been queued on the server.")
        print(" The result will be available in the database and UI shortly.")
        return True

    print(f" ❌ FAILURE: Server responded with status {response.status_code}")
    print(f" Response: {response.text}")
    return False


if __name__ == "__main__":
    # FIX: propagate the outcome as the process exit code (consistent with
    # check_settings_api.py) — previously the result was silently dropped
    # and the script always exited 0.
    sys.exit(0 if trigger_analysis() else 1)