[2ff88f42] feat(GTM-Engine): Add final test and trigger scripts

2026-02-20 16:41:56 +00:00
parent 0143dba33e
commit 250ff4d97f
5 changed files with 135 additions and 10 deletions
--- a/check_settings_api.py
+++ b/check_settings_api.py
@@ -0,0 +1,55 @@
+import requests
+import os
+
+# --- Configuration ---
+def load_env_manual(path):
+    """Copy KEY=VALUE lines of the .env file at `path` into os.environ (already-set vars win)."""
+    if not os.path.exists(path):
+        print(f"⚠️  Warning: .env file not found at {path}")
+        return
+    with open(path, encoding="utf-8") as f:  # explicit, so parsing does not depend on the locale
+        for line in map(str.strip, f):
+            if line and not line.startswith('#') and '=' in line:  # skip blanks and comments
+                key, val = line.split('=', 1)  # split once so values may contain '='
+                os.environ.setdefault(key.strip(), val.strip())
+
+load_env_manual('/app/.env')  # populate os.environ before the lookups below
+
+# Basic-auth credentials; each is None when its variable is unset.
+API_USER = os.environ.get("API_USER")
+API_PASS = os.environ.get("API_PASSWORD")
+CE_URL = "http://127.0.0.1:8000"  # base URL that each endpoint path is appended to
+endpoints_to_check = {  # display name -> path requested by the health check
+    "Industries": "/api/industries",
+    "Robotics Categories": "/api/robotics/categories",
+    "Job Roles": "/api/job_roles",
+}
+
+def check_settings_endpoints():
+    """GET each URL in endpoints_to_check; return True only if all answer 200 with a sized JSON body."""
+    print("="*60)
+    print("🩺 Running Settings Endpoints Health Check...")
+    print("="*60)
+    all_ok = True
+    for name, endpoint in endpoints_to_check.items():
+        url = f"{CE_URL}{endpoint}"
+        print(f"--- Checking {name} ({url}) ---")
+        try:
+            response = requests.get(url, auth=(API_USER, API_PASS), timeout=5)
+            if response.status_code == 200:
+                print(f"   ✅ SUCCESS: Received {len(response.json())} items.")
+                continue
+            print(f"   ❌ FAILURE: Status {response.status_code}, Response: {response.text}")
+        except (ValueError, TypeError) as e:  # body is not JSON, or JSON without a length
+            print(f"   ❌ FAILURE: Unusable response body: {e}")
+        except requests.exceptions.RequestException as e:
+            print(f"   ❌ FATAL: Connection error: {e}")
+        all_ok = False  # reached on every path except the success `continue`
+    return all_ok
+
+if __name__ == "__main__":
+    # Print the overall verdict; a non-zero exit status lets callers detect failure.
+    if not check_settings_endpoints():
+        print("\n🔥 One or more settings endpoints failed.")
+        raise SystemExit(1)  # the site-provided exit() helper is absent under `python -S`
+    print("\n✅ All settings endpoints are healthy.")
--- a/company-explorer/Dockerfile
+++ b/company-explorer/Dockerfile
@@ -25,6 +25,7 @@ COPY --from=frontend-builder /build/dist /frontend_static

 # Copy Backend Source
 COPY backend ./backend
+COPY backend/tests /app/backend/tests

 # Environment Variables
 ENV PYTHONPATH=/app
--- a/company-explorer/backend/scripts/inspect_sqlite_native.py
+++ b/company-explorer/backend/scripts/inspect_sqlite_native.py
@@ -9,7 +9,7 @@ def inspect(name_part):
        cursor = conn.cursor()
        
        print(f"Searching for '{name_part}' in {DB_PATH}...")
-        cursor.execute("SELECT id, name, website, industry_ai, calculated_metric_value, standardized_metric_value FROM companies WHERE name LIKE ?", (f'%{name_part}%',))
+        cursor.execute("SELECT id, name, website, industry_ai, calculated_metric_value, standardized_metric_value, ai_opener, ai_opener_secondary FROM companies WHERE name LIKE ?", (f'%{name_part}%',))
        companies = cursor.fetchall()
        
        if not companies:
@@ -17,12 +17,14 @@ def inspect(name_part):
            return

        for c in companies:
-            cid, name, website, industry, metric, std_metric = c
+            cid, name, website, industry, metric, std_metric, opener_primary, opener_secondary = c
            print("\n" + "="*40)
            print(f"🏢 {name} (ID: {cid})")
            print(f"   Vertical: {industry}")
            print(f"   Website: {website}")
            print(f"   Metric: {metric} (Std: {std_metric})")
+            print(f"   Opener (Primary): {opener_primary}")
+            print(f"   Opener (Secondary): {opener_secondary}")
            
            # Fetch Enrichment Data
            cursor.execute("SELECT source_type, content FROM enrichment_data WHERE company_id = ?", (cid,))
@@ -35,7 +37,7 @@ def inspect(name_part):
                    content = json.loads(content_raw)
                    if stype == "website_scrape":
                        summary = content.get("summary", "")
-                        raw = content.get("raw_text", "")
+                        raw = content.get("text", "")
                        print(f"     > Summary: {summary[:150]}...")
                        print(f"     > Raw Length: {len(raw)}")
                        if len(raw) > 500:
--- a/company-explorer/backend/services/classification.py
+++ b/company-explorer/backend/services/classification.py
@@ -294,15 +294,18 @@ Gib NUR den finalen Satz aus. Keine Anführungszeichen.
        # 1. Load Definitions
        industries = self._load_industry_definitions(db)
        industry_defs = [{"name": i.name, "description": i.description} for i in industries]
+        logger.debug(f"Loaded {len(industries)} industry definitions.")
        
        # 2. Get Content (Website)
        website_content, _ = self._get_website_content_and_url(company)
        
-        if not website_content:
-            logger.warning(f"No website content for {company.name}. Skipping classification.")
+        if not website_content or len(website_content) < 100:
+            logger.warning(f"No or insufficient website content for {company.name} (Length: {len(website_content) if website_content else 0}). Skipping classification.")
            return company
+        logger.debug(f"Website content length for classification: {len(website_content)}")

        # 3. Classify Industry
+        logger.info(f"Running LLM classification prompt for {company.name}...")
        suggested_industry_name = self._run_llm_classification_prompt(website_content, company.name, industry_defs)
        logger.info(f"AI suggests industry: {suggested_industry_name}")
        
@@ -311,32 +314,47 @@ Gib NUR den finalen Satz aus. Keine Anführungszeichen.
        
        if matched_industry:
            company.industry_ai = matched_industry.name
+            logger.info(f"Matched company to industry: {matched_industry.name}")
            
            # --- Generate PRIMARY Opener (Infrastructure/Cleaning) ---
+            logger.info(f"Generating PRIMARY opener for {company.name}...")
            op_prim = self._generate_marketing_opener(
                company.name, website_content, matched_industry.name, matched_industry.pains, "primary"
            )
            if op_prim:
                company.ai_opener = op_prim
-                logger.info(f"Opener (Primary): {op_prim}")
+                logger.info(f"Opener (Primary) generated and set.")
+            else:
+                logger.warning(f"Failed to generate PRIMARY opener for {company.name}.")

            # --- Generate SECONDARY Opener (Service/Logistics) ---
-            # Only if relevant (could be optimized, but generating always is safer for "Dual Strategy")
+            logger.info(f"Generating SECONDARY opener for {company.name}...")
            op_sec = self._generate_marketing_opener(
                company.name, website_content, matched_industry.name, matched_industry.pains, "secondary"
            )
            if op_sec:
                company.ai_opener_secondary = op_sec
-                logger.info(f"Opener (Secondary): {op_sec}")
+                logger.info(f"Opener (Secondary) generated and set.")
+            else:
+                logger.warning(f"Failed to generate SECONDARY opener for {company.name}.")
            
        else:
            company.industry_ai = "Others" 
-            
+            logger.warning(f"No specific industry matched for {company.name}. Set to 'Others'.")
+
        # 5. Extract Metrics (Cascade)
        if matched_industry:
-            self.extract_metrics_for_industry(company, db, matched_industry)
+            logger.info(f"Extracting metrics for {company.name} and industry {matched_industry.name}...")
+            try:
+                self.extract_metrics_for_industry(company, db, matched_industry)
+                logger.info(f"Metric extraction completed for {company.name}.")
+            except Exception as e:
+                logger.error(f"Error during metric extraction for {company.name}: {e}", exc_info=True)
+        else:
+            logger.warning(f"Skipping metric extraction for {company.name} as no specific industry was matched.")
            
        company.last_classification_at = datetime.utcnow()
        db.commit()
+        logger.info(f"Classification and enrichment for {company.name} completed and committed.")
        
        return company
--- a/trigger_analysis.py
+++ b/trigger_analysis.py
@@ -0,0 +1,49 @@
+import requests
+import os
+import time
+
+# --- Configuration ---
+def load_env_manual(path):
+    """Copy KEY=VALUE lines of the .env file at `path` into os.environ (already-set vars win)."""
+    if not os.path.exists(path):
+        print(f"⚠️  Warning: .env file not found at {path}")
+        return
+    with open(path, encoding="utf-8") as f:  # explicit, so parsing does not depend on the locale
+        for line in map(str.strip, f):
+            if line and not line.startswith('#') and '=' in line:  # skip blanks and comments
+                key, val = line.split('=', 1)  # split once so values may contain '='
+                os.environ.setdefault(key.strip(), val.strip())
+
+load_env_manual('/app/.env')  # populate os.environ before the lookups below
+# Basic-auth credentials; each is None when its variable is unset.
+API_USER = os.environ.get("API_USER")
+API_PASS = os.environ.get("API_PASSWORD")
+CE_URL = "http://127.0.0.1:8000"
+ANALYZE_ENDPOINT = CE_URL + "/api/enrich/analyze"
+COMPANY_ID_TO_ANALYZE = 1 # Therme Erding
+
+def trigger_analysis():
+    """POST COMPANY_ID_TO_ANALYZE to ANALYZE_ENDPOINT; return True only if the reply status is "queued"."""
+    print("="*60)
+    print(f"🚀 Triggering REAL analysis for Company ID: {COMPANY_ID_TO_ANALYZE}")
+    print("="*60)
+    payload = {"company_id": COMPANY_ID_TO_ANALYZE}
+    try:
+        response = requests.post(ANALYZE_ENDPOINT, json=payload, auth=(API_USER, API_PASS), timeout=10)
+    except requests.exceptions.RequestException as e:
+        print(f"   ❌ FATAL: Could not connect to the server: {e}")
+        return False
+    try:
+        body = response.json() if response.status_code == 200 else None
+    except ValueError:  # 200 with a non-JSON body: report a failure instead of a traceback
+        body = None
+    if isinstance(body, dict) and body.get("status") == "queued":
+        print("   ✅ SUCCESS: Analysis task has been queued on the server.")
+        print("   The result will be available in the database and UI shortly.")
+        return True
+    print(f"   ❌ FAILURE: Server responded with status {response.status_code}")
+    print(f"   Response: {response.text}")
+    return False
+
+if __name__ == "__main__":
+    raise SystemExit(0 if trigger_analysis() else 1)  # non-zero exit status when queuing fails