"""Health check for the Company Explorer settings endpoints.

Loads API credentials from /app/.env, probes each configured settings
endpoint with HTTP basic auth, and reports whether every endpoint
returns HTTP 200 with a JSON list payload. Exits with status 1 when
any endpoint fails, so the script can gate CI / container healthchecks.
"""
import os
import sys

import requests


# --- Configuration ---
def load_env_manual(path):
    """Populate os.environ from a simple KEY=VALUE .env file.

    Already-set environment variables win (``setdefault``); blank lines
    and ``#`` comments are ignored. A missing file only produces a
    warning so the script still works in an already-configured shell.
    """
    if not os.path.exists(path):
        print(f"⚠️ Warning: .env file not found at {path}")
        return
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith('#') and '=' in line:
                key, val = line.split('=', 1)
                os.environ.setdefault(key.strip(), val.strip())


load_env_manual('/app/.env')

API_USER = os.getenv("API_USER")
API_PASS = os.getenv("API_PASSWORD")
CE_URL = "http://127.0.0.1:8000"

endpoints_to_check = {
    "Industries": "/api/industries",
    "Robotics Categories": "/api/robotics/categories",
    "Job Roles": "/api/job_roles",
}


def check_settings_endpoints():
    """Probe every configured endpoint; return True only if all are healthy."""
    print("=" * 60)
    print("🩺 Running Settings Endpoints Health Check...")
    print("=" * 60)

    all_ok = True
    for name, endpoint in endpoints_to_check.items():
        url = f"{CE_URL}{endpoint}"
        print(f"--- Checking {name} ({url}) ---")
        try:
            response = requests.get(url, auth=(API_USER, API_PASS), timeout=5)
            if response.status_code == 200:
                # FIX: a 200 with a non-JSON body (e.g. an HTML error page)
                # previously crashed on response.json(); treat it as a failure.
                try:
                    print(f" ✅ SUCCESS: Received {len(response.json())} items.")
                except ValueError:
                    print(" ❌ FAILURE: Status 200 but body is not valid JSON.")
                    all_ok = False
            else:
                print(f" ❌ FAILURE: Status {response.status_code}, Response: {response.text}")
                all_ok = False
        except requests.exceptions.RequestException as e:
            # Connection refused / DNS / timeout — server unreachable.
            print(f" ❌ FATAL: Connection error: {e}")
            all_ok = False

    return all_ok


if __name__ == "__main__":
    if check_settings_endpoints():
        print("\n✅ All settings endpoints are healthy.")
    else:
        print("\n🔥 One or more settings endpoints failed.")
        # FIX: use sys.exit — the bare exit() builtin comes from the site
        # module and is not guaranteed in all interpreter configurations.
        sys.exit(1)
{std_metric})") + print(f" Opener (Primary): {opener_primary}") + print(f" Opener (Secondary): {opener_secondary}") # Fetch Enrichment Data cursor.execute("SELECT source_type, content FROM enrichment_data WHERE company_id = ?", (cid,)) @@ -35,7 +37,7 @@ def inspect(name_part): content = json.loads(content_raw) if stype == "website_scrape": summary = content.get("summary", "") - raw = content.get("raw_text", "") + raw = content.get("text", "") print(f" > Summary: {summary[:150]}...") print(f" > Raw Length: {len(raw)}") if len(raw) > 500: diff --git a/company-explorer/backend/services/classification.py b/company-explorer/backend/services/classification.py index 7be378b9..7e77ee28 100644 --- a/company-explorer/backend/services/classification.py +++ b/company-explorer/backend/services/classification.py @@ -294,15 +294,18 @@ Gib NUR den finalen Satz aus. Keine Anführungszeichen. # 1. Load Definitions industries = self._load_industry_definitions(db) industry_defs = [{"name": i.name, "description": i.description} for i in industries] + logger.debug(f"Loaded {len(industries)} industry definitions.") # 2. Get Content (Website) website_content, _ = self._get_website_content_and_url(company) - if not website_content: - logger.warning(f"No website content for {company.name}. Skipping classification.") + if not website_content or len(website_content) < 100: + logger.warning(f"No or insufficient website content for {company.name} (Length: {len(website_content) if website_content else 0}). Skipping classification.") return company + logger.debug(f"Website content length for classification: {len(website_content)}") # 3. Classify Industry + logger.info(f"Running LLM classification prompt for {company.name}...") suggested_industry_name = self._run_llm_classification_prompt(website_content, company.name, industry_defs) logger.info(f"AI suggests industry: {suggested_industry_name}") @@ -311,32 +314,47 @@ Gib NUR den finalen Satz aus. Keine Anführungszeichen. 
if matched_industry: company.industry_ai = matched_industry.name + logger.info(f"Matched company to industry: {matched_industry.name}") # --- Generate PRIMARY Opener (Infrastructure/Cleaning) --- + logger.info(f"Generating PRIMARY opener for {company.name}...") op_prim = self._generate_marketing_opener( company.name, website_content, matched_industry.name, matched_industry.pains, "primary" ) if op_prim: company.ai_opener = op_prim - logger.info(f"Opener (Primary): {op_prim}") + logger.info(f"Opener (Primary) generated and set.") + else: + logger.warning(f"Failed to generate PRIMARY opener for {company.name}.") # --- Generate SECONDARY Opener (Service/Logistics) --- - # Only if relevant (could be optimized, but generating always is safer for "Dual Strategy") + logger.info(f"Generating SECONDARY opener for {company.name}...") op_sec = self._generate_marketing_opener( company.name, website_content, matched_industry.name, matched_industry.pains, "secondary" ) if op_sec: company.ai_opener_secondary = op_sec - logger.info(f"Opener (Secondary): {op_sec}") + logger.info(f"Opener (Secondary) generated and set.") + else: + logger.warning(f"Failed to generate SECONDARY opener for {company.name}.") else: company.industry_ai = "Others" - + logger.warning(f"No specific industry matched for {company.name}. Set to 'Others'.") + # 5. 
"""Queue a real enrichment analysis for one company via the CE API.

Loads API credentials from /app/.env, POSTs to /api/enrich/analyze for a
hard-coded company id, and reports whether the server accepted (queued)
the task. The process exit code reflects the outcome so automation can
detect failures.
"""
import os
import sys

import requests


# --- Configuration ---
def load_env_manual(path):
    """Populate os.environ from a simple KEY=VALUE .env file.

    Already-set environment variables win (``setdefault``); blank lines
    and ``#`` comments are ignored. A missing file only warns so the
    script still works in an already-configured shell.
    """
    if not os.path.exists(path):
        print(f"⚠️ Warning: .env file not found at {path}")
        return
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith('#') and '=' in line:
                key, val = line.split('=', 1)
                os.environ.setdefault(key.strip(), val.strip())


load_env_manual('/app/.env')

API_USER = os.getenv("API_USER")
API_PASS = os.getenv("API_PASSWORD")
CE_URL = "http://127.0.0.1:8000"
ANALYZE_ENDPOINT = f"{CE_URL}/api/enrich/analyze"
COMPANY_ID_TO_ANALYZE = 1  # Therme Erding


def trigger_analysis():
    """POST the analyze request; return True when the task was queued."""
    print("=" * 60)
    print(f"🚀 Triggering REAL analysis for Company ID: {COMPANY_ID_TO_ANALYZE}")
    print("=" * 60)

    payload = {"company_id": COMPANY_ID_TO_ANALYZE}

    try:
        response = requests.post(ANALYZE_ENDPOINT, json=payload, auth=(API_USER, API_PASS), timeout=10)
    except requests.exceptions.RequestException as e:
        print(f" ❌ FATAL: Could not connect to the server: {e}")
        return False

    # FIX: the original called response.json() unguarded — a 200 with a
    # non-JSON body would crash instead of being reported as a failure.
    queued = False
    if response.status_code == 200:
        try:
            queued = response.json().get("status") == "queued"
        except ValueError:
            queued = False

    if queued:
        print(" ✅ SUCCESS: Analysis task has been queued on the server.")
        print(" The result will be available in the database and UI shortly.")
        return True

    print(f" ❌ FAILURE: Server responded with status {response.status_code}")
    print(f" Response: {response.text}")
    return False


if __name__ == "__main__":
    # FIX: propagate the outcome as the process exit code (consistent with
    # check_settings_api.py) — previously the result was silently dropped
    # and the script always exited 0.
    sys.exit(0 if trigger_analysis() else 1)