[2ff88f42] feat(GTM-Engine): Add final test and trigger scripts

This commit is contained in:
2026-02-20 16:41:56 +00:00
parent 0143dba33e
commit 250ff4d97f
5 changed files with 135 additions and 10 deletions

55
check_settings_api.py Normal file
View File

@@ -0,0 +1,55 @@
import requests
import os
# --- Configuration ---
def load_env_manual(path):
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Only the first ``=`` separates key and value, so values may
    themselves contain ``=``. A value wrapped in a matching pair of single or
    double quotes has the quotes removed. Variables already present in the
    environment are never overwritten (``setdefault``).

    Args:
        path: Location of the ``.env`` file.

    Returns:
        None. When the file does not exist a warning is printed and nothing
        is loaded.
    """
    if not os.path.exists(path):
        print(f"⚠️ Warning: .env file not found at {path}")
        return
    # Explicit encoding: the default depends on the locale and can fail on
    # non-ASCII secrets inside minimal containers.
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, val = line.split('=', 1)
            key = key.strip()
            val = val.strip()
            if not key:
                # "=value" carries no variable name; an empty name is not a
                # valid environment variable, so skip it instead of failing.
                continue
            # KEY="value" / KEY='value' is common dotenv syntax; without this
            # the quotes would become part of the credential.
            if len(val) >= 2 and val[0] == val[-1] and val[0] in ('"', "'"):
                val = val[1:-1]
            os.environ.setdefault(key, val)
# Populate os.environ from the .env file before the credentials are read.
load_env_manual('/app/.env')

# HTTP basic-auth credentials; each is None when its variable is not set.
API_USER = os.environ.get("API_USER")
API_PASS = os.environ.get("API_PASSWORD")

# Base URL that every endpoint path below is appended to.
CE_URL = "http://127.0.0.1:8000"

# Display label -> API path probed by the health check.
endpoints_to_check = {
    "Industries": "/api/industries",
    "Robotics Categories": "/api/robotics/categories",
    "Job Roles": "/api/job_roles",
}
def check_settings_endpoints():
    """Send a GET request to every path in ``endpoints_to_check``.

    An endpoint is considered healthy when it answers with HTTP 200 and a
    JSON body whose length can be taken (list or object). Progress and the
    per-endpoint outcome are printed to stdout. All endpoints are always
    checked; one failure does not stop the run.

    Returns:
        bool: True when every endpoint was healthy, False otherwise.
    """
    print("="*60)
    print("🩺 Running Settings Endpoints Health Check...")
    print("="*60)
    all_ok = True
    for name, endpoint in endpoints_to_check.items():
        url = f"{CE_URL}{endpoint}"
        print(f"--- Checking {name} ({url}) ---")
        # Only the network call is guarded here, so transport errors are not
        # confused with problems in the response body.
        try:
            response = requests.get(url, auth=(API_USER, API_PASS), timeout=5)
        except requests.exceptions.RequestException as e:
            print(f" ❌ FATAL: Connection error: {e}")
            all_ok = False
            continue

        if response.status_code != 200:
            print(f" ❌ FAILURE: Status {response.status_code}, Response: {response.text}")
            all_ok = False
            continue

        # A 200 with an HTML/empty body (ValueError from the JSON decoder) or
        # a scalar JSON value (TypeError from len) is a failed check, not a
        # connection error and not a crash.
        try:
            item_count = len(response.json())
        except (ValueError, TypeError) as e:
            print(f" ❌ FAILURE: Status 200 but the body is not a JSON collection: {e}")
            all_ok = False
            continue

        print(f" ✅ SUCCESS: Received {item_count} items.")
    return all_ok
if __name__ == "__main__":
    # The exit status mirrors the health check so callers (shell, CI) can
    # react to a failed run.
    if check_settings_endpoints():
        print("\n✅ All settings endpoints are healthy.")
    else:
        print("\n🔥 One or more settings endpoints failed.")
        # SystemExit instead of exit(): the exit() helper is injected by the
        # `site` module and is absent when Python runs with -S or is frozen.
        raise SystemExit(1)

View File

@@ -25,6 +25,7 @@ COPY --from=frontend-builder /build/dist /frontend_static
# Copy Backend Source
COPY backend ./backend
COPY backend/tests /app/backend/tests
# Environment Variables
ENV PYTHONPATH=/app

View File

@@ -9,7 +9,7 @@ def inspect(name_part):
cursor = conn.cursor()
print(f"Searching for '{name_part}' in {DB_PATH}...")
cursor.execute("SELECT id, name, website, industry_ai, calculated_metric_value, standardized_metric_value FROM companies WHERE name LIKE ?", (f'%{name_part}%',))
cursor.execute("SELECT id, name, website, industry_ai, calculated_metric_value, standardized_metric_value, ai_opener, ai_opener_secondary FROM companies WHERE name LIKE ?", (f'%{name_part}%',))
companies = cursor.fetchall()
if not companies:
@@ -17,12 +17,14 @@ def inspect(name_part):
return
for c in companies:
cid, name, website, industry, metric, std_metric = c
cid, name, website, industry, metric, std_metric, opener_primary, opener_secondary = c
print("\n" + "="*40)
print(f"🏢 {name} (ID: {cid})")
print(f" Vertical: {industry}")
print(f" Website: {website}")
print(f" Metric: {metric} (Std: {std_metric})")
print(f" Opener (Primary): {opener_primary}")
print(f" Opener (Secondary): {opener_secondary}")
# Fetch Enrichment Data
cursor.execute("SELECT source_type, content FROM enrichment_data WHERE company_id = ?", (cid,))
@@ -35,7 +37,7 @@ def inspect(name_part):
content = json.loads(content_raw)
if stype == "website_scrape":
summary = content.get("summary", "")
raw = content.get("raw_text", "")
raw = content.get("text", "")
print(f" > Summary: {summary[:150]}...")
print(f" > Raw Length: {len(raw)}")
if len(raw) > 500:

View File

@@ -294,15 +294,18 @@ Gib NUR den finalen Satz aus. Keine Anführungszeichen.
# 1. Load Definitions
industries = self._load_industry_definitions(db)
industry_defs = [{"name": i.name, "description": i.description} for i in industries]
logger.debug(f"Loaded {len(industries)} industry definitions.")
# 2. Get Content (Website)
website_content, _ = self._get_website_content_and_url(company)
if not website_content:
logger.warning(f"No website content for {company.name}. Skipping classification.")
if not website_content or len(website_content) < 100:
logger.warning(f"No or insufficient website content for {company.name} (Length: {len(website_content) if website_content else 0}). Skipping classification.")
return company
logger.debug(f"Website content length for classification: {len(website_content)}")
# 3. Classify Industry
logger.info(f"Running LLM classification prompt for {company.name}...")
suggested_industry_name = self._run_llm_classification_prompt(website_content, company.name, industry_defs)
logger.info(f"AI suggests industry: {suggested_industry_name}")
@@ -311,32 +314,47 @@ Gib NUR den finalen Satz aus. Keine Anführungszeichen.
if matched_industry:
company.industry_ai = matched_industry.name
logger.info(f"Matched company to industry: {matched_industry.name}")
# --- Generate PRIMARY Opener (Infrastructure/Cleaning) ---
logger.info(f"Generating PRIMARY opener for {company.name}...")
op_prim = self._generate_marketing_opener(
company.name, website_content, matched_industry.name, matched_industry.pains, "primary"
)
if op_prim:
company.ai_opener = op_prim
logger.info(f"Opener (Primary): {op_prim}")
logger.info(f"Opener (Primary) generated and set.")
else:
logger.warning(f"Failed to generate PRIMARY opener for {company.name}.")
# --- Generate SECONDARY Opener (Service/Logistics) ---
# Only if relevant (could be optimized, but generating always is safer for "Dual Strategy")
logger.info(f"Generating SECONDARY opener for {company.name}...")
op_sec = self._generate_marketing_opener(
company.name, website_content, matched_industry.name, matched_industry.pains, "secondary"
)
if op_sec:
company.ai_opener_secondary = op_sec
logger.info(f"Opener (Secondary): {op_sec}")
logger.info(f"Opener (Secondary) generated and set.")
else:
logger.warning(f"Failed to generate SECONDARY opener for {company.name}.")
else:
company.industry_ai = "Others"
logger.warning(f"No specific industry matched for {company.name}. Set to 'Others'.")
# 5. Extract Metrics (Cascade)
if matched_industry:
self.extract_metrics_for_industry(company, db, matched_industry)
logger.info(f"Extracting metrics for {company.name} and industry {matched_industry.name}...")
try:
self.extract_metrics_for_industry(company, db, matched_industry)
logger.info(f"Metric extraction completed for {company.name}.")
except Exception as e:
logger.error(f"Error during metric extraction for {company.name}: {e}", exc_info=True)
else:
logger.warning(f"Skipping metric extraction for {company.name} as no specific industry was matched.")
company.last_classification_at = datetime.utcnow()
db.commit()
logger.info(f"Classification and enrichment for {company.name} completed and committed.")
return company

49
trigger_analysis.py Normal file
View File

@@ -0,0 +1,49 @@
import requests
import os
import time
# --- Configuration ---
def load_env_manual(path):
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Only the first ``=`` separates key and value, so values may
    themselves contain ``=``. A value wrapped in a matching pair of single or
    double quotes has the quotes removed. Variables already present in the
    environment are never overwritten (``setdefault``).

    Args:
        path: Location of the ``.env`` file.

    Returns:
        None. When the file does not exist a warning is printed and nothing
        is loaded.
    """
    if not os.path.exists(path):
        print(f"⚠️ Warning: .env file not found at {path}")
        return
    # Explicit encoding: the default depends on the locale and can fail on
    # non-ASCII secrets inside minimal containers.
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, val = line.split('=', 1)
            key = key.strip()
            val = val.strip()
            if not key:
                # "=value" carries no variable name; an empty name is not a
                # valid environment variable, so skip it instead of failing.
                continue
            # KEY="value" / KEY='value' is common dotenv syntax; without this
            # the quotes would become part of the credential.
            if len(val) >= 2 and val[0] == val[-1] and val[0] in ('"', "'"):
                val = val[1:-1]
            os.environ.setdefault(key, val)
# Populate os.environ from the .env file before the credentials are read.
load_env_manual('/app/.env')

# HTTP basic-auth credentials; each is None when its variable is not set.
API_USER = os.environ.get("API_USER")
API_PASS = os.environ.get("API_PASSWORD")

# Base URL of the server and the full URL the analysis request is POSTed to.
CE_URL = "http://127.0.0.1:8000"
ANALYZE_ENDPOINT = CE_URL + "/api/enrich/analyze"

# Company whose analysis is triggered.
COMPANY_ID_TO_ANALYZE = 1  # Therme Erding
def trigger_analysis():
    """POST an analysis request for ``COMPANY_ID_TO_ANALYZE`` to the server.

    The request succeeds when the server answers with HTTP 200 and a JSON
    object whose ``"status"`` field equals ``"queued"``. Progress and the
    outcome are printed to stdout.

    Returns:
        bool: True when the task was queued, False on any other response or
        when the server could not be reached.
    """
    print("="*60)
    print(f"🚀 Triggering REAL analysis for Company ID: {COMPANY_ID_TO_ANALYZE}")
    print("="*60)
    payload = {"company_id": COMPANY_ID_TO_ANALYZE}
    # Only the network call is guarded here, so transport errors are not
    # confused with problems in the response body.
    try:
        response = requests.post(ANALYZE_ENDPOINT, json=payload, auth=(API_USER, API_PASS), timeout=10)
    except requests.exceptions.RequestException as e:
        print(f" ❌ FATAL: Could not connect to the server: {e}")
        return False

    status = None
    if response.status_code == 200:
        # A 200 with a non-JSON or non-object body is an ordinary failure;
        # it must neither crash nor be reported as a connection problem.
        try:
            body = response.json()
        except ValueError:
            body = None
        if isinstance(body, dict):
            status = body.get("status")

    if status == "queued":
        print(" ✅ SUCCESS: Analysis task has been queued on the server.")
        print(" The result will be available in the database and UI shortly.")
        return True

    print(f" ❌ FAILURE: Server responded with status {response.status_code}")
    print(f" Response: {response.text}")
    return False
if __name__ == "__main__":
    # Propagate the outcome as the process exit status (0 = queued,
    # 1 = failed) so shell scripts and CI can detect a failed trigger.
    raise SystemExit(0 if trigger_analysis() else 1)