[2f988f42] fix(company-explorer): Implement robust quantitative potential and atomic opener generation. Refactored ClassificationService for two-stage metric extraction (direct area and proxy); enhanced MetricParser for targeted value matching and robust number parsing; implemented persona-specific 'Atomic Opener' generation using segmented pains; fixed logging configuration and Pydantic response models; added a dedicated debugging script and updated documentation (GEMINI.md, MIGRATION_PLAN.md).

This commit is contained in:
2026-02-21 08:01:07 +00:00
parent 62a924a168
commit 45acbeefb9
13 changed files with 666 additions and 534 deletions

View File

@@ -0,0 +1,72 @@
import os
import sys
import argparse
import logging
# Add the backend directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from backend.database import get_db, Company
from backend.services.classification import ClassificationService
from backend.lib.logging_setup import setup_logging
# --- CONFIGURATION ---
# Setup logging to be very verbose for this script.
setup_logging()
logger = logging.getLogger(__name__)
# DEBUG level so the classification pipeline's detailed logs are visible.
logger.setLevel(logging.DEBUG)
# NOTE(review): a second, redundant `logger = logging.getLogger(__name__)`
# assignment was removed here — getLogger returns the same cached object
# for the same name, so the duplicate had no effect and only obscured the
# setLevel call above.
def run_debug_analysis(company_id: int):
    """
    Run the full classification/enrichment pipeline for a single company
    synchronously, printing the metric state before and after so the two
    can be compared side by side.
    """
    logger.info(f"--- Starting Interactive Debug for Company ID: {company_id} ---")
    session = next(get_db())
    try:
        # 1. Look the company up by primary key; bail out early if absent.
        target = session.query(Company).filter(Company.id == company_id).first()
        if not target:
            logger.error(f"Company with ID {company_id} not found.")
            return
        logger.info(f"Found Company: {target.name}")

        # --- PRE-ANALYSIS STATE ---
        print("\n--- METRICS BEFORE ---")
        print(f"Calculated: {target.calculated_metric_value} {target.calculated_metric_unit}")
        print(f"Standardized: {target.standardized_metric_value} {target.standardized_metric_unit}")
        print("----------------------\n")

        # 2. + 3. Build the service and run the core logic; this emits all
        # the detailed debug logs added to the classification pipeline.
        result = ClassificationService().classify_company_potential(target, session)

        # --- POST-ANALYSIS STATE ---
        print("\n--- METRICS AFTER ---")
        print(f"Industry (AI): {result.industry_ai}")
        print(f"Metric Source: {result.metric_source}")
        print(f"Proof Text: {result.metric_proof_text}")
        print(f"Calculated: {result.calculated_metric_value} {result.calculated_metric_unit}")
        print(f"Standardized: {result.standardized_metric_value} {result.standardized_metric_unit}")
        print(f"\nOpener 1 (Infra): {result.ai_opener}")
        print(f"Opener 2 (Ops): {result.ai_opener_secondary}")
        print("---------------------")
        logger.info(f"--- Interactive Debug Finished for Company ID: {company_id} ---")
    finally:
        session.close()
if __name__ == "__main__":
    # CLI entry point: choose the target company via --id (defaults to 1).
    arg_parser = argparse.ArgumentParser(description="Run a single company analysis for debugging.")
    arg_parser.add_argument("--id", type=int, default=1, help="The ID of the company to analyze.")
    cli_args = arg_parser.parse_args()
    run_debug_analysis(cli_args.id)

View File

@@ -0,0 +1,67 @@
import requests
import os
import time
import argparse
import sys
import logging
# Add the backend directory to the Python path for relative imports to work
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
# --- Configuration ---
def load_env_manual(path):
    """Load KEY=VALUE pairs from a .env file into os.environ.

    Values already present in the environment are kept (setdefault).
    Does nothing when the file is absent.
    """
    if not os.path.exists(path):
        # print(f"⚠️ Warning: .env file not found at {path}") # Suppress for cleaner output in container
        return
    with open(path) as env_file:
        for raw_line in env_file:
            entry = raw_line.strip()
            # Skip blanks, comment lines, and lines without an assignment.
            if not entry or entry.startswith('#') or '=' not in entry:
                continue
            key, _, val = entry.partition('=')
            os.environ.setdefault(key.strip(), val.strip())
# Load .env (assuming it's in /app) - this needs to be run from /app or adjusted
# For docker-compose exec from project root, /app is the container's WORKDIR
load_env_manual('/app/.env')
# Basic-auth credentials for the enrich API; os.getenv returns None when unset.
API_USER = os.getenv("API_USER")
API_PASS = os.getenv("API_PASSWORD")
# When run INSIDE the container, the service is reachable via localhost
CE_URL = "http://localhost:8000"
# Endpoint that queues an asynchronous analysis task for a single company.
ANALYZE_ENDPOINT = f"{CE_URL}/api/enrich/analyze"
def trigger_analysis(company_id: int):
    """Queue a server-side analysis task for one company via the enrich API.

    Posts {"company_id": ...} to ANALYZE_ENDPOINT with basic auth and prints
    a human-readable summary of the outcome.

    Args:
        company_id: Database ID of the company to analyze.

    Returns:
        True when the server accepted and queued the task, False otherwise.
    """
    # FIX: resolve the logger here. The previous code referenced a module
    # global `logger` that is only bound inside the __main__ guard, so
    # importing this module and calling trigger_analysis() raised NameError
    # on the debug line below. getLogger is cheap and returns a cached object.
    logger = logging.getLogger(__name__)
    print("="*60)
    print(f"🚀 Triggering REAL analysis for Company ID: {company_id}")
    print("="*60)
    payload = {"company_id": company_id}
    try:
        # Added logging for API user/pass (debug only, remove in prod)
        logger.debug(f"API Call to {ANALYZE_ENDPOINT} with user {API_USER}")
        response = requests.post(ANALYZE_ENDPOINT, json=payload, auth=(API_USER, API_PASS), timeout=30)  # Increased timeout
        if response.status_code == 200 and response.json().get("status") == "queued":
            print(" ✅ SUCCESS: Analysis task has been queued on the server.")
            print(" The result will be available in the database and UI shortly.")
            return True
        else:
            print(f" ❌ FAILURE: Server responded with status {response.status_code}")
            print(f" Response: {response.text}")
            return False
    except requests.exceptions.RequestException as e:
        print(f" ❌ FATAL: Could not connect to the server: {e}")
        return False
if __name__ == "__main__":
    # Add a basic logger to the script itself for clearer output
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    # CLI entry point: the company id is mandatory for this script.
    cli = argparse.ArgumentParser(description="Trigger Company Explorer Analysis Task")
    cli.add_argument("--company-id", type=int, required=True, help="ID of the company to analyze")
    parsed = cli.parse_args()
    trigger_analysis(parsed.company_id)