[2ff88f42] feat(GTM-Engine): Implement Dual Opener Strategy & Harden Analysis
Completed the GTM engine setup:

- Implemented 'Dual Opener' generation (Primary/Secondary) in ClassificationService.
- Migrated DB to support two opener fields.
- Updated API and Frontend to handle and display both openers.
- Fixed bug creating duplicate website_scrape entries.
- Hardened metric extraction by improving the LLM prompt and adding content length checks.
This commit is contained in:
58
company-explorer/backend/scripts/inspect_therme.py
Normal file
58
company-explorer/backend/scripts/inspect_therme.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import sys
|
||||
import os
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
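# Put the directory two levels above this script (the parent of the `backend` package) on sys.path so `backend.*` imports resolve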
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
|
||||
|
||||
from backend.database import Company, EnrichmentData
|
||||
from backend.config import settings
|
||||
|
||||
def inspect_company(company_name_part):
    """Print a console report for every company whose name contains a fragment.

    A session is opened against ``settings.DATABASE_URL``; companies are
    matched with a case-insensitive ``ILIKE '%<fragment>%'`` filter on
    ``Company.name``. For each match the core fields are printed, followed by
    one line per ``EnrichmentData`` row. Rows of type ``website_scrape`` and
    ``wikipedia`` get extra detail when their ``content`` is a dict.

    Args:
        company_name_part: Text fragment to look for inside the company name.

    Returns:
        None. Everything is written to stdout. Exceptions raised while
        querying or printing are caught and reported as ``Error: ...``; the
        session is closed in every case.
    """
    session_factory = sessionmaker(bind=create_engine(settings.DATABASE_URL))
    db = session_factory()

    try:
        print(f"Searching for company containing: '{company_name_part}'...")
        name_pattern = f"%{company_name_part}%"
        matches = db.query(Company).filter(Company.name.ilike(name_pattern)).all()

        if not matches:
            print("❌ No company found.")
            return

        divider = "=" * 60
        for company in matches:
            # Header and core company fields.
            print("\n" + divider)
            print(f"🏢 COMPANY: {company.name} (ID: {company.id})")
            print(divider)
            print(f"🌐 Website: {company.website}")
            print(f"🏗️ Industry (AI): {company.industry_ai}")
            print(
                f"📊 Metric: {company.calculated_metric_value} "
                f"{company.calculated_metric_unit} "
                f"(Std: {company.standardized_metric_value} m²)"
            )
            print(f"✅ Status: {company.status}")

            # All enrichment rows attached to this company.
            records = (
                db.query(EnrichmentData)
                .filter(EnrichmentData.company_id == company.id)
                .all()
            )
            print("\n📚 ENRICHMENT DATA:")
            for record in records:
                print(f" 🔹 Type: {record.source_type} (Locked: {record.is_locked})")

                kind = record.source_type
                if kind not in ("website_scrape", "wikipedia"):
                    continue

                # Only dict payloads carry the keys reported below.
                payload = record.content
                if not isinstance(payload, dict):
                    continue

                if kind == "website_scrape":
                    summary_text = str(payload.get("summary", "No summary"))
                    raw_length = len(str(payload.get("raw_text", "")))
                    # The "..." suffix is printed even when nothing was cut off.
                    print(f" 📝 Summary: {summary_text[:200]}...")
                    print(f" 📄 Raw Text Length: {raw_length} chars")
                else:
                    wiki_text = str(payload.get("full_text", ""))
                    print(f" 🔗 Wiki URL: {payload.get('url')}")
                    print(f" 📄 Content Snippet: {wiki_text[:200]}...")

    except Exception as e:
        print(f"Error: {e}")
    finally:
        db.close()
|
||||
|
||||
# Script entry point: when executed directly, report on the hard-coded
# company name fragment "Therme Erding".
if __name__ == "__main__":
    inspect_company("Therme Erding")
|
||||
31
company-explorer/backend/scripts/migrate_opener.py
Normal file
31
company-explorer/backend/scripts/migrate_opener.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from sqlalchemy import create_engine, text
|
||||
import sys
|
||||
import os
|
||||
|
||||
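# Put the directory two levels above this script (the parent of the `backend` package) on sys.path so `backend.*` imports resolve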
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
|
||||
from backend.config import settings
|
||||
|
||||
def migrate():
    """Add the ``ai_opener`` TEXT column to the ``companies`` table if missing.

    Connects to ``settings.DATABASE_URL``, reads the current column names of
    ``companies`` and issues ``ALTER TABLE ... ADD COLUMN`` only when
    ``ai_opener`` is not among them; otherwise it prints a skip message and
    changes nothing.

    The column lookup uses SQLAlchemy's runtime Inspector instead of the
    SQLite-only ``PRAGMA table_info`` statement, so the existence check does
    not depend on the database URL pointing at SQLite.

    Returns:
        None. Progress is written to stdout. Exceptions raised while checking
        or altering the schema are caught and reported as
        ``❌ Migration failed: ...``; they are not re-raised.
    """
    # Local import: keeps this function self-contained with respect to the
    # file-level import list.
    from sqlalchemy import inspect as sa_inspect

    engine = create_engine(settings.DATABASE_URL)
    with engine.connect() as conn:
        try:
            print("Checking schema...")
            inspector = sa_inspect(conn)

            # Fail with a readable message instead of a bare table name when
            # the target table has not been created yet.
            if not inspector.has_table("companies"):
                raise RuntimeError("table 'companies' does not exist")

            columns = {col["name"] for col in inspector.get_columns("companies")}

            if "ai_opener" in columns:
                print("Column 'ai_opener' already exists. Skipping.")
            else:
                print("Adding column 'ai_opener' to 'companies' table...")
                conn.execute(text("ALTER TABLE companies ADD COLUMN ai_opener TEXT"))
                conn.commit()
                print("✅ Migration successful.")

        except Exception as e:
            print(f"❌ Migration failed: {e}")
|
||||
|
||||
# Script entry point: run the schema migration when executed directly.
if __name__ == "__main__":
    migrate()
|
||||
41
company-explorer/backend/scripts/test_opener_generation.py
Normal file
41
company-explorer/backend/scripts/test_opener_generation.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import sys
|
||||
import os
|
||||
import logging
|
||||
|
||||
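# Put the directory two levels above this script (the parent of the `backend` package) on sys.path so `backend.*` imports resolve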
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
|
||||
|
||||
# Configure root logging at INFO level
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
# Import Service
|
||||
from backend.services.classification import ClassificationService
|
||||
|
||||
def test_opener_generation():
    """Generate and print one primary and one secondary marketing opener.

    Creates a ``ClassificationService`` and calls its
    ``_generate_marketing_opener`` method once per scenario, printing a banner
    before each call and the returned opener afterwards. Nothing is asserted;
    the output is meant to be read by a person.
    """
    service = ClassificationService()

    # (banner, result label, keyword arguments) for each call, in run order.
    scenarios = (
        (
            "\n--- TEST: Therme Erding (Primary Focus: Hygiene) ---",
            "Primary Opener",
            {
                "company_name": "Therme Erding",
                "website_text": "Größte Therme der Welt, 35 Saunen, Rutschenparadies Galaxy, Wellenbad. Täglich tausende Besucher.",
                "industry_name": "Leisure - Wet & Spa",
                "industry_pains": "Rutschgefahr und Hygiene",
                "focus_mode": "primary",
            },
        ),
        (
            "\n--- TEST: Dachser Logistik (Secondary Focus: Process) ---",
            "Secondary Opener",
            {
                "company_name": "Dachser SE",
                "website_text": "Globaler Logistikdienstleister, Warehousing, Food Logistics, Air & Sea Logistics. Intelligent Logistics.",
                "industry_name": "Logistics - Warehouse",
                "industry_pains": "Effizienz und Sicherheit",
                "focus_mode": "secondary",
            },
        ),
    )

    for banner, label, call_kwargs in scenarios:
        print(banner)
        opener = service._generate_marketing_opener(**call_kwargs)
        print(f"{label}: {opener}")
|
||||
|
||||
# Script entry point: run the manual opener check when executed directly.
if __name__ == "__main__":
    # Any exception is reduced to a single printed line rather than a
    # traceback; the message itself attributes failures to missing env/deps.
    try:
        test_opener_generation()
    except Exception as e:
        print(f"Test Failed (likely due to missing env/deps): {e}")
|
||||
Reference in New Issue
Block a user