Completed the GTM engine setup:

- Implemented 'Dual Opener' generation (Primary/Secondary) in ClassificationService.
- Migrated the DB to support two opener fields.
- Updated the API and Frontend to handle and display both openers.
- Fixed a bug that created duplicate website_scrape entries.
- Hardened metric extraction by improving the LLM prompt and adding content length checks.
57 lines
2.2 KiB
Python
57 lines
2.2 KiB
Python
import sqlite3
|
|
import json
|
|
|
|
# Absolute path of the SQLite database file that inspect() connects to.
DB_PATH = "/app/companies_v3_fixed_2.db"
|
|
|
|
def _print_enrichment_entry(stype, content_raw):
    """Print a short preview of one enrichment_data row.

    Args:
        stype: Value of the row's ``source_type`` column.
        content_raw: Value of the row's ``content`` column; expected to be a
            JSON document, but anything else is reported instead of raising.
    """
    print(f" - {stype}")
    try:
        content = json.loads(content_raw)
    except (TypeError, ValueError):
        # TypeError: the column was NULL or not str/bytes.
        # ValueError: covers json.JSONDecodeError for malformed documents.
        print(" > (Content not valid JSON)")
        return

    if not isinstance(content, dict):
        # Valid JSON, but the .get() lookups below only make sense on objects.
        print(" > (Content is not a JSON object)")
        return

    if stype == "website_scrape":
        # `or ""` also covers keys that are present but null; str() keeps the
        # slicing below safe for non-string values.
        summary = str(content.get("summary") or "")
        raw = str(content.get("raw_text") or "")
        print(f" > Summary: {summary[:150]}...")
        print(f" > Raw Length: {len(raw)}")
        if len(raw) > 500:
            print(f" > Raw Snippet: {raw[:300]}...")
    elif stype == "wikipedia":
        print(f" > URL: {content.get('url')}")
        intro = content.get("intro_text") or content.get("full_text") or ""
        print(f" > Intro: {str(intro)[:150]}...")


def inspect(name_part, db_path=None):
    """Print companies whose name contains *name_part*, with their enrichment data.

    For every matching row in ``companies`` the id, name, website, industry
    and metric columns are printed, followed by a preview of each related
    ``enrichment_data`` row.

    Args:
        name_part: Substring matched against ``companies.name`` via SQL LIKE.
        db_path: SQLite database file to open. Defaults to the module-level
            ``DB_PATH`` when omitted.

    Returns:
        None. All results, and any error, are written to stdout; exceptions
        are reported as ``Error: ...`` rather than propagated.
    """
    if db_path is None:
        db_path = DB_PATH

    # Bound before the try so `finally` is safe even if connect() itself fails.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        print(f"Searching for '{name_part}' in {db_path}...")
        cursor.execute(
            "SELECT id, name, website, industry_ai, calculated_metric_value, standardized_metric_value FROM companies WHERE name LIKE ?",
            (f"%{name_part}%",),
        )
        companies = cursor.fetchall()

        if not companies:
            print("No hits.")
            return

        # `companies` is fully fetched, so reusing the cursor below is safe.
        for cid, name, website, industry, metric, std_metric in companies:
            print("\n" + "=" * 40)
            print(f"🏢 {name} (ID: {cid})")
            print(f" Vertical: {industry}")
            print(f" Website: {website}")
            print(f" Metric: {metric} (Std: {std_metric})")

            # Fetch Enrichment Data
            cursor.execute(
                "SELECT source_type, content FROM enrichment_data WHERE company_id = ?",
                (cid,),
            )
            rows = cursor.fetchall()
            print("\n 📚 Enrichment Data:")
            for stype, content_raw in rows:
                _print_enrichment_entry(stype, content_raw)

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and exit cleanly.
        print(f"Error: {e}")
    finally:
        if conn is not None:
            conn.close()
|
|
|
|
# Script entry point: when executed directly, look up one hard-coded company.
if __name__ == "__main__":
    inspect(name_part="Therme Erding")
|