[2ff88f42] feat(GTM-Engine): Add final test and trigger scripts
This commit is contained in:
58
company-explorer/backend/scripts/inspect_sqlite_native.py
Normal file
58
company-explorer/backend/scripts/inspect_sqlite_native.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import sqlite3
|
||||
import json
|
||||
|
||||
# Path of the SQLite database file that inspect() opens.
DB_PATH = "/app/companies_v3_fixed_2.db"
|
||||
|
||||
def _print_enrichment(source_type, content_raw):
    """Print a short preview of a single ``enrichment_data`` row.

    Args:
        source_type: Value of the row's ``source_type`` column.
        content_raw: Value of the row's ``content`` column; expected to be
            JSON text, but NULL or malformed values are tolerated.
    """
    print(f" - {source_type}")
    try:
        content = json.loads(content_raw)
    except (TypeError, ValueError):
        # TypeError: content is NULL / not text.
        # ValueError: malformed JSON (json.JSONDecodeError is a subclass).
        print(" > (Content not valid JSON)")
        return

    if not isinstance(content, dict):
        # Valid JSON, but the previews below need key lookups.
        print(" > (Content is not a JSON object)")
        return

    if source_type == "website_scrape":
        # `or ""` also covers keys that are present but null.
        summary = str(content.get("summary") or "")
        raw = str(content.get("text") or "")
        print(f" > Summary: {summary[:150]}...")
        print(f" > Raw Length: {len(raw)}")
        if len(raw) > 500:
            print(f" > Raw Snippet: {raw[:300]}...")
    elif source_type == "wikipedia":
        print(f" > URL: {content.get('url')}")
        intro = content.get("intro_text", "") or content.get("full_text", "")
        print(f" > Intro: {str(intro)[:150]}...")


def inspect(name_part, db_path=None):
    """Print companies whose name contains *name_part*, with their enrichment data.

    Runs a ``LIKE '%name_part%'`` query against the ``companies`` table and,
    for every hit, prints its core fields followed by a preview of each
    matching ``enrichment_data`` row. Errors are reported on stdout instead
    of being raised, so the script always terminates cleanly.

    Args:
        name_part: Substring to search for in ``companies.name``.
        db_path: SQLite database file to open. Defaults to the module-level
            ``DB_PATH`` when omitted.

    Returns:
        None. All results are written to stdout.
    """
    path = DB_PATH if db_path is None else db_path
    # Bind before the try so the finally clause can never hit an unbound
    # name when sqlite3.connect() itself fails.
    conn = None
    try:
        conn = sqlite3.connect(path)
        cursor = conn.cursor()

        print(f"Searching for '{name_part}' in {path}...")
        cursor.execute(
            "SELECT id, name, website, industry_ai, calculated_metric_value, standardized_metric_value, ai_opener, ai_opener_secondary FROM companies WHERE name LIKE ?",
            (f'%{name_part}%',),
        )
        companies = cursor.fetchall()

        if not companies:
            print("No hits.")
            return

        for company in companies:
            (cid, name, website, industry, metric, std_metric,
             opener_primary, opener_secondary) = company
            print("\n" + "=" * 40)
            print(f"🏢 {name} (ID: {cid})")
            print(f" Vertical: {industry}")
            print(f" Website: {website}")
            print(f" Metric: {metric} (Std: {std_metric})")
            print(f" Opener (Primary): {opener_primary}")
            print(f" Opener (Secondary): {opener_secondary}")

            # Fetch Enrichment Data
            cursor.execute(
                "SELECT source_type, content FROM enrichment_data WHERE company_id = ?",
                (cid,),
            )
            print("\n 📚 Enrichment Data:")
            for source_type, content_raw in cursor.fetchall():
                _print_enrichment(source_type, content_raw)

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on.
        print(f"Error: {e}")
    finally:
        if conn is not None:
            conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # An optional first command-line argument overrides the default search
    # term, so other companies can be inspected without editing the file.
    import sys

    inspect(sys.argv[1] if len(sys.argv) > 1 else "Therme Erding")
|
||||
@@ -294,15 +294,18 @@ Gib NUR den finalen Satz aus. Keine Anführungszeichen.
|
||||
# 1. Load Definitions
|
||||
industries = self._load_industry_definitions(db)
|
||||
industry_defs = [{"name": i.name, "description": i.description} for i in industries]
|
||||
logger.debug(f"Loaded {len(industries)} industry definitions.")
|
||||
|
||||
# 2. Get Content (Website)
|
||||
website_content, _ = self._get_website_content_and_url(company)
|
||||
|
||||
if not website_content:
|
||||
logger.warning(f"No website content for {company.name}. Skipping classification.")
|
||||
if not website_content or len(website_content) < 100:
|
||||
logger.warning(f"No or insufficient website content for {company.name} (Length: {len(website_content) if website_content else 0}). Skipping classification.")
|
||||
return company
|
||||
logger.debug(f"Website content length for classification: {len(website_content)}")
|
||||
|
||||
# 3. Classify Industry
|
||||
logger.info(f"Running LLM classification prompt for {company.name}...")
|
||||
suggested_industry_name = self._run_llm_classification_prompt(website_content, company.name, industry_defs)
|
||||
logger.info(f"AI suggests industry: {suggested_industry_name}")
|
||||
|
||||
@@ -311,32 +314,47 @@ Gib NUR den finalen Satz aus. Keine Anführungszeichen.
|
||||
|
||||
if matched_industry:
|
||||
company.industry_ai = matched_industry.name
|
||||
logger.info(f"Matched company to industry: {matched_industry.name}")
|
||||
|
||||
# --- Generate PRIMARY Opener (Infrastructure/Cleaning) ---
|
||||
logger.info(f"Generating PRIMARY opener for {company.name}...")
|
||||
op_prim = self._generate_marketing_opener(
|
||||
company.name, website_content, matched_industry.name, matched_industry.pains, "primary"
|
||||
)
|
||||
if op_prim:
|
||||
company.ai_opener = op_prim
|
||||
logger.info(f"Opener (Primary): {op_prim}")
|
||||
logger.info(f"Opener (Primary) generated and set.")
|
||||
else:
|
||||
logger.warning(f"Failed to generate PRIMARY opener for {company.name}.")
|
||||
|
||||
# --- Generate SECONDARY Opener (Service/Logistics) ---
|
||||
# Only if relevant (could be optimized, but generating always is safer for "Dual Strategy")
|
||||
logger.info(f"Generating SECONDARY opener for {company.name}...")
|
||||
op_sec = self._generate_marketing_opener(
|
||||
company.name, website_content, matched_industry.name, matched_industry.pains, "secondary"
|
||||
)
|
||||
if op_sec:
|
||||
company.ai_opener_secondary = op_sec
|
||||
logger.info(f"Opener (Secondary): {op_sec}")
|
||||
logger.info(f"Opener (Secondary) generated and set.")
|
||||
else:
|
||||
logger.warning(f"Failed to generate SECONDARY opener for {company.name}.")
|
||||
|
||||
else:
|
||||
company.industry_ai = "Others"
|
||||
|
||||
logger.warning(f"No specific industry matched for {company.name}. Set to 'Others'.")
|
||||
|
||||
# 5. Extract Metrics (Cascade)
|
||||
if matched_industry:
|
||||
self.extract_metrics_for_industry(company, db, matched_industry)
|
||||
logger.info(f"Extracting metrics for {company.name} and industry {matched_industry.name}...")
|
||||
try:
|
||||
self.extract_metrics_for_industry(company, db, matched_industry)
|
||||
logger.info(f"Metric extraction completed for {company.name}.")
|
||||
except Exception as e:
|
||||
logger.error(f"Error during metric extraction for {company.name}: {e}", exc_info=True)
|
||||
else:
|
||||
logger.warning(f"Skipping metric extraction for {company.name} as no specific industry was matched.")
|
||||
|
||||
company.last_classification_at = datetime.utcnow()
|
||||
db.commit()
|
||||
logger.info(f"Classification and enrichment for {company.name} completed and committed.")
|
||||
|
||||
return company
|
||||
Reference in New Issue
Block a user