[2ff88f42] feat(GTM-Engine): Implement Dual Opener Strategy & Harden Analysis

Completed the GTM engine setup:

- Implemented 'Dual Opener' generation (Primary/Secondary) in ClassificationService.
- Migrated DB to support two opener fields.
- Updated API and Frontend to handle and display both openers.
- Fixed a bug that created duplicate website_scrape entries.
- Hardened metric extraction by improving the LLM prompt and adding content length checks.
This commit is contained in:
2026-02-20 15:38:06 +00:00
parent e4d738990a
commit 23d0c695d6
12 changed files with 434 additions and 19 deletions

View File

@@ -75,10 +75,12 @@ Source Text:
{text_content[:6000]}
Return a JSON object with:
- "raw_value": The number found (e.g. 352 or 352.0). If text says "352 Betten", extract 352. If not found, null.
- "raw_value": The number found (e.g. 352 or 352.0). If not found, null.
- "raw_unit": The unit found (e.g. "Betten", "").
- "proof_text": A short quote from the text proving this value.
**IMPORTANT:** Ignore obvious year numbers (like 1900-2026) if other, more plausible metric values are present in the text. Focus on the target metric.
JSON ONLY.
"""
try:
@@ -159,8 +161,8 @@ JSON ONLY.
try:
args = (company,) if source_name == 'website' else (db, company.id) if source_name == 'wikipedia' else (company, search_term)
content_text, current_source_url = content_loader(*args)
if not content_text:
logger.info(f"No content for {source_name}.")
if not content_text or len(content_text) < 100:
logger.info(f"No or insufficient content for {source_name} (Length: {len(content_text) if content_text else 0}).")
continue
llm_result = self._run_llm_metric_extraction_prompt(content_text, search_term, industry_name)
if llm_result:
@@ -224,13 +226,68 @@ JSON ONLY.
company.metric_confidence_reason = metrics["metric_confidence_reason"]
company.last_classification_at = datetime.utcnow()
db.commit()
# REMOVED: db.commit() - This should be handled by the calling function.
return company
def reevaluate_wikipedia_metric(self, company: Company, db: Session, industry: Industry) -> Company:
    """
    Re-run metric extraction for a company.

    Logs the re-evaluation and delegates to ``extract_metrics_for_industry``
    with the same ``company``, ``db`` and ``industry`` arguments, returning
    whatever that call returns.
    """
    logger.info(f"Re-evaluating metric for {company.name}...")
    refreshed_company = self.extract_metrics_for_industry(company, db, industry)
    return refreshed_company
def _generate_marketing_opener(self, company_name: str, website_text: str, industry_name: str, industry_pains: str, focus_mode: str = "primary") -> Optional[str]:
    """
    Generate the 'first sentence' (opener) of an outreach e-mail.

    Builds a German prompt from the company name, industry, industry pain
    and the first 2500 characters of the website text, sends it to
    ``call_gemini_flash`` and returns the cleaned reply.

    Args:
        company_name: Name of the target company; inserted into the prompt.
        website_text: Scraped website text. ``None`` or an empty string is
            tolerated and results in an empty context section.
        industry_name: Industry label inserted into the prompt.
        industry_pains: Operational pain inserted into the prompt. A falsy
            value is replaced by a generic fallback.
        focus_mode: ``"secondary"`` selects the service/logistics focus;
            any other value selects the primary (infrastructure/cleaning)
            focus.

    Returns:
        The opener with surrounding whitespace and quotation marks removed,
        or ``None`` when the model returns nothing usable or the call
        raises.
    """
    if not industry_pains:
        industry_pains = "Effizienz und Personalmangel"  # Fallback

    # Slicing None would raise a TypeError before the try block is reached,
    # so missing website text is normalised to an empty string first.
    website_excerpt = (website_text or "")[:2500]

    # Dynamic focus instruction
    if focus_mode == "secondary":
        focus_instruction = """
- **FOKUS: SEKUNDÄR-PROZESSE (Logistik/Service/Versorgung).**
- Ignoriere das Thema Reinigung. Konzentriere dich auf **Abläufe, Materialfluss, Entlastung von Fachkräften** oder **Gäste-Service**.
- Der Satz muss einen operativen Entscheider (z.B. Pflegedienstleitung, Produktionsleiter) abholen."""
    else:
        focus_instruction = """
- **FOKUS: PRIMÄR-PROZESSE (Infrastruktur/Sauberkeit/Sicherheit).**
- Konzentriere dich auf Anforderungen an das Facility Management, Hygiene, Außenwirkung oder Arbeitssicherheit.
- Der Satz muss einen Infrastruktur-Entscheider (z.B. FM-Leiter, Geschäftsführer) abholen."""

    prompt = f"""
Du bist ein exzellenter B2B-Stratege und Texter.
Deine Aufgabe ist es, einen hochpersonalisierten Einleitungssatz für eine E-Mail an ein potenzielles Kundenunternehmen zu formulieren.
--- KONTEXT ---
Zielunternehmen: {company_name}
Branche: {industry_name}
Operative Herausforderung (Pain): "{industry_pains}"
Webseiten-Kontext:
{website_excerpt}
--- Denkprozess & Stilvorgaben ---
1. **Analysiere den Kontext:** Verstehe das Kerngeschäft.
2. **Identifiziere den Hebel:** Was ist der Erfolgsfaktor in Bezug auf den FOKUS?
3. **Formuliere den Satz (ca. 20-35 Wörter):**
- Wähle einen eleganten, aktiven Einstieg.
- Verbinde die **Tätigkeit** mit dem **Hebel** und den **Konsequenzen**.
- **WICHTIG:** Formuliere als positive Beobachtung über eine Kernkompetenz.
- **VERMEIDE:** Konkrete Zahlen.
- Verwende den Firmennamen: {company_name}.
{focus_instruction}
--- Deine Ausgabe ---
Gib NUR den finalen Satz aus. Keine Anführungszeichen.
"""
    # The prompt forbids quotation marks, but the model may still wrap the
    # sentence in ASCII or typographic (German/French style) quotes.
    quote_chars = '"„“”«»'
    try:
        response = call_gemini_flash(prompt)
        if not response:
            return None
        opener = response.strip().strip(quote_chars).strip()
        # A reply consisting only of quotes/whitespace is not a usable opener.
        return opener or None
    except Exception as e:
        logger.error(f"Opener Generation Error: {e}")
        return None
def classify_company_potential(self, company: Company, db: Session) -> Company:
logger.info(f"Starting classification for {company.name}...")
@@ -249,12 +306,29 @@ JSON ONLY.
suggested_industry_name = self._run_llm_classification_prompt(website_content, company.name, industry_defs)
logger.info(f"AI suggests industry: {suggested_industry_name}")
# 4. Update Company
# Match back to DB object
# 4. Update Company & Generate Openers
matched_industry = next((i for i in industries if i.name == suggested_industry_name), None)
if matched_industry:
company.industry_ai = matched_industry.name
# --- Generate PRIMARY Opener (Infrastructure/Cleaning) ---
op_prim = self._generate_marketing_opener(
company.name, website_content, matched_industry.name, matched_industry.pains, "primary"
)
if op_prim:
company.ai_opener = op_prim
logger.info(f"Opener (Primary): {op_prim}")
# --- Generate SECONDARY Opener (Service/Logistics) ---
# Only if relevant (could be optimized, but generating always is safer for "Dual Strategy")
op_sec = self._generate_marketing_opener(
company.name, website_content, matched_industry.name, matched_industry.pains, "secondary"
)
if op_sec:
company.ai_opener_secondary = op_sec
logger.info(f"Opener (Secondary): {op_sec}")
else:
company.industry_ai = "Others"