[2ff88f42] feat(GTM-Engine): Implement Dual Opener Strategy & Harden Analysis

Completed the GTM engine setup:

- Implemented 'Dual Opener' generation (Primary/Secondary) in ClassificationService.
- Migrated the DB to support two opener fields.
- Updated the API and Frontend to handle and display both openers.
- Fixed a bug that created duplicate website_scrape entries.
- Hardened metric extraction by improving the LLM prompt and adding content length checks.
2026-02-20 15:38:06 +00:00
parent e4d738990a
commit 23d0c695d6
12 changed files with 434 additions and 19 deletions
--- a/company-explorer/backend/services/classification.py
+++ b/company-explorer/backend/services/classification.py
@@ -75,10 +75,12 @@ Source Text:
 {text_content[:6000]}

 Return a JSON object with:
- "raw_value": The number found (e.g. 352 or 352.0). If text says "352 Betten", extract 352. If not found, null.
+- "raw_value": The number found (e.g. 352 or 352.0). If not found, null.
 - "raw_unit": The unit found (e.g. "Betten", "m²").
 - "proof_text": A short quote from the text proving this value.

+**IMPORTANT:** Ignore obvious year numbers (like 1900-2026) if other, more plausible metric values are present in the text. Focus on the target metric.
+
 JSON ONLY.
 """
        try:
@@ -159,8 +161,8 @@ JSON ONLY.
            try:
                args = (company,) if source_name == 'website' else (db, company.id) if source_name == 'wikipedia' else (company, search_term)
                content_text, current_source_url = content_loader(*args)
-                if not content_text:
-                    logger.info(f"No content for {source_name}.")
+                if not content_text or len(content_text) < 100:
+                    logger.info(f"No or insufficient content for {source_name} (Length: {len(content_text) if content_text else 0}).")
                    continue
                llm_result = self._run_llm_metric_extraction_prompt(content_text, search_term, industry_name)
                if llm_result:
@@ -224,13 +226,68 @@ JSON ONLY.
        company.metric_confidence_reason = metrics["metric_confidence_reason"]
        
        company.last_classification_at = datetime.utcnow()
-        db.commit()
+        # REMOVED: db.commit() - This should be handled by the calling function.
        return company

    def reevaluate_wikipedia_metric(self, company: Company, db: Session, industry: Industry) -> Company:
        logger.info(f"Re-evaluating metric for {company.name}...")
        return self.extract_metrics_for_industry(company, db, industry)

+    def _generate_marketing_opener(self, company_name: str, website_text: str, industry_name: str, industry_pains: str, focus_mode: str = "primary") -> Optional[str]:
+        """
+        Generates the 'First Sentence' (Opener).
+        focus_mode: 'primary' (Standard/Cleaning) or 'secondary' (Service/Logistics).
+        """
+        if not industry_pains:
+            industry_pains = "Effizienz und Personalmangel" # Fallback
+
+        # Dynamic Focus Instruction
+        if focus_mode == "secondary":
+            focus_instruction = """
+   - **FOKUS: SEKUNDÄR-PROZESSE (Logistik/Service/Versorgung).**
+   - Ignoriere das Thema Reinigung. Konzentriere dich auf **Abläufe, Materialfluss, Entlastung von Fachkräften** oder **Gäste-Service**.
+   - Der Satz muss einen operativen Entscheider (z.B. Pflegedienstleitung, Produktionsleiter) abholen."""
+        else:
+            focus_instruction = """
+   - **FOKUS: PRIMÄR-PROZESSE (Infrastruktur/Sauberkeit/Sicherheit).**
+   - Konzentriere dich auf Anforderungen an das Facility Management, Hygiene, Außenwirkung oder Arbeitssicherheit.
+   - Der Satz muss einen Infrastruktur-Entscheider (z.B. FM-Leiter, Geschäftsführer) abholen."""
+
+        prompt = f"""
+Du bist ein exzellenter B2B-Stratege und Texter.
+Deine Aufgabe ist es, einen hochpersonalisierten Einleitungssatz für eine E-Mail an ein potenzielles Kundenunternehmen zu formulieren.
+
+--- KONTEXT ---
+Zielunternehmen: {company_name}
+Branche: {industry_name}
+Operative Herausforderung (Pain): "{industry_pains}"
+
+Webseiten-Kontext:
+{website_text[:2500]}
+
+--- Denkprozess & Stilvorgaben ---
+1. **Analysiere den Kontext:** Verstehe das Kerngeschäft.
+2. **Identifiziere den Hebel:** Was ist der Erfolgsfaktor in Bezug auf den FOKUS?
+3. **Formuliere den Satz (ca. 20-35 Wörter):**
+   - Wähle einen eleganten, aktiven Einstieg.
+   - Verbinde die **Tätigkeit** mit dem **Hebel** und den **Konsequenzen**.
+   - **WICHTIG:** Formuliere als positive Beobachtung über eine Kernkompetenz.
+   - **VERMEIDE:** Konkrete Zahlen.
+   - Verwende den Firmennamen: {company_name}.
+   {focus_instruction}
+
+--- Deine Ausgabe ---
+Gib NUR den finalen Satz aus. Keine Anführungszeichen.
+"""
+        try:
+            response = call_gemini_flash(prompt)
+            if response:
+                return response.strip().strip('"')
+            return None
+        except Exception as e:
+            logger.error(f"Opener Generation Error: {e}")
+            return None
+
    def classify_company_potential(self, company: Company, db: Session) -> Company:
        logger.info(f"Starting classification for {company.name}...")
        
@@ -249,12 +306,29 @@ JSON ONLY.
        suggested_industry_name = self._run_llm_classification_prompt(website_content, company.name, industry_defs)
        logger.info(f"AI suggests industry: {suggested_industry_name}")
        
-        # 4. Update Company
-        # Match back to DB object
+        # 4. Update Company & Generate Openers
        matched_industry = next((i for i in industries if i.name == suggested_industry_name), None)
        
        if matched_industry:
            company.industry_ai = matched_industry.name
+            
+            # --- Generate PRIMARY Opener (Infrastructure/Cleaning) ---
+            op_prim = self._generate_marketing_opener(
+                company.name, website_content, matched_industry.name, matched_industry.pains, "primary"
+            )
+            if op_prim:
+                company.ai_opener = op_prim
+                logger.info(f"Opener (Primary): {op_prim}")
+
+            # --- Generate SECONDARY Opener (Service/Logistics) ---
+            # Currently always generated; could be limited to industries where it is relevant, but generating always is safer for "Dual Strategy".
+            op_sec = self._generate_marketing_opener(
+                company.name, website_content, matched_industry.name, matched_industry.pains, "secondary"
+            )
+            if op_sec:
+                company.ai_opener_secondary = op_sec
+                logger.info(f"Opener (Secondary): {op_sec}")
+            
        else:
            company.industry_ai = "Others"