From 461d9d3bbc23a0de48e78a5a5c0c2e3cfbe41963 Mon Sep 17 00:00:00 2001 From: Floke Date: Mon, 22 Dec 2025 15:54:06 +0000 Subject: [PATCH] feat(market-intel): implement role-based campaign engine and gritty reporting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Implementierung der rollenbasierten Campaign-Engine mit operativem Fokus (Grit). - Integration von Social Proof (Referenzkunden) in die E-Mail-Generierung. - Erweiterung des Deep Tech Audits um gezielte Wettbewerber-Recherche (Technographic Search). - Fix des Lösch-Bugs in der Target-Liste und Optimierung des Frontend-States. - Erweiterung des Markdown-Exports um transparente Proof-Links und Evidenz. - Aktualisierung der Dokumentation in readme.md und market_intel_backend_plan.md. --- general-market-intelligence/App.tsx | 1 + .../components/StepOutreach.tsx | 15 +- .../components/StepReport.tsx | 22 +- .../components/StepReview.tsx | 8 +- general-market-intelligence/server.cjs | 83 ++++ .../services/geminiService.ts | 62 ++- market_intel_backend_plan.md | 28 +- market_intel_orchestrator.py | 400 +++++++++++++++--- readme.md | 2 +- 9 files changed, 545 insertions(+), 76 deletions(-) diff --git a/general-market-intelligence/App.tsx b/general-market-intelligence/App.tsx index 60f82212..dc058623 100644 --- a/general-market-intelligence/App.tsx +++ b/general-market-intelligence/App.tsx @@ -222,6 +222,7 @@ const App: React.FC = () => { language={language} referenceUrl={referenceUrl} onBack={handleBack} + knowledgeBase={productContext} /> )} diff --git a/general-market-intelligence/components/StepOutreach.tsx b/general-market-intelligence/components/StepOutreach.tsx index eb289af3..7a8767d2 100644 --- a/general-market-intelligence/components/StepOutreach.tsx +++ b/general-market-intelligence/components/StepOutreach.tsx @@ -9,17 +9,26 @@ interface StepOutreachProps { language: Language; referenceUrl: string; onBack: () => void; + knowledgeBase?: string; // New prop for pre-loaded context } -export const StepOutreach: React.FC = ({ company, language, referenceUrl, onBack }) => { - const [fileContent, setFileContent] = useState(''); - const [fileName, setFileName] = useState(''); +export const StepOutreach: React.FC = ({ company, language, referenceUrl, onBack, knowledgeBase }) => { + const [fileContent, setFileContent] = useState(knowledgeBase || ''); + const [fileName, setFileName] = useState(knowledgeBase ? 'Knowledge Base from Strategy Step' : ''); const [isProcessing, setIsProcessing] = useState(false); const [isTranslating, setIsTranslating] = useState(false); const [emails, setEmails] = useState([]); const [activeTab, setActiveTab] = useState(0); const [copied, setCopied] = useState(false); + // If knowledgeBase prop changes, update state (useful if it loads late) + React.useEffect(() => { + if (knowledgeBase && !fileContent) { + setFileContent(knowledgeBase); + setFileName('Knowledge Base from Strategy Step'); + } + }, [knowledgeBase]); + const handleFileUpload = (e: React.ChangeEvent) => { const file = e.target.files?.[0]; if (file) { diff --git a/general-market-intelligence/components/StepReport.tsx b/general-market-intelligence/components/StepReport.tsx index 5000bb2e..1058ba1f 100644 --- a/general-market-intelligence/components/StepReport.tsx +++ b/general-market-intelligence/components/StepReport.tsx @@ -46,8 +46,26 @@ export const StepReport: React.FC = ({ results, strategy, onRes const headers = ["Company", "Prio", "Rev/Emp", "Status", ...signalHeaders, "Recommendation"]; const rows = sortedResults.map(r => { - const signalValues = strategy.signals.map(s => r.dynamicAnalysis[s.id]?.value || '-'); - return `| ${r.companyName} | ${r.tier} | ${r.revenue} / ${r.employees} | ${r.status} | ${signalValues.join(" | ")} | ${r.recommendation} |`; + const signalValues = strategy.signals.map(s => { + const data = r.dynamicAnalysis[s.id]; + if (!data) return '-'; + + let content = data.value || '-'; + // Sanitize content pipes + content = content.replace(/\|/g, '\\|'); + + if (data.proof) { + // Sanitize proof pipes and newlines + const safeProof = data.proof.replace(/\|/g, '\\|').replace(/(\r\n|\n|\r)/gm, ' '); + content += `
*Proof: ${safeProof}*`; + } + return content; + }); + + // Helper to sanitize other fields + const safe = (str: string) => (str || '').replace(/\|/g, '\\|').replace(/(\r\n|\n|\r)/gm, ' '); + + return `| ${safe(r.companyName)} | ${r.tier} | ${safe(r.revenue)} / ${safe(r.employees)} | ${r.status} | ${signalValues.join(" | ")} | ${safe(r.recommendation)} |`; }); const content = ` diff --git a/general-market-intelligence/components/StepReview.tsx b/general-market-intelligence/components/StepReview.tsx index 8a8b09a5..ab4262c8 100644 --- a/general-market-intelligence/components/StepReview.tsx +++ b/general-market-intelligence/components/StepReview.tsx @@ -30,14 +30,18 @@ export const StepReview: React.FC = ({ competitors, categorized }; const renderCompetitorList = (comps: Competitor[], category: string) => { - if (!comps || comps.length === 0) { + // Filter out competitors that have been removed from the main list + const activeIds = new Set(competitors.map(c => c.id)); + const activeComps = comps.filter(c => activeIds.has(c.id)); + + if (!activeComps || activeComps.length === 0) { return (
  • Keine {category} Konkurrenten gefunden.
  • ); } - return comps.map((comp) => ( + return activeComps.map((comp) => (
  • diff --git a/general-market-intelligence/server.cjs b/general-market-intelligence/server.cjs index 02aa8819..91fb404a 100644 --- a/general-market-intelligence/server.cjs +++ b/general-market-intelligence/server.cjs @@ -269,6 +269,89 @@ app.post('/api/analyze-company', async (req, res) => { } }); +// API-Endpunkt für generate-outreach +app.post('/api/generate-outreach', async (req, res) => { + console.log(`[${new Date().toISOString()}] HIT: /api/generate-outreach`); + const { companyData, knowledgeBase, referenceUrl } = req.body; + + if (!companyData || !knowledgeBase) { + console.error('Validation Error: Missing companyData or knowledgeBase for generate-outreach.'); + return res.status(400).json({ error: 'Missing companyData or knowledgeBase' }); + } + + const tempDataFilePath = path.join(__dirname, 'tmp', `outreach_data_${Date.now()}.json`); + const tempContextFilePath = path.join(__dirname, 'tmp', `outreach_context_${Date.now()}.md`); + const tmpDir = path.join(__dirname, 'tmp'); + if (!fs.existsSync(tmpDir)) { + fs.mkdirSync(tmpDir); + } + + try { + fs.writeFileSync(tempDataFilePath, JSON.stringify(companyData)); + fs.writeFileSync(tempContextFilePath, knowledgeBase); + console.log(`Successfully wrote temporary files for outreach.`); + + const pythonExecutable = path.join(__dirname, '..', '.venv', 'bin', 'python3'); + const pythonScript = path.join(__dirname, '..', 'market_intel_orchestrator.py'); + + const scriptArgs = [ + pythonScript, + '--mode', 'generate_outreach', + '--company_data_file', tempDataFilePath, + '--context_file', tempContextFilePath, + '--reference_url', referenceUrl || '' + ]; + + console.log(`Spawning Outreach Generation for ${companyData.companyName}...`); + + const pythonProcess = spawn(pythonExecutable, scriptArgs, { + env: { ...process.env, PYTHONPATH: path.join(__dirname, '..', '.venv', 'lib', 'python3.11', 'site-packages') } + }); + + let pythonOutput = ''; + let pythonError = ''; + + pythonProcess.stdout.on('data', (data) => { + pythonOutput += data.toString(); + }); + + pythonProcess.stderr.on('data', (data) => { + pythonError += data.toString(); + }); + + pythonProcess.on('close', (code) => { + console.log(`Outreach Generation finished with exit code: ${code}`); + + // Clean up + if (fs.existsSync(tempDataFilePath)) fs.unlinkSync(tempDataFilePath); + if (fs.existsSync(tempContextFilePath)) fs.unlinkSync(tempContextFilePath); + + if (code !== 0) { + console.error(`Python script (generate_outreach) exited with error.`); + return res.status(500).json({ error: 'Python script failed', details: pythonError }); + } + try { + const result = JSON.parse(pythonOutput); + res.json(result); + } catch (parseError) { + console.error('Failed to parse Python output (generate_outreach) as JSON:', parseError); + res.status(500).json({ error: 'Invalid JSON from Python script', rawOutput: pythonOutput, details: pythonError }); + } + }); + + pythonProcess.on('error', (err) => { + console.error(`FATAL: Failed to start python process for outreach.`, err); + if (fs.existsSync(tempDataFilePath)) fs.unlinkSync(tempDataFilePath); + if (fs.existsSync(tempContextFilePath)) fs.unlinkSync(tempContextFilePath); + res.status(500).json({ error: 'Failed to start Python process', details: err.message }); + }); + + } catch (err) { + console.error(`Internal Server Error in /api/generate-outreach: ${err.message}`); + res.status(500).json({ error: err.message }); + } +}); + // Start des Servers app.listen(PORT, () => { console.log(`Node.js API Bridge running on http://localhost:${PORT}`); diff --git a/general-market-intelligence/services/geminiService.ts b/general-market-intelligence/services/geminiService.ts index 5627f3c0..5314b22d 100644 --- a/general-market-intelligence/services/geminiService.ts +++ b/general-market-intelligence/services/geminiService.ts @@ -170,11 +170,65 @@ export const generateOutreachCampaign = async ( language: Language, referenceUrl: string ): Promise => { - // Dieser Teil muss noch im Python-Backend implementiert werden - console.warn("generateOutreachCampaign ist noch nicht im Python-Backend implementiert."); - return []; -}; + console.log(`Frontend: Starte Outreach-Generierung für ${companyData.companyName}...`); + + try { + const response = await fetch(`${API_BASE_URL}/generate-outreach`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + companyData, + knowledgeBase, + referenceUrl + }), + }); + if (!response.ok) { + const errorData = await response.json(); + throw new Error(`Backend-Fehler: ${errorData.error || response.statusText}`); + } + + const result = await response.json(); + console.log(`Frontend: Outreach-Generierung für ${companyData.companyName} erfolgreich.`); + + // Transform new backend structure to match frontend EmailDraft interface + if (Array.isArray(result)) { + return result.map((item: any) => { + // Construct a body that shows the sequence + let fullBody = ""; + const firstSubject = item.emails?.[0]?.subject || "No Subject"; + + if (item.emails && Array.isArray(item.emails)) { + item.emails.forEach((mail: any, idx: number) => { + fullBody += `### Email ${idx + 1}: ${mail.subject}\n\n`; + fullBody += `${mail.body}\n\n`; + if (idx < item.emails.length - 1) fullBody += `\n---\n\n`; + }); + } else { + // Fallback for flat structure or error + fullBody = item.body || "No content generated."; + } + + return { + persona: item.target_role || "Unknown Role", + subject: firstSubject, + body: fullBody, + keyPoints: item.rationale ? [item.rationale] : [] + }; + }); + } else if (result.campaign && Array.isArray(result.campaign)) { + return result.campaign as EmailDraft[]; + } + + return []; + + } catch (error) { + console.error(`Frontend: Outreach-Generierung fehlgeschlagen für ${companyData.companyName}`, error); + throw error; + } +}; export const translateEmailDrafts = async (drafts: EmailDraft[], targetLanguage: Language): Promise => { // Dieser Teil muss noch im Python-Backend oder direkt im Frontend implementiert werden console.warn("translateEmailDrafts ist noch nicht im Python-Backend implementiert."); diff --git a/market_intel_backend_plan.md b/market_intel_backend_plan.md index b2efbce2..1958e280 100644 --- a/market_intel_backend_plan.md +++ b/market_intel_backend_plan.md @@ -46,6 +46,7 @@ Die Logik aus `geminiService.ts` wird in Python-Funktionen innerhalb von `market --- + ### Funktion 2: `identify_competitors` - **Trigger:** Aufruf mit `--mode identify_competitors`. @@ -56,6 +57,7 @@ Die Logik aus `geminiService.ts` wird in Python-Funktionen innerhalb von `market --- + ### Funktion 3: `run_full_analysis` - **Trigger:** Aufruf mit `--mode run_analysis`. @@ -70,6 +72,7 @@ Die Logik aus `geminiService.ts` wird in Python-Funktionen innerhalb von `market --- + ### Funktion 4: `generate_outreach_campaign` - **Trigger:** Aufruf mit `--mode generate_outreach`. @@ -105,8 +108,25 @@ Wir haben heute das gesamte System von einer instabilen n8n-Abhängigkeit zu ein - **Frontend-Abstürze:** Absicherung des Reports gegen fehlende Datenpunkte. --- -### Nächste Ziele für die nächste Sitzung: -1. **Schritt 4: Hyper-personalisierte Campaign-Generation:** Implementierung der Funktion, die basierend auf den Audit-Fakten (z.B. gefundene Software-Stacks oder Nachhaltigkeits-Ziele) maßgeschneiderte E-Mails erstellt. -2. **Stabilitäts-Check:** Testen des Batch-Audits mit einer größeren Anzahl an Firmen (Timeout/Rate-Limit Handling). -3. **Report-Polishing:** Integration der "Proof-Links" direkt in die MD-Export-Funktion. +## 6. Status Update (2025-12-22) - Campaign Engine & Reporting + +### Erreichte Meilensteine: +1. **Rollenbasierte Campaign-Engine:** + * Die Funktion `generate_outreach_campaign` wurde komplett überarbeitet. + * Sie nutzt nun die volle Tiefe der Knowledge Base (`yamaichi_neu.md`), um **personalisierte Sequenzen für spezifische Rollen** (z.B. "Hardware-Entwickler" vs. "Einkäufer") zu erstellen. + * Die Ansprache erfolgt strikt im "Partner auf Augenhöhe"-Tonfall. + * **Social Proof Integration:** Der Absender (`reference_url`) wird als Beweis der Kompetenz inkl. passender KPIs im Abbinder integriert. + * **"Grit"-Prompting:** Der Prompt wurde massiv geschärft, um operative Schmerzpunkte ("ASNs", "Bandstillstand") statt Marketing-Bla-Bla zu nutzen. + +2. **Report Polishing (Frontend):** + * Der Markdown-Export (`StepReport.tsx`) wurde erweitert. + * Er enthält nun die **"Proof-Links"** (Beweise/URLs) direkt in den Tabellenzellen, sauber formatiert. Damit ist die Herleitung der Ergebnisse (z.B. "Warum nutzt der Kunde Ariba?") auch im Export transparent nachvollziehbar. + +3. **Frontend UX & Bugfixes:** + * **Kein doppelter Upload:** `StepOutreach.tsx` wurde angepasst, um den Strategie-Kontext aus Schritt 1 direkt zu übernehmen. + * **Lösch-Bug:** `StepReview.tsx` wurde korrigiert, sodass gelöschte Unternehmen sofort aus der UI verschwinden. + * **Crash-Fix:** Die Behandlung der API-Antwort in `geminiService.ts` wurde gehärtet, um die neue verschachtelte Antwortstruktur der Campaign-Engine korrekt zu verarbeiten. + +### Nächste Schritte: +* **Stabilitäts-Test:** Ausführung eines Batch-Audits mit >20 Firmen, um Rate-Limits und Fehlerbehandlung unter Last zu prüfen. diff --git a/market_intel_orchestrator.py b/market_intel_orchestrator.py index b8740f02..99a22e58 100644 --- a/market_intel_orchestrator.py +++ b/market_intel_orchestrator.py @@ -68,7 +68,9 @@ def get_website_text(url): for tag in soup(['script', 'style', 'nav', 'footer', 'header']): tag.decompose() text = soup.get_text(separator=' ', strip=True) - return text[:15000] # Erhöhtes Limit für besseren Kontext + # Bereinigung des Textes von nicht-druckbaren Zeichen + text = re.sub(r'[^\x20-\x7E\n\r\t]', '', text) + return text[:10000] # Limit für besseren Kontext except Exception as e: logger.error(f"Scraping failed for {url}: {e}") return None @@ -109,8 +111,14 @@ def serp_search(query, num_results=3): def _extract_target_industries_from_context(context_content): md = context_content + # Versuche verschiedene Muster für die Tabelle, falls das Format variiert step2_match = re.search(r'##\s*Schritt\s*2:[\s\S]*?(?=\n##\s*Schritt\s*\d:|\s*$)', md, re.IGNORECASE) - if not step2_match: return [] + if not step2_match: + # Fallback: Suche nach "Zielbranche" irgendwo im Text + match = re.search(r'Zielbranche\s*\|?\s*([^|\n]+)', md, re.IGNORECASE) + if match: + return [s.strip() for s in match.group(1).split(',')] + return [] table_lines = [] in_table = False @@ -132,13 +140,37 @@ def _extract_target_industries_from_context(context_content): if len(cells) > col_idx: industries.append(cells[col_idx]) return list(set(industries)) +def _extract_json_from_text(text): + """ + Versucht, ein JSON-Objekt aus einem Textstring zu extrahieren, + unabhängig von Markdown-Formatierung (```json ... ```). + """ + try: + # 1. Versuch: Direktersatz von Markdown-Tags (falls vorhanden) + clean_text = text.replace("```json", "").replace("```", "").strip() + return json.loads(clean_text) + except json.JSONDecodeError: + pass + + try: + # 2. Versuch: Regex Suche nach dem ersten { und letzten } + json_match = re.search(r"(\{[\s\S]*\})", text) + if json_match: + return json.loads(json_match.group(1)) + except json.JSONDecodeError: + pass + + logger.error(f"JSON Parsing fehlgeschlagen. Roher Text: {text[:500]}...") + return None + def generate_search_strategy(reference_url, context_content): logger.info(f"Generating strategy for {reference_url}") api_key = load_gemini_api_key() target_industries = _extract_target_industries_from_context(context_content) homepage_text = get_website_text(reference_url) - GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1/models/gemini-2.5-pro:generateContent?key={api_key}" + # Switch to stable 2.5-pro model (which works for v1beta) + GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}" prompt = f""" You are a B2B Market Intelligence Architect. @@ -150,19 +182,30 @@ def generate_search_strategy(reference_url, context_content): {', '.join(target_industries)} --- REFERENCE CLIENT HOMEPAGE --- - {homepage_text} + {homepage_text[:10000] if homepage_text else "No Homepage Text"} - TASK: - 1. Create a 1-sentence 'summaryOfOffer'. - 2. Define an 'idealCustomerProfile' based on the reference client. - 3. Identify 3-5 'signals'. - - FOR EACH SIGNAL, you MUST define a 'proofStrategy': - - 'likelySource': Where to find the proof (e.g., "Datenschutz", "Jobs", "Case Studies", "Homepage", "Press"). - - 'searchQueryTemplate': A specific Google search query template to find this proof. Use '{{COMPANY}}' as placeholder for the company name. - Example: "site:{{COMPANY}} 'it-leiter' sap" or "{{COMPANY}} nachhaltigkeitsbericht 2024 filetype:pdf". + --- TASK --- + Based on the context and the reference client's homepage, develop a search strategy to find similar companies (competitors/lookalikes) and audit them to find sales triggers. - STRICTLY output only valid JSON: + 1. **summaryOfOffer**: A 1-sentence summary of what the reference client sells. + 2. **idealCustomerProfile**: A concise definition of the Ideal Customer Profile (ICP) based on the reference client. + 3. **signals**: Identify exactly 4 specific digital signals. + - **CRITICAL**: One signal MUST be "Technographic / Incumbent Search". It must look for existing competitor software or legacy systems that our offer replaces or complements (e.g., "Uses SAP Ariba", "Has Supplier Portal", "Uses Salesforce"). + - The other 3 signals should focus on business pains or strategic fit (e.g., "Sustainability Report", "Supply Chain Complexity"). + + --- SIGNAL DEFINITION --- + For EACH signal, you MUST provide: + - `id`: A unique ID (e.g., "sig_1"). + - `name`: A short, descriptive name. + - `description`: What does this signal indicate? + - `targetPageKeywords`: A list of 3-5 keywords to look for on a company's website (e.g., ["career", "jobs"] for a hiring signal). + - `proofStrategy`: An object containing: + - `likelySource`: Where on the website or web is this info found? (e.g., "Careers Page"). + - `searchQueryTemplate`: A Google search query to find this. Use `{{COMPANY}}` as a placeholder for the company name. + Example: `site:{{COMPANY}} "software engineer" OR "developer"` + + --- OUTPUT FORMAT --- + Return ONLY a valid JSON object. {{ "summaryOfOffer": "...", "idealCustomerProfile": "...", @@ -171,59 +214,103 @@ def generate_search_strategy(reference_url, context_content): "id": "sig_1", "name": "...", "description": "...", - "targetPageKeywords": ["homepage"], + "targetPageKeywords": ["..."], "proofStrategy": {{ "likelySource": "...", "searchQueryTemplate": "..." }} - }} + }}, + ... ] }} """ payload = {"contents": [{"parts": [{"text": prompt}]}]} + logger.info("Sende Anfrage an Gemini API...") + # logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}") try: response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'}) response.raise_for_status() res_json = response.json() + logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).") + text = res_json['candidates'][0]['content']['parts'][0]['text'] - if "```json" in text: text = text.split("```json")[1].split("```")[0].strip() - return json.loads(text) + result = _extract_json_from_text(text) + + if not result: + raise ValueError("Konnte kein valides JSON extrahieren") + + return result + except Exception as e: logger.error(f"Strategy generation failed: {e}") - return {"error": str(e)} + # Return fallback to avoid frontend crash + return { + "summaryOfOffer": "Error generating strategy. Please check logs.", + "idealCustomerProfile": "Error generating ICP. Please check logs.", + "signals": [] + } def identify_competitors(reference_url, target_market, industries, summary_of_offer=None): logger.info(f"Identifying competitors for {reference_url}") api_key = load_gemini_api_key() - GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1/models/gemini-2.5-pro:generateContent?key={api_key}" + # Switch to stable 2.5-pro model + GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}" prompt = f""" - Find 3-5 competitors/lookalikes for the company at {reference_url}. - Offer context: {summary_of_offer} - Target Market: {target_market} - Industries: {', '.join(industries)} + You are a B2B Market Analyst. Find 3-5 direct competitors or highly similar companies (lookalikes) for the company at `{reference_url}`. - Categorize into 'localCompetitors', 'nationalCompetitors', 'internationalCompetitors'. - Return ONLY JSON. + --- CONTEXT --- + - Offer: {summary_of_offer} + - Target Market: {target_market} + - Relevant Industries: {', '.join(industries)} + + --- TASK --- + Identify competitors and categorize them into three groups: + 1. 'localCompetitors': Competitors in the same immediate region/city. + 2. 'nationalCompetitors': Competitors operating across the same country. + 3. 'internationalCompetitors': Global players. + + For EACH competitor, you MUST provide: + - `id`: A unique, URL-friendly identifier (e.g., "competitor-name-gmbh"). + - `name`: The official, full name of the company. + - `description`: A concise explanation of why they are a competitor. + + --- OUTPUT FORMAT --- + Return ONLY a valid JSON object with the following structure: + {{ + "localCompetitors": [ {{ "id": "...", "name": "...", "description": "..." }} ], + "nationalCompetitors": [ ... ], + "internationalCompetitors": [ ... ] + }} """ payload = {"contents": [{"parts": [{"text": prompt}]}]} + logger.info("Sende Anfrage an Gemini API...") + # logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}") try: response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'}) response.raise_for_status() res_json = response.json() + logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).") + text = res_json['candidates'][0]['content']['parts'][0]['text'] - if "```json" in text: text = text.split("```json")[1].split("```")[0].strip() - return json.loads(text) + result = _extract_json_from_text(text) + + if not result: + raise ValueError("Konnte kein valides JSON extrahieren") + + return result + except Exception as e: logger.error(f"Competitor identification failed: {e}") - return {"error": str(e)} + return {"localCompetitors": [], "nationalCompetitors": [], "internationalCompetitors": []} def analyze_company(company_name, strategy, target_market): logger.info(f"--- STARTING DEEP TECH AUDIT FOR: {company_name} ---") api_key = load_gemini_api_key() - GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1/models/gemini-2.5-pro:generateContent?key={api_key}" + # Switch to stable 2.5-pro model + GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}" # 1. Website Finding (SerpAPI fallback to Gemini) url = None @@ -235,11 +322,30 @@ def analyze_company(company_name, strategy, target_market): if not url: # Fallback: Frage Gemini (Low Confidence) logger.info("Keine URL via SerpAPI, frage Gemini...") - prompt_url = f"Find the official website URL for '{company_name}' in '{target_market}'. Output ONLY the URL." + prompt_url = f"What is the official homepage URL for the company '{company_name}' in the market '{target_market}'? Respond with ONLY the single, complete URL and nothing else." + payload_url = {"contents": [{"parts": [{"text": prompt_url}]}]} + logger.info("Sende Anfrage an Gemini API (URL Fallback)...") + # logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload_url, indent=2)}") try: - res = requests.post(GEMINI_API_URL, json={"contents": [{"parts": [{"text": prompt_url}]}]}, headers={'Content-Type': 'application/json'}) - url = res.json()['candidates'][0]['content']['parts'][0]['text'].strip() - except: pass + res = requests.post(GEMINI_API_URL, json=payload_url, headers={'Content-Type': 'application/json'}, timeout=15) + res.raise_for_status() + res_json = res.json() + logger.info(f"Gemini API-Antwort erhalten (Status: {res.status_code}).") + + candidate = res_json.get('candidates', [{}])[0] + content = candidate.get('content', {}).get('parts', [{}])[0] + text_response = content.get('text', '').strip() + + url_match = re.search(r'(https?://[^\s"]+)', text_response) + if url_match: + url = url_match.group(1) + logger.info(f"Gemini Fallback hat URL gefunden: {url}") + else: + logger.warning(f"Keine gültige URL in Gemini-Antwort gefunden: '{text_response}'") + + except Exception as e: + logger.error(f"Gemini URL Fallback failed: {e}") + pass if not url or not url.startswith("http"): return {"error": f"Could not find website for {company_name}"} @@ -248,25 +354,72 @@ def analyze_company(company_name, strategy, target_market): homepage_text = get_website_text(url) if not homepage_text: return {"error": f"Could not scrape website {url}"} + + homepage_text = re.sub(r'[^\x20-\x7E\n\r\t]', '', homepage_text) - # 3. Targeted Signal Search (The "Hunter" Phase) + # --- ENHANCED: EXTERNAL TECHNOGRAPHIC INTELLIGENCE --- + # Suche aktiv nach Wettbewerbern, nicht nur auf der Firmenwebsite. + tech_evidence = [] + + # Liste bekannter Wettbewerber / Incumbents + known_incumbents = [ + "SAP Ariba", "Jaggaer", "Coupa", "SynerTrade", "Ivalua", + "ServiceNow", "Salesforce", "Oracle SCM", "Zycus", "GEP", + "SupplyOn", "EcoVadis", "IntegrityNext" + ] + + # Suche 1: Direkte Verbindung zu Software-Anbietern (Case Studies, News, etc.) + # Wir bauen eine Query mit OR, um API-Calls zu sparen. + # Splitte in 2 Gruppen, um Query-Länge im Rahmen zu halten + half = len(known_incumbents) // 2 + group1 = " OR ".join([f'"{inc}"' for inc in known_incumbents[:half]]) + group2 = " OR ".join([f'"{inc}"' for inc in known_incumbents[half:]]) + + tech_queries = [ + f'"{company_name}" ({group1})', + f'"{company_name}" ({group2})', + f'"{company_name}" "supplier portal" login' # Suche nach dem Portal selbst + ] + + logger.info(f"Starte erweiterte Tech-Stack-Suche für {company_name}...") + for q in tech_queries: + logger.info(f"Tech Search: {q}") + results = serp_search(q, num_results=4) # Etwas mehr Ergebnisse + if results: + for r in results: + tech_evidence.append(f"- Found: {r['title']}\n Snippet: {r['snippet']}\n Link: {r['link']}") + + tech_evidence_text = "\n".join(tech_evidence) + # --- END ENHANCED TECH SEARCH --- + + # 3. Targeted Signal Search (The "Hunter" Phase) - Basierend auf Strategy signal_evidence = [] # Firmographics Search firmographics_results = serp_search(f"{company_name} Umsatz Mitarbeiterzahl 2023") firmographics_context = "\n".join([f"- {r['snippet']} ({r['link']})" for r in firmographics_results]) - # Signal Searches + # Signal Searches (Original Strategy) signals = strategy.get('signals', []) for signal in signals: + # Überspringe Signale, die wir schon durch die Tech-Suche massiv abgedeckt haben, + # es sei denn, sie sind sehr spezifisch. + if "incumbent" in signal['id'].lower() or "tech" in signal['id'].lower(): + logger.info(f"Skipping generic signal search '{signal['name']}' in favor of Enhanced Tech Search.") + continue + proof_strategy = signal.get('proofStrategy', {}) query_template = proof_strategy.get('searchQueryTemplate') search_context = "" if query_template: - # Domain aus URL extrahieren für bessere Queries (z.B. site:firma.de) - domain = url.split("//")[-1].split("/")[0].replace("www.", "") - query = query_template.replace("{{COMPANY}}", company_name).replace("{{domain}}", domain) + try: + domain = url.split("//")[-1].split("/")[0].replace("www.", "") + except: + domain = "" + + query = query_template.replace("{{COMPANY}}", company_name).replace("{COMPANY}", company_name) + query = query.replace("{{domain}}", domain).replace("{domain}", domain) logger.info(f"Signal Search '{signal['name']}': {query}") results = serp_search(query, num_results=3) @@ -280,31 +433,39 @@ def analyze_company(company_name, strategy, target_market): evidence_text = "\n\n".join(signal_evidence) prompt = f""" - You are a B2B Market Intelligence Auditor. - Audit the company '{company_name}' ({url}) based on the collected evidence. + You are a Strategic B2B Sales Consultant. + Analyze the company '{company_name}' ({url}) to create a "best-of-breed" sales pitch strategy. - --- STRATEGY (Signals to find) --- + --- STRATEGY (What we are looking for) --- {json.dumps(signals, indent=2)} - --- EVIDENCE SOURCE 1: HOMEPAGE CONTENT --- - {homepage_text[:10000]} + --- EVIDENCE 1: EXTERNAL TECH-STACK INTELLIGENCE (CRITICAL) --- + Look closely here for mentions of competitors like SAP Ariba, Jaggaer, SynerTrade, Coupa, etc. + {tech_evidence_text} - --- EVIDENCE SOURCE 2: FIRMOGRAPHICS SEARCH --- + --- EVIDENCE 2: HOMEPAGE CONTENT --- + {homepage_text[:8000]} + + --- EVIDENCE 3: FIRMOGRAPHICS SEARCH --- {firmographics_context} - --- EVIDENCE SOURCE 3: TARGETED SIGNAL SEARCH RESULTS --- + --- EVIDENCE 4: TARGETED SIGNAL SEARCH RESULTS --- {evidence_text} ---------------------------------- TASK: - 1. **Firmographics**: Estimate Revenue and Employees based on Source 1 & 2. Be realistic. Use buckets if unsure. - 2. **Status**: Determine 'status' (Bestandskunde, Nutzt Wettbewerber, Greenfield, Unklar). - 3. **Evaluate Signals**: For each signal, decide 'value' (Yes/No/Partial). - - **CRITICAL**: You MUST cite your source for the 'proof'. - - If found in Source 3 (Search), write: "Found in job posting/doc: [Snippet]" and include the URL. - - If found in Source 1 (Homepage), write: "On homepage: [Quote]". - - If not found, write: "Not found". - 4. **Recommendation**: 1-sentence verdict. + 1. **Firmographics**: Estimate Revenue and Employees. + 2. **Technographic Audit**: Look for specific competitor software or legacy systems mentioned in EVIDENCE 1 (e.g., "Partner of SynerTrade", "Login to Jaggaer Portal"). + 3. **Status**: + - Set to "Nutzt Wettbewerber" if ANY competitor technology is found (Ariba, Jaggaer, SynerTrade, Coupa, etc.). + - Set to "Greenfield" ONLY if absolutely no competitor tech is found. + - Set to "Bestandskunde" if they already use our solution. + 4. **Evaluate Signals**: For each signal, provide a "value" (Yes/No/Partial) and "proof". + 5. **Recommendation (Pitch Strategy)**: + - DO NOT write a generic verdict. + - If they use a competitor (e.g., Ariba), explain how to position against it (e.g., "Pitch as a specialized add-on for logistics, filling Ariba's gaps"). + - If Greenfield, explain the entry point. + - **Tone**: Strategic, insider-knowledge, specific. STRICTLY output only JSON: {{ @@ -326,21 +487,134 @@ def analyze_company(company_name, strategy, target_market): } try: + logger.info("Sende Audit-Anfrage an Gemini API...") + # logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}") response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'}) response.raise_for_status() response_data = response.json() - response_text = response_data['candidates'][0]['content']['parts'][0]['text'] + logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).") - if response_text.startswith('```json'): - response_text = response_text.split('```json')[1].split('```')[0].strip() + text = response_data['candidates'][0]['content']['parts'][0]['text'] + result = _extract_json_from_text(text) + + if not result: + raise ValueError("Konnte kein valides JSON extrahieren") - result = json.loads(response_text) - result['dataSource'] = "Digital Trace Audit (Deep Dive)" # Mark as verified + result['dataSource'] = "Digital Trace Audit (Deep Dive)" logger.info(f"Audit für {company_name} erfolgreich abgeschlossen.") return result except Exception as e: logger.error(f"Audit failed for {company_name}: {e}") - return {"error": str(e)} + return { + "companyName": company_name, + "status": "Unklar / Manuelle Prüfung", + "revenue": "Error", + "employees": "Error", + "tier": "Tier 3", + "dynamicAnalysis": {}, + "recommendation": f"Audit failed due to API Error: {str(e)}", + "dataSource": "Error" + } + +def generate_outreach_campaign(company_data_json, knowledge_base_content, reference_url): + """ + Erstellt personalisierte E-Mail-Kampagnen basierend auf Audit-Daten und einer strukturierten Wissensdatenbank. + Generiert spezifische Ansprachen für verschiedene Rollen (Personas). + """ + company_name = company_data_json.get('companyName', 'Unknown') + logger.info(f"--- STARTING ROLE-BASED OUTREACH GENERATION FOR: {company_name} ---") + + api_key = load_gemini_api_key() + # Switch to stable 2.5-pro model + GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}" + + prompt = f""" + You are a Strategic Key Account Manager and deeply technical Industry Insider. + Your goal is to write highly personalized, **operationally specific** outreach emails to the company '{company_name}'. + + --- INPUT 1: YOUR IDENTITY & STRATEGY (The Sender) --- + The following Markdown contains your company's identity, products, and strategy. + You act as the sales representative for the company described here: + {knowledge_base_content} + + --- INPUT 2: THE TARGET COMPANY (Audit Facts) --- + {json.dumps(company_data_json, indent=2)} + + --- INPUT 3: THE REFERENCE CLIENT (Social Proof) --- + Reference Client URL: {reference_url} + + CRITICAL: This 'Reference Client' is an existing happy customer of ours. They are the "Seed Company" used to find the Target Company (Lookalike). + You MUST mention this Reference Client by name (derive it from the URL, e.g., 'schindler.com' -> 'Schindler') to establish trust. + + --- TASK --- + 1. **Analyze**: Match the Target Company (Input 2) to the most relevant 'Zielbranche/Segment' from the Knowledge Base (Input 1). + 2. **Select Roles**: Identify the top 2 most distinct and relevant 'Rollen' (Personas) from the Knowledge Base for this specific company situation. + - *Example:* If the audit says they use a competitor (risk of lock-in), select a role like "Strategic Purchaser" or "Head of R&D" who cares about "Second Source". + - *Example:* If they have quality issues or complex logistics, pick "Quality Manager" or "Logistics Head". + 3. **Draft Campaigns**: For EACH of the 2 selected roles, write a 3-step email sequence. + + --- TONE & STYLE GUIDELINES (CRITICAL) --- + - **Perspective:** Operational Expert & Insider. NOT generic marketing. + - **Be Gritty & Specific:** Do NOT use fluff like "optimize efficiency" or "streamline processes" without context. + - Use **hard, operational keywords** from the Knowledge Base (e.g., "ASNs", "VMI", "8D-Reports", "Maverick Buying", "Bandstillstand", "Sonderfahrten", "PPAP"). + - Show you understand their daily pain. + - **Narrative Arc:** + 1. "I noticed [Fact from Audit/Tech Stack]..." (e.g., "You rely on PDF orders via Jaggaer...") + 2. "In [Industry], this often leads to [Operational Pain]..." (e.g., "missing ASNs causing delays at the hub.") + 3. "We helped [Reference Client Name] solve exactly this by [Specific Solution]..." + 4. "Let's discuss how to get [Operational Gain] without replacing your ERP." + - **Mandatory Social Proof:** You MUST mention the Reference Client Name (from Input 3) in the email body or footer. + - **Language:** German (as the inputs are German). + + --- OUTPUT FORMAT (Strictly JSON) --- + Returns a list of campaigns. + [ + {{ + "target_role": "Name of the Role (e.g. Leiter F&E)", + "rationale": "Why this role? (e.g. Because the audit found dependency on Competitor X...)", + "emails": [ + {{ + "subject": "Specific Subject Line", + "body": "Email Body..." + }}, + {{ + "subject": "Re: Subject", + "body": "Follow-up Body..." + }}, + {{ + "subject": "Final Check", + "body": "Final Body..." + }} + ] + }}, + ... (Second Role) + ] + """ + + payload = { + "contents": [{"parts": [{"text": prompt}]}], + "generationConfig": {"response_mime_type": "application/json"} + } + + try: + logger.info("Sende Campaign-Anfrage an Gemini API...") + # logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}") + response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'}) + response.raise_for_status() + response_data = response.json() + logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).") + # logger.debug(f"Rohe API-Antwort (JSON): {json.dumps(response_data, indent=2)}") + + text = response_data['candidates'][0]['content']['parts'][0]['text'] + result = _extract_json_from_text(text) + + if not result: + raise ValueError("Konnte kein valides JSON extrahieren") + + return result + except Exception as e: + logger.error(f"Campaign generation failed for {company_name}: {e}") + return [{"error": str(e)}] def main(): parser = argparse.ArgumentParser() @@ -351,6 +625,7 @@ def main(): parser.add_argument("--company_name") parser.add_argument("--strategy_json") parser.add_argument("--summary_of_offer") + parser.add_argument("--company_data_file") # For generate_outreach args = parser.parse_args() if args.mode == "generate_strategy": @@ -365,6 +640,11 @@ def main(): elif args.mode == "analyze_company": strategy = json.loads(args.strategy_json) print(json.dumps(analyze_company(args.company_name, strategy, args.target_market))) + elif args.mode == "generate_outreach": + with open(args.company_data_file, "r") as f: company_data = json.load(f) + with open(args.context_file, "r") as f: knowledge_base = f.read() + print(json.dumps(generate_outreach_campaign(company_data, knowledge_base, args.reference_url))) + if __name__ == "__main__": main() diff --git a/readme.md b/readme.md index 0b6949a0..58820d43 100644 --- a/readme.md +++ b/readme.md @@ -706,4 +706,4 @@ Der Prozess für den Benutzer bleibt weitgehend gleich, ist aber technisch solid **Schritt 4 & 5: Reporting & Personalisierte Ansprache** - **Ergebnis-Darstellung:** Die faktenbasierten Analyseergebnisse werden im Frontend angezeigt. -- **Kampagnen-Generierung:** Die KI nutzt die validierten "Digitalen Signale" als Aufhänger, um hyper-personalisierte und extrem treffsichere E-Mail-Entwürfe zu erstellen. \ No newline at end of file +- **Kampagnen-Generierung:** Die KI nutzt die validierten "Digitalen Signale" als Aufhänger, um hyper-personalisierte und extrem treffsichere E-Mail-Entwürfe zu erstellen. Dabei werden **operative Schmerzpunkte ("Grit")** und **Social Proof** (Referenzkunden) aggressiv genutzt, um Insider-Status zu demonstrieren.