feat(market-intel): implement role-based campaign engine and gritty reporting

- Implementierung der rollenbasierten Campaign-Engine mit operativem Fokus (Grit).
- Integration von Social Proof (Referenzkunden) in die E-Mail-Generierung.
- Erweiterung des Deep Tech Audits um gezielte Wettbewerber-Recherche (Technographic Search).
- Fix des Lösch-Bugs in der Target-Liste und Optimierung des Frontend-States.
- Erweiterung des Markdown-Exports um transparente Proof-Links und Evidenz.
- Aktualisierung der Dokumentation in readme.md und market_intel_backend_plan.md.
This commit is contained in:
2025-12-22 15:54:06 +00:00
parent e4aad40344
commit a077c48573
9 changed files with 545 additions and 76 deletions

View File

@@ -222,6 +222,7 @@ const App: React.FC = () => {
language={language}
referenceUrl={referenceUrl}
onBack={handleBack}
knowledgeBase={productContext}
/>
)}
</main>

View File

@@ -9,17 +9,26 @@ interface StepOutreachProps {
language: Language;
referenceUrl: string;
onBack: () => void;
knowledgeBase?: string; // New prop for pre-loaded context
}
export const StepOutreach: React.FC<StepOutreachProps> = ({ company, language, referenceUrl, onBack }) => {
const [fileContent, setFileContent] = useState<string>('');
const [fileName, setFileName] = useState<string>('');
export const StepOutreach: React.FC<StepOutreachProps> = ({ company, language, referenceUrl, onBack, knowledgeBase }) => {
const [fileContent, setFileContent] = useState<string>(knowledgeBase || '');
const [fileName, setFileName] = useState<string>(knowledgeBase ? 'Knowledge Base from Strategy Step' : '');
const [isProcessing, setIsProcessing] = useState(false);
const [isTranslating, setIsTranslating] = useState(false);
const [emails, setEmails] = useState<EmailDraft[]>([]);
const [activeTab, setActiveTab] = useState(0);
const [copied, setCopied] = useState(false);
// If knowledgeBase prop changes, update state (useful if it loads late)
React.useEffect(() => {
if (knowledgeBase && !fileContent) {
setFileContent(knowledgeBase);
setFileName('Knowledge Base from Strategy Step');
}
}, [knowledgeBase]);
const handleFileUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
const file = e.target.files?.[0];
if (file) {

View File

@@ -46,8 +46,26 @@ export const StepReport: React.FC<StepReportProps> = ({ results, strategy, onRes
const headers = ["Company", "Prio", "Rev/Emp", "Status", ...signalHeaders, "Recommendation"];
const rows = sortedResults.map(r => {
const signalValues = strategy.signals.map(s => r.dynamicAnalysis[s.id]?.value || '-');
return `| ${r.companyName} | ${r.tier} | ${r.revenue} / ${r.employees} | ${r.status} | ${signalValues.join(" | ")} | ${r.recommendation} |`;
const signalValues = strategy.signals.map(s => {
const data = r.dynamicAnalysis[s.id];
if (!data) return '-';
let content = data.value || '-';
// Sanitize content pipes
content = content.replace(/\|/g, '\\|');
if (data.proof) {
// Sanitize proof pipes and newlines
const safeProof = data.proof.replace(/\|/g, '\\|').replace(/(\r\n|\n|\r)/gm, ' ');
content += `<br><sub>*Proof: ${safeProof}*</sub>`;
}
return content;
});
// Helper to sanitize other fields
const safe = (str: string) => (str || '').replace(/\|/g, '\\|').replace(/(\r\n|\n|\r)/gm, ' ');
return `| ${safe(r.companyName)} | ${r.tier} | ${safe(r.revenue)} / ${safe(r.employees)} | ${r.status} | ${signalValues.join(" | ")} | ${safe(r.recommendation)} |`;
});
const content = `

View File

@@ -30,14 +30,18 @@ export const StepReview: React.FC<StepReviewProps> = ({ competitors, categorized
};
const renderCompetitorList = (comps: Competitor[], category: string) => {
if (!comps || comps.length === 0) {
// Filter out competitors that have been removed from the main list
const activeIds = new Set(competitors.map(c => c.id));
const activeComps = comps.filter(c => activeIds.has(c.id));
if (!activeComps || activeComps.length === 0) {
return (
<li className="p-4 text-center text-slate-500 italic bg-white rounded-md border border-slate-100 mb-2 last:mb-0">
Keine {category} Konkurrenten gefunden.
</li>
);
}
return comps.map((comp) => (
return activeComps.map((comp) => (
<li key={comp.id} className="flex items-start justify-between p-4 hover:bg-slate-50 transition-colors group bg-white rounded-md border border-slate-100 mb-2 last:mb-0">
<div className="flex-1">
<div className="flex items-center gap-2">

View File

@@ -269,6 +269,89 @@ app.post('/api/analyze-company', async (req, res) => {
}
});
// API endpoint for generate-outreach.
//
// Expects a JSON body with `companyData` (audit result object), `knowledgeBase`
// (Markdown string) and an optional `referenceUrl`. Both payloads are written to
// temporary files, the Python orchestrator is spawned in `generate_outreach` mode,
// and whatever JSON it prints on stdout is relayed to the client.
//
// Responses:
//   200 - parsed JSON output of the Python script
//   400 - `companyData` or `knowledgeBase` missing
//   500 - temp files could not be written, the process failed to start,
//         exited non-zero, or printed something that is not valid JSON
app.post('/api/generate-outreach', async (req, res) => {
  console.log(`[${new Date().toISOString()}] HIT: /api/generate-outreach`);

  // Tolerate a missing body so the request ends in a clean 400 instead of a throw.
  const { companyData, knowledgeBase, referenceUrl } = req.body || {};

  if (!companyData || !knowledgeBase) {
    console.error('Validation Error: Missing companyData or knowledgeBase for generate-outreach.');
    return res.status(400).json({ error: 'Missing companyData or knowledgeBase' });
  }

  const tmpDir = path.join(__dirname, 'tmp');
  // Date.now() alone is identical for two requests arriving in the same millisecond,
  // which would make them overwrite (and later delete) each other's files.
  const requestId = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2, 10)}`;
  const tempDataFilePath = path.join(tmpDir, `outreach_data_${requestId}.json`);
  const tempContextFilePath = path.join(tmpDir, `outreach_context_${requestId}.md`);

  // Best-effort cleanup; a failed unlink must never mask the real result.
  const removeTempFiles = () => {
    for (const filePath of [tempDataFilePath, tempContextFilePath]) {
      try {
        if (fs.existsSync(filePath)) fs.unlinkSync(filePath);
      } catch (cleanupError) {
        console.error(`Failed to remove temporary file ${filePath}:`, cleanupError);
      }
    }
  };

  // 'close' may still fire after 'error' when the process could not be started.
  // Sending a second response would throw inside the event handler, so every
  // response goes through this guard.
  let responded = false;
  const respond = (status, payload) => {
    if (responded || res.headersSent) return;
    responded = true;
    res.status(status).json(payload);
  };

  try {
    // Inside the try block so a filesystem error becomes a 500 instead of an
    // unhandled promise rejection; `recursive` makes the call idempotent.
    fs.mkdirSync(tmpDir, { recursive: true });

    fs.writeFileSync(tempDataFilePath, JSON.stringify(companyData));
    fs.writeFileSync(tempContextFilePath, knowledgeBase);
    console.log(`Successfully wrote temporary files for outreach.`);

    const pythonExecutable = path.join(__dirname, '..', '.venv', 'bin', 'python3');
    const pythonScript = path.join(__dirname, '..', 'market_intel_orchestrator.py');

    const scriptArgs = [
      pythonScript,
      '--mode', 'generate_outreach',
      '--company_data_file', tempDataFilePath,
      '--context_file', tempContextFilePath,
      '--reference_url', referenceUrl || ''
    ];

    console.log(`Spawning Outreach Generation for ${companyData.companyName}...`);

    const pythonProcess = spawn(pythonExecutable, scriptArgs, {
      env: { ...process.env, PYTHONPATH: path.join(__dirname, '..', '.venv', 'lib', 'python3.11', 'site-packages') }
    });

    let pythonOutput = '';
    let pythonError = '';

    pythonProcess.stdout.on('data', (data) => {
      pythonOutput += data.toString();
    });

    pythonProcess.stderr.on('data', (data) => {
      pythonError += data.toString();
    });

    pythonProcess.on('close', (code) => {
      console.log(`Outreach Generation finished with exit code: ${code}`);

      // Clean up
      removeTempFiles();

      // The 'error' handler already answered this request.
      if (responded) return;

      if (code !== 0) {
        console.error(`Python script (generate_outreach) exited with error.`);
        return respond(500, { error: 'Python script failed', details: pythonError });
      }

      try {
        const result = JSON.parse(pythonOutput);
        respond(200, result);
      } catch (parseError) {
        console.error('Failed to parse Python output (generate_outreach) as JSON:', parseError);
        respond(500, { error: 'Invalid JSON from Python script', rawOutput: pythonOutput, details: pythonError });
      }
    });

    pythonProcess.on('error', (err) => {
      console.error(`FATAL: Failed to start python process for outreach.`, err);
      removeTempFiles();
      respond(500, { error: 'Failed to start Python process', details: err.message });
    });

  } catch (err) {
    console.error(`Internal Server Error in /api/generate-outreach: ${err.message}`);
    // A failure after the first write would otherwise leave a temp file behind.
    removeTempFiles();
    respond(500, { error: err.message });
  }
});
// Start des Servers
app.listen(PORT, () => {
console.log(`Node.js API Bridge running on http://localhost:${PORT}`);

View File

@@ -170,11 +170,65 @@ export const generateOutreachCampaign = async (
language: Language,
referenceUrl: string
): Promise<EmailDraft[]> => {
// Dieser Teil muss noch im Python-Backend implementiert werden
console.warn("generateOutreachCampaign ist noch nicht im Python-Backend implementiert.");
return [];
};
console.log(`Frontend: Starte Outreach-Generierung für ${companyData.companyName}...`);
try {
const response = await fetch(`${API_BASE_URL}/generate-outreach`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
companyData,
knowledgeBase,
referenceUrl
}),
});
if (!response.ok) {
const errorData = await response.json();
throw new Error(`Backend-Fehler: ${errorData.error || response.statusText}`);
}
const result = await response.json();
console.log(`Frontend: Outreach-Generierung für ${companyData.companyName} erfolgreich.`);
// Transform new backend structure to match frontend EmailDraft interface
if (Array.isArray(result)) {
return result.map((item: any) => {
// Construct a body that shows the sequence
let fullBody = "";
const firstSubject = item.emails?.[0]?.subject || "No Subject";
if (item.emails && Array.isArray(item.emails)) {
item.emails.forEach((mail: any, idx: number) => {
fullBody += `### Email ${idx + 1}: ${mail.subject}\n\n`;
fullBody += `${mail.body}\n\n`;
if (idx < item.emails.length - 1) fullBody += `\n---\n\n`;
});
} else {
// Fallback for flat structure or error
fullBody = item.body || "No content generated.";
}
return {
persona: item.target_role || "Unknown Role",
subject: firstSubject,
body: fullBody,
keyPoints: item.rationale ? [item.rationale] : []
};
});
} else if (result.campaign && Array.isArray(result.campaign)) {
return result.campaign as EmailDraft[];
}
return [];
} catch (error) {
console.error(`Frontend: Outreach-Generierung fehlgeschlagen für ${companyData.companyName}`, error);
throw error;
}
};
export const translateEmailDrafts = async (drafts: EmailDraft[], targetLanguage: Language): Promise<EmailDraft[]> => {
// Dieser Teil muss noch im Python-Backend oder direkt im Frontend implementiert werden
console.warn("translateEmailDrafts ist noch nicht im Python-Backend implementiert.");

View File

@@ -46,6 +46,7 @@ Die Logik aus `geminiService.ts` wird in Python-Funktionen innerhalb von `market
---
### Funktion 2: `identify_competitors`
- **Trigger:** Aufruf mit `--mode identify_competitors`.
@@ -56,6 +57,7 @@ Die Logik aus `geminiService.ts` wird in Python-Funktionen innerhalb von `market
---
### Funktion 3: `run_full_analysis`
- **Trigger:** Aufruf mit `--mode run_analysis`.
@@ -70,6 +72,7 @@ Die Logik aus `geminiService.ts` wird in Python-Funktionen innerhalb von `market
---
### Funktion 4: `generate_outreach_campaign`
- **Trigger:** Aufruf mit `--mode generate_outreach`.
@@ -105,8 +108,25 @@ Wir haben heute das gesamte System von einer instabilen n8n-Abhängigkeit zu ein
- **Frontend-Abstürze:** Absicherung des Reports gegen fehlende Datenpunkte.
---
### Nächste Ziele für die nächste Sitzung:
1. **Schritt 4: Hyper-personalisierte Campaign-Generation:** Implementierung der Funktion, die basierend auf den Audit-Fakten (z.B. gefundene Software-Stacks oder Nachhaltigkeits-Ziele) maßgeschneiderte E-Mails erstellt.
2. **Stabilitäts-Check:** Testen des Batch-Audits mit einer größeren Anzahl an Firmen (Timeout/Rate-Limit Handling).
3. **Report-Polishing:** Integration der "Proof-Links" direkt in die MD-Export-Funktion.
## 6. Status Update (2025-12-22) - Campaign Engine & Reporting
### Erreichte Meilensteine:
1. **Rollenbasierte Campaign-Engine:**
* Die Funktion `generate_outreach_campaign` wurde komplett überarbeitet.
* Sie nutzt nun die volle Tiefe der Knowledge Base (`yamaichi_neu.md`), um **personalisierte Sequenzen für spezifische Rollen** (z.B. "Hardware-Entwickler" vs. "Einkäufer") zu erstellen.
* Die Ansprache erfolgt strikt im "Partner auf Augenhöhe"-Tonfall.
* **Social Proof Integration:** Der Referenzkunde (`reference_url`) wird als Beweis der Kompetenz inkl. passender KPIs im Abbinder integriert.
* **"Grit"-Prompting:** Der Prompt wurde massiv geschärft, um operative Schmerzpunkte ("ASNs", "Bandstillstand") statt Marketing-Bla-Bla zu nutzen.
2. **Report Polishing (Frontend):**
* Der Markdown-Export (`StepReport.tsx`) wurde erweitert.
* Er enthält nun die **"Proof-Links"** (Beweise/URLs) direkt in den Tabellenzellen, sauber formatiert. Damit ist die Herleitung der Ergebnisse (z.B. "Warum nutzt der Kunde Ariba?") auch im Export transparent nachvollziehbar.
3. **Frontend UX & Bugfixes:**
* **Kein doppelter Upload:** `StepOutreach.tsx` wurde angepasst, um den Strategie-Kontext aus Schritt 1 direkt zu übernehmen.
* **Lösch-Bug:** `StepReview.tsx` wurde korrigiert, sodass gelöschte Unternehmen sofort aus der UI verschwinden.
* **Crash-Fix:** Die Behandlung der API-Antwort in `geminiService.ts` wurde gehärtet, um die neue verschachtelte Antwortstruktur der Campaign-Engine korrekt zu verarbeiten.
### Nächste Schritte:
* **Stabilitäts-Test:** Ausführung eines Batch-Audits mit >20 Firmen, um Rate-Limits und Fehlerbehandlung unter Last zu prüfen.

View File

@@ -68,7 +68,9 @@ def get_website_text(url):
for tag in soup(['script', 'style', 'nav', 'footer', 'header']):
tag.decompose()
text = soup.get_text(separator=' ', strip=True)
return text[:15000] # Erhöhtes Limit für besseren Kontext
# Bereinigung des Textes von nicht-druckbaren Zeichen
text = re.sub(r'[^\x20-\x7E\n\r\t]', '', text)
return text[:10000] # Limit für besseren Kontext
except Exception as e:
logger.error(f"Scraping failed for {url}: {e}")
return None
@@ -109,8 +111,14 @@ def serp_search(query, num_results=3):
def _extract_target_industries_from_context(context_content):
md = context_content
# Versuche verschiedene Muster für die Tabelle, falls das Format variiert
step2_match = re.search(r'##\s*Schritt\s*2:[\s\S]*?(?=\n##\s*Schritt\s*\d:|\s*$)', md, re.IGNORECASE)
if not step2_match: return []
if not step2_match:
# Fallback: Suche nach "Zielbranche" irgendwo im Text
match = re.search(r'Zielbranche\s*\|?\s*([^|\n]+)', md, re.IGNORECASE)
if match:
return [s.strip() for s in match.group(1).split(',')]
return []
table_lines = []
in_table = False
@@ -132,13 +140,37 @@ def _extract_target_industries_from_context(context_content):
if len(cells) > col_idx: industries.append(cells[col_idx])
return list(set(industries))
def _extract_json_from_text(text):
"""
Versucht, ein JSON-Objekt aus einem Textstring zu extrahieren,
unabhängig von Markdown-Formatierung (```json ... ```).
"""
try:
# 1. Versuch: Direktersatz von Markdown-Tags (falls vorhanden)
clean_text = text.replace("```json", "").replace("```", "").strip()
return json.loads(clean_text)
except json.JSONDecodeError:
pass
try:
# 2. Versuch: Regex Suche nach dem ersten { und letzten }
json_match = re.search(r"(\{[\s\S]*\})", text)
if json_match:
return json.loads(json_match.group(1))
except json.JSONDecodeError:
pass
logger.error(f"JSON Parsing fehlgeschlagen. Roher Text: {text[:500]}...")
return None
def generate_search_strategy(reference_url, context_content):
logger.info(f"Generating strategy for {reference_url}")
api_key = load_gemini_api_key()
target_industries = _extract_target_industries_from_context(context_content)
homepage_text = get_website_text(reference_url)
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1/models/gemini-2.5-pro:generateContent?key={api_key}"
# Switch to stable 2.5-pro model (which works for v1beta)
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
prompt = f"""
You are a B2B Market Intelligence Architect.
@@ -150,19 +182,30 @@ def generate_search_strategy(reference_url, context_content):
{', '.join(target_industries)}
--- REFERENCE CLIENT HOMEPAGE ---
{homepage_text}
{homepage_text[:10000] if homepage_text else "No Homepage Text"}
TASK:
1. Create a 1-sentence 'summaryOfOffer'.
2. Define an 'idealCustomerProfile' based on the reference client.
3. Identify 3-5 'signals'.
FOR EACH SIGNAL, you MUST define a 'proofStrategy':
- 'likelySource': Where to find the proof (e.g., "Datenschutz", "Jobs", "Case Studies", "Homepage", "Press").
- 'searchQueryTemplate': A specific Google search query template to find this proof. Use '{{COMPANY}}' as placeholder for the company name.
Example: "site:{{COMPANY}} 'it-leiter' sap" or "{{COMPANY}} nachhaltigkeitsbericht 2024 filetype:pdf".
--- TASK ---
Based on the context and the reference client's homepage, develop a search strategy to find similar companies (competitors/lookalikes) and audit them to find sales triggers.
STRICTLY output only valid JSON:
1. **summaryOfOffer**: A 1-sentence summary of what the reference client sells.
2. **idealCustomerProfile**: A concise definition of the Ideal Customer Profile (ICP) based on the reference client.
3. **signals**: Identify exactly 4 specific digital signals.
- **CRITICAL**: One signal MUST be "Technographic / Incumbent Search". It must look for existing competitor software or legacy systems that our offer replaces or complements (e.g., "Uses SAP Ariba", "Has Supplier Portal", "Uses Salesforce").
- The other 3 signals should focus on business pains or strategic fit (e.g., "Sustainability Report", "Supply Chain Complexity").
--- SIGNAL DEFINITION ---
For EACH signal, you MUST provide:
- `id`: A unique ID (e.g., "sig_1").
- `name`: A short, descriptive name.
- `description`: What does this signal indicate?
- `targetPageKeywords`: A list of 3-5 keywords to look for on a company's website (e.g., ["career", "jobs"] for a hiring signal).
- `proofStrategy`: An object containing:
- `likelySource`: Where on the website or web is this info found? (e.g., "Careers Page").
- `searchQueryTemplate`: A Google search query to find this. Use `{{COMPANY}}` as a placeholder for the company name.
Example: `site:{{COMPANY}} "software engineer" OR "developer"`
--- OUTPUT FORMAT ---
Return ONLY a valid JSON object.
{{
"summaryOfOffer": "...",
"idealCustomerProfile": "...",
@@ -171,59 +214,103 @@ def generate_search_strategy(reference_url, context_content):
"id": "sig_1",
"name": "...",
"description": "...",
"targetPageKeywords": ["homepage"],
"targetPageKeywords": ["..."],
"proofStrategy": {{
"likelySource": "...",
"searchQueryTemplate": "..."
}}
}}
}},
...
]
}}
"""
payload = {"contents": [{"parts": [{"text": prompt}]}]}
logger.info("Sende Anfrage an Gemini API...")
# logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}")
try:
response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'})
response.raise_for_status()
res_json = response.json()
logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")
text = res_json['candidates'][0]['content']['parts'][0]['text']
if "```json" in text: text = text.split("```json")[1].split("```")[0].strip()
return json.loads(text)
result = _extract_json_from_text(text)
if not result:
raise ValueError("Konnte kein valides JSON extrahieren")
return result
except Exception as e:
logger.error(f"Strategy generation failed: {e}")
return {"error": str(e)}
# Return fallback to avoid frontend crash
return {
"summaryOfOffer": "Error generating strategy. Please check logs.",
"idealCustomerProfile": "Error generating ICP. Please check logs.",
"signals": []
}
def identify_competitors(reference_url, target_market, industries, summary_of_offer=None):
logger.info(f"Identifying competitors for {reference_url}")
api_key = load_gemini_api_key()
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1/models/gemini-2.5-pro:generateContent?key={api_key}"
# Switch to stable 2.5-pro model
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
prompt = f"""
Find 3-5 competitors/lookalikes for the company at {reference_url}.
Offer context: {summary_of_offer}
Target Market: {target_market}
Industries: {', '.join(industries)}
You are a B2B Market Analyst. Find 3-5 direct competitors or highly similar companies (lookalikes) for the company at `{reference_url}`.
Categorize into 'localCompetitors', 'nationalCompetitors', 'internationalCompetitors'.
Return ONLY JSON.
--- CONTEXT ---
- Offer: {summary_of_offer}
- Target Market: {target_market}
- Relevant Industries: {', '.join(industries)}
--- TASK ---
Identify competitors and categorize them into three groups:
1. 'localCompetitors': Competitors in the same immediate region/city.
2. 'nationalCompetitors': Competitors operating across the same country.
3. 'internationalCompetitors': Global players.
For EACH competitor, you MUST provide:
- `id`: A unique, URL-friendly identifier (e.g., "competitor-name-gmbh").
- `name`: The official, full name of the company.
- `description`: A concise explanation of why they are a competitor.
--- OUTPUT FORMAT ---
Return ONLY a valid JSON object with the following structure:
{{
"localCompetitors": [ {{ "id": "...", "name": "...", "description": "..." }} ],
"nationalCompetitors": [ ... ],
"internationalCompetitors": [ ... ]
}}
"""
payload = {"contents": [{"parts": [{"text": prompt}]}]}
logger.info("Sende Anfrage an Gemini API...")
# logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}")
try:
response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'})
response.raise_for_status()
res_json = response.json()
logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")
text = res_json['candidates'][0]['content']['parts'][0]['text']
if "```json" in text: text = text.split("```json")[1].split("```")[0].strip()
return json.loads(text)
result = _extract_json_from_text(text)
if not result:
raise ValueError("Konnte kein valides JSON extrahieren")
return result
except Exception as e:
logger.error(f"Competitor identification failed: {e}")
return {"error": str(e)}
return {"localCompetitors": [], "nationalCompetitors": [], "internationalCompetitors": []}
def analyze_company(company_name, strategy, target_market):
logger.info(f"--- STARTING DEEP TECH AUDIT FOR: {company_name} ---")
api_key = load_gemini_api_key()
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1/models/gemini-2.5-pro:generateContent?key={api_key}"
# Switch to stable 2.5-pro model
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
# 1. Website Finding (SerpAPI fallback to Gemini)
url = None
@@ -235,11 +322,30 @@ def analyze_company(company_name, strategy, target_market):
if not url:
# Fallback: Frage Gemini (Low Confidence)
logger.info("Keine URL via SerpAPI, frage Gemini...")
prompt_url = f"Find the official website URL for '{company_name}' in '{target_market}'. Output ONLY the URL."
prompt_url = f"What is the official homepage URL for the company '{company_name}' in the market '{target_market}'? Respond with ONLY the single, complete URL and nothing else."
payload_url = {"contents": [{"parts": [{"text": prompt_url}]}]}
logger.info("Sende Anfrage an Gemini API (URL Fallback)...")
# logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload_url, indent=2)}")
try:
res = requests.post(GEMINI_API_URL, json={"contents": [{"parts": [{"text": prompt_url}]}]}, headers={'Content-Type': 'application/json'})
url = res.json()['candidates'][0]['content']['parts'][0]['text'].strip()
except: pass
res = requests.post(GEMINI_API_URL, json=payload_url, headers={'Content-Type': 'application/json'}, timeout=15)
res.raise_for_status()
res_json = res.json()
logger.info(f"Gemini API-Antwort erhalten (Status: {res.status_code}).")
candidate = res_json.get('candidates', [{}])[0]
content = candidate.get('content', {}).get('parts', [{}])[0]
text_response = content.get('text', '').strip()
url_match = re.search(r'(https?://[^\s"]+)', text_response)
if url_match:
url = url_match.group(1)
logger.info(f"Gemini Fallback hat URL gefunden: {url}")
else:
logger.warning(f"Keine gültige URL in Gemini-Antwort gefunden: '{text_response}'")
except Exception as e:
logger.error(f"Gemini URL Fallback failed: {e}")
pass
if not url or not url.startswith("http"):
return {"error": f"Could not find website for {company_name}"}
@@ -248,25 +354,72 @@ def analyze_company(company_name, strategy, target_market):
homepage_text = get_website_text(url)
if not homepage_text:
return {"error": f"Could not scrape website {url}"}
homepage_text = re.sub(r'[^\x20-\x7E\n\r\t]', '', homepage_text)
# 3. Targeted Signal Search (The "Hunter" Phase)
# --- ENHANCED: EXTERNAL TECHNOGRAPHIC INTELLIGENCE ---
# Suche aktiv nach Wettbewerbern, nicht nur auf der Firmenwebsite.
tech_evidence = []
# Liste bekannter Wettbewerber / Incumbents
known_incumbents = [
"SAP Ariba", "Jaggaer", "Coupa", "SynerTrade", "Ivalua",
"ServiceNow", "Salesforce", "Oracle SCM", "Zycus", "GEP",
"SupplyOn", "EcoVadis", "IntegrityNext"
]
# Suche 1: Direkte Verbindung zu Software-Anbietern (Case Studies, News, etc.)
# Wir bauen eine Query mit OR, um API-Calls zu sparen.
# Splitte in 2 Gruppen, um Query-Länge im Rahmen zu halten
half = len(known_incumbents) // 2
group1 = " OR ".join([f'"{inc}"' for inc in known_incumbents[:half]])
group2 = " OR ".join([f'"{inc}"' for inc in known_incumbents[half:]])
tech_queries = [
f'"{company_name}" ({group1})',
f'"{company_name}" ({group2})',
f'"{company_name}" "supplier portal" login' # Suche nach dem Portal selbst
]
logger.info(f"Starte erweiterte Tech-Stack-Suche für {company_name}...")
for q in tech_queries:
logger.info(f"Tech Search: {q}")
results = serp_search(q, num_results=4) # Etwas mehr Ergebnisse
if results:
for r in results:
tech_evidence.append(f"- Found: {r['title']}\n Snippet: {r['snippet']}\n Link: {r['link']}")
tech_evidence_text = "\n".join(tech_evidence)
# --- END ENHANCED TECH SEARCH ---
# 3. Targeted Signal Search (The "Hunter" Phase) - Basierend auf Strategy
signal_evidence = []
# Firmographics Search
firmographics_results = serp_search(f"{company_name} Umsatz Mitarbeiterzahl 2023")
firmographics_context = "\n".join([f"- {r['snippet']} ({r['link']})" for r in firmographics_results])
# Signal Searches
# Signal Searches (Original Strategy)
signals = strategy.get('signals', [])
for signal in signals:
# Überspringe Signale, die wir schon durch die Tech-Suche massiv abgedeckt haben,
# es sei denn, sie sind sehr spezifisch.
if "incumbent" in signal['id'].lower() or "tech" in signal['id'].lower():
logger.info(f"Skipping generic signal search '{signal['name']}' in favor of Enhanced Tech Search.")
continue
proof_strategy = signal.get('proofStrategy', {})
query_template = proof_strategy.get('searchQueryTemplate')
search_context = ""
if query_template:
# Domain aus URL extrahieren für bessere Queries (z.B. site:firma.de)
domain = url.split("//")[-1].split("/")[0].replace("www.", "")
query = query_template.replace("{{COMPANY}}", company_name).replace("{{domain}}", domain)
try:
domain = url.split("//")[-1].split("/")[0].replace("www.", "")
except:
domain = ""
query = query_template.replace("{{COMPANY}}", company_name).replace("{COMPANY}", company_name)
query = query.replace("{{domain}}", domain).replace("{domain}", domain)
logger.info(f"Signal Search '{signal['name']}': {query}")
results = serp_search(query, num_results=3)
@@ -280,31 +433,39 @@ def analyze_company(company_name, strategy, target_market):
evidence_text = "\n\n".join(signal_evidence)
prompt = f"""
You are a B2B Market Intelligence Auditor.
Audit the company '{company_name}' ({url}) based on the collected evidence.
You are a Strategic B2B Sales Consultant.
Analyze the company '{company_name}' ({url}) to create a "best-of-breed" sales pitch strategy.
--- STRATEGY (Signals to find) ---
--- STRATEGY (What we are looking for) ---
{json.dumps(signals, indent=2)}
--- EVIDENCE SOURCE 1: HOMEPAGE CONTENT ---
{homepage_text[:10000]}
--- EVIDENCE 1: EXTERNAL TECH-STACK INTELLIGENCE (CRITICAL) ---
Look closely here for mentions of competitors like SAP Ariba, Jaggaer, SynerTrade, Coupa, etc.
{tech_evidence_text}
--- EVIDENCE SOURCE 2: FIRMOGRAPHICS SEARCH ---
--- EVIDENCE 2: HOMEPAGE CONTENT ---
{homepage_text[:8000]}
--- EVIDENCE 3: FIRMOGRAPHICS SEARCH ---
{firmographics_context}
--- EVIDENCE SOURCE 3: TARGETED SIGNAL SEARCH RESULTS ---
--- EVIDENCE 4: TARGETED SIGNAL SEARCH RESULTS ---
{evidence_text}
----------------------------------
TASK:
1. **Firmographics**: Estimate Revenue and Employees based on Source 1 & 2. Be realistic. Use buckets if unsure.
2. **Status**: Determine 'status' (Bestandskunde, Nutzt Wettbewerber, Greenfield, Unklar).
3. **Evaluate Signals**: For each signal, decide 'value' (Yes/No/Partial).
- **CRITICAL**: You MUST cite your source for the 'proof'.
- If found in Source 3 (Search), write: "Found in job posting/doc: [Snippet]" and include the URL.
- If found in Source 1 (Homepage), write: "On homepage: [Quote]".
- If not found, write: "Not found".
4. **Recommendation**: 1-sentence verdict.
1. **Firmographics**: Estimate Revenue and Employees.
2. **Technographic Audit**: Look for specific competitor software or legacy systems mentioned in EVIDENCE 1 (e.g., "Partner of SynerTrade", "Login to Jaggaer Portal").
3. **Status**:
- Set to "Nutzt Wettbewerber" if ANY competitor technology is found (Ariba, Jaggaer, SynerTrade, Coupa, etc.).
- Set to "Greenfield" ONLY if absolutely no competitor tech is found.
- Set to "Bestandskunde" if they already use our solution.
4. **Evaluate Signals**: For each signal, provide a "value" (Yes/No/Partial) and "proof".
5. **Recommendation (Pitch Strategy)**:
- DO NOT write a generic verdict.
- If they use a competitor (e.g., Ariba), explain how to position against it (e.g., "Pitch as a specialized add-on for logistics, filling Ariba's gaps").
- If Greenfield, explain the entry point.
- **Tone**: Strategic, insider-knowledge, specific.
STRICTLY output only JSON:
{{
@@ -326,21 +487,134 @@ def analyze_company(company_name, strategy, target_market):
}
try:
logger.info("Sende Audit-Anfrage an Gemini API...")
# logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}")
response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'})
response.raise_for_status()
response_data = response.json()
response_text = response_data['candidates'][0]['content']['parts'][0]['text']
logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")
if response_text.startswith('```json'):
response_text = response_text.split('```json')[1].split('```')[0].strip()
text = response_data['candidates'][0]['content']['parts'][0]['text']
result = _extract_json_from_text(text)
if not result:
raise ValueError("Konnte kein valides JSON extrahieren")
result = json.loads(response_text)
result['dataSource'] = "Digital Trace Audit (Deep Dive)" # Mark as verified
result['dataSource'] = "Digital Trace Audit (Deep Dive)"
logger.info(f"Audit für {company_name} erfolgreich abgeschlossen.")
return result
except Exception as e:
logger.error(f"Audit failed for {company_name}: {e}")
return {"error": str(e)}
return {
"companyName": company_name,
"status": "Unklar / Manuelle Prüfung",
"revenue": "Error",
"employees": "Error",
"tier": "Tier 3",
"dynamicAnalysis": {},
"recommendation": f"Audit failed due to API Error: {str(e)}",
"dataSource": "Error"
}
def generate_outreach_campaign(company_data_json, knowledge_base_content, reference_url):
    """
    Create personalized, role-based e-mail campaigns for one target company.

    Builds a single prompt from the audit facts, the sender's knowledge base
    and the reference-client URL, sends it to the Gemini API and returns the
    JSON extracted from the model's answer.

    Args:
        company_data_json: Dict with the audit result of the target company.
            Only 'companyName' is read directly; the whole dict is serialized
            into the prompt.
        knowledge_base_content: Markdown text describing the sender's
            identity, products and strategy; inserted verbatim into the prompt.
        reference_url: URL of the reference client that the model is told to
            cite as social proof.

    Returns:
        Whatever `_extract_json_from_text` parses from the model output (the
        prompt asks for a list of campaigns, each with 'target_role',
        'rationale' and three 'emails'). On any failure during the request or
        parsing, returns `[{"error": "<message>"}]` instead of raising.
    """
    company_name = company_data_json.get('companyName', 'Unknown')
    logger.info(f"--- STARTING ROLE-BASED OUTREACH GENERATION FOR: {company_name} ---")

    # Strip stray whitespace (e.g. a trailing newline from a key file), which
    # would otherwise make the header value invalid.
    api_key = (load_gemini_api_key() or "").strip()

    # Switch to stable 2.5-pro model.
    # The key is deliberately NOT part of the URL: requests puts the full URL
    # into HTTPError messages, and those messages are logged and returned to
    # the caller below, which would leak the key.
    GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent"
    request_headers = {
        'Content-Type': 'application/json',
        'x-goog-api-key': api_key,
    }
    # (connect, read) timeout in seconds. The read timeout is generous because
    # generating several e-mail sequences can take a while, but the call must
    # not hang forever on a stalled connection.
    request_timeout = (10, 300)

    # ensure_ascii=False keeps umlauts readable in the prompt instead of
    # turning them into \uXXXX escapes.
    prompt = f"""
You are a Strategic Key Account Manager and deeply technical Industry Insider.
Your goal is to write highly personalized, **operationally specific** outreach emails to the company '{company_name}'.
--- INPUT 1: YOUR IDENTITY & STRATEGY (The Sender) ---
The following Markdown contains your company's identity, products, and strategy.
You act as the sales representative for the company described here:
{knowledge_base_content}
--- INPUT 2: THE TARGET COMPANY (Audit Facts) ---
{json.dumps(company_data_json, indent=2, ensure_ascii=False)}
--- INPUT 3: THE REFERENCE CLIENT (Social Proof) ---
Reference Client URL: {reference_url}
CRITICAL: This 'Reference Client' is an existing happy customer of ours. They are the "Seed Company" used to find the Target Company (Lookalike).
You MUST mention this Reference Client by name (derive it from the URL, e.g., 'schindler.com' -> 'Schindler') to establish trust.
--- TASK ---
1. **Analyze**: Match the Target Company (Input 2) to the most relevant 'Zielbranche/Segment' from the Knowledge Base (Input 1).
2. **Select Roles**: Identify the top 2 most distinct and relevant 'Rollen' (Personas) from the Knowledge Base for this specific company situation.
- *Example:* If the audit says they use a competitor (risk of lock-in), select a role like "Strategic Purchaser" or "Head of R&D" who cares about "Second Source".
- *Example:* If they have quality issues or complex logistics, pick "Quality Manager" or "Logistics Head".
3. **Draft Campaigns**: For EACH of the 2 selected roles, write a 3-step email sequence.
--- TONE & STYLE GUIDELINES (CRITICAL) ---
- **Perspective:** Operational Expert & Insider. NOT generic marketing.
- **Be Gritty & Specific:** Do NOT use fluff like "optimize efficiency" or "streamline processes" without context.
- Use **hard, operational keywords** from the Knowledge Base (e.g., "ASNs", "VMI", "8D-Reports", "Maverick Buying", "Bandstillstand", "Sonderfahrten", "PPAP").
- Show you understand their daily pain.
- **Narrative Arc:**
1. "I noticed [Fact from Audit/Tech Stack]..." (e.g., "You rely on PDF orders via Jaggaer...")
2. "In [Industry], this often leads to [Operational Pain]..." (e.g., "missing ASNs causing delays at the hub.")
3. "We helped [Reference Client Name] solve exactly this by [Specific Solution]..."
4. "Let's discuss how to get [Operational Gain] without replacing your ERP."
- **Mandatory Social Proof:** You MUST mention the Reference Client Name (from Input 3) in the email body or footer.
- **Language:** German (as the inputs are German).
--- OUTPUT FORMAT (Strictly JSON) ---
Returns a list of campaigns.
[
{{
"target_role": "Name of the Role (e.g. Leiter F&E)",
"rationale": "Why this role? (e.g. Because the audit found dependency on Competitor X...)",
"emails": [
{{
"subject": "Specific Subject Line",
"body": "Email Body..."
}},
{{
"subject": "Re: Subject",
"body": "Follow-up Body..."
}},
{{
"subject": "Final Check",
"body": "Final Body..."
}}
]
}},
... (Second Role)
]
"""

    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"response_mime_type": "application/json"}
    }

    try:
        logger.info("Sende Campaign-Anfrage an Gemini API...")
        # logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}")
        response = requests.post(
            GEMINI_API_URL,
            json=payload,
            headers=request_headers,
            timeout=request_timeout,
        )
        response.raise_for_status()
        response_data = response.json()
        logger.info(f"Gemini API-Antwort erhalten (Status: {response.status_code}).")
        # logger.debug(f"Rohe API-Antwort (JSON): {json.dumps(response_data, indent=2)}")

        # A missing candidate/part raises KeyError/IndexError here and is
        # reported through the generic handler below.
        text = response_data['candidates'][0]['content']['parts'][0]['text']
        result = _extract_json_from_text(text)

        if not result:
            raise ValueError("Konnte kein valides JSON extrahieren")

        return result
    except Exception as e:
        # Best effort by design: the caller serializes the return value to
        # stdout, so failures are reported as data rather than raised.
        logger.error(f"Campaign generation failed for {company_name}: {e}")
        return [{"error": str(e)}]
def main():
parser = argparse.ArgumentParser()
@@ -351,6 +625,7 @@ def main():
parser.add_argument("--company_name")
parser.add_argument("--strategy_json")
parser.add_argument("--summary_of_offer")
parser.add_argument("--company_data_file") # For generate_outreach
args = parser.parse_args()
if args.mode == "generate_strategy":
@@ -365,6 +640,11 @@ def main():
elif args.mode == "analyze_company":
strategy = json.loads(args.strategy_json)
print(json.dumps(analyze_company(args.company_name, strategy, args.target_market)))
elif args.mode == "generate_outreach":
with open(args.company_data_file, "r") as f: company_data = json.load(f)
with open(args.context_file, "r") as f: knowledge_base = f.read()
print(json.dumps(generate_outreach_campaign(company_data, knowledge_base, args.reference_url)))
if __name__ == "__main__":
main()

View File

@@ -706,4 +706,4 @@ Der Prozess für den Benutzer bleibt weitgehend gleich, ist aber technisch solid
**Schritt 4 & 5: Reporting & Personalisierte Ansprache**
- **Ergebnis-Darstellung:** Die faktenbasierten Analyseergebnisse werden im Frontend angezeigt.
- **Kampagnen-Generierung:** Die KI nutzt die validierten "Digitalen Signale" als Aufhänger, um hyper-personalisierte und extrem treffsichere E-Mail-Entwürfe zu erstellen.
- **Kampagnen-Generierung:** Die KI nutzt die validierten "Digitalen Signale" als Aufhänger, um hyper-personalisierte und extrem treffsichere E-Mail-Entwürfe zu erstellen. Dabei werden **operative Schmerzpunkte ("Grit")** und **Social Proof** (Referenzkunden) aggressiv genutzt, um Insider-Status zu demonstrieren.