feat(gtm): add aspect ratio & corporate design; fix(market): harden backend logging & json parsing

This commit is contained in:
2026-01-05 11:42:15 +00:00
parent 7cd90f77e3
commit aa1319856c
10 changed files with 241 additions and 169 deletions

View File

@@ -173,8 +173,8 @@ def _extract_json_from_text(text):
logger.error(f"JSON Parsing fehlgeschlagen. Roher Text: {text[:500]}...")
return None
def generate_search_strategy(reference_url, context_content):
logger.info(f"Generating strategy for {reference_url}")
def generate_search_strategy(reference_url, context_content, language='de'):
logger.info(f"Generating strategy for {reference_url} (Language: {language})")
api_key = load_gemini_api_key()
target_industries = _extract_target_industries_from_context(context_content)
@@ -186,6 +186,8 @@ def generate_search_strategy(reference_url, context_content):
# Switch to stable 2.5-pro model (which works for v1beta)
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
prompt = f"""
You are a B2B Market Intelligence Architect.
@@ -223,6 +225,9 @@ def generate_search_strategy(reference_url, context_content):
- `searchQueryTemplate`: A Google search query to find this. Use `{{COMPANY}}` as a placeholder for the company name.
Example: `site:{{COMPANY}} "software engineer" OR "developer"`
--- LANGUAGE INSTRUCTION ---
IMPORTANT: The entire JSON content (descriptions, rationale, summaries) MUST be in {lang_instruction}. Translate if necessary.
--- OUTPUT FORMAT ---
Return ONLY a valid JSON object.
{{
@@ -267,12 +272,14 @@ def generate_search_strategy(reference_url, context_content):
"signals": []
}
def identify_competitors(reference_url, target_market, industries, summary_of_offer=None):
logger.info(f"Identifying competitors for {reference_url}")
def identify_competitors(reference_url, target_market, industries, summary_of_offer=None, language='de'):
logger.info(f"Identifying competitors for {reference_url} (Language: {language})")
api_key = load_gemini_api_key()
# Switch to stable 2.5-pro model
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
prompt = f"""
You are a B2B Market Analyst. Find 3-5 direct competitors or highly similar companies (lookalikes) for the company at `{reference_url}`.
@@ -295,6 +302,9 @@ def identify_competitors(reference_url, target_market, industries, summary_of_of
- `name`: The official, full name of the company.
- `description`: A concise explanation of why they are a competitor.
--- LANGUAGE INSTRUCTION ---
IMPORTANT: The entire JSON content (descriptions) MUST be in {lang_instruction}.
--- OUTPUT FORMAT ---
Return ONLY a valid JSON object with the following structure:
{{
@@ -325,11 +335,14 @@ def identify_competitors(reference_url, target_market, industries, summary_of_of
logger.error(f"Competitor identification failed: {e}")
return {"localCompetitors": [], "nationalCompetitors": [], "internationalCompetitors": []}
def analyze_company(company_name, strategy, target_market):
logger.info(f"--- STARTING DEEP TECH AUDIT FOR: {company_name} ---")
def analyze_company(company_name, strategy, target_market, language='de'):
logger.info(f"--- STARTING DEEP TECH AUDIT FOR: {company_name} (Language: {language}) ---")
api_key = load_gemini_api_key()
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
# ... (Rest of function logic remains same, just update prompt) ...
# 1. Website Finding (SerpAPI fallback to Gemini)
url = None
website_search_results = serp_search(f"{company_name} offizielle Website")
@@ -343,24 +356,16 @@ def analyze_company(company_name, strategy, target_market):
prompt_url = f"What is the official homepage URL for the company '{company_name}' in the market '{target_market}'? Respond with ONLY the single, complete URL and nothing else."
payload_url = {"contents": [{"parts": [{"text": prompt_url}]}]}
logger.info("Sende Anfrage an Gemini API (URL Fallback)...")
# logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload_url, indent=2)}")
try:
res = requests.post(GEMINI_API_URL, json=payload_url, headers={'Content-Type': 'application/json'}, timeout=15)
res.raise_for_status()
res_json = res.json()
logger.info(f"Gemini API-Antwort erhalten (Status: {res.status_code}).")
candidate = res_json.get('candidates', [{}])[0]
content = candidate.get('content', {}).get('parts', [{}])[0]
text_response = content.get('text', '').strip()
url_match = re.search(r'(https?://[^\s"]+)', text_response)
if url_match:
url = url_match.group(1)
logger.info(f"Gemini Fallback hat URL gefunden: {url}")
else:
logger.warning(f"Keine gültige URL in Gemini-Antwort gefunden: '{text_response}'")
except Exception as e:
logger.error(f"Gemini URL Fallback failed: {e}")
pass
@@ -368,7 +373,6 @@ def analyze_company(company_name, strategy, target_market):
if not url or not url.startswith("http"):
return {"error": f"Could not find website for {company_name}"}
# 2. Homepage Scraping with GRACEFUL FALLBACK
homepage_text = ""
scraping_note = ""
@@ -377,86 +381,48 @@ def analyze_company(company_name, strategy, target_market):
if scraped_content:
homepage_text = scraped_content
else:
homepage_text = "[WEBSITE ACCESS DENIED] - The audit must rely on external search signals (Tech Stack, Job Postings, News) as the homepage content is unavailable."
scraping_note = "(Website Content Unavailable - Analysis based on Digital Footprint)"
logger.warning(f"Audit continuing without website content for {company_name}")
homepage_text = "[WEBSITE ACCESS DENIED]"
scraping_note = "(Website Content Unavailable)"
else:
homepage_text = "No valid URL found. Analysis based on Name ONLY."
homepage_text = "No valid URL found."
scraping_note = "(No URL found)"
# --- ENHANCED: EXTERNAL TECHNOGRAPHIC INTELLIGENCE ---
# Suche aktiv nach Wettbewerbern, nicht nur auf der Firmenwebsite.
tech_evidence = []
# Liste bekannter Wettbewerber / Incumbents
known_incumbents = [
"SAP Ariba", "Jaggaer", "Coupa", "SynerTrade", "Ivalua",
"ServiceNow", "Salesforce", "Oracle SCM", "Zycus", "GEP",
"SupplyOn", "EcoVadis", "IntegrityNext"
]
# Suche 1: Direkte Verbindung zu Software-Anbietern (Case Studies, News, etc.)
# Wir bauen eine Query mit OR, um API-Calls zu sparen.
# Splitte in 2 Gruppen, um Query-Länge im Rahmen zu halten
known_incumbents = ["SAP Ariba", "Jaggaer", "Coupa", "SynerTrade", "Ivalua", "ServiceNow", "Salesforce", "Oracle SCM", "Zycus", "GEP", "SupplyOn", "EcoVadis", "IntegrityNext"]
half = len(known_incumbents) // 2
group1 = " OR ".join([f'"{inc}"' for inc in known_incumbents[:half]])
group2 = " OR ".join([f'"{inc}"' for inc in known_incumbents[half:]])
tech_queries = [f'"{company_name}" ({group1})', f'"{company_name}" ({group2})', f'"{company_name}" "supplier portal" login']
tech_queries = [
f'"{company_name}" ({group1})',
f'"{company_name}" ({group2})',
f'"{company_name}" "supplier portal" login' # Suche nach dem Portal selbst
]
logger.info(f"Starte erweiterte Tech-Stack-Suche für {company_name}...")
for q in tech_queries:
logger.info(f"Tech Search: {q}")
results = serp_search(q, num_results=4) # Etwas mehr Ergebnisse
results = serp_search(q, num_results=4)
if results:
for r in results:
tech_evidence.append(f"- Found: {r['title']}\n Snippet: {r['snippet']}\n Link: {r['link']}")
tech_evidence_text = "\n".join(tech_evidence)
# --- END ENHANCED TECH SEARCH ---
# 3. Targeted Signal Search (The "Hunter" Phase) - Basierend auf Strategy
signal_evidence = []
# Firmographics Search
firmographics_results = serp_search(f"{company_name} Umsatz Mitarbeiterzahl 2023")
firmographics_context = "\n".join([f"- {r['snippet']} ({r['link']})" for r in firmographics_results])
# Signal Searches (Original Strategy)
signals = strategy.get('signals', [])
for signal in signals:
# Überspringe Signale, die wir schon durch die Tech-Suche massiv abgedeckt haben,
# es sei denn, sie sind sehr spezifisch.
if "incumbent" in signal['id'].lower() or "tech" in signal['id'].lower():
logger.info(f"Skipping generic signal search '{signal['name']}' in favor of Enhanced Tech Search.")
continue
if "incumbent" in signal['id'].lower() or "tech" in signal['id'].lower(): continue
proof_strategy = signal.get('proofStrategy', {})
query_template = proof_strategy.get('searchQueryTemplate')
search_context = ""
if query_template:
try:
domain = url.split("//")[-1].split("/")[0].replace("www.", "")
except:
domain = ""
query = query_template.replace("{{COMPANY}}", company_name).replace("{COMPANY}", company_name)
query = query.replace("{{domain}}", domain).replace("{domain}", domain)
logger.info(f"Signal Search '{signal['name']}': {query}")
query = query_template.replace("{{COMPANY}}", company_name).replace("{COMPANY}", company_name).replace("{{domain}}", domain).replace("{domain}", domain)
results = serp_search(query, num_results=3)
if results:
search_context = "\n".join([f" * Snippet: {r['snippet']}\n Source: {r['link']}" for r in results])
if search_context:
signal_evidence.append(f"SIGNAL '{signal['name']}':\n{search_context}")
# 4. Final Analysis & Synthesis (The "Judge" Phase)
evidence_text = "\n\n".join(signal_evidence)
prompt = f"""
@@ -484,17 +450,18 @@ def analyze_company(company_name, strategy, target_market):
1. **Firmographics**: Estimate Revenue and Employees.
2. **Technographic Audit**: Look for specific competitor software or legacy systems mentioned in EVIDENCE 1 (e.g., "Partner of SynerTrade", "Login to Jaggaer Portal").
3. **Status**:
- Set to "Nutzt Wettbewerber" if ANY competitor technology is found (Ariba, Jaggaer, SynerTrade, Coupa, etc.).
- Set to "Nutzt Wettbewerber" if ANY competitor technology is found.
- Set to "Greenfield" ONLY if absolutely no competitor tech is found.
- Set to "Bestandskunde" if they already use our solution.
4. **Evaluate Signals**: For each signal, provide a "value" (Yes/No/Partial) and "proof".
- NOTE: If Homepage Content is unavailable, rely on Evidence 1, 3, and 4.
5. **Recommendation (Pitch Strategy)**:
- DO NOT write a generic verdict.
- If they use a competitor (e.g., Ariba), explain how to position against it (e.g., "Pitch as a specialized add-on for logistics, filling Ariba's gaps").
- If they use a competitor, explain how to position against it.
- If Greenfield, explain the entry point.
- **Tone**: Strategic, insider-knowledge, specific.
--- LANGUAGE INSTRUCTION ---
IMPORTANT: The entire JSON content (especially 'recommendation', 'proof', 'value') MUST be in {lang_instruction}.
STRICTLY output only JSON:
{{
"companyName": "{company_name}",
@@ -516,7 +483,6 @@ def analyze_company(company_name, strategy, target_market):
try:
logger.info("Sende Audit-Anfrage an Gemini API...")
# logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}")
response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'})
response.raise_for_status()
response_data = response.json()
@@ -529,32 +495,32 @@ def analyze_company(company_name, strategy, target_market):
raise ValueError("Konnte kein valides JSON extrahieren")
result['dataSource'] = "Digital Trace Audit (Deep Dive)"
logger.info(f"Audit für {company_name} erfolgreich abgeschlossen.")
return result
except Exception as e:
logger.error(f"Audit failed for {company_name}: {e}")
return {
"companyName": company_name,
"status": "Unklar / Manuelle Prüfung",
"status": "Unklar",
"revenue": "Error",
"employees": "Error",
"tier": "Tier 3",
"dynamicAnalysis": {},
"recommendation": f"Audit failed due to API Error: {str(e)}",
"recommendation": f"Audit failed: {str(e)}",
"dataSource": "Error"
}
def generate_outreach_campaign(company_data_json, knowledge_base_content, reference_url, specific_role=None):
def generate_outreach_campaign(company_data_json, knowledge_base_content, reference_url, specific_role=None, language='de'):
"""
Erstellt personalisierte E-Mail-Kampagnen.
"""
company_name = company_data_json.get('companyName', 'Unknown')
logger.info(f"--- STARTING OUTREACH GENERATION FOR: {company_name} (Role: {specific_role if specific_role else 'Top 5'}) ---")
logger.info(f"--- STARTING OUTREACH GENERATION FOR: {company_name} (Role: {specific_role if specific_role else 'Top 5'}) [Lang: {language}] ---")
api_key = load_gemini_api_key()
# Back to high-quality 2.5-pro, but generating only 1 campaign to be fast
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
if specific_role:
# --- MODE B: SINGLE ROLE GENERATION (On Demand) ---
task_description = f"""
@@ -573,7 +539,6 @@ def generate_outreach_campaign(company_data_json, knowledge_base_content, refere
"""
else:
# --- MODE A: INITIAL START (TOP 1 + SUGGESTIONS) ---
# We only generate 1 campaign to ensure the request finishes quickly (< 20s).
task_description = f"""
--- TASK ---
1. **Analyze**: Match the Target Company (Input 2) to the most relevant 'Zielbranche/Segment' from the Knowledge Base (Input 1).
@@ -616,12 +581,7 @@ def generate_outreach_campaign(company_data_json, knowledge_base_content, refere
--- TONE & STYLE GUIDELINES (CRITICAL) ---
- **Perspective:** Operational Expert & Insider. NOT generic marketing.
- **Be Gritty & Specific:** Use hard, operational keywords from the Knowledge Base (e.g., "ASNs", "8D-Reports").
- **Narrative Arc:**
1. "I noticed [Fact from Audit]..."
2. "In [Industry], this often leads to [Pain]..."
3. "We helped [Reference Client] solve this..."
4. "Let's discuss [Gain]."
- **Language:** German.
- **Language:** {lang_instruction}.
{output_format}
"""
@@ -659,26 +619,36 @@ def main():
parser.add_argument("--strategy_json")
parser.add_argument("--summary_of_offer")
parser.add_argument("--company_data_file")
parser.add_argument("--specific_role") # New argument
parser.add_argument("--specific_role")
parser.add_argument("--language", default="de") # New Argument
args = parser.parse_args()
if args.mode == "generate_strategy":
with open(args.context_file, "r") as f: context = f.read()
print(json.dumps(generate_search_strategy(args.reference_url, context)))
print(json.dumps(generate_search_strategy(args.reference_url, context, args.language)))
elif args.mode == "identify_competitors":
industries = []
if args.context_file:
with open(args.context_file, "r") as f: context = f.read()
industries = _extract_target_industries_from_context(context)
print(json.dumps(identify_competitors(args.reference_url, args.target_market, industries, args.summary_of_offer)))
print(json.dumps(identify_competitors(args.reference_url, args.target_market, industries, args.summary_of_offer, args.language)))
elif args.mode == "analyze_company":
strategy = json.loads(args.strategy_json)
print(json.dumps(analyze_company(args.company_name, strategy, args.target_market)))
print(json.dumps(analyze_company(args.company_name, strategy, args.target_market, args.language)))
elif args.mode == "generate_outreach":
with open(args.company_data_file, "r") as f: company_data = json.load(f)
with open(args.context_file, "r") as f: knowledge_base = f.read()
print(json.dumps(generate_outreach_campaign(company_data, knowledge_base, args.reference_url, args.specific_role)))
print(json.dumps(generate_outreach_campaign(company_data, knowledge_base, args.reference_url, args.specific_role, args.language)))
if __name__ == "__main__":
    # Script entry point: run the CLI exactly once and always leave a JSON
    # document on stdout, even when something unexpected fails.
    #
    # Force UTF-8 on stdout so non-ASCII text is emitted independently of the
    # host locale. reconfigure() exists only on TextIOWrapper streams
    # (Python 3.7+), so skip it when stdout has been replaced by another object.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding='utf-8')
    try:
        # Single invocation, inside the try, so every failure reaches the
        # JSON fallback below instead of surfacing as a bare traceback.
        main()
        sys.stdout.flush()
    except Exception as e:
        # exc_info=True records the full traceback through the module logger.
        logger.critical(f"Unhandled Exception in Main: {e}", exc_info=True)
        # Fallback JSON output so the server doesn't crash on parse error
        error_json = json.dumps({"error": f"Critical Script Error: {str(e)}", "details": "Check market_intel.log"})
        print(error_json)
        # Non-zero exit status signals the failure to the calling process.
        sys.exit(1)