feat(gtm): add aspect ratio & corporate design; fix(market): harden backend logging & json parsing
This commit is contained in:
@@ -173,8 +173,8 @@ def _extract_json_from_text(text):
|
||||
logger.error(f"JSON Parsing fehlgeschlagen. Roher Text: {text[:500]}...")
|
||||
return None
|
||||
|
||||
def generate_search_strategy(reference_url, context_content):
|
||||
logger.info(f"Generating strategy for {reference_url}")
|
||||
def generate_search_strategy(reference_url, context_content, language='de'):
|
||||
logger.info(f"Generating strategy for {reference_url} (Language: {language})")
|
||||
api_key = load_gemini_api_key()
|
||||
target_industries = _extract_target_industries_from_context(context_content)
|
||||
|
||||
@@ -186,6 +186,8 @@ def generate_search_strategy(reference_url, context_content):
|
||||
# Switch to stable 2.5-pro model (which works for v1beta)
|
||||
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
|
||||
|
||||
lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
|
||||
|
||||
prompt = f"""
|
||||
You are a B2B Market Intelligence Architect.
|
||||
|
||||
@@ -223,6 +225,9 @@ def generate_search_strategy(reference_url, context_content):
|
||||
- `searchQueryTemplate`: A Google search query to find this. Use `{{COMPANY}}` as a placeholder for the company name.
|
||||
Example: `site:{{COMPANY}} "software engineer" OR "developer"`
|
||||
|
||||
--- LANGUAGE INSTRUCTION ---
|
||||
IMPORTANT: The entire JSON content (descriptions, rationale, summaries) MUST be in {lang_instruction}. Translate if necessary.
|
||||
|
||||
--- OUTPUT FORMAT ---
|
||||
Return ONLY a valid JSON object.
|
||||
{{
|
||||
@@ -267,12 +272,14 @@ def generate_search_strategy(reference_url, context_content):
|
||||
"signals": []
|
||||
}
|
||||
|
||||
def identify_competitors(reference_url, target_market, industries, summary_of_offer=None):
|
||||
logger.info(f"Identifying competitors for {reference_url}")
|
||||
def identify_competitors(reference_url, target_market, industries, summary_of_offer=None, language='de'):
|
||||
logger.info(f"Identifying competitors for {reference_url} (Language: {language})")
|
||||
api_key = load_gemini_api_key()
|
||||
# Switch to stable 2.5-pro model
|
||||
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
|
||||
|
||||
lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
|
||||
|
||||
prompt = f"""
|
||||
You are a B2B Market Analyst. Find 3-5 direct competitors or highly similar companies (lookalikes) for the company at `{reference_url}`.
|
||||
|
||||
@@ -295,6 +302,9 @@ def identify_competitors(reference_url, target_market, industries, summary_of_of
|
||||
- `name`: The official, full name of the company.
|
||||
- `description`: A concise explanation of why they are a competitor.
|
||||
|
||||
--- LANGUAGE INSTRUCTION ---
|
||||
IMPORTANT: The entire JSON content (descriptions) MUST be in {lang_instruction}.
|
||||
|
||||
--- OUTPUT FORMAT ---
|
||||
Return ONLY a valid JSON object with the following structure:
|
||||
{{
|
||||
@@ -325,11 +335,14 @@ def identify_competitors(reference_url, target_market, industries, summary_of_of
|
||||
logger.error(f"Competitor identification failed: {e}")
|
||||
return {"localCompetitors": [], "nationalCompetitors": [], "internationalCompetitors": []}
|
||||
|
||||
def analyze_company(company_name, strategy, target_market):
|
||||
logger.info(f"--- STARTING DEEP TECH AUDIT FOR: {company_name} ---")
|
||||
def analyze_company(company_name, strategy, target_market, language='de'):
|
||||
logger.info(f"--- STARTING DEEP TECH AUDIT FOR: {company_name} (Language: {language}) ---")
|
||||
api_key = load_gemini_api_key()
|
||||
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
|
||||
|
||||
lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
|
||||
|
||||
# ... (Rest of function logic remains same, just update prompt) ...
|
||||
# 1. Website Finding (SerpAPI fallback to Gemini)
|
||||
url = None
|
||||
website_search_results = serp_search(f"{company_name} offizielle Website")
|
||||
@@ -343,24 +356,16 @@ def analyze_company(company_name, strategy, target_market):
|
||||
prompt_url = f"What is the official homepage URL for the company '{company_name}' in the market '{target_market}'? Respond with ONLY the single, complete URL and nothing else."
|
||||
payload_url = {"contents": [{"parts": [{"text": prompt_url}]}]}
|
||||
logger.info("Sende Anfrage an Gemini API (URL Fallback)...")
|
||||
# logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload_url, indent=2)}")
|
||||
try:
|
||||
res = requests.post(GEMINI_API_URL, json=payload_url, headers={'Content-Type': 'application/json'}, timeout=15)
|
||||
res.raise_for_status()
|
||||
res_json = res.json()
|
||||
logger.info(f"Gemini API-Antwort erhalten (Status: {res.status_code}).")
|
||||
|
||||
candidate = res_json.get('candidates', [{}])[0]
|
||||
content = candidate.get('content', {}).get('parts', [{}])[0]
|
||||
text_response = content.get('text', '').strip()
|
||||
|
||||
url_match = re.search(r'(https?://[^\s"]+)', text_response)
|
||||
if url_match:
|
||||
url = url_match.group(1)
|
||||
logger.info(f"Gemini Fallback hat URL gefunden: {url}")
|
||||
else:
|
||||
logger.warning(f"Keine gültige URL in Gemini-Antwort gefunden: '{text_response}'")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Gemini URL Fallback failed: {e}")
|
||||
pass
|
||||
@@ -368,7 +373,6 @@ def analyze_company(company_name, strategy, target_market):
|
||||
if not url or not url.startswith("http"):
|
||||
return {"error": f"Could not find website for {company_name}"}
|
||||
|
||||
# 2. Homepage Scraping with GRACEFUL FALLBACK
|
||||
homepage_text = ""
|
||||
scraping_note = ""
|
||||
|
||||
@@ -377,86 +381,48 @@ def analyze_company(company_name, strategy, target_market):
|
||||
if scraped_content:
|
||||
homepage_text = scraped_content
|
||||
else:
|
||||
homepage_text = "[WEBSITE ACCESS DENIED] - The audit must rely on external search signals (Tech Stack, Job Postings, News) as the homepage content is unavailable."
|
||||
scraping_note = "(Website Content Unavailable - Analysis based on Digital Footprint)"
|
||||
logger.warning(f"Audit continuing without website content for {company_name}")
|
||||
homepage_text = "[WEBSITE ACCESS DENIED]"
|
||||
scraping_note = "(Website Content Unavailable)"
|
||||
else:
|
||||
homepage_text = "No valid URL found. Analysis based on Name ONLY."
|
||||
homepage_text = "No valid URL found."
|
||||
scraping_note = "(No URL found)"
|
||||
|
||||
# --- ENHANCED: EXTERNAL TECHNOGRAPHIC INTELLIGENCE ---
|
||||
# Suche aktiv nach Wettbewerbern, nicht nur auf der Firmenwebsite.
|
||||
tech_evidence = []
|
||||
|
||||
# Liste bekannter Wettbewerber / Incumbents
|
||||
known_incumbents = [
|
||||
"SAP Ariba", "Jaggaer", "Coupa", "SynerTrade", "Ivalua",
|
||||
"ServiceNow", "Salesforce", "Oracle SCM", "Zycus", "GEP",
|
||||
"SupplyOn", "EcoVadis", "IntegrityNext"
|
||||
]
|
||||
|
||||
# Suche 1: Direkte Verbindung zu Software-Anbietern (Case Studies, News, etc.)
|
||||
# Wir bauen eine Query mit OR, um API-Calls zu sparen.
|
||||
# Splitte in 2 Gruppen, um Query-Länge im Rahmen zu halten
|
||||
known_incumbents = ["SAP Ariba", "Jaggaer", "Coupa", "SynerTrade", "Ivalua", "ServiceNow", "Salesforce", "Oracle SCM", "Zycus", "GEP", "SupplyOn", "EcoVadis", "IntegrityNext"]
|
||||
half = len(known_incumbents) // 2
|
||||
group1 = " OR ".join([f'"{inc}"' for inc in known_incumbents[:half]])
|
||||
group2 = " OR ".join([f'"{inc}"' for inc in known_incumbents[half:]])
|
||||
tech_queries = [f'"{company_name}" ({group1})', f'"{company_name}" ({group2})', f'"{company_name}" "supplier portal" login']
|
||||
|
||||
tech_queries = [
|
||||
f'"{company_name}" ({group1})',
|
||||
f'"{company_name}" ({group2})',
|
||||
f'"{company_name}" "supplier portal" login' # Suche nach dem Portal selbst
|
||||
]
|
||||
|
||||
logger.info(f"Starte erweiterte Tech-Stack-Suche für {company_name}...")
|
||||
for q in tech_queries:
|
||||
logger.info(f"Tech Search: {q}")
|
||||
results = serp_search(q, num_results=4) # Etwas mehr Ergebnisse
|
||||
results = serp_search(q, num_results=4)
|
||||
if results:
|
||||
for r in results:
|
||||
tech_evidence.append(f"- Found: {r['title']}\n Snippet: {r['snippet']}\n Link: {r['link']}")
|
||||
|
||||
tech_evidence_text = "\n".join(tech_evidence)
|
||||
# --- END ENHANCED TECH SEARCH ---
|
||||
|
||||
# 3. Targeted Signal Search (The "Hunter" Phase) - Basierend auf Strategy
|
||||
signal_evidence = []
|
||||
|
||||
# Firmographics Search
|
||||
firmographics_results = serp_search(f"{company_name} Umsatz Mitarbeiterzahl 2023")
|
||||
firmographics_context = "\n".join([f"- {r['snippet']} ({r['link']})" for r in firmographics_results])
|
||||
|
||||
# Signal Searches (Original Strategy)
|
||||
signals = strategy.get('signals', [])
|
||||
for signal in signals:
|
||||
# Überspringe Signale, die wir schon durch die Tech-Suche massiv abgedeckt haben,
|
||||
# es sei denn, sie sind sehr spezifisch.
|
||||
if "incumbent" in signal['id'].lower() or "tech" in signal['id'].lower():
|
||||
logger.info(f"Skipping generic signal search '{signal['name']}' in favor of Enhanced Tech Search.")
|
||||
continue
|
||||
|
||||
if "incumbent" in signal['id'].lower() or "tech" in signal['id'].lower(): continue
|
||||
proof_strategy = signal.get('proofStrategy', {})
|
||||
query_template = proof_strategy.get('searchQueryTemplate')
|
||||
|
||||
search_context = ""
|
||||
if query_template:
|
||||
try:
|
||||
domain = url.split("//")[-1].split("/")[0].replace("www.", "")
|
||||
except:
|
||||
domain = ""
|
||||
|
||||
query = query_template.replace("{{COMPANY}}", company_name).replace("{COMPANY}", company_name)
|
||||
query = query.replace("{{domain}}", domain).replace("{domain}", domain)
|
||||
|
||||
logger.info(f"Signal Search '{signal['name']}': {query}")
|
||||
query = query_template.replace("{{COMPANY}}", company_name).replace("{COMPANY}", company_name).replace("{{domain}}", domain).replace("{domain}", domain)
|
||||
results = serp_search(query, num_results=3)
|
||||
if results:
|
||||
search_context = "\n".join([f" * Snippet: {r['snippet']}\n Source: {r['link']}" for r in results])
|
||||
|
||||
if search_context:
|
||||
signal_evidence.append(f"SIGNAL '{signal['name']}':\n{search_context}")
|
||||
|
||||
# 4. Final Analysis & Synthesis (The "Judge" Phase)
|
||||
evidence_text = "\n\n".join(signal_evidence)
|
||||
|
||||
prompt = f"""
|
||||
@@ -484,17 +450,18 @@ def analyze_company(company_name, strategy, target_market):
|
||||
1. **Firmographics**: Estimate Revenue and Employees.
|
||||
2. **Technographic Audit**: Look for specific competitor software or legacy systems mentioned in EVIDENCE 1 (e.g., "Partner of SynerTrade", "Login to Jaggaer Portal").
|
||||
3. **Status**:
|
||||
- Set to "Nutzt Wettbewerber" if ANY competitor technology is found (Ariba, Jaggaer, SynerTrade, Coupa, etc.).
|
||||
- Set to "Nutzt Wettbewerber" if ANY competitor technology is found.
|
||||
- Set to "Greenfield" ONLY if absolutely no competitor tech is found.
|
||||
- Set to "Bestandskunde" if they already use our solution.
|
||||
4. **Evaluate Signals**: For each signal, provide a "value" (Yes/No/Partial) and "proof".
|
||||
- NOTE: If Homepage Content is unavailable, rely on Evidence 1, 3, and 4.
|
||||
5. **Recommendation (Pitch Strategy)**:
|
||||
- DO NOT write a generic verdict.
|
||||
- If they use a competitor (e.g., Ariba), explain how to position against it (e.g., "Pitch as a specialized add-on for logistics, filling Ariba's gaps").
|
||||
- If they use a competitor, explain how to position against it.
|
||||
- If Greenfield, explain the entry point.
|
||||
- **Tone**: Strategic, insider-knowledge, specific.
|
||||
|
||||
--- LANGUAGE INSTRUCTION ---
|
||||
IMPORTANT: The entire JSON content (especially 'recommendation', 'proof', 'value') MUST be in {lang_instruction}.
|
||||
|
||||
STRICTLY output only JSON:
|
||||
{{
|
||||
"companyName": "{company_name}",
|
||||
@@ -516,7 +483,6 @@ def analyze_company(company_name, strategy, target_market):
|
||||
|
||||
try:
|
||||
logger.info("Sende Audit-Anfrage an Gemini API...")
|
||||
# logger.debug(f"Rohe Gemini API-Anfrage (JSON): {json.dumps(payload, indent=2)}")
|
||||
response = requests.post(GEMINI_API_URL, json=payload, headers={'Content-Type': 'application/json'})
|
||||
response.raise_for_status()
|
||||
response_data = response.json()
|
||||
@@ -529,32 +495,32 @@ def analyze_company(company_name, strategy, target_market):
|
||||
raise ValueError("Konnte kein valides JSON extrahieren")
|
||||
|
||||
result['dataSource'] = "Digital Trace Audit (Deep Dive)"
|
||||
logger.info(f"Audit für {company_name} erfolgreich abgeschlossen.")
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Audit failed for {company_name}: {e}")
|
||||
return {
|
||||
"companyName": company_name,
|
||||
"status": "Unklar / Manuelle Prüfung",
|
||||
"status": "Unklar",
|
||||
"revenue": "Error",
|
||||
"employees": "Error",
|
||||
"tier": "Tier 3",
|
||||
"dynamicAnalysis": {},
|
||||
"recommendation": f"Audit failed due to API Error: {str(e)}",
|
||||
"recommendation": f"Audit failed: {str(e)}",
|
||||
"dataSource": "Error"
|
||||
}
|
||||
|
||||
def generate_outreach_campaign(company_data_json, knowledge_base_content, reference_url, specific_role=None):
|
||||
def generate_outreach_campaign(company_data_json, knowledge_base_content, reference_url, specific_role=None, language='de'):
|
||||
"""
|
||||
Erstellt personalisierte E-Mail-Kampagnen.
|
||||
"""
|
||||
company_name = company_data_json.get('companyName', 'Unknown')
|
||||
logger.info(f"--- STARTING OUTREACH GENERATION FOR: {company_name} (Role: {specific_role if specific_role else 'Top 5'}) ---")
|
||||
logger.info(f"--- STARTING OUTREACH GENERATION FOR: {company_name} (Role: {specific_role if specific_role else 'Top 5'}) [Lang: {language}] ---")
|
||||
|
||||
api_key = load_gemini_api_key()
|
||||
# Back to high-quality 2.5-pro, but generating only 1 campaign to be fast
|
||||
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent?key={api_key}"
|
||||
|
||||
lang_instruction = "GERMAN (Deutsch)" if language == 'de' else "ENGLISH"
|
||||
|
||||
if specific_role:
|
||||
# --- MODE B: SINGLE ROLE GENERATION (On Demand) ---
|
||||
task_description = f"""
|
||||
@@ -573,7 +539,6 @@ def generate_outreach_campaign(company_data_json, knowledge_base_content, refere
|
||||
"""
|
||||
else:
|
||||
# --- MODE A: INITIAL START (TOP 1 + SUGGESTIONS) ---
|
||||
# We only generate 1 campaign to ensure the request finishes quickly (< 20s).
|
||||
task_description = f"""
|
||||
--- TASK ---
|
||||
1. **Analyze**: Match the Target Company (Input 2) to the most relevant 'Zielbranche/Segment' from the Knowledge Base (Input 1).
|
||||
@@ -616,12 +581,7 @@ def generate_outreach_campaign(company_data_json, knowledge_base_content, refere
|
||||
--- TONE & STYLE GUIDELINES (CRITICAL) ---
|
||||
- **Perspective:** Operational Expert & Insider. NOT generic marketing.
|
||||
- **Be Gritty & Specific:** Use hard, operational keywords from the Knowledge Base (e.g., "ASNs", "8D-Reports").
|
||||
- **Narrative Arc:**
|
||||
1. "I noticed [Fact from Audit]..."
|
||||
2. "In [Industry], this often leads to [Pain]..."
|
||||
3. "We helped [Reference Client] solve this..."
|
||||
4. "Let's discuss [Gain]."
|
||||
- **Language:** German.
|
||||
- **Language:** {lang_instruction}.
|
||||
|
||||
{output_format}
|
||||
"""
|
||||
@@ -659,26 +619,36 @@ def main():
|
||||
parser.add_argument("--strategy_json")
|
||||
parser.add_argument("--summary_of_offer")
|
||||
parser.add_argument("--company_data_file")
|
||||
parser.add_argument("--specific_role") # New argument
|
||||
parser.add_argument("--specific_role")
|
||||
parser.add_argument("--language", default="de") # New Argument
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.mode == "generate_strategy":
|
||||
with open(args.context_file, "r") as f: context = f.read()
|
||||
print(json.dumps(generate_search_strategy(args.reference_url, context)))
|
||||
print(json.dumps(generate_search_strategy(args.reference_url, context, args.language)))
|
||||
elif args.mode == "identify_competitors":
|
||||
industries = []
|
||||
if args.context_file:
|
||||
with open(args.context_file, "r") as f: context = f.read()
|
||||
industries = _extract_target_industries_from_context(context)
|
||||
print(json.dumps(identify_competitors(args.reference_url, args.target_market, industries, args.summary_of_offer)))
|
||||
print(json.dumps(identify_competitors(args.reference_url, args.target_market, industries, args.summary_of_offer, args.language)))
|
||||
elif args.mode == "analyze_company":
|
||||
strategy = json.loads(args.strategy_json)
|
||||
print(json.dumps(analyze_company(args.company_name, strategy, args.target_market)))
|
||||
print(json.dumps(analyze_company(args.company_name, strategy, args.target_market, args.language)))
|
||||
elif args.mode == "generate_outreach":
|
||||
with open(args.company_data_file, "r") as f: company_data = json.load(f)
|
||||
with open(args.context_file, "r") as f: knowledge_base = f.read()
|
||||
print(json.dumps(generate_outreach_campaign(company_data, knowledge_base, args.reference_url, args.specific_role)))
|
||||
print(json.dumps(generate_outreach_campaign(company_data, knowledge_base, args.reference_url, args.specific_role, args.language)))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
try:
|
||||
main()
|
||||
sys.stdout.flush()
|
||||
except Exception as e:
|
||||
logger.critical(f"Unhandled Exception in Main: {e}", exc_info=True)
|
||||
# Fallback JSON output so the server doesn't crash on parse error
|
||||
error_json = json.dumps({"error": f"Critical Script Error: {str(e)}", "details": "Check market_intel.log"})
|
||||
print(error_json)
|
||||
sys.exit(1)
|
||||
Reference in New Issue
Block a user