"""GTM orchestrator: runs the individual go-to-market analysis phases.

Each ``phaseN`` function takes a ``payload`` dict, builds a prompt from the
results of earlier phases, sends it to the model via ``call_gemini_flash``,
stores the result through ``gtm_db_manager`` and returns it.  Every prompt and
response is also written to ``LOG_DIR`` for later inspection.
"""
import argparse
import base64
import json
import logging
import os
import re
import sys
from datetime import datetime

import requests
from bs4 import BeautifulSoup

from config import Config
import gtm_db_manager as db_manager

# This file's directory is put on sys.path before ``helpers`` is imported
# (presumably so that ``helpers`` resolves when the script is started from
# another working directory -- confirm against the deployment layout).
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from helpers import call_gemini_flash

LOG_DIR = "Log_from_docker"
# exist_ok avoids the check-then-create race when two runs start together.
os.makedirs(LOG_DIR, exist_ok=True)

# One timestamp per process: all artefacts of a run share the same prefix.
run_timestamp = datetime.now().strftime("%y-%m-%d_%H-%M-%S")
log_file_path = os.path.join(LOG_DIR, f"{run_timestamp}_gtm_orchestrator_run.log")

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file_path, mode='a', encoding='utf-8'),
        logging.StreamHandler(sys.stdout),
    ],
)


def log_and_save(project_id, step_name, data_type, content):
    """Log a step marker and dump ``content`` to its own file in ``LOG_DIR``.

    The file is named ``<run_timestamp>_<step_name>_<data_type>.txt``.  Dicts
    and lists are written as indented JSON, anything else via ``str()``.
    Saving is best-effort: a failure is logged and never propagated.

    Args:
        project_id: Identifier used in the log line only (not in the filename).
        step_name: Name of the pipeline step, e.g. ``"phase3"``.
        data_type: Kind of artefact, e.g. ``"prompt"`` or ``"response"``.
        content: The data to persist.
    """
    logging.info(f"Project {project_id} - Step: {step_name} - Type: {data_type}")
    filename = f"{run_timestamp}_{step_name}_{data_type}.txt"
    filepath = os.path.join(LOG_DIR, filename)
    try:
        with open(filepath, 'w', encoding='utf-8') as f:
            if isinstance(content, (dict, list)):
                json.dump(content, f, indent=4, ensure_ascii=False)
            else:
                f.write(str(content))
        logging.info(f"Saved {data_type} to {filepath}")
    except Exception as e:
        # Deliberately broad: debug artefacts must never abort a phase.
        logging.error(f"Failed to save {data_type} to file: {e}")


def get_text_from_url(url):
    """Fetch ``url`` and return its visible text, capped at 30000 characters.

    Script, style and page-chrome elements (header, footer, nav, ...) are
    removed before the text is extracted.

    Args:
        url: Address of the page to scrape.

    Returns:
        The extracted text, or ``""`` if the request or parsing failed.
    """
    try:
        logging.info(f"Scraping URL: {url}")
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        for element in soup(['script', 'style', 'noscript', 'iframe', 'svg', 'header', 'footer', 'nav', 'aside']):
            element.decompose()
        text = soup.get_text(separator=' ', strip=True)
        logging.info(f"Scraping successful. Content length: {len(text)}")
        return text[:30000]
    except Exception as e:
        # Best-effort scrape: callers receive an empty string on any failure.
        logging.error(f"Scraping failed for URL {url}: {e}")
        return ""


def get_system_instruction(lang):
    """Placeholder: the implementation is not part of this file version.

    As written it returns ``None``, which the phase functions pass on as
    ``system_instruction``.
    """
    # Same as before
    pass


def _dict_section(payload, key):
    """Return ``payload[key]``, or an empty dict when it is missing or falsy."""
    return payload.get(key) or {}


def _run_json_phase(project_id, phase_name, prompt, sys_instr):
    """Run one JSON-mode model call and persist its parsed result.

    Logs the prompt and raw response, parses the response as JSON and stores
    it under the key ``"<phase_name>_result"``.

    Raises:
        ValueError: The response is not valid JSON (``json.JSONDecodeError``).
        TypeError: The response is not a string/bytes object.
    """
    log_and_save(project_id, phase_name, "prompt", prompt)
    response = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=True)
    log_and_save(project_id, phase_name, "response", response)
    try:
        data = json.loads(response)
    except (TypeError, ValueError) as exc:
        # Leave a trace in the run log, then surface the original exception.
        logging.error(f"Project {project_id} - {phase_name}: model response is not valid JSON: {exc}")
        raise
    db_manager.save_gtm_result(project_id, f"{phase_name}_result", json.dumps(data))
    return data


# --- ORCHESTRATOR PHASES ---
# Prompt text is reproduced exactly as it appears in this file, including the
# few embedded newlines; only the source layout (adjacent literals) is new.


def phase1(payload):
    """Placeholder: the implementation is not part of this file version."""
    # ... (implementation from before)
    pass


def phase2(payload):
    """Placeholder: the implementation is not part of this file version."""
    # ... (implementation from before)
    pass


def phase3(payload):
    """Phase 3: ask the model for key accounts per ICP industry and buying roles.

    Reads ``phase2Data['icps']``, ``lang`` (default ``'de'``) and ``projectId``
    from ``payload``.  Returns the parsed JSON reply.
    """
    phase2_data = _dict_section(payload, 'phase2Data')
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    prompt = (
        " PHASE 3: WHALE HUNTING "
        f"Target ICPs (Industries): {json.dumps(phase2_data.get('icps'))} "
        "Task: "
        "1. Group 'Whales' (Key Accounts) strictly by ICP industries. "
        "2. Identify 3-5 concrete top companies in DACH market per industry. "
        "3. Define Buying Center Roles. "
        'Output JSON format ONLY: {"whales": [{"industry": "", "accounts": []}], "roles": []} '
    )
    return _run_json_phase(project_id, "phase3", prompt, sys_instr)


def phase4(payload):
    """Phase 4: ask the model for a strategy matrix (angle per segment).

    Reads ``phase3Data['whales']`` and ``phase1Data['features']`` from
    ``payload``.  Returns the parsed JSON reply.
    """
    phase3_data = _dict_section(payload, 'phase3Data')
    phase1_data = _dict_section(payload, 'phase1Data')
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    whales = phase3_data.get('whales') or []
    # Flatten the per-industry account lists into one list for the prompt.
    all_accounts = [account for w in whales for account in (w.get('accounts') or [])]
    industries = [w.get('industry') for w in whales]
    prompt = (
        " PHASE 4: STRATEGY & ANGLE DEVELOPMENT "
        f"Accounts: {json.dumps(all_accounts)} "
        f"Target Industries: {json.dumps(industries)} "
        f"Product Features: {json.dumps(phase1_data.get('features'))} "
        "Task: "
        '1. Develop specific "Angle" per target/industry. '
        "2. Consistency Check against Product Matrix. "
        '3. **IMPORTANT:** Apply "Hybrid Service Logic" if constraints exist! \n'
        'Output JSON format ONLY: {"strategyMatrix": [{"segment": "", "painPoint": "", "angle": "", "differentiation": ""}]} '
    )
    return _run_json_phase(project_id, "phase4", prompt, sys_instr)


def phase5(payload):
    """Phase 5: ask the model for the final Markdown strategy report.

    Serialises the phase 1-4 results into the prompt and calls the model with
    ``json_mode=False``.  Returns ``{"report": <markdown text>}``.
    """
    # The sections are only serialised here, never indexed, so the raw
    # payload values are passed through unchanged.
    phase4_data = payload.get('phase4Data', {})
    phase3_data = payload.get('phase3Data', {})
    phase2_data = payload.get('phase2Data', {})
    phase1_data = payload.get('phase1Data', {})
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    prompt = (
        " PHASE 5: ASSET GENERATION & FINAL REPORT "
        "CONTEXT DATA: "
        f"- Technical: {json.dumps(phase1_data)} "
        f"- ICPs: {json.dumps(phase2_data)} "
        f"- Targets (Whales): {json.dumps(phase3_data)} "
        f"- Strategy: {json.dumps(phase4_data)} "
        "TASK: "
        '1. Create a "GTM STRATEGY REPORT" in Markdown. '
        "2. Report Structure: Executive Summary, Product Analysis, Target Audience, Target Accounts, Strategy Matrix, Assets. "
        '3. Hybrid-Check: Ensure "Hybrid Service Logic" is visible. '
        "Output: Return strictly MARKDOWN formatted text. "
        'Start with "# GTM STRATEGY REPORT". \n'
    )
    log_and_save(project_id, "phase5", "prompt", prompt)
    report = call_gemini_flash(prompt, system_instruction=sys_instr, json_mode=False)
    log_and_save(project_id, "phase5", "response", report)
    result = {"report": report}
    db_manager.save_gtm_result(project_id, 'phase5_result', json.dumps(result))
    return result


def phase6(payload):
    """Phase 6: ask the model for battlecards and visual prompts.

    Reads ``phase1Data['features']``, ``phase3Data['roles']`` and
    ``phase4Data['strategyMatrix']``.  Returns the parsed JSON reply.
    """
    phase4_data = _dict_section(payload, 'phase4Data')
    phase3_data = _dict_section(payload, 'phase3Data')
    phase1_data = _dict_section(payload, 'phase1Data')
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    prompt = (
        " PHASE 6: SALES ENABLEMENT & VISUALS "
        "CONTEXT: "
        f"- Product Features: {json.dumps(phase1_data.get('features'))} "
        f"- Personas: {json.dumps(phase3_data.get('roles'))} "
        f"- Strategy: {json.dumps(phase4_data.get('strategyMatrix'))} "
        "TASK: "
        "1. Anticipate Friction & Objections. "
        "2. Formulate Battlecards. "
        "3. Create Visual Prompts. "
        'Output JSON format ONLY: {"battlecards": [{"persona": "", "objection": "", "responseScript": ""}], '
        '"visualPrompts": [{"title": "", "context": "", "prompt": ""}]} '
    )
    return _run_json_phase(project_id, "phase6", prompt, sys_instr)


def phase7(payload):
    """Phase 7: ask the model for landing-page hero copy per ICP.

    Reads ``phase2Data['icps']`` and ``phase4Data['strategyMatrix']``.
    Returns the parsed JSON reply.
    """
    phase4_data = _dict_section(payload, 'phase4Data')
    phase2_data = _dict_section(payload, 'phase2Data')
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    prompt = (
        " PHASE 7: VERTICAL LANDING PAGE COPY (Conversion Optimization) "
        f"ICPs: {json.dumps(phase2_data.get('icps'))} "
        f"Strategy: {json.dumps(phase4_data.get('strategyMatrix'))} "
        "TASK: "
        "1. Transform generic features into specific benefits for the Top 2 ICPs. "
        '2. Apply "Wackler Symbiosis". '
        "3. Create Landing Page Drafts (Hero Section). \n"
        'Output JSON format ONLY: {"landingPages": [{"industry": "", "headline": "", "subline": "", "bullets": [], "cta": ""}]} '
    )
    return _run_json_phase(project_id, "phase7", prompt, sys_instr)


def phase8(payload):
    """Phase 8: ask the model for a financial business case per ICP.

    Reads ``phase2Data['icps']`` and ``phase1Data['features']``.
    Returns the parsed JSON reply.
    """
    phase2_data = _dict_section(payload, 'phase2Data')
    phase1_data = _dict_section(payload, 'phase1Data')
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    prompt = (
        " PHASE 8: BUSINESS CASE BUILDER (The CFO Pitch) "
        f"Input: ICPs: {json.dumps(phase2_data.get('icps'))}, "
        f"Features: {json.dumps(phase1_data.get('features'))} "
        "TASK: "
        "1. Estimate labor costs/pain points. "
        "2. Compare against Robot Leasing (approx 330-600€/month). "
        "3. Develop ROI logic. "
        '4. Create "Financial Argumentation Guide" for each ICP. '
        'Output JSON format ONLY: {"businessCases": [{"industry": "", "costDriver": "", "efficiencyGain": "", "riskArgument": ""}]} '
    )
    return _run_json_phase(project_id, "phase8", prompt, sys_instr)


def phase9(payload):
    """Phase 9: ask the model to translate tech features into plain benefits.

    Reads ``phase1Data['features']`` and the ``painPoint`` of every entry in
    ``phase4Data['strategyMatrix']``.  Returns the parsed JSON reply.
    """
    phase1_data = _dict_section(payload, 'phase1Data')
    phase4_data = _dict_section(payload, 'phase4Data')
    lang = payload.get('lang', 'de')
    project_id = payload.get('projectId')
    sys_instr = get_system_instruction(lang)
    pain_points = [s.get('painPoint') for s in (phase4_data.get('strategyMatrix') or [])]
    prompt = (
        ' PHASE 9: THE "FEATURE-TO-VALUE" TRANSLATOR '
        f"Input Features: {json.dumps(phase1_data.get('features'))} "
        f"Strategy Pains: {json.dumps(pain_points)} "
        "TASK: "
        "1. Take a tech feature. "
        '2. Ask "So what?". '
        '3. Ask "So what?" again. '
        "4. Formulate benefit without jargon. "
        "Create a table. \n"
        'Output JSON format ONLY: {"techTranslations": [{"feature": "", "story": "", "headline": ""}]} '
    )
    return _run_json_phase(project_id, "phase9", prompt, sys_instr)


def translate(payload):
    """Placeholder: returns a fixed stub report until translation exists."""
    # ... (to be implemented)
    return {"report": "Translated report will be here."}


def image(payload):
    """Placeholder: returns an empty base64 image until generation exists."""
    # ... (to be implemented)
    return {"imageBase64": ""}


def main():
    """Placeholder: the entry-point logic is not part of this file version."""
    # ... (main function from before)
    pass


if __name__ == "__main__":
    main()