feat(gtm): Implement Meta-Framework for strategic analysis
Refactors the GTM orchestrator prompts (phases 2-9) to use a question-based strategic framework derived from the internal marketing blueprint. This new 'Meta-Framework' approach ensures strategic depth and prevents content pollution from irrelevant examples when analyzing new product categories.
- Updates the orchestrator prompts.
- Adds documentation explaining how to modify the new strategy logic.
- Includes minor Node.js fixes and dependency updates.
This commit is contained in:
@@ -433,33 +433,78 @@ def find_relevant_links(base_url):
|
||||
logging.warning(f"Could not scrape base URL {base_url} for links: {e}")
|
||||
return []
|
||||
|
||||
def clean_llm_response(text):
    """Sanitize an LLM response by removing excess whitespace and common artifacts.

    Processing order:
      0. Unicode space separators (non-breaking space, thin space, ...) are
         turned into a plain ASCII space.
      1. Runs of five or more spaces/tabs are collapsed to a single space.
         Newlines are never touched, so Markdown structure is preserved.
      2. Non-printable characters are removed, except ``\\n``, ``\\r`` and ``\\t``.
      3. Spaces/tabs trailing a closing table pipe at the end of a line are removed.

    Args:
        text: Raw response text. May be ``None`` or empty.

    Returns:
        The cleaned text with leading/trailing whitespace stripped, or ``""``
        when ``text`` is falsy.
    """
    import unicodedata

    if not text:
        return ""

    # 0. str.isprintable() is False for every Unicode separator except the
    #    ASCII space, so step 2 would delete e.g. U+00A0 and glue the
    #    neighbouring words together. Map those to a normal space first.
    text = "".join(" " if unicodedata.category(ch) == "Zs" else ch for ch in text)

    # 1. Collapse long horizontal whitespace runs (5+ spaces/tabs) to one space.
    text = re.sub(r'[ \t]{5,}', ' ', text)

    # 2. Remove non-printable characters (control chars etc.), keeping line
    #    breaks and tabs.
    text = "".join(ch for ch in text if ch.isprintable() or ch in "\n\r\t")

    # 3. Trim blanks after a line-ending pipe. Only spaces/tabs are matched:
    #    `\s` would also consume newlines and swallow the blank line that
    #    terminates a table. The lookahead keeps a CR of a CRLF ending intact.
    text = re.sub(r'\|[ \t]+(?=\r?$)', '|', text, flags=re.MULTILINE)

    return text.strip()
|
||||
|
||||
def parse_markdown_table(markdown_text):
    """Extract the header and data rows of a Markdown pipe table from LLM output.

    Every non-empty line that starts and ends with ``|`` is treated as a
    table line. The first separator line (``|---|---|``) marks the header as
    the line directly above it; if no separator exists, the first table line
    is used as the header.

    Args:
        markdown_text: Text that may contain a Markdown table.

    Returns:
        A dict ``{"headers": [...], "rows": [[...], ...]}``. Headers are the
        non-empty header cells. Each row is padded with ``''`` or truncated to
        the number of headers. Both lists are empty when no usable table is
        found (no table lines, a separator with no header above it, or an
        all-empty header line).
    """
    empty_result = {"headers": [], "rows": []}

    # Sanitize input first (handles None/empty and stray control characters).
    markdown_text = clean_llm_response(markdown_text)

    # 1. Collect all lines that look like table rows (start and end with |).
    stripped_lines = (line.strip() for line in markdown_text.split('\n'))
    table_lines = [
        line for line in stripped_lines
        if line and line.startswith('|') and line.endswith('|')
    ]
    if not table_lines:
        return empty_result

    # 2. Find the first separator line (|---|---|...).
    separator_index = next(
        (i for i, line in enumerate(table_lines) if _is_separator_row(line)),
        -1,
    )

    if separator_index == -1:
        # No separator: assume the first table line is the header.
        header_line = table_lines[0]
        data_start = 1
    elif separator_index == 0:
        # A separator with nothing above it means the header is missing.
        return empty_result
    else:
        header_line = table_lines[separator_index - 1]
        data_start = separator_index + 1

    # 3. Extract and clean headers. Empty header cells are dropped.
    headers = [_clean_cell(h) for h in header_line.split('|') if h.strip()]
    if not headers:
        return empty_result

    # 4. Extract and clean rows.
    rows = []
    for line in table_lines[data_start:]:
        # Further separator lines (e.g. from a second table in the same
        # response) are structure, not data.
        if _is_separator_row(line):
            continue

        # Every table line starts and ends with '|', so the first and last
        # split elements are the empty strings outside the outer pipes.
        cells = [_clean_cell(c) for c in line.split('|')[1:-1]]

        # Pad or truncate the row to match the header length.
        if len(cells) < len(headers):
            cells.extend([''] * (len(headers) - len(cells)))
        elif len(cells) > len(headers):
            cells = cells[:len(headers)]

        # Skip rows whose cells are all empty.
        if any(cells):
            rows.append(cells)

    return {"headers": headers, "rows": rows}


def _is_separator_row(line):
    """Return True if ``line`` is a table separator such as ``|---|:---:|``."""
    # Besides containing a dash run, the line may consist only of pipes,
    # dashes, alignment colons and whitespace. Checking merely for ASCII
    # alphanumerics would misclassify rows like ``| --- | 日本語 |``.
    return '---' in line and re.fullmatch(r'[|\-:\s]+', line) is not None


def _clean_cell(cell):
    """Strip whitespace and surrounding ``*`` emphasis markers from one cell."""
    return re.sub(r'\*+([^*]+)\*+', r'\1', cell.strip()).strip()
|
||||
|
||||
def format_context_for_prompt(analysis_data, language):
|
||||
@@ -521,7 +566,8 @@ def start_generation(url, language, regions, focus):
|
||||
save_detailed_log("step1_offer", "response", response_text)
|
||||
|
||||
step1_title = current_prompts['STEP_TITLES']['offer']
|
||||
title_match = re.search(rf'## {re.escape(step1_title)}\s*', response_text, re.IGNORECASE)
|
||||
# Flexible header matching
|
||||
title_match = re.search(rf'^##\s*(?:Schritt|Step)\s*1.*$', response_text, re.IGNORECASE | re.MULTILINE)
|
||||
content = response_text[title_match.end():].strip() if title_match else response_text
|
||||
table_data = parse_markdown_table(content)
|
||||
|
||||
@@ -566,7 +612,8 @@ def next_step(language, context_file, generation_step, channels, focus_industry=
|
||||
|
||||
step_key = ['offer', 'targetGroups', 'personas', 'painPoints', 'gains', 'messages', 'customerJourney'][generation_step - 1]
|
||||
expected_title = current_prompts['STEP_TITLES'][step_key]
|
||||
title_match = re.search(rf'## {re.escape(expected_title)}\s*', response_text, re.IGNORECASE)
|
||||
# Flexible header matching
|
||||
title_match = re.search(rf'^##\s*(?:Schritt|Step)\s*{generation_step}.*$', response_text, re.IGNORECASE | re.MULTILINE)
|
||||
content = response_text[title_match.end():].strip() if title_match else response_text
|
||||
table_data = parse_markdown_table(content)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user