Dateien nach "b2b-marketing-assistant/services" hochladen

2025-12-22 20:02:06 +00:00
parent e62d3a2dae
commit 5b56d66f28
1 changed files with 129 additions and 0 deletions
--- a/b2b-marketing-assistant/services/parser.ts
+++ b/b2b-marketing-assistant/services/parser.ts
@@ -0,0 +1,129 @@
+import type { AnalysisData, AnalysisStep } from '../types';
+
+/**
+ * Parses the lines of a Markdown pipe table into header cells and data rows.
+ *
+ * The first line is the header row and the second line (the separator) is skipped.
+ * Every later line that contains a pipe is split into cells; rows that are completely
+ * empty or that look like another separator (e.g. `|:---|---:|`) are dropped.
+ *
+ * Header and data rows are split by the same rules so column indices stay aligned:
+ * one optional leading and one optional trailing pipe are removed, escaped pipes (`\|`)
+ * stay inside their cell, and surrounding whitespace and bold/italic asterisks are stripped.
+ *
+ * @param tableLines Lines of the table, starting with the header row.
+ * @returns Header cells and data rows; both are empty when fewer than two lines are given.
+ */
+function parseTable(tableLines: string[]): { headers: string[], rows:string[][] } {
+    const [headerLine, , ...bodyLines] = tableLines;
+    if (tableLines.length < 2 || headerLine === undefined) return { headers: [], rows: [] };
+
+    // Removes surrounding whitespace and leading/trailing markdown asterisks (bold/italic),
+    // then trims again so "** text **" does not keep its inner padding.
+    const cleanCell = (text: string): string => text.trim().replace(/^\*+|\*+$/g, '').trim();
+
+    // Splits one table line into cleaned cells. Outer pipes are optional in Markdown tables,
+    // so they are removed only when present instead of blindly dropping the first/last cell.
+    const splitRow = (line: string): string[] => {
+        let inner = line.trim();
+        if (inner.startsWith('|')) inner = inner.slice(1);
+        if (inner.endsWith('|') && !inner.endsWith('\\|')) inner = inner.slice(0, -1);
+        return inner
+            .split(/(?<!\\)\|/) // split on pipes that are not escaped as "\|"
+            .map(cell => cleanCell(cell.replace(/\\\|/g, '|')));
+    };
+
+    // A separator cell consists of dashes with optional alignment colons, e.g. ":---:".
+    const isSeparatorCell = (cell: string): boolean => /^:?-+:?$/.test(cell);
+
+    const headers = splitRow(headerLine);
+
+    const rows = bodyLines
+        .filter(line => line.includes('|')) // text without any pipe is not part of the table
+        .map(splitRow)
+        .filter(row => row.some(cell => cell !== '') && !row.every(isSeparatorCell));
+
+    return { headers, rows };
+}
+
+/**
+ * Parses the markdown of one analysis step into summary bullets plus the first table found.
+ *
+ * The summary is the text after a bold `**Kurzresümee…:**` / `**Summary…:**` label, up to the
+ * next blank line, table row, step heading, or the end of the text. Each non-empty line becomes
+ * one entry with its list marker (`* ` or `- `) removed.
+ *
+ * The table starts at the first line containing a pipe whose following non-empty line contains
+ * `---`; everything from there on is handed to `parseTable`.
+ *
+ * @param sectionText Markdown text of the step.
+ * @returns Headers, rows and summary; headers and rows are empty when no table is found.
+ */
+function parseSection(sectionText: string): AnalysisStep {
+    // Normalize CRLF / CR line endings so the blank-line terminator and line splitting work.
+    const text = sectionText.replace(/\r\n?/g, '\n');
+    const lines = text.split('\n').filter(line => line.trim() !== '');
+
+    let summary: string[] = [];
+    // Terminators: blank line, a following line that starts a table row, the literal "| ---",
+    // the next step heading, or the end of the text (summary is the last thing in the section).
+    const summaryMatch = text.match(
+        /\*\*(?:Kurzresümee|Summary).*?:\*\*\s*([\s\S]*?)(?:\n\n|\n[ \t]*\||\| ---|## (?:Schritt|Step)|$)/i
+    );
+    if (summaryMatch && summaryMatch[1]) {
+        summary = summaryMatch[1]
+            .split('\n')
+            // Trim first so indented bullets are recognized; require whitespace after the
+            // marker so a line starting with "**bold**" keeps its asterisks.
+            .map(line => line.trim().replace(/^[*\-]\s+/, '').trim())
+            .filter(Boolean);
+    }
+
+    // A table begins where a line with a pipe is directly followed by a separator-like line.
+    const tableStartIndex = lines.findIndex(
+        (line, i) => line.includes('|') && lines[i + 1]?.includes('---')
+    );
+
+    if (tableStartIndex === -1) {
+        return { headers: [], rows: [], summary };
+    }
+
+    const { headers, rows } = parseTable(lines.slice(tableStartIndex));
+
+    // Step 6 tables have a variable number of channel columns after the first three headers
+    // (per the original note: Zielbranche, Rolle, Kernbotschaft). Collapse those channel
+    // headers into a single comma-separated header.
+    // NOTE(review): only the headers are merged; row cells are left as parsed — confirm the
+    // consumer expects that.
+    if (/## (?:Schritt|Step) 6:/i.test(text)) {
+        const channelHeaders = headers.slice(3);
+        if (channelHeaders.length > 0) {
+            headers.splice(3, channelHeaders.length, channelHeaders.join(', '));
+        }
+    }
+
+    return { headers, rows, summary };
+}
+
+/**
+ * Carries the last non-blank value of one column down into the blank cells below it.
+ * Tables often state a grouping value only on the first row of each group; this repeats
+ * the value on every row of the group.
+ *
+ * The column is the first header that starts with any of the given names (compared
+ * case-insensitively after trimming). Rows too short to contain the column are copied unchanged.
+ *
+ * @param step The analysis step whose rows should be filled.
+ * @param columnNames Accepted names for the target column (e.g., ['Rolle', 'Role']).
+ * @returns The same step when no column matches or there are no rows; otherwise a copy with new rows.
+ */
+const fillDownColumn = (step: AnalysisStep, columnNames: string[]): AnalysisStep => {
+    const matchesAnyName = (header: string): boolean => {
+        const normalizedHeader = header.toLowerCase().trim();
+        for (const candidate of columnNames) {
+            if (normalizedHeader.startsWith(candidate.toLowerCase().trim())) {
+                return true;
+            }
+        }
+        return false;
+    };
+    const targetIndex = step.headers.findIndex(matchesAnyName);
+
+    if (targetIndex === -1) return step;
+    if (!step.rows || step.rows.length === 0) return step;
+
+    const result: typeof step.rows = [];
+    let carried = '';
+    for (const sourceRow of step.rows) {
+        const copy = [...sourceRow];
+        if (targetIndex < copy.length) {
+            const cell = copy[targetIndex] || ''; // treat a missing cell as blank
+            if (cell.trim() === '') {
+                copy[targetIndex] = carried;
+            } else {
+                carried = cell;
+            }
+        }
+        result.push(copy);
+    }
+
+    return { ...step, rows: result };
+};
+
+
+/**
+ * Converts the markdown answer for one analysis step into a partial `AnalysisData` object.
+ *
+ * The step number is read from the first `## Schritt N:` / `## Step N:` heading and mapped to
+ * its key (1 = offer … 6 = messages). The markdown is parsed with `parseSection`, and for the
+ * persona, pain point, gain and message steps blank cells of the grouping columns are filled
+ * down with `fillDownColumn`.
+ *
+ * @param markdown Markdown text containing a step heading.
+ * @returns An object with the single key of the detected step, or `{}` when there is no
+ *          heading or the step number has no matching key.
+ */
+export const parseGeminiStepResponse = (markdown: string): Partial<AnalysisData> => {
+    const headingMatch = /## (?:Schritt|Step) (\d+):/i.exec(markdown);
+    if (headingMatch === null) {
+        return {};
+    }
+
+    const orderedKeys: (keyof AnalysisData)[] = ['offer', 'targetGroups', 'personas', 'painPoints', 'gains', 'messages'];
+    const key = orderedKeys[parseInt(headingMatch[1], 10) - 1];
+    if (!key) {
+        return {};
+    }
+
+    // Columns whose blank cells inherit the value from the row above, applied in this order.
+    const fillDownPlan: string[][] = [];
+    switch (key) {
+        case 'personas':
+        case 'painPoints':
+        case 'gains':
+            fillDownPlan.push(['Rolle', 'Role']);
+            break;
+        case 'messages':
+            fillDownPlan.push(['Zielbranche/Segment', 'Target Industry/Segment'], ['Rolle', 'Role']);
+            break;
+        default:
+            break;
+    }
+
+    const parsed = fillDownPlan.reduce(
+        (current, names) => fillDownColumn(current, names),
+        parseSection(markdown),
+    );
+
+    return { [key]: parsed };
+};