Files

62 lines
2.1 KiB
Python

import re
import json
# Strips Markdown emphasis markers (*x*, **x**, ...) around a run of text.
_EMPHASIS_RE = re.compile(r'\*+([^\*]+)\*+')
# A pipe that is not preceded by a backslash, i.e. a real column border.
_UNESCAPED_PIPE_RE = re.compile(r'(?<!\\)\|')
# The only characters a delimiter row may be made of.
_SEPARATOR_CHARS_RE = re.compile(r'[|\-:\s]+')


def _is_separator_row(line):
    """Return True if *line* is a delimiter row such as ``| --- | :---: |``."""
    # Requiring the whole line to consist of border/dash/colon/whitespace
    # characters keeps content rows like "| ? | --- |" from being mistaken
    # for the delimiter row.
    return '---' in line and _SEPARATOR_CHARS_RE.fullmatch(line) is not None


def _split_table_row(line):
    """Split one table line into cleaned cells, keeping empty cells in place.

    The line is split on unescaped pipes only, the outer border cells are
    dropped, emphasis markers are removed and backslash-escaped pipes are
    turned back into literal pipes.
    """
    cells = _UNESCAPED_PIPE_RE.split(line)
    if line.startswith('|'):
        cells = cells[1:]
    # A trailing escaped pipe is cell content, not the closing border.
    if cells and line.endswith('|') and not line.endswith('\\|'):
        cells = cells[:-1]
    return [
        _EMPHASIS_RE.sub(r'\1', cell.strip()).strip().replace('\\|', '|')
        for cell in cells
    ]


def parse_markdown_table(markdown_text: str) -> dict:
    """Extract a pipe-delimited Markdown table from *markdown_text*.

    Every line that (after stripping) starts and ends with ``|`` is treated
    as part of the table; all other lines are ignored, so table lines that
    are separated by other text are still collected together.

    The header is the line directly above the first delimiter row
    (``| --- | --- |``). If there is no delimiter row, the first table line
    is used as the header. Emphasis markers (``*`` / ``**``) are removed
    from headers and cells.

    Args:
        markdown_text: Text that may contain a Markdown table.

    Returns:
        A dict ``{"headers": [...], "rows": [[...], ...]}``. Header cells are
        kept positionally (an empty header cell stays an empty string) so
        that row cells line up with their column. Each row is padded with
        ``''`` or truncated to the number of headers; rows whose cells are
        all empty are skipped. Both lists are empty when no table is found,
        when the delimiter row is the first table line, or when every
        header cell is empty.
    """
    table_lines = [
        line
        for line in (raw.strip() for raw in markdown_text.strip().split('\n'))
        if line.startswith('|') and line.endswith('|')
    ]
    if not table_lines:
        return {"headers": [], "rows": []}

    separator_index = next(
        (i for i, line in enumerate(table_lines) if _is_separator_row(line)),
        -1,
    )
    if separator_index == 0:
        # A delimiter row with nothing above it has no header to attach to.
        return {"headers": [], "rows": []}
    if separator_index == -1:
        header_line, data_start = table_lines[0], 1
    else:
        header_line = table_lines[separator_index - 1]
        data_start = separator_index + 1

    # Split the header exactly like a data row: filtering out empty header
    # cells would shift every following column to the left.
    headers = _split_table_row(header_line)
    if not any(headers):
        return {"headers": [], "rows": []}

    width = len(headers)
    rows = []
    for line in table_lines[data_start:]:
        cells = _split_table_row(line)[:width]
        cells.extend([''] * (width - len(cells)))
        if any(cells):
            rows.append(cells)
    return {"headers": headers, "rows": rows}
# Sample input copied from the log; the huge gap was removed so the test
# text stays small. The blank first and last entries reproduce the leading
# and trailing newline of the original text.
content = "\n".join([
    "",
    "## Schritt 1: Angebot (WAS)",
    "| Produkt/Lösung | Beschreibung (1-2 Sätze) | Kernfunktionen | Differenzierung | Primäre Quelle (URL) |",
    "| --- | --- | --- | --- | --- |",
    "| **AgreeDo (Meeting Management Software)** | AgreeDo ist eine webbasierte Anwendung... | **Kernfunktionen:**... | **Differenzierung:**... | `https://agreedo.com/` |",
    "",
])

# Parse the sample and dump the resulting headers/rows as indented JSON.
result = parse_markdown_table(content)
print(json.dumps(result, indent=2))