[30388f42] Infrastructure Hardening: Repaired CE/Connector DB schema, fixed frontend styling build, implemented robust echo shield in worker v2.1.1, and integrated Lead Engine into gateway.
This commit is contained in:
61
ARCHIVE_legacy_scripts/test_parser.py
Normal file
61
ARCHIVE_legacy_scripts/test_parser.py
Normal file
@@ -0,0 +1,61 @@
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
def parse_markdown_table(markdown_text):
    """Parse a pipe-delimited Markdown table into headers and rows.

    Only lines that (after stripping) both start and end with ``|`` are
    treated as table lines; every other line is ignored. Table lines are
    pooled from the whole text, so they do not need to be contiguous.

    The header is the table line directly above the first separator row
    (e.g. ``| --- | --- |``). If no separator row exists, the first table
    line is used as the header and all following table lines are data.

    Cells are split positionally for the header and for data rows alike, so
    a blank header cell keeps its column (as ``''``) instead of shifting the
    columns that follow it. ``*``/``**`` emphasis markers around text are
    removed from every cell.

    Args:
        markdown_text: Text that may contain a Markdown table.

    Returns:
        A dict ``{"headers": [...], "rows": [[...], ...]}``. Every row has
        exactly ``len(headers)`` cells (short rows are padded with ``''``,
        long rows are truncated); rows whose cells are all empty are
        dropped. Both lists are empty when no table line is found, when the
        separator row is the first table line, or when the header row has
        no non-blank cell.
    """
    table_lines = [
        stripped
        for stripped in (raw.strip() for raw in markdown_text.strip().split('\n'))
        if stripped.startswith('|') and stripped.endswith('|')
    ]
    if not table_lines:
        return {"headers": [], "rows": []}

    separator_index = next(
        (i for i, line in enumerate(table_lines) if _is_separator_row(line)),
        None,
    )
    if separator_index is None:
        header_line, data_start = table_lines[0], 1
    elif separator_index == 0:
        # A separator with nothing above it leaves no header row to read.
        return {"headers": [], "rows": []}
    else:
        header_line = table_lines[separator_index - 1]
        data_start = separator_index + 1

    raw_headers = _split_cells(header_line)
    if not any(cell.strip() for cell in raw_headers):
        return {"headers": [], "rows": []}
    # Keep blank header cells in place: filtering them out would misalign
    # every data column to the right of the blank one.
    headers = [_clean_cell(cell) for cell in raw_headers]

    width = len(headers)
    rows = []
    for line in table_lines[data_start:]:
        cells = [_clean_cell(cell) for cell in _split_cells(line)][:width]
        cells.extend([''] * (width - len(cells)))
        if any(cells):
            rows.append(cells)

    return {"headers": headers, "rows": rows}


# Matches text wrapped in one or more asterisks (Markdown emphasis/bold).
_EMPHASIS_RE = re.compile(r'\*+([^\*]+)\*+')


def _is_separator_row(line):
    """Return True if *line* contains ``---`` and no ASCII letter or digit."""
    return '---' in line and not re.search(r'[a-zA-Z0-9]', line)


def _split_cells(line):
    """Split a table line that starts and ends with ``|`` into raw cells."""
    # The first and last pieces are the empty strings outside the outer pipes.
    return line.split('|')[1:-1]


def _clean_cell(cell):
    """Strip whitespace and surrounding asterisk emphasis from one cell."""
    return _EMPHASIS_RE.sub(r'\1', cell.strip()).strip()
|
||||
|
||||
# Sample table text taken from the log; the huge gap present in the logged
# output was removed so the fixture stays small enough for testing.
content = """
## Schritt 1: Angebot (WAS)

| Produkt/Lösung | Beschreibung (1-2 Sätze) | Kernfunktionen | Differenzierung | Primäre Quelle (URL) |
| --- | --- | --- | --- | --- |
| **AgreeDo (Meeting Management Software)** | AgreeDo ist eine webbasierte Anwendung... | **Kernfunktionen:**... | **Differenzierung:**... | `https://agreedo.com/` |
"""

# Run the parser on the fixture and print the result as indented JSON.
result = parse_markdown_table(content)
pretty = json.dumps(result, indent=2)
print(pretty)
|
||||
Reference in New Issue
Block a user