Files
Brancheneinstufung2/ARCHIVE_legacy_scripts/debug_transcription_raw.py

71 lines
2.3 KiB
Python

import sqlite3
import json
import os
# SQLite database file that inspect_latest_meeting() opens. The path is
# relative, so it is resolved against the current working directory.
DB_PATH = "transcripts.db"
def _describe_stored_json(json_content):
    """Return a printable summary of a chunk's stored ``json_content`` value.

    Yields ``'None/Empty'`` for a falsy value, the ``len()`` of the decoded
    JSON when that is available, and otherwise a short description of why the
    length could not be computed. It never raises, so one bad row cannot abort
    the inspection of the remaining chunks.
    """
    if not json_content:
        return "None/Empty"
    try:
        return str(len(json.loads(json_content)))
    except (TypeError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError (invalid JSON); TypeError
        # covers valid JSON that has no length (numbers, true/false, null).
        return f"Unreadable ({exc})"


def _strip_code_fence(text):
    """Remove surrounding whitespace and a Markdown code fence from *text*.

    A leading "```json" or "```" and a trailing "```" are dropped, then the
    remainder is stripped again.
    """
    cleaned = text.strip()
    if cleaned.startswith("```json"):
        cleaned = cleaned[7:]
    elif cleaned.startswith("```"):
        cleaned = cleaned[3:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def inspect_latest_meeting(db_path=None):
    """Print a diagnostic dump of the most recently created meeting.

    Selects the newest row of the ``meetings`` table (by ``created_at``),
    then for each of its ``transcript_chunks`` rows (ordered by
    ``chunk_index``) prints the stored JSON length, the first 500 characters
    of ``raw_text``, and the outcome of re-parsing ``raw_text`` as JSON after
    stripping a Markdown code fence. On a parse failure the text around the
    error position is printed as well.

    Args:
        db_path: Path of the SQLite database file to inspect. Defaults to the
            module-level ``DB_PATH`` when omitted or ``None``.

    Returns:
        None. All results are written to standard output.
    """
    if db_path is None:
        db_path = DB_PATH

    # Check first: sqlite3.connect() would silently create an empty database
    # file if the path did not exist.
    if not os.path.exists(db_path):
        print(f"Error: Database file '{db_path}' not found.")
        return

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Get latest meeting
        cursor.execute(
            "SELECT id, title, created_at FROM meetings "
            "ORDER BY created_at DESC LIMIT 1"
        )
        meeting = cursor.fetchone()
        if not meeting:
            print("No meetings found in DB.")
            return

        meeting_id, title, created_at = meeting
        print(f"--- Inspecting Latest Meeting: ID {meeting_id} ('{title}') created at {created_at} ---")

        # Get chunks for this meeting
        cursor.execute(
            "SELECT id, chunk_index, raw_text, json_content "
            "FROM transcript_chunks WHERE meeting_id = ? ORDER BY chunk_index",
            (meeting_id,),
        )
        chunks = cursor.fetchall()
        if not chunks:
            print("No chunks found for this meeting.")

        for chunk_id, idx, raw_text, json_content in chunks:
            print(f"\n[Chunk {idx} (ID: {chunk_id})]")
            print(f"Stored JSON Content (Length): {_describe_stored_json(json_content)}")

            # A NULL raw_text cannot be sliced or parsed; report it and move
            # on instead of crashing before the remaining chunks are shown.
            if raw_text is None:
                print("(raw_text is NULL - nothing to parse)")
                continue

            print("-" * 20 + " RAW TEXT START " + "-" * 20)
            print(raw_text[:500])  # Print first 500 chars
            print("..." if len(raw_text) > 500 else "")
            print("-" * 20 + " RAW TEXT END " + "-" * 20)

            # Try to parse manually to see the error.
            # Simulate cleaning logic from orchestrator
            cleaned = _strip_code_fence(raw_text)
            try:
                json.loads(cleaned)
            except json.JSONDecodeError as e:
                print(f"❌ Manual Parsing Failed: {e}")
                # Show context around error
                start = max(0, e.pos - 20)
                end = min(len(cleaned), e.pos + 20)
                print(f" Context at error: ...{cleaned[start:end]}...")
            else:
                print("✅ Manual Parsing Successful!")
    finally:
        # Release the connection on every exit path, including exceptions.
        conn.close()
# Run the inspection only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    inspect_latest_meeting()