[30388f42] Infrastructure Hardening: Repaired CE/Connector DB schema, fixed frontend styling build, implemented robust echo shield in worker v2.1.1, and integrated Lead Engine into gateway.
This commit is contained in:
70
ARCHIVE_legacy_scripts/debug_transcription_raw.py
Normal file
70
ARCHIVE_legacy_scripts/debug_transcription_raw.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import sqlite3
|
||||
import json
|
||||
import os
|
||||
|
||||
# Path of the SQLite database file that inspect_latest_meeting() opens.
# It is a relative path, so it is resolved against the current working directory.
DB_PATH = "transcripts.db"
|
||||
|
||||
def _strip_code_fence(text):
    """Return *text* without a leading ```json / ``` fence and trailing ``` fence.

    Surrounding whitespace is stripped before and after the fences are removed.
    """
    cleaned = text.strip()
    if cleaned.startswith("```json"):
        cleaned = cleaned[7:]
    elif cleaned.startswith("```"):
        cleaned = cleaned[3:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def _describe_stored_json(json_content):
    """Return the length of the decoded stored JSON, or a short note.

    Returns 'None/Empty' for a falsy value and an 'Unavailable (...)' note
    when the value cannot be decoded or the decoded value has no length, so
    that one bad row does not abort the whole inspection.
    """
    if not json_content:
        return "None/Empty"
    try:
        return len(json.loads(json_content))
    except (TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError; TypeError covers scalars
        # such as numbers, which decode fine but have no len().
        return f"Unavailable ({exc})"


def _report_chunk(chunk_id, idx, raw_text, json_content):
    """Print a diagnostic report for a single transcript chunk row."""
    print(f"\n[Chunk {idx} (ID: {chunk_id})]")

    print(f"Stored JSON Content (Length): {_describe_stored_json(json_content)}")

    if raw_text is None:
        # A NULL column would otherwise crash the slicing below.
        print("(raw_text is NULL in the database)")
        raw_text = ""

    print("-" * 20 + " RAW TEXT START " + "-" * 20)
    print(raw_text[:500])  # Print first 500 chars
    print("..." if len(raw_text) > 500 else "")
    print("-" * 20 + " RAW TEXT END " + "-" * 20)

    # Try to parse manually to see the error. The fence stripping simulates
    # the cleaning logic from the orchestrator.
    cleaned = _strip_code_fence(raw_text)
    try:
        json.loads(cleaned)
    except json.JSONDecodeError as e:
        print(f"❌ Manual Parsing Failed: {e}")
        # Show context around the error position.
        start = max(0, e.pos - 20)
        end = min(len(cleaned), e.pos + 20)
        print(f" Context at error: ...{cleaned[start:end]}...")
    else:
        print("✅ Manual Parsing Successful!")


def inspect_latest_meeting(db_path=None):
    """Print a diagnostic dump of the most recent meeting's transcript chunks.

    Selects the newest row of the ``meetings`` table (ordered by
    ``created_at``), then reads that meeting's rows from
    ``transcript_chunks`` in ``chunk_index`` order. For every chunk it prints
    the length of the stored JSON, the first 500 characters of the raw text,
    and the result of re-parsing the raw text as JSON after stripping
    Markdown code fences.

    Args:
        db_path: Path of the SQLite database to inspect. When ``None``
            (the default) the module-level ``DB_PATH`` is used.

    Returns:
        None. All results are written to standard output.

    Raises:
        sqlite3.Error: If a query fails (for example, a table is missing).
            The connection is closed before the error propagates.
    """
    path = DB_PATH if db_path is None else db_path

    # sqlite3.connect() would silently create a missing file, so check first.
    if not os.path.exists(path):
        print(f"Error: Database file '{path}' not found.")
        return

    conn = sqlite3.connect(path)
    try:
        cursor = conn.cursor()

        # Get latest meeting
        cursor.execute(
            "SELECT id, title, created_at FROM meetings "
            "ORDER BY created_at DESC LIMIT 1"
        )
        meeting = cursor.fetchone()
        if not meeting:
            print("No meetings found in DB.")
            return

        meeting_id, title, created_at = meeting
        print(f"--- Inspecting Latest Meeting: ID {meeting_id} ('{title}') created at {created_at} ---")

        # Get chunks for this meeting
        cursor.execute(
            "SELECT id, chunk_index, raw_text, json_content "
            "FROM transcript_chunks WHERE meeting_id = ? ORDER BY chunk_index",
            (meeting_id,),
        )
        chunks = cursor.fetchall()
        if not chunks:
            print("No chunks found for this meeting.")

        for chunk_id, idx, raw_text, json_content in chunks:
            _report_chunk(chunk_id, idx, raw_text, json_content)
    finally:
        # Always release the connection, including on early return or error.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # Run the inspection only when executed as a script, not when imported.
    inspect_latest_meeting()
|
||||
Reference in New Issue
Block a user