diff --git a/debug_transcription_raw.py b/debug_transcription_raw.py
new file mode 100644
index 00000000..40fbe1e7
--- /dev/null
+++ b/debug_transcription_raw.py
@@ -0,0 +1,70 @@
+import sqlite3
+import json
+import os
+
+DB_PATH = "transcripts.db"
+
+def inspect_latest_meeting():
+ if not os.path.exists(DB_PATH):
+ print(f"Error: Database file '{DB_PATH}' not found.")
+ return
+
+ conn = sqlite3.connect(DB_PATH)
+ cursor = conn.cursor()
+
+ # Get latest meeting
+ cursor.execute("SELECT id, title, created_at FROM meetings ORDER BY created_at DESC LIMIT 1")
+ meeting = cursor.fetchone()
+
+ if not meeting:
+ print("No meetings found in DB.")
+ conn.close()
+ return
+
+ meeting_id, title, created_at = meeting
+ print(f"--- Inspecting Latest Meeting: ID {meeting_id} ('{title}') created at {created_at} ---")
+
+ # Get chunks for this meeting
+ cursor.execute("SELECT id, chunk_index, raw_text, json_content FROM transcript_chunks WHERE meeting_id = ? ORDER BY chunk_index", (meeting_id,))
+ chunks = cursor.fetchall()
+
+ if not chunks:
+ print("No chunks found for this meeting.")
+
+ for chunk in chunks:
+ chunk_id, idx, raw_text, json_content = chunk
+ print(f"\n[Chunk {idx} (ID: {chunk_id})]")
+
+ print(f"Stored JSON Content (Length): {len(json.loads(json_content)) if json_content else 'None/Empty'}")
+
+ print("-" * 20 + " RAW TEXT START " + "-" * 20)
+ print(raw_text[:500]) # Print first 500 chars
+ print("..." if len(raw_text) > 500 else "")
+ print("-" * 20 + " RAW TEXT END " + "-" * 20)
+
+        # Attempt to parse manually so the exact JSONDecodeError is surfaced
+ try:
+ # Simulate cleaning logic from orchestrator
+ cleaned = raw_text.strip()
+ if cleaned.startswith("```json"):
+ cleaned = cleaned[7:]
+ elif cleaned.startswith("```"):
+ cleaned = cleaned[3:]
+ if cleaned.endswith("```"):
+ cleaned = cleaned[:-3]
+ cleaned = cleaned.strip()
+
+ parsed = json.loads(cleaned)
+ print("✅ Manual Parsing Successful!")
+ except json.JSONDecodeError as e:
+ print(f"❌ Manual Parsing Failed: {e}")
+ # Show context around error
+ if hasattr(e, 'pos'):
+ start = max(0, e.pos - 20)
+ end = min(len(cleaned), e.pos + 20)
+ print(f" Context at error: ...{cleaned[start:end]}...")
+
+ conn.close()
+
+if __name__ == "__main__":
+ inspect_latest_meeting()
diff --git a/docker-compose.yml b/docker-compose.yml
index a34e7061..d9136bee 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -98,13 +98,13 @@ services:
restart: unless-stopped
volumes:
- ./transcription-tool/backend:/app/backend
+ - ./transcription-tool/frontend/dist:/app/frontend/dist # Mount Frontend Build for Live Updates
- ./transcripts.db:/app/transcripts.db
- ./uploads_audio:/app/uploads_audio
- ./gemini_api_key.txt:/app/gemini_api_key.txt
environment:
PYTHONUNBUFFERED: "1"
DATABASE_URL: "sqlite:////app/transcripts.db"
- GEMINI_API_KEY: "AIzaSyCFRmr1rOrkFKiEuh9GOCJNB2zfJsYmR68" # Placeholder, actual key is in file
ports:
- "8001:8001"
diff --git a/transcription-tool/backend/app.py b/transcription-tool/backend/app.py
index 162231fa..6890fc25 100644
--- a/transcription-tool/backend/app.py
+++ b/transcription-tool/backend/app.py
@@ -99,6 +99,31 @@ async def upload_audio(
return meeting
+@app.post("/api/meetings/{meeting_id}/retry")
+def retry_meeting(
+ meeting_id: int,
+ background_tasks: BackgroundTasks,
+ db: Session = Depends(get_db)
+):
+ meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
+ if not meeting:
+ raise HTTPException(404, detail="Meeting not found")
+
+ # Check if chunks directory exists
+ chunk_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id))
+ if not os.path.exists(chunk_dir) or not os.listdir(chunk_dir):
+ raise HTTPException(400, detail="Original audio chunks not found. Please re-upload.")
+
+ # Reset status
+ meeting.status = "QUEUED"
+ db.commit()
+
+ # Trigger Retry Task
+ from .services.orchestrator import retry_meeting_task
+ background_tasks.add_task(retry_meeting_task, meeting.id, SessionLocal)
+
+ return {"status": "started", "message": "Retrying transcription..."}
+
from pydantic import BaseModel
class InsightRequest(BaseModel):
@@ -201,9 +226,16 @@ def delete_meeting(meeting_id: int, db: Session = Depends(get_db)):
# Serve Frontend
# This must be the last route definition to avoid catching API routes
-static_path = "/frontend_static"
+
+# PRIORITY 1: Mounted Volume (Development / Live Update)
+static_path = "/app/frontend/dist"
+
+# PRIORITY 2: Built-in Image Path (Production)
+if not os.path.exists(static_path):
+ static_path = "/frontend_static"
+
+# PRIORITY 3: Local Development (running python directly)
if not os.path.exists(static_path):
- # Fallback for local development if not in Docker
static_path = os.path.join(os.path.dirname(__file__), "../frontend/dist")
if os.path.exists(static_path):
diff --git a/transcription-tool/backend/services/orchestrator.py b/transcription-tool/backend/services/orchestrator.py
index 4806febe..60ab336c 100644
--- a/transcription-tool/backend/services/orchestrator.py
+++ b/transcription-tool/backend/services/orchestrator.py
@@ -19,6 +19,16 @@ def parse_time_to_seconds(time_str):
return 0
return 0
+def clean_json_string(text):
+ text = text.strip()
+ if text.startswith("```json"):
+ text = text[7:]
+ elif text.startswith("```"):
+ text = text[3:]
+ if text.endswith("```"):
+ text = text[:-3]
+ return text.strip()
+
def process_meeting_task(meeting_id: int, db_session_factory):
db = db_session_factory()
meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
@@ -50,7 +60,13 @@ def process_meeting_task(meeting_id: int, db_session_factory):
# Parse JSON and Adjust Timestamps
json_data = []
try:
- raw_json = json.loads(result["raw_text"])
+ cleaned_text = clean_json_string(result["raw_text"])
+ raw_json = json.loads(cleaned_text)
+
+            # Unwrap a {"items": [...]} container in case the response schema wrapped the list
+ if isinstance(raw_json, dict) and "items" in raw_json:
+ raw_json = raw_json["items"] # Extract inner list
+
if isinstance(raw_json, list):
for entry in raw_json:
seconds = parse_time_to_seconds(entry.get("time", "00:00"))
@@ -63,7 +79,7 @@ def process_meeting_task(meeting_id: int, db_session_factory):
entry["display_time"] = f"{h:02}:{m:02}:{s:02}"
json_data.append(entry)
except Exception as e:
- logger.error(f"JSON Parsing failed for chunk {i}: {e}")
+ logger.error(f"JSON Parsing failed for chunk {i}: {e}. Raw text start: {result['raw_text'][:100]}")
# Save chunk result
db_chunk = TranscriptChunk(
@@ -89,3 +105,94 @@ def process_meeting_task(meeting_id: int, db_session_factory):
db.commit()
finally:
db.close()
+
+def retry_meeting_task(meeting_id: int, db_session_factory):
+ """
+ Retries transcription using existing chunks on disk.
+ Avoids re-splitting the original file.
+ """
+ db = db_session_factory()
+ meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
+ if not meeting:
+ return
+
+ try:
+ import os
+ transcriber = TranscriptionService()
+
+ # 0. Validate Chunk Directory
+ chunk_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id))
+ if not os.path.exists(chunk_dir):
+ logger.error(f"Chunk directory not found for meeting {meeting_id}")
+ meeting.status = "ERROR"
+ db.commit()
+ return
+
+ chunks = sorted([os.path.join(chunk_dir, f) for f in os.listdir(chunk_dir) if f.endswith(".mp3")])
+ if not chunks:
+ logger.error(f"No chunks found for meeting {meeting_id}")
+ meeting.status = "ERROR"
+ db.commit()
+ return
+
+ # Phase 1: Clear Old Chunks
+ meeting.status = "RETRYING"
+ db.query(TranscriptChunk).filter(TranscriptChunk.meeting_id == meeting_id).delete()
+ db.commit()
+
+ # Phase 2: Transcribe
+ all_text = []
+ for i, chunk_path in enumerate(chunks):
+ offset = i * settings.CHUNK_DURATION_SEC
+ logger.info(f"Retrying chunk {i+1}/{len(chunks)} with offset {offset}s")
+
+ result = transcriber.transcribe_chunk(chunk_path, offset)
+
+ # Parse JSON and Adjust Timestamps (Same logic as process_meeting_task)
+ json_data = []
+ try:
+ # With response_schema, raw_text SHOULD be valid JSON directly
+                # but keep clean_json_string as a safeguard in case specific models deviate
+ cleaned_text = clean_json_string(result["raw_text"])
+ raw_json = json.loads(cleaned_text)
+
+                # Unwrap a {"items": [...]} container in case the response schema wrapped the list
+ if isinstance(raw_json, dict) and "items" in raw_json:
+ raw_json = raw_json["items"] # Extract inner list
+
+ if isinstance(raw_json, list):
+ for entry in raw_json:
+ seconds = parse_time_to_seconds(entry.get("time", "00:00"))
+ absolute_seconds = seconds + offset
+ entry["absolute_seconds"] = absolute_seconds
+
+ h = int(absolute_seconds // 3600)
+ m = int((absolute_seconds % 3600) // 60)
+ s = int(absolute_seconds % 60)
+ entry["display_time"] = f"{h:02}:{m:02}:{s:02}"
+ json_data.append(entry)
+ except Exception as e:
+ logger.error(f"JSON Parsing failed for chunk {i}: {e}. Raw: {result['raw_text'][:100]}")
+
+ # Save chunk result
+ db_chunk = TranscriptChunk(
+ meeting_id=meeting.id,
+ chunk_index=i,
+ raw_text=result["raw_text"],
+ json_content=json_data
+ )
+ db.add(db_chunk)
+ all_text.append(result["raw_text"])
+ db.commit()
+
+ # Phase 3: Finalize
+ meeting.status = "COMPLETED"
+ db.commit()
+ logger.info(f"Meeting {meeting.id} retry completed.")
+
+ except Exception as e:
+ logger.error(f"Error retrying meeting {meeting_id}: {e}", exc_info=True)
+ meeting.status = "ERROR"
+ db.commit()
+ finally:
+ db.close()
diff --git a/transcription-tool/backend/services/transcription_service.py b/transcription-tool/backend/services/transcription_service.py
index f8c6e375..1775c4b7 100644
--- a/transcription-tool/backend/services/transcription_service.py
+++ b/transcription-tool/backend/services/transcription_service.py
@@ -30,20 +30,17 @@ class TranscriptionService:
if media_file.state == "FAILED":
raise Exception("File processing failed at Gemini.")
- # 3. Transcribe with Diarization and Timestamps
+ # 3. Transcribe with Diarization and Timestamps (Plain Text Mode for Stability)
prompt = """
Transkribiere dieses Audio wortgetreu.
Identifiziere die Sprecher (Speaker A, Speaker B, etc.).
- Gib das Ergebnis als JSON-Liste zurück.
- Format:
- [
- {
- "time": "MM:SS",
- "speaker": "Speaker A",
- "text": "..."
- }
- ]
+ Gib das Ergebnis EXAKT in diesem Format zurück (pro Zeile ein Sprecherwechsel):
+ [MM:SS] Speaker Name: Gesprochener Text...
+
+ Beispiel:
+ [00:00] Speaker A: Hallo zusammen.
+ [00:05] Speaker B: Guten Morgen.
"""
logger.info(f"Generating transcription for {file_path}...")
@@ -52,14 +49,46 @@ class TranscriptionService:
contents=[media_file, prompt],
config=types.GenerateContentConfig(
temperature=0.1,
- response_mime_type="application/json"
+ max_output_tokens=8192
)
)
# Cleanup: Delete file from Gemini storage
self.client.files.delete(name=media_file.name)
-
+
+ # Parse Plain Text to JSON
+ structured_data = self.parse_transcript(response.text)
+ import json
return {
- "raw_text": response.text, # This is now a JSON string
+ "raw_text": json.dumps(structured_data), # Return valid JSON string
"offset": offset_seconds
}
+
+ def parse_transcript(self, text: str) -> list:
+ """
+ Parses lines like '[00:12] Speaker A: Hello world' into structured JSON.
+ """
+ import re
+ results = []
+ # Regex to match: [MM:SS] Speaker: Text
+ # Flexible for MM:SS or H:MM:SS
+ pattern = re.compile(r"^\[(\d{1,2}:\d{2}(?::\d{2})?)\]\s*([^:]+):\s*(.+)$")
+
+ for line in text.strip().split('\n'):
+ line = line.strip()
+ if not line: continue
+
+ match = pattern.match(line)
+ if match:
+ time_str, speaker, content = match.groups()
+ results.append({
+ "time": time_str,
+ "speaker": speaker.strip(),
+ "text": content.strip()
+ })
+ else:
+ # Fallback: Append to previous if it looks like continuation
+ if results and not line.startswith("["):
+ results[-1]["text"] += " " + line
+
+ return results
diff --git a/transcription-tool/frontend/src/App.tsx b/transcription-tool/frontend/src/App.tsx
index 863de20f..dd2d980a 100644
--- a/transcription-tool/frontend/src/App.tsx
+++ b/transcription-tool/frontend/src/App.tsx
@@ -394,6 +394,20 @@ export default function App() {
>
+
diff --git a/uploads_audio/76900d3a-9d65-460e-ad9b-6553fbc1a6bc.m4a b/uploads_audio/7cc29087-842f-4b47-b2df-ce34f8395ad4.m4a
similarity index 100%
rename from uploads_audio/76900d3a-9d65-460e-ad9b-6553fbc1a6bc.m4a
rename to uploads_audio/7cc29087-842f-4b47-b2df-ce34f8395ad4.m4a
diff --git a/uploads_audio/chunks/6/chunk_000.mp3 b/uploads_audio/chunks/6/chunk_000.mp3
index e69de29b..73a881e2 100644
Binary files a/uploads_audio/chunks/6/chunk_000.mp3 and b/uploads_audio/chunks/6/chunk_000.mp3 differ
diff --git a/uploads_audio/chunks/6/chunk_001.mp3 b/uploads_audio/chunks/6/chunk_001.mp3
new file mode 100644
index 00000000..05e1b85e
Binary files /dev/null and b/uploads_audio/chunks/6/chunk_001.mp3 differ