feat(transcription): v0.4.0 with structured json, inline editing and deletion

- Backend: Switched prompt to JSON output for structured data
- Backend: Added PUT /chunks/{id} endpoint for persistence
- Backend: Fixed app.py imports and initialization logic
- Frontend: Complete rewrite for Unified View (flattened chunks)
- Frontend: Added Inline Editing (Text/Speaker) and Row Deletion
- Docs: Updated TRANSCRIPTION_TOOL.md with v0.4 features
This commit is contained in:
2026-01-24 20:43:33 +00:00
parent 0858df6f25
commit da00d461e1
5 changed files with 389 additions and 99 deletions

View File

@@ -1,6 +1,8 @@
from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, BackgroundTasks
from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, BackgroundTasks, Body
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from sqlalchemy.orm import Session
from sqlalchemy.orm import Session, joinedload
from typing import List, Dict, Any
import os
import shutil
import uuid
@@ -10,6 +12,7 @@ from .config import settings
from .database import init_db, get_db, Meeting, TranscriptChunk, AnalysisResult, SessionLocal
from .services.orchestrator import process_meeting_task
# Initialize FastAPI App
app = FastAPI(
title=settings.APP_NAME,
version=settings.VERSION,
@@ -36,6 +39,33 @@ def health():
def list_meetings(db: Session = Depends(get_db)):
return db.query(Meeting).order_by(Meeting.created_at.desc()).all()
@app.get("/api/meetings/{meeting_id}")
def get_meeting(meeting_id: int, db: Session = Depends(get_db)):
    """Return one meeting with its transcript chunks eagerly loaded."""
    query = (
        db.query(Meeting)
        .options(joinedload(Meeting.chunks))
        .filter(Meeting.id == meeting_id)
    )
    meeting = query.first()
    if meeting is None:
        raise HTTPException(404, detail="Meeting not found")
    # The relationship gives no ordering guarantee; present chunks in
    # transcript order.
    meeting.chunks.sort(key=lambda chunk: chunk.chunk_index)
    return meeting
@app.put("/api/chunks/{chunk_id}")
def update_chunk(chunk_id: int, payload: Dict[str, Any] = Body(...), db: Session = Depends(get_db)):
    """Persist edits to a chunk's structured JSON content (inline editing/deletion)."""
    chunk = db.query(TranscriptChunk).filter(TranscriptChunk.id == chunk_id).first()
    if chunk is None:
        raise HTTPException(404, detail="Chunk not found")
    # Only the JSON body is editable; other fields are ignored on purpose.
    if "json_content" in payload:
        chunk.json_content = payload["json_content"]
        db.commit()
    return {"status": "updated"}
@app.post("/api/upload")
async def upload_audio(
background_tasks: BackgroundTasks,
@@ -67,6 +97,39 @@ async def upload_audio(
return meeting
@app.delete("/api/meetings/{meeting_id}")
def delete_meeting(meeting_id: int, db: Session = Depends(get_db)):
    """Delete a meeting: remove its audio files from disk, then the DB row.

    File cleanup is best-effort — a filesystem failure must not block the
    database delete, so errors are reported and swallowed.

    Raises:
        HTTPException: 404 when no meeting with the given id exists.
    """
    meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
    if not meeting:
        raise HTTPException(404, detail="Meeting not found")

    # 1. Delete files. Guard against a NULL file_path (os.path.exists(None)
    # would raise TypeError), and catch only filesystem errors instead of
    # the previous blanket Exception so real bugs still surface.
    try:
        if meeting.file_path and os.path.exists(meeting.file_path):
            os.remove(meeting.file_path)
        # Per-meeting chunk directory created during processing.
        chunk_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id))
        if os.path.exists(chunk_dir):
            shutil.rmtree(chunk_dir)
    except OSError as e:
        print(f"Error deleting files: {e}")

    # 2. Delete DB entry (cascade deletes chunks/analyses).
    db.delete(meeting)
    db.commit()
    return {"status": "deleted"}
# Serve Frontend.
# Registered after every API route so the catch-all static mount at "/"
# cannot shadow them.
_docker_dist = "/frontend_static"
_local_dist = os.path.join(os.path.dirname(__file__), "../frontend/dist")
# Prefer the Docker image path; otherwise fall back to the local build.
static_path = _docker_dist if os.path.exists(_docker_dist) else _local_dist

if os.path.exists(static_path):
    app.mount("/", StaticFiles(directory=static_path, html=True), name="static")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run("backend.app:app", host="0.0.0.0", port=8001, reload=True)

View File

@@ -1,4 +1,5 @@
import logging
import json
from sqlalchemy.orm import Session
from .ffmpeg_service import FFmpegService
from .transcription_service import TranscriptionService
@@ -7,6 +8,17 @@ from ..config import settings
logger = logging.getLogger(__name__)
def parse_time_to_seconds(time_str):
    """Convert a "MM:SS" or "HH:MM:SS" timestamp string to total seconds.

    Returns 0 for anything unparseable (None, wrong field count,
    non-numeric fields) so callers can treat model timestamps as
    best-effort data.
    """
    try:
        parts = time_str.split(':')
        if len(parts) == 2:  # MM:SS
            return int(parts[0]) * 60 + int(parts[1])
        if len(parts) == 3:  # HH:MM:SS
            return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
    except (AttributeError, ValueError):
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit. AttributeError: time_str is not a
        # string (e.g. None); ValueError: a field is not an integer.
        return 0
    return 0
def process_meeting_task(meeting_id: int, db_session_factory):
db = db_session_factory()
meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
@@ -35,11 +47,30 @@ def process_meeting_task(meeting_id: int, db_session_factory):
result = transcriber.transcribe_chunk(chunk_path, offset)
# Parse JSON and Adjust Timestamps
json_data = []
try:
raw_json = json.loads(result["raw_text"])
if isinstance(raw_json, list):
for entry in raw_json:
seconds = parse_time_to_seconds(entry.get("time", "00:00"))
absolute_seconds = seconds + offset
entry["absolute_seconds"] = absolute_seconds
h = int(absolute_seconds // 3600)
m = int((absolute_seconds % 3600) // 60)
s = int(absolute_seconds % 60)
entry["display_time"] = f"{h:02}:{m:02}:{s:02}"
json_data.append(entry)
except Exception as e:
logger.error(f"JSON Parsing failed for chunk {i}: {e}")
# Save chunk result
db_chunk = TranscriptChunk(
meeting_id=meeting.id,
chunk_index=i,
raw_text=result["raw_text"]
raw_text=result["raw_text"],
json_content=json_data
)
db.add(db_chunk)
all_text.append(result["raw_text"])

View File

@@ -19,8 +19,8 @@ class TranscriptionService:
"""
logger.info(f"Uploading chunk {file_path} to Gemini...")
# 1. Upload file
media_file = self.client.files.upload(path=file_path)
# 1. Upload file (keyword argument)
media_file = self.client.files.upload(file=file_path)
# 2. Wait for processing (usually fast for audio)
while media_file.state == "PROCESSING":
@@ -32,12 +32,18 @@ class TranscriptionService:
# 3. Transcribe with Diarization and Timestamps
prompt = """
Transkribiere dieses Audio wortgetreu.
Identifiziere die Sprecher (Sprecher A, Sprecher B, etc.).
Gib das Ergebnis als strukturierte Liste mit Timestamps aus.
Wichtig: Das Audio ist ein Teil eines größeren Gesprächs.
Antworte NUR mit dem Transkript im Format:
[MM:SS] Sprecher X: Text
Transkribiere dieses Audio wortgetreu.
Identifiziere die Sprecher (Speaker A, Speaker B, etc.).
Gib das Ergebnis als JSON-Liste zurück.
Format:
[
{
"time": "MM:SS",
"speaker": "Speaker A",
"text": "..."
}
]
"""
logger.info(f"Generating transcription for {file_path}...")
@@ -45,7 +51,8 @@ class TranscriptionService:
model="gemini-2.0-flash",
contents=[media_file, prompt],
config=types.GenerateContentConfig(
temperature=0.1, # Low temp for accuracy
temperature=0.1,
response_mime_type="application/json"
)
)
@@ -53,6 +60,6 @@ class TranscriptionService:
self.client.files.delete(name=media_file.name)
return {
"raw_text": response.text,
"raw_text": response.text, # This is now a JSON string
"offset": offset_seconds
}