feat(transcription): v0.4.0 with structured json, inline editing and deletion

- Backend: Switched prompt to JSON output for structured data
- Backend: Added PUT /chunks/{id} endpoint for persistence
- Backend: Fixed app.py imports and initialization logic
- Frontend: Complete rewrite for Unified View (flattened chunks)
- Frontend: Added Inline Editing (Text/Speaker) and Row Deletion
- Docs: Updated TRANSCRIPTION_TOOL.md with v0.4 features
This commit is contained in:
2026-01-24 20:43:33 +00:00
parent 0858df6f25
commit da00d461e1
5 changed files with 389 additions and 99 deletions

View File

@@ -1,6 +1,8 @@
from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, BackgroundTasks
from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, BackgroundTasks, Body
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from sqlalchemy.orm import Session
from sqlalchemy.orm import Session, joinedload
from typing import List, Dict, Any
import os
import shutil
import uuid
@@ -10,6 +12,7 @@ from .config import settings
from .database import init_db, get_db, Meeting, TranscriptChunk, AnalysisResult, SessionLocal
from .services.orchestrator import process_meeting_task
# Initialize FastAPI App
app = FastAPI(
title=settings.APP_NAME,
version=settings.VERSION,
@@ -36,6 +39,33 @@ def health():
def list_meetings(db: Session = Depends(get_db)):
return db.query(Meeting).order_by(Meeting.created_at.desc()).all()
@app.get("/api/meetings/{meeting_id}")
def get_meeting(meeting_id: int, db: Session = Depends(get_db)):
    """Return one meeting with its transcript chunks eagerly loaded."""
    query = (
        db.query(Meeting)
        .options(joinedload(Meeting.chunks))
        .filter(Meeting.id == meeting_id)
    )
    meeting = query.first()
    if meeting is None:
        raise HTTPException(404, detail="Meeting not found")
    # The relationship gives no ordering guarantee; present chunks in
    # transcript order.
    meeting.chunks.sort(key=lambda chunk: chunk.chunk_index)
    return meeting
@app.put("/api/chunks/{chunk_id}")
def update_chunk(chunk_id: int, payload: Dict[str, Any] = Body(...), db: Session = Depends(get_db)):
    """Persist edits to a chunk's structured JSON content (inline editing/deletion)."""
    chunk = db.query(TranscriptChunk).filter(TranscriptChunk.id == chunk_id).first()
    if chunk is None:
        raise HTTPException(404, detail="Chunk not found")
    # Only the JSON body is editable; other fields are ignored on purpose.
    if "json_content" in payload:
        chunk.json_content = payload["json_content"]
        db.commit()
    return {"status": "updated"}
@app.post("/api/upload")
async def upload_audio(
background_tasks: BackgroundTasks,
@@ -67,6 +97,39 @@ async def upload_audio(
return meeting
@app.delete("/api/meetings/{meeting_id}")
def delete_meeting(meeting_id: int, db: Session = Depends(get_db)):
    """Delete a meeting: remove its audio files from disk, then the DB row.

    File cleanup is best-effort — a filesystem failure must not block the
    database delete, so errors are reported and swallowed.

    Raises:
        HTTPException: 404 when no meeting with the given id exists.
    """
    meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
    if not meeting:
        raise HTTPException(404, detail="Meeting not found")

    # 1. Delete files. Guard against a NULL file_path (os.path.exists(None)
    # would raise TypeError), and catch only filesystem errors instead of
    # the previous blanket Exception so real bugs still surface.
    try:
        if meeting.file_path and os.path.exists(meeting.file_path):
            os.remove(meeting.file_path)
        # Per-meeting chunk directory created during processing.
        chunk_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id))
        if os.path.exists(chunk_dir):
            shutil.rmtree(chunk_dir)
    except OSError as e:
        print(f"Error deleting files: {e}")

    # 2. Delete DB entry (cascade deletes chunks/analyses).
    db.delete(meeting)
    db.commit()
    return {"status": "deleted"}
# Serve Frontend.
# Registered after every API route so the catch-all static mount at "/"
# cannot shadow them.
_docker_dist = "/frontend_static"
_local_dist = os.path.join(os.path.dirname(__file__), "../frontend/dist")
# Prefer the Docker image path; otherwise fall back to the local build.
static_path = _docker_dist if os.path.exists(_docker_dist) else _local_dist

if os.path.exists(static_path):
    app.mount("/", StaticFiles(directory=static_path, html=True), name="static")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run("backend.app:app", host="0.0.0.0", port=8001, reload=True)

View File

@@ -1,4 +1,5 @@
import logging
import json
from sqlalchemy.orm import Session
from .ffmpeg_service import FFmpegService
from .transcription_service import TranscriptionService
@@ -7,6 +8,17 @@ from ..config import settings
logger = logging.getLogger(__name__)
def parse_time_to_seconds(time_str):
    """Convert a "MM:SS" or "HH:MM:SS" timestamp string to total seconds.

    Returns 0 for anything unparseable (None, wrong field count,
    non-numeric fields) so callers can treat model timestamps as
    best-effort data.
    """
    try:
        parts = time_str.split(':')
        if len(parts) == 2:  # MM:SS
            return int(parts[0]) * 60 + int(parts[1])
        if len(parts) == 3:  # HH:MM:SS
            return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
    except (AttributeError, ValueError):
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit. AttributeError: time_str is not a
        # string (e.g. None); ValueError: a field is not an integer.
        return 0
    return 0
def process_meeting_task(meeting_id: int, db_session_factory):
db = db_session_factory()
meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
@@ -35,11 +47,30 @@ def process_meeting_task(meeting_id: int, db_session_factory):
result = transcriber.transcribe_chunk(chunk_path, offset)
# Parse JSON and Adjust Timestamps
json_data = []
try:
raw_json = json.loads(result["raw_text"])
if isinstance(raw_json, list):
for entry in raw_json:
seconds = parse_time_to_seconds(entry.get("time", "00:00"))
absolute_seconds = seconds + offset
entry["absolute_seconds"] = absolute_seconds
h = int(absolute_seconds // 3600)
m = int((absolute_seconds % 3600) // 60)
s = int(absolute_seconds % 60)
entry["display_time"] = f"{h:02}:{m:02}:{s:02}"
json_data.append(entry)
except Exception as e:
logger.error(f"JSON Parsing failed for chunk {i}: {e}")
# Save chunk result
db_chunk = TranscriptChunk(
meeting_id=meeting.id,
chunk_index=i,
raw_text=result["raw_text"]
raw_text=result["raw_text"],
json_content=json_data
)
db.add(db_chunk)
all_text.append(result["raw_text"])

View File

@@ -19,8 +19,8 @@ class TranscriptionService:
"""
logger.info(f"Uploading chunk {file_path} to Gemini...")
# 1. Upload file
media_file = self.client.files.upload(path=file_path)
# 1. Upload file (keyword argument)
media_file = self.client.files.upload(file=file_path)
# 2. Wait for processing (usually fast for audio)
while media_file.state == "PROCESSING":
@@ -32,12 +32,18 @@ class TranscriptionService:
# 3. Transcribe with Diarization and Timestamps
prompt = """
Transkribiere dieses Audio wortgetreu.
Identifiziere die Sprecher (Sprecher A, Sprecher B, etc.).
Gib das Ergebnis als strukturierte Liste mit Timestamps aus.
Wichtig: Das Audio ist ein Teil eines größeren Gesprächs.
Antworte NUR mit dem Transkript im Format:
[MM:SS] Sprecher X: Text
Transkribiere dieses Audio wortgetreu.
Identifiziere die Sprecher (Speaker A, Speaker B, etc.).
Gib das Ergebnis als JSON-Liste zurück.
Format:
[
{
"time": "MM:SS",
"speaker": "Speaker A",
"text": "..."
}
]
"""
logger.info(f"Generating transcription for {file_path}...")
@@ -45,7 +51,8 @@ class TranscriptionService:
model="gemini-2.0-flash",
contents=[media_file, prompt],
config=types.GenerateContentConfig(
temperature=0.1, # Low temp for accuracy
temperature=0.1,
response_mime_type="application/json"
)
)
@@ -53,6 +60,6 @@ class TranscriptionService:
self.client.files.delete(name=media_file.name)
return {
"raw_text": response.text,
"raw_text": response.text, # This is now a JSON string
"offset": offset_seconds
}