feat(transcription): v0.4.0 with structured json, inline editing and deletion
- Backend: Switched prompt to JSON output for structured data
- Backend: Added PUT /chunks/{id} endpoint for persistence
- Backend: Fixed app.py imports and initialization logic
- Frontend: Complete rewrite for Unified View (flattened chunks)
- Frontend: Added Inline Editing (Text/Speaker) and Row Deletion
- Docs: Updated TRANSCRIPTION_TOOL.md with v0.4 features
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, BackgroundTasks
|
||||
from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, BackgroundTasks, Body
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
from typing import List, Dict, Any
|
||||
import os
|
||||
import shutil
|
||||
import uuid
|
||||
@@ -10,6 +12,7 @@ from .config import settings
|
||||
from .database import init_db, get_db, Meeting, TranscriptChunk, AnalysisResult, SessionLocal
|
||||
from .services.orchestrator import process_meeting_task
|
||||
|
||||
# Initialize FastAPI App
|
||||
app = FastAPI(
|
||||
title=settings.APP_NAME,
|
||||
version=settings.VERSION,
|
||||
@@ -36,6 +39,33 @@ def health():
|
||||
def list_meetings(db: Session = Depends(get_db)):
|
||||
return db.query(Meeting).order_by(Meeting.created_at.desc()).all()
|
||||
|
||||
@app.get("/api/meetings/{meeting_id}")
def get_meeting(meeting_id: int, db: Session = Depends(get_db)):
    """Return a single meeting with its transcript chunks eagerly loaded.

    Raises a 404 if no meeting with the given id exists.
    """
    query = db.query(Meeting).options(joinedload(Meeting.chunks))
    meeting = query.filter(Meeting.id == meeting_id).first()

    if meeting is None:
        raise HTTPException(404, detail="Meeting not found")

    # Present chunks in recording order regardless of DB return order.
    meeting.chunks.sort(key=lambda chunk: chunk.chunk_index)

    return meeting
|
||||
|
||||
@app.put("/api/chunks/{chunk_id}")
def update_chunk(chunk_id: int, payload: Dict[str, Any] = Body(...), db: Session = Depends(get_db)):
    """Persist edits to a transcript chunk.

    Only the chunk's structured JSON content is writable (e.g. after the
    frontend edits text/speaker or deletes rows). Raises 404 for unknown ids.
    """
    chunk = db.query(TranscriptChunk).filter(TranscriptChunk.id == chunk_id).first()
    if chunk is None:
        raise HTTPException(404, detail="Chunk not found")

    # Commit only when the client actually sent new content.
    if "json_content" in payload:
        chunk.json_content = payload["json_content"]
        db.commit()

    return {"status": "updated"}
|
||||
|
||||
@app.post("/api/upload")
|
||||
async def upload_audio(
|
||||
background_tasks: BackgroundTasks,
|
||||
@@ -67,6 +97,39 @@ async def upload_audio(
|
||||
|
||||
return meeting
|
||||
|
||||
@app.delete("/api/meetings/{meeting_id}")
def delete_meeting(meeting_id: int, db: Session = Depends(get_db)):
    """Delete a meeting: its audio files on disk, then its database row.

    File cleanup is best-effort; a filesystem failure never blocks removal
    of the DB record. Raises 404 if the meeting does not exist.
    """
    meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
    if meeting is None:
        raise HTTPException(404, detail="Meeting not found")

    # 1. Best-effort removal of the uploaded audio and derived chunk files.
    try:
        if os.path.exists(meeting.file_path):
            os.remove(meeting.file_path)

        chunk_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id))
        if os.path.exists(chunk_dir):
            shutil.rmtree(chunk_dir)
    except Exception as e:
        print(f"Error deleting files: {e}")

    # 2. Remove the DB entry; the relationship cascade deletes chunks/analyses.
    db.delete(meeting)
    db.commit()
    return {"status": "deleted"}
|
||||
|
||||
# Serve Frontend.
# Registered after every API route so the catch-all static mount
# cannot shadow "/api/..." paths.
static_path = "/frontend_static"  # Docker image location
if not os.path.exists(static_path):
    # Local development fallback: use the built frontend next to the package.
    static_path = os.path.join(os.path.dirname(__file__), "../frontend/dist")

if os.path.exists(static_path):
    app.mount("/", StaticFiles(directory=static_path, html=True), name="static")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run("backend.app:app", host="0.0.0.0", port=8001, reload=True)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
import json
|
||||
from sqlalchemy.orm import Session
|
||||
from .ffmpeg_service import FFmpegService
|
||||
from .transcription_service import TranscriptionService
|
||||
@@ -7,6 +8,17 @@ from ..config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def parse_time_to_seconds(time_str):
    """Convert a "MM:SS" or "HH:MM:SS" string to total seconds.

    Returns 0 for anything unparseable (wrong number of parts, non-numeric
    components, or a non-string input) — model output is untrusted, so this
    deliberately never raises.
    """
    # Narrow exception handling instead of a bare ``except:`` — the original
    # also swallowed KeyboardInterrupt/SystemExit. int() raises ValueError on
    # non-numeric parts; .split raises AttributeError/TypeError on non-strings.
    try:
        parts = time_str.split(':')
        if len(parts) == 2:  # MM:SS
            return int(parts[0]) * 60 + int(parts[1])
        elif len(parts) == 3:  # HH:MM:SS
            return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
    except (ValueError, AttributeError, TypeError):
        return 0
    # Unsupported shape (e.g. a single component, or too many colons).
    return 0
|
||||
|
||||
def process_meeting_task(meeting_id: int, db_session_factory):
|
||||
db = db_session_factory()
|
||||
meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
|
||||
@@ -35,11 +47,30 @@ def process_meeting_task(meeting_id: int, db_session_factory):
|
||||
|
||||
result = transcriber.transcribe_chunk(chunk_path, offset)
|
||||
|
||||
# Parse JSON and Adjust Timestamps
|
||||
json_data = []
|
||||
try:
|
||||
raw_json = json.loads(result["raw_text"])
|
||||
if isinstance(raw_json, list):
|
||||
for entry in raw_json:
|
||||
seconds = parse_time_to_seconds(entry.get("time", "00:00"))
|
||||
absolute_seconds = seconds + offset
|
||||
entry["absolute_seconds"] = absolute_seconds
|
||||
|
||||
h = int(absolute_seconds // 3600)
|
||||
m = int((absolute_seconds % 3600) // 60)
|
||||
s = int(absolute_seconds % 60)
|
||||
entry["display_time"] = f"{h:02}:{m:02}:{s:02}"
|
||||
json_data.append(entry)
|
||||
except Exception as e:
|
||||
logger.error(f"JSON Parsing failed for chunk {i}: {e}")
|
||||
|
||||
# Save chunk result
|
||||
db_chunk = TranscriptChunk(
|
||||
meeting_id=meeting.id,
|
||||
chunk_index=i,
|
||||
raw_text=result["raw_text"]
|
||||
raw_text=result["raw_text"],
|
||||
json_content=json_data
|
||||
)
|
||||
db.add(db_chunk)
|
||||
all_text.append(result["raw_text"])
|
||||
|
||||
@@ -19,8 +19,8 @@ class TranscriptionService:
|
||||
"""
|
||||
logger.info(f"Uploading chunk {file_path} to Gemini...")
|
||||
|
||||
# 1. Upload file
|
||||
media_file = self.client.files.upload(path=file_path)
|
||||
# 1. Upload file (positional argument)
|
||||
media_file = self.client.files.upload(file=file_path)
|
||||
|
||||
# 2. Wait for processing (usually fast for audio)
|
||||
while media_file.state == "PROCESSING":
|
||||
@@ -32,12 +32,18 @@ class TranscriptionService:
|
||||
|
||||
# 3. Transcribe with Diarization and Timestamps
|
||||
prompt = """
|
||||
Transkribiere dieses Audio wortgetreu.
|
||||
Identifiziere die Sprecher (Sprecher A, Sprecher B, etc.).
|
||||
Gib das Ergebnis als strukturierte Liste mit Timestamps aus.
|
||||
Wichtig: Das Audio ist ein Teil eines größeren Gesprächs.
|
||||
Antworte NUR mit dem Transkript im Format:
|
||||
[MM:SS] Sprecher X: Text
|
||||
Transkribiere dieses Audio wortgetreu.
|
||||
Identifiziere die Sprecher (Speaker A, Speaker B, etc.).
|
||||
|
||||
Gib das Ergebnis als JSON-Liste zurück.
|
||||
Format:
|
||||
[
|
||||
{
|
||||
"time": "MM:SS",
|
||||
"speaker": "Speaker A",
|
||||
"text": "..."
|
||||
}
|
||||
]
|
||||
"""
|
||||
|
||||
logger.info(f"Generating transcription for {file_path}...")
|
||||
@@ -45,7 +51,8 @@ class TranscriptionService:
|
||||
model="gemini-2.0-flash",
|
||||
contents=[media_file, prompt],
|
||||
config=types.GenerateContentConfig(
|
||||
temperature=0.1, # Low temp for accuracy
|
||||
temperature=0.1,
|
||||
response_mime_type="application/json"
|
||||
)
|
||||
)
|
||||
|
||||
@@ -53,6 +60,6 @@ class TranscriptionService:
|
||||
self.client.files.delete(name=media_file.name)
|
||||
|
||||
return {
|
||||
"raw_text": response.text,
|
||||
"raw_text": response.text, # This is now a JSON string
|
||||
"offset": offset_seconds
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user