feat(transcription): v0.4.0 with structured JSON, inline editing and deletion

- Backend: Switched prompt to JSON output for structured data
- Backend: Added PUT /chunks/{id} endpoint for persistence
- Backend: Fixed app.py imports and initialization logic
- Frontend: Complete rewrite for Unified View (flattened chunks)
- Frontend: Added Inline Editing (Text/Speaker) and Row Deletion
- Docs: Updated TRANSCRIPTION_TOOL.md with v0.4 features
This commit is contained in:
2026-01-24 20:43:33 +00:00
parent 4e52e194f1
commit 68f263978a
5 changed files with 389 additions and 99 deletions

View File

@@ -19,8 +19,8 @@ class TranscriptionService:
"""
logger.info(f"Uploading chunk {file_path} to Gemini...")
# 1. Upload file
media_file = self.client.files.upload(path=file_path)
# 1. Upload file (passed via the `file=` keyword argument)
media_file = self.client.files.upload(file=file_path)
# 2. Wait for processing (usually fast for audio)
while media_file.state == "PROCESSING":
@@ -32,12 +32,18 @@ class TranscriptionService:
# 3. Transcribe with Diarization and Timestamps
prompt = """
Transkribiere dieses Audio wortgetreu.
Identifiziere die Sprecher (Sprecher A, Sprecher B, etc.).
Gib das Ergebnis als strukturierte Liste mit Timestamps aus.
Wichtig: Das Audio ist ein Teil eines größeren Gesprächs.
Antworte NUR mit dem Transkript im Format:
[MM:SS] Sprecher X: Text
Transkribiere dieses Audio wortgetreu.
Identifiziere die Sprecher (Speaker A, Speaker B, etc.).
Gib das Ergebnis als JSON-Liste zurück.
Format:
[
{
"time": "MM:SS",
"speaker": "Speaker A",
"text": "..."
}
]
"""
logger.info(f"Generating transcription for {file_path}...")
@@ -45,7 +51,8 @@ class TranscriptionService:
model="gemini-2.0-flash",
contents=[media_file, prompt],
config=types.GenerateContentConfig(
temperature=0.1, # Low temp for accuracy
temperature=0.1,
response_mime_type="application/json"
)
)
@@ -53,6 +60,6 @@ class TranscriptionService:
self.client.files.delete(name=media_file.name)
return {
"raw_text": response.text,
"raw_text": response.text, # This is now a JSON string
"offset": offset_seconds
}