feat(transcription): v0.4.0 with structured JSON, inline editing, and deletion
- Backend: Switched prompt to JSON output for structured data
- Backend: Added PUT /chunks/{id} endpoint for persistence
- Backend: Fixed app.py imports and initialization logic
- Frontend: Complete rewrite for Unified View (flattened chunks)
- Frontend: Added Inline Editing (Text/Speaker) and Row Deletion
- Docs: Updated TRANSCRIPTION_TOOL.md with v0.4 features
This commit is contained in:
@@ -19,8 +19,8 @@ class TranscriptionService:
|
||||
"""
|
||||
logger.info(f"Uploading chunk {file_path} to Gemini...")
|
||||
|
||||
# 1. Upload file
|
||||
media_file = self.client.files.upload(path=file_path)
|
||||
# 1. Upload file (path is passed via the `file` keyword argument)
|
||||
media_file = self.client.files.upload(file=file_path)
|
||||
|
||||
# 2. Wait for processing (usually fast for audio)
|
||||
while media_file.state == "PROCESSING":
|
||||
@@ -32,12 +32,18 @@ class TranscriptionService:
|
||||
|
||||
# 3. Transcribe with Diarization and Timestamps
|
||||
prompt = """
|
||||
Transkribiere dieses Audio wortgetreu.
|
||||
Identifiziere die Sprecher (Sprecher A, Sprecher B, etc.).
|
||||
Gib das Ergebnis als strukturierte Liste mit Timestamps aus.
|
||||
Wichtig: Das Audio ist ein Teil eines größeren Gesprächs.
|
||||
Antworte NUR mit dem Transkript im Format:
|
||||
[MM:SS] Sprecher X: Text
|
||||
Transkribiere dieses Audio wortgetreu.
|
||||
Identifiziere die Sprecher (Speaker A, Speaker B, etc.).
|
||||
|
||||
Gib das Ergebnis als JSON-Liste zurück.
|
||||
Format:
|
||||
[
|
||||
{
|
||||
"time": "MM:SS",
|
||||
"speaker": "Speaker A",
|
||||
"text": "..."
|
||||
}
|
||||
]
|
||||
"""
|
||||
|
||||
logger.info(f"Generating transcription for {file_path}...")
|
||||
@@ -45,7 +51,8 @@ class TranscriptionService:
|
||||
model="gemini-2.0-flash",
|
||||
contents=[media_file, prompt],
|
||||
config=types.GenerateContentConfig(
|
||||
temperature=0.1, # Low temp for accuracy
|
||||
temperature=0.1,
|
||||
response_mime_type="application/json"
|
||||
)
|
||||
)
|
||||
|
||||
@@ -53,6 +60,6 @@ class TranscriptionService:
|
||||
self.client.files.delete(name=media_file.name)
|
||||
|
||||
return {
|
||||
"raw_text": response.text,
|
||||
"raw_text": response.text, # This is now a JSON string
|
||||
"offset": offset_seconds
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user