feat(transcription): add meeting assistant micro-service v0.1.0
- Added FastAPI backend with FFmpeg and Gemini 2.0 integration
- Added React frontend with upload and meeting list
- Integrated into main docker-compose stack and dashboard
This commit is contained in:
58
transcription-tool/backend/services/transcription_service.py
Normal file
58
transcription-tool/backend/services/transcription_service.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
from ..config import settings
|
||||
|
||||
# Logger named after this module's import path; used by TranscriptionService.
logger = logging.getLogger(__name__)
|
||||
|
||||
class TranscriptionService:
    """Transcribe audio chunks by uploading them to Gemini.

    Each chunk is uploaded through the Gemini Files API, polled until it
    leaves the ``PROCESSING`` state, transcribed with a diarization prompt,
    and finally removed from Gemini storage again.
    """

    # Model used for the transcription request.
    MODEL_NAME = "gemini-2.0-flash"
    # Seconds to sleep between two state polls of an uploaded file.
    POLL_INTERVAL_SECONDS = 2
    # Upper bound for the polling loop so a stuck upload cannot block forever.
    PROCESSING_TIMEOUT_SECONDS = 300

    def __init__(self):
        """Create the Gemini client from ``settings.GEMINI_API_KEY``.

        Raises:
            RuntimeError: If no API key is configured.
        """
        if not settings.GEMINI_API_KEY:
            raise RuntimeError("Gemini API Key missing.")
        self.client = genai.Client(api_key=settings.GEMINI_API_KEY)

    def transcribe_chunk(self, file_path: str, offset_seconds: int = 0) -> dict:
        """Upload one audio chunk to Gemini and return its transcription.

        Args:
            file_path: Local path of the audio chunk to upload.
            offset_seconds: Passed through unchanged in the result under
                ``"offset"``; this method does not apply it to the
                timestamps in the transcript.

        Returns:
            A dict with ``"raw_text"`` (the model's response text) and
            ``"offset"`` (the given ``offset_seconds``).

        Raises:
            RuntimeError: If Gemini reports the uploaded file as ``FAILED``.
            TimeoutError: If the file is still ``PROCESSING`` after
                ``PROCESSING_TIMEOUT_SECONDS``.
        """
        logger.info("Uploading chunk %s to Gemini...", file_path)

        # 1. Upload file
        # NOTE(review): newer google-genai releases appear to name this
        # keyword ``file`` instead of ``path`` - confirm against the pinned
        # SDK version.
        media_file = self.client.files.upload(path=file_path)
        remote_name = media_file.name

        try:
            # 2. Wait for processing (usually fast for audio)
            media_file = self._wait_until_processed(media_file)

            # 3. Transcribe with diarization and timestamps
            prompt = """
        Transkribiere dieses Audio wortgetreu.
        Identifiziere die Sprecher (Sprecher A, Sprecher B, etc.).
        Gib das Ergebnis als strukturierte Liste mit Timestamps aus.
        Wichtig: Das Audio ist ein Teil eines größeren Gesprächs.
        Antworte NUR mit dem Transkript im Format:
        [MM:SS] Sprecher X: Text
        """

            logger.info("Generating transcription for %s...", file_path)
            response = self.client.models.generate_content(
                model=self.MODEL_NAME,
                contents=[media_file, prompt],
                config=types.GenerateContentConfig(
                    temperature=0.1,  # Low temp for accuracy
                ),
            )
        finally:
            # Cleanup runs on every path (success, FAILED state, timeout, API
            # error) so uploaded chunks never pile up in Gemini storage.
            self._delete_remote_file(remote_name)

        return {
            "raw_text": response.text,
            "offset": offset_seconds,
        }

    def _wait_until_processed(self, media_file):
        """Poll Gemini until *media_file* has left the ``PROCESSING`` state."""
        deadline = time.monotonic() + self.PROCESSING_TIMEOUT_SECONDS
        while media_file.state == "PROCESSING":
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Gemini did not finish processing {media_file.name} "
                    f"within {self.PROCESSING_TIMEOUT_SECONDS} seconds."
                )
            time.sleep(self.POLL_INTERVAL_SECONDS)
            media_file = self.client.files.get(name=media_file.name)

        if media_file.state == "FAILED":
            raise RuntimeError("File processing failed at Gemini.")
        return media_file

    def _delete_remote_file(self, name: str) -> None:
        """Best-effort removal of an uploaded file from Gemini storage."""
        try:
            self.client.files.delete(name=name)
        except Exception:
            # A failed cleanup must neither discard a finished transcription
            # nor mask the error that is already propagating.
            logger.warning(
                "Could not delete %s from Gemini storage.", name, exc_info=True
            )
|
||||
Reference in New Issue
Block a user