feat(transcription): add meeting assistant micro-service v0.1.0
- Added FastAPI backend with FFmpeg and Gemini 2.0 integration - Added React frontend with upload and meeting list - Integrated into main docker-compose stack and dashboard
This commit is contained in:
0
transcription-tool/backend/__init__.py
Normal file
0
transcription-tool/backend/__init__.py
Normal file
72
transcription-tool/backend/app.py
Normal file
72
transcription-tool/backend/app.py
Normal file
@@ -0,0 +1,72 @@
|
||||
from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, BackgroundTasks
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from sqlalchemy.orm import Session
|
||||
import os
|
||||
import shutil
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from .config import settings
|
||||
from .database import init_db, get_db, Meeting, TranscriptChunk, AnalysisResult, SessionLocal
|
||||
from .services.orchestrator import process_meeting_task
|
||||
|
||||
# Application instance; served behind the "/tr" path prefix by the reverse proxy.
app = FastAPI(
    title=settings.APP_NAME,
    version=settings.VERSION,
    root_path="/tr",
)

# Allow cross-origin requests from any host (frontend is served separately).
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm this is acceptable outside local deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
@app.on_event("startup")
def startup_event():
    """Create all database tables once when the service boots."""
    init_db()
|
||||
|
||||
@app.get("/api/health")
def health():
    """Liveness probe: report service status and the running version."""
    return {"status": "ok", "version": settings.VERSION}
|
||||
|
||||
@app.get("/api/meetings")
def list_meetings(db: Session = Depends(get_db)):
    """Return every meeting, most recently created first."""
    newest_first = Meeting.created_at.desc()
    return db.query(Meeting).order_by(newest_first).all()
|
||||
|
||||
@app.post("/api/upload")
async def upload_audio(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    db: Session = Depends(get_db)
):
    """Accept an audio upload, persist it, and queue background processing.

    Stores the file under a fresh UUID-based name in the upload directory,
    creates the Meeting row in status UPLOADED, then schedules the split/
    transcribe pipeline as a background task. Returns the new Meeting record.
    """
    # 1. Save file under a server-generated name. Only the extension of the
    #    client-supplied filename is reused, and it is sanitized via basename
    #    so a hostile filename cannot influence the storage path. The filename
    #    may also be None, which the original code would crash on.
    file_id = str(uuid.uuid4())
    original_name = file.filename or "upload"
    ext = os.path.splitext(os.path.basename(original_name))[1]
    filename = f"{file_id}{ext}"
    file_path = os.path.join(settings.UPLOAD_DIR, filename)

    try:
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    finally:
        # Release the spooled temp file promptly (original never closed it).
        file.file.close()

    # 2. Create DB Entry
    meeting = Meeting(
        title=original_name,
        filename=filename,
        file_path=file_path,
        status="UPLOADED"
    )
    db.add(meeting)
    db.commit()
    db.refresh(meeting)

    # 3. Trigger Processing in Background. The task receives the session
    #    factory (not this request-scoped session) so it can open its own.
    background_tasks.add_task(process_meeting_task, meeting.id, SessionLocal)

    return meeting
|
||||
|
||||
if __name__ == "__main__":
    # Local development entry point; in Docker the server is launched externally.
    import uvicorn

    uvicorn.run("backend.app:app", host="0.0.0.0", port=8001, reload=True)
|
||||
27
transcription-tool/backend/config.py
Normal file
27
transcription-tool/backend/config.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import os
|
||||
from pydantic_settings import BaseSettings
|
||||
from typing import Optional
|
||||
|
||||
class Settings(BaseSettings):
    """Runtime configuration, overridable via environment variables / .env."""

    APP_NAME: str = "Transcription Engine"
    VERSION: str = "0.1.0"
    DATABASE_URL: str = "sqlite:////app/transcripts.db"
    UPLOAD_DIR: str = "/app/uploads_audio"
    GEMINI_API_KEY: Optional[str] = None
    CHUNK_DURATION_SEC: int = 1800  # 30 minutes per audio chunk

    class Config:
        env_file = ".env"


settings = Settings()

# Fall back to a mounted secret file when the key was not supplied via env.
if not settings.GEMINI_API_KEY:
    key_path = "/app/gemini_api_key.txt"
    if os.path.exists(key_path):
        with open(key_path, "r") as f:
            settings.GEMINI_API_KEY = f.read().strip()

# Create the upload directories up front so request handlers can rely on them.
os.makedirs(settings.UPLOAD_DIR, exist_ok=True)
os.makedirs(os.path.join(settings.UPLOAD_DIR, "chunks"), exist_ok=True)
|
||||
63
transcription-tool/backend/database.py
Normal file
63
transcription-tool/backend/database.py
Normal file
@@ -0,0 +1,63 @@
|
||||
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, Float, JSON
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker, relationship
|
||||
from datetime import datetime
|
||||
from .config import settings
|
||||
|
||||
# check_same_thread applies only to SQLite; DATABASE_URL is configurable, and
# passing this connect_arg to any other driver raises TypeError at connect time.
_connect_args = (
    {"check_same_thread": False}
    if settings.DATABASE_URL.startswith("sqlite")
    else {}
)
engine = create_engine(settings.DATABASE_URL, connect_args=_connect_args)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
|
||||
|
||||
class Meeting(Base):
    """One uploaded recording plus everything derived from it."""

    __tablename__ = "meetings"

    id = Column(Integer, primary_key=True, index=True)
    title = Column(String, index=True)   # original upload filename
    filename = Column(String)            # server-side storage name
    file_path = Column(String)           # location of the audio on disk
    date_recorded = Column(DateTime, default=datetime.utcnow)

    duration_seconds = Column(Float, nullable=True)
    # Pipeline state machine:
    # UPLOADED -> SPLITTING -> TRANSCRIBING -> ANALYZING -> COMPLETED | ERROR
    status = Column(String, default="UPLOADED")

    participants = Column(JSON, nullable=True)  # list of speaker names
    summary = Column(Text, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Children are removed together with their meeting.
    chunks = relationship(
        "TranscriptChunk", back_populates="meeting", cascade="all, delete-orphan"
    )
    analysis_results = relationship(
        "AnalysisResult", back_populates="meeting", cascade="all, delete-orphan"
    )
|
||||
|
||||
class TranscriptChunk(Base):
    """Transcription of one fixed-length segment of a meeting's audio."""

    __tablename__ = "transcript_chunks"

    id = Column(Integer, primary_key=True, index=True)
    meeting_id = Column(Integer, ForeignKey("meetings.id"))
    chunk_index = Column(Integer)  # 0-based position within the recording

    raw_text = Column(Text)
    json_content = Column(JSON, nullable=True)  # structured timestamps/speakers

    meeting = relationship("Meeting", back_populates="chunks")
|
||||
|
||||
class AnalysisResult(Base):
    """Output of one LLM analysis pass (summary, tasks, notes) for a meeting."""

    __tablename__ = "analysis_results"

    id = Column(Integer, primary_key=True, index=True)
    meeting_id = Column(Integer, ForeignKey("meetings.id"))
    prompt_key = Column(String)  # which analysis: summary, tasks, notes
    result_text = Column(Text)

    created_at = Column(DateTime, default=datetime.utcnow)
    meeting = relationship("Meeting", back_populates="analysis_results")
|
||||
|
||||
def init_db():
    """Create every table declared on Base if it does not exist yet."""
    Base.metadata.create_all(bind=engine)
|
||||
|
||||
def get_db():
    """FastAPI dependency: yield one session and always close it afterwards."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
||||
0
transcription-tool/backend/lib/__init__.py
Normal file
0
transcription-tool/backend/lib/__init__.py
Normal file
10
transcription-tool/backend/requirements.txt
Normal file
10
transcription-tool/backend/requirements.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
sqlalchemy
|
||||
pydantic
|
||||
pydantic-settings
|
||||
python-multipart
|
||||
requests
|
||||
google-genai
|
||||
python-dotenv
|
||||
aiofiles
|
||||
0
transcription-tool/backend/services/__init__.py
Normal file
0
transcription-tool/backend/services/__init__.py
Normal file
49
transcription-tool/backend/services/ffmpeg_service.py
Normal file
49
transcription-tool/backend/services/ffmpeg_service.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import subprocess
|
||||
import os
|
||||
import logging
|
||||
from ..config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class FFmpegService:
    """Thin wrapper around the ffmpeg / ffprobe command-line tools."""

    def split_audio(self, input_path: str, meeting_id: int) -> list:
        """
        Split *input_path* into CHUNK_DURATION_SEC segments using ffmpeg's
        segment muxer and return the sorted list of chunk paths.

        Raises RuntimeError when ffmpeg exits non-zero.
        """
        output_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id))
        os.makedirs(output_dir, exist_ok=True)

        # Remove leftovers from a previous (possibly failed) run so the
        # directory listing below reflects only this invocation.
        for stale in os.listdir(output_dir):
            if stale.endswith(".mp3"):
                os.remove(os.path.join(output_dir, stale))

        output_pattern = os.path.join(output_dir, "chunk_%03d.mp3")

        # ffmpeg -i input.mp3 -f segment -segment_time 1800 -c copy chunk_%03d.mp3
        cmd = [
            "ffmpeg", "-i", input_path,
            "-f", "segment",
            "-segment_time", str(settings.CHUNK_DURATION_SEC),
            "-c", "copy",  # stream copy: no re-encode, cuts on frame boundaries
            output_pattern
        ]

        logger.info(f"Splitting {input_path} into segments...")
        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            logger.error(f"FFmpeg Error: {result.stderr}")
            # RuntimeError is a subclass of Exception, so existing callers
            # catching Exception keep working.
            raise RuntimeError("Failed to split audio file.")

        chunks = sorted(
            os.path.join(output_dir, f)
            for f in os.listdir(output_dir)
            if f.endswith(".mp3")
        )
        logger.info(f"Created {len(chunks)} chunks.")
        return chunks

    def get_duration(self, input_path: str) -> float:
        """Return the duration of *input_path* in seconds, or 0.0 on failure."""
        cmd = [
            "ffprobe", "-v", "error", "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1", input_path
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        try:
            return float(result.stdout.strip())
        except ValueError:
            # ffprobe failed or produced no parseable output. Duration is
            # informational only, so best-effort 0.0 keeps the pipeline going.
            # (Was a bare `except:` — narrowed so it no longer swallows
            # KeyboardInterrupt/SystemExit.)
            return 0.0
|
||||
60
transcription-tool/backend/services/orchestrator.py
Normal file
60
transcription-tool/backend/services/orchestrator.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import logging
|
||||
from sqlalchemy.orm import Session
|
||||
from .ffmpeg_service import FFmpegService
|
||||
from .transcription_service import TranscriptionService
|
||||
from ..database import Meeting, TranscriptChunk
|
||||
from ..config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def process_meeting_task(meeting_id: int, db_session_factory):
    """Background pipeline: split the meeting's audio, transcribe each chunk,
    and advance the meeting's status column as work progresses.

    Runs outside the request cycle, so it opens (and always closes) its own
    session from *db_session_factory*. Any failure marks the meeting ERROR.
    """
    db = db_session_factory()
    try:
        meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
        if not meeting:
            # Nothing to do. The original returned here without closing the
            # session (leak); the outer finally now guarantees cleanup.
            return

        try:
            ffmpeg = FFmpegService()
            transcriber = TranscriptionService()

            # Phase 1: Split
            meeting.status = "SPLITTING"
            db.commit()

            meeting.duration_seconds = ffmpeg.get_duration(meeting.file_path)
            chunks = ffmpeg.split_audio(meeting.file_path, meeting.id)

            # Phase 2: Transcribe
            meeting.status = "TRANSCRIBING"
            db.commit()

            all_text = []
            for i, chunk_path in enumerate(chunks):
                offset = i * settings.CHUNK_DURATION_SEC
                logger.info(f"Processing chunk {i+1}/{len(chunks)} with offset {offset}s")

                result = transcriber.transcribe_chunk(chunk_path, offset)

                # Persist each chunk as soon as it is transcribed so progress
                # survives a mid-run failure.
                db_chunk = TranscriptChunk(
                    meeting_id=meeting.id,
                    chunk_index=i,
                    raw_text=result["raw_text"]
                )
                db.add(db_chunk)
                all_text.append(result["raw_text"])  # reserved for a future summary pass
                db.commit()

            # Phase 3: Finalize
            meeting.status = "COMPLETED"
            # Combine summary (first attempt - can be refined later with separate LLM call)
            # meeting.summary = ...
            db.commit()
            logger.info(f"Meeting {meeting.id} processing completed.")

        except Exception as e:
            logger.error(f"Error processing meeting {meeting_id}: {e}", exc_info=True)
            # Discard the failed transaction before writing the ERROR state;
            # without a rollback the session may refuse further flushes.
            db.rollback()
            meeting.status = "ERROR"
            db.commit()
    finally:
        db.close()
|
||||
58
transcription-tool/backend/services/transcription_service.py
Normal file
58
transcription-tool/backend/services/transcription_service.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
from ..config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class TranscriptionService:
    """Transcribes audio chunks via the Gemini API (file upload + generate)."""

    # Give Gemini at most this long to finish ingesting one uploaded chunk.
    UPLOAD_TIMEOUT_SEC = 300

    def __init__(self):
        if not settings.GEMINI_API_KEY:
            raise Exception("Gemini API Key missing.")
        self.client = genai.Client(api_key=settings.GEMINI_API_KEY)

    def transcribe_chunk(self, file_path: str, offset_seconds: int = 0) -> dict:
        """
        Upload a chunk to Gemini and return the transcription with timestamps.

        Returns a dict with "raw_text" (the model's transcript) and "offset"
        (the chunk's start position within the full recording, in seconds).
        """
        logger.info(f"Uploading chunk {file_path} to Gemini...")

        # 1. Upload file
        media_file = self.client.files.upload(path=file_path)

        try:
            # 2. Wait for processing (usually fast for audio), but bail out
            #    instead of spinning forever if the state never advances.
            deadline = time.monotonic() + self.UPLOAD_TIMEOUT_SEC
            while media_file.state == "PROCESSING":
                if time.monotonic() > deadline:
                    raise Exception("Timed out waiting for Gemini file processing.")
                time.sleep(2)
                media_file = self.client.files.get(name=media_file.name)

            if media_file.state == "FAILED":
                raise Exception("File processing failed at Gemini.")

            # 3. Transcribe with Diarization and Timestamps.
            #    The prompt is deliberately German — it matches the expected audio.
            prompt = """
            Transkribiere dieses Audio wortgetreu.
            Identifiziere die Sprecher (Sprecher A, Sprecher B, etc.).
            Gib das Ergebnis als strukturierte Liste mit Timestamps aus.
            Wichtig: Das Audio ist ein Teil eines größeren Gesprächs.
            Antworte NUR mit dem Transkript im Format:
            [MM:SS] Sprecher X: Text
            """

            logger.info(f"Generating transcription for {file_path}...")
            response = self.client.models.generate_content(
                model="gemini-2.0-flash",
                contents=[media_file, prompt],
                config=types.GenerateContentConfig(
                    temperature=0.1,  # low temperature for transcription accuracy
                )
            )
        finally:
            # Always delete the remote copy — previously a failed
            # generate_content call leaked the file in Gemini storage.
            self.client.files.delete(name=media_file.name)

        return {
            "raw_text": response.text,
            "offset": offset_seconds
        }
|
||||
Reference in New Issue
Block a user