feat(transcription): add meeting assistant micro-service v0.1.0

- Added FastAPI backend with FFmpeg and Gemini 2.0 integration
- Added React frontend with upload and meeting list
- Integrated into main docker-compose stack and dashboard
This commit is contained in:
2026-01-24 16:34:01 +00:00
parent b16babb032
commit 0858df6f25
25 changed files with 721 additions and 2 deletions

View File

@@ -0,0 +1,37 @@
# --- STAGE 1: Build Frontend ---
FROM node:20-slim AS frontend-builder
WORKDIR /build
# Copy only the package manifests first so the dependency layer stays cached
# until they change.
COPY frontend/package*.json ./
RUN npm install
COPY frontend/ ./
RUN npm run build

# --- STAGE 2: Backend & Runtime ---
FROM python:3.11-slim
WORKDIR /app
# System dependencies (FFmpeg is essential: the backend shells out to
# ffmpeg/ffprobe). --no-install-recommends keeps optional packages out of the
# runtime image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    build-essential \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements & install
COPY backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the built frontend from stage 1
COPY --from=frontend-builder /build/dist /frontend_static
# Copy backend source
COPY backend ./backend
# Environment variables
ENV PYTHONPATH=/app
ENV PYTHONUNBUFFERED=1
# Expose port
EXPOSE 8001
# Start FastAPI
# NOTE(review): --reload is uvicorn's development auto-reloader. It is kept
# here unchanged; drop it for production images unless the compose stack
# bind-mounts the source on purpose.
CMD ["uvicorn", "backend.app:app", "--host", "0.0.0.0", "--port", "8001", "--reload"]

View File

View File

@@ -0,0 +1,72 @@
from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from sqlalchemy.orm import Session
import os
import shutil
import uuid
from datetime import datetime
from .config import settings
from .database import init_db, get_db, Meeting, TranscriptChunk, AnalysisResult, SessionLocal
from .services.orchestrator import process_meeting_task
# FastAPI application object. root_path="/tr" declares the URL prefix the
# service is published under (the frontend calls the API at /tr/api).
app = FastAPI(
    title=settings.APP_NAME,
    version=settings.VERSION,
    root_path="/tr",
)

# CORS: any origin, method and header is accepted. Credentials are NOT
# allowed: a wildcard origin together with allow_credentials=True is not a
# valid CORS configuration and would let any website issue cookie-bearing
# requests. The API uses no cookies or auth, so nothing relies on it.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.on_event("startup")
def startup_event():
    """Run database initialisation once when the application starts."""
    init_db()
@app.get("/api/health")
def health():
    """Health probe: report a fixed "ok" status and the configured version."""
    payload = {"status": "ok", "version": settings.VERSION}
    return payload
@app.get("/api/meetings")
def list_meetings(db: Session = Depends(get_db)):
    """Return all stored meetings, most recently created first."""
    newest_first = Meeting.created_at.desc()
    query = db.query(Meeting).order_by(newest_first)
    return query.all()
@app.post("/api/upload")
async def upload_audio(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    db: Session = Depends(get_db),
):
    """Store an uploaded audio file, register it and queue its processing.

    The file is written to ``settings.UPLOAD_DIR`` under a fresh UUID name
    that keeps the client's extension. A ``Meeting`` row with status
    ``UPLOADED`` is created and ``process_meeting_task`` is scheduled as a
    background task.

    Returns:
        The newly created ``Meeting`` row.

    Raises:
        Any error from writing the file or from the database insert. In
        both cases the (partial) file on disk is removed before re-raising
        so failed uploads do not leave orphaned files behind.
    """
    # 1. Save file. UploadFile.filename is optional, so fall back to an empty
    # name instead of crashing in os.path.splitext(None).
    original_name = file.filename or ""
    file_id = str(uuid.uuid4())
    ext = os.path.splitext(original_name)[1]
    filename = f"{file_id}{ext}"
    file_path = os.path.join(settings.UPLOAD_DIR, filename)
    try:
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    except Exception:
        # Do not keep a truncated upload around.
        if os.path.exists(file_path):
            os.remove(file_path)
        raise

    # 2. Create DB entry
    meeting = Meeting(
        title=original_name or filename,
        filename=filename,
        file_path=file_path,
        status="UPLOADED",
    )
    try:
        db.add(meeting)
        db.commit()
        db.refresh(meeting)
    except Exception:
        # Without a DB row nothing would ever reference the stored file.
        db.rollback()
        if os.path.exists(file_path):
            os.remove(file_path)
        raise

    # 3. Trigger processing in the background. The task receives the session
    # factory (not this request's session) and opens its own session.
    background_tasks.add_task(process_meeting_task, meeting.id, SessionLocal)
    return meeting
# Development entry point: start uvicorn with auto-reload when this module is
# executed directly instead of being imported.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "backend.app:app",
        host="0.0.0.0",
        port=8001,
        reload=True,
    )

View File

@@ -0,0 +1,27 @@
import os
from pydantic_settings import BaseSettings
from typing import Optional
class Settings(BaseSettings):
    """Runtime configuration of the transcription service.

    Each field declares its default here; the nested ``Config`` names
    ``.env`` as the env file for pydantic-settings.
    """

    # Service identity.
    APP_NAME: str = "Transcription Engine"
    VERSION: str = "0.1.0"

    # Storage locations.
    DATABASE_URL: str = "sqlite:////app/transcripts.db"
    UPLOAD_DIR: str = "/app/uploads_audio"

    # Gemini credentials; None when not provided.
    GEMINI_API_KEY: Optional[str] = None

    # Length of one audio chunk in seconds.
    CHUNK_DURATION_SEC: int = 1800  # 30 minutes

    class Config:
        env_file = ".env"
# Module-wide settings singleton.
settings = Settings()

# When no API key was configured, fall back to a key file if one exists.
if not settings.GEMINI_API_KEY:
    key_path = "/app/gemini_api_key.txt"
    if os.path.exists(key_path):
        with open(key_path, "r") as f:
            file_contents = f.read()
            settings.GEMINI_API_KEY = file_contents.strip()

# Make sure the upload directory and its "chunks" subdirectory exist.
os.makedirs(settings.UPLOAD_DIR, exist_ok=True)
os.makedirs(
    os.path.join(settings.UPLOAD_DIR, "chunks"),
    exist_ok=True,
)

View File

@@ -0,0 +1,63 @@
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, Float, JSON
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship
from datetime import datetime
from .config import settings
# "check_same_thread" is an argument of the sqlite3 driver only. It is
# disabled so a connection may be used from a thread other than the one that
# created it. Other drivers reject the argument, so pass it only for SQLite
# URLs; DATABASE_URL can then point at another database without code changes.
_connect_args = (
    {"check_same_thread": False}
    if settings.DATABASE_URL.startswith("sqlite")
    else {}
)
engine = create_engine(settings.DATABASE_URL, connect_args=_connect_args)

# Session factory: explicit commits, no automatic flush before queries.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class shared by all ORM models in this module.
Base = declarative_base()
class Meeting(Base):
    """ORM model for one uploaded recording (table ``meetings``).

    Transcript chunks and analysis results hang off a meeting and are
    deleted together with it (``cascade="all, delete-orphan"``).
    """

    __tablename__ = "meetings"

    id = Column(Integer, primary_key=True, index=True)
    title = Column(String, index=True)
    filename = Column(String)
    file_path = Column(String)
    date_recorded = Column(DateTime, default=datetime.utcnow)
    duration_seconds = Column(Float, nullable=True)
    # One of: UPLOADED, SPLITTING, TRANSCRIBING, ANALYZING, COMPLETED, ERROR
    status = Column(String, default="UPLOADED")
    # List of participant names
    participants = Column(JSON, nullable=True)
    summary = Column(Text, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    chunks = relationship(
        "TranscriptChunk",
        back_populates="meeting",
        cascade="all, delete-orphan",
    )
    analysis_results = relationship(
        "AnalysisResult",
        back_populates="meeting",
        cascade="all, delete-orphan",
    )
class TranscriptChunk(Base):
    """ORM model for the transcript of one audio chunk (table ``transcript_chunks``)."""

    __tablename__ = "transcript_chunks"

    id = Column(Integer, primary_key=True, index=True)
    meeting_id = Column(Integer, ForeignKey("meetings.id"))
    # Position of the chunk within its meeting.
    chunk_index = Column(Integer)
    raw_text = Column(Text)
    # Structured timestamps/speakers
    json_content = Column(JSON, nullable=True)

    meeting = relationship("Meeting", back_populates="chunks")
class AnalysisResult(Base):
    """ORM model for one analysis output of a meeting (table ``analysis_results``)."""

    __tablename__ = "analysis_results"

    id = Column(Integer, primary_key=True, index=True)
    meeting_id = Column(Integer, ForeignKey("meetings.id"))
    # Kind of analysis, e.g. summary, tasks, notes
    prompt_key = Column(String)
    result_text = Column(Text)
    created_at = Column(DateTime, default=datetime.utcnow)

    meeting = relationship("Meeting", back_populates="analysis_results")
def init_db():
    """Create the tables of all models registered on ``Base`` on ``engine``."""
    metadata = Base.metadata
    metadata.create_all(bind=engine)
def get_db():
    """Yield a new database session and close it once the consumer is done.

    Written as a generator so it can be used as a FastAPI dependency; the
    ``finally`` clause closes the session even if the consumer raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()

View File

@@ -0,0 +1,10 @@
fastapi
uvicorn
sqlalchemy
pydantic
pydantic-settings
python-multipart
requests
google-genai
python-dotenv
aiofiles

View File

@@ -0,0 +1,49 @@
import subprocess
import os
import logging
from ..config import settings
logger = logging.getLogger(__name__)


class FFmpegService:
    """Thin wrapper around the ``ffmpeg`` and ``ffprobe`` command-line tools."""

    def split_audio(self, input_path: str, meeting_id: int) -> list:
        """Split an audio file into MP3 chunks with ffmpeg's segment muxer.

        Chunks are ``settings.CHUNK_DURATION_SEC`` seconds long and written to
        ``<UPLOAD_DIR>/chunks/<meeting_id>/chunk_NNN.mp3``.

        Args:
            input_path: Path of the audio file to split.
            meeting_id: Id of the meeting; used as the chunk directory name.

        Returns:
            Sorted list of paths of the ``.mp3`` files in the chunk directory.

        Raises:
            Exception: If ffmpeg exits with a non-zero status.
        """
        output_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id))
        os.makedirs(output_dir, exist_ok=True)
        output_pattern = os.path.join(output_dir, "chunk_%03d.mp3")

        # The chunks are always .mp3 files. A stream copy is only valid when
        # the source already is MP3; any other upload (wav, m4a, ogg, ...)
        # must be re-encoded, otherwise the MP3 muxer rejects the stream and
        # the split fails.
        source_is_mp3 = os.path.splitext(input_path)[1].lower() == ".mp3"
        if source_is_mp3:
            codec_args = ["-c", "copy"]
        else:
            codec_args = ["-vn", "-c:a", "libmp3lame"]

        # ffmpeg -i input -f segment -segment_time 1800 <codec> chunk_%03d.mp3
        cmd = [
            "ffmpeg", "-i", input_path,
            "-f", "segment",
            "-segment_time", str(settings.CHUNK_DURATION_SEC),
            *codec_args,
            output_pattern,
        ]
        logger.info(f"Splitting {input_path} into segments...")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            logger.error(f"FFmpeg Error: {result.stderr}")
            raise Exception("Failed to split audio file.")

        chunks = sorted(
            os.path.join(output_dir, name)
            for name in os.listdir(output_dir)
            if name.endswith(".mp3")
        )
        logger.info(f"Created {len(chunks)} chunks.")
        return chunks

    def get_duration(self, input_path: str) -> float:
        """Return the duration of an audio file in seconds (best effort).

        Returns ``0.0`` when the duration cannot be determined: ffprobe is
        not installed, cannot read the file, or prints something that is not
        a number.
        """
        cmd = [
            "ffprobe", "-v", "error", "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1", input_path,
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
            return float(result.stdout.strip())
        except (OSError, ValueError) as exc:
            # OSError: ffprobe could not be started. ValueError: empty or
            # non-numeric output. The duration is informational only, so
            # degrade to 0.0 instead of failing the whole pipeline.
            logger.warning("Could not determine duration of %s: %s", input_path, exc)
            return 0.0

View File

@@ -0,0 +1,60 @@
import logging
from sqlalchemy.orm import Session
from .ffmpeg_service import FFmpegService
from .transcription_service import TranscriptionService
from ..database import Meeting, TranscriptChunk
from ..config import settings
logger = logging.getLogger(__name__)


def _run_pipeline(db, meeting):
    """Split, transcribe and finalize *meeting*, committing after every step."""
    ffmpeg = FFmpegService()
    transcriber = TranscriptionService()

    # Phase 1: Split
    meeting.status = "SPLITTING"
    db.commit()
    meeting.duration_seconds = ffmpeg.get_duration(meeting.file_path)
    chunks = ffmpeg.split_audio(meeting.file_path, meeting.id)

    # Phase 2: Transcribe
    meeting.status = "TRANSCRIBING"
    db.commit()
    for i, chunk_path in enumerate(chunks):
        # Start of this chunk relative to the full recording, assuming every
        # preceding chunk has the configured length.
        offset = i * settings.CHUNK_DURATION_SEC
        logger.info(f"Processing chunk {i+1}/{len(chunks)} with offset {offset}s")
        result = transcriber.transcribe_chunk(chunk_path, offset)

        # Save the chunk result right away so finished chunks survive a
        # failure in a later chunk.
        db.add(
            TranscriptChunk(
                meeting_id=meeting.id,
                chunk_index=i,
                raw_text=result["raw_text"],
            )
        )
        db.commit()

    # Phase 3: Finalize
    meeting.status = "COMPLETED"
    # TODO: build meeting.summary (can be refined later with a separate LLM call)
    db.commit()
    logger.info(f"Meeting {meeting.id} processing completed.")


def process_meeting_task(meeting_id: int, db_session_factory):
    """Background job: split and transcribe the audio of one meeting.

    Opens its own session from *db_session_factory* and always closes it,
    including when the meeting does not exist. Pipeline errors are logged and
    recorded as status ``ERROR`` on the meeting instead of being raised.

    Args:
        meeting_id: Primary key of the ``Meeting`` to process.
        db_session_factory: Zero-argument callable returning a new session.
    """
    db = db_session_factory()
    try:
        meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
        if not meeting:
            return

        try:
            _run_pipeline(db, meeting)
        except Exception as e:
            logger.error(f"Error processing meeting {meeting_id}: {e}", exc_info=True)
            # A failed flush/commit leaves the session unusable until it is
            # rolled back; do that before recording the error status.
            db.rollback()
            meeting.status = "ERROR"
            db.commit()
    finally:
        db.close()

View File

@@ -0,0 +1,58 @@
import os
import time
import logging
from google import genai
from google.genai import types
from ..config import settings
logger = logging.getLogger(__name__)

# Prompt sent with every chunk (runtime text, intentionally left in German).
_TRANSCRIPTION_PROMPT = """
Transkribiere dieses Audio wortgetreu.
Identifiziere die Sprecher (Sprecher A, Sprecher B, etc.).
Gib das Ergebnis als strukturierte Liste mit Timestamps aus.
Wichtig: Das Audio ist ein Teil eines größeren Gesprächs.
Antworte NUR mit dem Transkript im Format:
[MM:SS] Sprecher X: Text
"""


class TranscriptionService:
    """Transcribes audio chunks through the Gemini API."""

    # Seconds between two status polls while Gemini processes an upload.
    POLL_INTERVAL_SEC = 2
    # Upper bound for that processing, so a stuck upload cannot block the
    # background worker forever.
    PROCESSING_TIMEOUT_SEC = 600

    def __init__(self):
        """Create the Gemini client.

        Raises:
            Exception: If ``settings.GEMINI_API_KEY`` is not set.
        """
        if not settings.GEMINI_API_KEY:
            raise Exception("Gemini API Key missing.")
        self.client = genai.Client(api_key=settings.GEMINI_API_KEY)

    def transcribe_chunk(self, file_path: str, offset_seconds: int = 0) -> dict:
        """Upload one chunk to Gemini and return its transcription.

        Args:
            file_path: Path of the audio chunk on disk.
            offset_seconds: Start of the chunk within the full recording;
                passed through unchanged in the result.

        Returns:
            ``{"raw_text": <transcript or "">, "offset": offset_seconds}``.

        Raises:
            TimeoutError: If Gemini is still processing the upload after
                ``PROCESSING_TIMEOUT_SEC`` seconds.
            Exception: If Gemini reports the upload as failed.
        """
        logger.info(f"Uploading chunk {file_path} to Gemini...")
        # 1. Upload file (current google-genai releases name this keyword
        # `file`; `path` was its name in early pre-1.0 releases).
        media_file = self.client.files.upload(file=file_path)
        try:
            # 2. Wait for processing (usually fast for audio)
            deadline = time.monotonic() + self.PROCESSING_TIMEOUT_SEC
            while media_file.state == "PROCESSING":
                if time.monotonic() >= deadline:
                    raise TimeoutError("Timed out waiting for Gemini file processing.")
                time.sleep(self.POLL_INTERVAL_SEC)
                media_file = self.client.files.get(name=media_file.name)
            if media_file.state == "FAILED":
                raise Exception("File processing failed at Gemini.")

            # 3. Transcribe with diarization and timestamps
            logger.info(f"Generating transcription for {file_path}...")
            response = self.client.models.generate_content(
                model="gemini-2.0-flash",
                contents=[media_file, _TRANSCRIPTION_PROMPT],
                config=types.GenerateContentConfig(
                    temperature=0.1,  # Low temp for accuracy
                ),
            )
        finally:
            # Cleanup: delete the file from Gemini storage on success and on
            # failure. A failing delete must not mask the real outcome.
            try:
                self.client.files.delete(name=media_file.name)
            except Exception:
                logger.warning(
                    "Could not delete %s from Gemini storage", media_file.name, exc_info=True
                )

        return {
            # response.text can be None when the model returned no text part.
            "raw_text": response.text or "",
            "offset": offset_seconds,
        }

View File

@@ -0,0 +1,12 @@
<!doctype html>
<!-- Page shell for the frontend: an empty #root container plus the module script /src/main.tsx. -->
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Meeting Assistant</title>
</head>
<body class="bg-slate-50 dark:bg-slate-950">
<!-- Mount point looked up by id in main.tsx. -->
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

View File

@@ -0,0 +1,27 @@
{
"name": "transcription-frontend",
"private": true,
"version": "0.1.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"preview": "vite preview"
},
"dependencies": {
"axios": "^1.6.2",
"clsx": "^2.0.0",
"lucide-react": "^0.294.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"tailwind-merge": "^2.1.0",
"@types/react": "^18.2.37",
"@types/react-dom": "^18.2.15",
"@vitejs/plugin-react": "^4.2.0",
"autoprefixer": "^10.4.16",
"postcss": "^8.4.31",
"tailwindcss": "^3.3.5",
"typescript": "^5.2.2",
"vite": "^5.0.0"
}
}

View File

@@ -0,0 +1,6 @@
// PostCSS configuration: enables the tailwindcss and autoprefixer plugins,
// both with their default (empty) options.
export default {
plugins: {
tailwindcss: {},
autoprefixer: {},
},
}

View File

@@ -0,0 +1,121 @@
import { useState, useEffect } from 'react'
import axios from 'axios'
import { Upload, Mic, FileText, Clock, CheckCircle2, Loader2, AlertCircle, ChevronRight } from 'lucide-react'
import clsx from 'clsx'
const API_BASE = '/tr/api'
// Shape of one entry of the meeting list as used by this component.
interface Meeting {
id: number
title: string
// Processing state; the UI special-cases 'COMPLETED' and 'ERROR'.
status: string
date_recorded: string
// Optional: may be absent from an entry.
duration_seconds?: number
// Parsed with `new Date(...)` for display.
created_at: string
}
/**
 * Main screen of the Meeting Assistant.
 *
 * Shows the list of meetings (refreshed every 5 seconds) and an upload
 * button that posts the selected audio file to the backend.
 */
export default function App() {
  const [meetings, setMeetings] = useState<Meeting[]>([])
  const [uploading, setUploading] = useState(false)
  const [error, setError] = useState<string | null>(null)

  // Load the meeting list. Failures are only logged so polling keeps running.
  const fetchMeetings = async () => {
    try {
      const res = await axios.get(`${API_BASE}/meetings`)
      setMeetings(res.data)
    } catch (e) {
      console.error("Failed to fetch meetings", e)
    }
  }

  useEffect(() => {
    fetchMeetings()
    const interval = setInterval(fetchMeetings, 5000) // Poll every 5s
    return () => clearInterval(interval)
  }, [])

  // Upload the chosen file as multipart form data and refresh the list.
  const handleUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
    const input = e.target
    const file = input.files?.[0]
    if (!file) return
    setUploading(true)
    setError(null)
    const formData = new FormData()
    formData.append('file', file)
    try {
      await axios.post(`${API_BASE}/upload`, formData)
      fetchMeetings()
    } catch (err) {
      console.error("Upload failed", err)
      setError("Upload failed. Make sure the file is not too large.")
    } finally {
      // Clear the input so picking the same file again fires onChange
      // (needed to retry a failed upload).
      input.value = ''
      setUploading(false)
    }
  }

  return (
    <div className="min-h-screen bg-slate-50 dark:bg-slate-950 text-slate-900 dark:text-slate-200">
      <div className="max-w-5xl mx-auto px-4 py-12">
        <header className="flex items-center justify-between mb-12">
          <div>
            <h1 className="text-3xl font-bold tracking-tight">Meeting Assistant</h1>
            <p className="text-slate-500 mt-2">Transcribe and analyze your meetings with Gemini 2.0</p>
          </div>
          <label className={clsx(
            "flex items-center gap-2 px-6 py-3 bg-blue-600 hover:bg-blue-500 text-white rounded-full font-semibold transition-all cursor-pointer shadow-lg shadow-blue-500/20",
            uploading && "opacity-50 cursor-not-allowed"
          )}>
            {uploading ? <Loader2 className="h-5 w-5 animate-spin" /> : <Upload className="h-5 w-5" />}
            {uploading ? "Uploading..." : "New Meeting"}
            <input type="file" className="hidden" accept="audio/*" onChange={handleUpload} disabled={uploading} />
          </label>
        </header>
        {error && (
          <div className="mb-8 p-4 bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-xl text-red-600 dark:text-red-400 flex items-center gap-3">
            <AlertCircle className="h-5 w-5" />
            {error}
          </div>
        )}
        <div className="grid gap-4">
          {meetings.length === 0 ? (
            <div className="text-center py-20 bg-white dark:bg-slate-900 rounded-3xl border-2 border-dashed border-slate-200 dark:border-slate-800">
              <Mic className="h-12 w-12 mx-auto mb-4 text-slate-300" />
              <p className="text-slate-500 font-medium">No meetings yet. Upload your first audio file.</p>
            </div>
          ) : (
            meetings.map(m => (
              <div key={m.id} className="group bg-white dark:bg-slate-900 p-6 rounded-2xl border border-slate-200 dark:border-slate-800 hover:shadow-xl transition-all flex items-center justify-between cursor-pointer">
                <div className="flex items-center gap-4">
                  <div className={clsx(
                    "p-3 rounded-xl",
                    m.status === 'COMPLETED' ? "bg-green-100 dark:bg-green-900/30 text-green-600" :
                    m.status === 'ERROR' ? "bg-red-100 dark:bg-red-900/30 text-red-600" :
                    "bg-blue-100 dark:bg-blue-900/30 text-blue-600 animate-pulse"
                  )}>
                    {m.status === 'COMPLETED' ? <CheckCircle2 className="h-6 w-6" /> : <FileText className="h-6 w-6" />}
                  </div>
                  <div>
                    <h3 className="font-bold text-lg leading-tight">{m.title}</h3>
                    <div className="flex items-center gap-4 mt-1 text-sm text-slate-500">
                      <span className="flex items-center gap-1"><Clock className="h-3.5 w-3.5" /> {new Date(m.created_at).toLocaleDateString()}</span>
                      {/* Explicit comparison: `0 && ...` would render a literal "0". */}
                      {m.duration_seconds != null && m.duration_seconds > 0 && (
                        <span>{Math.round(m.duration_seconds / 60)} min</span>
                      )}
                      <span className={clsx(
                        "font-semibold uppercase tracking-wider text-[10px] px-2 py-0.5 rounded",
                        m.status === 'COMPLETED' ? "bg-green-100 text-green-700" : "bg-slate-100 text-slate-600"
                      )}>{m.status}</span>
                    </div>
                  </div>
                </div>
                <ChevronRight className="h-6 w-6 text-slate-300 group-hover:text-blue-500 transition-colors" />
              </div>
            ))
          )}
        </div>
      </div>
    </div>
  )
}

View File

@@ -0,0 +1,7 @@
/* Pull in Tailwind's three layers. */
@tailwind base;
@tailwind components;
@tailwind utilities;
/* Declare that the page supports both light and dark color schemes. */
:root {
color-scheme: light dark;
}

View File

@@ -0,0 +1,10 @@
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App'
import './index.css'
// Mount <App /> into the #root element, wrapped in React.StrictMode.
// The non-null assertion (!) assumes an element with id "root" exists in the page.
ReactDOM.createRoot(document.getElementById('root')!).render(
<React.StrictMode>
<App />
</React.StrictMode>,
)

View File

@@ -0,0 +1 @@
/// <reference types="vite/client" />

View File

@@ -0,0 +1,11 @@
/** @type {import('tailwindcss').Config} */
export default {
// Files listed under `content`: index.html and all JS/TS(X) sources under src/.
content: [
"./index.html",
"./src/**/*.{js,ts,jsx,tsx}",
],
// No theme extensions and no plugins are configured.
theme: {
extend: {},
},
plugins: [],
}

View File

@@ -0,0 +1,15 @@
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'
// Vite configuration for the frontend.
export default defineConfig({
plugins: [react()],
// Public base path of the built app.
base: '/tr/',
server: {
// Dev server only: forward requests under /tr/api to http://localhost:8001.
proxy: {
'/tr/api': {
target: 'http://localhost:8001',
changeOrigin: true,
}
}
}
})