From 0858df6f25c50677fe05bdce4cb8a14a88a88a33 Mon Sep 17 00:00:00 2001
From: Floke
Date: Sat, 24 Jan 2026 16:34:01 +0000
Subject: [PATCH] feat(transcription): add meeting assistant micro-service
v0.1.0
- Added FastAPI backend with FFmpeg and Gemini 2.0 integration
- Added React frontend with upload and meeting list
- Integrated into main docker-compose stack and dashboard
---
TRANSCRIPTION_TOOL.md | 82 ++++++++++++
dashboard/index.html | 9 ++
docker-compose.yml | 21 ++-
nginx-proxy.conf | 17 +++
readme.md | 8 ++
transcription-tool/Dockerfile | 37 ++++++
transcription-tool/backend/__init__.py | 0
transcription-tool/backend/app.py | 72 +++++++++++
transcription-tool/backend/config.py | 27 ++++
transcription-tool/backend/database.py | 63 +++++++++
transcription-tool/backend/lib/__init__.py | 0
transcription-tool/backend/requirements.txt | 10 ++
.../backend/services/__init__.py | 0
.../backend/services/ffmpeg_service.py | 49 +++++++
.../backend/services/orchestrator.py | 60 +++++++++
.../backend/services/transcription_service.py | 58 +++++++++
transcription-tool/frontend/index.html | 12 ++
transcription-tool/frontend/package.json | 27 ++++
transcription-tool/frontend/postcss.config.js | 6 +
transcription-tool/frontend/src/App.tsx | 121 ++++++++++++++++++
transcription-tool/frontend/src/index.css | 7 +
transcription-tool/frontend/src/main.tsx | 10 ++
transcription-tool/frontend/src/vite-env.d.ts | 1 +
.../frontend/tailwind.config.js | 11 ++
transcription-tool/frontend/vite.config.ts | 15 +++
25 files changed, 721 insertions(+), 2 deletions(-)
create mode 100644 TRANSCRIPTION_TOOL.md
create mode 100644 transcription-tool/Dockerfile
create mode 100644 transcription-tool/backend/__init__.py
create mode 100644 transcription-tool/backend/app.py
create mode 100644 transcription-tool/backend/config.py
create mode 100644 transcription-tool/backend/database.py
create mode 100644 transcription-tool/backend/lib/__init__.py
create mode 100644 transcription-tool/backend/requirements.txt
create mode 100644 transcription-tool/backend/services/__init__.py
create mode 100644 transcription-tool/backend/services/ffmpeg_service.py
create mode 100644 transcription-tool/backend/services/orchestrator.py
create mode 100644 transcription-tool/backend/services/transcription_service.py
create mode 100644 transcription-tool/frontend/index.html
create mode 100644 transcription-tool/frontend/package.json
create mode 100644 transcription-tool/frontend/postcss.config.js
create mode 100644 transcription-tool/frontend/src/App.tsx
create mode 100644 transcription-tool/frontend/src/index.css
create mode 100644 transcription-tool/frontend/src/main.tsx
create mode 100644 transcription-tool/frontend/src/vite-env.d.ts
create mode 100644 transcription-tool/frontend/tailwind.config.js
create mode 100644 transcription-tool/frontend/vite.config.ts
diff --git a/TRANSCRIPTION_TOOL.md b/TRANSCRIPTION_TOOL.md
new file mode 100644
index 00000000..8359dad1
--- /dev/null
+++ b/TRANSCRIPTION_TOOL.md
@@ -0,0 +1,82 @@
+# Meeting Assistant (Transcription Tool)
+
+**Version:** 0.1.0
+**Status:** Beta (Core Functionality)
+
+Der **Meeting Assistant** ist ein lokaler Micro-Service zur Transkription und Analyse von Audio-Dateien (Meetings, Calls, Interviews). Er kombiniert die Datensicherheit einer lokalen Datenhaltung mit der Leistungsfähigkeit von Googles **Gemini 2.0 Flash** Modell für kostengünstige, hochqualitative Speech-to-Text Umwandlung.
+
+---
+
+## 1. Architektur
+
+Der Service folgt dem "Sidecar"-Pattern im Docker-Stack und ist vollständig in das Dashboard integriert.
+
+* **Frontend:** React (Vite + Tailwind) unter `/tr/`.
+* **Backend:** FastAPI (Python) unter `/tr/api/`.
+* **Processing:**
+ * **FFmpeg:** Zerlegt große Audio-Dateien (> 2 Stunden) in verarbeitbare 30-Minuten-Chunks.
+ * **Gemini 2.0 Flash:** Führt die eigentliche Transkription durch (via `google-genai` SDK).
+ * **SQLite:** Speichert Metadaten, Status und Ergebnisse.
+* **Storage:** Lokales Docker-Volume für Audio-Uploads (werden nach Verarbeitung *nicht* gelöscht, um Re-Analysen zu ermöglichen).
+
+### Datenfluss
+1. **Upload:** User lädt MP3 hoch -> Speicherung in `/app/uploads_audio`.
+2. **Chunking:** Backend startet Background-Task -> FFmpeg erstellt Segmente in `/app/uploads_audio/chunks/{id}/`.
+3. **Transkription:** Loop über alle Chunks -> Upload zu Gemini File API -> Generierung -> Löschung aus Gemini Cloud -> Speicherung in DB.
+4. **Assemblierung:** (Geplant für v0.2) Zusammenfügen der Text-Fragmente.
+
+---
+
+## 2. API Endpunkte
+
+Basis-URL: `/tr/api`
+
+| Methode | Pfad | Beschreibung |
+| :--- | :--- | :--- |
+| `GET` | `/meetings` | Liste aller Meetings inkl. Status. |
+| `POST` | `/upload` | Upload einer Audio-Datei (`multipart/form-data`). Startet Prozess. |
+| `GET` | `/health` | Status-Check. |
+
+---
+
+## 3. Datenbank Schema (SQLite)
+
+Datei: `transcripts.db`
+
+### `meetings`
+* `id`: PK
+* `title`: Dateiname (z.B. "Jours_Fixe_Januar.mp3")
+* `status`: `UPLOADED` -> `SPLITTING` -> `TRANSCRIBING` -> `COMPLETED` (bzw. `ERROR` bei Fehlern)
+* `duration_seconds`: Gesamtlänge
+* `file_path`: Lokaler Pfad
+
+### `transcript_chunks`
+* `id`: PK
+* `meeting_id`: FK
+* `chunk_index`: 0, 1, 2...
+* `raw_text`: Das rohe Transkript von Gemini.
+
+---
+
+## 4. Konfiguration & Limits
+
+* **Max Upload Size:** 500 MB (konfiguriert in Nginx).
+* **Chunk Size:** 30 Minuten (1800 Sekunden).
+* **Modell:** `gemini-2.0-flash` (Temperatur 0.1 für Faktentreue).
+* **Kosten:** Gemini 2.0 Flash ist extrem günstig (Audio-Input), aber beachten Sie die API-Limits bei sehr vielen parallelen Uploads.
+
+---
+
+## 5. Roadmap / Next Steps
+
+* **v0.2:** Frontend-Detailansicht mit Volltext-Anzeige.
+* **v0.3:** Sprecher-Identifikation (Mapping "Speaker A" -> "Thomas").
+* **v0.4:** "Meeting Notes" Generator (Zusammenfassung via LLM).
+
+---
+
+## 6. Troubleshooting
+
+* **Upload bricht ab:** Prüfen Sie, ob die Datei > 500MB ist oder der Nginx-Timeout (1800s) greift.
+* **Status bleibt bei "TRANSCRIBING":** Prüfen Sie die Docker-Logs (`docker logs transcription-app`). Evtl. ist der API-Key ungültig oder das Quota erschöpft.
+* **FFmpeg Fehler:** Stellen Sie sicher, dass das Eingangsformat valide ist (MP3, WAV, M4A werden meist unterstützt).
diff --git a/dashboard/index.html b/dashboard/index.html
index cf83d09c..da169827 100644
--- a/dashboard/index.html
+++ b/dashboard/index.html
@@ -183,6 +183,15 @@
Starten →
+
+
+
🎙️
+
Meeting Assistant
+
+ Transkribieren Sie Meetings (MP3/WAV) mit automatischer Sprechererkennung und Timestamps via Gemini 2.0.
+
+
Starten →
+
diff --git a/docker-compose.yml b/docker-compose.yml
index ffe95ff6..924436d7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -74,10 +74,27 @@ services:
- ./Log_from_docker:/app/logs_debug
environment:
- PYTHONUNBUFFERED=1
- ports:
- - "8000:8000"
# Port 8000 is internal only
+ # --- TRANSCRIPTION TOOL (Meeting Assistant) ---
+ transcription-app:
+ build:
+ context: ./transcription-tool
+ dockerfile: Dockerfile
+ container_name: transcription-app
+ restart: unless-stopped
+ volumes:
+ - ./transcription-tool/backend:/app/backend
+ - ./transcripts.db:/app/transcripts.db
+ - ./uploads_audio:/app/uploads_audio
+ - ./gemini_api_key.txt:/app/gemini_api_key.txt
+ environment:
+ - PYTHONUNBUFFERED=1
+ - DATABASE_URL=sqlite:////app/transcripts.db
+    expose:
+      - "8001"
+    # Internal only: reached via the nginx proxy under /tr/. If startup order
+    # matters, the proxy should depend_on this service, not the reverse.
# --- B2B MARKETING ASSISTANT ---
b2b-app:
build:
diff --git a/nginx-proxy.conf b/nginx-proxy.conf
index 271e0e53..792dddf5 100644
--- a/nginx-proxy.conf
+++ b/nginx-proxy.conf
@@ -116,5 +116,22 @@ http {
proxy_connect_timeout 1200s;
proxy_send_timeout 1200s;
}
+ location /tr/ {
+ # Transcription Tool (Meeting Assistant)
+ # KEIN Trailing Slash, damit der /tr/ Pfad erhalten bleibt!
+ proxy_pass http://transcription-app:8001;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection "upgrade";
+            proxy_http_version 1.1;  # required for the WebSocket upgrade to work
+ # Increase limit for large MP3 uploads
+ client_max_body_size 500M;
+
+ # Explicit timeouts
+ proxy_read_timeout 1800s;
+ proxy_connect_timeout 1800s;
+ proxy_send_timeout 1800s;
+ }
}
}
diff --git a/readme.md b/readme.md
index 197c4bc5..ee57cec0 100644
--- a/readme.md
+++ b/readme.md
@@ -491,6 +491,14 @@ Das Skript `company_deduplicator.py` (ehemals `duplicate_checker_old.py`) ist ei
Es verwendet einen gewichteten, heuristischen Algorithmus, um Ähnlichkeiten zu bewerten und nutzt bekannte Unternehmenshierarchien (`Parent Account`), um Falsch-Positive zu reduzieren.
+### Meeting Assistant (Transcription Tool)
+
+Ein lokaler Micro-Service zur Transkription und Analyse von Audio-Dateien (Meetings, Calls, Interviews). Nutzt Gemini 2.0 Flash für kostengünstige, hochqualitative Ergebnisse.
+
+* **Dokumentation:** [TRANSCRIPTION_TOOL.md](TRANSCRIPTION_TOOL.md)
+* **Funktionen:** Upload (MP3/WAV), Automatisches Chunking (FFmpeg), Transkription mit Timestamps.
+* **Zugriff:** Über das Dashboard `/tr/`.
+
#### Neue Features (Interne Deduplizierung)
- **Zwei-Modi-Betrieb:** Das Skript fragt beim Start interaktiv ab, ob ein externer Vergleich oder eine interne Deduplizierung durchgeführt werden soll.
- **Gruppierung & ID-Zuweisung:** Im internen Modus werden gefundene Duplikatspaare zu Clustern zusammengefasst (z.B. wenn A=B und B=C, dann ist A,B,C eine Gruppe). Jede Gruppe erhält eine eindeutige ID (z.B. `Dup_0001`), die in eine neue Spalte `Duplicate_ID` im `CRM_Accounts`-Sheet geschrieben wird.
diff --git a/transcription-tool/Dockerfile b/transcription-tool/Dockerfile
new file mode 100644
index 00000000..0717edcb
--- /dev/null
+++ b/transcription-tool/Dockerfile
@@ -0,0 +1,37 @@
+# --- STAGE 1: Build Frontend ---
+FROM node:20-slim AS frontend-builder
+WORKDIR /build
+COPY frontend/package*.json ./
+RUN npm install
+COPY frontend/ ./
+RUN npm run build
+
+# --- STAGE 2: Backend & Runtime ---
+FROM python:3.11-slim
+WORKDIR /app
+
+# System Dependencies (FFmpeg ist essenziell)
+RUN apt-get update && apt-get install -y \
+ ffmpeg \
+ build-essential \
+ && rm -rf /var/lib/apt/lists/*
+
+# Copy Requirements & Install
+COPY backend/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy Built Frontend from Stage 1
+COPY --from=frontend-builder /build/dist /frontend_static
+
+# Copy Backend Source
+COPY backend ./backend
+
+# Environment Variables
+ENV PYTHONPATH=/app
+ENV PYTHONUNBUFFERED=1
+
+# Expose Port
+EXPOSE 8001
+
+# Start FastAPI (production image: no --reload; use a compose override for dev)
+CMD ["uvicorn", "backend.app:app", "--host", "0.0.0.0", "--port", "8001"]
\ No newline at end of file
diff --git a/transcription-tool/backend/__init__.py b/transcription-tool/backend/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/transcription-tool/backend/app.py b/transcription-tool/backend/app.py
new file mode 100644
index 00000000..88f8c006
--- /dev/null
+++ b/transcription-tool/backend/app.py
@@ -0,0 +1,80 @@
+"""FastAPI entry point for the Meeting Assistant transcription service."""
+from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, BackgroundTasks
+from fastapi.middleware.cors import CORSMiddleware
+from sqlalchemy.orm import Session
+import os
+import shutil
+import uuid
+from datetime import datetime
+
+from .config import settings
+from .database import init_db, get_db, Meeting, TranscriptChunk, AnalysisResult, SessionLocal
+from .services.orchestrator import process_meeting_task
+
+app = FastAPI(
+    title=settings.APP_NAME,
+    version=settings.VERSION,
+    root_path="/tr"
+)
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.on_event("startup")
+def startup_event():
+    # Create tables on first boot; a no-op on subsequent starts.
+    init_db()
+
+@app.get("/api/health")
+def health():
+    """Liveness probe used by the dashboard."""
+    return {"status": "ok", "version": settings.VERSION}
+
+@app.get("/api/meetings")
+def list_meetings(db: Session = Depends(get_db)):
+    """Return all meetings, newest first, including processing status."""
+    return db.query(Meeting).order_by(Meeting.created_at.desc()).all()
+
+@app.post("/api/upload")
+async def upload_audio(
+    background_tasks: BackgroundTasks,
+    file: UploadFile = File(...),
+    db: Session = Depends(get_db)
+):
+    """Persist an uploaded audio file and start background processing."""
+    # 1. Save file under a collision-free UUID name.
+    #    UploadFile.filename is Optional — guard against a missing name.
+    original_name = file.filename or "upload"
+    file_id = str(uuid.uuid4())
+    ext = os.path.splitext(original_name)[1].lower()
+    filename = f"{file_id}{ext}"
+    file_path = os.path.join(settings.UPLOAD_DIR, filename)
+
+    with open(file_path, "wb") as buffer:
+        shutil.copyfileobj(file.file, buffer)
+
+    # 2. Create DB entry
+    meeting = Meeting(
+        title=original_name,
+        filename=filename,
+        file_path=file_path,
+        status="UPLOADED"
+    )
+    db.add(meeting)
+    db.commit()
+    db.refresh(meeting)
+
+    # 3. Trigger processing in the background. Pass the session *factory*:
+    #    the request-scoped session is closed once the response is sent.
+    background_tasks.add_task(process_meeting_task, meeting.id, SessionLocal)
+
+    return meeting
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("backend.app:app", host="0.0.0.0", port=8001, reload=True)
diff --git a/transcription-tool/backend/config.py b/transcription-tool/backend/config.py
new file mode 100644
index 00000000..c77e8e07
--- /dev/null
+++ b/transcription-tool/backend/config.py
@@ -0,0 +1,32 @@
+"""Runtime configuration for the transcription service."""
+import os
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import Optional
+
+class Settings(BaseSettings):
+    """Settings resolved from env vars / .env, with safe defaults."""
+
+    APP_NAME: str = "Transcription Engine"
+    VERSION: str = "0.1.0"
+    DATABASE_URL: str = "sqlite:////app/transcripts.db"
+    UPLOAD_DIR: str = "/app/uploads_audio"
+    GEMINI_API_KEY: Optional[str] = None
+    CHUNK_DURATION_SEC: int = 1800  # 30 minutes per FFmpeg segment
+
+    # pydantic-settings v2 configuration (replaces the deprecated `class Config`).
+    model_config = SettingsConfigDict(env_file=".env")
+
+settings = Settings()
+
+# Fallback: read the API key from the secret file mounted by docker-compose
+# when it was not supplied via environment.
+if not settings.GEMINI_API_KEY:
+    key_path = "/app/gemini_api_key.txt"
+    if os.path.exists(key_path):
+        with open(key_path, "r") as f:
+            settings.GEMINI_API_KEY = f.read().strip()
+
+# Create upload dirs at import time so the first upload cannot hit a
+# missing-directory error.
+os.makedirs(settings.UPLOAD_DIR, exist_ok=True)
+os.makedirs(os.path.join(settings.UPLOAD_DIR, "chunks"), exist_ok=True)
diff --git a/transcription-tool/backend/database.py b/transcription-tool/backend/database.py
new file mode 100644
index 00000000..b12077b0
--- /dev/null
+++ b/transcription-tool/backend/database.py
@@ -0,0 +1,67 @@
+"""SQLAlchemy models and session plumbing for the transcription service."""
+from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, Float, JSON
+from sqlalchemy.orm import sessionmaker, relationship, declarative_base
+from datetime import datetime
+from .config import settings
+
+# check_same_thread=False: the SQLite connection is used from both request
+# handlers and FastAPI background tasks.
+engine = create_engine(settings.DATABASE_URL, connect_args={"check_same_thread": False})
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+Base = declarative_base()
+
+class Meeting(Base):
+    __tablename__ = "meetings"
+
+    id = Column(Integer, primary_key=True, index=True)
+    title = Column(String, index=True)
+    filename = Column(String)
+    file_path = Column(String)
+    date_recorded = Column(DateTime, default=datetime.utcnow)
+
+    duration_seconds = Column(Float, nullable=True)
+    status = Column(String, default="UPLOADED")  # UPLOADED, SPLITTING, TRANSCRIBING, ANALYZING, COMPLETED, ERROR
+
+    participants = Column(JSON, nullable=True)  # List of names
+    summary = Column(Text, nullable=True)
+
+    created_at = Column(DateTime, default=datetime.utcnow)
+    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+
+    chunks = relationship("TranscriptChunk", back_populates="meeting", cascade="all, delete-orphan")
+    analysis_results = relationship("AnalysisResult", back_populates="meeting", cascade="all, delete-orphan")
+
+class TranscriptChunk(Base):
+    __tablename__ = "transcript_chunks"
+
+    id = Column(Integer, primary_key=True, index=True)
+    meeting_id = Column(Integer, ForeignKey("meetings.id"))
+    chunk_index = Column(Integer)
+
+    raw_text = Column(Text)
+    json_content = Column(JSON, nullable=True)  # Structured timestamps/speakers
+
+    meeting = relationship("Meeting", back_populates="chunks")
+
+class AnalysisResult(Base):
+    __tablename__ = "analysis_results"
+
+    id = Column(Integer, primary_key=True, index=True)
+    meeting_id = Column(Integer, ForeignKey("meetings.id"))
+    prompt_key = Column(String)  # summary, tasks, notes
+    result_text = Column(Text)
+
+    created_at = Column(DateTime, default=datetime.utcnow)
+    meeting = relationship("Meeting", back_populates="analysis_results")
+
+def init_db():
+    """Create all tables if they do not exist yet."""
+    Base.metadata.create_all(bind=engine)
+
+def get_db():
+    """FastAPI dependency: yields a session and always closes it."""
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
diff --git a/transcription-tool/backend/lib/__init__.py b/transcription-tool/backend/lib/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/transcription-tool/backend/requirements.txt b/transcription-tool/backend/requirements.txt
new file mode 100644
index 00000000..121b1f44
--- /dev/null
+++ b/transcription-tool/backend/requirements.txt
@@ -0,0 +1,10 @@
+fastapi
+uvicorn
+sqlalchemy
+pydantic
+pydantic-settings
+python-multipart
+requests
+google-genai
+python-dotenv
+aiofiles
diff --git a/transcription-tool/backend/services/__init__.py b/transcription-tool/backend/services/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/transcription-tool/backend/services/ffmpeg_service.py b/transcription-tool/backend/services/ffmpeg_service.py
new file mode 100644
index 00000000..e1b86bae
--- /dev/null
+++ b/transcription-tool/backend/services/ffmpeg_service.py
@@ -0,0 +1,53 @@
+"""Thin wrapper around the ffmpeg/ffprobe CLIs for audio chunking."""
+import subprocess
+import os
+import logging
+from ..config import settings
+
+logger = logging.getLogger(__name__)
+
+class FFmpegService:
+    def split_audio(self, input_path: str, meeting_id: int) -> list:
+        """
+        Splits audio into CHUNK_DURATION_SEC segments using ffmpeg's
+        segment muxer (stream copy, no re-encode).
+        Returns the sorted list of chunk file paths.
+        Raises RuntimeError if ffmpeg exits non-zero.
+        """
+        output_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id))
+        os.makedirs(output_dir, exist_ok=True)
+
+        output_pattern = os.path.join(output_dir, "chunk_%03d.mp3")
+
+        # ffmpeg -i input.mp3 -f segment -segment_time 1800 -c copy chunk_%03d.mp3
+        cmd = [
+            "ffmpeg", "-i", input_path,
+            "-f", "segment",
+            "-segment_time", str(settings.CHUNK_DURATION_SEC),
+            "-c", "copy",
+            output_pattern
+        ]
+
+        logger.info(f"Splitting {input_path} into segments...")
+        result = subprocess.run(cmd, capture_output=True, text=True)
+
+        if result.returncode != 0:
+            logger.error(f"FFmpeg Error: {result.stderr}")
+            raise RuntimeError("Failed to split audio file.")
+
+        chunks = sorted([os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(".mp3")])
+        logger.info(f"Created {len(chunks)} chunks.")
+        return chunks
+
+    def get_duration(self, input_path: str) -> float:
+        """Returns the duration in seconds, or 0.0 if ffprobe output is unusable."""
+        cmd = [
+            "ffprobe", "-v", "error", "-show_entries", "format=duration",
+            "-of", "default=noprint_wrappers=1:nokey=1", input_path
+        ]
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        try:
+            return float(result.stdout.strip())
+        except ValueError:
+            # ffprobe failed or printed nothing — treat as unknown duration.
+            return 0.0
diff --git a/transcription-tool/backend/services/orchestrator.py b/transcription-tool/backend/services/orchestrator.py
new file mode 100644
index 00000000..bca23b72
--- /dev/null
+++ b/transcription-tool/backend/services/orchestrator.py
@@ -0,0 +1,72 @@
+"""Background pipeline: split uploaded audio, transcribe chunks, persist."""
+import logging
+from sqlalchemy.orm import Session
+from .ffmpeg_service import FFmpegService
+from .transcription_service import TranscriptionService
+from ..database import Meeting, TranscriptChunk
+from ..config import settings
+
+logger = logging.getLogger(__name__)
+
+def process_meeting_task(meeting_id: int, db_session_factory):
+    """Runs as a FastAPI background task with its own DB session.
+
+    Status transitions: UPLOADED -> SPLITTING -> TRANSCRIBING -> COMPLETED,
+    or ERROR on any failure.
+    """
+    db = db_session_factory()
+    meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
+    if not meeting:
+        db.close()  # the early return must still release the session
+        return
+
+    try:
+        ffmpeg = FFmpegService()
+        transcriber = TranscriptionService()
+
+        # Phase 1: Split
+        meeting.status = "SPLITTING"
+        db.commit()
+
+        meeting.duration_seconds = ffmpeg.get_duration(meeting.file_path)
+        chunks = ffmpeg.split_audio(meeting.file_path, meeting.id)
+
+        # Phase 2: Transcribe
+        meeting.status = "TRANSCRIBING"
+        db.commit()
+
+        all_text = []
+        for i, chunk_path in enumerate(chunks):
+            # Each segment is CHUNK_DURATION_SEC long, so the absolute
+            # offset of chunk i is simply i * CHUNK_DURATION_SEC.
+            offset = i * settings.CHUNK_DURATION_SEC
+            logger.info(f"Processing chunk {i+1}/{len(chunks)} with offset {offset}s")
+
+            result = transcriber.transcribe_chunk(chunk_path, offset)
+
+            # Save chunk result
+            db_chunk = TranscriptChunk(
+                meeting_id=meeting.id,
+                chunk_index=i,
+                raw_text=result["raw_text"]
+            )
+            db.add(db_chunk)
+            all_text.append(result["raw_text"])
+            db.commit()
+
+        # Phase 3: Finalize
+        meeting.status = "COMPLETED"
+        # Combine summary (first attempt - can be refined later with separate LLM call)
+        # meeting.summary = ...
+        db.commit()
+        logger.info(f"Meeting {meeting.id} processing completed.")
+
+    except Exception as e:
+        logger.error(f"Error processing meeting {meeting_id}: {e}", exc_info=True)
+        # The session may be in a failed transaction — roll back before
+        # writing the ERROR status, or this commit can fail as well.
+        db.rollback()
+        meeting.status = "ERROR"
+        db.commit()
+    finally:
+        db.close()
diff --git a/transcription-tool/backend/services/transcription_service.py b/transcription-tool/backend/services/transcription_service.py
new file mode 100644
index 00000000..4f747e5c
--- /dev/null
+++ b/transcription-tool/backend/services/transcription_service.py
@@ -0,0 +1,66 @@
+"""Gemini-based speech-to-text for individual audio chunks."""
+import os
+import time
+import logging
+from google import genai
+from google.genai import types
+from ..config import settings
+
+logger = logging.getLogger(__name__)
+
+class TranscriptionService:
+    def __init__(self):
+        if not settings.GEMINI_API_KEY:
+            raise Exception("Gemini API Key missing.")
+        self.client = genai.Client(api_key=settings.GEMINI_API_KEY)
+
+    def transcribe_chunk(self, file_path: str, offset_seconds: int = 0) -> dict:
+        """
+        Uploads a chunk to the Gemini File API and returns the transcription:
+        {"raw_text": <model output>, "offset": offset_seconds}.
+        The uploaded file is deleted from Gemini storage even on failure.
+        """
+        prompt = """
+        Transkribiere dieses Audio wortgetreu.
+        Identifiziere die Sprecher (Sprecher A, Sprecher B, etc.).
+        Gib das Ergebnis als strukturierte Liste mit Timestamps aus.
+        Wichtig: Das Audio ist ein Teil eines größeren Gesprächs.
+        Antworte NUR mit dem Transkript im Format:
+        [MM:SS] Sprecher X: Text
+        """
+
+        logger.info(f"Uploading chunk {file_path} to Gemini...")
+
+        # 1. Upload file (the google-genai SDK takes `file=`, not `path=`)
+        media_file = self.client.files.upload(file=file_path)
+
+        try:
+            # 2. Wait for server-side processing (usually fast for audio);
+            #    bounded so a stuck file cannot hang the task forever.
+            deadline = time.monotonic() + 300
+            while media_file.state == types.FileState.PROCESSING:
+                if time.monotonic() > deadline:
+                    raise Exception("Timed out waiting for Gemini file processing.")
+                time.sleep(2)
+                media_file = self.client.files.get(name=media_file.name)
+
+            if media_file.state == types.FileState.FAILED:
+                raise Exception("File processing failed at Gemini.")
+
+            # 3. Transcribe with Diarization and Timestamps
+            logger.info(f"Generating transcription for {file_path}...")
+            response = self.client.models.generate_content(
+                model="gemini-2.0-flash",
+                contents=[media_file, prompt],
+                config=types.GenerateContentConfig(
+                    temperature=0.1,  # Low temp for accuracy
+                )
+            )
+        finally:
+            # Cleanup: always delete the file from Gemini storage
+            self.client.files.delete(name=media_file.name)
+
+        return {
+            "raw_text": response.text,
+            "offset": offset_seconds
+        }
diff --git a/transcription-tool/frontend/index.html b/transcription-tool/frontend/index.html
new file mode 100644
index 00000000..fd7d7fa3
--- /dev/null
+++ b/transcription-tool/frontend/index.html
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Meeting Assistant</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>
diff --git a/transcription-tool/frontend/package.json b/transcription-tool/frontend/package.json
new file mode 100644
index 00000000..7195d3d0
--- /dev/null
+++ b/transcription-tool/frontend/package.json
@@ -0,0 +1,27 @@
+{
+ "name": "transcription-frontend",
+ "private": true,
+ "version": "0.1.0",
+ "type": "module",
+ "scripts": {
+ "dev": "vite",
+ "build": "tsc && vite build",
+ "preview": "vite preview"
+ },
+ "dependencies": {
+ "axios": "^1.6.2",
+ "clsx": "^2.0.0",
+ "lucide-react": "^0.294.0",
+ "react": "^18.2.0",
+ "react-dom": "^18.2.0",
+ "tailwind-merge": "^2.1.0",
+ "@types/react": "^18.2.37",
+ "@types/react-dom": "^18.2.15",
+ "@vitejs/plugin-react": "^4.2.0",
+ "autoprefixer": "^10.4.16",
+ "postcss": "^8.4.31",
+ "tailwindcss": "^3.3.5",
+ "typescript": "^5.2.2",
+ "vite": "^5.0.0"
+ }
+}
diff --git a/transcription-tool/frontend/postcss.config.js b/transcription-tool/frontend/postcss.config.js
new file mode 100644
index 00000000..2e7af2b7
--- /dev/null
+++ b/transcription-tool/frontend/postcss.config.js
@@ -0,0 +1,6 @@
+export default {
+ plugins: {
+ tailwindcss: {},
+ autoprefixer: {},
+ },
+}
diff --git a/transcription-tool/frontend/src/App.tsx b/transcription-tool/frontend/src/App.tsx
new file mode 100644
index 00000000..3f412fa9
--- /dev/null
+++ b/transcription-tool/frontend/src/App.tsx
@@ -0,0 +1,121 @@
+import { useState, useEffect } from 'react'
+import axios from 'axios'
+import { Upload, Mic, FileText, Clock, CheckCircle2, Loader2, AlertCircle, ChevronRight } from 'lucide-react'
+import clsx from 'clsx'
+
+const API_BASE = '/tr/api'
+
+interface Meeting {
+ id: number
+ title: string
+ status: string
+ date_recorded: string
+ duration_seconds?: number
+ created_at: string
+}
+
+export default function App() {
+  const [meetings, setMeetings] = useState<Meeting[]>([])
+  const [uploading, setUploading] = useState(false)
+  const [error, setError] = useState<string | null>(null)
+
+ const fetchMeetings = async () => {
+ try {
+ const res = await axios.get(`${API_BASE}/meetings`)
+ setMeetings(res.data)
+ } catch (e) {
+ console.error("Failed to fetch meetings", e)
+ }
+ }
+
+ useEffect(() => {
+ fetchMeetings()
+ const interval = setInterval(fetchMeetings, 5000) // Poll every 5s
+ return () => clearInterval(interval)
+ }, [])
+
+  const handleUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
+ const file = e.target.files?.[0]
+ if (!file) return
+
+ setUploading(true)
+ setError(null)
+ const formData = new FormData()
+ formData.append('file', file)
+
+ try {
+ await axios.post(`${API_BASE}/upload`, formData)
+ fetchMeetings()
+ } catch (e) {
+ setError("Upload failed. Make sure the file is not too large.")
+ } finally {
+ setUploading(false)
+ }
+ }
+
+ return (
+
+
+
+
+ {error && (
+
+ )}
+
+
+ {meetings.length === 0 ? (
+
+
+
No meetings yet. Upload your first audio file.
+
+ ) : (
+ meetings.map(m => (
+
+
+
+ {m.status === 'COMPLETED' ? : }
+
+
+
{m.title}
+
+ {new Date(m.created_at).toLocaleDateString()}
+ {m.duration_seconds && (
+ {Math.round(m.duration_seconds / 60)} min
+ )}
+ {m.status}
+
+
+
+
+
+ ))
+ )}
+
+
+
+ )
+}
diff --git a/transcription-tool/frontend/src/index.css b/transcription-tool/frontend/src/index.css
new file mode 100644
index 00000000..49ec1974
--- /dev/null
+++ b/transcription-tool/frontend/src/index.css
@@ -0,0 +1,7 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+:root {
+ color-scheme: light dark;
+}
diff --git a/transcription-tool/frontend/src/main.tsx b/transcription-tool/frontend/src/main.tsx
new file mode 100644
index 00000000..964aeb4c
--- /dev/null
+++ b/transcription-tool/frontend/src/main.tsx
@@ -0,0 +1,10 @@
+import React from 'react'
+import ReactDOM from 'react-dom/client'
+import App from './App'
+import './index.css'
+
+ReactDOM.createRoot(document.getElementById('root')!).render(
+  <React.StrictMode>
+    <App />
+  </React.StrictMode>,
+)
diff --git a/transcription-tool/frontend/src/vite-env.d.ts b/transcription-tool/frontend/src/vite-env.d.ts
new file mode 100644
index 00000000..11f02fe2
--- /dev/null
+++ b/transcription-tool/frontend/src/vite-env.d.ts
@@ -0,0 +1 @@
+/// <reference types="vite/client" />
diff --git a/transcription-tool/frontend/tailwind.config.js b/transcription-tool/frontend/tailwind.config.js
new file mode 100644
index 00000000..dca8ba02
--- /dev/null
+++ b/transcription-tool/frontend/tailwind.config.js
@@ -0,0 +1,11 @@
+/** @type {import('tailwindcss').Config} */
+export default {
+ content: [
+ "./index.html",
+ "./src/**/*.{js,ts,jsx,tsx}",
+ ],
+ theme: {
+ extend: {},
+ },
+ plugins: [],
+}
diff --git a/transcription-tool/frontend/vite.config.ts b/transcription-tool/frontend/vite.config.ts
new file mode 100644
index 00000000..4bb54354
--- /dev/null
+++ b/transcription-tool/frontend/vite.config.ts
@@ -0,0 +1,15 @@
+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+
+export default defineConfig({
+ plugins: [react()],
+ base: '/tr/',
+ server: {
+ proxy: {
+ '/tr/api': {
+ target: 'http://localhost:8001',
+ changeOrigin: true,
+ }
+ }
+ }
+})