From 4e52e194f1a4b8dd5f42587386f11e3361f32cd1 Mon Sep 17 00:00:00 2001 From: Floke Date: Sat, 24 Jan 2026 16:34:01 +0000 Subject: [PATCH] feat(transcription): add meeting assistant micro-service v0.1.0 - Added FastAPI backend with FFmpeg and Gemini 2.0 integration - Added React frontend with upload and meeting list - Integrated into main docker-compose stack and dashboard --- TRANSCRIPTION_TOOL.md | 82 ++++++++++++ dashboard/index.html | 9 ++ docker-compose.yml | 21 ++- nginx-proxy.conf | 17 +++ readme.md | 8 ++ transcription-tool/Dockerfile | 37 ++++++ transcription-tool/backend/__init__.py | 0 transcription-tool/backend/app.py | 72 +++++++++++ transcription-tool/backend/config.py | 27 ++++ transcription-tool/backend/database.py | 63 +++++++++ transcription-tool/backend/lib/__init__.py | 0 transcription-tool/backend/requirements.txt | 10 ++ .../backend/services/__init__.py | 0 .../backend/services/ffmpeg_service.py | 49 +++++++ .../backend/services/orchestrator.py | 60 +++++++++ .../backend/services/transcription_service.py | 58 +++++++++ transcription-tool/frontend/index.html | 12 ++ transcription-tool/frontend/package.json | 27 ++++ transcription-tool/frontend/postcss.config.js | 6 + transcription-tool/frontend/src/App.tsx | 121 ++++++++++++++++++ transcription-tool/frontend/src/index.css | 7 + transcription-tool/frontend/src/main.tsx | 10 ++ transcription-tool/frontend/src/vite-env.d.ts | 1 + .../frontend/tailwind.config.js | 11 ++ transcription-tool/frontend/vite.config.ts | 15 +++ 25 files changed, 721 insertions(+), 2 deletions(-) create mode 100644 TRANSCRIPTION_TOOL.md create mode 100644 transcription-tool/Dockerfile create mode 100644 transcription-tool/backend/__init__.py create mode 100644 transcription-tool/backend/app.py create mode 100644 transcription-tool/backend/config.py create mode 100644 transcription-tool/backend/database.py create mode 100644 transcription-tool/backend/lib/__init__.py create mode 100644 
transcription-tool/backend/requirements.txt create mode 100644 transcription-tool/backend/services/__init__.py create mode 100644 transcription-tool/backend/services/ffmpeg_service.py create mode 100644 transcription-tool/backend/services/orchestrator.py create mode 100644 transcription-tool/backend/services/transcription_service.py create mode 100644 transcription-tool/frontend/index.html create mode 100644 transcription-tool/frontend/package.json create mode 100644 transcription-tool/frontend/postcss.config.js create mode 100644 transcription-tool/frontend/src/App.tsx create mode 100644 transcription-tool/frontend/src/index.css create mode 100644 transcription-tool/frontend/src/main.tsx create mode 100644 transcription-tool/frontend/src/vite-env.d.ts create mode 100644 transcription-tool/frontend/tailwind.config.js create mode 100644 transcription-tool/frontend/vite.config.ts diff --git a/TRANSCRIPTION_TOOL.md b/TRANSCRIPTION_TOOL.md new file mode 100644 index 00000000..8359dad1 --- /dev/null +++ b/TRANSCRIPTION_TOOL.md @@ -0,0 +1,82 @@ +# Meeting Assistant (Transcription Tool) + +**Version:** 0.1.0 +**Status:** Beta (Core Functionality) + +Der **Meeting Assistant** ist ein lokaler Micro-Service zur Transkription und Analyse von Audio-Dateien (Meetings, Calls, Interviews). Er kombiniert die Datensicherheit einer lokalen Datenhaltung mit der Leistungsfähigkeit von Googles **Gemini 2.0 Flash** Modell für kostengünstige, hochqualitative Speech-to-Text Umwandlung. + +--- + +## 1. Architektur + +Der Service folgt dem "Sidecar"-Pattern im Docker-Stack und ist vollständig in das Dashboard integriert. + +* **Frontend:** React (Vite + Tailwind) unter `/tr/`. +* **Backend:** FastAPI (Python) unter `/tr/api/`. +* **Processing:** + * **FFmpeg:** Zerlegt große Audio-Dateien (> 2 Stunden) in verarbeitbare 30-Minuten-Chunks. + * **Gemini 2.0 Flash:** Führt die eigentliche Transkription durch (via `google-genai` SDK). + * **SQLite:** Speichert Metadaten, Status und Ergebnisse. 
+* **Storage:** Lokales Docker-Volume für Audio-Uploads (werden nach Verarbeitung *nicht* gelöscht, um Re-Analysen zu ermöglichen). + +### Datenfluss +1. **Upload:** User lädt MP3 hoch -> Speicherung in `/app/uploads_audio`. +2. **Chunking:** Backend startet Background-Task -> FFmpeg erstellt Segmente in `/app/uploads_audio/chunks/{id}/`. +3. **Transkription:** Loop über alle Chunks -> Upload zu Gemini File API -> Generierung -> Löschung aus Gemini Cloud -> Speicherung in DB. +4. **Assemblierung:** (Geplant für v0.2) Zusammenfügen der Text-Fragmente. + +--- + +## 2. API Endpunkte + +Basis-URL: `/tr/api` + +| Methode | Pfad | Beschreibung | +| :--- | :--- | :--- | +| `GET` | `/meetings` | Liste aller Meetings inkl. Status. | +| `POST` | `/upload` | Upload einer Audio-Datei (`multipart/form-data`). Startet Prozess. | +| `GET` | `/health` | Status-Check. | + +--- + +## 3. Datenbank Schema (SQLite) + +Datei: `transcripts.db` + +### `meetings` +* `id`: PK +* `title`: Dateiname (z.B. "Jours_Fixe_Januar.mp3") +* `status`: `UPLOADED` -> `SPLITTING` -> `TRANSCRIBING` -> `COMPLETED` (bei Fehlern: `ERROR`) +* `duration_seconds`: Gesamtlänge +* `file_path`: Lokaler Pfad + +### `transcript_chunks` +* `id`: PK +* `meeting_id`: FK +* `chunk_index`: 0, 1, 2... +* `raw_text`: Das rohe Transkript von Gemini. + +--- + +## 4. Konfiguration & Limits + +* **Max Upload Size:** 500 MB (konfiguriert in Nginx). +* **Chunk Size:** 30 Minuten (1800 Sekunden). +* **Modell:** `gemini-2.0-flash` (Temperatur 0.1 für Faktentreue). +* **Kosten:** Gemini 2.0 Flash ist extrem günstig (Audio-Input), aber beachten Sie die API-Limits bei sehr vielen parallelen Uploads. + +--- + +## 5. Roadmap / Next Steps + +* **v0.2:** Frontend-Detailansicht mit Volltext-Anzeige. +* **v0.3:** Sprecher-Identifikation (Mapping "Speaker A" -> "Thomas"). +* **v0.4:** "Meeting Notes" Generator (Zusammenfassung via LLM). + +--- + +## 6. 
Troubleshooting + +* **Upload bricht ab:** Prüfen Sie, ob die Datei > 500MB ist oder der Nginx-Timeout (1800s) greift. +* **Status bleibt bei "TRANSCRIBING":** Prüfen Sie die Docker-Logs (`docker logs transcription-app`). Evtl. ist der API-Key ungültig oder das Quota erschöpft. +* **FFmpeg Fehler:** Stellen Sie sicher, dass das Eingangsformat valide ist (MP3, WAV, M4A werden meist unterstützt). diff --git a/dashboard/index.html b/dashboard/index.html index cf83d09c..da169827 100644 --- a/dashboard/index.html +++ b/dashboard/index.html @@ -183,6 +183,15 @@

Starten → + +
+ 🎙️ +

Meeting Assistant

+

+ Transkribieren Sie Meetings (MP3/WAV) mit automatischer Sprechererkennung und Timestamps via Gemini 2.0. +

+ Starten → +
diff --git a/docker-compose.yml b/docker-compose.yml index ffe95ff6..924436d7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -74,10 +74,27 @@ services: - ./Log_from_docker:/app/logs_debug environment: - PYTHONUNBUFFERED=1 - ports: - - "8000:8000" # Port 8000 is internal only + # --- TRANSCRIPTION TOOL (Meeting Assistant) --- + transcription-app: + build: + context: ./transcription-tool + dockerfile: Dockerfile + container_name: transcription-app + restart: unless-stopped + volumes: + - ./transcription-tool/backend:/app/backend + - ./transcripts.db:/app/transcripts.db + - ./uploads_audio:/app/uploads_audio + - ./gemini_api_key.txt:/app/gemini_api_key.txt + environment: + - PYTHONUNBUFFERED=1 + - DATABASE_URL=sqlite:////app/transcripts.db + ports: + - "8001:8001" + depends_on: + - proxy # --- B2B MARKETING ASSISTANT --- b2b-app: build: diff --git a/nginx-proxy.conf b/nginx-proxy.conf index 271e0e53..792dddf5 100644 --- a/nginx-proxy.conf +++ b/nginx-proxy.conf @@ -116,5 +116,22 @@ http { proxy_connect_timeout 1200s; proxy_send_timeout 1200s; } + location /tr/ { + # Transcription Tool (Meeting Assistant) + # KEIN Trailing Slash, damit der /tr/ Pfad erhalten bleibt! + proxy_pass http://transcription-app:8001; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Increase limit for large MP3 uploads + client_max_body_size 500M; + + # Explicit timeouts + proxy_read_timeout 1800s; + proxy_connect_timeout 1800s; + proxy_send_timeout 1800s; + } } } diff --git a/readme.md b/readme.md index 197c4bc5..ee57cec0 100644 --- a/readme.md +++ b/readme.md @@ -491,6 +491,14 @@ Das Skript `company_deduplicator.py` (ehemals `duplicate_checker_old.py`) ist ei Es verwendet einen gewichteten, heuristischen Algorithmus, um Ähnlichkeiten zu bewerten und nutzt bekannte Unternehmenshierarchien (`Parent Account`), um Falsch-Positive zu reduzieren. 
+### Meeting Assistant (Transcription Tool) + +Ein lokaler Micro-Service zur Transkription und Analyse von Audio-Dateien (Meetings, Calls, Interviews). Nutzt Gemini 2.0 Flash für kostengünstige, hochqualitative Ergebnisse. + +* **Dokumentation:** [TRANSCRIPTION_TOOL.md](TRANSCRIPTION_TOOL.md) +* **Funktionen:** Upload (MP3/WAV), Automatisches Chunking (FFmpeg), Transkription mit Timestamps. +* **Zugriff:** Über das Dashboard `/tr/`. + #### Neue Features (Interne Deduplizierung) - **Zwei-Modi-Betrieb:** Das Skript fragt beim Start interaktiv ab, ob ein externer Vergleich oder eine interne Deduplizierung durchgeführt werden soll. - **Gruppierung & ID-Zuweisung:** Im internen Modus werden gefundene Duplikatspaare zu Clustern zusammengefasst (z.B. wenn A=B und B=C, dann ist A,B,C eine Gruppe). Jede Gruppe erhält eine eindeutige ID (z.B. `Dup_0001`), die in eine neue Spalte `Duplicate_ID` im `CRM_Accounts`-Sheet geschrieben wird. diff --git a/transcription-tool/Dockerfile b/transcription-tool/Dockerfile new file mode 100644 index 00000000..0717edcb --- /dev/null +++ b/transcription-tool/Dockerfile @@ -0,0 +1,37 @@ +# --- STAGE 1: Build Frontend --- +FROM node:20-slim AS frontend-builder +WORKDIR /build +COPY frontend/package*.json ./ +RUN npm install +COPY frontend/ ./ +RUN npm run build + +# --- STAGE 2: Backend & Runtime --- +FROM python:3.11-slim +WORKDIR /app + +# System Dependencies (FFmpeg ist essenziell) +RUN apt-get update && apt-get install -y \ + ffmpeg \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Copy Requirements & Install +COPY backend/requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy Built Frontend from Stage 1
+COPY --from=frontend-builder /build/dist /frontend_static
+
+# Copy Backend Source
+COPY backend ./backend
+
+# Environment Variables
+ENV PYTHONPATH=/app
+ENV PYTHONUNBUFFERED=1
+
+# Expose Port
+EXPOSE 8001
+
+# Start FastAPI
+CMD ["uvicorn", "backend.app:app", "--host", "0.0.0.0", "--port", "8001", "--reload"]
\ No newline at end of file
diff --git a/transcription-tool/backend/__init__.py b/transcription-tool/backend/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/transcription-tool/backend/app.py b/transcription-tool/backend/app.py
new file mode 100644
index 00000000..88f8c006
--- /dev/null
+++ b/transcription-tool/backend/app.py
@@ -0,0 +1,96 @@
+from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, BackgroundTasks
+from fastapi.concurrency import run_in_threadpool
+from fastapi.middleware.cors import CORSMiddleware
+from sqlalchemy.orm import Session
+import os
+import shutil
+import uuid
+from datetime import datetime
+
+from .config import settings
+from .database import init_db, get_db, Meeting, TranscriptChunk, AnalysisResult, SessionLocal
+from .services.orchestrator import process_meeting_task
+
+app = FastAPI(
+    title=settings.APP_NAME,
+    version=settings.VERSION,
+    root_path="/tr"
+)
+
+# NOTE(review): wildcard origins together with allow_credentials=True is very
+# permissive; restrict allow_origins if this API is ever reachable from
+# anywhere other than the local reverse proxy.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+def _save_upload(source, destination_path: str) -> None:
+    """Copy the uploaded file object `source` to `destination_path` on disk."""
+    with open(destination_path, "wb") as buffer:
+        shutil.copyfileobj(source, buffer)
+
+@app.on_event("startup")
+def startup_event():
+    """Create the database tables when the application starts."""
+    init_db()
+
+@app.get("/api/health")
+def health():
+    """Return a static liveness payload including the configured version."""
+    return {"status": "ok", "version": settings.VERSION}
+
+@app.get("/api/meetings")
+def list_meetings(db: Session = Depends(get_db)):
+    """Return every meeting, newest first (ordered by created_at)."""
+    return db.query(Meeting).order_by(Meeting.created_at.desc()).all()
+
+@app.post("/api/upload")
+async def upload_audio(
+    background_tasks: BackgroundTasks,
+    file: UploadFile = File(...),
+    db: Session = Depends(get_db)
+):
+    """Store an uploaded audio file, register it and schedule its processing.
+
+    The file is written to settings.UPLOAD_DIR under a random UUID name that
+    keeps the original extension, a Meeting row with status "UPLOADED" is
+    created, and process_meeting_task is queued as a background task.
+
+    Returns the newly created Meeting.
+    """
+    # 1. Save File
+    # UploadFile.filename is optional; fall back to a fixed name so that
+    # splitext() below never receives None.
+    original_name = file.filename or "upload"
+    file_id = str(uuid.uuid4())
+    ext = os.path.splitext(original_name)[1]
+    filename = f"{file_id}{ext}"
+    file_path = os.path.join(settings.UPLOAD_DIR, filename)
+
+    # Copying is blocking disk I/O: run it in the threadpool so a large upload
+    # does not stall the event loop (and with it every other request).
+    await run_in_threadpool(_save_upload, file.file, file_path)
+
+    # 2. Create DB Entry
+    meeting = Meeting(
+        title=original_name,
+        filename=filename,
+        file_path=file_path,
+        status="UPLOADED"
+    )
+    db.add(meeting)
+    db.commit()
+    db.refresh(meeting)
+
+    # 3. Trigger Processing in Background
+    background_tasks.add_task(process_meeting_task, meeting.id, SessionLocal)
+
+    return meeting
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("backend.app:app", host="0.0.0.0", port=8001, reload=True)
diff --git a/transcription-tool/backend/config.py b/transcription-tool/backend/config.py
new file mode 100644
index 00000000..c77e8e07
--- /dev/null
+++ b/transcription-tool/backend/config.py
@@ -0,0 +1,28 @@
+import os
+from pydantic_settings import BaseSettings
+from typing import Optional
+
+class Settings(BaseSettings):
+    """Service configuration; overrides are read from the environment and the .env file."""
+    APP_NAME: str = "Transcription Engine"
+    VERSION: str = "0.1.0"
+    DATABASE_URL: str = "sqlite:////app/transcripts.db"
+    UPLOAD_DIR: str = "/app/uploads_audio"
+    GEMINI_API_KEY: Optional[str] = None
+    CHUNK_DURATION_SEC: int = 1800  # 30 Minutes
+
+    class Config:
+        env_file = ".env"
+
+settings = Settings()
+
+# Auto-load API Key from the key file when none was configured above
+if not settings.GEMINI_API_KEY:
+    key_path = "/app/gemini_api_key.txt"
+    if os.path.exists(key_path):
+        with open(key_path, "r") as f:
+            settings.GEMINI_API_KEY = f.read().strip()
+
+# Ensure Upload Dir exists
+os.makedirs(settings.UPLOAD_DIR, exist_ok=True)
+os.makedirs(os.path.join(settings.UPLOAD_DIR, "chunks"), exist_ok=True)
diff --git a/transcription-tool/backend/database.py b/transcription-tool/backend/database.py
new file mode 100644
index 00000000..b12077b0
--- /dev/null
+++ b/transcription-tool/backend/database.py
@@ -0,0 +1,63 @@
+from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey,
Float, JSON
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker, relationship
+from datetime import datetime
+from .config import settings
+
+engine = create_engine(settings.DATABASE_URL, connect_args={"check_same_thread": False})  # sqlite3 option: connection may be used outside its creating thread
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)  # session factory bound to the engine above
+Base = declarative_base()
+
+class Meeting(Base):  # One uploaded recording plus its processing state
+    __tablename__ = "meetings"
+
+    id = Column(Integer, primary_key=True, index=True)
+    title = Column(String, index=True)  # Name of the file as uploaded
+    filename = Column(String)  # Stored name: random UUID + original extension
+    file_path = Column(String)  # Full path of the stored file under UPLOAD_DIR
+    date_recorded = Column(DateTime, default=datetime.utcnow)
+
+    duration_seconds = Column(Float, nullable=True)  # Filled in by the processing task (ffprobe)
+    status = Column(String, default="UPLOADED") # UPLOADED, SPLITTING, TRANSCRIBING, ANALYZING, COMPLETED, ERROR
+
+    participants = Column(JSON, nullable=True) # List of names
+    summary = Column(Text, nullable=True)
+
+    created_at = Column(DateTime, default=datetime.utcnow)
+    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+
+    chunks = relationship("TranscriptChunk", back_populates="meeting", cascade="all, delete-orphan")  # removed together with the meeting
+    analysis_results = relationship("AnalysisResult", back_populates="meeting", cascade="all, delete-orphan")  # removed together with the meeting
+
+class TranscriptChunk(Base):  # Transcript of one audio chunk of a meeting
+    __tablename__ = "transcript_chunks"
+
+    id = Column(Integer, primary_key=True, index=True)
+    meeting_id = Column(Integer, ForeignKey("meetings.id"))
+    chunk_index = Column(Integer)  # 0-based position of the chunk within the meeting
+
+    raw_text = Column(Text)  # Transcript text as returned by the model
+    json_content = Column(JSON, nullable=True) # Structured timestamps/speakers
+
+    meeting = relationship("Meeting", back_populates="chunks")
+
+class AnalysisResult(Base):  # presumably one stored LLM analysis per prompt_key - not written anywhere yet, confirm
+    __tablename__ = "analysis_results"
+
+    id = Column(Integer, primary_key=True, index=True)
+    meeting_id = Column(Integer, ForeignKey("meetings.id"))
+    prompt_key = Column(String) # summary, tasks, notes
+    result_text = Column(Text)
+
+    created_at = Column(DateTime, default=datetime.utcnow)
+    meeting = 
relationship("Meeting", back_populates="analysis_results")
+
+def init_db():  # create the tables declared on Base
+    Base.metadata.create_all(bind=engine)
+
+def get_db():  # request-scoped session: yielded to the endpoint, always closed afterwards
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
diff --git a/transcription-tool/backend/lib/__init__.py b/transcription-tool/backend/lib/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/transcription-tool/backend/requirements.txt b/transcription-tool/backend/requirements.txt
new file mode 100644
index 00000000..121b1f44
--- /dev/null
+++ b/transcription-tool/backend/requirements.txt
@@ -0,0 +1,10 @@
+fastapi
+uvicorn
+sqlalchemy
+pydantic
+pydantic-settings
+python-multipart
+requests
+google-genai
+python-dotenv
+aiofiles
diff --git a/transcription-tool/backend/services/__init__.py b/transcription-tool/backend/services/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/transcription-tool/backend/services/ffmpeg_service.py b/transcription-tool/backend/services/ffmpeg_service.py
new file mode 100644
index 00000000..e1b86bae
--- /dev/null
+++ b/transcription-tool/backend/services/ffmpeg_service.py
@@ -0,0 +1,70 @@
+import subprocess
+import os
+import logging
+from ..config import settings
+
+logger = logging.getLogger(__name__)
+
+class FFmpegService:
+    """Thin wrapper around the ffmpeg / ffprobe command line tools."""
+
+    def split_audio(self, input_path: str, meeting_id: int) -> list:
+        """
+        Splits audio into MP3 chunks using ffmpeg segment muxer.
+
+        Every chunk is settings.CHUNK_DURATION_SEC long (30min by default) and
+        is written to UPLOAD_DIR/chunks/{meeting_id}/chunk_NNN.mp3.
+
+        Returns a sorted list of paths to the created chunks.
+        Raises RuntimeError if ffmpeg exits with a non-zero status.
+        """
+        output_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id))
+        os.makedirs(output_dir, exist_ok=True)
+
+        output_pattern = os.path.join(output_dir, "chunk_%03d.mp3")
+
+        # Stream copy only works when the source already is MP3; other inputs
+        # (WAV, M4A, ...) cannot be copied into .mp3 segments and are
+        # re-encoded instead (-vn drops cover art / video streams).
+        if os.path.splitext(input_path)[1].lower() == ".mp3":
+            codec_args = ["-c", "copy"]
+        else:
+            codec_args = ["-vn", "-c:a", "libmp3lame"]
+
+        # ffmpeg -i input -f segment -segment_time 1800 [codec args] chunk_%03d.mp3
+        cmd = [
+            "ffmpeg", "-i", input_path,
+            "-f", "segment",
+            "-segment_time", str(settings.CHUNK_DURATION_SEC),
+            *codec_args,
+            output_pattern
+        ]
+
+        logger.info(f"Splitting {input_path} into segments...")
+        result = subprocess.run(cmd, capture_output=True, text=True)
+
+        if result.returncode != 0:
+            logger.error(f"FFmpeg Error: {result.stderr}")
+            raise RuntimeError("Failed to split audio file.")
+
+        chunks = sorted(
+            os.path.join(output_dir, f)
+            for f in os.listdir(output_dir)
+            if f.endswith(".mp3")
+        )
+        logger.info(f"Created {len(chunks)} chunks.")
+        return chunks
+
+    def get_duration(self, input_path: str) -> float:
+        """Gets duration of audio file in seconds (0.0 if ffprobe reports none)."""
+        cmd = [
+            "ffprobe", "-v", "error", "-show_entries", "format=duration",
+            "-of", "default=noprint_wrappers=1:nokey=1", input_path
+        ]
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        try:
+            return float(result.stdout.strip())
+        except ValueError:
+            # Empty or non-numeric output, e.g. ffprobe could not read the file.
+            logger.warning(f"Could not determine duration of {input_path}: {result.stderr.strip()}")
+            return 0.0
diff --git a/transcription-tool/backend/services/orchestrator.py b/transcription-tool/backend/services/orchestrator.py
new file mode 100644
index 00000000..bca23b72
--- /dev/null
+++ b/transcription-tool/backend/services/orchestrator.py
@@ -0,0 +1,75 @@
+import logging
+from sqlalchemy.orm import Session
+from .ffmpeg_service import FFmpegService
+from .transcription_service import TranscriptionService
+from ..database import Meeting, TranscriptChunk
+from ..config import settings
+
+logger = logging.getLogger(__name__)
+
+def _run_pipeline(db, meeting):
+    """Split the meeting audio, transcribe each chunk and mark it COMPLETED."""
+    ffmpeg = FFmpegService()
+    transcriber = TranscriptionService()
+
+    # Phase 1: Split
+    meeting.status = "SPLITTING"
+    db.commit()
+
+    meeting.duration_seconds = ffmpeg.get_duration(meeting.file_path)
+    db.commit()  # keep the duration even if splitting fails below
+    chunks = ffmpeg.split_audio(meeting.file_path, meeting.id)
+
+    # Phase 2: Transcribe
+    meeting.status = "TRANSCRIBING"
+    db.commit()
+
+    for i, chunk_path in enumerate(chunks):
+        # Start of this chunk relative to the beginning of the recording.
+        offset = i * settings.CHUNK_DURATION_SEC
+        logger.info(f"Processing chunk {i+1}/{len(chunks)} with offset {offset}s")
+
+        result = transcriber.transcribe_chunk(chunk_path, offset)
+
+        # Save chunk result; committing per chunk keeps finished chunks even
+        # if a later one fails.
+        db_chunk = TranscriptChunk(
+            meeting_id=meeting.id,
+            chunk_index=i,
+            raw_text=result["raw_text"]
+        )
+        db.add(db_chunk)
+        db.commit()
+
+    # Phase 3: Finalize
+    meeting.status = "COMPLETED"
+    # Combine summary (first attempt - can be refined later with separate LLM call)
+    # meeting.summary = ...
+    db.commit()
+    logger.info(f"Meeting {meeting.id} processing completed.")
+
+def process_meeting_task(meeting_id: int, db_session_factory):
+    """Background task: process one uploaded meeting end to end.
+
+    Opens its own session from db_session_factory, runs the pipeline and sets
+    the meeting status to "ERROR" if any step raises. The session is always
+    closed, including when no meeting with meeting_id exists.
+    """
+    db = db_session_factory()
+    try:
+        meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first()
+        if not meeting:
+            logger.warning(f"Meeting {meeting_id} not found, nothing to process.")
+            return
+
+        try:
+            _run_pipeline(db, meeting)
+        except Exception as e:
+            logger.error(f"Error processing meeting {meeting_id}: {e}", exc_info=True)
+            # Discard whatever the failed step left pending; otherwise the
+            # commit below can fail as well and the status never changes.
+            db.rollback()
+            meeting.status = "ERROR"
+            db.commit()
+    finally:
+        db.close()
diff --git a/transcription-tool/backend/services/transcription_service.py b/transcription-tool/backend/services/transcription_service.py
new file mode 100644
index 00000000..4f747e5c
--- /dev/null
+++ b/transcription-tool/backend/services/transcription_service.py
@@ -0,0 +1,79 @@
+import os
+import time
+import logging
+from google import genai
+from google.genai import types
+from ..config import settings
+
+logger = logging.getLogger(__name__)
+
+# Upper bound (seconds) for waiting on Gemini's server-side file processing.
+FILE_PROCESSING_TIMEOUT_SEC = 600
+
+class TranscriptionService:
+    """Transcribes audio chunks with Gemini through the google-genai client."""
+
+    def __init__(self):
+        """Create the Gemini client; raises if no API key is configured."""
+        if not settings.GEMINI_API_KEY:
+            raise Exception("Gemini API Key missing.")
+        self.client = genai.Client(api_key=settings.GEMINI_API_KEY)
+
+    def transcribe_chunk(self, file_path: str, offset_seconds: int = 0) -> dict:
+        """
+        Uploads a chunk to Gemini and returns the transcription with timestamps.
+
+        Returns a dict with "raw_text" (the model's answer, "" when it returned
+        no text) and "offset" (offset_seconds echoed back; it is not sent to
+        the model).
+        Raises Exception if Gemini reports the file as FAILED or is still
+        processing it after FILE_PROCESSING_TIMEOUT_SEC.
+        """
+        prompt = """
+        Transkribiere dieses Audio wortgetreu.
+        Identifiziere die Sprecher (Sprecher A, Sprecher B, etc.).
+        Gib das Ergebnis als strukturierte Liste mit Timestamps aus.
+        Wichtig: Das Audio ist ein Teil eines größeren Gesprächs.
+        Antworte NUR mit dem Transkript im Format:
+        [MM:SS] Sprecher X: Text
+        """
+
+        logger.info(f"Uploading chunk {file_path} to Gemini...")
+
+        # 1. Upload file (the SDK takes the local path via the `file` keyword)
+        media_file = self.client.files.upload(file=file_path)
+
+        try:
+            # 2. Wait for processing (usually fast for audio), but never forever
+            deadline = time.monotonic() + FILE_PROCESSING_TIMEOUT_SEC
+            while media_file.state == "PROCESSING":
+                if time.monotonic() > deadline:
+                    raise Exception("Timed out waiting for file processing at Gemini.")
+                time.sleep(2)
+                media_file = self.client.files.get(name=media_file.name)
+
+            if media_file.state == "FAILED":
+                raise Exception("File processing failed at Gemini.")
+
+            # 3. Transcribe with Diarization and Timestamps
+            logger.info(f"Generating transcription for {file_path}...")
+            response = self.client.models.generate_content(
+                model="gemini-2.0-flash",
+                contents=[media_file, prompt],
+                config=types.GenerateContentConfig(
+                    temperature=0.1,  # Low temp for accuracy
+                )
+            )
+        finally:
+            # Cleanup: Delete file from Gemini storage, also when a step above
+            # failed. Best effort: a failed delete must not hide the real
+            # error or discard a finished transcription.
+            try:
+                self.client.files.delete(name=media_file.name)
+            except Exception:
+                logger.warning(f"Could not delete {media_file.name} from Gemini storage.", exc_info=True)
+
+        return {
+            "raw_text": response.text or "",
+            "offset": offset_seconds
+        }
diff --git a/transcription-tool/frontend/index.html b/transcription-tool/frontend/index.html
new file mode 100644
index 00000000..fd7d7fa3
--- /dev/null
+++ b/transcription-tool/frontend/index.html
@@ -0,0 +1,12 @@
+ + + + + + Meeting Assistant + + +
+ + + diff --git a/transcription-tool/frontend/package.json b/transcription-tool/frontend/package.json new file mode 100644 index 00000000..7195d3d0 --- /dev/null +++ b/transcription-tool/frontend/package.json @@ -0,0 +1,27 @@ +{ + "name": "transcription-frontend", + "private": true, + "version": "0.1.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc && vite build", + "preview": "vite preview" + }, + "dependencies": { + "axios": "^1.6.2", + "clsx": "^2.0.0", + "lucide-react": "^0.294.0", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "tailwind-merge": "^2.1.0", + "@types/react": "^18.2.37", + "@types/react-dom": "^18.2.15", + "@vitejs/plugin-react": "^4.2.0", + "autoprefixer": "^10.4.16", + "postcss": "^8.4.31", + "tailwindcss": "^3.3.5", + "typescript": "^5.2.2", + "vite": "^5.0.0" + } +} diff --git a/transcription-tool/frontend/postcss.config.js b/transcription-tool/frontend/postcss.config.js new file mode 100644 index 00000000..2e7af2b7 --- /dev/null +++ b/transcription-tool/frontend/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} diff --git a/transcription-tool/frontend/src/App.tsx b/transcription-tool/frontend/src/App.tsx new file mode 100644 index 00000000..3f412fa9 --- /dev/null +++ b/transcription-tool/frontend/src/App.tsx @@ -0,0 +1,121 @@ +import { useState, useEffect } from 'react' +import axios from 'axios' +import { Upload, Mic, FileText, Clock, CheckCircle2, Loader2, AlertCircle, ChevronRight } from 'lucide-react' +import clsx from 'clsx' + +const API_BASE = '/tr/api' + +interface Meeting { + id: number + title: string + status: string + date_recorded: string + duration_seconds?: number + created_at: string +} + +export default function App() { + const [meetings, setMeetings] = useState([]) + const [uploading, setUploading] = useState(false) + const [error, setError] = useState(null) + + const fetchMeetings = async () => { + try { + const res = await 
axios.get(`${API_BASE}/meetings`) + setMeetings(res.data) + } catch (e) { + console.error("Failed to fetch meetings", e) + } + } + + useEffect(() => { + fetchMeetings() + const interval = setInterval(fetchMeetings, 5000) // Poll every 5s + return () => clearInterval(interval) + }, []) + + const handleUpload = async (e: React.ChangeEvent) => { + const file = e.target.files?.[0] + if (!file) return + + setUploading(true) + setError(null) + const formData = new FormData() + formData.append('file', file) + + try { + await axios.post(`${API_BASE}/upload`, formData) + fetchMeetings() + } catch (e) { + setError("Upload failed. Make sure the file is not too large.") + } finally { + setUploading(false) + } + } + + return ( +
+
+
+
+

Meeting Assistant

+

Transcribe and analyze your meetings with Gemini 2.0

+
+ +
+ + {error && ( +
+ + {error} +
+ )} + +
+ {meetings.length === 0 ? ( +
+ +

No meetings yet. Upload your first audio file.

+
+ ) : ( + meetings.map(m => ( +
+
+
+ {m.status === 'COMPLETED' ? : } +
+
+

{m.title}

+
+ {new Date(m.created_at).toLocaleDateString()} + {m.duration_seconds && ( + {Math.round(m.duration_seconds / 60)} min + )} + {m.status} +
+
+
+ +
+ )) + )} +
+
+
+ ) +} diff --git a/transcription-tool/frontend/src/index.css b/transcription-tool/frontend/src/index.css new file mode 100644 index 00000000..49ec1974 --- /dev/null +++ b/transcription-tool/frontend/src/index.css @@ -0,0 +1,7 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +:root { + color-scheme: light dark; +} diff --git a/transcription-tool/frontend/src/main.tsx b/transcription-tool/frontend/src/main.tsx new file mode 100644 index 00000000..964aeb4c --- /dev/null +++ b/transcription-tool/frontend/src/main.tsx @@ -0,0 +1,10 @@ +import React from 'react' +import ReactDOM from 'react-dom/client' +import App from './App' +import './index.css' + +ReactDOM.createRoot(document.getElementById('root')!).render( + + + , +) diff --git a/transcription-tool/frontend/src/vite-env.d.ts b/transcription-tool/frontend/src/vite-env.d.ts new file mode 100644 index 00000000..11f02fe2 --- /dev/null +++ b/transcription-tool/frontend/src/vite-env.d.ts @@ -0,0 +1 @@ +/// diff --git a/transcription-tool/frontend/tailwind.config.js b/transcription-tool/frontend/tailwind.config.js new file mode 100644 index 00000000..dca8ba02 --- /dev/null +++ b/transcription-tool/frontend/tailwind.config.js @@ -0,0 +1,11 @@ +/** @type {import('tailwindcss').Config} */ +export default { + content: [ + "./index.html", + "./src/**/*.{js,ts,jsx,tsx}", + ], + theme: { + extend: {}, + }, + plugins: [], +} diff --git a/transcription-tool/frontend/vite.config.ts b/transcription-tool/frontend/vite.config.ts new file mode 100644 index 00000000..4bb54354 --- /dev/null +++ b/transcription-tool/frontend/vite.config.ts @@ -0,0 +1,15 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +export default defineConfig({ + plugins: [react()], + base: '/tr/', + server: { + proxy: { + '/tr/api': { + target: 'http://localhost:8001', + changeOrigin: true, + } + } + } +})