{content}
+ diff --git a/TRANSCRIPTION_TOOL.md b/TRANSCRIPTION_TOOL.md index 2570beed..c17ebe6a 100644 --- a/TRANSCRIPTION_TOOL.md +++ b/TRANSCRIPTION_TOOL.md @@ -1,7 +1,7 @@ # Meeting Assistant (Transcription Tool) -**Version:** 0.5.0 -**Status:** Beta (Full Content Management) +**Version:** 0.6.0 +**Status:** Beta (AI Insights Integration) Der **Meeting Assistant** ist eine leistungsstarke Suite zur Transkription und Bearbeitung von Audio-Aufnahmen. Er kombiniert lokale FFmpeg-Verarbeitung mit der Gemini 2.0 Flash AI. @@ -10,13 +10,19 @@ Der **Meeting Assistant** ist eine leistungsstarke Suite zur Transkription und B ## 1. Architektur & Stack * **FFmpeg Engine:** Automatisches Splitting großer Dateien in 30-Minuten-Segmente. -* **Gemini 2.0 Flash:** AI-Transkription mit Fokus auf JSON-Struktur (Sprecher, Timestamps). -* **Structured Storage:** SQLite speichert jedes Segment als editierbares JSON-Array. -* **Unified UI:** Das Frontend fügt alle Segmente zu einem nahtlosen Dokument zusammen. +* **Gemini 2.0 Flash:** AI-Transkription mit Fokus auf JSON-Struktur (Sprecher, Timestamps) und zur Generierung von Meeting-Analysen. +* **Prompt Library:** Eine Sammlung von Vorlagen zur Steuerung der KI-Analyse. +* **Structured Storage:** SQLite speichert jedes Segment als editierbares JSON-Array und die Ergebnisse der KI-Analyse. +* **Unified UI:** Das Frontend fügt alle Segmente zu einem nahtlosen Dokument zusammen und bietet interaktive Analyse-Funktionen. --- -## 2. Key Features (v0.5.0) +## 2. Key Features (v0.6.0) + +### 🚀 **NEU:** AI Insights auf Knopfdruck +* **Meeting-Protokoll:** Erstellt automatisch ein formelles Protokoll (Meeting Minutes) mit Agenda, Entscheidungen und nächsten Schritten. +* **Action Items:** Extrahiert eine Aufgabenliste mit Verantwortlichen und Fälligkeiten direkt aus dem Gespräch. +* **Rollenbasierte Zusammenfassungen:** Generiert spezifische Zusammenfassungen, z.B. eine "Sales Summary", die sich auf Kundenbedürfnisse, Kaufsignale und nächste Schritte für das Vertriebsteam konzentriert. ### 🎙️ Intelligente Transkription * Unterstützt MP3/WAV bis 500MB. @@ -34,6 +40,7 @@ Der **Meeting Assistant** ist eine leistungsstarke Suite zur Transkription und B ### 📝 Editor & Export * **Inline-Edit:** Jeder Textblock und jeder Sprechername kann durch direktes Anklicken korrigiert werden. * **Copy Full Transcript:** Kopiert das gesamte, bereinigte Transkript inkl. Timestamps in die Zwischenablage. +* **Copy Insights:** Jedes Analyse-Ergebnis kann einfach in die Zwischenablage kopiert werden. --- @@ -43,7 +50,8 @@ Der **Meeting Assistant** ist eine leistungsstarke Suite zur Transkription und B | :--- | :--- | :--- | | `GET` | `/meetings` | Liste aller Meetings. | | `POST` | `/upload` | Audio-Upload & Prozess-Start. | -| `POST` | `.../rename_speaker` | **Neu:** Globale Umbenennung in der DB. | +| `POST` | `/meetings/{id}/insights` | **Neu:** Generiert eine Analyse (z.B. Protokoll, Action Items). | +| `POST` | `/meetings/{id}/rename_speaker` | Globale Umbenennung in der DB. | | `PUT` | `/chunks/{id}` | Speichert manuelle Text-Korrekturen. | | `DELETE` | `/meetings/{id}` | Vollständiges Löschen. | @@ -51,5 +59,6 @@ Der **Meeting Assistant** ist eine leistungsstarke Suite zur Transkription und B ## 4. Roadmap -* **v0.6: AI Insights:** Extraktion von Aufgaben (Action Items) und Zusammenfassungen per Button. * **v0.7: Search:** Globale Suche über alle Transkripte hinweg. +* **v0.8: Q&A an das Meeting:** Ermöglicht, Fragen direkt an das Transkript zu stellen ("Was wurde zu Thema X beschlossen?"). +* **v0.9: Export-Formate:** Export der Ergebnisse in verschiedene Formate (z.B. PDF, DOCX). diff --git a/transcription-tool/backend/app.py b/transcription-tool/backend/app.py index 5fd85dd0..dd462d3d 100644 --- a/transcription-tool/backend/app.py +++ b/transcription-tool/backend/app.py @@ -11,6 +11,7 @@ from datetime import datetime from .config import settings from .database import init_db, get_db, Meeting, TranscriptChunk, AnalysisResult, SessionLocal from .services.orchestrator import process_meeting_task +from .services.insights_service import generate_insight # Initialize FastAPI App app = FastAPI( @@ -42,7 +43,8 @@ def list_meetings(db: Session = Depends(get_db)): @app.get("/api/meetings/{meeting_id}") def get_meeting(meeting_id: int, db: Session = Depends(get_db)): meeting = db.query(Meeting).options( - joinedload(Meeting.chunks) + joinedload(Meeting.chunks), + joinedload(Meeting.analysis_results) # Eager load analysis results ).filter(Meeting.id == meeting_id).first() if not meeting: @@ -99,6 +101,26 @@ async def upload_audio( from pydantic import BaseModel +class InsightRequest(BaseModel): + insight_type: str + +@app.post("/api/meetings/{meeting_id}/insights") +def create_insight(meeting_id: int, payload: InsightRequest, db: Session = Depends(get_db)): + """ + Triggers the generation of a specific insight (e.g., meeting minutes, action items). + If the insight already exists, it returns the stored result. + Otherwise, it generates, stores, and returns the new insight. + """ + try: + insight = generate_insight(db, meeting_id, payload.insight_type) + return insight + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + # For unexpected errors, return a generic 500 error + print(f"ERROR: Unexpected error in create_insight: {e}") + raise HTTPException(status_code=500, detail="An internal error occurred while generating the insight.") + class RenameRequest(BaseModel): old_name: str new_name: str diff --git a/transcription-tool/backend/prompt_library.py b/transcription-tool/backend/prompt_library.py new file mode 100644 index 00000000..41afc8c3 --- /dev/null +++ b/transcription-tool/backend/prompt_library.py @@ -0,0 +1,113 @@ + +MEETING_MINUTES_PROMPT = """ +You are a professional assistant specialized in summarizing meeting transcripts. +Your task is to create a formal and structured protocol (Meeting Minutes) from the provided transcript. + +Please analyze the following transcript and generate the Meeting Minutes in German. + +**Transcript:** +--- +{transcript_text} +--- + +**Instructions for the Meeting Minutes:** + +1. **Header:** + * Start with a clear title: "Meeting-Protokoll". + * Add a placeholder for the meeting date: "Datum: [Datum des Meetings]". + +2. **Agenda Items:** + * Identify the main topics discussed. + * Structure the protocol using these topics as headlines (e.g., "Tagesordnungspunkt 1: [Thema]"). + +3. **Key Discussions & Decisions:** + * For each agenda item, summarize the key points of the discussion. + * Clearly list all decisions that were made. Use a format like "**Entscheidung:** ...". + +4. **Action Items (Next Steps):** + * Extract all clear tasks or action items. + * For each action item, identify the responsible person (Owner) and the deadline, if mentioned. + * Present the action items in a clear list under a headline "Nächste Schritte / Action Items". Use the format: "- [Aufgabe] (Verantwortlich: [Person], Fällig bis: [Datum/unbestimmt])". + +5. **General Tone & Language:** + * The protocol must be written in formal, professional German. + * Be concise and focus on the essential information (discussions, decisions, tasks). + * Do not invent information that is not present in the transcript. + +Please provide the output in Markdown format. +""" + +ACTION_ITEMS_PROMPT = """ +You are a highly efficient assistant focused on productivity and task management. +Your goal is to extract all actionable tasks (Action Items) from a meeting transcript. + +Please analyze the following transcript and list all tasks. + +**Transcript:** +--- +{transcript_text} +--- + +**Instructions for the Action Item List:** + +1. **Extraction:** + * Carefully read the entire transcript and identify every statement that constitutes a task, a to-do, or a commitment to do something. + * Ignore general discussions, opinions, and status updates. Focus only on future actions. + +2. **Format:** + * Present the extracted tasks as a bulleted list. + * For each task, clearly state: + * **What** needs to be done. + * **Who** is responsible for it (Owner). + * **When** it should be completed by (Due Date), if mentioned. + +3. **Output Structure:** + * Use the following format for each item: `- [Task Description] (Owner: [Person's Name], Due: [Date/unspecified])` + * If the owner or due date is not explicitly mentioned, use "[unbestimmt]". + * The list should be titled "Action Item Liste". + +4. **Language:** + * The output should be in German. + +Please provide the output in Markdown format. +""" + +SALES_SUMMARY_PROMPT = """ +You are a Senior Sales Manager analyzing a meeting transcript from a client conversation. +Your objective is to create a concise, rollenbasierte Zusammenfassung for the sales team. The summary should highlight key information relevant to closing a deal. + +Please analyze the following transcript and generate a Sales Summary. + +**Transcript:** +--- +{transcript_text} +--- + +**Instructions for the Sales Summary:** + +1. **Customer Needs & Pain Points:** + * Identify and list the core problems, challenges, and needs expressed by the client. + * What are their primary business goals? + +2. **Buying Signals:** + * Extract any phrases or questions that indicate a strong interest in the product/service (e.g., questions about price, implementation, specific features). + +3. **Key Decision-Makers:** + * Identify the people in the meeting who seem to have the most influence on the purchasing decision. Note their role or title if mentioned. + +4. **Budget & Timeline:** + * Note any mentions of budget, pricing expectations, or the timeline for their decision-making process. + +5. **Next Steps (from a Sales Perspective):** + * What are the immediate next actions the sales team needs to take to move this deal forward? (e.g., "Send proposal," "Schedule demo for the technical team"). + +**Output Format:** +* Use clear headings for each section (e.g., "Kundenbedürfnisse & Pain Points", "Kaufsignale"). +* Use bullet points for lists. +* The language should be direct, actionable, and in German. + +Please provide the output in Markdown format. +""" + +# You can add more prompts here for other analysis types. +# For example, a prompt for a technical summary, a marketing summary, etc. diff --git a/transcription-tool/backend/services/insights_service.py b/transcription-tool/backend/services/insights_service.py new file mode 100644 index 00000000..621232c7 --- /dev/null +++ b/transcription-tool/backend/services/insights_service.py @@ -0,0 +1,110 @@ +import sys +import os +from sqlalchemy.orm import Session +from .. import database +from .. import prompt_library + +# Add project root to path to allow importing from 'helpers' +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))) +from helpers import call_gemini_flash + +def _format_transcript(chunks: list[database.TranscriptChunk]) -> str: + """ + Formats the transcript chunks into a single, human-readable string. + Example: "[00:00:01] Speaker A: Hello world." + """ + full_transcript = [] + # Sort chunks by their index to ensure correct order + sorted_chunks = sorted(chunks, key=lambda c: c.chunk_index) + + for chunk in sorted_chunks: + if not chunk.json_content: + continue + + for item in chunk.json_content: + # json_content can be a list of dicts + if isinstance(item, dict): + speaker = item.get('speaker', 'Unknown') + start_time = item.get('start', 0) + text = item.get('line', '') + + # Format timestamp from seconds to HH:MM:SS + hours, remainder = divmod(int(start_time), 3600) + minutes, seconds = divmod(remainder, 60) + timestamp = f"{hours:02}:{minutes:02}:{seconds:02}" + + full_transcript.append(f"[{timestamp}] {speaker}: {text}") + + return "\n".join(full_transcript) + +def get_prompt_by_type(insight_type: str) -> str: + """ + Returns the corresponding prompt from the prompt_library based on the type. + """ + if insight_type == "meeting_minutes": + return prompt_library.MEETING_MINUTES_PROMPT + elif insight_type == "action_items": + return prompt_library.ACTION_ITEMS_PROMPT + elif insight_type == "sales_summary": + return prompt_library.SALES_SUMMARY_PROMPT + else: + raise ValueError(f"Unknown insight type: {insight_type}") + +def generate_insight(db: Session, meeting_id: int, insight_type: str) -> database.AnalysisResult: + """ + Generates a specific insight for a meeting, stores it, and returns it. + Checks for existing analysis to avoid re-generating. + """ + # 1. Check if the insight already exists + existing_insight = db.query(database.AnalysisResult).filter( + database.AnalysisResult.meeting_id == meeting_id, + database.AnalysisResult.prompt_key == insight_type + ).first() + + if existing_insight: + return existing_insight + + # 2. Get the meeting and its transcript + meeting = db.query(database.Meeting).filter(database.Meeting.id == meeting_id).first() + if not meeting: + raise ValueError(f"Meeting with id {meeting_id} not found.") + + if not meeting.chunks: + raise ValueError(f"Meeting with id {meeting_id} has no transcript chunks.") + + # 3. Format the transcript and select the prompt + transcript_text = _format_transcript(meeting.chunks) + if not transcript_text.strip(): + raise ValueError(f"Transcript for meeting {meeting_id} is empty.") + + prompt_template = get_prompt_by_type(insight_type) + final_prompt = prompt_template.format(transcript_text=transcript_text) + + # 4. Call the AI model + # Update meeting status + meeting.status = "ANALYZING" + db.commit() + + try: + generated_text = call_gemini_flash(prompt=final_prompt, temperature=0.5) + + # 5. Store the new insight + new_insight = database.AnalysisResult( + meeting_id=meeting_id, + prompt_key=insight_type, + result_text=generated_text + ) + db.add(new_insight) + + meeting.status = "COMPLETED" + db.commit() + db.refresh(new_insight) + + return new_insight + + except Exception as e: + meeting.status = "ERROR" + db.commit() + # Log the error properly in a real application + print(f"Error generating insight for meeting {meeting_id}: {e}") + raise diff --git a/transcription-tool/frontend/src/App.tsx b/transcription-tool/frontend/src/App.tsx index ac893a76..b77cd571 100644 --- a/transcription-tool/frontend/src/App.tsx +++ b/transcription-tool/frontend/src/App.tsx @@ -1,10 +1,19 @@ import React, { useState, useEffect } from 'react' import axios from 'axios' -import { Upload, FileText, Clock, CheckCircle2, Loader2, AlertCircle, Trash2, ArrowLeft, Copy, Edit3, X, Scissors, Users } from 'lucide-react' +import { Upload, FileText, Clock, CheckCircle2, Loader2, AlertCircle, Trash2, ArrowLeft, Copy, Edit3, X, Scissors, Users, Wand2 } from 'lucide-react' import clsx from 'clsx' const API_BASE = '/tr/api' +// --- INTERFACES --- + +interface AnalysisResult { + id: number; + prompt_key: string; + result_text: string; + created_at: string; +} + interface TranscriptMessage { time: string display_time: string @@ -30,8 +39,29 @@ interface Meeting { duration_seconds?: number created_at: string chunks?: Chunk[] + analysis_results?: AnalysisResult[] } +// --- MODAL COMPONENT --- + +const InsightModal = ({ title, content, onClose, onCopy }: { title: string, content: string, onClose: () => void, onCopy: (text: string) => void }) => ( +
{content}
+