Files
Brancheneinstufung2/transcription-tool/backend/services/insights_service.py
Floke adafab61ae fix(transcription): [2f388f42] finalize and fix AI insights feature
This commit resolves all outstanding issues with the AI Insights feature.

- Corrects the transcript formatting logic in `insights_service.py` to properly handle the database JSON structure, ensuring the AI receives the correct context.
- Fixes the Gemini API client by using the correct model name ('gemini-2.0-flash') and the proper client initialization.
- Updates  to securely pass the API key as an environment variable to the container.
- Cleans up the codebase by removing temporary debugging endpoints.
- Adds  script for programmatic updates.
- Updates documentation with troubleshooting insights from the implementation process.
2026-01-26 08:53:13 +00:00

134 lines
4.6 KiB
Python

import logging
import os
import sys

from sqlalchemy.orm import Session

from .. import database
from .. import prompt_library
from ..lib.gemini_client import call_gemini_flash

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _format_transcript(chunks: list[database.TranscriptChunk]) -> str:
    """
    Flatten the transcript chunks into one human-readable string, with
    messages ordered chronologically by their 'absolute_seconds' value.
    """
    messages = []
    for chunk in chunks:
        payload = chunk.json_content
        if not payload:
            continue
        # json_content is usually a list of dicts, but occasionally a list
        # wrapping a single inner list of dicts — unwrap one level if so.
        if payload and isinstance(payload[0], list):
            payload = payload[0]
        # Keep only dict entries; anything else is malformed and skipped.
        messages.extend(entry for entry in payload if isinstance(entry, dict))

    # Order every message across all chunks; missing timestamps sort first (0).
    messages.sort(key=lambda entry: entry.get('absolute_seconds', 0))

    rendered = []
    for entry in messages:
        who = entry.get('speaker', 'Unknown')
        said = entry.get('text', '')  # Changed from 'line' to 'text' to match the JSON
        raw_seconds = entry.get('absolute_seconds', 0)
        # Render the timestamp as HH:MM:SS; fall back to zero on bad values.
        try:
            whole = int(float(raw_seconds))
            hrs, leftover = divmod(whole, 3600)
            mins, secs = divmod(leftover, 60)
            stamp = f"{hrs:02}:{mins:02}:{secs:02}"
        except (ValueError, TypeError):
            stamp = "00:00:00"
        rendered.append(f"[{stamp}] {who}: {said}")
    return "\n".join(rendered)
def get_prompt_by_type(insight_type: str) -> str:
    """
    Look up the prompt in the prompt_library that corresponds to the
    requested insight type.

    Raises:
        ValueError: if insight_type is not a recognized type.
    """
    # Map each insight type to the prompt_library attribute holding its
    # template; getattr keeps the attribute access lazy, exactly like the
    # branch that would have matched in an if/elif chain.
    attribute_names = {
        "meeting_minutes": "MEETING_MINUTES_PROMPT",
        "action_items": "ACTION_ITEMS_PROMPT",
        "sales_summary": "SALES_SUMMARY_PROMPT",
    }
    attr = attribute_names.get(insight_type)
    if attr is None:
        raise ValueError(f"Unknown insight type: {insight_type}")
    return getattr(prompt_library, attr)
def generate_insight(db: Session, meeting_id: int, insight_type: str) -> database.AnalysisResult:
    """
    Generate a specific insight for a meeting, persist it, and return it.

    NOTE: any existing AnalysisResult for (meeting_id, insight_type) is
    deleted first, so every call regenerates the insight from scratch.

    Args:
        db: Active SQLAlchemy session; this function commits multiple times
            as a side effect (delete, status updates, final insert).
        meeting_id: Primary key of the meeting to analyze.
        insight_type: One of the types accepted by get_prompt_by_type.

    Returns:
        The freshly persisted AnalysisResult row (refreshed after commit).

    Raises:
        ValueError: unknown insight_type, meeting not found, meeting has no
            transcript chunks, or the formatted transcript is empty.
        Exception: any error from the model call is re-raised after the
            meeting's status is set to "ERROR".
    """
    # 1. Remove any previously stored insight of this type so the caller
    #    always gets a freshly generated result.
    existing_insight = db.query(database.AnalysisResult).filter(
        database.AnalysisResult.meeting_id == meeting_id,
        database.AnalysisResult.prompt_key == insight_type
    ).first()
    if existing_insight:
        db.delete(existing_insight)
        db.commit()
    # 2. Get the meeting and its transcript
    meeting = db.query(database.Meeting).filter(database.Meeting.id == meeting_id).first()
    if not meeting:
        raise ValueError(f"Meeting with id {meeting_id} not found.")
    if not meeting.chunks:
        raise ValueError(f"Meeting with id {meeting_id} has no transcript chunks.")
    # 3. Format the transcript and select the prompt
    transcript_text = _format_transcript(meeting.chunks)
    if not transcript_text.strip():
        # This can happen if all chunks are empty or malformed
        raise ValueError(f"Formatted transcript for meeting {meeting_id} is empty or could not be processed.")
    prompt_template = get_prompt_by_type(insight_type)
    final_prompt = prompt_template.format(transcript_text=transcript_text)
    # 4. Call the AI model. The status change is committed first so other
    #    requests/readers observe that analysis is in progress.
    meeting.status = "ANALYZING"
    db.commit()
    try:
        generated_text = call_gemini_flash(prompt=final_prompt, temperature=0.5)
        # 5. Store the new insight
        new_insight = database.AnalysisResult(
            meeting_id=meeting_id,
            prompt_key=insight_type,
            result_text=generated_text
        )
        db.add(new_insight)
        meeting.status = "COMPLETED"
        db.commit()
        db.refresh(new_insight)
        return new_insight
    except Exception as e:
        # Persist the failure state before propagating, so the meeting is
        # not left stuck in "ANALYZING" after an error.
        meeting.status = "ERROR"
        db.commit()
        logger.error(f"Error generating insight for meeting {meeting_id}: {e}")
        raise