import logging
import os
import sys

from sqlalchemy.orm import Session

from .. import database
from ..prompt_library import get_prompt
from .llm_service import call_gemini_api

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _format_transcript(chunks: list[database.TranscriptChunk]) -> str:
|
|
"""
|
|
Formats the transcript chunks into a single, human-readable string,
|
|
sorted chronologically using the absolute_seconds timestamp.
|
|
"""
|
|
all_messages = []
|
|
|
|
for chunk in chunks:
|
|
if not chunk.json_content:
|
|
continue
|
|
|
|
# The content can be a list of dicts, or sometimes a list containing a list of dicts
|
|
content_list = chunk.json_content
|
|
if content_list and isinstance(content_list[0], list):
|
|
content_list = content_list[0]
|
|
|
|
for item in content_list:
|
|
if isinstance(item, dict):
|
|
all_messages.append(item)
|
|
|
|
# Sort all messages from all chunks chronologically
|
|
# Use a default of 0 for absolute_seconds if the key is missing
|
|
sorted_messages = sorted(all_messages, key=lambda msg: msg.get('absolute_seconds', 0))
|
|
|
|
full_transcript = []
|
|
for msg in sorted_messages:
|
|
speaker = msg.get('speaker', 'Unknown')
|
|
text = msg.get('text', '') # Changed from 'line' to 'text' to match the JSON
|
|
|
|
# Use the reliable absolute_seconds for timestamp calculation
|
|
absolute_seconds = msg.get('absolute_seconds', 0)
|
|
|
|
try:
|
|
time_in_seconds = float(absolute_seconds)
|
|
hours, remainder = divmod(int(time_in_seconds), 3600)
|
|
minutes, seconds = divmod(remainder, 60)
|
|
timestamp = f"{hours:02}:{minutes:02}:{seconds:02}"
|
|
except (ValueError, TypeError):
|
|
timestamp = "00:00:00"
|
|
|
|
full_transcript.append(f"[{timestamp}] {speaker}: {text}")
|
|
|
|
return "\n".join(full_transcript)
|
|
|
|
|
|
|
|
def generate_insight(db: Session, meeting_id: int, insight_type: str) -> database.AnalysisResult:
    """Generate a specific insight for a meeting, persist it, and return it.

    Any previously stored result for the same (meeting, insight_type) pair is
    deleted first so the insight is always regenerated fresh.

    Args:
        db: Active SQLAlchemy session.
        meeting_id: Primary key of the meeting to analyze.
        insight_type: Prompt key selecting which analysis to run.

    Returns:
        The newly created and committed AnalysisResult row.

    Raises:
        ValueError: If the meeting does not exist, has no transcript chunks,
            or its formatted transcript is empty.
        Exception: Re-raises any failure from the model call or persistence,
            after setting the meeting status to "ERROR".
    """
    # 1. Delete any existing result so the user can regenerate.
    existing_insight = db.query(database.AnalysisResult).filter(
        database.AnalysisResult.meeting_id == meeting_id,
        database.AnalysisResult.prompt_key == insight_type,
    ).first()
    if existing_insight:
        db.delete(existing_insight)
        db.commit()

    # 2. Load the meeting and verify it has a transcript.
    meeting = db.query(database.Meeting).filter(database.Meeting.id == meeting_id).first()
    if not meeting:
        raise ValueError(f"Meeting with id {meeting_id} not found.")
    if not meeting.chunks:
        raise ValueError(f"Meeting with id {meeting_id} has no transcript chunks.")

    # 3. Format the transcript and build the prompt.
    transcript_text = _format_transcript(meeting.chunks)
    if not transcript_text.strip():
        # This can happen if all chunks are empty or malformed.
        raise ValueError(f"Formatted transcript for meeting {meeting_id} is empty or could not be processed.")

    prompt_template = get_prompt(insight_type)
    final_prompt = prompt_template.format(transcript_text=transcript_text)

    # 4. Call the AI model. Status is committed before the call so other
    # readers can see the meeting is being analyzed.
    meeting.status = "ANALYZING"
    db.commit()

    try:
        # BUG FIX: previous code called the undefined name `call_gemini_flash`,
        # which raised NameError on every invocation (and, being inside this
        # try, marked every meeting ERROR). The module imports call_gemini_api.
        generated_text = call_gemini_api(prompt=final_prompt, temperature=0.5)

        # 5. Store the new insight.
        new_insight = database.AnalysisResult(
            meeting_id=meeting_id,
            prompt_key=insight_type,
            result_text=generated_text,
        )
        db.add(new_insight)

        meeting.status = "COMPLETED"
        db.commit()
        db.refresh(new_insight)

        return new_insight

    except Exception as e:
        # Mark the meeting failed before propagating so callers/UI can see it.
        meeting.status = "ERROR"
        db.commit()
        logger.error(f"Error generating insight for meeting {meeting_id}: {e}")
        raise
|