feat: Documentation and Tool Config Update

This commit is contained in:
Moltbot-Jarvis
2026-02-18 09:12:04 +00:00
parent 32d40c77f4
commit 46994e4ce0
38 changed files with 859 additions and 1763 deletions

View File

@@ -30,20 +30,17 @@ class TranscriptionService:
if media_file.state == "FAILED":
raise Exception("File processing failed at Gemini.")
# 3. Transcribe with Diarization and Timestamps
# 3. Transcribe with Diarization and Timestamps (Plain Text Mode for Stability)
prompt = """
Transkribiere dieses Audio wortgetreu.
Identifiziere die Sprecher (Speaker A, Speaker B, etc.).
Gib das Ergebnis als JSON-Liste zurück.
Format:
[
{
"time": "MM:SS",
"speaker": "Speaker A",
"text": "..."
}
]
Gib das Ergebnis EXAKT in diesem Format zurück (pro Zeile ein Sprecherwechsel):
[MM:SS] Speaker Name: Gesprochener Text...
Beispiel:
[00:00] Speaker A: Hallo zusammen.
[00:05] Speaker B: Guten Morgen.
"""
logger.info(f"Generating transcription for {file_path}...")
@@ -52,14 +49,46 @@ class TranscriptionService:
contents=[media_file, prompt],
config=types.GenerateContentConfig(
temperature=0.1,
response_mime_type="application/json"
max_output_tokens=8192
)
)
# Cleanup: Delete file from Gemini storage
self.client.files.delete(name=media_file.name)
# Parse Plain Text to JSON
structured_data = self.parse_transcript(response.text)
import json
return {
"raw_text": response.text, # This is now a JSON string
"raw_text": json.dumps(structured_data), # Return valid JSON string
"offset": offset_seconds
}
def parse_transcript(self, text: str) -> list:
    """Parse a plain-text transcript into a list of structured entries.

    Each speaker turn is expected on its own line in the form
    ``[MM:SS] Speaker Name: spoken text`` (``[H:MM:SS]`` is accepted too).
    Lines that do not match and do not start with ``[`` are treated as a
    continuation of the previous turn and appended to its text.

    Args:
        text: Raw transcript text. ``None`` or an empty string is
            accepted and yields an empty list.

    Returns:
        A list of dicts with the keys ``"time"``, ``"speaker"`` and
        ``"text"``, in the order the turns appear in ``text``.
    """
    import re

    # Guard against a missing/empty model response instead of crashing on
    # ``None.strip()``.
    if not text:
        return []

    # [MM:SS] or [H:MM:SS], then the speaker up to the first colon, then the
    # spoken text. The text part may be empty so that a header whose text
    # starts on the following line still opens a new turn; otherwise the
    # continuation would be attributed to the previous speaker.
    pattern = re.compile(r"^\[(\d{1,2}:\d{2}(?::\d{2})?)\]\s*([^:]+):\s*(.*)$")

    results = []
    for raw_line in text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        match = pattern.match(line)
        if match:
            time_str, speaker, content = match.groups()
            results.append({
                "time": time_str,
                "speaker": speaker.strip(),
                "text": content.strip(),
            })
            continue

        # Fallback: a line without a leading "[" is a wrapped continuation of
        # the previous turn. Unmatched lines that start with "[" (malformed
        # headers) and text before the first turn are dropped.
        if results and not line.startswith("["):
            previous = results[-1]
            # strip() avoids a leading space when the turn's text was empty.
            previous["text"] = (previous["text"] + " " + line).strip()

    return results