feat: Documentation and Tool Config Update
This commit is contained in:
@@ -30,20 +30,17 @@ class TranscriptionService:
|
||||
if media_file.state == "FAILED":
|
||||
raise Exception("File processing failed at Gemini.")
|
||||
|
||||
# 3. Transcribe with Diarization and Timestamps
|
||||
# 3. Transcribe with Diarization and Timestamps (Plain Text Mode for Stability)
|
||||
prompt = """
|
||||
Transkribiere dieses Audio wortgetreu.
|
||||
Identifiziere die Sprecher (Speaker A, Speaker B, etc.).
|
||||
|
||||
Gib das Ergebnis als JSON-Liste zurück.
|
||||
Format:
|
||||
[
|
||||
{
|
||||
"time": "MM:SS",
|
||||
"speaker": "Speaker A",
|
||||
"text": "..."
|
||||
}
|
||||
]
|
||||
Gib das Ergebnis EXAKT in diesem Format zurück (pro Zeile ein Sprecherwechsel):
|
||||
[MM:SS] Speaker Name: Gesprochener Text...
|
||||
|
||||
Beispiel:
|
||||
[00:00] Speaker A: Hallo zusammen.
|
||||
[00:05] Speaker B: Guten Morgen.
|
||||
"""
|
||||
|
||||
logger.info(f"Generating transcription for {file_path}...")
|
||||
@@ -52,14 +49,46 @@ class TranscriptionService:
|
||||
contents=[media_file, prompt],
|
||||
config=types.GenerateContentConfig(
|
||||
temperature=0.1,
|
||||
response_mime_type="application/json"
|
||||
max_output_tokens=8192
|
||||
)
|
||||
)
|
||||
|
||||
# Cleanup: Delete file from Gemini storage
|
||||
self.client.files.delete(name=media_file.name)
|
||||
|
||||
|
||||
# Parse Plain Text to JSON
|
||||
structured_data = self.parse_transcript(response.text)
|
||||
import json
|
||||
return {
|
||||
"raw_text": response.text, # This is now a JSON string
|
||||
"raw_text": json.dumps(structured_data), # Return valid JSON string
|
||||
"offset": offset_seconds
|
||||
}
|
||||
|
||||
def parse_transcript(self, text: str) -> list:
    """Parse a plain-text transcript into a list of structured entries.

    Each entry line is expected to look like
    ``[00:12] Speaker A: Hello world``; the timestamp may be ``MM:SS`` or
    ``H:MM:SS``.  A non-empty line that does not match and does not start
    with ``[`` is treated as a continuation and appended (space-separated)
    to the text of the previous entry.  Unmatched lines that start with
    ``[``, and continuation lines seen before any entry, are dropped.

    Args:
        text: Raw transcript text.  ``None`` or an empty string yields
            an empty list.

    Returns:
        A list of dicts with the keys ``"time"``, ``"speaker"`` and
        ``"text"``, in the order the entries appear in ``text``.
    """
    import re

    # The model may return no text at all; there is nothing to parse then.
    if not text:
        return []

    # [MM:SS] or [H:MM:SS], then the speaker name (no colon), then the text.
    pattern = re.compile(r"^\[(\d{1,2}:\d{2}(?::\d{2})?)\]\s*([^:]+):\s*(.+)$")

    results = []
    for raw_line in text.strip().split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        # Markdown code fences wrapped around the answer are not transcript
        # content; without this guard a closing fence would be glued onto
        # the last entry as a "continuation".
        if line.startswith("```"):
            continue

        match = pattern.match(line)
        if match:
            time_str, speaker, content = match.groups()
            results.append({
                "time": time_str,
                "speaker": speaker.strip(),
                "text": content.strip(),
            })
            continue

        # Fallback: append to the previous entry if it looks like a
        # continuation (i.e. not a malformed timestamp line).
        if results and not line.startswith("["):
            results[-1]["text"] += " " + line

    return results
|
||||
|
||||
Reference in New Issue
Block a user