Files
Brancheneinstufung2/scripts/generate_weekly_summary.py
Floke 46301f9b8c [31e88f42] Feat: Update weekly summary to use checkboxes for todos
- Modified the summarization prompt to instruct the Gemini model to use checkboxes for "Nächste Schritte / Offene To-Dos".
- Updated the existing weekly summaries to replace bullet points with checkboxes in all "Nächste Schritte / Offene To-Dos" sections.
2026-03-09 14:58:30 +00:00

320 lines
13 KiB
Python

import os
import re
import datetime
import json
import requests
from typing import List, Dict, Tuple
from dotenv import load_dotenv
import sys
# Make dev_session from /app available
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from dev_session import find_database_by_title, query_notion_database, get_page_content, get_page_title
def parse_time(time_str: str) -> float:
    """Parse an 'HH:MM' string into decimal hours.

    Args:
        time_str: Time string in 'HH:MM' format (e.g. "01:30").

    Returns:
        Decimal hours (e.g. 1.5), or 0.0 when the input is malformed.
    """
    try:
        hours, minutes = map(int, time_str.split(':'))
    except (ValueError, AttributeError):
        # ValueError: non-numeric parts or wrong number of ':'-separated
        # fields; AttributeError: not a string. Previously a bare `except:`
        # which also swallowed KeyboardInterrupt/SystemExit.
        return 0.0
    return hours + (minutes / 60.0)
def format_time(decimal_hours: float) -> str:
    """Render a decimal-hours value as a zero-padded 'HH:MM' string."""
    h = int(decimal_hours)
    m = int(round((decimal_hours - h) * 60))
    # Rounding the fractional part can yield a full 60 minutes;
    # carry it into the hour so we never emit "HH:60".
    if m == 60:
        h, m = h + 1, 0
    return f"{h:02d}:{m:02d}"
def extract_status_updates(content: str, cutoff_date: datetime.datetime) -> List[Dict]:
    """Collect dated status updates from a task page's markdown.

    Matches headings of the form
    '## 🤖 Status-Update (YYYY-MM-DD HH:MM ...)' that are immediately
    followed by a fenced code block, keeping only updates dated on or
    after *cutoff_date*.

    Args:
        content: Raw markdown text of a Notion task page.
        cutoff_date: Earliest (inclusive) date an update may carry.

    Returns:
        A list of dicts with keys 'date', 'time', 'invested_hours'
        (decimal hours) and 'summary'.
    """
    heading_re = re.compile(
        r"## 🤖 Status-Update \((?P<date>\d{4}-\d{2}-\d{2}) (?P<time>\d{2}:\d{2}).*?\)\n```\n(?P<body>.*?)\n```",
        re.DOTALL,
    )
    invested_re = re.compile(r"Investierte Zeit in dieser Session:\s*(?P<hhmm>\d{2}:\d{2})")
    summary_re = re.compile(r"Arbeitszusammenfassung:\s*(.*)", re.DOTALL)

    collected: List[Dict] = []
    for hit in heading_re.finditer(content):
        if datetime.datetime.strptime(hit.group('date'), "%Y-%m-%d") < cutoff_date:
            continue  # outside the reporting window
        body = hit.group('body').strip()
        invested = 0.0
        time_hit = invested_re.search(body)
        if time_hit:
            # The regex guarantees two zero-padded digit groups, so this
            # parse cannot fail.
            hh, mm = time_hit.group('hhmm').split(':')
            invested = int(hh) + int(mm) / 60.0
        summary_hit = summary_re.search(body)
        collected.append({
            "date": hit.group('date'),
            "time": hit.group('time'),
            "invested_hours": invested,
            "summary": summary_hit.group(1).strip() if summary_hit else body,
        })
    return collected
def generate_global_executive_summary(api_key: str, all_project_summaries: str) -> str:
    """Ask Gemini for a 3-5 bullet cross-project executive summary.

    Args:
        api_key: Google Generative Language API key; falsy disables the call.
        all_project_summaries: Concatenated, already-summarized per-project
            milestone text (built by main()).

    Returns:
        The model's bullet-point text, stripped, or "" when the key is
        missing or the request fails.
    """
    if not api_key:
        return ""
    url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={api_key}"
    headers = {'Content-Type': 'application/json'}
    # The prompt is German because the generated report is German-language.
    prompt = f"""
Du bist der CTO, der am Montagmorgen ein kurzes Management-Briefing (Executive Summary) für die Geschäftsführung gibt.
Hier sind die bereits aufbereiteten Meilensteine aller Projekte der letzten Woche:
<projekte>
{all_project_summaries}
</projekte>
Deine Aufgabe:
Fasse die 3 bis maximal 5 ABSOLUT WICHTIGSTEN, übergreifenden "Major Milestones" und Fortschritte der *gesamten Woche* extrem komprimiert und stichpunktartig zusammen.
Fokussiere dich auf den echten "Business Value", ausgelieferte Features oder große technische Durchbrüche.
Verwende folgendes Format (starte direkt mit den Bullet-Points):
- 🚀 **[Kurzer Titel/Projekt]**: [1 Satz mit dem Kern-Ergebnis]
- 💡 **[Erkenntnis/Entscheidung]**: [1 Satz]
- ...
Schreibe keinen einleitenden Text und kein Fazit. Nur diese prägnanten, professionellen Bullet-Points.
"""
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        # Low temperature for stable, factual output.
        "generationConfig": {"temperature": 0.2}
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        data = response.json()
        # generateContent response shape: first candidate, first content part.
        summary = data['candidates'][0]['content']['parts'][0]['text']
        return summary.strip()
    except Exception as e:
        # Best effort: a failed call degrades to an empty summary and the
        # caller simply omits the highlights section.
        print(f"Fehler bei der globalen Executive Summary: {e}")
        return ""
def summarize_with_gemini(api_key: str, project_name: str, total_hours: float, raw_updates: str) -> str:
    """Summarize one project's weekly raw update logs via the Gemini REST API.

    Args:
        api_key: Google Generative Language API key; falsy disables the call.
        project_name: Display name of the project being summarized.
        total_hours: Decimal hours invested this week (rendered into the prompt).
        raw_updates: Concatenated raw status-update text for the week.

    Returns:
        The model's markdown summary; on a missing key or a failed request,
        a fallback string that still contains the raw updates so the report
        is never empty.
    """
    if not api_key:
        # No key: degrade gracefully to the uncompressed raw logs.
        return "Kein Gemini API-Key gefunden. Generiere unkomprimierte Zusammenfassung...\n\n" + raw_updates
    url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={api_key}"
    headers = {'Content-Type': 'application/json'}
    # The prompt is German because the generated report is German-language.
    prompt = f"""
Du bist ein technischer Projektmanager, der einen prägnanten Executive Summary für ein wöchentliches Montags-Meeting vorbereitet.
Deine Aufgabe ist es, die unstrukturierten Status-Updates des Entwicklers der letzten Woche zusammenzufassen.
Projekt: {project_name}
Investierte Zeit diese Woche: {format_time(total_hours)}
Hier sind die rohen Update-Logs der Woche:
<logs>
{raw_updates}
</logs>
Erstelle eine stark komprimierte Zusammenfassung mit folgendem Markdown-Format (verwende keine h1/h2, starte direkt mit Text oder h3):
### 🏆 Major Milestones
(Was wurde konkret erreicht/ausgeliefert/abgeschlossen? Max. 3-4 prägnante Bullet-Points)
### 💡 Wichtige Beschlüsse / Erkenntnisse
(Falls im Log vorhanden. Sonst weglassen. Max 2 Bullet-Points)
### 🚀 Nächste Schritte / Offene To-Dos
(Welche To-Dos wurden explizit für die Zukunft genannt? Verwende Checkboxen `[ ]`. Max 3 Checkboxen)
Fasse dich so kurz und präzise wie möglich. Ignoriere kleine Detail-Änderungen im Code und fokussiere dich auf den "Impact" und die übergeordneten Ziele.
"""
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        # Low temperature for stable, factual output.
        "generationConfig": {"temperature": 0.2}
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        data = response.json()
        # generateContent response shape: first candidate, first content part.
        summary = data['candidates'][0]['content']['parts'][0]['text']
        return summary.strip()
    except Exception as e:
        # Best effort: on failure, keep the raw data in the report so the
        # week's work is not silently lost.
        print(f"Fehler bei der Gemini-Zusammenfassung für {project_name}: {e}")
        return f"Fehler bei der Zusammenfassung.\n\nRohdaten:\n{raw_updates}"
def generate_ascii_bar_chart(report_data: Dict, max_width: int = 40) -> str:
    """Build a text bar chart of invested hours per project.

    Args:
        report_data: Mapping of project name -> dict containing at least
            an 'invested_hours' float.
        max_width: Character width of the longest bar.

    Returns:
        A fenced ```text block with one row per project (hours > 0 only),
        sorted by invested hours descending.
    """
    lines = ["```text"]
    lines.append("Zeitverteilung nach Projekten (Stunden)")
    lines.append("-" * 50)
    max_hours = max((p_data['invested_hours'] for p_data in report_data.values()), default=0)
    for project, p_data in sorted(report_data.items(), key=lambda x: x[1]['invested_hours'], reverse=True):
        hours = p_data['invested_hours']
        if hours > 0:
            # Scale the bar relative to the busiest project.
            bar_len = int((hours / max_hours) * max_width) if max_hours > 0 else 0
            # Bug fix: the bar glyph was the empty string ("" * n), so no bar
            # was ever rendered. Use a full-block character instead.
            bar = "█" * bar_len
            # Truncate long project names so columns stay aligned.
            project_short = (project[:25] + '..') if len(project) > 27 else project
            lines.append(f"{project_short:<27} | {format_time(hours):>6} | {bar}")
    lines.append("```")
    return "\n".join(lines)
def main():
    """Generate the weekly executive summary markdown report.

    Pipeline: load credentials from .env, fetch Notion tasks edited in the
    last 7 days, extract their status updates, summarize each project via
    Gemini, prepend a global executive summary and an ASCII time chart, and
    write the report to weekly/ (plus a LATEST_WEEKLY_SUMMARY.md shortcut).
    """
    # The .env file lives in the project root (one directory above scripts/).
    load_dotenv(os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')), '.env'))
    token = os.environ.get('NOTION_API_KEY')
    gemini_key = os.environ.get('GEMINI_API_KEY')
    if not token:
        print("Error: NOTION_API_KEY environment variable not found.")
        return
    print("Fetching Notion configuration...")
    tasks_db_id = find_database_by_title(token, "Tasks [UT]")
    projects_db_id = find_database_by_title(token, "Projects [UT]")
    if not tasks_db_id or not projects_db_id:
        print("Could not find Tasks [UT] or Projects [UT] databases.")
        return
    # 1. Fetch Projects for lookup (page id -> project title)
    print("Fetching Projects...")
    projects_data = query_notion_database(token, projects_db_id)
    project_lookup = {}
    for proj in projects_data:
        p_id = proj['id']
        p_name = get_page_title(proj)
        project_lookup[p_id] = p_name
    # 2. Fetch Tasks modified in the last 7 days
    # NOTE(review): utcnow() is naive (and deprecated since 3.12); the value
    # is later compared against naive strptime() dates in
    # extract_status_updates, so it must stay naive if modernized.
    now = datetime.datetime.utcnow()
    cutoff_date = now - datetime.timedelta(days=7)
    cutoff_iso = cutoff_date.isoformat() + "Z"
    # Notion filter on the database's "Edited" last_edited_time property.
    filter_payload = {
        "property": "Edited",
        "last_edited_time": {
            "on_or_after": cutoff_iso
        }
    }
    print(f"Fetching Tasks edited since {cutoff_date.strftime('%Y-%m-%d')}...")
    tasks_data = query_notion_database(token, tasks_db_id, filter_payload=filter_payload)
    print(f"Found {len(tasks_data)} recently edited tasks.")
    # report_data: project name -> {"invested_hours": float,
    #                               "tasks": {task name: [update dicts]}}
    report_data = {}
    for task in tasks_data:
        task_id = task['id']
        task_name = get_page_title(task)
        # Resolve the task's project via its first "Project" relation entry
        # (assumes a task belongs to at most one project -- TODO confirm).
        project_id = None
        relation_prop = task.get('properties', {}).get('Project', {}).get('relation', [])
        if relation_prop:
            project_id = relation_prop[0]['id']
        project_name = project_lookup.get(project_id, "Kein Projekt zugeordnet")
        content = get_page_content(token, task_id)
        # Compare against midnight of the cutoff day so updates from the
        # cutoff day itself are included regardless of their time.
        updates = extract_status_updates(content, cutoff_date.replace(hour=0, minute=0, second=0, microsecond=0))
        if updates:
            if project_name not in report_data:
                report_data[project_name] = {"invested_hours": 0.0, "tasks": {}}
            if task_name not in report_data[project_name]["tasks"]:
                report_data[project_name]["tasks"][task_name] = []
            for update in updates:
                report_data[project_name]["invested_hours"] += update["invested_hours"]
                report_data[project_name]["tasks"][task_name].append(update)
    if not report_data:
        print("Keine Status-Updates in den letzten 7 Tagen gefunden.")
        return
    # 3. Process individual project summaries (busiest project first)
    project_summaries = {}
    for project_name, p_data in sorted(report_data.items(), key=lambda x: x[1]['invested_hours'], reverse=True):
        print(f"Fasse zusammen (AI): {project_name} ...")
        raw_updates_text = ""
        for task_name, updates in p_data["tasks"].items():
            raw_updates_text += f"\nTASK: {task_name}\n"
            # Chronological order within each task for a coherent narrative.
            for update in sorted(updates, key=lambda x: x['date']):
                raw_updates_text += f"UPDATE ({update['date']}):\n{update['summary']}\n"
        ai_summary = summarize_with_gemini(gemini_key, project_name, p_data['invested_hours'], raw_updates_text)
        project_summaries[project_name] = ai_summary
    # 4. Generate global executive summary from the per-project summaries
    print("Erstelle globale Executive Summary...")
    combined_summaries = ""
    for proj_name, summ in project_summaries.items():
        combined_summaries += f"\nProjekt: {proj_name}\n{summ}\n"
    global_executive_summary = generate_global_executive_summary(gemini_key, combined_summaries)
    # 5. Build the markdown report
    report_lines = []
    report_lines.append(f"# 📊 Executive Weekly Summary ({cutoff_date.strftime('%Y-%m-%d')} bis {now.strftime('%Y-%m-%d')})")
    report_lines.append("")
    total_hours = sum(p_data["invested_hours"] for p_data in report_data.values())
    report_lines.append(f"**Gesamte investierte Zeit der Woche:** {format_time(total_hours)}")
    report_lines.append("")
    # Global summary section (skipped entirely when the AI call failed)
    if global_executive_summary:
        report_lines.append("## 🌟 Top Highlights der Woche")
        report_lines.append(global_executive_summary)
        report_lines.append("\n---")
        report_lines.append("")
    # Graphical time distribution
    report_lines.append("## ⏱️ Zeitverteilung & Fokus")
    report_lines.append(generate_ascii_bar_chart(report_data))
    report_lines.append("---")
    report_lines.append("")
    # Individual project sections, busiest first
    for project_name, p_data in sorted(report_data.items(), key=lambda x: x[1]['invested_hours'], reverse=True):
        report_lines.append(f"## 📁 {project_name} ({format_time(p_data['invested_hours'])})")
        report_lines.append(project_summaries[project_name])
        report_lines.append("\n---")
        report_lines.append("")
    report_content = "\n".join(report_lines)
    output_filename = f"Executive_Weekly_Summary_{now.strftime('%Y-%m-%d')}.md"
    output_path = os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')), 'weekly', output_filename)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(report_content)
    print(f"✅ Executive Weekly Summary erfolgreich generiert: {output_path}")
    # Also overwrite the stable "latest" shortcut with the same content.
    shortcut_path = os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')), 'weekly', 'LATEST_WEEKLY_SUMMARY.md')
    with open(shortcut_path, "w", encoding="utf-8") as f:
        f.write(report_content)
# Script entry point: python scripts/generate_weekly_summary.py
if __name__ == "__main__":
    main()