Files
Brancheneinstufung2/scripts/generate_weekly_summary.py
Floke 46301f9b8c [31e88f42] Feat: Update weekly summary to use checkboxes for todos
- Modified the summarization prompt to instruct the Gemini model to use checkboxes for "Nächste Schritte / Offene To-Dos".
- Updated the existing weekly summaries to replace bullet points with checkboxes in all "Nächste Schritte / Offene To-Dos" sections.
2026-03-09 14:58:30 +00:00

320 lines
13 KiB
Python

import os
import re
import datetime
import json
import requests
from typing import List, Dict, Tuple
from dotenv import load_dotenv
import sys
# Make dev_session from /app available
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from dev_session import find_database_by_title, query_notion_database, get_page_content, get_page_title
def parse_time(time_str: str) -> float:
    """Parse an 'HH:MM' string into decimal hours.

    Args:
        time_str: Time string in 'HH:MM' format (e.g. "01:30").

    Returns:
        Decimal hours (e.g. 1.5), or 0.0 when the input is malformed.
    """
    try:
        hours, minutes = map(int, time_str.split(':'))
    except (ValueError, AttributeError):
        # ValueError: non-numeric parts or wrong number of ':'-separated
        # fields; AttributeError: not a string. Previously a bare `except:`
        # which also swallowed KeyboardInterrupt/SystemExit.
        return 0.0
    return hours + (minutes / 60.0)
def format_time(decimal_hours: float) -> str:
    """Render a decimal-hours value as a zero-padded 'HH:MM' string."""
    h = int(decimal_hours)
    m = int(round((decimal_hours - h) * 60))
    # Rounding the fractional part can yield a full 60 minutes;
    # carry it into the hour so we never emit "HH:60".
    if m == 60:
        h, m = h + 1, 0
    return f"{h:02d}:{m:02d}"
def extract_status_updates(content: str, cutoff_date: datetime.datetime) -> List[Dict]:
    """Collect dated status updates from a task page's markdown.

    Matches headings of the form
    '## 🤖 Status-Update (YYYY-MM-DD HH:MM ...)' that are immediately
    followed by a fenced code block, keeping only updates dated on or
    after *cutoff_date*.

    Args:
        content: Raw markdown text of a Notion task page.
        cutoff_date: Earliest (inclusive) date an update may carry.

    Returns:
        A list of dicts with keys 'date', 'time', 'invested_hours'
        (decimal hours) and 'summary'.
    """
    heading_re = re.compile(
        r"## 🤖 Status-Update \((?P<date>\d{4}-\d{2}-\d{2}) (?P<time>\d{2}:\d{2}).*?\)\n```\n(?P<body>.*?)\n```",
        re.DOTALL,
    )
    invested_re = re.compile(r"Investierte Zeit in dieser Session:\s*(?P<hhmm>\d{2}:\d{2})")
    summary_re = re.compile(r"Arbeitszusammenfassung:\s*(.*)", re.DOTALL)

    collected: List[Dict] = []
    for hit in heading_re.finditer(content):
        if datetime.datetime.strptime(hit.group('date'), "%Y-%m-%d") < cutoff_date:
            continue  # outside the reporting window
        body = hit.group('body').strip()
        invested = 0.0
        time_hit = invested_re.search(body)
        if time_hit:
            # The regex guarantees two zero-padded digit groups, so this
            # parse cannot fail.
            hh, mm = time_hit.group('hhmm').split(':')
            invested = int(hh) + int(mm) / 60.0
        summary_hit = summary_re.search(body)
        collected.append({
            "date": hit.group('date'),
            "time": hit.group('time'),
            "invested_hours": invested,
            "summary": summary_hit.group(1).strip() if summary_hit else body,
        })
    return collected
def generate_global_executive_summary(api_key: str, all_project_summaries: str) -> str:
    """Ask Gemini for a 3-5 bullet cross-project executive summary.

    Args:
        api_key: Google Generative Language API key; falsy disables the call.
        all_project_summaries: Concatenated, already-summarized per-project
            milestone text (built by main()).

    Returns:
        The model's bullet-point text, stripped, or "" when the key is
        missing or the request fails.
    """
    if not api_key:
        return ""
    url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={api_key}"
    headers = {'Content-Type': 'application/json'}
    # The prompt is German because the generated report is German-language.
    prompt = f"""
Du bist der CTO, der am Montagmorgen ein kurzes Management-Briefing (Executive Summary) für die Geschäftsführung gibt.
Hier sind die bereits aufbereiteten Meilensteine aller Projekte der letzten Woche:
<projekte>
{all_project_summaries}
</projekte>
Deine Aufgabe:
Fasse die 3 bis maximal 5 ABSOLUT WICHTIGSTEN, übergreifenden "Major Milestones" und Fortschritte der *gesamten Woche* extrem komprimiert und stichpunktartig zusammen.
Fokussiere dich auf den echten "Business Value", ausgelieferte Features oder große technische Durchbrüche.
Verwende folgendes Format (starte direkt mit den Bullet-Points):
- 🚀 **[Kurzer Titel/Projekt]**: [1 Satz mit dem Kern-Ergebnis]
- 💡 **[Erkenntnis/Entscheidung]**: [1 Satz]
- ...
Schreibe keinen einleitenden Text und kein Fazit. Nur diese prägnanten, professionellen Bullet-Points.
"""
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        # Low temperature for stable, factual output.
        "generationConfig": {"temperature": 0.2}
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        data = response.json()
        # generateContent response shape: first candidate, first content part.
        summary = data['candidates'][0]['content']['parts'][0]['text']
        return summary.strip()
    except Exception as e:
        # Best effort: a failed call degrades to an empty summary and the
        # caller simply omits the highlights section.
        print(f"Fehler bei der globalen Executive Summary: {e}")
        return ""
def summarize_with_gemini(api_key: str, project_name: str, total_hours: float, raw_updates: str) -> str:
    """Summarize one project's weekly raw update logs via the Gemini REST API.

    Args:
        api_key: Google Generative Language API key; falsy disables the call.
        project_name: Display name of the project being summarized.
        total_hours: Decimal hours invested this week (rendered into the prompt).
        raw_updates: Concatenated raw status-update text for the week.

    Returns:
        The model's markdown summary; on a missing key or a failed request,
        a fallback string that still contains the raw updates so the report
        is never empty.
    """
    if not api_key:
        # No key: degrade gracefully to the uncompressed raw logs.
        return "Kein Gemini API-Key gefunden. Generiere unkomprimierte Zusammenfassung...\n\n" + raw_updates
    url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={api_key}"
    headers = {'Content-Type': 'application/json'}
    # The prompt is German because the generated report is German-language.
    prompt = f"""
Du bist ein technischer Projektmanager, der einen prägnanten Executive Summary für ein wöchentliches Montags-Meeting vorbereitet.
Deine Aufgabe ist es, die unstrukturierten Status-Updates des Entwicklers der letzten Woche zusammenzufassen.
Projekt: {project_name}
Investierte Zeit diese Woche: {format_time(total_hours)}
Hier sind die rohen Update-Logs der Woche:
<logs>
{raw_updates}
</logs>
Erstelle eine stark komprimierte Zusammenfassung mit folgendem Markdown-Format (verwende keine h1/h2, starte direkt mit Text oder h3):
### 🏆 Major Milestones
(Was wurde konkret erreicht/ausgeliefert/abgeschlossen? Max. 3-4 prägnante Bullet-Points)
### 💡 Wichtige Beschlüsse / Erkenntnisse
(Falls im Log vorhanden. Sonst weglassen. Max 2 Bullet-Points)
### 🚀 Nächste Schritte / Offene To-Dos
(Welche To-Dos wurden explizit für die Zukunft genannt? Verwende Checkboxen `[ ]`. Max 3 Checkboxen)
Fasse dich so kurz und präzise wie möglich. Ignoriere kleine Detail-Änderungen im Code und fokussiere dich auf den "Impact" und die übergeordneten Ziele.
"""
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        # Low temperature for stable, factual output.
        "generationConfig": {"temperature": 0.2}
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        data = response.json()
        # generateContent response shape: first candidate, first content part.
        summary = data['candidates'][0]['content']['parts'][0]['text']
        return summary.strip()
    except Exception as e:
        # Best effort: on failure, keep the raw data in the report so the
        # week's work is not silently lost.
        print(f"Fehler bei der Gemini-Zusammenfassung für {project_name}: {e}")
        return f"Fehler bei der Zusammenfassung.\n\nRohdaten:\n{raw_updates}"
def generate_ascii_bar_chart(report_data: Dict, max_width: int = 40) -> str:
    """Build a text bar chart of invested hours per project.

    Args:
        report_data: Mapping of project name -> dict containing at least
            an 'invested_hours' float.
        max_width: Character width of the longest bar.

    Returns:
        A fenced ```text block with one row per project (hours > 0 only),
        sorted by invested hours descending.
    """
    lines = ["```text"]
    lines.append("Zeitverteilung nach Projekten (Stunden)")
    lines.append("-" * 50)
    max_hours = max((p_data['invested_hours'] for p_data in report_data.values()), default=0)
    for project, p_data in sorted(report_data.items(), key=lambda x: x[1]['invested_hours'], reverse=True):
        hours = p_data['invested_hours']
        if hours > 0:
            # Scale the bar relative to the busiest project.
            bar_len = int((hours / max_hours) * max_width) if max_hours > 0 else 0
            # Bug fix: the bar glyph was the empty string ("" * n), so no bar
            # was ever rendered. Use a full-block character instead.
            bar = "█" * bar_len
            # Truncate long project names so columns stay aligned.
            project_short = (project[:25] + '..') if len(project) > 27 else project
            lines.append(f"{project_short:<27} | {format_time(hours):>6} | {bar}")
    lines.append("```")
    return "\n".join(lines)
def main():
    """Generate the weekly executive summary markdown report.

    Pipeline: load credentials from .env, fetch Notion tasks edited in the
    last 7 days, extract their status updates, summarize each project via
    Gemini, prepend a global executive summary and an ASCII time chart, and
    write the report to weekly/ (plus a LATEST_WEEKLY_SUMMARY.md shortcut).
    """
    # The .env file lives in the project root (one directory above scripts/).
    load_dotenv(os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')), '.env'))
    token = os.environ.get('NOTION_API_KEY')
    gemini_key = os.environ.get('GEMINI_API_KEY')
    if not token:
        print("Error: NOTION_API_KEY environment variable not found.")
        return
    print("Fetching Notion configuration...")
    tasks_db_id = find_database_by_title(token, "Tasks [UT]")
    projects_db_id = find_database_by_title(token, "Projects [UT]")
    if not tasks_db_id or not projects_db_id:
        print("Could not find Tasks [UT] or Projects [UT] databases.")
        return
    # 1. Fetch Projects for lookup (page id -> project title)
    print("Fetching Projects...")
    projects_data = query_notion_database(token, projects_db_id)
    project_lookup = {}
    for proj in projects_data:
        p_id = proj['id']
        p_name = get_page_title(proj)
        project_lookup[p_id] = p_name
    # 2. Fetch Tasks modified in the last 7 days
    # NOTE(review): utcnow() is naive (and deprecated since 3.12); the value
    # is later compared against naive strptime() dates in
    # extract_status_updates, so it must stay naive if modernized.
    now = datetime.datetime.utcnow()
    cutoff_date = now - datetime.timedelta(days=7)
    cutoff_iso = cutoff_date.isoformat() + "Z"
    # Notion filter on the database's "Edited" last_edited_time property.
    filter_payload = {
        "property": "Edited",
        "last_edited_time": {
            "on_or_after": cutoff_iso
        }
    }
    print(f"Fetching Tasks edited since {cutoff_date.strftime('%Y-%m-%d')}...")
    tasks_data = query_notion_database(token, tasks_db_id, filter_payload=filter_payload)
    print(f"Found {len(tasks_data)} recently edited tasks.")
    # report_data: project name -> {"invested_hours": float,
    #                               "tasks": {task name: [update dicts]}}
    report_data = {}
    for task in tasks_data:
        task_id = task['id']
        task_name = get_page_title(task)
        # Resolve the task's project via its first "Project" relation entry
        # (assumes a task belongs to at most one project -- TODO confirm).
        project_id = None
        relation_prop = task.get('properties', {}).get('Project', {}).get('relation', [])
        if relation_prop:
            project_id = relation_prop[0]['id']
        project_name = project_lookup.get(project_id, "Kein Projekt zugeordnet")
        content = get_page_content(token, task_id)
        # Compare against midnight of the cutoff day so updates from the
        # cutoff day itself are included regardless of their time.
        updates = extract_status_updates(content, cutoff_date.replace(hour=0, minute=0, second=0, microsecond=0))
        if updates:
            if project_name not in report_data:
                report_data[project_name] = {"invested_hours": 0.0, "tasks": {}}
            if task_name not in report_data[project_name]["tasks"]:
                report_data[project_name]["tasks"][task_name] = []
            for update in updates:
                report_data[project_name]["invested_hours"] += update["invested_hours"]
                report_data[project_name]["tasks"][task_name].append(update)
    if not report_data:
        print("Keine Status-Updates in den letzten 7 Tagen gefunden.")
        return
    # 3. Process individual project summaries (busiest project first)
    project_summaries = {}
    for project_name, p_data in sorted(report_data.items(), key=lambda x: x[1]['invested_hours'], reverse=True):
        print(f"Fasse zusammen (AI): {project_name} ...")
        raw_updates_text = ""
        for task_name, updates in p_data["tasks"].items():
            raw_updates_text += f"\nTASK: {task_name}\n"
            # Chronological order within each task for a coherent narrative.
            for update in sorted(updates, key=lambda x: x['date']):
                raw_updates_text += f"UPDATE ({update['date']}):\n{update['summary']}\n"
        ai_summary = summarize_with_gemini(gemini_key, project_name, p_data['invested_hours'], raw_updates_text)
        project_summaries[project_name] = ai_summary
    # 4. Generate global executive summary from the per-project summaries
    print("Erstelle globale Executive Summary...")
    combined_summaries = ""
    for proj_name, summ in project_summaries.items():
        combined_summaries += f"\nProjekt: {proj_name}\n{summ}\n"
    global_executive_summary = generate_global_executive_summary(gemini_key, combined_summaries)
    # 5. Build the markdown report
    report_lines = []
    report_lines.append(f"# 📊 Executive Weekly Summary ({cutoff_date.strftime('%Y-%m-%d')} bis {now.strftime('%Y-%m-%d')})")
    report_lines.append("")
    total_hours = sum(p_data["invested_hours"] for p_data in report_data.values())
    report_lines.append(f"**Gesamte investierte Zeit der Woche:** {format_time(total_hours)}")
    report_lines.append("")
    # Global summary section (skipped entirely when the AI call failed)
    if global_executive_summary:
        report_lines.append("## 🌟 Top Highlights der Woche")
        report_lines.append(global_executive_summary)
        report_lines.append("\n---")
        report_lines.append("")
    # Graphical time distribution
    report_lines.append("## ⏱️ Zeitverteilung & Fokus")
    report_lines.append(generate_ascii_bar_chart(report_data))
    report_lines.append("---")
    report_lines.append("")
    # Individual project sections, busiest first
    for project_name, p_data in sorted(report_data.items(), key=lambda x: x[1]['invested_hours'], reverse=True):
        report_lines.append(f"## 📁 {project_name} ({format_time(p_data['invested_hours'])})")
        report_lines.append(project_summaries[project_name])
        report_lines.append("\n---")
        report_lines.append("")
    report_content = "\n".join(report_lines)
    output_filename = f"Executive_Weekly_Summary_{now.strftime('%Y-%m-%d')}.md"
    output_path = os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')), 'weekly', output_filename)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(report_content)
    print(f"✅ Executive Weekly Summary erfolgreich generiert: {output_path}")
    # Also overwrite the stable "latest" shortcut with the same content.
    shortcut_path = os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')), 'weekly', 'LATEST_WEEKLY_SUMMARY.md')
    with open(shortcut_path, "w", encoding="utf-8") as f:
        f.write(report_content)
# Script entry point: python scripts/generate_weekly_summary.py
if __name__ == "__main__":
    main()