diff --git a/.dev_session/SESSION_INFO b/.dev_session/SESSION_INFO index 98f908d6f..859dcc981 100644 --- a/.dev_session/SESSION_INFO +++ b/.dev_session/SESSION_INFO @@ -1 +1 @@ -{"task_id": "32788f42-8544-80e1-a13a-c26114cf9b34", "token": "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", "readme_path": "readme.md", "session_start_time": "2026-04-12T19:57:10.454150"} \ No newline at end of file +{"task_id": "34288f42-8544-800e-b866-dfcbc22bd4e5", "token": "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", "readme_path": "readme.md", "session_start_time": "2026-04-14T08:37:49.545740"} \ No newline at end of file diff --git a/fotograf-de-scraper/README.md b/fotograf-de-scraper/README.md index 36903fbcb..9e36b68a6 100644 --- a/fotograf-de-scraper/README.md +++ b/fotograf-de-scraper/README.md @@ -1,6 +1,6 @@ # Fotograf.de Scraper & Management UI -**Status:** Production-Ready Microservice (Core Feature: PDF List Generation, QR Cards, Shooting Schedule & **Gmail API Integration**) +**Status:** Production-Ready Microservice (Core Feature: PDF List Generation, QR Cards, Shooting Schedule, **Siblings List** & **Gmail API Integration**) Dieser Service modernisiert die alten `Fotograf.de` Skripte, indem er eine robuste, web-basierte UI zur Verwaltung und Automatisierung von Foto-Aufträgen bereitstellt. Er ist als eigenständiger Microservice konzipiert, der unabhängig vom Haupt-Stack läuft. @@ -48,9 +48,15 @@ Identifizierung von potenziellen Käufern und automatisierter Kontakt. ### Feature 4: Verkaufs-Statistiken (Vollständig) * Detaillierte Analyse des Kaufverhaltens pro Album mit Echtzeit-Fortschrittsanzeige im Browser. +### Feature 5: Geschwisterliste (Einrichtungsintern) (Vollständig) +Spezielles Tool zur Identifizierung von Geschwistergruppen innerhalb einer Einrichtung. +* **Intelligente Erkennung:** Nutzt die "Email der Eltern (1)" aus der Fotograf.de-Anmeldeliste für einen automatischen Abgleich (Zählenwenn > 1). 
+* **Calendly-Cross-Check:** Gleicht die identifizierten Familien mit allen aktuellen Calendly-Buchungen ab, um Nachmittags-Termine automatisch in der Liste zu vermerken.
+* **Optimiertes PDF:** Generiert eine alphabetisch nach Nachnamen sortierte Liste mit Kindern, deren Gruppen, Online-Wunsch-Status und Termin-Uhrzeit (inkl. Datum) sowie einem Erledigt-Feld für die manuelle Kontrolle vor Ort.
+
 ---
 
-## 🎯 Nächste Session: "Freigabeanfragen" (Feature 5)
+## 🎯 Nächste Session: "Freigabeanfragen" (Feature 6)
 
 Das nächste große Ziel ist der automatische Versand von Freigabeanfragen via Gmail.
 
@@ -84,4 +90,4 @@ Folgende Variablen müssen in der `.env` im Verzeichnis `/fotograf-de-scraper/`
 
 ### URLs & Ports
 * **Produktion / Nginx:** `https://floke-ai.duckdns.org/fotograf-de/`
-* **Persistenz:** Datenbank unter `./backend/data/fotograf_jobs.db`.
+* **Persistenz:** Datenbank unter `./backend/data/fotograf_jobs.db`.
\ No newline at end of file
diff --git a/fotograf-de-scraper/backend/main.py b/fotograf-de-scraper/backend/main.py
index 9fc1000ed..92b541231 100644
--- a/fotograf-de-scraper/backend/main.py
+++ b/fotograf-de-scraper/backend/main.py
@@ -1193,4 +1193,113 @@ async def generate_pdf(job_id: str, account_type: str, db: Session = Depends(get
         finally:
             if driver:
                 logger.debug("Closing driver.")
-                driver.quit()
\ No newline at end of file
+                driver.quit()
+
+@app.get("/api/jobs/{job_id}/siblings-list")
+async def generate_siblings_list(job_id: str, account_type: str, event_type_name: str = "", db: Session = Depends(get_db)):
+    """Export a job's names list, detect sibling families and return them as a PDF.
+
+    ``event_type_name`` is accepted but deliberately not used as a filter: all
+    Calendly events are fetched so siblings are not missed because of event name
+    mismatches.
+
+    NOTE(review): blocking Selenium calls and time.sleep() run inside this
+    ``async def``, so the event loop is blocked for the duration of the request.
+
+    Raises:
+        HTTPException: 400 if credentials or the Calendly token are not configured,
+            401 if the Fotograf.de login fails, 500 for export or rendering errors.
+    """
+    logger.info(f"API Request: Generate siblings list for job {job_id}")
+    username = os.getenv(f"{account_type.upper()}_USER")
+    password = os.getenv(f"{account_type.upper()}_PW")
+    api_token = os.getenv("CALENDLY_TOKEN")
+
+    # Fail fast instead of starting a browser with missing credentials.
+    if not username or not password:
+        raise HTTPException(status_code=400, detail=f"Credentials for account type '{account_type}' missing.")
+    if not api_token:
+        raise HTTPException(status_code=400, detail="Calendly API token missing.")
+
+    # Get Calendly events
+    from qr_generator import get_calendly_events_raw
+    try:
+        # Fetch ALL events to ensure we don't miss siblings due to event name mismatches
+        calendly_events = get_calendly_events_raw(api_token, event_type_name=None)
+        logger.info(f"Fetched {len(calendly_events)} total events from Calendly for siblings check.")
+    except Exception as e:
+        # Best effort: the list is still generated, just without appointment times.
+        logger.error(f"Error fetching Calendly events: {e}")
+        calendly_events = []
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        logger.debug(f"Using temp directory: {temp_dir}")
+        driver = setup_driver(download_path=temp_dir)
+        try:
+            if not login(driver, username, password):
+                raise HTTPException(status_code=401, detail="Login failed.")
+
+            job_url = f"https://app.fotograf.de/config_jobs_settings/index/{job_id}"
+            driver.get(job_url)
+            wait = WebDriverWait(driver, 30)
+
+            try:
+                institution = driver.find_element(By.TAG_NAME, "h1").text.strip()
+            except Exception:
+                # The heading only labels the PDF; fall back to a generic title.
+                institution = "Fotoauftrag"
+
+            personen_tab = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "[data-qa-id='link:photo-jobs-tabs-names_list']")))
+            driver.execute_script("arguments[0].click();", personen_tab)
+
+            export_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_dropdown"])))
+            driver.execute_script("arguments[0].scrollIntoView(true);", export_btn)
+            time.sleep(1)
+            driver.execute_script("arguments[0].click();", export_btn)
+            time.sleep(2)
+
+            try:
+                csv_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, SELECTORS["export_csv_link"])))
+                driver.execute_script("arguments[0].click();", csv_btn)
+            except TimeoutException:
+                raise HTTPException(status_code=500, detail="CSV Export Button nicht gefunden.")
+
+            # Poll the download directory until the exported CSV shows up.
+            timeout = 45
+            start_time = time.time()
+            csv_file = None
+            while time.time() - start_time < timeout:
+                csv_files = [f for f in os.listdir(temp_dir) if f.endswith('.csv')]
+                if csv_files:
+                    csv_file = os.path.join(temp_dir, csv_files[0])
+                    break
+                time.sleep(1)
+
+            if not csv_file:
+                raise HTTPException(status_code=500, detail="CSV Download fehlgeschlagen.")
+
+            output_pdf_name = f"Geschwisterliste_{job_id}.pdf"
+            output_pdf_path = os.path.join(temp_dir, output_pdf_name)
+
+            from siblings_logic import generate_siblings_pdf_from_csv
+            generate_siblings_pdf_from_csv(
+                csv_path=csv_file,
+                institution=institution,
+                calendly_events=calendly_events,
+                list_type=account_type,
+                output_path=output_pdf_path
+            )
+
+            # The temp dir is removed when this block exits, so FileResponse gets a copy.
+            final_storage = os.path.join("/tmp", output_pdf_name)
+            shutil.copy(output_pdf_path, final_storage)
+            return FileResponse(path=final_storage, filename=output_pdf_name, media_type="application/pdf")
+
+        except HTTPException:
+            raise
+        except Exception as e:
+            logger.exception("Error generating siblings list")
+            raise HTTPException(status_code=500, detail=str(e))
+        finally:
+            if driver:
+                driver.quit()
diff --git a/fotograf-de-scraper/backend/siblings_logic.py b/fotograf-de-scraper/backend/siblings_logic.py
new file mode 100644
index 000000000..3ed7a09da
--- /dev/null
+++ b/fotograf-de-scraper/backend/siblings_logic.py
@@ -0,0 +1,144 @@
+"""Detect sibling families in a Fotograf.de names export and render them as PDF."""
+import datetime
+import logging
+import os
+from collections import defaultdict
+from zoneinfo import ZoneInfo
+
+import pandas as pd
+from jinja2 import Environment, FileSystemLoader, select_autoescape
+from weasyprint import HTML
+
+from main import get_berlin_now_str, get_logo_base64
+
+logger = logging.getLogger("fotograf-scraper")
+
+BERLIN_TZ = ZoneInfo("Europe/Berlin")
+
+
+def _read_names_csv(csv_path: str) -> pd.DataFrame:
+    """Read the exported CSV, probing encodings and separators until columns split."""
+    for encoding in ("utf-8-sig", "latin1"):
+        for sep in (";", ","):
+            try:
+                probe = pd.read_csv(csv_path, sep=sep, encoding=encoding, nrows=5)
+                if len(probe.columns) > 1:
+                    return pd.read_csv(csv_path, sep=sep, encoding=encoding)
+            except Exception as exc:
+                logger.debug(f"CSV probe failed (sep={sep!r}, encoding={encoding}): {exc}")
+    # No combination produced several columns: last attempt with ";" and latin1.
+    try:
+        return pd.read_csv(csv_path, sep=";", encoding="latin1")
+    except Exception as exc:
+        raise Exception("CSV konnte nicht gelesen werden.") from exc
+
+
+def _find_column(columns, predicate):
+    """Return the first column whose lower-cased name satisfies predicate, else None."""
+    return next((c for c in columns if predicate(c.lower())), None)
+
+
+def _build_calendly_map(calendly_events: list) -> dict:
+    """Map lower-cased invitee email to the event start, formatted in Berlin time."""
+    calendly_map = {}
+    for event in calendly_events or []:
+        try:
+            start_dt = datetime.datetime.fromisoformat(event["start_time"].replace("Z", "+00:00"))
+            email = event["invitee_email"].lower().strip()
+        except (KeyError, TypeError, ValueError, AttributeError) as exc:
+            # A malformed event must not abort the list; skip it but leave a trace.
+            logger.debug(f"Skipping malformed Calendly event: {exc}")
+            continue
+        # All events are kept regardless of date so that no family is missed.
+        calendly_map[email] = start_dt.astimezone(BERLIN_TZ).strftime("%d.%m. %H:%M")
+    return calendly_map
+
+
+def _collect_families(df: pd.DataFrame, email_col: str, calendly_map: dict) -> list:
+    """Group rows by parent email and describe every group with more than one child."""
+    group_col = _find_column(df.columns, lambda c: c in ("gruppe", "klasse", "group", "class"))
+    lastname_col = _find_column(df.columns, lambda c: "nachname" in c)
+    firstname_col = _find_column(df.columns, lambda c: "vorname" in c)
+    # Parenthesised on purpose: `and` binds tighter than `or`, so without the
+    # parentheses any column merely containing "familie" would match.
+    wunsch_col = _find_column(df.columns, lambda c: ("familie" in c or "geschwister" in c) and "fotos" in c)
+    if not wunsch_col:
+        wunsch_col = _find_column(df.columns, lambda c: "familie / geschwister" in c)
+
+    families_dict = defaultdict(list)
+    for _, row in df.fillna("").iterrows():
+        email = str(row[email_col]).strip().lower()
+        if email and "@" in email:
+            families_dict[email].append(row)
+
+    families = []
+    for email, rows in families_dict.items():
+        if len(rows) < 2:
+            continue  # a single child is not a sibling group
+        children = [
+            {
+                "vorname": str(r[firstname_col]).strip() if firstname_col else "",
+                "gruppe": str(r[group_col]).strip() if group_col else "",
+            }
+            for r in rows
+        ]
+        fotograf_wunsch = bool(wunsch_col) and any(
+            keyword in str(r[wunsch_col]).lower()
+            for r in rows
+            for keyword in ("ja", "familien", "geschwister")
+        )
+        families.append({
+            "nachname": str(rows[0][lastname_col]).strip() if lastname_col else "Unbekannt",
+            "children": children,
+            "fotograf_wunsch": fotograf_wunsch,
+            "calendly_time": calendly_map.get(email),
+        })
+
+    # Case-insensitive so lower-case names are not sorted behind all upper-case ones.
+    families.sort(key=lambda family: family["nachname"].casefold())
+    return families
+
+
+def generate_siblings_pdf_from_csv(csv_path: str, institution: str, calendly_events: list, list_type: str, output_path: str):
+    """Render the siblings list PDF for one institution.
+
+    Children sharing a parent email in the CSV form a family; each family is
+    annotated with its Calendly appointment, if one is booked under that email.
+
+    Args:
+        csv_path: Names list exported from Fotograf.de.
+        institution: Heading passed to the template.
+        calendly_events: Dicts with "start_time" (ISO 8601) and "invitee_email".
+        list_type: Currently unused; kept for the caller's keyword argument.
+        output_path: Where the PDF is written.
+
+    Raises:
+        Exception: If the CSV cannot be read.
+    """
+    logger.info(f"Generating Siblings PDF for {institution} from {csv_path}")
+    df = _read_names_csv(csv_path)
+    df.columns = df.columns.str.strip().str.replace("\"", "")
+
+    email_col = _find_column(df.columns, lambda c: "email" in c) or _find_column(df.columns, lambda c: "e-mail" in c)
+    if email_col:
+        families = _collect_families(df, email_col, _build_calendly_map(calendly_events))
+    else:
+        logger.warning("No email column found. Siblings logic cannot run.")
+        families = []
+
+    template_dir = os.path.join(os.path.dirname(__file__), "templates")
+    # CSV values are external input, so they are HTML-escaped when rendered.
+    env = Environment(loader=FileSystemLoader(template_dir), autoescape=select_autoescape(["html"]))
+    template = env.get_template("siblings_list.html")
+
+    html_out = template.render({
+        "institution": institution,
+        "current_time": get_berlin_now_str(),
+        "logo_base64": get_logo_base64(),
+        "families": families,
+    })
+    pdf = HTML(string=html_out).write_pdf()
+
+    with open(output_path, "wb") as f:
+        f.write(pdf)
+    logger.info(f"Siblings PDF saved to {output_path}")
diff --git a/fotograf-de-scraper/backend/templates/siblings_list.html b/fotograf-de-scraper/backend/templates/siblings_list.html
new file mode 100644
index 000000000..bf2100209
--- /dev/null
+++ 
b/fotograf-de-scraper/backend/templates/siblings_list.html @@ -0,0 +1,90 @@ + + +
+ + + + +| Nachname | +Kinder in der Einrichtung (Gruppe) | +Wunsch Online | +Termin (Calendly) | +Erledigt | +
|---|---|---|---|---|
| {{ family.nachname }} | +
+ {% for child in family.children %}
+
+ {{ child.vorname }} ({{ child.gruppe }})
+
+ {% endfor %}
+ |
+ + {% if family.fotograf_wunsch %} + Ja + {% else %} + - + {% endif %} + | ++ {% if family.calendly_time %} + {{ family.calendly_time }} + {% else %} + - + {% endif %} + | ++ + | +
| Keine internen Geschwisterkinder in dieser Einrichtung gefunden. | +||||
|
+ |
+
+ Kinderfotos Erding | www.kinderfotos-erding.de +Gartenstr. 10 | 85445 Oberding | 08122-8470867 + |
+
Sucht nach Geschwisterkindern in der Einrichtung und gleicht diese mit Calendly ab.
+