# --- Reminder Analysis (Supermailer follow-up list) ---

# XPath selectors that only the reminder analysis needs. They are registered in
# the shared SELECTORS table so the scraping code below never hard-codes them.
SELECTORS.update({
    "potential_buyer_link": "//a[contains(@href, '/config_customers/view_customer')]",
    "quick_login_url": "//a[@id='quick-login-url']",
    "buyer_email": "//span[contains(., '@')]",
})

# The page count of an album is derived as ceil(total access codes / this value).
_ACCESS_CODES_PER_PAGE = 20
# Persons with more logins than this are not reminder candidates (legacy rule).
_MAX_LOGINS_FOR_REMINDER = 1


def _album_page_url(album_url: str, page_num: int) -> str:
    """Return the URL of the given 1-based guest-access page of an album."""
    if page_num > 1:
        return f"{album_url}?page_guest_accesses={page_num}"
    return album_url


def _format_name_list(names: list) -> str:
    """Join names as German prose: 'A', 'A und B', 'A, B und C'."""
    if len(names) <= 1:
        return ''.join(names)
    return ', '.join(names[:-1]) + ' und ' + names[-1]


def _collect_albums(wait) -> list:
    """Read name and URL of every album row on the currently loaded overview page.

    Rows without an album link are skipped.

    Raises:
        TimeoutException: if no album row shows up within the wait timeout.
    """
    album_rows = wait.until(
        EC.presence_of_all_elements_located((By.XPATH, SELECTORS["album_overview_rows"]))
    )
    albums = []
    for row in album_rows:
        try:
            album_link = row.find_element(By.XPATH, SELECTORS["album_overview_link"])
        except NoSuchElementException:
            continue
        albums.append({"name": album_link.text, "url": album_link.get_attribute('href')})
    return albums


def _is_reminder_candidate(person_row) -> bool:
    """Return True if the person has few logins and no purchased photo.

    Raises:
        NoSuchElementException: if the row has no 'Logins' field.
        ValueError: if the login count is not an integer.
    """
    login_count_text = person_row.find_element(
        By.XPATH, ".//span[text()='Logins']/following-sibling::strong"
    ).text
    if int(login_count_text) > _MAX_LOGINS_FOR_REMINDER:
        return False

    try:
        photo_container = person_row.find_element(By.XPATH, "./following-sibling::div[1]")
    except NoSuchElementException:
        # No photo container next to the row: nothing can have been purchased.
        return True
    purchase_icons = photo_container.find_elements(
        By.XPATH, ".//img[@alt='Bestellungen mit diesem Foto']"
    )
    return not purchase_icons


def _fetch_buyer_details(driver, wait, access_code_page_url: str, child_name: str) -> dict:
    """Open the access-code page and read quick-login URL, buyer name and e-mail.

    Leaves the browser on the customer page; the caller navigates back.
    """
    driver.get(access_code_page_url)
    quick_login_url = wait.until(
        EC.visibility_of_element_located((By.XPATH, SELECTORS["quick_login_url"]))
    ).get_attribute('href')

    buyer_link = driver.find_element(By.XPATH, SELECTORS["potential_buyer_link"])
    buyer_name = buyer_link.text
    buyer_link.click()
    email = wait.until(
        EC.visibility_of_element_located((By.XPATH, SELECTORS["buyer_email"]))
    ).text

    return {
        "child_name": child_name,
        "buyer_name": buyer_name,
        "email": email,
        "quick_login": quick_login_url,
    }


def _scan_album_page(driver, wait, page_url: str):
    """Yield one raw result per reminder candidate on the loaded album page.

    Implemented as a generator so results found before a failure still reach
    the caller.
    """
    rows_locator = (By.XPATH, SELECTORS["person_rows"])
    num_persons = len(wait.until(EC.presence_of_all_elements_located(rows_locator)))

    for i in range(num_persons):
        # Re-locate the rows on every pass: navigating away and back
        # invalidates the element references from the previous pass.
        person_rows = wait.until(EC.presence_of_all_elements_located(rows_locator))
        if i >= len(person_rows):
            # The list shrank since it was first counted; nothing left to read.
            break
        person_row = person_rows[i]

        try:
            if not _is_reminder_candidate(person_row):
                continue
            vorname = person_row.find_element(
                By.XPATH, ".//span[text()='Vorname']/following-sibling::strong"
            ).text
            access_code_page_url = person_row.find_element(
                By.XPATH, ".//a[contains(@data-qa-id, 'guest-access-banner-access-code')]"
            ).get_attribute('href')
        except (NoSuchElementException, ValueError) as e:
            # A single malformed row must not abort the rest of the album.
            # No navigation has happened yet, so the page is still intact.
            logger.warning(f"Skipping person row {i + 1} on {page_url}: {e}")
            continue

        try:
            details = _fetch_buyer_details(driver, wait, access_code_page_url, vorname)
        except Exception as e:
            logger.warning(f"Error getting details for {vorname}: {e}")
        else:
            yield details

        # Go back to the album page before looking at the next person.
        driver.get(page_url)
        wait.until(EC.presence_of_element_located(rows_locator))


def _scan_album(driver, wait, album: dict, album_label: str, report_progress):
    """Yield the raw results of every guest-access page of one album."""
    report_progress(f"{album_label}...")
    driver.get(album['url'])

    total_codes_text = wait.until(
        EC.visibility_of_element_located((By.XPATH, SELECTORS["access_code_count"]))
    ).text
    num_pages = math.ceil(int(total_codes_text) / _ACCESS_CODES_PER_PAGE)

    for page_num in range(1, num_pages + 1):
        report_progress(f"{album_label} (Seite {page_num}/{num_pages})...")
        page_url = _album_page_url(album['url'], page_num)
        if page_num > 1:
            # Page 1 is already loaded by the album navigation above.
            driver.get(page_url)
        yield from _scan_album_page(driver, wait, page_url)


def _aggregate_reminder_results(raw_results: list) -> list:
    """Group raw per-child results by buyer e-mail into Supermailer rows.

    Each returned dict has the keys 'Name Käufer', 'E-Mail-Adresse Käufer',
    'Kindernamen' and 'LinksHTML'. Results without an e-mail are dropped, and a
    child name is listed only once per buyer.
    """
    import html  # local import: only this helper builds HTML

    aggregated_data = {}
    for res in raw_results:
        email = (res['email'] or '').strip()
        if not email:
            continue

        child_name = "Familienbilder" if res['child_name'] == "Familie" else res['child_name']
        # NOTE(review): the link markup is reconstructed. The quick-login URL
        # was scraped but never reached the output, and the column is named
        # 'LinksHTML', so an anchor per child is assumed. Confirm against the
        # Supermailer template. Scraped text is escaped before it goes into HTML.
        link_target = html.escape(res['quick_login'] or '', quote=True)
        html_link = f'<a href="{link_target}">Fotos von {html.escape(child_name)}</a>'

        # Group case-insensitively, but keep the first spelling for the output.
        entry = aggregated_data.setdefault(email.casefold(), {
            'buyer_first_name': (res['buyer_name'].split() or [''])[0],
            'email': email,
            'children': [],
            'links': [],
        })
        if child_name not in entry['children']:
            entry['children'].append(child_name)
            entry['links'].append(html_link)

    return [
        {
            'Name Käufer': data['buyer_first_name'],
            'E-Mail-Adresse Käufer': data['email'],
            'Kindernamen': _format_name_list(data['children']),
            # NOTE(review): separator assumed to be two HTML line breaks.
            'LinksHTML': '<br><br>'.join(data['links']),
        }
        for data in aggregated_data.values()
    ]


def process_reminder_analysis(task_id: str, job_id: str, account_type: str):
    """Background task: build the follow-up mail list for one job.

    Logs in with the credentials from the `<ACCOUNT_TYPE>_USER` and
    `<ACCOUNT_TYPE>_PW` environment variables, visits every album of the job
    and collects buyer name, e-mail and quick-login link for each person with
    at most one login and no purchased photo.

    Progress and outcome are written to `task_store[task_id]`: status is
    "running" while working, "completed" with the aggregated rows in "result",
    or "error" with a message in "progress". A failing album is logged and
    skipped; results gathered before the failure are kept.
    """
    logger.info(f"Task {task_id}: Starting reminder analysis for job {job_id}")
    task_store[task_id] = {"status": "running", "progress": "Initialisiere Browser...", "result": None}

    def report_progress(message: str) -> None:
        task_store[task_id]["progress"] = message

    username = os.getenv(f"{account_type.upper()}_USER")
    password = os.getenv(f"{account_type.upper()}_PW")
    driver = None

    try:
        driver = setup_driver()
        if not driver or not login(driver, username, password):
            task_store[task_id] = {"status": "error", "progress": "Login fehlgeschlagen."}
            return

        wait = WebDriverWait(driver, 15)

        # 1. Navigate to the albums overview and collect the album links.
        report_progress("Lade Alben-Übersicht...")
        driver.get(f"https://app.fotograf.de/config_jobs_photos/index/{job_id}")
        try:
            albums_to_visit = _collect_albums(wait)
        except TimeoutException:
            task_store[task_id] = {"status": "error", "progress": "Konnte die Album-Liste nicht finden."}
            return

        # 2. Visit every album and collect the reminder candidates.
        raw_results = []
        total_albums = len(albums_to_visit)
        for index, album in enumerate(albums_to_visit, start=1):
            album_name = album['name']
            album_label = f"Album {index}/{total_albums}: '{album_name}'"
            try:
                for result in _scan_album(driver, wait, album, album_label, report_progress):
                    raw_results.append(result)
            except Exception as e:
                logger.error(f"Fehler bei Album '{album_name}': {e}")
                continue

        # 3. Aggregate the per-child results into one row per buyer.
        report_progress("Aggregiere Ergebnisse...")
        final_list = _aggregate_reminder_results(raw_results)

        task_store[task_id] = {
            "status": "completed",
            "progress": "Analyse abgeschlossen!",
            "result": final_list
        }

    except Exception as e:
        logger.exception(f"Error in task {task_id}")
        task_store[task_id] = {"status": "error", "progress": f"Fehler: {str(e)}"}
    finally:
        if driver:
            driver.quit()


@app.post("/api/jobs/{job_id}/reminder-analysis")
async def start_reminder_analysis(job_id: str, account_type: str, background_tasks: BackgroundTasks):
    """Schedule the reminder analysis as a background task and return its task id."""
    logger.info(f"API Request: Start reminder analysis for job {job_id} ({account_type})")
    task_id = str(uuid.uuid4())
    background_tasks.add_task(process_reminder_analysis, task_id, job_id, account_type)
    return {"task_id": task_id}


@app.get("/api/tasks/{task_id}/download-csv")
async def download_task_csv(task_id: str, background_tasks: BackgroundTasks):
    """Return the list result of a completed task as a CSV download.

    Raises:
        HTTPException: 404 if the task is unknown or not completed, 400 if it
            has no list result, 500 if writing the CSV fails.
    """
    if task_id not in task_store or task_store[task_id]["status"] != "completed":
        raise HTTPException(status_code=404, detail="Ergebnis nicht gefunden oder Task noch nicht abgeschlossen.")

    result = task_store[task_id]["result"]
    if not result or not isinstance(result, list):
        raise HTTPException(status_code=400, detail="Keine Daten zum Exportieren vorhanden.")

    # mkstemp plus an explicit close: pandas reopens the file by path, so no
    # handle of ours stays open.
    fd, csv_path = tempfile.mkstemp(suffix=".csv")
    os.close(fd)
    try:
        pd.DataFrame(result).to_csv(csv_path, index=False, encoding='utf-8-sig')
    except Exception as e:
        logger.error(f"Export error: {e}")
        try:
            os.remove(csv_path)
        except OSError:
            pass
        raise HTTPException(status_code=500, detail="CSV Export fehlgeschlagen.") from e

    # Delete the temp file once the response has been sent.
    background_tasks.add_task(os.remove, csv_path)
    return FileResponse(path=csv_path, filename=f"Supermailer_Liste_{task_id[:8]}.csv", media_type="text/csv")
// --- Reminder analysis (Supermailer follow-up list) ---
// Fragment of the App component body: state, status polling and handlers.

// Id of the current reminder-analysis task; null while none exists.
// The explicit type parameter is required: a bare useState(null) infers
// the type `null` and rejects the string id set later.
const [reminderTaskId, setReminderTaskId] = useState<string | null>(null);
// Latest progress message reported by the backend for the running task.
const [reminderProgress, setReminderProgress] = useState('');
// True from the moment an analysis is started until it completes or fails.
const [isReminderRunning, setIsReminderRunning] = useState(false);

// Poll the task status once per second while an analysis is running.
useEffect(() => {
  if (!reminderTaskId || !isReminderRunning) return;

  // Set on cleanup so a response that arrives late cannot update state
  // for a task that is no longer the current one.
  let cancelled = false;

  const interval: ReturnType<typeof setInterval> = setInterval(async () => {
    try {
      const res = await fetch(`${API_BASE_URL}/api/tasks/${reminderTaskId}`);
      if (!res.ok) throw new Error('Task Status Request failed');
      const data = await res.json();
      if (cancelled) return;

      setReminderProgress(data.progress || 'Verarbeite...');
      if (data.status === 'completed') {
        // Keep the task id so the UI can offer the CSV download.
        setIsReminderRunning(false);
      } else if (data.status === 'error') {
        setError(data.progress || 'Ein Fehler ist aufgetreten.');
        setIsReminderRunning(false);
        setReminderTaskId(null);
      }
    } catch (err: unknown) {
      // Transient polling failures are only logged; the next tick retries.
      console.error("Polling Error:", err);
    }
  }, 1000);

  return () => {
    cancelled = true;
    clearInterval(interval);
  };
}, [reminderTaskId, isReminderRunning]);

// Start a reminder analysis for the given job on the backend.
const handleStartReminderAnalysis = async (job: Job) => {
  // Drop the id of a previous run first. Otherwise the poller would query
  // the old, already completed task, stop immediately, and never poll the
  // new one.
  setReminderTaskId(null);
  setIsReminderRunning(true);
  setReminderProgress('Starte Analyse...');
  setError(null);

  try {
    const response = await fetch(
      `${API_BASE_URL}/api/jobs/${job.id}/reminder-analysis?account_type=${encodeURIComponent(activeTab)}`,
      { method: 'POST' }
    );
    if (!response.ok) throw new Error('Konnte Analyse nicht starten.');
    const data = await response.json();
    setReminderTaskId(data.task_id);
  } catch (err: any) {
    setError(err.message);
    setIsReminderRunning(false);
  }
};

// Open the CSV export of a completed task in a new tab.
const handleDownloadReminderCsv = async (taskId: string) => {
  try {
    // window.open signals a blocked popup by returning null, not by throwing.
    const popup = window.open(`${API_BASE_URL}/api/tasks/${taskId}/download-csv`, '_blank');
    if (!popup) setError("Download fehlgeschlagen.");
  } catch (err: any) {
    setError("Download fehlgeschlagen.");
  }
};
✉️
- Demnächst + Aktiv
Nachfass-Mails (Supermailer)

Analysiert das Kaufverhalten und generiert eine fertige CSV-Liste für den Supermailer.

- + + {isReminderRunning ? ( +
+
+ + Analyse läuft... +
+

{reminderProgress}

+
+ ) : reminderTaskId ? ( + + ) : ( + + )}
- - {/* Tool 4: Statistics */} +{/* Tool 4: Statistics */}
📊