[32788f42] Implement Feature 3: Nachfass-E-Mails (Reminder Analysis) with CSV export for Supermailer

This commit is contained in:
2026-03-21 19:31:10 +00:00
parent 539f30bdb7
commit ba8565e59a
2 changed files with 269 additions and 7 deletions

View File

@@ -76,6 +76,9 @@ SELECTORS = {
"person_all_photos": ".//div[@data-key]",
"person_purchased_photos": ".//div[@data-key and .//img[@alt='Bestellungen mit diesem Foto']]",
"person_access_card_photo": ".//div[@data-key and contains(@class, 'opacity-50')]",
"potential_buyer_link": "//a[contains(@href, '/config_customers/view_customer')]",
"quick_login_url": "//a[@id='quick-login-url']",
"buyer_email": "//span[contains(., '@')]",
}
# --- PDF Generation Logic ---
@@ -538,6 +541,162 @@ def process_statistics(task_id: str, job_id: str, account_type: str):
logger.debug(f"Task {task_id}: Closing driver.")
driver.quit()
def process_reminder_analysis(task_id: str, job_id: str, account_type: str):
    """Background task: build a Supermailer reminder ("Nachfass") list for one job.

    Walks every album of the job and inspects each guest access. Guests with
    at most one login and no purchased photos are reminder candidates; for
    each one the quick-login URL and the buyer's e-mail address are scraped.
    Results are aggregated per e-mail address and published through
    ``task_store[task_id]`` using the {"status", "progress", "result"} protocol.

    Args:
        task_id: Key under which this task reports into ``task_store``.
        job_id: fotograf.de job id whose albums are analysed.
        account_type: Env-var prefix used to look up ``*_USER`` / ``*_PW``.
    """
    logger.info(f"Task {task_id}: Starting reminder analysis for job {job_id}")
    task_store[task_id] = {"status": "running", "progress": "Initialisiere Browser...", "result": None}
    username = os.getenv(f"{account_type.upper()}_USER")
    password = os.getenv(f"{account_type.upper()}_PW")
    driver = None
    try:
        driver = setup_driver()
        if not driver or not login(driver, username, password):
            task_store[task_id] = {"status": "error", "progress": "Login fehlgeschlagen."}
            return
        wait = WebDriverWait(driver, 15)
        # 1. Load the albums overview and collect every album's name + URL.
        albums_overview_url = f"https://app.fotograf.de/config_jobs_photos/index/{job_id}"
        task_store[task_id]["progress"] = "Lade Alben-Übersicht..."
        driver.get(albums_overview_url)
        albums_to_visit = []
        try:
            album_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["album_overview_rows"])))
            for row in album_rows:
                try:
                    album_link = row.find_element(By.XPATH, SELECTORS["album_overview_link"])
                    albums_to_visit.append({"name": album_link.text, "url": album_link.get_attribute('href')})
                except NoSuchElementException:
                    continue  # row without an album link — skip it
        except TimeoutException:
            task_store[task_id] = {"status": "error", "progress": "Konnte die Album-Liste nicht finden."}
            return
        # 2. Visit each album and scan its paginated guest-access list.
        raw_results = []
        total_albums = len(albums_to_visit)
        for index, album in enumerate(albums_to_visit):
            album_name = album['name']
            task_store[task_id]["progress"] = f"Album {index+1}/{total_albums}: '{album_name}'..."
            driver.get(album['url'])
            try:
                # The access-code counter tells us how many pages exist (20 rows each).
                total_codes_text = wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["access_code_count"]))).text
                num_pages = math.ceil(int(total_codes_text) / 20)
                for page_num in range(1, num_pages + 1):
                    task_store[task_id]["progress"] = f"Album {index+1}/{total_albums}: '{album_name}' (Seite {page_num}/{num_pages})..."
                    if page_num > 1:
                        driver.get(album['url'] + f"?page_guest_accesses={page_num}")
                    person_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))
                    num_persons = len(person_rows)
                    for i in range(num_persons):
                        # Re-locate rows each iteration: navigating away and back
                        # invalidates previously found WebElements (stale references).
                        person_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))
                        person_row = person_rows[i]
                        login_count_text = person_row.find_element(By.XPATH, ".//span[text()='Logins']/following-sibling::strong").text
                        # Only interested in people with 0 or 1 logins (potential reminders).
                        # Someone who hasn't bought yet might need a reminder regardless
                        # of logins, but the legacy logic uses login_count <= 1 — keep it.
                        if int(login_count_text) <= 1:
                            vorname = person_row.find_element(By.XPATH, ".//span[text()='Vorname']/following-sibling::strong").text
                            try:
                                # Skip anyone who already bought at least one photo.
                                photo_container = person_row.find_element(By.XPATH, "./following-sibling::div[1]")
                                purchase_icons = photo_container.find_elements(By.XPATH, ".//img[@alt='Bestellungen mit diesem Foto']")
                                if len(purchase_icons) > 0:
                                    continue
                            except NoSuchElementException:
                                pass  # no photo container found — treat as "no purchases"
                            # Potential candidate: open the access-code detail page.
                            # (The legacy scraper navigates back and forth instead of
                            # opening a new tab — keep that approach.)
                            access_code_page_url = person_row.find_element(By.XPATH, ".//a[contains(@data-qa-id, 'guest-access-banner-access-code')]").get_attribute('href')
                            driver.get(access_code_page_url)
                            try:
                                # Use the shared SELECTORS entries instead of inline XPaths.
                                wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["quick_login_url"])))
                                quick_login_url = driver.find_element(By.XPATH, SELECTORS["quick_login_url"]).get_attribute('href')
                                potential_buyer_element = driver.find_element(By.XPATH, SELECTORS["potential_buyer_link"])
                                buyer_name = potential_buyer_element.text
                                potential_buyer_element.click()
                                email = wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["buyer_email"]))).text
                                raw_results.append({
                                    "child_name": vorname,
                                    "buyer_name": buyer_name,
                                    "email": email,
                                    "quick_login": quick_login_url
                                })
                            except Exception as e:
                                logger.warning(f"Error getting details for {vorname}: {e}")
                            # Go back to the album page (same pagination position).
                            driver.get(album['url'] + (f"?page_guest_accesses={page_num}" if page_num > 1 else ""))
                            wait.until(EC.presence_of_element_located((By.XPATH, SELECTORS["person_rows"])))
            except Exception as e:
                logger.error(f"Fehler bei Album '{album_name}': {e}")
                continue  # one broken album must not abort the whole analysis
        # 3. Aggregate per buyer e-mail address.
        task_store[task_id]["progress"] = "Aggregiere Ergebnisse..."
        aggregated_data = {}
        for res in raw_results:
            email = res['email']
            # "Familie" entries are generic family photos, not a child's name.
            child_name = "Familienbilder" if res['child_name'] == "Familie" else res['child_name']
            html_link = f'<a href="{res["quick_login"]}">Fotos von {child_name}</a>'
            if email not in aggregated_data:
                aggregated_data[email] = {
                    'buyer_first_name': res['buyer_name'].split(' ')[0],
                    'email': email,
                    'children': [child_name],
                    'links': [html_link]
                }
            else:
                # De-duplicate: one link per distinct child name per buyer.
                if child_name not in aggregated_data[email]['children']:
                    aggregated_data[email]['children'].append(child_name)
                    aggregated_data[email]['links'].append(html_link)
        # 4. Render the final Supermailer rows ("Anna, Ben und Carla" style).
        final_list = []
        for email, data in aggregated_data.items():
            names = data['children']
            if len(names) > 2:
                names_str = ', '.join(names[:-1]) + ' und ' + names[-1]
            else:
                names_str = ' und '.join(names)
            final_list.append({
                'Name Käufer': data['buyer_first_name'],
                'E-Mail-Adresse Käufer': email,
                'Kindernamen': names_str,
                'LinksHTML': '<br><br>'.join(data['links'])
            })
        task_store[task_id] = {
            "status": "completed",
            "progress": "Analyse abgeschlossen!",
            "result": final_list
        }
    except Exception as e:
        logger.exception(f"Error in task {task_id}")
        task_store[task_id] = {"status": "error", "progress": f"Fehler: {str(e)}"}
    finally:
        if driver:
            driver.quit()
from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
@@ -756,6 +915,31 @@ async def start_statistics(job_id: str, account_type: str, background_tasks: Bac
background_tasks.add_task(process_statistics, task_id, job_id, account_type)
return {"task_id": task_id}
@app.post("/api/jobs/{job_id}/reminder-analysis")
async def start_reminder_analysis(job_id: str, account_type: str, background_tasks: BackgroundTasks):
    """Kick off a reminder analysis as a background task.

    Returns the freshly generated task id; progress and results can be
    polled via the generic task endpoints using that id.
    """
    new_task_id = str(uuid.uuid4())
    logger.info(f"API Request: Start reminder analysis for job {job_id} ({account_type})")
    background_tasks.add_task(process_reminder_analysis, new_task_id, job_id, account_type)
    return {"task_id": new_task_id}
@app.get("/api/tasks/{task_id}/download-csv")
async def download_task_csv(task_id: str):
    """Export a completed task's result list as a Supermailer-ready CSV.

    Raises:
        HTTPException 404: task unknown or not yet completed.
        HTTPException 400: task has no list result to export.
        HTTPException 500: CSV serialization failed.
    """
    if task_id not in task_store or task_store[task_id]["status"] != "completed":
        raise HTTPException(status_code=404, detail="Ergebnis nicht gefunden oder Task noch nicht abgeschlossen.")
    result = task_store[task_id]["result"]
    if not result or not isinstance(result, list):
        raise HTTPException(status_code=400, detail="Keine Daten zum Exportieren vorhanden.")
    try:
        df = pd.DataFrame(result)
        # Create the temp file and close its handle immediately; pandas then
        # reopens the path itself. Keeping the NamedTemporaryFile open leaked
        # the descriptor and breaks the write on Windows, where the file
        # cannot be opened a second time while the handle is held.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            csv_path = tmp.name
        # utf-8-sig adds a BOM so Excel/Supermailer detect the encoding.
        df.to_csv(csv_path, index=False, encoding='utf-8-sig')
        # TODO(review): the temp file is never deleted after download — wire a
        # starlette BackgroundTask(os.unlink, csv_path) into FileResponse.
        return FileResponse(path=csv_path, filename=f"Supermailer_Liste_{task_id[:8]}.csv", media_type="text/csv")
    except Exception as e:
        logger.error(f"Export error: {e}")
        raise HTTPException(status_code=500, detail="CSV Export fehlgeschlagen.")
@app.get("/api/jobs/{job_id}/generate-pdf")
async def generate_pdf(job_id: str, account_type: str):
logger.info(f"API Request: Generate PDF for job {job_id} ({account_type})")