fix(competitor-analysis): final migration fixes and documentation updates

This commit is contained in:
2026-05-15 19:08:54 +00:00
parent efdd134556
commit 84d045421a
5 changed files with 157 additions and 103 deletions

View File

@@ -583,8 +583,10 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
# 3. Aggregate results by Email
aggregation = {}
missing_links_count = 0
base_url = "https://kinderfoto-erding.fotograf.de" if account_type == "kiga" else "https://kinderfotos-erding.fotograf.de"
login_url = f"{base_url}/login"
for c in candidates:
email = c.email_eltern.lower()
@@ -592,12 +594,6 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
if exclude_purchased_emails and email in purchased_emails:
continue
# STRICT LINK CHECK: If we don't have a scraped Quick Login URL, skip this child.
# We don't want to send broken /login/access/ links.
if not c.quick_login_url:
missing_links_count += 1
continue
if email not in aggregation:
aggregation[email] = {
"email": email,
@@ -612,8 +608,8 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
if child_label and child_label not in aggregation[email]["children"]:
aggregation[email]["children"].append(child_label)
# Add Quick Login Link (Guaranteed to exist here)
html_link = f'<a href="{c.quick_login_url}">Fotos von {child_label}</a>'
# Add Zugangscode and Login Link
html_link = f'Fotos von {child_label}: Code <b>{c.zugangscode}</b> (<a href="{login_url}">Hier einloggen</a>)'
if html_link not in aggregation[email]["links"]:
aggregation[email]["links"].append(html_link)
@@ -632,8 +628,6 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
})
progress_msg = f"Analyse fertig! {len(final_result)} Empfänger identifiziert."
if missing_links_count > 0:
progress_msg += f" (Hinweis: {missing_links_count} Kinder ignoriert, da Quick-Login-Link fehlt. Bitte vorher 'Daten abgleichen' drücken!)"
task_store[task_id] = {
"status": "completed",
@@ -1169,97 +1163,6 @@ def sync_participants(job_id: str, account_type: str, db: Session, task_id: str
except Exception as order_err:
logger.error(f"Failed to scrape orders for price magic: {order_err}")
# --- PHASE 3: Link Magic (Scrape Quick Login URLs) ---
try:
# Find candidates for reminders who don't have a link yet
# We prioritize those with few logins and no orders
link_candidates = db.query(JobParticipant).filter(
JobParticipant.job_id == job_id,
JobParticipant.has_orders == 0,
JobParticipant.logins <= 5,
JobParticipant.quick_login_url == None
).all()
if link_candidates:
if task_id: task_store[task_id]["progress"] = f"Sammle Login-Links für {len(link_candidates)} Personen (Link Magic)..."
logger.info(f"Link Magic: Identified {len(link_candidates)} candidates for link scraping.")
# Navigate back to Persons tab
albums_overview_url = f"https://app.fotograf.de/config_jobs_photos/index/{job_id}"
logger.info(f"Navigating to Albums overview: {albums_overview_url}")
driver.get(albums_overview_url)
# Find all album links
album_elements = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["album_overview_link"])))
albums = [{"name": e.text, "url": e.get_attribute("href")} for e in album_elements]
codes_to_find = {c.zugangscode: c for c in link_candidates}
links_found = 0
for album in albums:
if not codes_to_find: break
logger.info(f"Searching for links in album: {album['name']}")
driver.get(album['url'])
try:
total_codes_text = wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["access_code_count"]))).text
num_pages = math.ceil(int(total_codes_text) / 20)
for page_num in range(1, num_pages + 1):
if not codes_to_find: break
if page_num > 1:
driver.get(album['url'] + f"?page_guest_accesses={page_num}")
person_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))
# Map of codes on this page to their communication link
page_links = {}
for row in person_rows:
row_text = row.text
for code in list(codes_to_find.keys()):
if code in row_text:
try:
comm_link = row.find_element(By.XPATH, SELECTORS["person_access_code_link"]).get_attribute("href")
page_links[code] = comm_link
except: pass
# Now visit each communication page
for code, comm_link in page_links.items():
if code not in codes_to_find: continue
logger.debug(f"Scraping link for code {code}...")
if task_id: task_store[task_id]["progress"] = f"Hole Link {links_found+1} / {len(link_candidates)}..."
driver.get(comm_link)
for attempt in range(3):
try:
wait_short = WebDriverWait(driver, 5)
quick_link_el = wait_short.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["quick_login_url"])))
quick_link = quick_link_el.get_attribute("href")
# Update DB
codes_to_find[code].quick_login_url = quick_link
del codes_to_find[code]
links_found += 1
if links_found % 5 == 0: db.commit()
break
except Exception as e:
time.sleep(1)
else:
logger.warning(f"Could not find quick login link for {code}")
# Go back to album page if we visited communication pages
if page_links:
driver.get(album['url'] + (f"?page_guest_accesses={page_num}" if page_num > 1 else ""))
wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))
except Exception as album_err:
logger.error(f"Error in album {album['name']}: {album_err}")
db.commit()
logger.info(f"Link Magic complete: Scraped {links_found} links.")
except Exception as link_err:
logger.error(f"Failed to scrape links: {link_err}")
return len(df)
finally:

View File

@@ -151,7 +151,7 @@ function App() {
const [reminderResult, setReminderResult] = useState<any[] | null>(null);
const [reminderHistory, setReminderHistory] = useState<any[] | null>(null);
const [emailSubject, setEmailSubject] = useState("Fotos von {Kindernamen}");
const [emailBody, setEmailBody] = useState("Hallo {Name Käufer},<br><br>deine Fotos sind fertig und warten auf dich! Klicke einfach auf die Links unten, um direkt zu den Galerien zu gelangen:<br><br>{LinksHTML}<br><br>Viel Spaß beim Anschauen!");
const [emailBody, setEmailBody] = useState("Hallo {Name Käufer},<br><br>deine Fotos sind fertig und warten auf dich! Kopiere einfach deinen Zugangscode und klicke auf den Link zum Shop, um dich einzuloggen:<br><br>{LinksHTML}<br><br>Viel Spaß beim Anschauen!");
const [isSendingEmails, setIsSendingEmails] = useState(false);
const [emailSendStatus, setEmailSendStatus] = useState<string | null>(null);
const [reminderTab, setReminderTab] = useState<'config' | 'preview' | 'history'>('config');

15
patch_frontend.py Normal file
View File

@@ -0,0 +1,15 @@
"""Patch the default reminder-email body in the frontend App.tsx.

Replaces the old "quick login link" wording with the new
"Zugangscode + shop login" wording. Prints a status line and exits
non-zero when the expected code is not found, so CI/scripts can detect
a failed patch.
"""
import sys

# Target file; path is relative to the repo root this script is run from.
APP_TSX_PATH = 'fotograf-de-scraper/frontend/src/App.tsx'

# Exact source line to replace (must match byte-for-byte).
OLD_BODY = """ const [emailBody, setEmailBody] = useState("Hallo {Name Käufer},<br><br>deine Fotos sind fertig und warten auf dich! Klicke einfach auf die Links unten, um direkt zu den Galerien zu gelangen:<br><br>{LinksHTML}<br><br>Viel Spaß beim Anschauen!");"""
# Replacement line with the new default email body.
NEW_BODY = """ const [emailBody, setEmailBody] = useState("Hallo {Name Käufer},<br><br>deine Fotos sind fertig und warten auf dich! Kopiere einfach deinen Zugangscode und klicke auf den Link zum Shop, um dich einzuloggen:<br><br>{LinksHTML}<br><br>Viel Spaß beim Anschauen!");"""


def patch_body(content):
    """Return *content* with OLD_BODY replaced by NEW_BODY.

    Returns None when OLD_BODY is not present, so the caller can
    distinguish "nothing to do" from a successful patch.
    """
    if OLD_BODY not in content:
        return None
    return content.replace(OLD_BODY, NEW_BODY)


def main():
    """Apply the patch to APP_TSX_PATH; return a process exit code."""
    # The file contains umlauts ("Käufer", "Spaß"); read/write it as
    # UTF-8 explicitly so a non-UTF-8 platform default cannot corrupt it.
    with open(APP_TSX_PATH, 'r', encoding='utf-8') as f:
        content = f.read()
    patched = patch_body(content)
    if patched is None:
        print("Frontend code not found")
        return 1
    with open(APP_TSX_PATH, 'w', encoding='utf-8') as f:
        f.write(patched)
    print("Frontend patched")
    return 0


if __name__ == "__main__":
    sys.exit(main())

18
patch_linkmagic.py Normal file
View File

@@ -0,0 +1,18 @@
"""Remove the "Link Magic" scraping phase from backend/main.py.

Deletes everything between the PHASE 3 marker comment and the
following ``return len(df)`` statement. Prints a status line and exits
non-zero when either boundary cannot be located.
"""
import sys

# Target file; path is relative to the repo root this script is run from.
MAIN_PY_PATH = 'fotograf-de-scraper/backend/main.py'

# Start of the section to delete (the marker comment itself is removed).
START_MARKER = '# --- PHASE 3: Link Magic (Scrape Quick Login URLs) ---'
# First statement after the section; it is kept.
END_MARKER = 'return len(df)'


def remove_link_magic(content):
    """Return *content* with the Link Magic section removed.

    Returns None when either marker is missing. The end marker is
    searched starting at the start marker's position: an earlier,
    unrelated ``return len(df)`` elsewhere in the file would otherwise
    give end < start and the splice would duplicate/corrupt the file.
    """
    start = content.find(START_MARKER)
    if start == -1:
        return None
    end = content.find(END_MARKER, start)
    if end == -1:
        return None
    return content[:start] + content[end:]


def main():
    """Apply the removal to MAIN_PY_PATH; return a process exit code."""
    # main.py contains non-ASCII German strings; pin UTF-8 explicitly so
    # a non-UTF-8 platform default encoding cannot corrupt the file.
    with open(MAIN_PY_PATH, 'r', encoding='utf-8') as f:
        content = f.read()
    new_content = remove_link_magic(content)
    if new_content is None:
        print("Could not find boundaries")
        return 1
    with open(MAIN_PY_PATH, 'w', encoding='utf-8') as f:
        f.write(new_content)
    print("Link magic removed successfully")
    return 0


if __name__ == "__main__":
    sys.exit(main())

118
patch_reminder.py Normal file
View File

@@ -0,0 +1,118 @@
"""Rewrite the reminder aggregation in backend/main.py.

Replaces the quick-login-link based aggregation (which skipped children
without a scraped link) with Zugangscode + login-URL based links, so no
recipients are dropped. Prints a status line and exits non-zero when the
expected old code block is not found.
"""
import sys

# Target file; path is relative to the repo root this script is run from.
MAIN_PY_PATH = 'fotograf-de-scraper/backend/main.py'

# Exact source block to replace (must match byte-for-byte).
OLD_CODE = """ # 3. Aggregate results by Email
aggregation = {}
missing_links_count = 0
for c in candidates:
email = c.email_eltern.lower()
# Skip if this email already has a purchase for ANOTHER child
if exclude_purchased_emails and email in purchased_emails:
continue
# STRICT LINK CHECK: If we don't have a scraped Quick Login URL, skip this child.
# We don't want to send broken /login/access/ links.
if not c.quick_login_url:
missing_links_count += 1
continue
if email not in aggregation:
aggregation[email] = {
"email": email,
"parent_name": c.vorname_eltern if c.vorname_eltern else "Liebe Eltern",
"children": [],
"links": []
}
# Add child name
child_name = c.vorname_kind or ""
child_label = "Familienbilder" if child_name.lower() == "familie" else child_name
if child_label and child_label not in aggregation[email]["children"]:
aggregation[email]["children"].append(child_label)
# Add Quick Login Link (Guaranteed to exist here)
html_link = f'<a href="{c.quick_login_url}">Fotos von {child_label}</a>'
if html_link not in aggregation[email]["links"]:
aggregation[email]["links"].append(html_link)
# 4. Format for Supermailer/Gmail
final_result = []
for email, data in aggregation.items():
children_str = " und ".join(data["children"]) if len(data["children"]) > 1 else (data["children"][0] if data["children"] else "Eurem Kind")
links_html = "".join([f"{l}<br>" for l in data["links"]])
final_result.append({
"E-Mail-Adresse Käufer": email,
"Name Käufer": data["parent_name"],
"Kindernamen": children_str,
"Anzahl Kinder": len(data["children"]),
"LinksHTML": links_html
})
progress_msg = f"Analyse fertig! {len(final_result)} Empfänger identifiziert."
if missing_links_count > 0:
progress_msg += f" (Hinweis: {missing_links_count} Kinder ignoriert, da Quick-Login-Link fehlt. Bitte vorher 'Daten abgleichen' drücken!)"
task_store[task_id] = {"""
# Replacement block: uses Zugangscode + static login URL, drops the
# quick-login-link requirement and the missing_links_count bookkeeping.
NEW_CODE = """ # 3. Aggregate results by Email
aggregation = {}
base_url = "https://kinderfoto-erding.fotograf.de" if account_type == "kiga" else "https://kinderfotos-erding.fotograf.de"
login_url = f"{base_url}/login"
for c in candidates:
email = c.email_eltern.lower()
# Skip if this email already has a purchase for ANOTHER child
if exclude_purchased_emails and email in purchased_emails:
continue
if email not in aggregation:
aggregation[email] = {
"email": email,
"parent_name": c.vorname_eltern if c.vorname_eltern else "Liebe Eltern",
"children": [],
"links": []
}
# Add child name
child_name = c.vorname_kind or ""
child_label = "Familienbilder" if child_name.lower() == "familie" else child_name
if child_label and child_label not in aggregation[email]["children"]:
aggregation[email]["children"].append(child_label)
# Add Zugangscode and Login Link
html_link = f'Fotos von {child_label}: Code <b>{c.zugangscode}</b> (<a href="{login_url}">Hier einloggen</a>)'
if html_link not in aggregation[email]["links"]:
aggregation[email]["links"].append(html_link)
# 4. Format for Supermailer/Gmail
final_result = []
for email, data in aggregation.items():
children_str = " und ".join(data["children"]) if len(data["children"]) > 1 else (data["children"][0] if data["children"] else "Eurem Kind")
links_html = "".join([f"{l}<br>" for l in data["links"]])
final_result.append({
"E-Mail-Adresse Käufer": email,
"Name Käufer": data["parent_name"],
"Kindernamen": children_str,
"Anzahl Kinder": len(data["children"]),
"LinksHTML": links_html
})
progress_msg = f"Analyse fertig! {len(final_result)} Empfänger identifiziert."
task_store[task_id] = {"""


def patch_reminder_code(content):
    """Return *content* with OLD_CODE replaced by NEW_CODE.

    Returns None when OLD_CODE is not present, so the caller can
    report the failure instead of silently rewriting nothing.
    """
    if OLD_CODE not in content:
        return None
    return content.replace(OLD_CODE, NEW_CODE)


def main():
    """Apply the patch to MAIN_PY_PATH; return a process exit code."""
    # The patched code contains umlauts ("Käufer", "Empfänger",
    # "drücken"); pin UTF-8 explicitly so a non-UTF-8 platform default
    # encoding cannot corrupt the file on read or write.
    with open(MAIN_PY_PATH, 'r', encoding='utf-8') as f:
        content = f.read()
    patched = patch_reminder_code(content)
    if patched is None:
        print("Old code not found")
        return 1
    with open(MAIN_PY_PATH, 'w', encoding='utf-8') as f:
        f.write(patched)
    print("Patched successfully")
    return 0


if __name__ == "__main__":
    sys.exit(main())