fix(competitor-analysis): final migration fixes and documentation updates

2026-05-15 19:08:54 +00:00
parent efdd134556
commit 84d045421a
5 changed files with 157 additions and 103 deletions
--- a/fotograf-de-scraper/backend/main.py
+++ b/fotograf-de-scraper/backend/main.py
@@ -583,8 +583,10 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_

        # 3. Aggregate results by Email
        aggregation = {}
-        missing_links_count = 0
        
+        base_url = "https://kinderfoto-erding.fotograf.de" if account_type == "kiga" else "https://kinderfotos-erding.fotograf.de"
+        login_url = f"{base_url}/login"
+
        for c in candidates:
            email = c.email_eltern.lower()
            
@@ -592,12 +594,6 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
            if exclude_purchased_emails and email in purchased_emails:
                continue
                
-            # STRICT LINK CHECK: If we don't have a scraped Quick Login URL, skip this child.
-            # We don't want to send broken /login/access/ links.
-            if not c.quick_login_url:
-                missing_links_count += 1
-                continue
-
            if email not in aggregation:
                aggregation[email] = {
                    "email": email,
@@ -612,8 +608,8 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
            if child_label and child_label not in aggregation[email]["children"]:
                aggregation[email]["children"].append(child_label)
            
-            # Add Quick Login Link (Guaranteed to exist here)
-            html_link = f'<a href="{c.quick_login_url}">Fotos von {child_label}</a>'
+            # Add Zugangscode and Login Link
+            html_link = f'Fotos von {child_label}: Code <b>{c.zugangscode}</b> (<a href="{login_url}">Hier einloggen</a>)'
            if html_link not in aggregation[email]["links"]:
                aggregation[email]["links"].append(html_link)

@@ -632,8 +628,6 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
            })

        progress_msg = f"Analyse fertig! {len(final_result)} Empfänger identifiziert."
-        if missing_links_count > 0:
-            progress_msg += f" (Hinweis: {missing_links_count} Kinder ignoriert, da Quick-Login-Link fehlt. Bitte vorher 'Daten abgleichen' drücken!)"

        task_store[task_id] = {
            "status": "completed", 
@@ -1169,97 +1163,6 @@ def sync_participants(job_id: str, account_type: str, db: Session, task_id: str
            except Exception as order_err:
                logger.error(f"Failed to scrape orders for price magic: {order_err}")

-            # --- PHASE 3: Link Magic (Scrape Quick Login URLs) ---
-            try:
-                # Find candidates for reminders who don't have a link yet
-                # We prioritize those with few logins and no orders
-                link_candidates = db.query(JobParticipant).filter(
-                    JobParticipant.job_id == job_id,
-                    JobParticipant.has_orders == 0,
-                    JobParticipant.logins <= 5,
-                    JobParticipant.quick_login_url == None
-                ).all()
-                
-                if link_candidates:
-                    if task_id: task_store[task_id]["progress"] = f"Sammle Login-Links für {len(link_candidates)} Personen (Link Magic)..."
-                    logger.info(f"Link Magic: Identified {len(link_candidates)} candidates for link scraping.")
-                    
-                    # Navigate back to Persons tab
-                    albums_overview_url = f"https://app.fotograf.de/config_jobs_photos/index/{job_id}"
-                    logger.info(f"Navigating to Albums overview: {albums_overview_url}")
-                    driver.get(albums_overview_url)
-                    
-                    # Find all album links
-                    album_elements = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["album_overview_link"])))
-                    albums = [{"name": e.text, "url": e.get_attribute("href")} for e in album_elements]
-                    
-                    codes_to_find = {c.zugangscode: c for c in link_candidates}
-                    links_found = 0
-                    
-                    for album in albums:
-                        if not codes_to_find: break
-                        logger.info(f"Searching for links in album: {album['name']}")
-                        driver.get(album['url'])
-                        
-                        try:
-                            total_codes_text = wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["access_code_count"]))).text
-                            num_pages = math.ceil(int(total_codes_text) / 20)
-                            
-                            for page_num in range(1, num_pages + 1):
-                                if not codes_to_find: break
-                                if page_num > 1:
-                                    driver.get(album['url'] + f"?page_guest_accesses={page_num}")
-                                
-                                person_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))
-                                
-                                # Map of codes on this page to their communication link
-                                page_links = {}
-                                for row in person_rows:
-                                    row_text = row.text
-                                    for code in list(codes_to_find.keys()):
-                                        if code in row_text:
-                                            try:
-                                                comm_link = row.find_element(By.XPATH, SELECTORS["person_access_code_link"]).get_attribute("href")
-                                                page_links[code] = comm_link
-                                            except: pass
-                                
-                                # Now visit each communication page
-                                for code, comm_link in page_links.items():
-                                    if code not in codes_to_find: continue
-                                    logger.debug(f"Scraping link for code {code}...")
-                                    if task_id: task_store[task_id]["progress"] = f"Hole Link {links_found+1} / {len(link_candidates)}..."
-                                    driver.get(comm_link)
-                                    for attempt in range(3):
-                                        try:
-                                            wait_short = WebDriverWait(driver, 5)
-                                            quick_link_el = wait_short.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["quick_login_url"])))
-                                            quick_link = quick_link_el.get_attribute("href")
-                                            
-                                            # Update DB
-                                            codes_to_find[code].quick_login_url = quick_link
-                                            del codes_to_find[code]
-                                            links_found += 1
-                                            
-                                            if links_found % 5 == 0: db.commit()
-                                            break
-                                        except Exception as e:
-                                            time.sleep(1)
-                                    else:
-                                        logger.warning(f"Could not find quick login link for {code}")
-                                
-                                # Go back to album page if we visited communication pages
-                                if page_links:
-                                    driver.get(album['url'] + (f"?page_guest_accesses={page_num}" if page_num > 1 else ""))
-                                    wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))
-                                    
-                        except Exception as album_err:
-                            logger.error(f"Error in album {album['name']}: {album_err}")
-                    
-                    db.commit()
-                    logger.info(f"Link Magic complete: Scraped {links_found} links.")
-            except Exception as link_err:
-                logger.error(f"Failed to scrape links: {link_err}")
-
            return len(df)

        finally: