fix(competitor-analysis): final migration fixes and documentation updates

This commit is contained in:
2026-05-15 19:08:54 +00:00
parent efdd134556
commit 84d045421a
5 changed files with 157 additions and 103 deletions

View File

@@ -583,8 +583,10 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
# 3. Aggregate results by Email
aggregation = {}
missing_links_count = 0
+base_url = "https://kinderfoto-erding.fotograf.de" if account_type == "kiga" else "https://kinderfotos-erding.fotograf.de"
+login_url = f"{base_url}/login"
for c in candidates:
email = c.email_eltern.lower()
@@ -592,12 +594,6 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
if exclude_purchased_emails and email in purchased_emails:
continue
-# STRICT LINK CHECK: If we don't have a scraped Quick Login URL, skip this child.
-# We don't want to send broken /login/access/ links.
-if not c.quick_login_url:
-missing_links_count += 1
-continue
if email not in aggregation:
aggregation[email] = {
"email": email,
@@ -612,8 +608,8 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
if child_label and child_label not in aggregation[email]["children"]:
aggregation[email]["children"].append(child_label)
-# Add Quick Login Link (Guaranteed to exist here)
-html_link = f'<a href="{c.quick_login_url}">Fotos von {child_label}</a>'
+# Add Zugangscode and Login Link
+html_link = f'Fotos von {child_label}: Code <b>{c.zugangscode}</b> (<a href="{login_url}">Hier einloggen</a>)'
if html_link not in aggregation[email]["links"]:
aggregation[email]["links"].append(html_link)
@@ -632,8 +628,6 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
})
progress_msg = f"Analyse fertig! {len(final_result)} Empfänger identifiziert."
-if missing_links_count > 0:
-progress_msg += f" (Hinweis: {missing_links_count} Kinder ignoriert, da Quick-Login-Link fehlt. Bitte vorher 'Daten abgleichen' drücken!)"
task_store[task_id] = {
"status": "completed",
@@ -1169,97 +1163,6 @@ def sync_participants(job_id: str, account_type: str, db: Session, task_id: str
except Exception as order_err:
logger.error(f"Failed to scrape orders for price magic: {order_err}")
-# --- PHASE 3: Link Magic (Scrape Quick Login URLs) ---
-try:
-# Find candidates for reminders who don't have a link yet
-# We prioritize those with few logins and no orders
-link_candidates = db.query(JobParticipant).filter(
-JobParticipant.job_id == job_id,
-JobParticipant.has_orders == 0,
-JobParticipant.logins <= 5,
-JobParticipant.quick_login_url == None
-).all()
-if link_candidates:
-if task_id: task_store[task_id]["progress"] = f"Sammle Login-Links für {len(link_candidates)} Personen (Link Magic)..."
-logger.info(f"Link Magic: Identified {len(link_candidates)} candidates for link scraping.")
-# Navigate back to Persons tab
-albums_overview_url = f"https://app.fotograf.de/config_jobs_photos/index/{job_id}"
-logger.info(f"Navigating to Albums overview: {albums_overview_url}")
-driver.get(albums_overview_url)
-# Find all album links
-album_elements = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["album_overview_link"])))
-albums = [{"name": e.text, "url": e.get_attribute("href")} for e in album_elements]
-codes_to_find = {c.zugangscode: c for c in link_candidates}
-links_found = 0
-for album in albums:
-if not codes_to_find: break
-logger.info(f"Searching for links in album: {album['name']}")
-driver.get(album['url'])
-try:
-total_codes_text = wait.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["access_code_count"]))).text
-num_pages = math.ceil(int(total_codes_text) / 20)
-for page_num in range(1, num_pages + 1):
-if not codes_to_find: break
-if page_num > 1:
-driver.get(album['url'] + f"?page_guest_accesses={page_num}")
-person_rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))
-# Map of codes on this page to their communication link
-page_links = {}
-for row in person_rows:
-row_text = row.text
-for code in list(codes_to_find.keys()):
-if code in row_text:
-try:
-comm_link = row.find_element(By.XPATH, SELECTORS["person_access_code_link"]).get_attribute("href")
-page_links[code] = comm_link
-except: pass
-# Now visit each communication page
-for code, comm_link in page_links.items():
-if code not in codes_to_find: continue
-logger.debug(f"Scraping link for code {code}...")
-if task_id: task_store[task_id]["progress"] = f"Hole Link {links_found+1} / {len(link_candidates)}..."
-driver.get(comm_link)
-for attempt in range(3):
-try:
-wait_short = WebDriverWait(driver, 5)
-quick_link_el = wait_short.until(EC.visibility_of_element_located((By.XPATH, SELECTORS["quick_login_url"])))
-quick_link = quick_link_el.get_attribute("href")
-# Update DB
-codes_to_find[code].quick_login_url = quick_link
-del codes_to_find[code]
-links_found += 1
-if links_found % 5 == 0: db.commit()
-break
-except Exception as e:
-time.sleep(1)
-else:
-logger.warning(f"Could not find quick login link for {code}")
-# Go back to album page if we visited communication pages
-if page_links:
-driver.get(album['url'] + (f"?page_guest_accesses={page_num}" if page_num > 1 else ""))
-wait.until(EC.presence_of_all_elements_located((By.XPATH, SELECTORS["person_rows"])))
-except Exception as album_err:
-logger.error(f"Error in album {album['name']}: {album_err}")
-db.commit()
-logger.info(f"Link Magic complete: Scraped {links_found} links.")
-except Exception as link_err:
-logger.error(f"Failed to scrape links: {link_err}")
return len(df)
finally: