Scope the purchased-email exclusion to the current job; harden patch_filter.py.

NOTE(review): the line structure and leading indentation of this patch were
reconstructed from a whitespace-collapsed copy (8-space base indent assumed
inside process_reminder_analysis) - confirm against the real main.py.
NOTE(review): the blob hash on the patch_filter.py index line was not
recomputed after the script was revised.

diff --git a/fotograf-de-scraper/backend/main.py b/fotograf-de-scraper/backend/main.py
index 340866197..6a0a6caee 100644
--- a/fotograf-de-scraper/backend/main.py
+++ b/fotograf-de-scraper/backend/main.py
@@ -550,18 +550,19 @@ def process_reminder_analysis(task_id: str, job_id: str, account_type: str, max_
             task_store[task_id] = {"status": "error", "progress": "Keine Daten vorhanden. Bitte erst oben auf 'Daten abgleichen' klicken."}
             return
 
-        # 1. Get emails that have ALREADY purchased anything (in ANY job we have in DB)
+        # 1. Get emails that have ALREADY purchased anything (in THIS specific job)
         purchased_emails = set()
         if exclude_purchased_emails:
             from sqlalchemy import or_
-            # We look globally across the whole job_participants table
+            # We look ONLY within the CURRENT job to find siblings that were already purchased
             purchased_results = db.query(JobParticipant.email_eltern).filter(
+                JobParticipant.job_id == job_id,
                 or_(JobParticipant.has_orders == 1, JobParticipant.digital_package_ordered == 1),
                 JobParticipant.email_eltern != "",
                 JobParticipant.email_eltern != None
             ).all()
             purchased_emails = {r[0].lower() for r in purchased_results}
-            logger.info(f"Task {task_id}: Found {len(purchased_emails)} unique emails with existing purchases in DB to exclude.")
+            logger.info(f"Task {task_id}: Found {len(purchased_emails)} unique emails with existing purchases in THIS job to exclude.")
 
         # 2. Query DB for potential candidates (Logins <= max_logins and No Orders)
         candidates = db.query(JobParticipant).filter(
diff --git a/patch_filter.py b/patch_filter.py
new file mode 100644
index 000000000..834fae23e
--- /dev/null
+++ b/patch_filter.py
@@ -0,0 +1,52 @@
+"""One-off patch: scope the purchased-email exclusion to the current job.
+
+Replaces step 1 of the reminder analysis in the scraper backend's main.py so
+that the query collecting parent emails with existing purchases is filtered
+by ``JobParticipant.job_id == job_id``. Exits with status 1 when the expected
+old code is not present, so callers can detect that nothing was patched.
+"""
+import sys
+
+TARGET = 'fotograf-de-scraper/backend/main.py'
+
+# Python source files are UTF-8 by default; pass the encoding explicitly so
+# reading and writing do not depend on the platform's locale encoding.
+with open(TARGET, 'r', encoding='utf-8') as f:
+    content = f.read()
+
+# Must match the current text of main.py exactly, leading whitespace included.
+old_code = """        # 1. Get emails that have ALREADY purchased anything (in ANY job we have in DB)
+        purchased_emails = set()
+        if exclude_purchased_emails:
+            from sqlalchemy import or_
+            # We look globally across the whole job_participants table
+            purchased_results = db.query(JobParticipant.email_eltern).filter(
+                or_(JobParticipant.has_orders == 1, JobParticipant.digital_package_ordered == 1),
+                JobParticipant.email_eltern != "",
+                JobParticipant.email_eltern != None
+            ).all()
+            purchased_emails = {r[0].lower() for r in purchased_results}
+            logger.info(f"Task {task_id}: Found {len(purchased_emails)} unique emails with existing purchases in DB to exclude.")"""
+
+new_code = """        # 1. Get emails that have ALREADY purchased anything (in THIS specific job)
+        purchased_emails = set()
+        if exclude_purchased_emails:
+            from sqlalchemy import or_
+            # We look ONLY within the CURRENT job to find siblings that were already purchased
+            purchased_results = db.query(JobParticipant.email_eltern).filter(
+                JobParticipant.job_id == job_id,
+                or_(JobParticipant.has_orders == 1, JobParticipant.digital_package_ordered == 1),
+                JobParticipant.email_eltern != "",
+                JobParticipant.email_eltern != None
+            ).all()
+            purchased_emails = {r[0].lower() for r in purchased_results}
+            logger.info(f"Task {task_id}: Found {len(purchased_emails)} unique emails with existing purchases in THIS job to exclude.")"""
+
+if old_code not in content:
+    print("Old code not found")
+    # Non-zero exit status so a failed patch is visible to the caller.
+    sys.exit(1)
+
+with open(TARGET, 'w', encoding='utf-8') as f:
+    f.write(content.replace(old_code, new_code))
+print("Filter logic patched successfully")