diff --git a/.dev_session/SESSION_INFO b/.dev_session/SESSION_INFO index 2c3483c5c..3cc23a7d9 100644 --- a/.dev_session/SESSION_INFO +++ b/.dev_session/SESSION_INFO @@ -1 +1 @@ -{"task_id": "34588f42-8544-8046-85d4-d7895ed9b29c", "token": "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", "readme_path": "readme.md", "session_start_time": "2026-04-18T13:58:52.409921"} \ No newline at end of file +{"task_id": "34588f42-8544-8046-85d4-d7895ed9b29c", "token": "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", "readme_path": "readme.md", "session_start_time": "2026-04-18T20:58:29.155246"} \ No newline at end of file diff --git a/fotograf-de-scraper/backend/database.py b/fotograf-de-scraper/backend/database.py index 7725de85f..8f3e4732d 100644 --- a/fotograf-de-scraper/backend/database.py +++ b/fotograf-de-scraper/backend/database.py @@ -63,6 +63,7 @@ class JobParticipant(Base): gruppe = Column(String, nullable=True) logins = Column(Integer, default=0) has_orders = Column(Integer, default=0) # 0 for false, 1 for true + digital_package_ordered = Column(Integer, default=0) # 0 for false, 1 for true last_synced = Column(DateTime, default=datetime.datetime.utcnow) Base.metadata.create_all(bind=engine) diff --git a/fotograf-de-scraper/backend/inspect_orders.py b/fotograf-de-scraper/backend/inspect_orders.py new file mode 100644 index 000000000..ca86ad352 --- /dev/null +++ b/fotograf-de-scraper/backend/inspect_orders.py @@ -0,0 +1,49 @@ +import os +import sys +from dotenv import load_dotenv +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from database import Job +from main import setup_driver, login +import time + +load_dotenv() + +engine = create_engine("sqlite:////app/data/fotograf_jobs.db") +Session = sessionmaker(bind=engine) +db = Session() + +# Get latest job +job = db.query(Job).order_by(Job.last_updated.desc()).first() +if not job: + print("No jobs found in database.") + sys.exit(1) + +print(f"Using Job ID: {job.id} ({job.name}), Account: {job.account_type}") + +username = os.getenv(f"{job.account_type.upper()}_USER") +password = os.getenv(f"{job.account_type.upper()}_PW") + +driver = setup_driver() +if not driver: + print("Failed to init driver") + sys.exit(1) + +if not login(driver, username, password): + print("Login failed") + driver.quit() + sys.exit(1) + +orders_url = f"https://app.fotograf.de/config_jobs_orders/index/{job.id}/customer_orders" +print(f"Navigating to {orders_url}") +driver.get(orders_url) +time.sleep(5) # wait for page to load + +html = driver.page_source +with open("orders_page.html", "w", encoding="utf-8") as f: + f.write(html) + +driver.save_screenshot("orders_page.png") +print("Saved orders_page.html and orders_page.png") + +driver.quit() diff --git a/fotograf-de-scraper/backend/main.py b/fotograf-de-scraper/backend/main.py index 0299abde1..8b24a133e 100644 --- a/fotograf-de-scraper/backend/main.py +++ b/fotograf-de-scraper/backend/main.py @@ -489,22 +489,17 @@ task_store: Dict[str, Dict[str, Any]] = {} def process_statistics(task_id: str, job_id: str, account_type: str): logger.info(f"Task {task_id}: Starting fast statistics calculation for job {job_id}") - task_store[task_id] = {"status": "running", "progress": "Synchronisiere Daten von Fotograf.de...", "result": None} + task_store[task_id] = {"status": "running", "progress": "Berechne Statistiken...", "result": None} db = SessionLocal() try: - # 1. Sync data from CSV - try: - sync_participants(job_id, account_type, db) - except Exception as sync_err: - logger.error(f"Sync failed during statistics: {sync_err}") - count = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).count() - if count == 0: - task_store[task_id] = {"status": "error", "progress": f"Synchronisierung fehlgeschlagen: {str(sync_err)}"} - return + # Check if we have data at all + count = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).count() + if count == 0: + task_store[task_id] = {"status": "error", "progress": "Keine Daten vorhanden. Bitte erst oben auf 'Daten abgleichen' klicken."} + return - # 2. Query DB and group by 'gruppe' - task_store[task_id]["progress"] = "Berechne Statistiken..." + # Query DB and group by 'gruppe' # Get all participants for this job participants = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).all() @@ -518,12 +513,13 @@ def process_statistics(task_id: str, job_id: str, account_type: str): "Album": g_name, "Kinder_insgesamt": 0, "Kinder_mit_Käufen": 0, - "Kinder_Alle_Bilder_gekauft": 0 # Not available in CSV, setting to 0 or estimates + "Kinder_Alle_Bilder_gekauft": 0 } groups[g_name]["Kinder_insgesamt"] += 1 if p.has_orders: groups[g_name]["Kinder_mit_Käufen"] += 1 - + if p.digital_package_ordered: + groups[g_name]["Kinder_Alle_Bilder_gekauft"] += 1 statistics = list(groups.values()) statistics.sort(key=lambda x: x["Album"]) @@ -541,23 +537,17 @@ def process_statistics(task_id: str, job_id: str, account_type: str): def process_reminder_analysis(task_id: str, job_id: str, account_type: str): logger.info(f"Task {task_id}: Starting fast reminder analysis for job {job_id}") - task_store[task_id] = {"status": "running", "progress": "Synchronisiere Daten von Fotograf.de...", "result": None} + task_store[task_id] = {"status": "running", "progress": "Analysiere Datenbank-Einträge...", "result": None} db = SessionLocal() try: - # 1. Sync data from CSV (This takes ~20s and gets all parent emails, logins and orders) - try: - sync_participants(job_id, account_type, db) - except Exception as sync_err: - logger.error(f"Sync failed during reminder analysis: {sync_err}") - # Continue anyway if we have some data, or fail if we have none - count = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).count() - if count == 0: - task_store[task_id] = {"status": "error", "progress": f"Synchronisierung fehlgeschlagen: {str(sync_err)}"} - return + # Check if we have data at all + count = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).count() + if count == 0: + task_store[task_id] = {"status": "error", "progress": "Keine Daten vorhanden. Bitte erst oben auf 'Daten abgleichen' klicken."} + return - # 2. Query DB for potential candidates (Logins <= 1 and No Orders) - task_store[task_id]["progress"] = "Analysiere Datenbank-Einträge..." + # Query DB for potential candidates (Logins <= 1 and No Orders) candidates = db.query(JobParticipant).filter( JobParticipant.job_id == job_id, @@ -1073,32 +1063,119 @@ def sync_participants(job_id: str, account_type: str, db: Session): participant.last_synced = datetime.datetime.utcnow() db.commit() - logger.info(f"Successfully synced {len(df)} participants for job {job_id}") + logger.info(f"Successfully synced {len(df)} participants from CSV.") + + # --- PHASE 2: Scrape Orders for Digital Packages (Price Magic) --- + try: + orders_url = f"https://app.fotograf.de/config_jobs_orders/{job_id}/customer_orders" + logger.info(f"Navigating to orders page for price magic: {orders_url}") + driver.get(orders_url) + time.sleep(3) # Wait for page/table to load + + # Find all order rows + order_rows = driver.find_elements(By.XPATH, "//table/tbody/tr") + logger.info(f"Found {len(order_rows)} order rows to analyze.") + + digital_matches = 0 + for row in order_rows: + try: + cols = row.find_elements(By.TAG_NAME, "td") + if len(cols) < 11: continue + + fname = cols[4].text.strip() + lname = cols[5].text.strip() + sum_text = cols[8].text.strip() + status_text = cols[10].text.strip() + + # Parse Sum (e.g., "58,90 €") + clean_sum_text = sum_text.replace("€", "").replace(",", ".").replace(" ", "").strip() + try: + order_sum = float(clean_sum_text) + except: + order_sum = 0.0 + + is_digital = False + + # PRICE MAGIC: Defined package prices (regular & discounted) + # Digital Single: 58.90 / 53.90 + # Digital Siblings: 109.90 / 94.90 + # Digital Family: 75.90 / 70.90 + target_prices = [58.90, 53.90, 109.90, 94.90, 75.90, 70.90] + + if any(abs(order_sum - p) < 0.01 for p in target_prices): + is_digital = True + + # STATUS FALLBACK: If status already says download + if "heruntergeladen" in status_text.lower() or "download" in status_text.lower(): + is_digital = True + + if is_digital and fname and lname: + # Update participants matching these parents + db.query(JobParticipant).filter( + JobParticipant.job_id == job_id, + JobParticipant.vorname_eltern == fname, + JobParticipant.nachname_eltern == lname + ).update({JobParticipant.digital_package_ordered: 1}) + digital_matches += 1 + except Exception as row_err: + logger.warning(f"Error parsing order row: {row_err}") + continue + + db.commit() + logger.info(f"Price Magic complete: Identified {digital_matches} digital packages.") + + except Exception as order_err: + logger.error(f"Failed to scrape orders for price magic: {order_err}") + return len(df) finally: driver.quit() @app.get("/api/jobs/{job_id}/fast-stats") + async def get_fast_stats(job_id: str, db: Session = Depends(get_db)): + participants = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).all() + if not participants: + return [] + + groups = {} + for p in participants: + g_name = p.gruppe or "Unbekannt" + if g_name not in groups: + groups[g_name] = { + "Album": g_name, + "Kinder_insgesamt": 0, + "Kinder_mit_Käufen": 0, + "Kinder_Alle_Bilder_gekauft": 0 + } + groups[g_name]["Kinder_insgesamt"] += 1 + if p.has_orders: + groups[g_name]["Kinder_mit_Käufen"] += 1 + if p.digital_package_ordered: + + groups[g_name]["Kinder_Alle_Bilder_gekauft"] += 1 + + + statistics = list(groups.values()) statistics.sort(key=lambda x: x["Album"]) return statistics diff --git a/fotograf-de-scraper/backend/migrate_db.py b/fotograf-de-scraper/backend/migrate_db.py new file mode 100644 index 000000000..5668aa70a --- /dev/null +++ b/fotograf-de-scraper/backend/migrate_db.py @@ -0,0 +1,18 @@ +import sqlite3 +import os + +db_path = "/app/data/fotograf_jobs.db" +if not os.path.exists(db_path): + db_path = "fotograf-de-scraper/backend/data/fotograf_jobs.db" + +conn = sqlite3.connect(db_path) +cursor = conn.cursor() + +try: + cursor.execute("ALTER TABLE job_participants ADD COLUMN digital_package_ordered INTEGER DEFAULT 0;") + print("Column 'digital_package_ordered' added successfully.") +except sqlite3.OperationalError: + print("Column 'digital_package_ordered' already exists.") + +conn.commit() +conn.close() diff --git a/fotograf-de-scraper/backend/orders_page.html b/fotograf-de-scraper/backend/orders_page.html new file mode 100644 index 000000000..a4454b710 --- /dev/null +++ b/fotograf-de-scraper/backend/orders_page.html @@ -0,0 +1,12 @@ +GotPhoto | Config
Das ist wahrscheinlich nicht das, wonach Sie suchen.Wir konnten die gesuchte Seite nicht finden, aber wir bringen Sie zurück auf bekannten Boden.
Zurück zum Dashboard
\ No newline at end of file diff --git a/fotograf-de-scraper/backend/orders_page.png b/fotograf-de-scraper/backend/orders_page.png new file mode 100644 index 000000000..a5403973c Binary files /dev/null and b/fotograf-de-scraper/backend/orders_page.png differ