[34588f42] Keine Zusammenfassung angegeben.

Keine Zusammenfassung angegeben.
This commit is contained in:
2026-04-18 20:58:31 +00:00
parent 02b17d53ea
commit 1ae8b3e353
7 changed files with 186 additions and 29 deletions

View File

@@ -1 +1 @@
{"task_id": "34588f42-8544-8046-85d4-d7895ed9b29c", "token": "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", "readme_path": "readme.md", "session_start_time": "2026-04-18T13:58:52.409921"}
{"task_id": "34588f42-8544-8046-85d4-d7895ed9b29c", "token": "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", "readme_path": "readme.md", "session_start_time": "2026-04-18T20:58:29.155246"}

View File

@@ -63,6 +63,7 @@ class JobParticipant(Base):
gruppe = Column(String, nullable=True)
logins = Column(Integer, default=0)
has_orders = Column(Integer, default=0) # 0 for false, 1 for true
digital_package_ordered = Column(Integer, default=0) # 0 for false, 1 for true
last_synced = Column(DateTime, default=datetime.datetime.utcnow)
Base.metadata.create_all(bind=engine)

View File

@@ -0,0 +1,49 @@
"""Debug helper: log in to fotograf.de for the most recently updated job and
dump its customer-orders page (raw HTML + screenshot) for offline inspection."""
import os
import sys
import time

from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from database import Job
from main import setup_driver, login

load_dotenv()

engine = create_engine("sqlite:////app/data/fotograf_jobs.db")
Session = sessionmaker(bind=engine)
db = Session()

# Use the most recently updated job as the debugging target.
job = db.query(Job).order_by(Job.last_updated.desc()).first()
if not job:
    print("No jobs found in database.")
    sys.exit(1)

print(f"Using Job ID: {job.id} ({job.name}), Account: {job.account_type}")

# Credentials are stored per account type, e.g. FOO_USER / FOO_PW in .env.
username = os.getenv(f"{job.account_type.upper()}_USER")
password = os.getenv(f"{job.account_type.upper()}_PW")
if not username or not password:
    # BUGFIX: previously missing env vars were passed as None into login().
    print(f"Missing credentials for account type '{job.account_type}'.")
    sys.exit(1)

driver = setup_driver()
if not driver:
    print("Failed to init driver")
    sys.exit(1)

try:
    if not login(driver, username, password):
        print("Login failed")
        sys.exit(1)

    orders_url = f"https://app.fotograf.de/config_jobs_orders/index/{job.id}/customer_orders"
    print(f"Navigating to {orders_url}")
    driver.get(orders_url)
    time.sleep(5)  # crude wait; the orders table is rendered client-side

    with open("orders_page.html", "w", encoding="utf-8") as f:
        f.write(driver.page_source)
    driver.save_screenshot("orders_page.png")
    print("Saved orders_page.html and orders_page.png")
finally:
    # BUGFIX: always quit the driver, even when navigation or the dump
    # raises (or sys.exit fires), so no orphaned browser process is left.
    driver.quit()

View File

@@ -489,22 +489,17 @@ task_store: Dict[str, Dict[str, Any]] = {}
def process_statistics(task_id: str, job_id: str, account_type: str):
logger.info(f"Task {task_id}: Starting fast statistics calculation for job {job_id}")
task_store[task_id] = {"status": "running", "progress": "Synchronisiere Daten von Fotograf.de...", "result": None}
task_store[task_id] = {"status": "running", "progress": "Berechne Statistiken...", "result": None}
db = SessionLocal()
try:
# 1. Sync data from CSV
try:
sync_participants(job_id, account_type, db)
except Exception as sync_err:
logger.error(f"Sync failed during statistics: {sync_err}")
count = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).count()
if count == 0:
task_store[task_id] = {"status": "error", "progress": f"Synchronisierung fehlgeschlagen: {str(sync_err)}"}
return
# Check if we have data at all
count = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).count()
if count == 0:
task_store[task_id] = {"status": "error", "progress": "Keine Daten vorhanden. Bitte erst oben auf 'Daten abgleichen' klicken."}
return
# 2. Query DB and group by 'gruppe'
task_store[task_id]["progress"] = "Berechne Statistiken..."
# Query DB and group by 'gruppe'
# Get all participants for this job
participants = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).all()
@@ -518,12 +513,13 @@ def process_statistics(task_id: str, job_id: str, account_type: str):
"Album": g_name,
"Kinder_insgesamt": 0,
"Kinder_mit_Käufen": 0,
"Kinder_Alle_Bilder_gekauft": 0 # Not available in CSV, setting to 0 or estimates
"Kinder_Alle_Bilder_gekauft": 0
}
groups[g_name]["Kinder_insgesamt"] += 1
if p.has_orders:
groups[g_name]["Kinder_mit_Käufen"] += 1
if p.digital_package_ordered:
groups[g_name]["Kinder_Alle_Bilder_gekauft"] += 1
statistics = list(groups.values())
statistics.sort(key=lambda x: x["Album"])
@@ -541,23 +537,17 @@ def process_statistics(task_id: str, job_id: str, account_type: str):
def process_reminder_analysis(task_id: str, job_id: str, account_type: str):
logger.info(f"Task {task_id}: Starting fast reminder analysis for job {job_id}")
task_store[task_id] = {"status": "running", "progress": "Synchronisiere Daten von Fotograf.de...", "result": None}
task_store[task_id] = {"status": "running", "progress": "Analysiere Datenbank-Einträge...", "result": None}
db = SessionLocal()
try:
# 1. Sync data from CSV (This takes ~20s and gets all parent emails, logins and orders)
try:
sync_participants(job_id, account_type, db)
except Exception as sync_err:
logger.error(f"Sync failed during reminder analysis: {sync_err}")
# Continue anyway if we have some data, or fail if we have none
count = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).count()
if count == 0:
task_store[task_id] = {"status": "error", "progress": f"Synchronisierung fehlgeschlagen: {str(sync_err)}"}
return
# Check if we have data at all
count = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).count()
if count == 0:
task_store[task_id] = {"status": "error", "progress": "Keine Daten vorhanden. Bitte erst oben auf 'Daten abgleichen' klicken."}
return
# 2. Query DB for potential candidates (Logins <= 1 and No Orders)
task_store[task_id]["progress"] = "Analysiere Datenbank-Einträge..."
# Query DB for potential candidates (Logins <= 1 and No Orders)
candidates = db.query(JobParticipant).filter(
JobParticipant.job_id == job_id,
@@ -1073,32 +1063,119 @@ def sync_participants(job_id: str, account_type: str, db: Session):
participant.last_synced = datetime.datetime.utcnow()
db.commit()
logger.info(f"Successfully synced {len(df)} participants for job {job_id}")
logger.info(f"Successfully synced {len(df)} participants from CSV.")
# --- PHASE 2: Scrape Orders for Digital Packages (Price Magic) ---
try:
orders_url = f"https://app.fotograf.de/config_jobs_orders/{job_id}/customer_orders"
logger.info(f"Navigating to orders page for price magic: {orders_url}")
driver.get(orders_url)
time.sleep(3) # Wait for page/table to load
# Find all order rows
order_rows = driver.find_elements(By.XPATH, "//table/tbody/tr")
logger.info(f"Found {len(order_rows)} order rows to analyze.")
digital_matches = 0
for row in order_rows:
try:
cols = row.find_elements(By.TAG_NAME, "td")
if len(cols) < 11: continue
fname = cols[4].text.strip()
lname = cols[5].text.strip()
sum_text = cols[8].text.strip()
status_text = cols[10].text.strip()
# Parse Sum (e.g., "58,90 €")
clean_sum_text = sum_text.replace("€", "").replace(",", ".").replace(" ", "").strip()
try:
order_sum = float(clean_sum_text)
except:
order_sum = 0.0
is_digital = False
# PRICE MAGIC: Defined package prices (regular & discounted)
# Digital Single: 58.90 / 53.90
# Digital Siblings: 109.90 / 94.90
# Digital Family: 75.90 / 70.90
target_prices = [58.90, 53.90, 109.90, 94.90, 75.90, 70.90]
if any(abs(order_sum - p) < 0.01 for p in target_prices):
is_digital = True
# STATUS FALLBACK: If status already says download
if "heruntergeladen" in status_text.lower() or "download" in status_text.lower():
is_digital = True
if is_digital and fname and lname:
# Update participants matching these parents
db.query(JobParticipant).filter(
JobParticipant.job_id == job_id,
JobParticipant.vorname_eltern == fname,
JobParticipant.nachname_eltern == lname
).update({JobParticipant.digital_package_ordered: 1})
digital_matches += 1
except Exception as row_err:
logger.warning(f"Error parsing order row: {row_err}")
continue
db.commit()
logger.info(f"Price Magic complete: Identified {digital_matches} digital packages.")
except Exception as order_err:
logger.error(f"Failed to scrape orders for price magic: {order_err}")
return len(df)
finally:
driver.quit()
@app.get("/api/jobs/{job_id}/fast-stats")
async def get_fast_stats(job_id: str, db: Session = Depends(get_db)):
    """Return per-group ("Album") purchase statistics for a job, computed
    purely from already-synced database rows — no scraping, so it is fast.

    Each entry counts total children, children with any order, and children
    whose parents bought a digital (all-images) package; result is sorted
    alphabetically by group name. Empty list when the job has no rows.
    """
    rows = db.query(JobParticipant).filter(JobParticipant.job_id == job_id).all()
    if not rows:
        return []

    by_group = {}
    for participant in rows:
        label = participant.gruppe or "Unbekannt"
        stats = by_group.setdefault(label, {
            "Album": label,
            "Kinder_insgesamt": 0,
            "Kinder_mit_Käufen": 0,
            "Kinder_Alle_Bilder_gekauft": 0,
        })
        stats["Kinder_insgesamt"] += 1
        if participant.has_orders:
            stats["Kinder_mit_Käufen"] += 1
        if participant.digital_package_ordered:
            stats["Kinder_Alle_Bilder_gekauft"] += 1

    # Stable alphabetical ordering for the frontend table.
    return sorted(by_group.values(), key=lambda entry: entry["Album"])

View File

@@ -0,0 +1,18 @@
"""One-off schema migration: add the ``digital_package_ordered`` column to
``job_participants`` (SQLite has no ``ADD COLUMN IF NOT EXISTS``, so the
duplicate-column error is used to detect an already-applied migration)."""
import os
import sqlite3

# Prefer the container path; fall back to the local checkout layout.
db_path = "/app/data/fotograf_jobs.db"
if not os.path.exists(db_path):
    db_path = "fotograf-de-scraper/backend/data/fotograf_jobs.db"

conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()
    try:
        cursor.execute(
            "ALTER TABLE job_participants ADD COLUMN digital_package_ordered INTEGER DEFAULT 0;"
        )
        print("Column 'digital_package_ordered' added successfully.")
    except sqlite3.OperationalError as err:
        # BUGFIX: only a duplicate-column error means the migration already
        # ran. Anything else (missing table, locked/unopenable DB) must
        # surface instead of being reported as "already exists".
        if "duplicate column" in str(err).lower():
            print("Column 'digital_package_ordered' already exists.")
        else:
            raise
    conn.commit()
finally:
    # BUGFIX: close the connection even when the migration fails.
    conn.close()

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB