Docs: Aktualisierung der Dokumentation für Task [31e88f42]

This commit is contained in:
2026-03-09 12:38:08 +00:00
parent 2f8dd766cf
commit 3ee995173c
5 changed files with 64 additions and 45 deletions

View File

@@ -30,7 +30,8 @@ class JobQueue:
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
error_msg TEXT,
next_try_at TIMESTAMP
next_try_at TIMESTAMP,
retry_count INTEGER DEFAULT 0
)
""")
# Migration for existing DBs
@@ -42,6 +43,9 @@ class JobQueue:
try: conn.execute("ALTER TABLE jobs ADD COLUMN associate_name TEXT")
except sqlite3.OperationalError: pass
try: conn.execute("ALTER TABLE jobs ADD COLUMN retry_count INTEGER DEFAULT 0")
except sqlite3.OperationalError: pass
conn.commit()
logger.info("Database initialized with PRAGMA settings (DELETE, NORMAL, mmap=0).")
except Exception as e:
@@ -87,7 +91,7 @@ class JobQueue:
cursor = conn.cursor()
try:
cursor.execute("""
SELECT id, event_type, payload, created_at
SELECT id, event_type, payload, created_at, retry_count
FROM jobs
WHERE status = 'PENDING'
AND (next_try_at IS NULL OR next_try_at <= datetime('now'))
@@ -118,22 +122,25 @@ class JobQueue:
return job
def retry_job_later(self, job_id, current_retry_count, delay_seconds=60, error_msg=None):
    """Reschedule a job for a later attempt, or fail it once retries are exhausted.

    If ``current_retry_count`` has reached the retry limit, the job is handed
    to ``self.fail_job`` and nothing is rescheduled. Otherwise the job row is
    set back to ``PENDING`` with an incremented ``retry_count`` and a
    ``next_try_at`` timestamp ``delay_seconds`` in the future (UTC).

    Args:
        job_id: Value of the ``id`` column of the row in the ``jobs`` table.
        current_retry_count: Number of retries already recorded for the job.
        delay_seconds: Seconds from now until the job becomes eligible again.
        error_msg: Optional description of the failure that caused the retry.
            When omitted or empty, the error message already stored on the
            job row is left unchanged.
    """
    MAX_RETRIES = 5
    if current_retry_count >= MAX_RETRIES:
        fail_msg = f"Job reached maximum retry limit of {MAX_RETRIES}. Last error: {error_msg}"
        logger.error(f"Job {job_id} FAILED permanently: {fail_msg}")
        self.fail_job(job_id, fail_msg)
        return

    next_try = datetime.utcnow() + timedelta(seconds=delay_seconds)
    new_retry_count = current_retry_count + 1

    # Bind NULL when there is no new message so COALESCE keeps the stored one;
    # str(None) would otherwise persist the literal text "None".
    stored_error = str(error_msg) if error_msg else None

    conn = sqlite3.connect(DB_PATH, timeout=30)
    try:
        conn.execute(
            "UPDATE jobs SET status = 'PENDING', next_try_at = ?, updated_at = datetime('now'), "
            "error_msg = COALESCE(?, error_msg), retry_count = ? WHERE id = ?",
            (next_try, stored_error, new_retry_count, job_id),
        )
        conn.commit()
        logger.warning(f"Job {job_id} set to RETRY (Attempt {new_retry_count}/{MAX_RETRIES}). Next attempt at {next_try}.")
    except Exception as e:
        logger.error(f"❌ Failed to set job {job_id} to RETRY: {e}", exc_info=True)
        conn.rollback()
    finally:
        # The sqlite3 connection context manager only scopes the transaction;
        # the connection has to be closed explicitly.
        conn.close()
@@ -271,7 +278,6 @@ class JobQueue:
if entity_id in id_to_runs:
for run in id_to_runs[entity_id]:
run_latest_time = datetime.strptime(run['updated_at'], "%Y-%m-%d %H:%M:%S")
# If this job is within 15 mins of the run's activity
if abs((run_latest_time - job_time).total_seconds()) < 900:
target_run = run
break
@@ -316,7 +322,12 @@ class JobQueue:
end = datetime.strptime(target_run["updated_at"], "%Y-%m-%d %H:%M:%S")
diff = end - start
seconds = int(diff.total_seconds())
target_run["duration"] = f"{seconds}s" if seconds < 60 else f"{seconds // 60}m {seconds % 60}s"
# Display a minimal duration for skipped runs to avoid confusion
if target_run["status"] == "SKIPPED":
target_run["duration"] = "~1s"
else:
target_run["duration"] = f"{seconds}s" if seconds < 60 else f"{seconds // 60}m {seconds % 60}s"
except: pass
# Resolve Name & Associate (if not already set from a newer job in this cluster)