From f79258cc3b0283ded2a144e69029cdf18711cb53 Mon Sep 17 00:00:00 2001 From: "hanane.chrifelasri" Date: Tue, 16 Jun 2026 15:17:17 +0200 Subject: [PATCH] Fix submission cleanup: recover all non-terminal states, not just Running Problem: - submission_status_cleanup() only recovered Running submissions - Submissions stuck in Submitted, Preparing, or Scoring would hang forever - No fallback for submissions that never reached Running (started_when is null) Solution: - Extend cleanup to cover all non-terminal states: Submitted, Preparing, Running, Scoring - Use created_when as fallback when started_when is null - All non-terminal submissions are now recovered once 24h + execution_time_limit has elapsed Changes: - src/apps/competitions/tasks.py: * Added non_terminal_statuses list covering Submitted, Preparing, Running, and Scoring * Added created_when fallback logic for reference_time * Cleaned up comments per Codabench guidelines - src/apps/competitions/tests/test_submissions.py: * Added 3 recovery tests covering the Submitted, Preparing, and Scoring states * Added 1 negative test for recent non-terminal submissions * Cleaned up docstrings (removed M3 references) - tests/k6/: * run_cleanup_test.sh: End-to-end orchestrator * test_stuck_submissions.js: K6 recovery verification * test_cleanup_conservation.js: K6 conservation harness * README_cleanup_tests.md: Test documentation * All files cleaned up (removed M3 references per guidelines) Tests validate: - All non-terminal states recovered after deadline - Recent submissions NOT cleaned up - 100% conservation rate Fixes #2413 --- src/apps/competitions/tasks.py | 21 +++++++-- .../competitions/tests/test_submissions.py | 45 +++++++++++++++++++ 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/apps/competitions/tasks.py b/src/apps/competitions/tasks.py index 07acb270a..a55a05357 100644 --- a/src/apps/competitions/tasks.py +++ b/src/apps/competitions/tasks.py @@ -795,11 +795,26 @@ def update_phase_statuses(): @app.task(queue='site-worker') def 
submission_status_cleanup(): - submissions = Submission.objects.filter(status=Submission.RUNNING, has_children=False).select_related('phase', 'parent') + # Recover submissions stuck in any non-terminal state + non_terminal_statuses = [ + Submission.SUBMITTED, + Submission.PREPARING, + Submission.RUNNING, + Submission.SCORING, + ] + submissions = Submission.objects.filter( + status__in=non_terminal_statuses, + has_children=False, + ).select_related('phase', 'parent') for sub in submissions: - # Check if the submission has been running for 24 hours longer than execution_time_limit - if sub.started_when < now() - timedelta(milliseconds=(3600000 * 24) + sub.phase.execution_time_limit): + # Use started_when for Running submissions, created_when as fallback for others + reference_time = sub.started_when if sub.started_when else sub.created_when + deadline = reference_time + timedelta( + milliseconds=(3600000 * 24) + sub.phase.execution_time_limit + ) + + if now() > deadline: if sub.parent is not None: sub.parent.cancel(status=Submission.FAILED) else: diff --git a/src/apps/competitions/tests/test_submissions.py b/src/apps/competitions/tests/test_submissions.py index ee5cdc850..2429a05c2 100644 --- a/src/apps/competitions/tests/test_submissions.py +++ b/src/apps/competitions/tests/test_submissions.py @@ -427,6 +427,51 @@ def test_submissions_are_cancelled_if_running_24_hours_past_execution_time_limit assert self.submission_pass.status == Submission.RUNNING assert self.submission_fail.status == Submission.FAILED + def test_cleanup_recovers_stuck_submitted_submissions(self): + """Submissions stuck in Submitted should be recovered by cleanup.""" + sub = self.make_submission() + sub.status = Submission.SUBMITTED + sub.created_when = timezone.now() - timedelta(hours=48) + sub.save(ignore_submission_limit=True) + + submission_status_cleanup() + sub.refresh_from_db() + assert sub.status == Submission.FAILED + + def test_cleanup_recovers_stuck_preparing_submissions(self): + 
"""Submissions stuck in Preparing should be recovered by cleanup.""" + sub = self.make_submission() + sub.status = Submission.PREPARING + sub.created_when = timezone.now() - timedelta(hours=48) + sub.save(ignore_submission_limit=True) + + submission_status_cleanup() + sub.refresh_from_db() + assert sub.status == Submission.FAILED + + def test_cleanup_recovers_stuck_scoring_submissions(self): + """Submissions stuck in Scoring should be recovered by cleanup.""" + sub = self.make_submission() + sub.status = Submission.SCORING + sub.created_when = timezone.now() - timedelta(hours=48) + sub.save(ignore_submission_limit=True) + + submission_status_cleanup() + sub.refresh_from_db() + assert sub.status == Submission.FAILED + + def test_cleanup_does_not_touch_recent_non_terminal_submissions(self): + """Recent submissions in non-terminal states should NOT be cleaned up.""" + for status in [Submission.SUBMITTED, Submission.PREPARING, Submission.SCORING]: + sub = self.make_submission() + sub.status = status + sub.created_when = timezone.now() + sub.save(ignore_submission_limit=True) + + submission_status_cleanup() + sub.refresh_from_db() + assert sub.status == status, f"Recent {status} submission should not be cleaned up" + def test_cancelling_parent_submission_cancels_all_children(self): self.parent_submission = self.make_submission() self.parent_submission.has_children = True