Skip to content

Commit 4fd44ed

Browse files
committed
modelerrors: move backoff and sleep utilities to pkg/backoff
SleepWithContext and CalculateBackoff are general-purpose utilities not specific to model errors. Move them to a new pkg/backoff package for clarity and reusability; CalculateBackoff is renamed to backoff.Calculate in the process. MaxRetryAfterWait moves there too. Deprecated wrapper variables (and a MaxRetryAfterWait alias) are left in modelerrors to maintain backward compatibility. pkg/runtime/fallback.go now imports pkg/backoff directly.

Assisted-By: docker-agent
1 parent d13541e commit 4fd44ed

5 files changed

Lines changed: 150 additions & 110 deletions

File tree

pkg/backoff/backoff.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Package backoff provides exponential backoff calculation and
2+
// context-aware sleep utilities.
3+
package backoff
4+
5+
import (
6+
"context"
7+
"math/rand/v2"
8+
"time"
9+
)
10+
11+
// Configuration constants for exponential backoff.
12+
const (
13+
baseDelay = 200 * time.Millisecond
14+
maxDelay = 2 * time.Second
15+
factor = 2.0
16+
jitter = 0.1
17+
18+
// MaxRetryAfterWait caps how long we'll honor a Retry-After header to prevent
19+
// a misbehaving server from blocking the agent for an unreasonable amount of time.
20+
MaxRetryAfterWait = 60 * time.Second
21+
)
22+
23+
// Calculate returns the backoff duration for a given attempt (0-indexed).
24+
// Uses exponential backoff with jitter.
25+
func Calculate(attempt int) time.Duration {
26+
if attempt < 0 {
27+
attempt = 0
28+
}
29+
30+
// Calculate exponential delay
31+
delay := float64(baseDelay)
32+
for range attempt {
33+
delay *= factor
34+
}
35+
36+
// Cap at max delay
37+
if delay > float64(maxDelay) {
38+
delay = float64(maxDelay)
39+
}
40+
41+
// Add jitter (±10%)
42+
j := delay * jitter * (2*rand.Float64() - 1)
43+
delay += j
44+
45+
return time.Duration(delay)
46+
}
47+
48+
// SleepWithContext sleeps for the specified duration, returning early if context is cancelled.
// Returns true if the sleep completed, false if it was interrupted by context cancellation.
//
// A context that is already done on entry always yields false, even when d is
// zero or negative. Otherwise a non-positive d returns true immediately.
func SleepWithContext(ctx context.Context, d time.Duration) bool {
	// Check cancellation up front. Without this, a done context combined with
	// a timer that is already due leaves both select cases ready, and select
	// then chooses between them at random.
	if ctx.Err() != nil {
		return false
	}

	// Nothing to wait for; avoid allocating a timer.
	if d <= 0 {
		return true
	}

	timer := time.NewTimer(d)
	defer timer.Stop()

	select {
	case <-timer.C:
		return true
	case <-ctx.Done():
		return false
	}
}

pkg/backoff/backoff_test.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package backoff
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"testing"
7+
"time"
8+
9+
"github.com/stretchr/testify/assert"
10+
)
11+
12+
func TestCalculate(t *testing.T) {
13+
t.Parallel()
14+
15+
tests := []struct {
16+
attempt int
17+
minExpected time.Duration
18+
maxExpected time.Duration
19+
}{
20+
{attempt: 0, minExpected: 180 * time.Millisecond, maxExpected: 220 * time.Millisecond},
21+
{attempt: 1, minExpected: 360 * time.Millisecond, maxExpected: 440 * time.Millisecond},
22+
{attempt: 2, minExpected: 720 * time.Millisecond, maxExpected: 880 * time.Millisecond},
23+
{attempt: 3, minExpected: 1440 * time.Millisecond, maxExpected: 1760 * time.Millisecond},
24+
{attempt: 10, minExpected: 1800 * time.Millisecond, maxExpected: 2200 * time.Millisecond}, // capped at 2s
25+
}
26+
27+
for _, tt := range tests {
28+
t.Run(fmt.Sprintf("attempt_%d", tt.attempt), func(t *testing.T) {
29+
t.Parallel()
30+
b := Calculate(tt.attempt)
31+
assert.GreaterOrEqual(t, b, tt.minExpected, "backoff should be at least %v", tt.minExpected)
32+
assert.LessOrEqual(t, b, tt.maxExpected, "backoff should be at most %v", tt.maxExpected)
33+
})
34+
}
35+
36+
t.Run("negative attempt treated as 0", func(t *testing.T) {
37+
t.Parallel()
38+
b := Calculate(-1)
39+
assert.GreaterOrEqual(t, b, 180*time.Millisecond)
40+
assert.LessOrEqual(t, b, 220*time.Millisecond)
41+
})
42+
}
43+
44+
func TestSleepWithContext(t *testing.T) {
45+
t.Parallel()
46+
47+
t.Run("completes normally", func(t *testing.T) {
48+
t.Parallel()
49+
ctx := t.Context()
50+
start := time.Now()
51+
completed := SleepWithContext(ctx, 10*time.Millisecond)
52+
elapsed := time.Since(start)
53+
54+
assert.True(t, completed, "should complete normally")
55+
assert.GreaterOrEqual(t, elapsed, 10*time.Millisecond)
56+
})
57+
58+
t.Run("interrupted by context", func(t *testing.T) {
59+
t.Parallel()
60+
ctx, cancel := context.WithCancel(t.Context())
61+
time.AfterFunc(10*time.Millisecond, cancel)
62+
63+
start := time.Now()
64+
completed := SleepWithContext(ctx, 1*time.Second)
65+
elapsed := time.Since(start)
66+
67+
assert.False(t, completed, "should be interrupted")
68+
assert.Less(t, elapsed, 100*time.Millisecond, "should return quickly after cancel")
69+
})
70+
}

pkg/modelerrors/modelerrors.go

Lines changed: 9 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,21 @@ import (
88
"context"
99
"errors"
1010
"log/slog"
11-
"math/rand/v2"
1211
"net"
1312
"net/http"
1413
"regexp"
1514
"strconv"
1615
"strings"
1716
"time"
17+
18+
"github.com/docker/docker-agent/pkg/backoff"
1819
)
1920

2021
// Backoff and retry-after configuration constants.
2122
const (
22-
backoffBaseDelay = 200 * time.Millisecond
23-
backoffMaxDelay = 2 * time.Second
24-
backoffFactor = 2.0
25-
backoffJitter = 0.1
26-
2723
// MaxRetryAfterWait caps how long we'll honor a Retry-After header to prevent
2824
// a misbehaving server from blocking the agent for an unreasonable amount of time.
29-
MaxRetryAfterWait = 60 * time.Second
25+
MaxRetryAfterWait = backoff.MaxRetryAfterWait
3026
)
3127

3228
// StatusError wraps an HTTP API error with structured metadata for retry decisions.
@@ -400,42 +396,15 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time
400396

401397
// CalculateBackoff returns the backoff duration for a given attempt (0-indexed).
402398
// Uses exponential backoff with jitter.
403-
func CalculateBackoff(attempt int) time.Duration {
404-
if attempt < 0 {
405-
attempt = 0
406-
}
407-
408-
// Calculate exponential delay
409-
delay := float64(backoffBaseDelay)
410-
for range attempt {
411-
delay *= backoffFactor
412-
}
413-
414-
// Cap at max delay
415-
if delay > float64(backoffMaxDelay) {
416-
delay = float64(backoffMaxDelay)
417-
}
418-
419-
// Add jitter (±10%)
420-
jitter := delay * backoffJitter * (2*rand.Float64() - 1)
421-
delay += jitter
422-
423-
return time.Duration(delay)
424-
}
399+
//
400+
// Deprecated: Use [backoff.Calculate] directly.
401+
var CalculateBackoff = backoff.Calculate
425402

426403
// SleepWithContext sleeps for the specified duration, returning early if context is cancelled.
427404
// Returns true if the sleep completed, false if it was interrupted by context cancellation.
428-
func SleepWithContext(ctx context.Context, d time.Duration) bool {
429-
timer := time.NewTimer(d)
430-
defer timer.Stop()
431-
432-
select {
433-
case <-timer.C:
434-
return true
435-
case <-ctx.Done():
436-
return false
437-
}
438-
}
405+
//
406+
// Deprecated: Use [backoff.SleepWithContext] directly.
407+
var SleepWithContext = backoff.SleepWithContext
439408

440409
// FormatError returns a user-friendly error message for model errors.
441410
// Context overflow gets a dedicated actionable message; all other errors

pkg/modelerrors/modelerrors_test.go

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -71,66 +71,6 @@ func TestIsRetryableModelError(t *testing.T) {
7171
}
7272
}
7373

74-
func TestCalculateBackoff(t *testing.T) {
75-
t.Parallel()
76-
77-
tests := []struct {
78-
attempt int
79-
minExpected time.Duration
80-
maxExpected time.Duration
81-
}{
82-
{attempt: 0, minExpected: 180 * time.Millisecond, maxExpected: 220 * time.Millisecond},
83-
{attempt: 1, minExpected: 360 * time.Millisecond, maxExpected: 440 * time.Millisecond},
84-
{attempt: 2, minExpected: 720 * time.Millisecond, maxExpected: 880 * time.Millisecond},
85-
{attempt: 3, minExpected: 1440 * time.Millisecond, maxExpected: 1760 * time.Millisecond},
86-
{attempt: 10, minExpected: 1800 * time.Millisecond, maxExpected: 2200 * time.Millisecond}, // capped at 2s
87-
}
88-
89-
for _, tt := range tests {
90-
t.Run(fmt.Sprintf("attempt_%d", tt.attempt), func(t *testing.T) {
91-
t.Parallel()
92-
backoff := CalculateBackoff(tt.attempt)
93-
assert.GreaterOrEqual(t, backoff, tt.minExpected, "backoff should be at least %v", tt.minExpected)
94-
assert.LessOrEqual(t, backoff, tt.maxExpected, "backoff should be at most %v", tt.maxExpected)
95-
})
96-
}
97-
98-
t.Run("negative attempt treated as 0", func(t *testing.T) {
99-
t.Parallel()
100-
backoff := CalculateBackoff(-1)
101-
assert.GreaterOrEqual(t, backoff, 180*time.Millisecond)
102-
assert.LessOrEqual(t, backoff, 220*time.Millisecond)
103-
})
104-
}
105-
106-
func TestSleepWithContext(t *testing.T) {
107-
t.Parallel()
108-
109-
t.Run("completes normally", func(t *testing.T) {
110-
t.Parallel()
111-
ctx := t.Context()
112-
start := time.Now()
113-
completed := SleepWithContext(ctx, 10*time.Millisecond)
114-
elapsed := time.Since(start)
115-
116-
assert.True(t, completed, "should complete normally")
117-
assert.GreaterOrEqual(t, elapsed, 10*time.Millisecond)
118-
})
119-
120-
t.Run("interrupted by context", func(t *testing.T) {
121-
t.Parallel()
122-
ctx, cancel := context.WithCancel(t.Context())
123-
time.AfterFunc(10*time.Millisecond, cancel)
124-
125-
start := time.Now()
126-
completed := SleepWithContext(ctx, 1*time.Second)
127-
elapsed := time.Since(start)
128-
129-
assert.False(t, completed, "should be interrupted")
130-
assert.Less(t, elapsed, 100*time.Millisecond, "should return quickly after cancel")
131-
})
132-
}
133-
13474
func TestExtractHTTPStatusCode(t *testing.T) {
13575
t.Parallel()
13676

pkg/runtime/fallback.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"time"
99

1010
"github.com/docker/docker-agent/pkg/agent"
11+
"github.com/docker/docker-agent/pkg/backoff"
1112
"github.com/docker/docker-agent/pkg/chat"
1213
"github.com/docker/docker-agent/pkg/model/provider"
1314
"github.com/docker/docker-agent/pkg/modelerrors"
@@ -69,12 +70,12 @@ func logFallbackAttempt(agentName string, model modelWithFallback, attempt, maxR
6970
}
7071

7172
// logRetryBackoff logs when we're backing off before a retry
72-
func logRetryBackoff(agentName, modelID string, attempt int, backoff time.Duration) {
73+
func logRetryBackoff(agentName, modelID string, attempt int, backoffDelay time.Duration) {
7374
slog.Debug("Backing off before retry",
7475
"agent", agentName,
7576
"model", modelID,
7677
"attempt", attempt+1,
77-
"backoff", backoff)
78+
"backoff", backoffDelay)
7879
}
7980

8081
// getCooldownState returns the current cooldown state for an agent (thread-safe).
@@ -222,9 +223,9 @@ func (r *LocalRuntime) tryModelWithFallback(
222223

223224
// Apply backoff before retry (not on first attempt of each model)
224225
if attempt > 0 {
225-
backoff := modelerrors.CalculateBackoff(attempt - 1)
226-
logRetryBackoff(a.Name(), modelEntry.provider.ID(), attempt, backoff)
227-
if !modelerrors.SleepWithContext(ctx, backoff) {
226+
backoffDelay := backoff.Calculate(attempt - 1)
227+
logRetryBackoff(a.Name(), modelEntry.provider.ID(), attempt, backoffDelay)
228+
if !backoff.SleepWithContext(ctx, backoffDelay) {
228229
return streamResult{}, nil, ctx.Err()
229230
}
230231
}
@@ -382,14 +383,14 @@ func (r *LocalRuntime) handleModelError(
382383
// Opt-in enabled, no fallbacks → retry same model after honouring Retry-After (or backoff).
383384
waitDuration := retryAfter
384385
if waitDuration <= 0 {
385-
waitDuration = modelerrors.CalculateBackoff(attempt)
386-
} else if waitDuration > modelerrors.MaxRetryAfterWait {
386+
waitDuration = backoff.Calculate(attempt)
387+
} else if waitDuration > backoff.MaxRetryAfterWait {
387388
slog.Warn("Retry-After exceeds maximum, capping",
388389
"agent", a.Name(),
389390
"model", modelEntry.provider.ID(),
390391
"retry_after", retryAfter,
391-
"max", modelerrors.MaxRetryAfterWait)
392-
waitDuration = modelerrors.MaxRetryAfterWait
392+
"max", backoff.MaxRetryAfterWait)
393+
waitDuration = backoff.MaxRetryAfterWait
393394
}
394395
slog.Warn("Rate limited, retrying (opt-in enabled)",
395396
"agent", a.Name(),
@@ -398,7 +399,7 @@ func (r *LocalRuntime) handleModelError(
398399
"wait", waitDuration,
399400
"retry_after_from_header", retryAfter > 0,
400401
"error", err)
401-
if !modelerrors.SleepWithContext(ctx, waitDuration) {
402+
if !backoff.SleepWithContext(ctx, waitDuration) {
402403
return retryDecisionReturn
403404
}
404405
return retryDecisionContinue

0 commit comments

Comments
 (0)