Skip to content

Commit dc8062b

Browse files
authored
Merge pull request #2319 from krissetto/dont-kill-agent
Exempt background-agent polling from loop-termination detection
2 parents b5a564d + 02348bf commit dc8062b

6 files changed

Lines changed: 207 additions & 23 deletions

File tree

pkg/a2a/adapter.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"iter"
77
"log/slog"
8+
"strings"
89

910
"google.golang.org/adk/agent"
1011
"google.golang.org/adk/model"
@@ -64,9 +65,10 @@ func runDockerAgent(ctx agent.InvocationContext, t *team.Team, agentName string,
6465
eventsChan := rt.RunStream(ctx, sess)
6566

6667
// Track accumulated content for chunked responses
67-
var contentBuilder string
68+
var contentBuilder strings.Builder
6869

6970
// Convert docker agent events to ADK events and yield them
71+
7072
for event := range eventsChan {
7173
if ctx.Ended() {
7274
slog.Debug("Invocation ended, stopping agent", "agent", agentName)
@@ -76,7 +78,7 @@ func runDockerAgent(ctx agent.InvocationContext, t *team.Team, agentName string,
7678
switch e := event.(type) {
7779
case *runtime.AgentChoiceEvent:
7880
// Accumulate content chunks
79-
contentBuilder += e.Content
81+
contentBuilder.WriteString(e.Content)
8082

8183
// Create a partial response event
8284
adkEvent := &adksession.Event{
@@ -94,16 +96,18 @@ func runDockerAgent(ctx agent.InvocationContext, t *team.Team, agentName string,
9496

9597
case *runtime.ErrorEvent:
9698
// Yield error and stop
99+
97100
yield(nil, fmt.Errorf("%s", e.Error))
98101
return
99102

100103
case *runtime.StreamStoppedEvent:
101104
// Send final complete event with all accumulated content
102-
if contentBuilder != "" {
105+
106+
if contentBuilder.Len() > 0 {
103107
finalEvent := &adksession.Event{
104108
Author: agentName,
105109
LLMResponse: model.LLMResponse{
106-
Content: genai.NewContentFromParts([]*genai.Part{{Text: contentBuilder}}, genai.RoleModel),
110+
Content: genai.NewContentFromParts([]*genai.Part{{Text: contentBuilder.String()}}, genai.RoleModel),
107111
Partial: false,
108112
TurnComplete: true,
109113
FinishReason: genai.FinishReasonStop,

pkg/runtime/loop.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"github.com/docker/docker-agent/pkg/telemetry"
2323
"github.com/docker/docker-agent/pkg/tools"
2424
"github.com/docker/docker-agent/pkg/tools/builtin"
25+
bgagent "github.com/docker/docker-agent/pkg/tools/builtin/agent"
2526
)
2627

2728
// registerDefaultTools wires up the built-in tool handlers (delegation,
@@ -123,11 +124,19 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
123124
runtimeMaxIterations := sess.MaxIterations
124125

125126
// Initialize consecutive duplicate tool call detector
127+
//
128+
// Polling tools (view_background_agent, view_background_job) are
129+
// expected to be called repeatedly with identical arguments while a
130+
// background task is in progress. Exempt them so they never trigger
131+
// the loop-termination path.
126132
loopThreshold := sess.MaxConsecutiveToolCalls
127133
if loopThreshold == 0 {
128134
loopThreshold = 5 // default: always active
129135
}
130-
loopDetector := newToolLoopDetector(loopThreshold)
136+
loopDetector := newToolLoopDetector(loopThreshold,
137+
bgagent.ToolNameViewBackgroundAgent,
138+
builtin.ToolNameViewBackgroundJob,
139+
)
131140

132141
// overflowCompactions counts how many consecutive context-overflow
133142
// auto-compactions have been attempted without a successful model

pkg/runtime/tool_loop_detector.go

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,20 @@ type toolLoopDetector struct {
1616
lastSignature string
1717
consecutive int
1818
threshold int
19+
exemptTools map[string]struct{}
1920
}
2021

2122
// newToolLoopDetector creates a detector that triggers after threshold
22-
// consecutive identical call batches.
23-
func newToolLoopDetector(threshold int) *toolLoopDetector {
24-
return &toolLoopDetector{threshold: threshold}
23+
// consecutive identical call batches. Tool names passed in exemptTools
24+
// are polling-safe: batches composed entirely of exempt tools (e.g.
25+
// view_background_agent, view_background_job) never count toward the
26+
// consecutive-duplicate limit.
27+
func newToolLoopDetector(threshold int, exemptTools ...string) *toolLoopDetector {
28+
exempt := make(map[string]struct{}, len(exemptTools))
29+
for _, name := range exemptTools {
30+
exempt[name] = struct{}{}
31+
}
32+
return &toolLoopDetector{threshold: threshold, exemptTools: exempt}
2533
}
2634

2735
// reset clears the detector state so it can be reused after recovery.
@@ -32,11 +40,22 @@ func (d *toolLoopDetector) reset() {
3240

3341
// record updates the detector with the latest tool call batch and returns
3442
// true if the consecutive-duplicate threshold has been reached.
43+
// Batches composed entirely of exempt (polling) tools are silently
44+
// skipped so that expected polling patterns are not flagged.
3545
func (d *toolLoopDetector) record(calls []tools.ToolCall) bool {
3646
if len(calls) == 0 {
3747
return false
3848
}
3949

50+
// Polling tools are expected to be called repeatedly with identical
51+
// arguments while waiting for a background task to finish. Exempt batches
52+
// are completely invisible to the detector: they neither increment the
53+
// consecutive counter nor reset it, so a looping model cannot evade
54+
// detection by interleaving a single polling call.
55+
if d.isExemptBatch(calls) {
56+
return false
57+
}
58+
4059
sig := callSignature(calls)
4160
if sig == d.lastSignature {
4261
d.consecutive++
@@ -48,6 +67,20 @@ func (d *toolLoopDetector) record(calls []tools.ToolCall) bool {
4867
return d.consecutive >= d.threshold
4968
}
5069

70+
// isExemptBatch returns true when every call in the batch targets a
71+
// polling-exempt tool.
72+
func (d *toolLoopDetector) isExemptBatch(calls []tools.ToolCall) bool {
73+
if len(d.exemptTools) == 0 {
74+
return false
75+
}
76+
for _, c := range calls {
77+
if _, ok := d.exemptTools[c.Function.Name]; !ok {
78+
return false
79+
}
80+
}
81+
return true
82+
}
83+
5184
// callSignature builds a composite key from the name and arguments of every
5285
// tool call in the batch. Arguments are canonicalized (sorted keys) so that
5386
// semantically identical JSON with different key ordering produces the same

pkg/runtime/tool_loop_detector_test.go

Lines changed: 78 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"testing"
55

66
"github.com/docker/docker-agent/pkg/tools"
7+
"github.com/docker/docker-agent/pkg/tools/builtin"
8+
bgagent "github.com/docker/docker-agent/pkg/tools/builtin/agent"
79
)
810

911
func TestToolLoopDetector(t *testing.T) {
@@ -21,10 +23,12 @@ func TestToolLoopDetector(t *testing.T) {
2123
}
2224

2325
tests := []struct {
24-
name string
25-
threshold int
26-
batches [][]tools.ToolCall
27-
wantTrip bool // whether any record call returns true
26+
name string
27+
threshold int
28+
exemptTools []string
29+
batches [][]tools.ToolCall
30+
wantTrip bool // whether any record call returns true
31+
wantCount int
2832
}{
2933
{
3034
name: "no loop with varied calls",
@@ -34,7 +38,8 @@ func TestToolLoopDetector(t *testing.T) {
3438
makeCalls("read_file", `{"path":"b.txt"}`),
3539
makeCalls("write_file", `{"path":"c.txt"}`),
3640
},
37-
wantTrip: false,
41+
wantTrip: false,
42+
wantCount: 1,
3843
},
3944
{
4045
name: "loop detected at exact threshold",
@@ -44,7 +49,8 @@ func TestToolLoopDetector(t *testing.T) {
4449
makeCalls("read_file", `{"path":"a.txt"}`),
4550
makeCalls("read_file", `{"path":"a.txt"}`),
4651
},
47-
wantTrip: true,
52+
wantTrip: true,
53+
wantCount: 3,
4854
},
4955
{
5056
name: "counter resets when calls change",
@@ -55,7 +61,8 @@ func TestToolLoopDetector(t *testing.T) {
5561
makeCalls("read_file", `{"path":"b.txt"}`), // reset
5662
makeCalls("read_file", `{"path":"b.txt"}`),
5763
},
58-
wantTrip: false,
64+
wantTrip: false,
65+
wantCount: 2,
5966
},
6067
{
6168
name: "empty calls never trigger",
@@ -65,7 +72,8 @@ func TestToolLoopDetector(t *testing.T) {
6572
{},
6673
{},
6774
},
68-
wantTrip: false,
75+
wantTrip: false,
76+
wantCount: 0,
6977
},
7078
{
7179
name: "multi-tool batches compared correctly",
@@ -74,7 +82,8 @@ func TestToolLoopDetector(t *testing.T) {
7482
makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"b"}`),
7583
makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"b"}`),
7684
},
77-
wantTrip: true,
85+
wantTrip: true,
86+
wantCount: 2,
7887
},
7988
{
8089
name: "multi-tool batches differ by one argument",
@@ -83,7 +92,8 @@ func TestToolLoopDetector(t *testing.T) {
8392
makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"b"}`),
8493
makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"c"}`),
8594
},
86-
wantTrip: false,
95+
wantTrip: false,
96+
wantCount: 1,
8797
},
8898
{
8999
name: "reordered JSON keys are treated as identical",
@@ -92,7 +102,8 @@ func TestToolLoopDetector(t *testing.T) {
92102
makeCalls("run", `{"cmd":"ls","cwd":"/tmp"}`),
93103
makeCalls("run", `{"cwd":"/tmp","cmd":"ls"}`),
94104
},
95-
wantTrip: true,
105+
wantTrip: true,
106+
wantCount: 2,
96107
},
97108
{
98109
name: "nested JSON key reordering is normalized",
@@ -101,13 +112,64 @@ func TestToolLoopDetector(t *testing.T) {
101112
makeCalls("call", `{"a":{"y":2,"x":1},"b":1}`),
102113
makeCalls("call", `{"b":1,"a":{"x":1,"y":2}}`),
103114
},
104-
wantTrip: true,
115+
wantTrip: true,
116+
wantCount: 2,
117+
},
118+
{
119+
name: "exempt background agent polling does not count as a loop",
120+
threshold: 2,
121+
exemptTools: []string{bgagent.ToolNameViewBackgroundAgent},
122+
batches: [][]tools.ToolCall{
123+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`),
124+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`),
125+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`),
126+
},
127+
wantTrip: false,
128+
wantCount: 0,
129+
},
130+
{
131+
name: "mixed batch with exempt and non exempt tools still counts",
132+
threshold: 2,
133+
exemptTools: []string{bgagent.ToolNameViewBackgroundAgent, builtin.ToolNameViewBackgroundJob},
134+
batches: [][]tools.ToolCall{
135+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`, "read_file", `{"path":"a.txt"}`),
136+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`, "read_file", `{"path":"a.txt"}`),
137+
},
138+
wantTrip: true,
139+
wantCount: 2,
140+
},
141+
{
142+
name: "exempt shell background job polling does not count as a loop",
143+
threshold: 2,
144+
exemptTools: []string{builtin.ToolNameViewBackgroundJob},
145+
batches: [][]tools.ToolCall{
146+
makeCalls(builtin.ToolNameViewBackgroundJob, `{"job_id":"job_1"}`),
147+
makeCalls(builtin.ToolNameViewBackgroundJob, `{"job_id":"job_1"}`),
148+
},
149+
wantTrip: false,
150+
wantCount: 0,
151+
},
152+
{
153+
// A looping model cannot evade detection by interleaving a single
154+
// polling call between identical non-exempt calls. Exempt calls are
155+
// completely invisible to the detector and do NOT reset the counter.
156+
name: "interleaved polling does not evade loop detection",
157+
threshold: 3,
158+
exemptTools: []string{bgagent.ToolNameViewBackgroundAgent},
159+
batches: [][]tools.ToolCall{
160+
makeCalls("read_file", `{"path":"a.txt"}`),
161+
makeCalls("read_file", `{"path":"a.txt"}`),
162+
makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"t1"}`), // exempt — counter stays at 2
163+
makeCalls("read_file", `{"path":"a.txt"}`), // consecutive=3 → trips
164+
},
165+
wantTrip: true,
166+
wantCount: 3,
105167
},
106168
}
107169

108170
for _, tt := range tests {
109171
t.Run(tt.name, func(t *testing.T) {
110-
d := newToolLoopDetector(tt.threshold)
172+
d := newToolLoopDetector(tt.threshold, tt.exemptTools...)
111173
var tripped bool
112174
for _, batch := range tt.batches {
113175
if d.record(batch) {
@@ -117,6 +179,9 @@ func TestToolLoopDetector(t *testing.T) {
117179
if tripped != tt.wantTrip {
118180
t.Errorf("tripped = %v, want %v", tripped, tt.wantTrip)
119181
}
182+
if d.consecutive != tt.wantCount {
183+
t.Errorf("consecutive = %d, want %d", d.consecutive, tt.wantCount)
184+
}
120185
})
121186
}
122187
}

pkg/tools/builtin/agent/agent.go

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,12 @@ type task struct {
112112
status atomic.Int32
113113
result string
114114
errMsg string
115+
116+
// viewCount tracks how many consecutive HandleView calls observed no
117+
// new output. It is reset whenever the buffered output grows.
118+
// Protected by outputMu.
119+
viewCount int
120+
lastViewedOutputBytes int
115121
}
116122

117123
func (t *task) loadStatus() taskStatus {
@@ -331,10 +337,21 @@ func (h *Handler) HandleView(_ context.Context, _ *session.Session, toolCall too
331337
case taskStopped:
332338
out.WriteString("<task was stopped>")
333339
default:
334-
t.outputMu.RLock()
340+
t.outputMu.Lock()
335341
progress := t.output.String()
336342
truncated := t.outputBytes >= maxOutputBytes
337-
t.outputMu.RUnlock()
343+
currentBytes := t.outputBytes
344+
345+
// Track whether output has changed since the last view.
346+
if currentBytes == t.lastViewedOutputBytes {
347+
t.viewCount++
348+
} else {
349+
t.viewCount = 1
350+
t.lastViewedOutputBytes = currentBytes
351+
}
352+
viewCount := t.viewCount
353+
t.outputMu.Unlock()
354+
338355
if progress != "" {
339356
out.WriteString(progress)
340357
if truncated {
@@ -345,6 +362,9 @@ func (h *Handler) HandleView(_ context.Context, _ *session.Session, toolCall too
345362
} else {
346363
out.WriteString("<no output yet — still running>")
347364
}
365+
if viewCount > 1 {
366+
fmt.Fprintf(&out, "\n\n[No new output since last check — poll #%d]", viewCount)
367+
}
348368
}
349369

350370
return tools.ResultSuccess(out.String()), nil

0 commit comments

Comments
 (0)