Merge pull request #2319 from krissetto/dont-kill-agent

dgageot · web-flow · commit dc8062bdaed0 · 2026-04-04T19:02:11.000+02:00
Exempt background-agent polling from loop-termination detection
diff --git a/pkg/a2a/adapter.go b/pkg/a2a/adapter.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"iter"
 	"log/slog"
+	"strings"
 
 	"google.golang.org/adk/agent"
 	"google.golang.org/adk/model"
@@ -64,9 +65,10 @@ func runDockerAgent(ctx agent.InvocationContext, t *team.Team, agentName string,
 		eventsChan := rt.RunStream(ctx, sess)
 
 		// Track accumulated content for chunked responses
-		var contentBuilder string
+		var contentBuilder strings.Builder
 
 		// Convert docker agent events to ADK events and yield them
+
 		for event := range eventsChan {
 			if ctx.Ended() {
 				slog.Debug("Invocation ended, stopping agent", "agent", agentName)
@@ -76,7 +78,7 @@ func runDockerAgent(ctx agent.InvocationContext, t *team.Team, agentName string,
 			switch e := event.(type) {
 			case *runtime.AgentChoiceEvent:
 				// Accumulate content chunks
-				contentBuilder += e.Content
+				contentBuilder.WriteString(e.Content)
 
 				// Create a partial response event
 				adkEvent := &adksession.Event{
@@ -94,16 +96,18 @@ func runDockerAgent(ctx agent.InvocationContext, t *team.Team, agentName string,
 
 			case *runtime.ErrorEvent:
 				// Yield error and stop
+
 				yield(nil, fmt.Errorf("%s", e.Error))
 				return
 
 			case *runtime.StreamStoppedEvent:
 				// Send final complete event with all accumulated content
-				if contentBuilder != "" {
+
+				if contentBuilder.Len() > 0 {
 					finalEvent := &adksession.Event{
 						Author: agentName,
 						LLMResponse: model.LLMResponse{
-							Content:      genai.NewContentFromParts([]*genai.Part{{Text: contentBuilder}}, genai.RoleModel),
+							Content:      genai.NewContentFromParts([]*genai.Part{{Text: contentBuilder.String()}}, genai.RoleModel),
 							Partial:      false,
 							TurnComplete: true,
 							FinishReason: genai.FinishReasonStop,
diff --git a/pkg/runtime/loop.go b/pkg/runtime/loop.go
@@ -22,6 +22,7 @@ import (
 	"github.com/docker/docker-agent/pkg/telemetry"
 	"github.com/docker/docker-agent/pkg/tools"
 	"github.com/docker/docker-agent/pkg/tools/builtin"
+	bgagent "github.com/docker/docker-agent/pkg/tools/builtin/agent"
 )
 
 // registerDefaultTools wires up the built-in tool handlers (delegation,
@@ -123,11 +124,19 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
 		runtimeMaxIterations := sess.MaxIterations
 
 		// Initialize consecutive duplicate tool call detector
+		//
+		// Polling tools (view_background_agent, view_background_job) are
+		// expected to be called repeatedly with identical arguments while a
+		// background task is in progress. Exempt them so they never trigger
+		// the loop-termination path.
 		loopThreshold := sess.MaxConsecutiveToolCalls
 		if loopThreshold == 0 {
 			loopThreshold = 5 // default: always active
 		}
-		loopDetector := newToolLoopDetector(loopThreshold)
+		loopDetector := newToolLoopDetector(loopThreshold,
+			bgagent.ToolNameViewBackgroundAgent,
+			builtin.ToolNameViewBackgroundJob,
+		)
 
 		// overflowCompactions counts how many consecutive context-overflow
 		// auto-compactions have been attempted without a successful model
diff --git a/pkg/runtime/tool_loop_detector.go b/pkg/runtime/tool_loop_detector.go
@@ -16,12 +16,20 @@ type toolLoopDetector struct {
 	lastSignature string
 	consecutive   int
 	threshold     int
+	exemptTools   map[string]struct{}
 }
 
 // newToolLoopDetector creates a detector that triggers after threshold
-// consecutive identical call batches.
-func newToolLoopDetector(threshold int) *toolLoopDetector {
-	return &toolLoopDetector{threshold: threshold}
+// consecutive identical call batches. Tool names passed in exemptTools
+// are polling-safe: batches composed entirely of exempt tools (e.g.
+// view_background_agent, view_background_job) never count toward the
+// consecutive-duplicate limit.
+func newToolLoopDetector(threshold int, exemptTools ...string) *toolLoopDetector {
+	exempt := make(map[string]struct{}, len(exemptTools))
+	for _, name := range exemptTools {
+		exempt[name] = struct{}{}
+	}
+	return &toolLoopDetector{threshold: threshold, exemptTools: exempt}
 }
 
 // reset clears the detector state so it can be reused after recovery.
@@ -32,11 +40,22 @@ func (d *toolLoopDetector) reset() {
 
 // record updates the detector with the latest tool call batch and returns
 // true if the consecutive-duplicate threshold has been reached.
+// Batches composed entirely of exempt (polling) tools are silently
+// skipped so that expected polling patterns are not flagged.
 func (d *toolLoopDetector) record(calls []tools.ToolCall) bool {
 	if len(calls) == 0 {
 		return false
 	}
 
+	// Polling tools are expected to be called repeatedly with identical
+	// arguments while waiting for a background task to finish. Exempt batches
+	// are completely invisible to the detector: they neither increment the
+	// consecutive counter nor reset it, so a looping model cannot evade
+	// detection by interleaving a single polling call.
+	if d.isExemptBatch(calls) {
+		return false
+	}
+
 	sig := callSignature(calls)
 	if sig == d.lastSignature {
 		d.consecutive++
@@ -48,6 +67,20 @@ func (d *toolLoopDetector) record(calls []tools.ToolCall) bool {
 	return d.consecutive >= d.threshold
 }
 
+// isExemptBatch returns true when every call in the batch targets a
+// polling-exempt tool.
+func (d *toolLoopDetector) isExemptBatch(calls []tools.ToolCall) bool {
+	if len(d.exemptTools) == 0 {
+		return false
+	}
+	for _, c := range calls {
+		if _, ok := d.exemptTools[c.Function.Name]; !ok {
+			return false
+		}
+	}
+	return true
+}
+
 // callSignature builds a composite key from the name and arguments of every
 // tool call in the batch. Arguments are canonicalized (sorted keys) so that
 // semantically identical JSON with different key ordering produces the same
diff --git a/pkg/runtime/tool_loop_detector_test.go b/pkg/runtime/tool_loop_detector_test.go
@@ -4,6 +4,8 @@ import (
 	"testing"
 
 	"github.com/docker/docker-agent/pkg/tools"
+	"github.com/docker/docker-agent/pkg/tools/builtin"
+	bgagent "github.com/docker/docker-agent/pkg/tools/builtin/agent"
 )
 
 func TestToolLoopDetector(t *testing.T) {
@@ -21,10 +23,12 @@ func TestToolLoopDetector(t *testing.T) {
 	}
 
 	tests := []struct {
-		name      string
-		threshold int
-		batches   [][]tools.ToolCall
-		wantTrip  bool // whether any record call returns true
+		name        string
+		threshold   int
+		exemptTools []string
+		batches     [][]tools.ToolCall
+		wantTrip    bool // whether any record call returns true
+		wantCount   int  // expected d.consecutive when the batch loop ends
 	}{
 		{
 			name:      "no loop with varied calls",
@@ -34,7 +38,8 @@ func TestToolLoopDetector(t *testing.T) {
 				makeCalls("read_file", `{"path":"b.txt"}`),
 				makeCalls("write_file", `{"path":"c.txt"}`),
 			},
-			wantTrip: false,
+			wantTrip:  false,
+			wantCount: 1,
 		},
 		{
 			name:      "loop detected at exact threshold",
@@ -44,7 +49,8 @@ func TestToolLoopDetector(t *testing.T) {
 				makeCalls("read_file", `{"path":"a.txt"}`),
 				makeCalls("read_file", `{"path":"a.txt"}`),
 			},
-			wantTrip: true,
+			wantTrip:  true,
+			wantCount: 3,
 		},
 		{
 			name:      "counter resets when calls change",
@@ -55,7 +61,8 @@ func TestToolLoopDetector(t *testing.T) {
 				makeCalls("read_file", `{"path":"b.txt"}`), // reset
 				makeCalls("read_file", `{"path":"b.txt"}`),
 			},
-			wantTrip: false,
+			wantTrip:  false,
+			wantCount: 2,
 		},
 		{
 			name:      "empty calls never trigger",
@@ -65,7 +72,8 @@ func TestToolLoopDetector(t *testing.T) {
 				{},
 				{},
 			},
-			wantTrip: false,
+			wantTrip:  false,
+			wantCount: 0,
 		},
 		{
 			name:      "multi-tool batches compared correctly",
@@ -74,7 +82,8 @@ func TestToolLoopDetector(t *testing.T) {
 				makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"b"}`),
 				makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"b"}`),
 			},
-			wantTrip: true,
+			wantTrip:  true,
+			wantCount: 2,
 		},
 		{
 			name:      "multi-tool batches differ by one argument",
@@ -83,7 +92,8 @@ func TestToolLoopDetector(t *testing.T) {
 				makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"b"}`),
 				makeCalls("read_file", `{"path":"a"}`, "write_file", `{"path":"c"}`),
 			},
-			wantTrip: false,
+			wantTrip:  false,
+			wantCount: 1,
 		},
 		{
 			name:      "reordered JSON keys are treated as identical",
@@ -92,7 +102,8 @@ func TestToolLoopDetector(t *testing.T) {
 				makeCalls("run", `{"cmd":"ls","cwd":"/tmp"}`),
 				makeCalls("run", `{"cwd":"/tmp","cmd":"ls"}`),
 			},
-			wantTrip: true,
+			wantTrip:  true,
+			wantCount: 2,
 		},
 		{
 			name:      "nested JSON key reordering is normalized",
@@ -101,13 +112,64 @@ func TestToolLoopDetector(t *testing.T) {
 				makeCalls("call", `{"a":{"y":2,"x":1},"b":1}`),
 				makeCalls("call", `{"b":1,"a":{"x":1,"y":2}}`),
 			},
-			wantTrip: true,
+			wantTrip:  true,
+			wantCount: 2,
+		},
+		{
+			name:        "exempt background agent polling does not count as a loop",
+			threshold:   2,
+			exemptTools: []string{bgagent.ToolNameViewBackgroundAgent},
+			batches: [][]tools.ToolCall{
+				makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`),
+				makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`),
+				makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`),
+			},
+			wantTrip:  false,
+			wantCount: 0,
+		},
+		{
+			name:        "mixed batch with exempt and non exempt tools still counts",
+			threshold:   2,
+			exemptTools: []string{bgagent.ToolNameViewBackgroundAgent, builtin.ToolNameViewBackgroundJob},
+			batches: [][]tools.ToolCall{
+				makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`, "read_file", `{"path":"a.txt"}`),
+				makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"agent_task_123"}`, "read_file", `{"path":"a.txt"}`),
+			},
+			wantTrip:  true,
+			wantCount: 2,
+		},
+		{
+			name:        "exempt shell background job polling does not count as a loop",
+			threshold:   2,
+			exemptTools: []string{builtin.ToolNameViewBackgroundJob},
+			batches: [][]tools.ToolCall{
+				makeCalls(builtin.ToolNameViewBackgroundJob, `{"job_id":"job_1"}`),
+				makeCalls(builtin.ToolNameViewBackgroundJob, `{"job_id":"job_1"}`),
+			},
+			wantTrip:  false,
+			wantCount: 0,
+		},
+		{
+			// A looping model cannot evade detection by interleaving a single
+			// polling call between identical non-exempt calls. Exempt calls are
+			// completely invisible to the detector and do NOT reset the counter.
+			name:        "interleaved polling does not evade loop detection",
+			threshold:   3,
+			exemptTools: []string{bgagent.ToolNameViewBackgroundAgent},
+			batches: [][]tools.ToolCall{
+				makeCalls("read_file", `{"path":"a.txt"}`),
+				makeCalls("read_file", `{"path":"a.txt"}`),
+				makeCalls(bgagent.ToolNameViewBackgroundAgent, `{"task_id":"t1"}`), // exempt — counter stays at 2
+				makeCalls("read_file", `{"path":"a.txt"}`),                         // consecutive=3 → trips
+			},
+			wantTrip:  true,
+			wantCount: 3,
 		},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			d := newToolLoopDetector(tt.threshold)
+			d := newToolLoopDetector(tt.threshold, tt.exemptTools...)
 			var tripped bool
 			for _, batch := range tt.batches {
 				if d.record(batch) {
@@ -117,6 +179,9 @@ func TestToolLoopDetector(t *testing.T) {
 			if tripped != tt.wantTrip {
 				t.Errorf("tripped = %v, want %v", tripped, tt.wantTrip)
 			}
+			if d.consecutive != tt.wantCount {
+				t.Errorf("consecutive = %d, want %d", d.consecutive, tt.wantCount)
+			}
 		})
 	}
 }
diff --git a/pkg/tools/builtin/agent/agent.go b/pkg/tools/builtin/agent/agent.go
@@ -112,6 +112,12 @@ type task struct {
 	status      atomic.Int32
 	result      string
 	errMsg      string
+
+	// viewCount counts consecutive HandleView progress reads that saw the
+	// same outputBytes, including the first; it restarts at 1 when outputBytes
+	// differs from lastViewedOutputBytes. Both are protected by outputMu.
+	viewCount             int
+	lastViewedOutputBytes int
 }
 
 func (t *task) loadStatus() taskStatus {
@@ -331,10 +337,21 @@ func (h *Handler) HandleView(_ context.Context, _ *session.Session, toolCall too
 	case taskStopped:
 		out.WriteString("<task was stopped>")
 	default:
-		t.outputMu.RLock()
+		t.outputMu.Lock()
 		progress := t.output.String()
 		truncated := t.outputBytes >= maxOutputBytes
-		t.outputMu.RUnlock()
+		currentBytes := t.outputBytes
+
+		// Track whether output has changed since the last view.
+		if currentBytes == t.lastViewedOutputBytes {
+			t.viewCount++
+		} else {
+			t.viewCount = 1
+			t.lastViewedOutputBytes = currentBytes
+		}
+		viewCount := t.viewCount
+		t.outputMu.Unlock()
+
 		if progress != "" {
 			out.WriteString(progress)
 			if truncated {
@@ -345,6 +362,9 @@ func (h *Handler) HandleView(_ context.Context, _ *session.Session, toolCall too
 		} else {
 			out.WriteString("<no output yet — still running>")
 		}
+		if viewCount > 1 {
+			fmt.Fprintf(&out, "\n\n[No new output since last check — poll #%d]", viewCount)
+		}
 	}
 
 	return tools.ResultSuccess(out.String()), nil
diff --git a/pkg/tools/builtin/agent/agent_test.go b/pkg/tools/builtin/agent/agent_test.go