44 "testing"
55
66 "github.com/docker/docker-agent/pkg/tools"
7+ "github.com/docker/docker-agent/pkg/tools/builtin"
8+ bgagent "github.com/docker/docker-agent/pkg/tools/builtin/agent"
79)
810
911func TestToolLoopDetector (t * testing.T ) {
@@ -21,10 +23,12 @@ func TestToolLoopDetector(t *testing.T) {
2123 }
2224
2325 tests := []struct {
24- name string
25- threshold int
26- batches [][]tools.ToolCall
27- wantTrip bool // whether any record call returns true
26+ name string
27+ threshold int
28+ exemptTools []string
29+ batches [][]tools.ToolCall
30+ wantTrip bool // whether any record call returns true
31+ wantCount int
2832 }{
2933 {
3034 name : "no loop with varied calls" ,
@@ -34,7 +38,8 @@ func TestToolLoopDetector(t *testing.T) {
3438 makeCalls ("read_file" , `{"path":"b.txt"}` ),
3539 makeCalls ("write_file" , `{"path":"c.txt"}` ),
3640 },
37- wantTrip : false ,
41+ wantTrip : false ,
42+ wantCount : 1 ,
3843 },
3944 {
4045 name : "loop detected at exact threshold" ,
@@ -44,7 +49,8 @@ func TestToolLoopDetector(t *testing.T) {
4449 makeCalls ("read_file" , `{"path":"a.txt"}` ),
4550 makeCalls ("read_file" , `{"path":"a.txt"}` ),
4651 },
47- wantTrip : true ,
52+ wantTrip : true ,
53+ wantCount : 3 ,
4854 },
4955 {
5056 name : "counter resets when calls change" ,
@@ -55,7 +61,8 @@ func TestToolLoopDetector(t *testing.T) {
5561 makeCalls ("read_file" , `{"path":"b.txt"}` ), // reset
5662 makeCalls ("read_file" , `{"path":"b.txt"}` ),
5763 },
58- wantTrip : false ,
64+ wantTrip : false ,
65+ wantCount : 2 ,
5966 },
6067 {
6168 name : "empty calls never trigger" ,
@@ -65,7 +72,8 @@ func TestToolLoopDetector(t *testing.T) {
6572 {},
6673 {},
6774 },
68- wantTrip : false ,
75+ wantTrip : false ,
76+ wantCount : 0 ,
6977 },
7078 {
7179 name : "multi-tool batches compared correctly" ,
@@ -74,7 +82,8 @@ func TestToolLoopDetector(t *testing.T) {
7482 makeCalls ("read_file" , `{"path":"a"}` , "write_file" , `{"path":"b"}` ),
7583 makeCalls ("read_file" , `{"path":"a"}` , "write_file" , `{"path":"b"}` ),
7684 },
77- wantTrip : true ,
85+ wantTrip : true ,
86+ wantCount : 2 ,
7887 },
7988 {
8089 name : "multi-tool batches differ by one argument" ,
@@ -83,7 +92,8 @@ func TestToolLoopDetector(t *testing.T) {
8392 makeCalls ("read_file" , `{"path":"a"}` , "write_file" , `{"path":"b"}` ),
8493 makeCalls ("read_file" , `{"path":"a"}` , "write_file" , `{"path":"c"}` ),
8594 },
86- wantTrip : false ,
95+ wantTrip : false ,
96+ wantCount : 1 ,
8797 },
8898 {
8999 name : "reordered JSON keys are treated as identical" ,
@@ -92,7 +102,8 @@ func TestToolLoopDetector(t *testing.T) {
92102 makeCalls ("run" , `{"cmd":"ls","cwd":"/tmp"}` ),
93103 makeCalls ("run" , `{"cwd":"/tmp","cmd":"ls"}` ),
94104 },
95- wantTrip : true ,
105+ wantTrip : true ,
106+ wantCount : 2 ,
96107 },
97108 {
98109 name : "nested JSON key reordering is normalized" ,
@@ -101,13 +112,64 @@ func TestToolLoopDetector(t *testing.T) {
101112 makeCalls ("call" , `{"a":{"y":2,"x":1},"b":1}` ),
102113 makeCalls ("call" , `{"b":1,"a":{"x":1,"y":2}}` ),
103114 },
104- wantTrip : true ,
115+ wantTrip : true ,
116+ wantCount : 2 ,
117+ },
118+ {
119+ name : "exempt background agent polling does not count as a loop" ,
120+ threshold : 2 ,
121+ exemptTools : []string {bgagent .ToolNameViewBackgroundAgent },
122+ batches : [][]tools.ToolCall {
123+ makeCalls (bgagent .ToolNameViewBackgroundAgent , `{"task_id":"agent_task_123"}` ),
124+ makeCalls (bgagent .ToolNameViewBackgroundAgent , `{"task_id":"agent_task_123"}` ),
125+ makeCalls (bgagent .ToolNameViewBackgroundAgent , `{"task_id":"agent_task_123"}` ),
126+ },
127+ wantTrip : false ,
128+ wantCount : 0 ,
129+ },
130+ {
131+ name : "mixed batch with exempt and non exempt tools still counts" ,
132+ threshold : 2 ,
133+ exemptTools : []string {bgagent .ToolNameViewBackgroundAgent , builtin .ToolNameViewBackgroundJob },
134+ batches : [][]tools.ToolCall {
135+ makeCalls (bgagent .ToolNameViewBackgroundAgent , `{"task_id":"agent_task_123"}` , "read_file" , `{"path":"a.txt"}` ),
136+ makeCalls (bgagent .ToolNameViewBackgroundAgent , `{"task_id":"agent_task_123"}` , "read_file" , `{"path":"a.txt"}` ),
137+ },
138+ wantTrip : true ,
139+ wantCount : 2 ,
140+ },
141+ {
142+ name : "exempt shell background job polling does not count as a loop" ,
143+ threshold : 2 ,
144+ exemptTools : []string {builtin .ToolNameViewBackgroundJob },
145+ batches : [][]tools.ToolCall {
146+ makeCalls (builtin .ToolNameViewBackgroundJob , `{"job_id":"job_1"}` ),
147+ makeCalls (builtin .ToolNameViewBackgroundJob , `{"job_id":"job_1"}` ),
148+ },
149+ wantTrip : false ,
150+ wantCount : 0 ,
151+ },
152+ {
153+ // A looping model cannot evade detection by interleaving a single
154+ // polling call between identical non-exempt calls. Exempt calls are
155+ // completely invisible to the detector and do NOT reset the counter.
156+ name : "interleaved polling does not evade loop detection" ,
157+ threshold : 3 ,
158+ exemptTools : []string {bgagent .ToolNameViewBackgroundAgent },
159+ batches : [][]tools.ToolCall {
160+ makeCalls ("read_file" , `{"path":"a.txt"}` ),
161+ makeCalls ("read_file" , `{"path":"a.txt"}` ),
162+ makeCalls (bgagent .ToolNameViewBackgroundAgent , `{"task_id":"t1"}` ), // exempt — counter stays at 2
163+ makeCalls ("read_file" , `{"path":"a.txt"}` ), // consecutive=3 → trips
164+ },
165+ wantTrip : true ,
166+ wantCount : 3 ,
105167 },
106168 }
107169
108170 for _ , tt := range tests {
109171 t .Run (tt .name , func (t * testing.T ) {
110- d := newToolLoopDetector (tt .threshold )
172+ d := newToolLoopDetector (tt .threshold , tt . exemptTools ... )
111173 var tripped bool
112174 for _ , batch := range tt .batches {
113175 if d .record (batch ) {
@@ -117,6 +179,9 @@ func TestToolLoopDetector(t *testing.T) {
117179 if tripped != tt .wantTrip {
118180 t .Errorf ("tripped = %v, want %v" , tripped , tt .wantTrip )
119181 }
182+ if d .consecutive != tt .wantCount {
183+ t .Errorf ("consecutive = %d, want %d" , d .consecutive , tt .wantCount )
184+ }
120185 })
121186 }
122187}
0 commit comments