Skip to content

Commit d4e9537

Browse files
committed
fix: eval tool_call_response uses correct event field names
SessionFromEvents and buildTranscript were looking for a non-existent "tool_call" nested map in tool_call_response events. The actual ToolCallResponseEvent serializes with "tool_call_id" (top-level string) and "tool_definition" (top-level object), so the type assertions always failed silently: in SessionFromEvents, tool call results were never written to sessions, and in buildTranscript, transcript tool names were empty.

Fix both functions to use the correct field names and update tests to match the real ToolCallResponseEvent JSON format.

Assisted-By: docker-agent
1 parent 4e0861d commit d4e9537

4 files changed

Lines changed: 32 additions & 36 deletions

File tree

pkg/evaluation/eval.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,10 @@ func buildTranscript(events []map[string]any) string {
575575
fmt.Fprintf(&transcript, "[Agent %s calls tool %q with arguments: %s]\n\n", cmp.Or(currentAgent, "unknown"), name, args)
576576

577577
case "tool_call_response":
578-
name, _ := getToolCallInfo(event)
578+
// The ToolCallResponseEvent has tool_definition at the top level, not
579+
// nested under "tool_call".
580+
td, _ := event["tool_definition"].(map[string]any)
581+
name, _ := td["name"].(string)
579582
response, _ := event["response"].(string)
580583
if len(response) > 500 {
581584
response = response[:500] + "...(truncated)"

pkg/evaluation/eval_test.go

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -796,12 +796,11 @@ func TestBuildTranscript(t *testing.T) {
796796
},
797797
},
798798
{
799-
"type": "tool_call_response",
800-
"response": "file contents here",
801-
"tool_call": map[string]any{
802-
"function": map[string]any{
803-
"name": "read_file",
804-
},
799+
"type": "tool_call_response",
800+
"response": "file contents here",
801+
"tool_call_id": "call_123",
802+
"tool_definition": map[string]any{
803+
"name": "read_file",
805804
},
806805
},
807806
},
@@ -814,12 +813,11 @@ func TestBuildTranscript(t *testing.T) {
814813
name: "long tool response truncated",
815814
events: []map[string]any{
816815
{
817-
"type": "tool_call_response",
818-
"response": strings.Repeat("x", 600),
819-
"tool_call": map[string]any{
820-
"function": map[string]any{
821-
"name": "shell",
822-
},
816+
"type": "tool_call_response",
817+
"response": strings.Repeat("x", 600),
818+
"tool_call_id": "call_789",
819+
"tool_definition": map[string]any{
820+
"name": "shell",
823821
},
824822
},
825823
},

pkg/evaluation/save.go

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -192,21 +192,20 @@ func SessionFromEvents(events []map[string]any, title string, questions []string
192192
// Flush any pending assistant message before adding tool response
193193
flushAssistantMessage()
194194

195-
// Add tool response message
196-
if tc, ok := event["tool_call"].(map[string]any); ok {
197-
toolCallID, _ := tc["id"].(string)
198-
response, _ := event["response"].(string)
195+
// The ToolCallResponseEvent serializes tool_call_id as a top-level string field,
196+
// not nested under a "tool_call" map.
197+
toolCallID, _ := event["tool_call_id"].(string)
198+
response, _ := event["response"].(string)
199199

200-
msg := &session.Message{
201-
Message: chat.Message{
202-
Role: chat.MessageRoleTool,
203-
Content: response,
204-
ToolCallID: toolCallID,
205-
CreatedAt: eventTimestamp,
206-
},
207-
}
208-
sess.AddMessage(msg)
200+
msg := &session.Message{
201+
Message: chat.Message{
202+
Role: chat.MessageRoleTool,
203+
Content: response,
204+
ToolCallID: toolCallID,
205+
CreatedAt: eventTimestamp,
206+
},
209207
}
208+
sess.AddMessage(msg)
210209

211210
case "token_usage":
212211
// Update session token usage

pkg/evaluation/save_test.go

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -302,11 +302,9 @@ func TestSessionFromEvents(t *testing.T) {
302302
},
303303
},
304304
{
305-
"type": "tool_call_response",
306-
"tool_call": map[string]any{
307-
"id": "call_123",
308-
},
309-
"response": "file content",
305+
"type": "tool_call_response",
306+
"tool_call_id": "call_123",
307+
"response": "file content",
310308
},
311309
{"type": "agent_choice", "content": "Done!"},
312310
{"type": "stream_stopped"},
@@ -452,11 +450,9 @@ func TestSessionFromEventsWithToolDefinitions(t *testing.T) {
452450
},
453451
},
454452
{
455-
"type": "tool_call_response",
456-
"tool_call": map[string]any{
457-
"id": "call_123",
458-
},
459-
"response": "file content",
453+
"type": "tool_call_response",
454+
"tool_call_id": "call_123",
455+
"response": "file content",
460456
},
461457
{"type": "stream_stopped"},
462458
}

0 commit comments

Comments
 (0)