Skip to content

Commit 25bea5c

Browse files
committed
Surface finish_reason on assistant messages and token usage events
Add FinishReason to chat.Message and MessageUsage so API consumers can distinguish the root agent's final response from intermediate tool-call turns during live streaming.

- Propagate the provider's explicit finish_reason through the streaming pipeline (stop/length via early return; tool_calls tracked and preserved after the stream loop)
- Infer finish_reason when the provider sends a bare EOF: tool calls present → tool_calls, content present → stop, nothing → null
- Validate finish_reason against actual stream output (tool_calls requires tool calls; stop is overridden when tool calls exist)
- Reconstruct LastMessage on session restore so FinishReason is available for historical sessions (scoped to the parent session only)
1 parent d6f7884 commit 25bea5c

6 files changed

Lines changed: 143 additions & 14 deletions

File tree

pkg/chat/chat.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ type Message struct {
8989
// Cost is the cost of this message in dollars (only set for assistant messages)
9090
Cost float64 `json:"cost,omitempty"`
9191

92+
// FinishReason indicates why the model stopped generating for this message.
93+
// "stop" = natural end, "tool_calls" = tool invocation, "length" = token limit.
94+
// Only set for assistant messages.
95+
FinishReason FinishReason `json:"finish_reason,omitempty"`
96+
9297
// CacheControl indicates whether this message is a cached message (only used by anthropic)
9398
CacheControl bool `json:"cache_control,omitempty"`
9499
}

pkg/runtime/event.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -283,13 +283,14 @@ type Usage struct {
283283
}
284284

285285
// MessageUsage contains per-message usage data to include in TokenUsageEvent.
// It embeds chat.Usage and adds Cost, Model, and FinishReason fields.
type MessageUsage struct {
	chat.Usage
	chat.RateLimit

	// Cost is the dollar cost attributed to this single message.
	Cost float64
	// Model is the provider/model identifier that produced the message.
	Model string
	// FinishReason records why the model stopped generating this message
	// ("stop", "tool_calls", "length"); empty when unknown.
	// NOTE(review): only this field carries a json tag — Cost and Model
	// serialize under their Go names ("Cost", "Model"), giving mixed key
	// casing on the wire. Confirm that is intentional for API consumers.
	FinishReason chat.FinishReason `json:"finish_reason,omitempty"`
}
294295

295296
// NewTokenUsageEvent creates a TokenUsageEvent with the given usage data.

pkg/runtime/loop.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,7 @@ func (r *LocalRuntime) recordAssistantMessage(
439439
Usage: res.Usage,
440440
Model: messageModel,
441441
Cost: messageCost,
442+
FinishReason: res.FinishReason,
442443
}
443444

444445
addAgentMessage(sess, a, &assistantMessage, events)
@@ -449,9 +450,10 @@ func (r *LocalRuntime) recordAssistantMessage(
449450
return nil
450451
}
451452
msgUsage := &MessageUsage{
452-
Usage: *res.Usage,
453-
Cost: messageCost,
454-
Model: messageModel,
453+
Usage: *res.Usage,
454+
Cost: messageCost,
455+
Model: messageModel,
456+
FinishReason: res.FinishReason,
455457
}
456458
if res.RateLimit != nil {
457459
msgUsage.RateLimit = *res.RateLimit

pkg/runtime/runtime.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"go.opentelemetry.io/otel/trace"
1515

1616
"github.com/docker/docker-agent/pkg/agent"
17+
"github.com/docker/docker-agent/pkg/chat"
1718
"github.com/docker/docker-agent/pkg/config/types"
1819
"github.com/docker/docker-agent/pkg/hooks"
1920
"github.com/docker/docker-agent/pkg/modelsdev"
@@ -861,6 +862,32 @@ func (r *LocalRuntime) EmitStartupInfo(ctx context.Context, sess *session.Sessio
861862
}
862863
usage := SessionUsage(sess, contextLimit)
863864
usage.Cost = sess.TotalCost()
865+
866+
// Reconstruct LastMessage from the parent session's last assistant
867+
// message so that FinishReason (and other per-message fields) are
868+
// available on session restore. We intentionally iterate
869+
// sess.Messages (not GetAllMessages) so the result reflects the
870+
// parent agent's state: this event carries the parent session_id,
871+
// and sub-agents emit their own token_usage events with their own
872+
// session_id during live streaming.
873+
for i := len(sess.Messages) - 1; i >= 0; i-- {
874+
item := &sess.Messages[i]
875+
if !item.IsMessage() || item.Message.Message.Role != chat.MessageRoleAssistant {
876+
continue
877+
}
878+
msg := &item.Message.Message
879+
lm := &MessageUsage{
880+
Model: msg.Model,
881+
Cost: msg.Cost,
882+
FinishReason: msg.FinishReason,
883+
}
884+
if msg.Usage != nil {
885+
lm.Usage = *msg.Usage
886+
}
887+
usage.LastMessage = lm
888+
break
889+
}
890+
864891
send(NewTokenUsageEvent(sess.ID, r.CurrentAgentName(), usage))
865892
}
866893

pkg/runtime/runtime_test.go

Lines changed: 65 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,9 @@ func TestSimple(t *testing.T) {
280280
AgentChoice("root", sess.ID, "Hello"),
281281
MessageAdded(sess.ID, msgAdded.Message, "root"),
282282
NewTokenUsageEvent(sess.ID, "root", &Usage{InputTokens: 3, OutputTokens: 2, ContextLength: 5, LastMessage: &MessageUsage{
283-
Usage: chat.Usage{InputTokens: 3, OutputTokens: 2},
284-
Model: "test/mock-model",
283+
Usage: chat.Usage{InputTokens: 3, OutputTokens: 2},
284+
Model: "test/mock-model",
285+
FinishReason: chat.FinishReasonStop,
285286
}}),
286287
StreamStopped(sess.ID, "root"),
287288
}
@@ -323,8 +324,9 @@ func TestMultipleContentChunks(t *testing.T) {
323324
AgentChoice("root", sess.ID, "you?"),
324325
MessageAdded(sess.ID, msgAdded.Message, "root"),
325326
NewTokenUsageEvent(sess.ID, "root", &Usage{InputTokens: 8, OutputTokens: 12, ContextLength: 20, LastMessage: &MessageUsage{
326-
Usage: chat.Usage{InputTokens: 8, OutputTokens: 12},
327-
Model: "test/mock-model",
327+
Usage: chat.Usage{InputTokens: 8, OutputTokens: 12},
328+
Model: "test/mock-model",
329+
FinishReason: chat.FinishReasonStop,
328330
}}),
329331
StreamStopped(sess.ID, "root"),
330332
}
@@ -362,8 +364,9 @@ func TestWithReasoning(t *testing.T) {
362364
AgentChoice("root", sess.ID, "Hello, how can I help you?"),
363365
MessageAdded(sess.ID, msgAdded.Message, "root"),
364366
NewTokenUsageEvent(sess.ID, "root", &Usage{InputTokens: 10, OutputTokens: 15, ContextLength: 25, LastMessage: &MessageUsage{
365-
Usage: chat.Usage{InputTokens: 10, OutputTokens: 15},
366-
Model: "test/mock-model",
367+
Usage: chat.Usage{InputTokens: 10, OutputTokens: 15},
368+
Model: "test/mock-model",
369+
FinishReason: chat.FinishReasonStop,
367370
}}),
368371
StreamStopped(sess.ID, "root"),
369372
}
@@ -403,8 +406,9 @@ func TestMixedContentAndReasoning(t *testing.T) {
403406
AgentChoice("root", sess.ID, " How can I help you today?"),
404407
MessageAdded(sess.ID, msgAdded.Message, "root"),
405408
NewTokenUsageEvent(sess.ID, "root", &Usage{InputTokens: 15, OutputTokens: 20, ContextLength: 35, LastMessage: &MessageUsage{
406-
Usage: chat.Usage{InputTokens: 15, OutputTokens: 20},
407-
Model: "test/mock-model",
409+
Usage: chat.Usage{InputTokens: 15, OutputTokens: 20},
410+
Model: "test/mock-model",
411+
FinishReason: chat.FinishReasonStop,
408412
}}),
409413
StreamStopped(sess.ID, "root"),
410414
}
@@ -963,6 +967,59 @@ func TestEmitStartupInfo_CostIncludesSubSessions(t *testing.T) {
963967
"cost should include sub-session costs (TotalCost, not OwnCost)")
964968
}
965969

970+
func TestEmitStartupInfo_LastMessageFinishReason(t *testing.T) {
971+
// When restoring a session whose last assistant message has a
972+
// FinishReason, the emitted TokenUsageEvent.LastMessage must carry
973+
// that FinishReason so the UI can identify the final response.
974+
prov := &mockProvider{id: "test/startup-model", stream: &mockStream{}}
975+
root := agent.New("root", "agent",
976+
agent.WithModel(prov),
977+
agent.WithDescription("Root"),
978+
)
979+
tm := team.New(team.WithAgents(root))
980+
981+
rt, err := NewLocalRuntime(tm, WithCurrentAgent("root"),
982+
WithModelStore(mockModelStoreWithLimit{limit: 128_000}))
983+
require.NoError(t, err)
984+
985+
sess := session.New()
986+
sess.InputTokens = 500
987+
sess.OutputTokens = 200
988+
989+
sess.Messages = append(sess.Messages, session.Item{
990+
Message: &session.Message{
991+
AgentName: "root",
992+
Message: chat.Message{
993+
Role: chat.MessageRoleAssistant,
994+
Content: "final answer",
995+
Cost: 0.02,
996+
Model: "test/startup-model",
997+
FinishReason: chat.FinishReasonStop,
998+
Usage: &chat.Usage{InputTokens: 500, OutputTokens: 200},
999+
},
1000+
},
1001+
})
1002+
1003+
events := make(chan Event, 20)
1004+
rt.EmitStartupInfo(t.Context(), sess, events)
1005+
close(events)
1006+
1007+
var tokenEvent *TokenUsageEvent
1008+
for event := range events {
1009+
if te, ok := event.(*TokenUsageEvent); ok {
1010+
tokenEvent = te
1011+
}
1012+
}
1013+
1014+
require.NotNil(t, tokenEvent, "should emit TokenUsageEvent")
1015+
require.NotNil(t, tokenEvent.Usage.LastMessage, "LastMessage should be populated on session restore")
1016+
assert.Equal(t, chat.FinishReasonStop, tokenEvent.Usage.LastMessage.FinishReason)
1017+
assert.Equal(t, "test/startup-model", tokenEvent.Usage.LastMessage.Model)
1018+
assert.InDelta(t, 0.02, tokenEvent.Usage.LastMessage.Cost, 0.0001)
1019+
assert.Equal(t, int64(500), tokenEvent.Usage.LastMessage.InputTokens)
1020+
assert.Equal(t, int64(200), tokenEvent.Usage.LastMessage.OutputTokens)
1021+
}
1022+
9661023
func TestEmitStartupInfo_NilSessionNoTokenEvent(t *testing.T) {
9671024
// When sess is nil, no TokenUsageEvent should be emitted.
9681025
prov := &mockProvider{id: "test/startup-model", stream: &mockStream{}}

pkg/runtime/streaming.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ type streamResult struct {
2626
ThinkingSignature string
2727
ThoughtSignature []byte
2828
Stopped bool
29+
FinishReason chat.FinishReason
2930
Usage *chat.Usage
3031
RateLimit *chat.RateLimit
3132
}
@@ -44,6 +45,7 @@ func (r *LocalRuntime) handleStream(ctx context.Context, stream chat.MessageStre
4445
var toolCalls []tools.ToolCall
4546
var messageUsage *chat.Usage
4647
var messageRateLimit *chat.RateLimit
48+
var providerFinishReason chat.FinishReason
4749

4850
toolCallIndex := make(map[string]int) // toolCallID -> index in toolCalls slice
4951
emittedPartial := make(map[string]bool) // toolCallID -> whether we've emitted a partial event
@@ -109,11 +111,19 @@ func (r *LocalRuntime) handleStream(ctx context.Context, stream chat.MessageStre
109111
ThinkingSignature: thinkingSignature,
110112
ThoughtSignature: thoughtSignature,
111113
Stopped: true,
114+
FinishReason: choice.FinishReason,
112115
Usage: messageUsage,
113116
RateLimit: messageRateLimit,
114117
}, nil
115118
}
116119

120+
// Track the provider's explicit finish reason (e.g. tool_calls) so we
121+
// can prefer it over inference after the loop. stop/length are already
122+
// handled by the early return above.
123+
if choice.FinishReason != "" {
124+
providerFinishReason = choice.FinishReason
125+
}
126+
117127
// Handle tool calls
118128
if len(choice.Delta.ToolCalls) > 0 {
119129
// Process each tool call delta
@@ -191,13 +201,40 @@ func (r *LocalRuntime) handleStream(ctx context.Context, stream chat.MessageStre
191201
// If the stream completed without producing any content or tool calls, likely because of a token limit, stop to avoid breaking the request loop
192202
// NOTE(krissetto): this can likely be removed once compaction works properly with all providers (aka dmr)
193203
stoppedDueToNoOutput := fullContent.Len() == 0 && len(toolCalls) == 0
204+
205+
// Prefer the provider's explicit finish reason when available (e.g.
206+
// tool_calls). Only fall back to inference when no explicit reason was
207+
// received (stream ended with bare EOF):
208+
// - tool calls present → tool_calls (model was requesting tools)
209+
// - content but no tool calls → stop (natural completion)
210+
// - no output at all → null (unknown; likely token limit)
211+
finishReason := providerFinishReason
212+
if finishReason == "" {
213+
switch {
214+
case len(toolCalls) > 0:
215+
finishReason = chat.FinishReasonToolCalls
216+
case fullContent.Len() > 0:
217+
finishReason = chat.FinishReasonStop
218+
default:
219+
finishReason = chat.FinishReasonNull
220+
}
221+
}
222+
// Ensure finish reason agrees with the actual stream output.
223+
switch {
224+
case finishReason == chat.FinishReasonToolCalls && len(toolCalls) == 0:
225+
finishReason = chat.FinishReasonNull
226+
case finishReason == chat.FinishReasonStop && len(toolCalls) > 0:
227+
finishReason = chat.FinishReasonToolCalls
228+
}
229+
194230
return streamResult{
195231
Calls: toolCalls,
196232
Content: fullContent.String(),
197233
ReasoningContent: fullReasoningContent.String(),
198234
ThinkingSignature: thinkingSignature,
199235
ThoughtSignature: thoughtSignature,
200236
Stopped: stoppedDueToNoOutput,
237+
FinishReason: finishReason,
201238
Usage: messageUsage,
202239
RateLimit: messageRateLimit,
203240
}, nil

0 commit comments

Comments
 (0)