Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions backend/internal/pkg/apicompat/anthropic_responses_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1597,3 +1597,139 @@ func TestAnthropicToResponses_TemperatureStrippedForAllGpt5Variants(t *testing.T
})
}
}

// ---------------------------------------------------------------------------
// AnthropicToResponsesResponse: Anthropic input_tokens excludes cached tokens
// while OpenAI Responses input_tokens is the total including cached tokens.
// ---------------------------------------------------------------------------

// TestAnthropicToResponsesResponse_CacheTokensUseOpenAIInputSemantics checks
// the non-streaming conversion: the converted usage must report input tokens
// as uncached + cache-read + cache-creation, and surface the cache-read count
// as the cached-tokens detail.
func TestAnthropicToResponsesResponse_CacheTokensUseOpenAIInputSemantics(t *testing.T) {
	const (
		uncachedIn    = 3318
		cacheRead     = 50688
		cacheCreation = 200
		generated     = 123

		// 3318 (uncached) + 50688 (read) + 200 (creation) = 54206
		wantInput = uncachedIn + cacheRead + cacheCreation
		// 54206 (input) + 123 (output) = 54329
		wantTotal = wantInput + generated
	)

	anthropicResp := &AnthropicResponse{
		ID:         "msg_cache",
		Model:      "claude-sonnet-4-5-20250929",
		StopReason: "end_turn",
		Content:    []AnthropicContentBlock{{Type: "text", Text: "ok"}},
		Usage: AnthropicUsage{
			InputTokens:              uncachedIn,
			OutputTokens:             generated,
			CacheReadInputTokens:     cacheRead,
			CacheCreationInputTokens: cacheCreation,
		},
	}

	converted := AnthropicToResponsesResponse(anthropicResp)

	usage := converted.Usage
	require.NotNil(t, usage)
	assert.Equal(t, wantInput, usage.InputTokens)
	assert.Equal(t, generated, usage.OutputTokens)
	assert.Equal(t, wantTotal, usage.TotalTokens)

	details := usage.InputTokensDetails
	require.NotNil(t, details)
	assert.Equal(t, cacheRead, details.CachedTokens)
}

// TestAnthropicToResponsesResponse_NoCacheTokens checks the non-streaming
// conversion when the Anthropic usage carries no cache counters: token counts
// pass through unchanged and no input-token details are attached.
func TestAnthropicToResponsesResponse_NoCacheTokens(t *testing.T) {
	const (
		promptTokens = 100
		generated    = 50
		// 100 (input) + 50 (output) = 150
		wantTotal = promptTokens + generated
	)

	anthropicResp := &AnthropicResponse{
		ID:         "msg_nocache",
		Model:      "claude-sonnet-4-5-20250929",
		StopReason: "end_turn",
		Content:    []AnthropicContentBlock{{Type: "text", Text: "ok"}},
		Usage: AnthropicUsage{
			InputTokens:  promptTokens,
			OutputTokens: generated,
		},
	}

	converted := AnthropicToResponsesResponse(anthropicResp)

	usage := converted.Usage
	require.NotNil(t, usage)
	assert.Equal(t, promptTokens, usage.InputTokens)
	assert.Equal(t, generated, usage.OutputTokens)
	assert.Equal(t, wantTotal, usage.TotalTokens)
	assert.Nil(t, usage.InputTokensDetails)
}

// TestAnthropicEventToResponses_CacheTokensRoundTripFromMessageStart drives the
// streaming converter with cache counters supplied on message_start and checks
// that the response.completed event emitted on message_stop reports usage with
// the cached tokens folded into the input total.
func TestAnthropicEventToResponses_CacheTokensRoundTripFromMessageStart(t *testing.T) {
	streamState := NewAnthropicEventToResponsesState()

	// Events fed before the terminal message_stop; their output is not inspected.
	priming := []*AnthropicStreamEvent{
		{
			// message_start carries cache fields on the initial Usage object.
			Type: "message_start",
			Message: &AnthropicResponse{
				ID:    "msg_stream_cache",
				Model: "claude-sonnet-4-5-20250929",
				Usage: AnthropicUsage{
					InputTokens:              12,
					CacheReadInputTokens:     9,
					CacheCreationInputTokens: 3,
				},
			},
		},
		{
			Type:  "message_delta",
			Usage: &AnthropicUsage{OutputTokens: 7},
		},
	}
	for _, evt := range priming {
		AnthropicEventToResponsesEvents(evt, streamState)
	}

	final := AnthropicEventToResponsesEvents(&AnthropicStreamEvent{Type: "message_stop"}, streamState)

	// The terminal response.completed event must include OpenAI-semantic usage.
	// Scan from the end so the last matching event is the one selected.
	var completed *ResponsesStreamEvent
	for i := len(final) - 1; i >= 0; i-- {
		if final[i].Type == "response.completed" {
			completed = &final[i]
			break
		}
	}
	require.NotNil(t, completed, "response.completed event must be emitted")
	require.NotNil(t, completed.Response)

	usage := completed.Response.Usage
	require.NotNil(t, usage)
	// 12 (uncached) + 9 (read) + 3 (creation) = 24
	assert.Equal(t, 24, usage.InputTokens)
	assert.Equal(t, 7, usage.OutputTokens)
	assert.Equal(t, 31, usage.TotalTokens)

	details := usage.InputTokensDetails
	require.NotNil(t, details)
	assert.Equal(t, 9, details.CachedTokens)
}

// TestAnthropicEventToResponses_CacheTokensFromMessageDelta drives the
// streaming converter with cache counters supplied only on message_delta and
// checks that the response.completed event emitted on message_stop still
// reports usage with the cached tokens folded into the input total.
func TestAnthropicEventToResponses_CacheTokensFromMessageDelta(t *testing.T) {
	state := NewAnthropicEventToResponsesState()

	// message_start only reports the uncached input count.
	AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
		Type: "message_start",
		Message: &AnthropicResponse{
			ID:    "msg_delta_cache",
			Model: "claude-sonnet-4-5-20250929",
			Usage: AnthropicUsage{InputTokens: 20},
		},
	}, state)

	// Some upstreams only emit cache fields on the final message_delta.
	AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
		Type: "message_delta",
		Usage: &AnthropicUsage{
			OutputTokens:             8,
			CacheReadInputTokens:     11,
			CacheCreationInputTokens: 4,
		},
	}, state)

	events := AnthropicEventToResponsesEvents(&AnthropicStreamEvent{Type: "message_stop"}, state)

	// Pick the response.completed event out of whatever message_stop emitted.
	var completed *ResponsesStreamEvent
	for i := range events {
		if events[i].Type == "response.completed" {
			completed = &events[i]
		}
	}
	require.NotNil(t, completed, "response.completed event must be emitted")
	// Guard the dereference below so a missing Response fails the test instead
	// of panicking with a nil pointer.
	require.NotNil(t, completed.Response)
	require.NotNil(t, completed.Response.Usage)
	// 20 (uncached) + 11 (read) + 4 (creation) = 35
	assert.Equal(t, 35, completed.Response.Usage.InputTokens)
	assert.Equal(t, 8, completed.Response.Usage.OutputTokens)
	// 35 (input) + 8 (output) = 43
	assert.Equal(t, 43, completed.Response.Usage.TotalTokens)
	require.NotNil(t, completed.Response.Usage.InputTokensDetails)
	assert.Equal(t, 11, completed.Response.Usage.InputTokensDetails.CachedTokens)
}
40 changes: 32 additions & 8 deletions backend/internal/pkg/apicompat/anthropic_to_responses_response.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,16 @@ func AnthropicToResponsesResponse(resp *AnthropicResponse) *ResponsesResponse {
}

// Usage
// Anthropic's input_tokens excludes cache_read/cache_creation, while OpenAI
// Responses' input_tokens is the total including cached tokens. Add them back
// when converting so downstream consumers see OpenAI semantics.
totalInputTokens := resp.Usage.InputTokens +
resp.Usage.CacheReadInputTokens +
resp.Usage.CacheCreationInputTokens
out.Usage = &ResponsesUsage{
InputTokens: resp.Usage.InputTokens,
InputTokens: totalInputTokens,
OutputTokens: resp.Usage.OutputTokens,
TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens,
TotalTokens: totalInputTokens + resp.Usage.OutputTokens,
}
if resp.Usage.CacheReadInputTokens > 0 {
out.Usage.InputTokensDetails = &ResponsesInputTokensDetails{
Expand Down Expand Up @@ -150,10 +156,13 @@ type AnthropicEventToResponsesState struct {
CurrentCallID string
CurrentName string

// Usage from message_delta
InputTokens int
OutputTokens int
CacheReadInputTokens int
// Usage from message_start / message_delta. InputTokens here follows
// Anthropic semantics (excludes cached tokens); they are added back when
// emitting the OpenAI Responses usage.
InputTokens int
OutputTokens int
CacheReadInputTokens int
CacheCreationInputTokens int
}

// NewAnthropicEventToResponsesState returns an initialised stream state.
Expand Down Expand Up @@ -225,6 +234,12 @@ func anthToResHandleMessageStart(evt *AnthropicStreamEvent, state *AnthropicEven
if evt.Message.Usage.InputTokens > 0 {
state.InputTokens = evt.Message.Usage.InputTokens
}
if evt.Message.Usage.CacheReadInputTokens > 0 {
state.CacheReadInputTokens = evt.Message.Usage.CacheReadInputTokens
}
if evt.Message.Usage.CacheCreationInputTokens > 0 {
state.CacheCreationInputTokens = evt.Message.Usage.CacheCreationInputTokens
}
}

if state.CreatedSent {
Expand Down Expand Up @@ -392,9 +407,15 @@ func anthToResHandleMessageDelta(evt *AnthropicStreamEvent, state *AnthropicEven
// Update usage
if evt.Usage != nil {
state.OutputTokens = evt.Usage.OutputTokens
if evt.Usage.InputTokens > 0 {
state.InputTokens = evt.Usage.InputTokens
}
if evt.Usage.CacheReadInputTokens > 0 {
state.CacheReadInputTokens = evt.Usage.CacheReadInputTokens
}
if evt.Usage.CacheCreationInputTokens > 0 {
state.CacheCreationInputTokens = evt.Usage.CacheCreationInputTokens
}
}

return nil
Expand Down Expand Up @@ -472,10 +493,13 @@ func makeResponsesCompletedEvent(
seq := state.SequenceNumber
state.SequenceNumber++

// Anthropic's input_tokens excludes cache_read/cache_creation; add them
// back to match OpenAI Responses semantics where input_tokens is the total.
totalInputTokens := state.InputTokens + state.CacheReadInputTokens + state.CacheCreationInputTokens
usage := &ResponsesUsage{
InputTokens: state.InputTokens,
InputTokens: totalInputTokens,
OutputTokens: state.OutputTokens,
TotalTokens: state.InputTokens + state.OutputTokens,
TotalTokens: totalInputTokens + state.OutputTokens,
}
if state.CacheReadInputTokens > 0 {
usage.InputTokensDetails = &ResponsesInputTokensDetails{
Expand Down
Loading