Skip to content

Commit abef96e

Browse files
rumpldgageot
authored and committed
Preserve recent messages during session compaction
Before compaction, the last ~20k tokens of messages are now kept aside and excluded from summarization. The summary item records a FirstKeptEntry index pointing to the first preserved message, and GetMessages reconstructs the conversation as [summary, kept_messages...]. This allows the LLM to continue naturally after a compaction event by retaining recent conversational context verbatim while still compacting older history. Signed-off-by: Djordje Lukic <djordje.lukic@docker.com>
1 parent 2b3cccc commit abef96e

File tree

8 files changed

+305
-44
lines changed

8 files changed

+305
-44
lines changed

pkg/runtime/event.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -334,17 +334,19 @@ func SessionTitle(sessionID, title string) Event {
334334
type SessionSummaryEvent struct {
335335
AgentContext
336336

337-
Type string `json:"type"`
338-
SessionID string `json:"session_id"`
339-
Summary string `json:"summary"`
337+
Type string `json:"type"`
338+
SessionID string `json:"session_id"`
339+
Summary string `json:"summary"`
340+
FirstKeptEntry int `json:"first_kept_entry,omitempty"`
340341
}
341342

342-
func SessionSummary(sessionID, summary, agentName string) Event {
343+
func SessionSummary(sessionID, summary, agentName string, firstKeptEntry int) Event {
343344
return &SessionSummaryEvent{
344-
Type: "session_summary",
345-
SessionID: sessionID,
346-
Summary: summary,
347-
AgentContext: newAgentContext(agentName),
345+
Type: "session_summary",
346+
SessionID: sessionID,
347+
Summary: summary,
348+
FirstKeptEntry: firstKeptEntry,
349+
AgentContext: newAgentContext(agentName),
348350
}
349351
}
350352

pkg/runtime/persistent_runtime.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ func (r *PersistentRuntime) handleEvent(ctx context.Context, sess *session.Sessi
131131
}
132132

133133
case *SessionSummaryEvent:
134-
if err := r.sessionStore.AddSummary(ctx, e.SessionID, e.Summary); err != nil {
134+
if err := r.sessionStore.AddSummary(ctx, e.SessionID, e.Summary, e.FirstKeptEntry); err != nil {
135135
slog.Warn("Failed to persist summary", "session_id", e.SessionID, "error", err)
136136
}
137137

pkg/runtime/remote_runtime.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ func (r *RemoteRuntime) Resume(ctx context.Context, req ResumeRequest) {
228228
// Summarize generates a summary for the session
229229
func (r *RemoteRuntime) Summarize(_ context.Context, sess *session.Session, _ string, events chan Event) {
230230
slog.Debug("Summarize not yet implemented for remote runtime", "session_id", r.sessionID)
231-
events <- SessionSummary(sess.ID, "Summary generation not yet implemented for remote runtime", r.currentAgent)
231+
events <- SessionSummary(sess.ID, "Summary generation not yet implemented for remote runtime", r.currentAgent, 0)
232232
}
233233

234234
func (r *RemoteRuntime) convertSessionMessages(sess *session.Session) []api.Message {

pkg/runtime/session_compaction.go

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ import (
1717

1818
const maxSummaryTokens = 16_000
1919

20+
// maxKeepTokens is the maximum number of tokens to preserve from the end of
21+
// the conversation during compaction. These recent messages are kept verbatim
22+
// so the LLM can continue naturally after compaction.
23+
const maxKeepTokens = 20_000
24+
2025
// doCompact runs compaction on a session and applies the result (events,
2126
// persistence, token count updates). The agent is used to extract the
2227
// conversation from the session and to obtain the model for summarization.
@@ -41,8 +46,8 @@ func (r *LocalRuntime) doCompact(ctx context.Context, sess *session.Session, a *
4146

4247
compactionAgent := agent.New("root", compaction.SystemPrompt, agent.WithModel(summaryModel))
4348

44-
// Compute the messages to compact.
45-
messages := extractMessagesToCompact(sess, compactionAgent, int64(m.Limit.Context), additionalPrompt)
49+
// Compute the messages to compact, keeping recent messages aside.
50+
messages, firstKeptEntry := extractMessagesToCompact(sess, compactionAgent, int64(m.Limit.Context), additionalPrompt)
4651

4752
// Run the compaction.
4853
compactionSession := session.New(
@@ -72,16 +77,21 @@ func (r *LocalRuntime) doCompact(ctx context.Context, sess *session.Session, a *
7277
sess.InputTokens = compactionSession.OutputTokens
7378
sess.OutputTokens = 0
7479
sess.Messages = append(sess.Messages, session.Item{
75-
Summary: summary,
76-
Cost: compactionSession.TotalCost(),
80+
Summary: summary,
81+
FirstKeptEntry: firstKeptEntry,
82+
Cost: compactionSession.TotalCost(),
7783
})
7884
_ = r.sessionStore.UpdateSession(ctx, sess)
7985

8086
slog.Debug("Generated session summary", "session_id", sess.ID, "summary_length", len(summary))
81-
events <- SessionSummary(sess.ID, summary, a.Name())
87+
events <- SessionSummary(sess.ID, summary, a.Name(), firstKeptEntry)
8288
}
8389

84-
func extractMessagesToCompact(sess *session.Session, compactionAgent *agent.Agent, contextLimit int64, additionalPrompt string) []chat.Message {
90+
// extractMessagesToCompact returns the messages to send to the compaction model
91+
// and the index (into sess.Messages) of the first message that was kept aside.
92+
// Recent messages (up to maxKeepTokens) are excluded from compaction so they
93+
// can be preserved verbatim in the session after summarization.
94+
func extractMessagesToCompact(sess *session.Session, compactionAgent *agent.Agent, contextLimit int64, additionalPrompt string) ([]chat.Message, int) {
8595
// Add all the existing messages.
8696
var messages []chat.Message
8797
for _, msg := range sess.GetMessages(compactionAgent) {
@@ -95,6 +105,17 @@ func extractMessagesToCompact(sess *session.Session, compactionAgent *agent.Agen
95105
messages = append(messages, msg)
96106
}
97107

108+
// Split: keep the last N tokens of messages aside so the LLM retains
109+
// recent context after compaction.
110+
splitIdx := splitIndexForKeep(messages, maxKeepTokens)
111+
messagesToCompact := messages[:splitIdx]
112+
// Compute firstKeptEntry: index into sess.Messages of the first kept message.
113+
// The kept messages start at splitIdx in the non-system filtered list. We
114+
// need to map this back to the original sess.Messages index.
115+
firstKeptEntry := mapToSessionIndex(sess, splitIdx)
116+
117+
messages = messagesToCompact
118+
98119
// Prepare the first (system) message.
99120
systemPromptMessage := chat.Message{
100121
Role: chat.MessageRoleSystem,
@@ -131,7 +152,49 @@ func extractMessagesToCompact(sess *session.Session, compactionAgent *agent.Agen
131152
// Append the last (user) message.
132153
messages = append(messages, userPromptMessage)
133154

134-
return messages
155+
return messages, firstKeptEntry
156+
}
157+
158+
// splitIndexForKeep returns the index that splits messages into [0:idx] (to
159+
// compact) and [idx:] (to keep). It walks backwards accumulating tokens up to
160+
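// maxTokens, snapping to user/assistant boundaries. It returns len(messages) (keep nothing) when all messages fit or when no boundary fits within the budget.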
161+
func splitIndexForKeep(messages []chat.Message, maxTokens int64) int {
162+
if len(messages) == 0 {
163+
return 0
164+
}
165+
166+
var tokens int64
167+
// Walk from the end; find the earliest index whose suffix fits in maxTokens.
168+
lastValidBoundary := len(messages)
169+
for i := len(messages) - 1; i >= 0; i-- {
170+
tokens += compaction.EstimateMessageTokens(&messages[i])
171+
if tokens > maxTokens {
172+
return lastValidBoundary
173+
}
174+
role := messages[i].Role
175+
if role == chat.MessageRoleUser || role == chat.MessageRoleAssistant {
176+
lastValidBoundary = i
177+
}
178+
}
179+
// All messages fit within maxTokens — don't keep any aside (compact everything).
180+
return len(messages)
181+
}
182+
183+
// mapToSessionIndex maps an index in the non-system-filtered message list back
184+
// to the corresponding index in sess.Messages. It counts only message items
185+
// that are not system messages, and returns len(sess.Messages) when filteredIdx is past the last such item.
186+
func mapToSessionIndex(sess *session.Session, filteredIdx int) int {
187+
count := 0
188+
for i, item := range sess.Messages {
189+
if item.IsMessage() && item.Message.Message.Role != chat.MessageRoleSystem {
190+
if count == filteredIdx {
191+
return i
192+
}
193+
count++
194+
}
195+
}
196+
// filteredIdx is past the end — no messages to keep.
197+
return len(sess.Messages)
135198
}
136199

137200
func firstMessageToKeep(messages []chat.Message, contextLimit int64) int {

pkg/runtime/session_compaction_test.go

Lines changed: 169 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package runtime
22

33
import (
4+
"strings"
45
"testing"
56

67
"github.com/stretchr/testify/assert"
8+
"github.com/stretchr/testify/require"
79

810
"github.com/docker/docker-agent/pkg/agent"
911
"github.com/docker/docker-agent/pkg/chat"
@@ -96,7 +98,7 @@ func TestExtractMessagesToCompact(t *testing.T) {
9698
sess := session.New(session.WithMessages(tt.messages))
9799

98100
a := agent.New("test", "test prompt")
99-
result := extractMessagesToCompact(sess, a, tt.contextLimit, tt.additionalPrompt)
101+
result, _ := extractMessagesToCompact(sess, a, tt.contextLimit, tt.additionalPrompt)
100102

101103
assert.GreaterOrEqual(t, len(result), tt.wantConversationMsgCount+2)
102104
assert.Equal(t, chat.MessageRoleSystem, result[0].Role)
@@ -121,3 +123,169 @@ func TestExtractMessagesToCompact(t *testing.T) {
121123
})
122124
}
123125
}
126+
127+
func TestSplitIndexForKeep(t *testing.T) {
128+
msg := func(role chat.MessageRole, content string) chat.Message {
129+
return chat.Message{Role: role, Content: content}
130+
}
131+
132+
tests := []struct {
133+
name string
134+
messages []chat.Message
135+
maxTokens int64
136+
wantSplit int // expected split index
137+
}{
138+
{
139+
name: "empty messages",
140+
messages: nil,
141+
maxTokens: 1000,
142+
wantSplit: 0,
143+
},
144+
{
145+
name: "all messages fit in keep budget - compact everything",
146+
messages: []chat.Message{
147+
msg(chat.MessageRoleUser, "short"),
148+
msg(chat.MessageRoleAssistant, "short"),
149+
},
150+
maxTokens: 100_000,
151+
wantSplit: 2, // all fit → compact everything
152+
},
153+
{
154+
name: "recent messages kept, older ones compacted",
155+
messages: []chat.Message{
156+
msg(chat.MessageRoleUser, strings.Repeat("a", 40000)), // ~10005 tokens
157+
msg(chat.MessageRoleAssistant, strings.Repeat("b", 40000)), // ~10005 tokens
158+
msg(chat.MessageRoleUser, strings.Repeat("c", 40000)), // ~10005 tokens
159+
msg(chat.MessageRoleAssistant, strings.Repeat("d", 40000)), // ~10005 tokens
160+
msg(chat.MessageRoleUser, strings.Repeat("e", 40000)), // ~10005 tokens
161+
msg(chat.MessageRoleAssistant, strings.Repeat("f", 40000)), // ~10005 tokens
162+
},
163+
maxTokens: 20_100, // enough for exactly 2 messages
164+
wantSplit: 4, // last 2 messages are kept
165+
},
166+
}
167+
168+
for _, tt := range tests {
169+
t.Run(tt.name, func(t *testing.T) {
170+
got := splitIndexForKeep(tt.messages, tt.maxTokens)
171+
assert.Equal(t, tt.wantSplit, got)
172+
})
173+
}
174+
}
175+
176+
func TestExtractMessagesToCompact_KeepsRecentMessages(t *testing.T) {
177+
// Create a session with many messages, some large enough that the last
178+
// ~20k tokens are kept aside.
179+
var items []session.Item
180+
for range 10 {
181+
items = append(items, session.NewMessageItem(&session.Message{
182+
Message: chat.Message{
183+
Role: chat.MessageRoleUser,
184+
Content: strings.Repeat("x", 20000), // ~5k tokens each
185+
},
186+
}), session.NewMessageItem(&session.Message{
187+
Message: chat.Message{
188+
Role: chat.MessageRoleAssistant,
189+
Content: strings.Repeat("y", 20000), // ~5k tokens each
190+
},
191+
}))
192+
}
193+
194+
sess := session.New(session.WithMessages(items))
195+
a := agent.New("test", "test prompt")
196+
197+
result, firstKeptEntry := extractMessagesToCompact(sess, a, 200_000, "")
198+
199+
// The kept messages should not appear in the compaction result
200+
// (only system + compacted messages + user prompt).
201+
// Total: 20 messages × ~5k tokens = ~100k tokens.
202+
// Keep budget: 20k tokens → ~3-4 messages kept (per-message overhead may push the 4th over budget).
203+
// So compacted messages should be roughly 16-17 (20 minus the kept ones).
204+
compactedMsgCount := len(result) - 2 // minus system and user prompt
205+
assert.Less(t, compactedMsgCount, 20, "some messages should have been kept aside")
206+
assert.Positive(t, compactedMsgCount, "some messages should be compacted")
207+
208+
// firstKeptEntry should point into sess.Messages
209+
assert.Positive(t, firstKeptEntry, "firstKeptEntry should be > 0")
210+
assert.Less(t, firstKeptEntry, len(sess.Messages), "firstKeptEntry should be within bounds")
211+
}
212+
213+
func TestSessionGetMessages_WithFirstKeptEntry(t *testing.T) {
214+
// Build a session with some messages, then add a summary with FirstKeptEntry.
215+
items := []session.Item{
216+
session.NewMessageItem(&session.Message{
217+
Message: chat.Message{Role: chat.MessageRoleUser, Content: "m1"},
218+
}),
219+
session.NewMessageItem(&session.Message{
220+
Message: chat.Message{Role: chat.MessageRoleAssistant, Content: "m2"},
221+
}),
222+
session.NewMessageItem(&session.Message{
223+
Message: chat.Message{Role: chat.MessageRoleUser, Content: "m3"},
224+
}),
225+
session.NewMessageItem(&session.Message{
226+
Message: chat.Message{Role: chat.MessageRoleAssistant, Content: "m4"},
227+
}),
228+
session.NewMessageItem(&session.Message{
229+
Message: chat.Message{Role: chat.MessageRoleUser, Content: "m5"},
230+
}),
231+
}
232+
233+
// Add summary that says "first kept entry is index 3" (m4).
234+
// So we expect: [system...] + [summary] + [m4, m5]
235+
items = append(items, session.Item{
236+
Summary: "This is a summary of m1-m3",
237+
FirstKeptEntry: 3, // index of m4 in the Messages slice
238+
})
239+
240+
sess := session.New(session.WithMessages(items))
241+
a := agent.New("test", "test instruction")
242+
243+
messages := sess.GetMessages(a)
244+
245+
// Extract just the non-system messages
246+
var conversationMessages []chat.Message
247+
for _, msg := range messages {
248+
if msg.Role != chat.MessageRoleSystem {
249+
conversationMessages = append(conversationMessages, msg)
250+
}
251+
}
252+
253+
// Should have: summary (as user message), m4, m5
254+
require.Len(t, conversationMessages, 3, "expected summary + 2 kept messages")
255+
assert.Contains(t, conversationMessages[0].Content, "Session Summary:")
256+
assert.Equal(t, "m4", conversationMessages[1].Content)
257+
assert.Equal(t, "m5", conversationMessages[2].Content)
258+
}
259+
260+
func TestSessionGetMessages_SummaryWithoutFirstKeptEntry(t *testing.T) {
261+
// Backward compatibility: summary without FirstKeptEntry should work as before.
262+
items := []session.Item{
263+
session.NewMessageItem(&session.Message{
264+
Message: chat.Message{Role: chat.MessageRoleUser, Content: "m1"},
265+
}),
266+
session.NewMessageItem(&session.Message{
267+
Message: chat.Message{Role: chat.MessageRoleAssistant, Content: "m2"},
268+
}),
269+
{Summary: "This is a summary"},
270+
session.NewMessageItem(&session.Message{
271+
Message: chat.Message{Role: chat.MessageRoleUser, Content: "m3"},
272+
}),
273+
}
274+
275+
sess := session.New(session.WithMessages(items))
276+
a := agent.New("test", "test instruction")
277+
278+
messages := sess.GetMessages(a)
279+
280+
var conversationMessages []chat.Message
281+
for _, msg := range messages {
282+
if msg.Role != chat.MessageRoleSystem {
283+
conversationMessages = append(conversationMessages, msg)
284+
}
285+
}
286+
287+
// Should have: summary + m3 (messages after the summary)
288+
require.Len(t, conversationMessages, 2)
289+
assert.Contains(t, conversationMessages[0].Content, "Session Summary:")
290+
assert.Equal(t, "m3", conversationMessages[1].Content)
291+
}

pkg/session/migrations.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,12 @@ func getAllMigrations() []Migration {
350350
Description: "Drop the legacy messages JSON column now that all data lives in session_items",
351351
UpSQL: `ALTER TABLE sessions DROP COLUMN messages`,
352352
},
353+
{
354+
ID: 21,
355+
Name: "021_add_first_kept_entry_column",
356+
Description: "Add first_kept_entry column to session_items for compaction-preserved messages",
357+
UpSQL: `ALTER TABLE session_items ADD COLUMN first_kept_entry INTEGER DEFAULT 0`,
358+
},
353359
}
354360
}
355361

0 commit comments

Comments
 (0)