Skip to content

Commit 5ae74aa

Browse files
committed
Merge branch 'nxl/improve-compaction-strategy' into dev
2 parents: 6eddf08 + a488229 · commit 5ae74aa

7 files changed

Lines changed: 695 additions & 24 deletions

File tree

packages/opencode/src/agent/prompt/compaction.txt

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,7 @@
11
You are a helpful AI assistant tasked with summarizing conversations.
22

3-
When asked to summarize, provide a detailed but concise summary of the conversation.
3+
When asked to summarize, provide a detailed but concise summary of the older conversation history.
4+
The most recent turns may be preserved verbatim outside your summary, so focus on information that would still be needed to continue the work with that recent context available.
45
Focus on information that would be helpful for continuing the conversation, including:
56
- What was done
67
- What is currently being worked on

packages/opencode/src/config/config.ts

Lines changed: 7 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -204,6 +204,13 @@ const InfoSchema = Schema.Struct({
204204
prune: Schema.optional(Schema.Boolean).annotate({
205205
description: "Enable pruning of old tool outputs (default: true)",
206206
}),
207+
tail_turns: Schema.optional(NonNegativeInt).annotate({
208+
description:
209+
"Number of recent user turns, including their following assistant/tool responses, to keep verbatim during compaction (default: 2)",
210+
}),
211+
tail_tokens: Schema.optional(NonNegativeInt).annotate({
212+
description: "Token budget for retained recent turn spans during compaction",
213+
}),
207214
reserved: Schema.optional(NonNegativeInt).annotate({
208215
description: "Token buffer for compaction. Leaves enough window to avoid overflow during compaction.",
209216
}),

packages/opencode/src/session/compaction.ts

Lines changed: 123 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -15,7 +15,9 @@ import { NotFoundError } from "@/storage"
1515
import { ModelID, ProviderID } from "@/provider/schema"
1616
import { Effect, Layer, Context } from "effect"
1717
import { InstanceState } from "@/effect"
18-
import { isOverflow as overflow } from "./overflow"
18+
import { isOverflow as overflow, usable } from "./overflow"
19+
import { makeRuntime } from "@/effect/run-service"
20+
import { fn } from "@/util/fn"
1921

2022
const log = Log.create({ service: "session.compaction" })
2123

@@ -31,6 +33,39 @@ export const Event = {
3133
export const PRUNE_MINIMUM = 20_000
3234
export const PRUNE_PROTECT = 40_000
3335
const PRUNE_PROTECTED_TOOLS = ["skill"]
36+
const DEFAULT_TAIL_TURNS = 2
37+
const MIN_TAIL_TOKENS = 2_000
38+
const MAX_TAIL_TOKENS = 8_000
39+
type Turn = {
40+
start: number
41+
end: number
42+
id: MessageID
43+
}
44+
45+
function tailBudget(input: { cfg: Config.Info; model: Provider.Model }) {
46+
return (
47+
input.cfg.compaction?.tail_tokens ??
48+
Math.min(MAX_TAIL_TOKENS, Math.max(MIN_TAIL_TOKENS, Math.floor(usable(input) * 0.25)))
49+
)
50+
}
51+
52+
function turns(messages: MessageV2.WithParts[]) {
53+
const result: Turn[] = []
54+
for (let i = 0; i < messages.length; i++) {
55+
const msg = messages[i]
56+
if (msg.info.role !== "user") continue
57+
if (msg.parts.some((part) => part.type === "compaction")) continue
58+
result.push({
59+
start: i,
60+
end: messages.length,
61+
id: msg.info.id,
62+
})
63+
}
64+
for (let i = 0; i < result.length - 1; i++) {
65+
result[i].end = result[i + 1].start
66+
}
67+
return result
68+
}
3469

3570
export interface Interface {
3671
readonly isOverflow: (input: {
@@ -84,6 +119,55 @@ export const layer: Layer.Layer<
84119
return overflow({ cfg: yield* config.get(), tokens: input.tokens, model: input.model })
85120
})
86121

122+
const estimate = Effect.fn("SessionCompaction.estimate")(function* (input: {
123+
messages: MessageV2.WithParts[]
124+
model: Provider.Model
125+
}) {
126+
const msgs = yield* MessageV2.toModelMessagesEffect(input.messages, input.model)
127+
return Token.estimate(JSON.stringify(msgs))
128+
})
129+
130+
const select = Effect.fn("SessionCompaction.select")(function* (input: {
131+
messages: MessageV2.WithParts[]
132+
cfg: Config.Info
133+
model: Provider.Model
134+
}) {
135+
const limit = input.cfg.compaction?.tail_turns ?? DEFAULT_TAIL_TURNS
136+
if (limit <= 0) return { head: input.messages, tail_start_id: undefined }
137+
const budget = tailBudget({ cfg: input.cfg, model: input.model })
138+
const all = turns(input.messages)
139+
if (!all.length) return { head: input.messages, tail_start_id: undefined }
140+
const recent = all.slice(-limit)
141+
const sizes = yield* Effect.forEach(
142+
recent,
143+
(turn) =>
144+
estimate({
145+
messages: input.messages.slice(turn.start, turn.end),
146+
model: input.model,
147+
}),
148+
{ concurrency: 1 },
149+
)
150+
if (sizes.at(-1)! > budget) {
151+
log.info("tail fallback", { budget, size: sizes.at(-1) })
152+
return { head: input.messages, tail_start_id: undefined }
153+
}
154+
155+
let total = 0
156+
let keep: Turn | undefined
157+
for (let i = recent.length - 1; i >= 0; i--) {
158+
const size = sizes[i]
159+
if (total + size > budget) break
160+
total += size
161+
keep = recent[i]
162+
}
163+
164+
if (!keep || keep.start === 0) return { head: input.messages, tail_start_id: undefined }
165+
return {
166+
head: input.messages.slice(0, keep.start),
167+
tail_start_id: keep.id,
168+
}
169+
})
170+
87171
// goes backwards through parts until there are PRUNE_PROTECT tokens worth of tool
88172
// calls, then erases output of older tool calls to free context space
89173
const prune = Effect.fn("SessionCompaction.prune")(function* (input: { sessionID: SessionID }) {
@@ -146,6 +230,7 @@ export const layer: Layer.Layer<
146230
throw new Error(`Compaction parent must be a user message: ${input.parentID}`)
147231
}
148232
const userMessage = parent.info
233+
const compactionPart = parent.parts.find((part): part is MessageV2.CompactionPart => part.type === "compaction")
149234

150235
let messages = input.messages
151236
let replay:
@@ -176,19 +261,20 @@ export const layer: Layer.Layer<
176261
const model = agent.model
177262
? yield* provider.getModel(agent.model.providerID, agent.model.modelID)
178263
: yield* provider.getModel(userMessage.model.providerID, userMessage.model.modelID)
264+
const cfg = yield* config.get()
265+
const history = compactionPart && messages.at(-1)?.info.id === input.parentID ? messages.slice(0, -1) : messages
266+
const selected = yield* select({
267+
messages: history,
268+
cfg,
269+
model,
270+
})
179271
// Allow plugins to inject context or replace compaction prompt.
180272
const compacting = yield* plugin.trigger(
181273
"experimental.session.compacting",
182274
{ sessionID: input.sessionID },
183275
{ context: [], prompt: undefined },
184276
)
185-
const defaultPrompt = `Provide a detailed prompt for continuing our conversation above.
186-
Focus on information that would be helpful for continuing the conversation, including what we did, what we're doing, which files we're working on, and what we're going to do next.
187-
The summary that you construct will be used so that another agent can read it and continue the work.
188-
Do not call any tools. Respond only with the summary text.
189-
Respond in the same language as the user's messages in the conversation.
190-
191-
When constructing the summary, try to stick to this template:
277+
const defaultPrompt = `When constructing the summary, try to stick to this template:
192278
---
193279
## Goal
194280
@@ -213,7 +299,7 @@ When constructing the summary, try to stick to this template:
213299
---`
214300

215301
const prompt = compacting.prompt ?? [defaultPrompt, ...compacting.context].join("\n\n")
216-
const msgs = structuredClone(messages)
302+
const msgs = structuredClone(selected.head)
217303
yield* plugin.trigger("experimental.chat.messages.transform", {}, { messages: msgs })
218304
const modelMessages = yield* MessageV2.toModelMessagesEffect(msgs, model, { stripMedia: true })
219305
const ctx = yield* InstanceState.context
@@ -276,6 +362,13 @@ When constructing the summary, try to stick to this template:
276362
return "stop"
277363
}
278364

365+
if (compactionPart && selected.tail_start_id && compactionPart.tail_start_id !== selected.tail_start_id) {
366+
yield* session.updatePart({
367+
...compactionPart,
368+
tail_start_id: selected.tail_start_id,
369+
})
370+
}
371+
279372
if (result === "continue" && input.auto) {
280373
if (replay) {
281374
const original = replay.info
@@ -409,4 +502,25 @@ export const defaultLayer = Layer.suspend(() =>
409502
),
410503
)
411504

505+
const { runPromise } = makeRuntime(Service, defaultLayer)
506+
507+
export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
508+
return runPromise((svc) => svc.isOverflow(input))
509+
}
510+
511+
export async function prune(input: { sessionID: SessionID }) {
512+
return runPromise((svc) => svc.prune(input))
513+
}
514+
515+
export const create = fn(
516+
z.object({
517+
sessionID: SessionID.zod,
518+
agent: z.string(),
519+
model: z.object({ providerID: ProviderID.zod, modelID: ModelID.zod }),
520+
auto: z.boolean(),
521+
overflow: z.boolean().optional(),
522+
}),
523+
(input) => runPromise((svc) => svc.create(input)),
524+
)
525+
412526
export * as SessionCompaction from "./compaction"

packages/opencode/src/session/message-v2.ts

Lines changed: 14 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -208,6 +208,7 @@ export const CompactionPart = PartBase.extend({
208208
type: z.literal("compaction"),
209209
auto: z.boolean(),
210210
overflow: z.boolean().optional(),
211+
tail_start_id: MessageID.zod.optional(),
211212
}).meta({
212213
ref: "CompactionPart",
213214
})
@@ -923,8 +924,21 @@ export function get(input: { sessionID: SessionID; messageID: MessageID }): With
923924
export function filterCompacted(msgs: Iterable<WithParts>) {
924925
const result = [] as WithParts[]
925926
const completed = new Set<string>()
927+
let retain: MessageID | undefined
926928
for (const msg of msgs) {
927929
result.push(msg)
930+
if (retain) {
931+
if (msg.info.id === retain) break
932+
continue
933+
}
934+
if (msg.info.role === "user" && completed.has(msg.info.id)) {
935+
const part = msg.parts.find((item): item is CompactionPart => item.type === "compaction")
936+
if (!part) continue
937+
if (!part.tail_start_id) break
938+
retain = part.tail_start_id
939+
if (msg.info.id === retain) break
940+
continue
941+
}
928942
if (msg.info.role === "user" && completed.has(msg.info.id) && msg.parts.some((part) => part.type === "compaction"))
929943
break
930944
if (msg.info.role === "assistant" && msg.info.summary && msg.info.finish && !msg.info.error)

packages/opencode/src/session/overflow.ts

Lines changed: 13 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -5,18 +5,22 @@ import type { MessageV2 } from "./message-v2"
55

66
const COMPACTION_BUFFER = 20_000
77

8+
export function usable(input: { cfg: Config.Info; model: Provider.Model }) {
9+
const context = input.model.limit.context
10+
if (context === 0) return 0
11+
12+
const reserved =
13+
input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
14+
return input.model.limit.input
15+
? Math.max(0, input.model.limit.input - reserved)
16+
: Math.max(0, context - ProviderTransform.maxOutputTokens(input.model))
17+
}
18+
819
export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
920
if (input.cfg.compaction?.auto === false) return false
10-
const context = input.model.limit.context
11-
if (context === 0) return false
21+
if (input.model.limit.context === 0) return false
1222

1323
const count =
1424
input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
15-
16-
const reserved =
17-
input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
18-
const usable = input.model.limit.input
19-
? input.model.limit.input - reserved
20-
: context - ProviderTransform.maxOutputTokens(input.model)
21-
return count >= usable
25+
return count >= usable(input)
2226
}

0 commit comments

Comments (0)