Skip to content

Commit 5ae74aa

Browse files
committed
Merge branch 'nxl/improve-compaction-strategy' into dev
2 parents: 6eddf08 + a488229 · commit 5ae74aa

7 files changed

Lines changed: 695 additions & 24 deletions

File tree

packages/opencode/src/agent/prompt/compaction.txt

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,7 @@
11
You are a helpful AI assistant tasked with summarizing conversations.
22

3-
When asked to summarize, provide a detailed but concise summary of the conversation.
3+
When asked to summarize, provide a detailed but concise summary of the older conversation history.
4+
The most recent turns may be preserved verbatim outside your summary, so focus on information that would still be needed to continue the work with that recent context available.
45
Focus on information that would be helpful for continuing the conversation, including:
56
- What was done
67
- What is currently being worked on

packages/opencode/src/config/config.ts

Lines changed: 7 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -204,6 +204,13 @@ const InfoSchema = Schema.Struct({
204204
prune: Schema.optional(Schema.Boolean).annotate({
205205
description: "Enable pruning of old tool outputs (default: true)",
206206
}),
207+
tail_turns: Schema.optional(NonNegativeInt).annotate({
208+
description:
209+
"Number of recent user turns, including their following assistant/tool responses, to keep verbatim during compaction (default: 2)",
210+
}),
211+
tail_tokens: Schema.optional(NonNegativeInt).annotate({
212+
description: "Token budget for retained recent turn spans during compaction",
213+
}),
207214
reserved: Schema.optional(NonNegativeInt).annotate({
208215
description: "Token buffer for compaction. Leaves enough window to avoid overflow during compaction.",
209216
}),

packages/opencode/src/session/compaction.ts

Lines changed: 123 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -15,7 +15,9 @@ import { NotFoundError } from "@/storage"
1515
import { ModelID, ProviderID } from "@/provider/schema"
1616
import { Effect, Layer, Context } from "effect"
1717
import { InstanceState } from "@/effect"
18-
import { isOverflow as overflow } from "./overflow"
18+
import { isOverflow as overflow, usable } from "./overflow"
19+
import { makeRuntime } from "@/effect/run-service"
20+
import { fn } from "@/util/fn"
1921

2022
const log = Log.create({ service: "session.compaction" })
2123

@@ -31,6 +33,39 @@ export const Event = {
3133
export const PRUNE_MINIMUM = 20_000
3234
export const PRUNE_PROTECT = 40_000
3335
const PRUNE_PROTECTED_TOOLS = ["skill"]
36+
const DEFAULT_TAIL_TURNS = 2
37+
const MIN_TAIL_TOKENS = 2_000
38+
const MAX_TAIL_TOKENS = 8_000
39+
type Turn = {
40+
start: number
41+
end: number
42+
id: MessageID
43+
}
44+
45+
function tailBudget(input: { cfg: Config.Info; model: Provider.Model }) {
46+
return (
47+
input.cfg.compaction?.tail_tokens ??
48+
Math.min(MAX_TAIL_TOKENS, Math.max(MIN_TAIL_TOKENS, Math.floor(usable(input) * 0.25)))
49+
)
50+
}
51+
52+
function turns(messages: MessageV2.WithParts[]) {
53+
const result: Turn[] = []
54+
for (let i = 0; i < messages.length; i++) {
55+
const msg = messages[i]
56+
if (msg.info.role !== "user") continue
57+
if (msg.parts.some((part) => part.type === "compaction")) continue
58+
result.push({
59+
start: i,
60+
end: messages.length,
61+
id: msg.info.id,
62+
})
63+
}
64+
for (let i = 0; i < result.length - 1; i++) {
65+
result[i].end = result[i + 1].start
66+
}
67+
return result
68+
}
3469

3570
export interface Interface {
3671
readonly isOverflow: (input: {
@@ -84,6 +119,55 @@ export const layer: Layer.Layer<
84119
return overflow({ cfg: yield* config.get(), tokens: input.tokens, model: input.model })
85120
})
86121

122+
const estimate = Effect.fn("SessionCompaction.estimate")(function* (input: {
123+
messages: MessageV2.WithParts[]
124+
model: Provider.Model
125+
}) {
126+
const msgs = yield* MessageV2.toModelMessagesEffect(input.messages, input.model)
127+
return Token.estimate(JSON.stringify(msgs))
128+
})
129+
130+
const select = Effect.fn("SessionCompaction.select")(function* (input: {
131+
messages: MessageV2.WithParts[]
132+
cfg: Config.Info
133+
model: Provider.Model
134+
}) {
135+
const limit = input.cfg.compaction?.tail_turns ?? DEFAULT_TAIL_TURNS
136+
if (limit <= 0) return { head: input.messages, tail_start_id: undefined }
137+
const budget = tailBudget({ cfg: input.cfg, model: input.model })
138+
const all = turns(input.messages)
139+
if (!all.length) return { head: input.messages, tail_start_id: undefined }
140+
const recent = all.slice(-limit)
141+
const sizes = yield* Effect.forEach(
142+
recent,
143+
(turn) =>
144+
estimate({
145+
messages: input.messages.slice(turn.start, turn.end),
146+
model: input.model,
147+
}),
148+
{ concurrency: 1 },
149+
)
150+
if (sizes.at(-1)! > budget) {
151+
log.info("tail fallback", { budget, size: sizes.at(-1) })
152+
return { head: input.messages, tail_start_id: undefined }
153+
}
154+
155+
let total = 0
156+
let keep: Turn | undefined
157+
for (let i = recent.length - 1; i >= 0; i--) {
158+
const size = sizes[i]
159+
if (total + size > budget) break
160+
total += size
161+
keep = recent[i]
162+
}
163+
164+
if (!keep || keep.start === 0) return { head: input.messages, tail_start_id: undefined }
165+
return {
166+
head: input.messages.slice(0, keep.start),
167+
tail_start_id: keep.id,
168+
}
169+
})
170+
87171
// goes backwards through parts until there are PRUNE_PROTECT tokens worth of tool
88172
// calls, then erases output of older tool calls to free context space
89173
const prune = Effect.fn("SessionCompaction.prune")(function* (input: { sessionID: SessionID }) {
@@ -146,6 +230,7 @@ export const layer: Layer.Layer<
146230
throw new Error(`Compaction parent must be a user message: ${input.parentID}`)
147231
}
148232
const userMessage = parent.info
233+
const compactionPart = parent.parts.find((part): part is MessageV2.CompactionPart => part.type === "compaction")
149234

150235
let messages = input.messages
151236
let replay:
@@ -176,19 +261,20 @@ export const layer: Layer.Layer<
176261
const model = agent.model
177262
? yield* provider.getModel(agent.model.providerID, agent.model.modelID)
178263
: yield* provider.getModel(userMessage.model.providerID, userMessage.model.modelID)
264+
const cfg = yield* config.get()
265+
const history = compactionPart && messages.at(-1)?.info.id === input.parentID ? messages.slice(0, -1) : messages
266+
const selected = yield* select({
267+
messages: history,
268+
cfg,
269+
model,
270+
})
179271
// Allow plugins to inject context or replace compaction prompt.
180272
const compacting = yield* plugin.trigger(
181273
"experimental.session.compacting",
182274
{ sessionID: input.sessionID },
183275
{ context: [], prompt: undefined },
184276
)
185-
const defaultPrompt = `Provide a detailed prompt for continuing our conversation above.
186-
Focus on information that would be helpful for continuing the conversation, including what we did, what we're doing, which files we're working on, and what we're going to do next.
187-
The summary that you construct will be used so that another agent can read it and continue the work.
188-
Do not call any tools. Respond only with the summary text.
189-
Respond in the same language as the user's messages in the conversation.
190-
191-
When constructing the summary, try to stick to this template:
277+
const defaultPrompt = `When constructing the summary, try to stick to this template:
192278
---
193279
## Goal
194280
@@ -213,7 +299,7 @@ When constructing the summary, try to stick to this template:
213299
---`
214300

215301
const prompt = compacting.prompt ?? [defaultPrompt, ...compacting.context].join("\n\n")
216-
const msgs = structuredClone(messages)
302+
const msgs = structuredClone(selected.head)
217303
yield* plugin.trigger("experimental.chat.messages.transform", {}, { messages: msgs })
218304
const modelMessages = yield* MessageV2.toModelMessagesEffect(msgs, model, { stripMedia: true })
219305
const ctx = yield* InstanceState.context
@@ -276,6 +362,13 @@ When constructing the summary, try to stick to this template:
276362
return "stop"
277363
}
278364

365+
if (compactionPart && selected.tail_start_id && compactionPart.tail_start_id !== selected.tail_start_id) {
366+
yield* session.updatePart({
367+
...compactionPart,
368+
tail_start_id: selected.tail_start_id,
369+
})
370+
}
371+
279372
if (result === "continue" && input.auto) {
280373
if (replay) {
281374
const original = replay.info
@@ -409,4 +502,25 @@ export const defaultLayer = Layer.suspend(() =>
409502
),
410503
)
411504

505+
const { runPromise } = makeRuntime(Service, defaultLayer)
506+
507+
export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
508+
return runPromise((svc) => svc.isOverflow(input))
509+
}
510+
511+
export async function prune(input: { sessionID: SessionID }) {
512+
return runPromise((svc) => svc.prune(input))
513+
}
514+
515+
export const create = fn(
516+
z.object({
517+
sessionID: SessionID.zod,
518+
agent: z.string(),
519+
model: z.object({ providerID: ProviderID.zod, modelID: ModelID.zod }),
520+
auto: z.boolean(),
521+
overflow: z.boolean().optional(),
522+
}),
523+
(input) => runPromise((svc) => svc.create(input)),
524+
)
525+
412526
export * as SessionCompaction from "./compaction"

packages/opencode/src/session/message-v2.ts

Lines changed: 14 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -208,6 +208,7 @@ export const CompactionPart = PartBase.extend({
208208
type: z.literal("compaction"),
209209
auto: z.boolean(),
210210
overflow: z.boolean().optional(),
211+
tail_start_id: MessageID.zod.optional(),
211212
}).meta({
212213
ref: "CompactionPart",
213214
})
@@ -923,8 +924,21 @@ export function get(input: { sessionID: SessionID; messageID: MessageID }): With
923924
export function filterCompacted(msgs: Iterable<WithParts>) {
924925
const result = [] as WithParts[]
925926
const completed = new Set<string>()
927+
let retain: MessageID | undefined
926928
for (const msg of msgs) {
927929
result.push(msg)
930+
if (retain) {
931+
if (msg.info.id === retain) break
932+
continue
933+
}
934+
if (msg.info.role === "user" && completed.has(msg.info.id)) {
935+
const part = msg.parts.find((item): item is CompactionPart => item.type === "compaction")
936+
if (!part) continue
937+
if (!part.tail_start_id) break
938+
retain = part.tail_start_id
939+
if (msg.info.id === retain) break
940+
continue
941+
}
928942
if (msg.info.role === "user" && completed.has(msg.info.id) && msg.parts.some((part) => part.type === "compaction"))
929943
break
930944
if (msg.info.role === "assistant" && msg.info.summary && msg.info.finish && !msg.info.error)

packages/opencode/src/session/overflow.ts

Lines changed: 13 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -5,18 +5,22 @@ import type { MessageV2 } from "./message-v2"
55

66
const COMPACTION_BUFFER = 20_000
77

8+
export function usable(input: { cfg: Config.Info; model: Provider.Model }) {
9+
const context = input.model.limit.context
10+
if (context === 0) return 0
11+
12+
const reserved =
13+
input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
14+
return input.model.limit.input
15+
? Math.max(0, input.model.limit.input - reserved)
16+
: Math.max(0, context - ProviderTransform.maxOutputTokens(input.model))
17+
}
18+
819
export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
920
if (input.cfg.compaction?.auto === false) return false
10-
const context = input.model.limit.context
11-
if (context === 0) return false
21+
if (input.model.limit.context === 0) return false
1222

1323
const count =
1424
input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
15-
16-
const reserved =
17-
input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
18-
const usable = input.model.limit.input
19-
? input.model.limit.input - reserved
20-
: context - ProviderTransform.maxOutputTokens(input.model)
21-
return count >= usable
25+
return count >= usable(input)
2226
}

0 commit comments

Comments (0)