Skip to content

Commit d9d4f89

Browse files
authored
fix(test): auto-acknowledge tool-result follow-ups in mock LLM server (#20528)
1 parent 48db7cf commit d9d4f89

13 files changed

Lines changed: 482 additions & 117 deletions

File tree

.github/workflows/test.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,6 @@ jobs:
100100
run: bun --cwd packages/app test:e2e:local
101101
env:
102102
CI: true
103-
OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }}
104-
OPENCODE_E2E_MODEL: opencode/claude-haiku-4-5
105-
OPENCODE_E2E_REQUIRE_PAID: "true"
106103
timeout-minutes: 30
107104

108105
- name: Upload Playwright artifacts

packages/app/e2e/fixtures.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import {
1515
waitSlug,
1616
waitSession,
1717
} from "./actions"
18+
import { openaiModel, withMockOpenAI } from "./prompt/mock"
1819
import { createSdk, dirSlug, getWorktree, sessionPath } from "./utils"
1920

2021
type LLMFixture = {
@@ -47,6 +48,7 @@ type LLMFixture = {
4748
wait: (count: number) => Promise<void>
4849
inputs: () => Promise<Record<string, unknown>[]>
4950
pending: () => Promise<number>
51+
misses: () => Promise<Array<{ url: URL; body: Record<string, unknown> }>>
5052
}
5153

5254
export const settingsKey = "settings.v3"
@@ -83,6 +85,7 @@ type TestFixtures = {
8385
gotoSession: (sessionID?: string) => Promise<void>
8486
withProject: <T>(callback: (project: ProjectHandle) => Promise<T>, options?: ProjectOptions) => Promise<T>
8587
withBackendProject: <T>(callback: (project: ProjectHandle) => Promise<T>, options?: ProjectOptions) => Promise<T>
88+
withMockProject: <T>(callback: (project: ProjectHandle) => Promise<T>, options?: ProjectOptions) => Promise<T>
8689
}
8790

8891
type WorkerFixtures = {
@@ -132,6 +135,7 @@ export const test = base.extend<TestFixtures, WorkerFixtures>({
132135
wait: (count) => rt.runPromise(svc.wait(count)),
133136
inputs: () => rt.runPromise(svc.inputs),
134137
pending: () => rt.runPromise(svc.pending),
138+
misses: () => rt.runPromise(svc.misses),
135139
})
136140
} finally {
137141
await rt.dispose()
@@ -193,6 +197,21 @@ export const test = base.extend<TestFixtures, WorkerFixtures>({
193197
runProject(page, callback, { ...options, serverUrl: backend.url, sdk: backend.sdk }),
194198
)
195199
},
200+
withMockProject: async ({ page, llm, backend }, use) => {
201+
await use((callback, options) =>
202+
withMockOpenAI({
203+
serverUrl: backend.url,
204+
llmUrl: llm.url,
205+
fn: () =>
206+
runProject(page, callback, {
207+
...options,
208+
model: options?.model ?? openaiModel,
209+
serverUrl: backend.url,
210+
sdk: backend.sdk,
211+
}),
212+
}),
213+
)
214+
},
196215
})
197216

198217
async function runProject<T>(

packages/app/e2e/models/model-picker.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { test, expect } from "../fixtures"
22
import { promptSelector } from "../selectors"
33
import { clickListItem } from "../actions"
44

5-
test("smoke model selection updates prompt footer", async ({ page, gotoSession }) => {
5+
test.fixme("smoke model selection updates prompt footer", async ({ page, gotoSession }) => {
66
await gotoSession()
77

88
await page.locator(promptSelector).click()

packages/app/e2e/prompt/mock.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,16 @@ export function promptMatch(token: string) {
1616
return (hit: Hit) => bodyText(hit).includes(token)
1717
}
1818

19+
/**
20+
* Match requests whose body contains the exact serialized tool input.
21+
* The seed prompts embed JSON.stringify(input) in the prompt text, which
22+
* gets escaped again inside the JSON body — so we double-escape to match.
23+
*/
24+
export function inputMatch(input: unknown) {
25+
const escaped = JSON.stringify(JSON.stringify(input)).slice(1, -1)
26+
return (hit: Hit) => bodyText(hit).includes(escaped)
27+
}
28+
1929
export async function withMockOpenAI<T>(input: { serverUrl: string; llmUrl: string; fn: () => Promise<T> }) {
2030
const sdk = createSdk(undefined, input.serverUrl)
2131
const prev = await sdk.global.config.get().then((res) => res.data ?? {})

packages/app/e2e/session/session-child-navigation.spec.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import { seedSessionTask, withSession } from "../actions"
22
import { test, expect } from "../fixtures"
3+
import { inputMatch } from "../prompt/mock"
34
import { promptSelector } from "../selectors"
45

5-
test("task tool child-session link does not trigger stale show errors", async ({ page, withBackendProject }) => {
6+
test("task tool child-session link does not trigger stale show errors", async ({ page, llm, withMockProject }) => {
67
test.setTimeout(120_000)
78

89
const errs: string[] = []
@@ -12,12 +13,18 @@ test("task tool child-session link does not trigger stale show errors", async ({
1213
page.on("pageerror", onError)
1314

1415
try {
15-
await withBackendProject(async ({ gotoSession, trackSession, sdk }) => {
16+
await withMockProject(async ({ gotoSession, trackSession, sdk }) => {
1617
await withSession(sdk, `e2e child nav ${Date.now()}`, async (session) => {
17-
const child = await seedSessionTask(sdk, {
18-
sessionID: session.id,
18+
const taskInput = {
1919
description: "Open child session",
2020
prompt: "Search the repository for AssistantParts and then reply with exactly CHILD_OK.",
21+
subagent_type: "general",
22+
}
23+
await llm.toolMatch(inputMatch(taskInput), "task", taskInput)
24+
const child = await seedSessionTask(sdk, {
25+
sessionID: session.id,
26+
description: taskInput.description,
27+
prompt: taskInput.prompt,
2128
})
2229
trackSession(child.sessionID)
2330

packages/app/e2e/session/session-composer-dock.spec.ts

Lines changed: 50 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
sessionTodoToggleButtonSelector,
1515
} from "../selectors"
1616
import { modKey } from "../utils"
17+
import { inputMatch } from "../prompt/mock"
1718

1819
type Sdk = Parameters<typeof clearSessionDockSeed>[0]
1920
type PermissionRule = { permission: string; pattern: string; action: "allow" | "deny" | "ask" }
@@ -36,6 +37,17 @@ async function withDockSession<T>(
3637
}
3738
}
3839

40+
const defaultQuestions = [
41+
{
42+
header: "Need input",
43+
question: "Pick one option",
44+
options: [
45+
{ label: "Continue", description: "Continue now" },
46+
{ label: "Stop", description: "Stop here" },
47+
],
48+
},
49+
]
50+
3951
test.setTimeout(120_000)
4052

4153
async function withDockSeed<T>(sdk: Sdk, sessionID: string, fn: () => Promise<T>) {
@@ -291,27 +303,19 @@ test("auto-accept toggle works before first submit", async ({ page, withBackendP
291303
})
292304
})
293305

294-
test("blocked question flow unblocks after submit", async ({ page, withBackendProject }) => {
295-
await withBackendProject(async (project) => {
306+
test("blocked question flow unblocks after submit", async ({ page, llm, withMockProject }) => {
307+
await withMockProject(async (project) => {
296308
await withDockSession(
297309
project.sdk,
298310
"e2e composer dock question",
299311
async (session) => {
300312
await withDockSeed(project.sdk, session.id, async () => {
301313
await project.gotoSession(session.id)
302314

315+
await llm.toolMatch(inputMatch({ questions: defaultQuestions }), "question", { questions: defaultQuestions })
303316
await seedSessionQuestion(project.sdk, {
304317
sessionID: session.id,
305-
questions: [
306-
{
307-
header: "Need input",
308-
question: "Pick one option",
309-
options: [
310-
{ label: "Continue", description: "Continue now" },
311-
{ label: "Stop", description: "Stop here" },
312-
],
313-
},
314-
],
318+
questions: defaultQuestions,
315319
})
316320

317321
const dock = page.locator(questionDockSelector)
@@ -328,27 +332,19 @@ test("blocked question flow unblocks after submit", async ({ page, withBackendPr
328332
})
329333
})
330334

331-
test("blocked question flow supports keyboard shortcuts", async ({ page, withBackendProject }) => {
332-
await withBackendProject(async (project) => {
335+
test("blocked question flow supports keyboard shortcuts", async ({ page, llm, withMockProject }) => {
336+
await withMockProject(async (project) => {
333337
await withDockSession(
334338
project.sdk,
335339
"e2e composer dock question keyboard",
336340
async (session) => {
337341
await withDockSeed(project.sdk, session.id, async () => {
338342
await project.gotoSession(session.id)
339343

344+
await llm.toolMatch(inputMatch({ questions: defaultQuestions }), "question", { questions: defaultQuestions })
340345
await seedSessionQuestion(project.sdk, {
341346
sessionID: session.id,
342-
questions: [
343-
{
344-
header: "Need input",
345-
question: "Pick one option",
346-
options: [
347-
{ label: "Continue", description: "Continue now" },
348-
{ label: "Stop", description: "Stop here" },
349-
],
350-
},
351-
],
347+
questions: defaultQuestions,
352348
})
353349

354350
const dock = page.locator(questionDockSelector)
@@ -371,27 +367,19 @@ test("blocked question flow supports keyboard shortcuts", async ({ page, withBac
371367
})
372368
})
373369

374-
test("blocked question flow supports escape dismiss", async ({ page, withBackendProject }) => {
375-
await withBackendProject(async (project) => {
370+
test("blocked question flow supports escape dismiss", async ({ page, llm, withMockProject }) => {
371+
await withMockProject(async (project) => {
376372
await withDockSession(
377373
project.sdk,
378374
"e2e composer dock question escape",
379375
async (session) => {
380376
await withDockSeed(project.sdk, session.id, async () => {
381377
await project.gotoSession(session.id)
382378

379+
await llm.toolMatch(inputMatch({ questions: defaultQuestions }), "question", { questions: defaultQuestions })
383380
await seedSessionQuestion(project.sdk, {
384381
sessionID: session.id,
385-
questions: [
386-
{
387-
header: "Need input",
388-
question: "Pick one option",
389-
options: [
390-
{ label: "Continue", description: "Continue now" },
391-
{ label: "Stop", description: "Stop here" },
392-
],
393-
},
394-
],
382+
questions: defaultQuestions,
395383
})
396384

397385
const dock = page.locator(questionDockSelector)
@@ -512,9 +500,20 @@ test("blocked permission flow supports allow always", async ({ page, withBackend
512500

513501
test("child session question request blocks parent dock and unblocks after submit", async ({
514502
page,
515-
withBackendProject,
503+
llm,
504+
withMockProject,
516505
}) => {
517-
await withBackendProject(async (project) => {
506+
const questions = [
507+
{
508+
header: "Child input",
509+
question: "Pick one child option",
510+
options: [
511+
{ label: "Continue", description: "Continue child" },
512+
{ label: "Stop", description: "Stop child" },
513+
],
514+
},
515+
]
516+
await withMockProject(async (project) => {
518517
await withDockSession(
519518
project.sdk,
520519
"e2e composer dock child question parent",
@@ -532,18 +531,10 @@ test("child session question request blocks parent dock and unblocks after submi
532531

533532
try {
534533
await withDockSeed(project.sdk, child.id, async () => {
534+
await llm.toolMatch(inputMatch({ questions }), "question", { questions })
535535
await seedSessionQuestion(project.sdk, {
536536
sessionID: child.id,
537-
questions: [
538-
{
539-
header: "Child input",
540-
question: "Pick one child option",
541-
options: [
542-
{ label: "Continue", description: "Continue child" },
543-
{ label: "Stop", description: "Stop child" },
544-
],
545-
},
546-
],
537+
questions,
547538
})
548539

549540
const dock = page.locator(questionDockSelector)
@@ -652,24 +643,26 @@ test("todo dock transitions and collapse behavior", async ({ page, withBackendPr
652643
})
653644
})
654645

655-
test("keyboard focus stays off prompt while blocked", async ({ page, withBackendProject }) => {
656-
await withBackendProject(async (project) => {
646+
test("keyboard focus stays off prompt while blocked", async ({ page, llm, withMockProject }) => {
647+
const questions = [
648+
{
649+
header: "Need input",
650+
question: "Pick one option",
651+
options: [{ label: "Continue", description: "Continue now" }],
652+
},
653+
]
654+
await withMockProject(async (project) => {
657655
await withDockSession(
658656
project.sdk,
659657
"e2e composer dock keyboard",
660658
async (session) => {
661659
await withDockSeed(project.sdk, session.id, async () => {
662660
await project.gotoSession(session.id)
663661

662+
await llm.toolMatch(inputMatch({ questions }), "question", { questions })
664663
await seedSessionQuestion(project.sdk, {
665664
sessionID: session.id,
666-
questions: [
667-
{
668-
header: "Need input",
669-
question: "Pick one option",
670-
options: [{ label: "Continue", description: "Continue now" }],
671-
},
672-
],
665+
questions,
673666
})
674667

675668
await expectQuestionBlocked(page)

0 commit comments

Comments
 (0)