Skip to content

Commit 61c6802

Browse files
committed
fix: refresh tool cache after remote MCP server reconnect
After a successful reconnect in watchConnection, eagerly call refreshToolCache so the runtime picks up the new server's tools and the toolsChangedHandler is invoked. Without this, the runtime kept using its stale copy of the tool list. Add TestRemoteReconnectRefreshesTools to verify that when a remote MCP server restarts with a different set of tools the Toolset returns the updated tools and notifies the handler. Extract startMCPServer and allocateAddr test helpers to reduce duplication across reconnect tests. Fixes #2244 Assisted-By: docker-agent
1 parent fa167bd commit 61c6802

2 files changed

Lines changed: 127 additions & 5 deletions

File tree

pkg/tools/mcp/mcp.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,13 @@ func (ts *Toolset) watchConnection(ctx context.Context) {
292292
if !ts.tryRestart(ctx) {
293293
return
294294
}
295+
296+
// After a successful restart, eagerly refresh the tool and prompt
297+
// caches and notify the runtime so it picks up the new server's
298+
// state. The new server may expose a different set of tools/prompts,
299+
// and without this the runtime would keep using its stale copy.
300+
ts.refreshToolCache(ctx)
301+
ts.refreshPromptCache(ctx)
295302
}
296303
}
297304

pkg/tools/mcp/reconnect_test.go

Lines changed: 120 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,46 @@ import (
1717
"github.com/docker/docker-agent/pkg/tools"
1818
)
1919

20+
// startMCPServer creates a minimal MCP server on addr with the given tools
21+
// and returns a function to shut it down.
22+
func startMCPServer(t *testing.T, addr string, mcpTools ...*gomcp.Tool) (shutdown func()) {
23+
t.Helper()
24+
25+
s := gomcp.NewServer(&gomcp.Implementation{Name: "test-server", Version: "1.0.0"}, nil)
26+
for _, tool := range mcpTools {
27+
s.AddTool(tool, func(_ context.Context, _ *gomcp.CallToolRequest) (*gomcp.CallToolResult, error) {
28+
return &gomcp.CallToolResult{
29+
Content: []gomcp.Content{&gomcp.TextContent{Text: "ok-" + tool.Name}},
30+
}, nil
31+
})
32+
}
33+
34+
// Retry Listen until the port is available (e.g. after a server shutdown).
35+
var srvLn net.Listener
36+
require.Eventually(t, func() bool {
37+
var listenErr error
38+
srvLn, listenErr = net.Listen("tcp", addr)
39+
return listenErr == nil
40+
}, 2*time.Second, 50*time.Millisecond, "port %s not available in time", addr)
41+
42+
srv := &http.Server{
43+
Handler: gomcp.NewStreamableHTTPHandler(func(*http.Request) *gomcp.Server { return s }, nil),
44+
}
45+
go func() { _ = srv.Serve(srvLn) }()
46+
47+
return func() { _ = srv.Close() }
48+
}
49+
50+
// allocateAddr returns a free TCP address on localhost.
51+
func allocateAddr(t *testing.T) string {
52+
t.Helper()
53+
ln, err := net.Listen("tcp", "127.0.0.1:0")
54+
require.NoError(t, err)
55+
addr := ln.Addr().String()
56+
ln.Close()
57+
return addr
58+
}
59+
2060
// TestRemoteReconnectAfterServerRestart verifies that a Toolset backed by a
2161
// real remote (streamable-HTTP) MCP server transparently recovers when the
2262
// server is restarted.
@@ -33,11 +73,7 @@ import (
3373
func TestRemoteReconnectAfterServerRestart(t *testing.T) {
3474
t.Parallel()
3575

36-
// Use a fixed listener address so we can restart on the same port.
37-
ln, err := net.Listen("tcp", "127.0.0.1:0")
38-
require.NoError(t, err)
39-
addr := ln.Addr().String()
40-
ln.Close() // We only needed the address; close so startServer can bind it.
76+
addr := allocateAddr(t)
4177

4278
var callCount atomic.Int32
4379

@@ -123,3 +159,82 @@ func TestRemoteReconnectAfterServerRestart(t *testing.T) {
123159
t.Fatal("reconnect did not complete: restarted channel was not closed")
124160
}
125161
}
162+
163+
// TestRemoteReconnectRefreshesTools verifies that after a remote MCP server
164+
// restarts with a different set of tools, the Toolset picks up the new tools
165+
// and notifies the runtime via the toolsChangedHandler.
166+
//
167+
// This is the scenario from https://github.com/docker/docker-agent/issues/2244:
168+
// - Server v1 exposes tools [alpha, shared].
169+
// - Client connects and caches [alpha, shared].
170+
// - Server v1 shuts down; server v2 starts with tools [beta, shared].
171+
// - A tool call to "shared" triggers reconnection.
172+
// - After reconnection, Tools() must return [beta, shared], not the stale [alpha, shared].
173+
// - The toolsChangedHandler must be called so the runtime refreshes its own state.
174+
func TestRemoteReconnectRefreshesTools(t *testing.T) {
175+
t.Parallel()
176+
177+
addr := allocateAddr(t)
178+
179+
// "shared" exists on both servers so we can call it to trigger reconnect.
180+
sharedTool := &gomcp.Tool{Name: "shared", InputSchema: &jsonschema.Schema{Type: "object"}}
181+
alphaTool := &gomcp.Tool{Name: "alpha", InputSchema: &jsonschema.Schema{Type: "object"}}
182+
betaTool := &gomcp.Tool{Name: "beta", InputSchema: &jsonschema.Schema{Type: "object"}}
183+
184+
// --- Start server v1 with tools "alpha" + "shared" ---
185+
shutdown1 := startMCPServer(t, addr, alphaTool, sharedTool)
186+
187+
ts := NewRemoteToolset("ns", fmt.Sprintf("http://%s/mcp", addr), "streamable-http", nil)
188+
189+
// Track toolsChangedHandler invocations.
190+
toolsChangedCh := make(chan struct{}, 1)
191+
ts.SetToolsChangedHandler(func() {
192+
select {
193+
case toolsChangedCh <- struct{}{}:
194+
default:
195+
}
196+
})
197+
198+
require.NoError(t, ts.Start(t.Context()))
199+
200+
// Verify initial tools.
201+
toolList, err := ts.Tools(t.Context())
202+
require.NoError(t, err)
203+
require.Len(t, toolList, 2)
204+
toolNames := []string{toolList[0].Name, toolList[1].Name}
205+
assert.Contains(t, toolNames, "ns_alpha")
206+
assert.Contains(t, toolNames, "ns_shared")
207+
208+
// --- Shut down server v1, start server v2 with tools "beta" + "shared" ---
209+
shutdown1()
210+
211+
shutdown2 := startMCPServer(t, addr, betaTool, sharedTool)
212+
t.Cleanup(func() {
213+
_ = ts.Stop(t.Context())
214+
shutdown2()
215+
})
216+
217+
// Call "shared" to trigger ErrSessionMissing → reconnect.
218+
result, callErr := ts.callTool(t.Context(), tools.ToolCall{
219+
Function: tools.FunctionCall{Name: "shared", Arguments: "{}"},
220+
})
221+
require.NoError(t, callErr)
222+
assert.Equal(t, "ok-shared", result.Output)
223+
224+
// Wait for the toolsChangedHandler to be called (signals reconnect + refresh).
225+
select {
226+
case <-toolsChangedCh:
227+
// Good — the handler was called.
228+
case <-time.After(30 * time.Second):
229+
t.Fatal("timed out waiting for toolsChangedHandler after reconnect")
230+
}
231+
232+
// Verify the toolset now reports the new server's tools.
233+
toolList, err = ts.Tools(t.Context())
234+
require.NoError(t, err)
235+
require.Len(t, toolList, 2, "expected exactly two tools from the new server")
236+
toolNames = []string{toolList[0].Name, toolList[1].Name}
237+
assert.Contains(t, toolNames, "ns_beta", "expected the new server's tool, got stale tool")
238+
assert.Contains(t, toolNames, "ns_shared")
239+
assert.NotContains(t, toolNames, "ns_alpha", "stale tool from old server should not be present")
240+
}

0 commit comments

Comments
 (0)