Skip to content

Commit 0dbc1be

Browse files
authored
Merge pull request #2215 from dgageot/fix-flaky-test
fix: retry MCP callTool on any connection error, not just ErrSessionMissing
2 parents 665ae4e + cdfed81 commit 0dbc1be

1 file changed

Lines changed: 31 additions & 5 deletions

File tree

pkg/tools/mcp/mcp.go

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"io"
1111
"iter"
1212
"log/slog"
13+
"net"
1314
"net/url"
1415
"strings"
1516
"sync"
@@ -454,12 +455,13 @@ func (ts *Toolset) callTool(ctx context.Context, toolCall tools.ToolCall) (*tool
454455

455456
resp, err := ts.mcpClient.CallTool(ctx, request)
456457

457-
// If the server lost our session (e.g. it restarted), force a
458-
// reconnection and retry the call once.
459-
if errors.Is(err, mcp.ErrSessionMissing) {
460-
slog.Warn("MCP session missing, forcing reconnect and retrying", "tool", toolCall.Function.Name, "server", ts.logID)
458+
// If the call failed with a connection or session error (e.g. the
459+
// server restarted), trigger or wait for a reconnection and retry
460+
// the call once.
461+
if err != nil && isConnectionError(err) && ctx.Err() == nil {
462+
slog.Warn("MCP call failed, forcing reconnect and retrying", "tool", toolCall.Function.Name, "server", ts.logID, "error", err)
461463
if waitErr := ts.forceReconnectAndWait(ctx); waitErr != nil {
462-
return nil, fmt.Errorf("failed to reconnect after session loss: %w", waitErr)
464+
return nil, fmt.Errorf("failed to reconnect after call failure: %w", waitErr)
463465
}
464466
resp, err = ts.mcpClient.CallTool(ctx, request)
465467
}
@@ -690,3 +692,27 @@ func (ts *Toolset) GetPrompt(ctx context.Context, name string, arguments map[str
690692
slog.Debug("Retrieved MCP prompt", "prompt", name, "messages_count", len(result.Messages))
691693
return result, nil
692694
}
695+
696+
// isConnectionError reports whether err is a connection or session error
697+
// that warrants a reconnect-and-retry (as opposed to an application-level
698+
// error that would fail again even after reconnecting).
699+
func isConnectionError(err error) bool {
700+
if errors.Is(err, mcp.ErrSessionMissing) || errors.Is(err, io.EOF) {
701+
return true
702+
}
703+
var netErr net.Error
704+
if errors.As(err, &netErr) {
705+
return true
706+
}
707+
// The MCP SDK wraps transport failures (e.g. connection reset, EOF from
708+
// client.Do) with its internal ErrRejected sentinel using %v, which
709+
// drops the original error from the chain. Detect these by checking
710+
// the error message for common transport-failure substrings.
711+
if msg := err.Error(); strings.Contains(msg, "connection reset") ||
712+
strings.Contains(msg, "connection refused") ||
713+
strings.Contains(msg, "broken pipe") ||
714+
strings.Contains(msg, "EOF") {
715+
return true
716+
}
717+
return false
718+
}

0 commit comments

Comments
 (0)