@@ -10,6 +10,7 @@ import (
1010 "io"
1111 "iter"
1212 "log/slog"
13+ "net"
1314 "net/url"
1415 "strings"
1516 "sync"
@@ -454,12 +455,13 @@ func (ts *Toolset) callTool(ctx context.Context, toolCall tools.ToolCall) (*tool
454455
455456 resp , err := ts .mcpClient .CallTool (ctx , request )
456457
457- // If the server lost our session (e.g. it restarted), force a
458- // reconnection and retry the call once.
459- if errors .Is (err , mcp .ErrSessionMissing ) {
460- slog .Warn ("MCP session missing, forcing reconnect and retrying" , "tool" , toolCall .Function .Name , "server" , ts .logID )
458+ // If the call failed with a connection or session error (e.g. the
459+ // server restarted), trigger or wait for a reconnection and retry
460+ // the call once.
461+ if err != nil && isConnectionError (err ) && ctx .Err () == nil {
462+ slog .Warn ("MCP call failed, forcing reconnect and retrying" , "tool" , toolCall .Function .Name , "server" , ts .logID , "error" , err )
461463 if waitErr := ts .forceReconnectAndWait (ctx ); waitErr != nil {
462- return nil , fmt .Errorf ("failed to reconnect after session loss : %w" , waitErr )
464+ return nil , fmt .Errorf ("failed to reconnect after call failure : %w" , waitErr )
463465 }
464466 resp , err = ts .mcpClient .CallTool (ctx , request )
465467 }
@@ -690,3 +692,27 @@ func (ts *Toolset) GetPrompt(ctx context.Context, name string, arguments map[str
690692 slog .Debug ("Retrieved MCP prompt" , "prompt" , name , "messages_count" , len (result .Messages ))
691693 return result , nil
692694}
695+
696+ // isConnectionError reports whether err is a connection or session error
697+ // that warrants a reconnect-and-retry (as opposed to an application-level
698+ // error that would fail again even after reconnecting).
699+ func isConnectionError (err error ) bool {
700+ if errors .Is (err , mcp .ErrSessionMissing ) || errors .Is (err , io .EOF ) {
701+ return true
702+ }
703+ var netErr net.Error
704+ if errors .As (err , & netErr ) {
705+ return true
706+ }
707+ // The MCP SDK wraps transport failures (e.g. connection reset, EOF from
708+ // client.Do) with its internal ErrRejected sentinel using %v, which
709+ // drops the original error from the chain. Detect these by checking
710+ // the error message for common transport-failure substrings.
711+ if msg := err .Error (); strings .Contains (msg , "connection reset" ) ||
712+ strings .Contains (msg , "connection refused" ) ||
713+ strings .Contains (msg , "broken pipe" ) ||
714+ strings .Contains (msg , "EOF" ) {
715+ return true
716+ }
717+ return false
718+ }
0 commit comments