Skip to content

Commit c9f14c7

Browse files
committed
fix: use low reasoning effort instead of omitting it for NoThinking
OpenAI reasoning models (o-series, gpt-5) always reason internally; omitting the reasoning parameter does not disable reasoning — it merely uses the model's default effort. Those hidden reasoning tokens count against max_output_tokens, so with a small budget (e.g. title generation) the model can exhaust all of its tokens on reasoning and return empty visible text. When NoThinking is set, explicitly send reasoning_effort: low so the model spends as few output tokens as possible on reasoning. We use "low" rather than "minimal" because older models (o3-mini, o1) only accept low/medium/high. Also normalize the reasoning field in the VCR request matcher so cassettes recorded with or without reasoning config still match. Fixes #2318
1 parent b15c72b commit c9f14c7

2 files changed

Lines changed: 49 additions & 21 deletions

File tree

pkg/fake/proxy.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ func DefaultMatcher(onError func(err error)) recorder.MatcherFunc {
218218
// Normalize Gemini thinkingConfig (varies based on provider defaults for thinking budget).
219219
// This handles both camelCase (API) variants of the thinkingConfig field.
220220
thinkingConfigRegex := regexp.MustCompile(`"thinkingConfig":\{[^}]*\},?`)
221+
// Normalize OpenAI reasoning config (varies based on NoThinking flag and thinking budget).
222+
reasoningRegex := regexp.MustCompile(`"reasoning":\{[^}]*\},?`)
221223

222224
return func(r *http.Request, i cassette.Request) bool {
223225
if r.Body == nil || r.Body == http.NoBody {
@@ -246,9 +248,11 @@ func DefaultMatcher(onError func(err error)) recorder.MatcherFunc {
246248
normalizedReq := callIDRegex.ReplaceAllString(string(reqBody), "call_ID")
247249
normalizedReq = maxTokensRegex.ReplaceAllString(normalizedReq, "")
248250
normalizedReq = thinkingConfigRegex.ReplaceAllString(normalizedReq, "")
251+
normalizedReq = reasoningRegex.ReplaceAllString(normalizedReq, "")
249252
normalizedCassette := callIDRegex.ReplaceAllString(i.Body, "call_ID")
250253
normalizedCassette = maxTokensRegex.ReplaceAllString(normalizedCassette, "")
251254
normalizedCassette = thinkingConfigRegex.ReplaceAllString(normalizedCassette, "")
255+
normalizedCassette = reasoningRegex.ReplaceAllString(normalizedCassette, "")
252256

253257
return normalizedReq == normalizedCassette
254258
}

pkg/model/provider/openai/client.go

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -274,15 +274,25 @@ func (c *Client) CreateChatCompletionStream(
274274
}
275275
}
276276

277-
// Apply thinking budget: set reasoning_effort for reasoning models (o-series, gpt-5)
278-
if c.ModelConfig.ThinkingBudget != nil && isOpenAIReasoningModel(c.ModelConfig.Model) {
279-
effortStr, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget)
280-
if err != nil {
281-
slog.Error("OpenAI request using thinking_budget failed", "error", err)
282-
return nil, err
277+
// Apply thinking budget: set reasoning_effort for reasoning models (o-series, gpt-5).
278+
// Reasoning models always reason; omitting the param uses the default effort.
279+
// When NoThinking is set we still need to send low effort so hidden
280+
// reasoning tokens don't exhaust the max_completion_tokens budget.
281+
// We use "low" instead of "minimal" because older models (o3-mini, o1)
282+
// only accept low/medium/high.
283+
if isOpenAIReasoningModel(c.ModelConfig.Model) {
284+
if c.ModelOptions.NoThinking() {
285+
params.ReasoningEffort = shared.ReasoningEffort("low")
286+
slog.Debug("OpenAI request using low reasoning (NoThinking)")
287+
} else if c.ModelConfig.ThinkingBudget != nil {
288+
effortStr, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget)
289+
if err != nil {
290+
slog.Error("OpenAI request using thinking_budget failed", "error", err)
291+
return nil, err
292+
}
293+
params.ReasoningEffort = shared.ReasoningEffort(effortStr)
294+
slog.Debug("OpenAI request using thinking_budget", "reasoning_effort", effortStr)
283295
}
284-
params.ReasoningEffort = shared.ReasoningEffort(effortStr)
285-
slog.Debug("OpenAI request using thinking_budget", "reasoning_effort", effortStr)
286296
}
287297

288298
// Apply structured output configuration
@@ -384,20 +394,34 @@ func (c *Client) CreateResponseStream(
384394
}
385395

386396
// Configure reasoning for models that support it (o-series, gpt-5).
387-
// Skip reasoning entirely when NoThinking is set (e.g. title generation)
388-
// to avoid wasting output tokens on internal reasoning.
389-
if isOpenAIReasoningModel(c.ModelConfig.Model) && !c.ModelOptions.NoThinking() {
390-
params.Reasoning = shared.ReasoningParam{
391-
Summary: shared.ReasoningSummaryDetailed,
392-
}
393-
if c.ModelConfig.ThinkingBudget != nil {
394-
effortStr, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget)
395-
if err != nil {
396-
slog.Error("OpenAI responses request using thinking_budget failed", "error", err)
397-
return nil, err
397+
// Reasoning models always reason internally; omitting the reasoning param
398+
// does NOT disable reasoning — it just uses the model's default effort.
399+
// Those hidden reasoning tokens still count against max_output_tokens,
400+
// so with a small budget (e.g. title generation) the model can exhaust
401+
// all tokens on reasoning and return empty visible text.
402+
if isOpenAIReasoningModel(c.ModelConfig.Model) {
403+
if c.ModelOptions.NoThinking() {
404+
// Use low effort so the model spends as few output tokens as
405+
// possible on reasoning, leaving room for visible text.
406+
// We use "low" instead of "minimal" because older models
407+
// (o3-mini, o1) only accept low/medium/high.
408+
params.Reasoning = shared.ReasoningParam{
409+
Effort: shared.ReasoningEffort("low"),
410+
}
411+
slog.Debug("OpenAI responses request using low reasoning (NoThinking)")
412+
} else {
413+
params.Reasoning = shared.ReasoningParam{
414+
Summary: shared.ReasoningSummaryDetailed,
415+
}
416+
if c.ModelConfig.ThinkingBudget != nil {
417+
effortStr, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget)
418+
if err != nil {
419+
slog.Error("OpenAI responses request using thinking_budget failed", "error", err)
420+
return nil, err
421+
}
422+
params.Reasoning.Effort = shared.ReasoningEffort(effortStr)
423+
slog.Debug("OpenAI responses request using thinking_budget", "reasoning_effort", effortStr)
398424
}
399-
params.Reasoning.Effort = shared.ReasoningEffort(effortStr)
400-
slog.Debug("OpenAI responses request using thinking_budget", "reasoning_effort", effortStr)
401425
}
402426
}
403427

0 commit comments

Comments
 (0)