Skip to content

Commit 03a22ec

Browse files
authored
Merge pull request #2412 from trungutt/fix/2318-gpt5-title-generation
fix: title generation fails with OpenAI reasoning models
2 parents 8ca294e + 5b00c42 commit 03a22ec

File tree

4 files changed

+82
-22
lines changed

4 files changed

+82
-22
lines changed

e2e/debug_title_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ func TestDebug_Title(t *testing.T) {
3535

3636
title := runCLI(t, "debug", "title", "testdata/basic.yaml", "--model="+tt.model, "What can you do?")
3737

38+
// The non-empty check is the key invariant: reasoning models
39+
// (o-series, gpt-5) must produce visible title text despite
40+
// hidden reasoning tokens consuming part of the output budget.
41+
assert.NotEmpty(t, title, "title must not be empty")
3842
assert.Equal(t, tt.want, title)
3943
})
4044
}

pkg/fake/proxy.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ func DefaultMatcher(onError func(err error)) recorder.MatcherFunc {
218218
// Normalize Gemini thinkingConfig (varies based on provider defaults for thinking budget).
219219
// This handles both camelCase (API) variants of the thinkingConfig field.
220220
thinkingConfigRegex := regexp.MustCompile(`"thinkingConfig":\{[^}]*\},?`)
221+
// Normalize OpenAI reasoning config (varies based on NoThinking flag and thinking budget).
222+
reasoningRegex := regexp.MustCompile(`"reasoning":\{[^}]*\},?`)
221223

222224
return func(r *http.Request, i cassette.Request) bool {
223225
if r.Body == nil || r.Body == http.NoBody {
@@ -246,9 +248,11 @@ func DefaultMatcher(onError func(err error)) recorder.MatcherFunc {
246248
normalizedReq := callIDRegex.ReplaceAllString(string(reqBody), "call_ID")
247249
normalizedReq = maxTokensRegex.ReplaceAllString(normalizedReq, "")
248250
normalizedReq = thinkingConfigRegex.ReplaceAllString(normalizedReq, "")
251+
normalizedReq = reasoningRegex.ReplaceAllString(normalizedReq, "")
249252
normalizedCassette := callIDRegex.ReplaceAllString(i.Body, "call_ID")
250253
normalizedCassette = maxTokensRegex.ReplaceAllString(normalizedCassette, "")
251254
normalizedCassette = thinkingConfigRegex.ReplaceAllString(normalizedCassette, "")
255+
normalizedCassette = reasoningRegex.ReplaceAllString(normalizedCassette, "")
252256

253257
return normalizedReq == normalizedCassette
254258
}

pkg/model/provider/openai/client.go

Lines changed: 67 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -274,15 +274,34 @@ func (c *Client) CreateChatCompletionStream(
274274
}
275275
}
276276

277-
// Apply thinking budget: set reasoning_effort for reasoning models (o-series, gpt-5)
278-
if c.ModelConfig.ThinkingBudget != nil && isOpenAIReasoningModel(c.ModelConfig.Model) {
279-
effortStr, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget)
280-
if err != nil {
281-
slog.Error("OpenAI request using thinking_budget failed", "error", err)
282-
return nil, err
277+
// Apply thinking budget: set reasoning_effort for reasoning models (o-series, gpt-5).
278+
// Reasoning models always reason; omitting the param uses the default effort.
279+
// When NoThinking is set we still need to send low effort so hidden
280+
// reasoning tokens don't exhaust the max_completion_tokens budget.
281+
// We use "low" instead of "minimal" because older models (o3-mini, o1)
282+
// only accept low/medium/high.
283+
if isOpenAIReasoningModel(c.ModelConfig.Model) {
284+
if c.ModelOptions.NoThinking() {
285+
params.ReasoningEffort = shared.ReasoningEffort("low")
286+
// Hidden reasoning tokens count against the output budget even
287+
// with low effort. Enforce a floor so visible text isn't starved.
288+
if c.ModelConfig.MaxTokens != nil && *c.ModelConfig.MaxTokens < noThinkingMinOutputTokens {
289+
if !isResponsesModel(c.ModelConfig.Model) {
290+
params.MaxTokens = openai.Int(noThinkingMinOutputTokens)
291+
} else {
292+
params.MaxCompletionTokens = openai.Int(noThinkingMinOutputTokens)
293+
}
294+
}
295+
slog.Debug("OpenAI request using low reasoning (NoThinking)")
296+
} else if c.ModelConfig.ThinkingBudget != nil {
297+
effortStr, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget)
298+
if err != nil {
299+
slog.Error("OpenAI request using thinking_budget failed", "error", err)
300+
return nil, err
301+
}
302+
params.ReasoningEffort = shared.ReasoningEffort(effortStr)
303+
slog.Debug("OpenAI request using thinking_budget", "reasoning_effort", effortStr)
283304
}
284-
params.ReasoningEffort = shared.ReasoningEffort(effortStr)
285-
slog.Debug("OpenAI request using thinking_budget", "reasoning_effort", effortStr)
286305
}
287306

288307
// Apply structured output configuration
@@ -384,20 +403,39 @@ func (c *Client) CreateResponseStream(
384403
}
385404

386405
// Configure reasoning for models that support it (o-series, gpt-5).
387-
// Skip reasoning entirely when NoThinking is set (e.g. title generation)
388-
// to avoid wasting output tokens on internal reasoning.
389-
if isOpenAIReasoningModel(c.ModelConfig.Model) && !c.ModelOptions.NoThinking() {
390-
params.Reasoning = shared.ReasoningParam{
391-
Summary: shared.ReasoningSummaryDetailed,
392-
}
393-
if c.ModelConfig.ThinkingBudget != nil {
394-
effortStr, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget)
395-
if err != nil {
396-
slog.Error("OpenAI responses request using thinking_budget failed", "error", err)
397-
return nil, err
406+
// Reasoning models always reason internally; omitting the reasoning param
407+
// does NOT disable reasoning — it just uses the model's default effort.
408+
// Those hidden reasoning tokens still count against max_output_tokens,
409+
// so with a small budget (e.g. title generation) the model can exhaust
410+
// all tokens on reasoning and return empty visible text.
411+
if isOpenAIReasoningModel(c.ModelConfig.Model) {
412+
if c.ModelOptions.NoThinking() {
413+
// Use low effort so the model spends as few output tokens as
414+
// possible on reasoning, leaving room for visible text.
415+
// We use "low" instead of "minimal" because older models
416+
// (o3-mini, o1) only accept low/medium/high.
417+
params.Reasoning = shared.ReasoningParam{
418+
Effort: shared.ReasoningEffort("low"),
419+
}
420+
// Hidden reasoning tokens count against max_output_tokens even
421+
// with low effort. Enforce a floor so visible text isn't starved.
422+
if c.ModelConfig.MaxTokens != nil && *c.ModelConfig.MaxTokens < noThinkingMinOutputTokens {
423+
params.MaxOutputTokens = param.NewOpt(noThinkingMinOutputTokens)
424+
}
425+
slog.Debug("OpenAI responses request using low reasoning (NoThinking)")
426+
} else {
427+
params.Reasoning = shared.ReasoningParam{
428+
Summary: shared.ReasoningSummaryDetailed,
429+
}
430+
if c.ModelConfig.ThinkingBudget != nil {
431+
effortStr, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget)
432+
if err != nil {
433+
slog.Error("OpenAI responses request using thinking_budget failed", "error", err)
434+
return nil, err
435+
}
436+
params.Reasoning.Effort = shared.ReasoningEffort(effortStr)
437+
slog.Debug("OpenAI responses request using thinking_budget", "reasoning_effort", effortStr)
398438
}
399-
params.Reasoning.Effort = shared.ReasoningEffort(effortStr)
400-
slog.Debug("OpenAI responses request using thinking_budget", "reasoning_effort", effortStr)
401439
}
402440
}
403441

@@ -1036,6 +1074,14 @@ func isOpenAIReasoningModel(model string) bool {
10361074
strings.HasPrefix(m, "gpt-5")
10371075
}
10381076

1077+
// noThinkingMinOutputTokens is the minimum max-output-token budget for
1078+
// reasoning models when NoThinking is set. Even with low reasoning effort
1079+
// the model still produces hidden reasoning tokens that count against
1080+
// max_output_tokens / max_completion_tokens. A small budget (e.g. 20)
1081+
// gets entirely consumed by reasoning, leaving nothing for visible text.
1082+
// 256 tokens is enough for low-effort reasoning plus a short visible response.
1083+
const noThinkingMinOutputTokens int64 = 256
1084+
10391085
// openAIReasoningEffort validates a ThinkingBudget effort string for the
10401086
// OpenAI API. Returns the effort string or an error.
10411087
func openAIReasoningEffort(b *latest.ThinkingBudget) (string, error) {

pkg/sessiontitle/generator.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ const (
2121
systemPrompt = "You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. Never use newlines or line breaks in your response."
2222
userPromptFormat = "Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n%s\n\n"
2323

24+
// titleMaxTokens is the max output token budget for title generation.
25+
// This is sized for visible output only (~50 chars ≈ 12-15 tokens).
26+
// Providers that need extra headroom for hidden reasoning tokens
27+
// (e.g. OpenAI reasoning models) handle the adjustment internally.
28+
titleMaxTokens = 20
29+
2430
// titleGenerationTimeout is the maximum time to wait for title generation.
2531
// Title generation should be quick since we disable thinking and use low max_tokens.
2632
// If the API is slow or hanging (e.g., due to server-side thinking), we should timeout.
@@ -103,7 +109,7 @@ func (g *Generator) Generate(ctx context.Context, sessionID string, userMessages
103109
ctx,
104110
baseModel,
105111
options.WithStructuredOutput(nil),
106-
options.WithMaxTokens(20),
112+
options.WithMaxTokens(titleMaxTokens),
107113
options.WithNoThinking(),
108114
options.WithGeneratingTitle(),
109115
)

0 commit comments

Comments (0)