diff --git a/CHANGELOG.md b/CHANGELOG.md index 33a02bf53..593e735db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) + ## [5.0.4] - 2026-06-18 ### Changed diff --git a/packages/web/src/ee/features/chat/agent.test.ts b/packages/web/src/ee/features/chat/agent.test.ts index a47e10434..5ac87c551 100644 --- a/packages/web/src/ee/features/chat/agent.test.ts +++ b/packages/web/src/ee/features/chat/agent.test.ts @@ -137,7 +137,8 @@ const createAssistantMessage = (parts: SBChatMessagePart[]): SBChatMessage => ({ }); const createFakeStreamResult = () => ({ - response: Promise.resolve(new Response()), + response: Promise.resolve({ messages: [] }), + steps: Promise.resolve([]), totalUsage: Promise.resolve({ inputTokens: 1, outputTokens: 1, diff --git a/packages/web/src/ee/features/chat/agent.ts b/packages/web/src/ee/features/chat/agent.ts index 32c9befbf..c415e80df 100644 --- a/packages/web/src/ee/features/chat/agent.ts +++ b/packages/web/src/ee/features/chat/agent.ts @@ -1,4 +1,5 @@ -import { SBChatMessage, SBChatMessageMetadata } from "@/features/chat/types"; +import { SBChatMessage, SBChatMessageMetadata, StepTokenUsageEntry, ToolTokenUsageEntry } from "@/features/chat/types"; +import { estimateModelToolOutputTokens } from "@/ee/features/chat/tokenEstimation"; import { getFileSource } from '@/features/git'; import { isServiceError } from "@/lib/utils"; import { LanguageModelV3 as AISDKLanguageModelV3 } from "@ai-sdk/provider"; @@ -190,19 +191,76 @@ export const createMessageStream = async ({ }); const totalUsage = await researchStream.totalUsage; + const steps = await researchStream.steps; + const response = await researchStream.response; + + // Tool output estimates are derived from `response.messages` rather + // than per-step `toolResults` because the response messages cover + // tool calls that never run inside a step — approval-gated tools + // execute before the step loop, and thrown tool errors are recorded + // as `tool-error` parts that `toolResults` excludes. Their + // `tool-result` parts also carry the output in model-visible form + // (`toModelOutput` already applied), which is exactly the payload + // whose token footprint we want to estimate. + const toolUsageByToolCallId = new Map( + response.messages.flatMap((message) => + message.role !== 'tool' ? [] : message.content.flatMap((part) => + part.type !== 'tool-result' ? [] : [[part.toolCallId, { + toolCallId: part.toolCallId, + toolName: part.toolName, + estimatedOutputTokens: estimateModelToolOutputTokens(part.output), + }] as const] + ) + ) + ); + + // One entry per step, in step order. The UI joins its step groups + // to these entries by array position, so the order and count must + // mirror the stream's steps exactly. Tool calls nest under the + // step they ran in; `content` is matched rather than `toolResults` + // so that thrown tool errors (`tool-error` parts, which + // `toolResults` excludes) are still attributed to their step. + const stepTokenUsage: StepTokenUsageEntry[] = steps.map(({ usage, content }) => ({ + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens, + cacheReadTokens: usage.inputTokenDetails?.cacheReadTokens, + tools: content.flatMap((part) => { + if (part.type !== 'tool-result' && part.type !== 'tool-error') { + return []; + } + const entry = toolUsageByToolCallId.get(part.toolCallId); + if (!entry) { + return []; + } + toolUsageByToolCallId.delete(part.toolCallId); + return [entry]; + }), + })); + + // Any estimates left unclaimed belong to tool calls that executed + // before the step loop (approval continuations). Their output + // enters the context as input to this phase's first step, so nest + // them under it. + if (toolUsageByToolCallId.size > 0 && stepTokenUsage.length > 0) { + stepTokenUsage[0].tools.unshift(...toolUsageByToolCallId.values()); + } writer.write({ type: 'message-metadata', messageMetadata: { + // Spread first so the derived fields below can't be overwritten by caller metadata. + ...metadata, totalTokens: (priorMetadata?.totalTokens ?? 0) + (totalUsage.totalTokens ?? 0), totalInputTokens: (priorMetadata?.totalInputTokens ?? 0) + (totalUsage.inputTokens ?? 0), totalOutputTokens: (priorMetadata?.totalOutputTokens ?? 0) + (totalUsage.outputTokens ?? 0), totalCacheReadTokens: (priorMetadata?.totalCacheReadTokens ?? 0) + (totalUsage.inputTokenDetails?.cacheReadTokens ?? 0), totalCacheWriteTokens: (priorMetadata?.totalCacheWriteTokens ?? 0) + (totalUsage.inputTokenDetails?.cacheWriteTokens ?? 0), totalResponseTimeMs: (priorMetadata?.totalResponseTimeMs ?? 0) + (new Date().getTime() - startTime.getTime()), + // Concatenated (not summed) across approval-continuation + // phases so earlier phases' steps are preserved in order. + stepTokenUsage: [...(priorMetadata?.stepTokenUsage ?? []), ...stepTokenUsage], modelName, traceId, - ...metadata, } }); @@ -430,6 +488,13 @@ const createAgentStream = async ({ logger.warn(`Tool call repair failed for "${toolCall.toolName}": ${error.message}`); return null; }, + // Token usage collection deliberately does NOT happen here: the SDK + // awaits this callback before starting the next step, so it must + // stay cheap, and `toolResults` misses tool calls that never run + // inside a step (approval-gated tools execute before the step loop) + // as well as thrown tool errors (recorded as `tool-error` parts). + // Both are instead derived post-stream in `createMessageStream` + // from `steps` and `response.messages`. onStepFinish: ({ toolResults }) => { toolResults.forEach(({ output, dynamic }) => { if (dynamic || isServiceError(output)) { diff --git a/packages/web/src/ee/features/chat/components/chatThread/chatThreadListItem.tsx b/packages/web/src/ee/features/chat/components/chatThread/chatThreadListItem.tsx index d8776fe57..a9dc9d2f4 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/chatThreadListItem.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/chatThreadListItem.tsx @@ -91,33 +91,57 @@ const ChatThreadListItemComponent = forwardRef { - const steps = groupMessageIntoSteps(assistantMessage?.parts ?? []); - - // Filter out the answerPart and empty steps - return steps - .map( - (step) => step - // First, filter out any parts that are not text - .filter((part) => { - if (part.type === 'text') { - return !part.text.includes(ANSWER_TAG); - } - - return true; - }) - .filter((part) => { - // Only include text, reasoning, and tool parts - return ( - part.type === 'text' || - part.type === 'reasoning' || - part.type.startsWith('tool-') || - part.type === 'dynamic-tool' - ) - }) - ) + // + // Each step is tagged with its stepIndex — the invocation's position in + // the turn, which indexes into `metadata.stepTokenUsage`. Indices are + // assigned by counting 'step-start' markers (one per invocation) BEFORE + // any filtering, so dropping empty or answer-only steps below cannot + // shift the indices of the steps that remain. + const { uiVisibleThinkingSteps, answerStepIndex } = useMemo(() => { + const groupedParts = groupMessageIntoSteps(assistantMessage?.parts ?? []); + + // Parts written before the first step-start (e.g. data parts) don't + // belong to any step; they get stepIndex -1 and never survive the + // visibility filters below. + let stepIndex = -1; + let answerStepIndex: number | undefined = undefined; + + const steps = groupedParts + .map((stepParts) => { + if (stepParts[0]?.type === 'step-start') { + stepIndex++; + } + + if (stepParts.some((part) => part.type === 'text' && part.text.includes(ANSWER_TAG))) { + answerStepIndex = stepIndex; + } + + return { + stepIndex, + parts: stepParts + // First, filter out the answer text + .filter((part) => { + if (part.type === 'text') { + return !part.text.includes(ANSWER_TAG); + } + + return true; + }) + .filter((part) => { + // Only include text, reasoning, and tool parts + return ( + part.type === 'text' || + part.type === 'reasoning' || + part.type.startsWith('tool-') || + part.type === 'dynamic-tool' + ) + }), + }; + }) // Then, filter out any steps that are empty - .filter(step => step.length > 0); + .filter((step) => step.parts.length > 0); + + return { uiVisibleThinkingSteps: steps, answerStepIndex }; }, [assistantMessage?.parts]); // "thinking" is when the agent is generating output that is not the answer. @@ -379,6 +403,7 @@ const ChatThreadListItemComponent = forwardRef diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.test.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.test.tsx index 01ebbaf13..7e63de144 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.test.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.test.tsx @@ -111,7 +111,7 @@ describe('DetailsCard', () => { isTurnInProgress={true} isNetworkActive={false} isAwaitingToolApproval={false} - thinkingSteps={[[failedActivationPart]]} + thinkingSteps={[{ stepIndex: 0, parts: [failedActivationPart] }]} /> ); diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx index 63bd1525e..e95af69d4 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx @@ -10,9 +10,9 @@ import { cn, getShortenedNumberDisplayString } from '@/lib/utils'; import isEqual from "fast-deep-equal/react"; import { useStickToBottom } from 'use-stick-to-bottom'; import { Brain, ChevronDown, ChevronRight, Clock, InfoIcon, Loader2, ScanSearchIcon, ShieldQuestion, Wrench, Zap } from 'lucide-react'; -import { memo, useCallback, useEffect, useMemo, useState } from 'react'; +import { memo, ReactNode, useCallback, useEffect, useMemo, useState } from 'react'; import { usePrevious } from '@uidotdev/usehooks'; -import { SBChatMessageMetadata, SBChatMessagePart } from '@/features/chat/types'; +import { SBChatMessageMetadata, SBChatMessagePart, StepTokenUsageEntry } from '@/features/chat/types'; import { SearchScopeIcon } from '@/features/chat/components/searchScopeIcon'; import { MarkdownRenderer } from './markdownRenderer'; import { FindSymbolDefinitionsToolComponent } from './tools/findSymbolDefinitionsToolComponent'; @@ -29,6 +29,15 @@ import { McpToolComponent } from './tools/mcpToolComponent'; import { ToolSearchToolComponent } from './tools/toolSearchToolComponent'; +// A UI-visible step: the parts of one LLM invocation, tagged with the +// invocation's position in the turn. The stepIndex indexes directly into +// `metadata.stepTokenUsage`, whose entries are recorded 1:1 with the turn's +// steps in order (across approval-continuation phases). +export interface ThinkingStep { + stepIndex: number; + parts: SBChatMessagePart[]; +} + interface DetailsCardProps { chatId: string; isExpanded: boolean; @@ -37,7 +46,10 @@ interface DetailsCardProps { isTurnInProgress: boolean; isNetworkActive: boolean; isAwaitingToolApproval: boolean; - thinkingSteps: SBChatMessagePart[][]; + thinkingSteps: ThinkingStep[]; + // Index of the step that produced the answer. That step is filtered out + // of `thinkingSteps`, so its usage is rendered as a dedicated final row. + answerStepIndex?: number; metadata?: SBChatMessageMetadata; } @@ -51,14 +63,23 @@ const DetailsCardComponent = ({ isAwaitingToolApproval, metadata, thinkingSteps, + answerStepIndex, }: DetailsCardProps) => { const captureEvent = useCaptureEvent(); - const toolCallCount = useMemo(() => thinkingSteps.flat().filter(part => + const toolCallCount = useMemo(() => thinkingSteps.flatMap(({ parts }) => parts).filter(part => part.type.startsWith('tool-') || (part.type === 'dynamic-tool' && part.toolName.startsWith('mcp_')) ).length, [thinkingSteps]); + // Lookup of estimated output tokens by tool call id, used to badge + // individual tool calls in the thinking steps. + const toolTokenUsageMap = useMemo(() => new Map( + (metadata?.stepTokenUsage ?? []).flatMap(({ tools }) => + tools.map(({ toolCallId, estimatedOutputTokens }) => [toolCallId, estimatedOutputTokens] as const) + ) + ), [metadata?.stepTokenUsage]); + const cacheReadTokens = metadata?.totalCacheReadTokens ?? 0; const inputTokens = metadata?.totalInputTokens ?? 0; const cachedInputPercent = inputTokens > 0 @@ -202,6 +223,9 @@ const DetailsCardComponent = ({ thinkingSteps={thinkingSteps} isNetworkActive={isNetworkActive} isThinking={isThinking} + toolTokenUsageMap={toolTokenUsageMap} + stepTokenUsage={metadata?.stepTokenUsage} + answerStepIndex={answerStepIndex} /> @@ -213,7 +237,7 @@ const DetailsCardComponent = ({ export const DetailsCard = memo(DetailsCardComponent, isEqual); -const ThinkingSteps = ({ thinkingSteps, isNetworkActive, isThinking }: { thinkingSteps: SBChatMessagePart[][], isNetworkActive: boolean, isThinking: boolean }) => { +const ThinkingSteps = ({ thinkingSteps, isNetworkActive, isThinking, toolTokenUsageMap, stepTokenUsage, answerStepIndex }: { thinkingSteps: ThinkingStep[], isNetworkActive: boolean, isThinking: boolean, toolTokenUsageMap?: Map, stepTokenUsage?: StepTokenUsageEntry[], answerStepIndex?: number }) => { const { scrollRef, contentRef, scrollToBottom } = useStickToBottom(); const [shouldStick, setShouldStick] = useState(isThinking); const prevIsThinking = usePrevious(isThinking); @@ -227,6 +251,14 @@ const ThinkingSteps = ({ thinkingSteps, isNetworkActive, isThinking }: { thinkin } }, [isThinking, prevIsThinking, scrollToBottom]); + // The answer step is normally filtered out of `thinkingSteps` (its only + // part is the answer text), so its usage gets a dedicated row. If the + // step is still visible (e.g. it also contained narration), the index + // join below already covers it — skip the extra row. + const answerUsage = answerStepIndex !== undefined && !thinkingSteps.some(({ stepIndex }) => stepIndex === answerStepIndex) + ? stepTokenUsage?.[answerStepIndex] + : undefined; + return (
@@ -236,22 +268,141 @@ const ThinkingSteps = ({ thinkingSteps, isNetworkActive, isThinking }: { thinkin ) : (

No thinking steps

) - ) : thinkingSteps.map((step, index) => ( -
- {step.map((part, index) => ( -
- + ) : ( + <> + {thinkingSteps.map(({ stepIndex, parts }) => { + // A step's usage is simply the entry at its position + // in the turn's step sequence. Out-of-range lookups + // (e.g. an aborted turn whose last step never + // finished) return undefined and render no usage line. + const stepUsage = stepTokenUsage?.[stepIndex]; + + // Inline the step's usage alongside the step's first part + // when that part is narration text, so the cost reads as a + // property of the step, not of the tool calls below it. + // Steps that open directly with a tool call get the usage + // on its own line instead — tool rows already carry their + // own right-aligned info. + const [firstPart, ...restParts] = parts; + const isFirstPartNarration = firstPart.type === 'text' || firstPart.type === 'reasoning'; + + return ( +
+ {stepUsage && !isFirstPartNarration && ( +
+ +
+ )} +
+
+ +
+ {stepUsage && isFirstPartNarration && } +
+ {restParts.map((part, index) => ( +
+ +
+ ))} +
+ ); + })} + {answerUsage && ( +
+
- ))} -
- ))} + )} + + )}
); } -export const StepPartRenderer = ({ part }: { part: SBChatMessagePart }) => { +// The provider-reported input/output token pair of a single agent step, +// rendered at the end of the step's group of parts. +const StepTokenUsage = ({ usage, label = 'step' }: { usage: StepTokenUsageEntry, label?: string }) => { + if (usage.inputTokens === undefined && usage.outputTokens === undefined) { + return null; + } + + const cachedPercent = usage.inputTokens && usage.cacheReadTokens + ? Math.round((usage.cacheReadTokens / usage.inputTokens) * 100) + : 0; + + const compactParts = [ + ...(usage.inputTokens !== undefined ? [`${getShortenedNumberDisplayString(usage.inputTokens, 0)} in`] : []), + ...(usage.outputTokens !== undefined ? [`${getShortenedNumberDisplayString(usage.outputTokens, 0)} out`] : []), + ]; + + return ( +
+ + + + {label} · {compactParts.join(' · ')} + + + +
+
+ Input + {usage.inputTokens?.toLocaleString() ?? '—'}{cachedPercent > 0 ? ` (${cachedPercent}% cached)` : ''} +
+
+ Output + {usage.outputTokens?.toLocaleString() ?? '—'} +
+
+
+
+
+ ); +} + +type GuardedToolType = + | 'tool-read_file' + | 'tool-grep' + | 'tool-glob' + | 'tool-find_symbol_definitions' + | 'tool-find_symbol_references' + | 'tool-list_repos' + | 'tool-list_commits' + | 'tool-get_diff' + | 'tool-list_tree'; + +type GuardedToolPart = Extract; + +// The builtin tools that render through ToolOutputGuard, which differ only in +// their loading text and output component. The `satisfies` mapped type checks +// each entry's `render` against its own tool's output shape. +const TOOL_GUARD_CONFIG = { + 'tool-read_file': { loadingText: 'Reading file...', render: (output) => }, + 'tool-grep': { loadingText: 'Searching...', render: (output) => }, + 'tool-glob': { loadingText: 'Searching files...', render: (output) => }, + 'tool-find_symbol_definitions': { loadingText: 'Resolving definitions...', render: (output) => }, + 'tool-find_symbol_references': { loadingText: 'Resolving references...', render: (output) => }, + 'tool-list_repos': { loadingText: 'Listing repositories...', render: (output) => }, + 'tool-list_commits': { loadingText: 'Listing commits...', render: (output) => }, + 'tool-get_diff': { loadingText: 'Comparing revisions...', render: (output) => }, + 'tool-list_tree': { loadingText: 'Listing tree...', render: (output) => }, +} satisfies { + [K in GuardedToolType]: { + loadingText: string; + render: (output: Extract['output']) => ReactNode; + } +}; + +export const StepPartRenderer = ({ part, toolTokenUsageMap }: { part: SBChatMessagePart, toolTokenUsageMap?: Map }) => { + const estimatedOutputTokens = 'toolCallId' in part ? toolTokenUsageMap?.get(part.toolCallId) : undefined; + switch (part.type) { case 'reasoning': case 'text': @@ -262,86 +413,30 @@ export const StepPartRenderer = ({ part }: { part: SBChatMessagePart }) => { /> ) case 'tool-read_file': - return ( - - {(output) => } - - ) case 'tool-grep': - return ( - - {(output) => } - - ) case 'tool-glob': - return ( - - {(output) => } - - ) case 'tool-find_symbol_definitions': - return ( - - {(output) => } - - ) case 'tool-find_symbol_references': - return ( - - {(output) => } - - ) case 'tool-list_repos': - return ( - - {(output) => } - - ) case 'tool-list_commits': - return ( - - {(output) => } - - ) case 'tool-get_diff': + case 'tool-list_tree': { + const { loadingText, render } = TOOL_GUARD_CONFIG[part.type]; return ( - {(output) => } + {/* The table lookup erases the per-tool correlation between + `render` and `output` (TypeScript can't track it across + a union), so re-assert it here. Each entry's `render` is + checked against its own tool's output by the table's + `satisfies` type. */} + {(output) => (render as (o: typeof output) => ReactNode)(output)} - ) - case 'tool-list_tree': - return ( - - {(output) => } - - ) + ); + } case 'tool-tool_request_activation': if (part.state === 'output-error') { return Tool activation failed: {part.errorText}; @@ -349,10 +444,10 @@ export const StepPartRenderer = ({ part }: { part: SBChatMessagePart }) => { if (part.state !== 'output-available') { return Activating tool...; } - return ; + return ; case 'dynamic-tool': if (part.toolName.startsWith('mcp_')) { - return ; + return ; } return null; case 'data-source': diff --git a/packages/web/src/ee/features/chat/components/chatThread/tools/mcpToolComponent.tsx b/packages/web/src/ee/features/chat/components/chatThread/tools/mcpToolComponent.tsx index c162d2841..2b4cc840f 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/tools/mcpToolComponent.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/tools/mcpToolComponent.tsx @@ -4,10 +4,12 @@ import { CopyIconButton } from "@/app/(app)/components/copyIconButton"; import { McpFavicon } from "@/ee/features/chat/mcp/components/mcpFavicon"; import { useMcpServerIconMap } from "@/ee/features/chat/mcpServerIconContext"; import { cn } from "@/lib/utils"; +import { Separator } from "@/components/ui/separator"; import { DynamicToolUIPart } from "ai"; import { CheckCircle, ChevronDown, XCircle } from "lucide-react"; import { useCallback, useMemo, useState } from "react"; import { JsonHighlighter, unescapeJsonStrings } from "./jsonHighlighter"; +import { ToolTokenBadge } from "./toolTokenBadge"; export function parseMcpToolName(toolName: string): { serverName: string; toolName: string } | null { if (!toolName.startsWith('mcp_')) { @@ -24,7 +26,7 @@ export function parseMcpToolName(toolName: string): { serverName: string; toolNa }; } -export const McpToolComponent = ({ part }: { part: DynamicToolUIPart }) => { +export const McpToolComponent = ({ part, estimatedOutputTokens }: { part: DynamicToolUIPart, estimatedOutputTokens?: number }) => { const needsApproval = part.state === 'approval-requested'; const [isExpanded, setIsExpanded] = useState(needsApproval); const onToggle = useCallback(() => setIsExpanded(v => !v), []); @@ -128,6 +130,12 @@ export const McpToolComponent = ({ part }: { part: DynamicToolUIPart }) => {
{renderStatus()}
+ {estimatedOutputTokens !== undefined && ( + <> + + + + )} {hasInput && (
+ {estimatedOutputTokens !== undefined && ( + <> + + + + )} {hasInput && } {hasInput && isExpanded && ( diff --git a/packages/web/src/ee/features/chat/components/chatThread/tools/toolSearchToolComponent.tsx b/packages/web/src/ee/features/chat/components/chatThread/tools/toolSearchToolComponent.tsx index 58bcf4e90..36e1e2002 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/tools/toolSearchToolComponent.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/tools/toolSearchToolComponent.tsx @@ -5,6 +5,7 @@ import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/component import { ChevronRight } from "lucide-react"; import { useState } from "react"; import { cn } from "@/lib/utils"; +import { ToolTokenBadge } from "./toolTokenBadge"; interface ToolSearchResult { name: string; @@ -14,9 +15,10 @@ interface ToolSearchResult { interface ToolSearchToolComponentProps { query: string; results: ToolSearchResult[]; + estimatedOutputTokens?: number; } -export const ToolSearchToolComponent = ({ query, results }: ToolSearchToolComponentProps) => { +export const ToolSearchToolComponent = ({ query, results, estimatedOutputTokens }: ToolSearchToolComponentProps) => { const [isOpen, setIsOpen] = useState(false); return ( @@ -27,6 +29,12 @@ export const ToolSearchToolComponent = ({ query, results }: ToolSearchToolCompon Searched connector tools: {query} {results.length} result{results.length === 1 ? '' : 's'} + {estimatedOutputTokens !== undefined && ( + <> + + + + )} diff --git a/packages/web/src/ee/features/chat/components/chatThread/tools/toolTokenBadge.tsx b/packages/web/src/ee/features/chat/components/chatThread/tools/toolTokenBadge.tsx new file mode 100644 index 000000000..5d1a9b892 --- /dev/null +++ b/packages/web/src/ee/features/chat/components/chatThread/tools/toolTokenBadge.tsx @@ -0,0 +1,19 @@ +'use client'; + +import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; +import { getShortenedNumberDisplayString } from '@/lib/utils'; + +export const ToolTokenBadge = ({ estimatedOutputTokens }: { estimatedOutputTokens: number }) => ( + + + + ~{getShortenedNumberDisplayString(estimatedOutputTokens, 0)} tokens + + + +
+ Estimated input-token footprint of this tool's output when fed back to the model. +
+
+
+); \ No newline at end of file diff --git a/packages/web/src/ee/features/chat/tokenEstimation.test.ts b/packages/web/src/ee/features/chat/tokenEstimation.test.ts new file mode 100644 index 000000000..541e9e461 --- /dev/null +++ b/packages/web/src/ee/features/chat/tokenEstimation.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it } from 'vitest'; +import { estimateModelToolOutputTokens, estimateToolOutputTokens } from './tokenEstimation'; + +describe('estimateToolOutputTokens', () => { + it('measures the serialized JSON form', () => { + const output = { output: 'a'.repeat(100) }; + // {"output":"aaa...a"} -> 113 chars / 2 bytes per token + expect(estimateToolOutputTokens(output)).toBe(Math.round(113 / 2)); + }); + + it('handles undefined output', () => { + expect(estimateToolOutputTokens(undefined)).toBe(0); + }); +}); + +describe('estimateModelToolOutputTokens', () => { + it('measures text variants', () => { + expect(estimateModelToolOutputTokens({ + type: 'text', + value: 'a'.repeat(100), + })).toBe(50); + }); + + it('measures json variants, excluding the wrapper', () => { + expect(estimateModelToolOutputTokens({ + type: 'json', + value: { result: 'a'.repeat(100) }, + })).toBe(Math.round(113 / 2)); + }); + + it('sums the text parts of content variants', () => { + expect(estimateModelToolOutputTokens({ + type: 'content', + value: [ + { type: 'text', text: 'a'.repeat(100) }, + { type: 'text', text: 'b'.repeat(100) }, + ], + })).toBe(100); + }); + + it('only counts the model-visible text of a mapped tool result', () => { + // Mirrors toVercelAITool's `toModelOutput`: the model sees only the + // `output` string; UI-only metadata must not inflate the estimate. + const text = 'Found 3 matches in 2 files'; + const estimate = estimateModelToolOutputTokens({ + type: 'content', + value: [{ type: 'text', text }], + }); + expect(estimate).toBe(Math.round(text.length / 2)); + }); +}); \ No newline at end of file diff --git a/packages/web/src/ee/features/chat/tokenEstimation.ts b/packages/web/src/ee/features/chat/tokenEstimation.ts new file mode 100644 index 000000000..88c10a338 --- /dev/null +++ b/packages/web/src/ee/features/chat/tokenEstimation.ts @@ -0,0 +1,63 @@ +import { ToolResultOutput } from "@ai-sdk/provider-utils"; + +/** + * Offline, provider-agnostic token estimation for tool outputs. + * + * Numbers produced here are diagnostics: they approximate how many input + * tokens a tool result will consume when it re-enters the model's context on + * the following step. They are intentionally kept separate from the billed + * usage totals reported by the provider (`totalInputTokens`, `totalTokens`, + * ...) — always present them as estimates (e.g. with a `~` prefix). + */ + +// Empirically, ~2 characters per token tracks the true input-token cost of +// tool results when checked against provider-reported per-step usage. Tool +// outputs are dominated by token-dense content — source code, file paths, +// JSON — and re-enter the context wrapped in tool-call/result envelopes, +// pushing the effective ratio well below the ~4 chars/token typical of +// English prose. The dense ratio applies across the board: overestimating +// the occasional prose-heavy output is acceptable, while underestimating +// would let oversized results look small. +const ESTIMATED_BYTES_PER_TOKEN = 2; + +export const estimateTokenCount = (content: string, bytesPerToken: number = ESTIMATED_BYTES_PER_TOKEN): number => { + return Math.round(content.length / bytesPerToken); +} + +/** + * Estimates the input-token footprint of an arbitrary value that reaches the + * model as a serialized JSON object — structural overhead included. + */ +export const estimateToolOutputTokens = (output: unknown): number => { + // JSON.stringify returns undefined (not a string) for undefined input. + const serialized = JSON.stringify(output) ?? ''; + return estimateTokenCount(serialized); +} + +/** + * Estimates the input-token footprint of a `toModelOutput` result — the + * payload that is actually sent back to the model as the tool result. + */ +export const estimateModelToolOutputTokens = (modelOutput: ToolResultOutput): number => { + switch (modelOutput.type) { + case 'text': + case 'error-text': + return estimateTokenCount(modelOutput.value); + case 'json': + case 'error-json': + return estimateToolOutputTokens(modelOutput.value); + case 'content': + return modelOutput.value.reduce((sum, part) => { + if (part.type === 'text') { + return sum + estimateTokenCount(part.text); + } + // Non-text parts (media, file references) have no meaningful + // text length; fall back to their serialized size. + return sum + estimateToolOutputTokens(part); + }, 0); + // Variants without estimable text (e.g. 'execution-denied') fall back + // to their serialized size. + default: + return estimateToolOutputTokens(modelOutput); + } +} diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index 8a7d51dd2..38a737a09 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -67,10 +67,32 @@ export const sbChatMessageMetadataSchema = z.object({ selectedSearchScopes: z.array(searchScopeSchema).optional(), disabledMcpServerIds: z.array(z.string()).optional(), traceId: z.string().optional(), + // Token usage of each agent step in this message's turn, in step order + // across all approval-continuation phases. The step array position is the + // step's identity: the UI joins these entries to its steps by array + // index, so the array must stay 1:1 with the turn's steps. + stepTokenUsage: z.array(z.object({ + // Provider-reported (billed, not estimated) usage of this step. + inputTokens: z.number().optional(), + outputTokens: z.number().optional(), + cacheReadTokens: z.number().optional(), + // Tool calls that ran in this step, each with the estimated + // input-token footprint its output imposes when fed back to the model + // on the next step. These are local estimates — never to be confused + // with the authoritative provider-reported fields. + tools: z.array(z.object({ + toolCallId: z.string(), + toolName: z.string(), + estimatedOutputTokens: z.number(), + })), + })).optional(), }); export type SBChatMessageMetadata = z.infer; +export type StepTokenUsageEntry = NonNullable[number]; +export type ToolTokenUsageEntry = StepTokenUsageEntry['tools'][number]; + export type SBChatMessageToolTypes = { [K in keyof ReturnType]: InferUITool[K]>; } & {