Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353)

## [5.0.4] - 2026-06-18

### Changed
Expand Down
3 changes: 2 additions & 1 deletion packages/web/src/ee/features/chat/agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ const createAssistantMessage = (parts: SBChatMessagePart[]): SBChatMessage => ({
});

const createFakeStreamResult = () => ({
response: Promise.resolve(new Response()),
response: Promise.resolve({ messages: [] }),
steps: Promise.resolve([]),
totalUsage: Promise.resolve({
inputTokens: 1,
outputTokens: 1,
Expand Down
69 changes: 67 additions & 2 deletions packages/web/src/ee/features/chat/agent.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { SBChatMessage, SBChatMessageMetadata } from "@/features/chat/types";
import { SBChatMessage, SBChatMessageMetadata, StepTokenUsageEntry, ToolTokenUsageEntry } from "@/features/chat/types";
import { estimateModelToolOutputTokens } from "@/ee/features/chat/tokenEstimation";
import { getFileSource } from '@/features/git';
import { isServiceError } from "@/lib/utils";
import { LanguageModelV3 as AISDKLanguageModelV3 } from "@ai-sdk/provider";
Expand Down Expand Up @@ -190,19 +191,76 @@ export const createMessageStream = async ({
});

const totalUsage = await researchStream.totalUsage;
const steps = await researchStream.steps;
const response = await researchStream.response;

// Tool output estimates are derived from `response.messages` rather
// than per-step `toolResults` because the response messages cover
// tool calls that never run inside a step — approval-gated tools
// execute before the step loop, and thrown tool errors are recorded
// as `tool-error` parts that `toolResults` excludes. Their
// `tool-result` parts also carry the output in model-visible form
// (`toModelOutput` already applied), which is exactly the payload
// whose token footprint we want to estimate.
const toolUsageByToolCallId = new Map<string, ToolTokenUsageEntry>(
response.messages.flatMap((message) =>
message.role !== 'tool' ? [] : message.content.flatMap((part) =>
part.type !== 'tool-result' ? [] : [[part.toolCallId, {
toolCallId: part.toolCallId,
toolName: part.toolName,
estimatedOutputTokens: estimateModelToolOutputTokens(part.output),
}] as const]
)
)
);

// One entry per step, in step order. The UI joins its step groups
// to these entries by array position, so the order and count must
// mirror the stream's steps exactly. Tool calls nest under the
// step they ran in; `content` is matched rather than `toolResults`
// so that thrown tool errors (`tool-error` parts, which
// `toolResults` excludes) are still attributed to their step.
const stepTokenUsage: StepTokenUsageEntry[] = steps.map(({ usage, content }) => ({
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
cacheReadTokens: usage.inputTokenDetails?.cacheReadTokens,
tools: content.flatMap((part) => {
if (part.type !== 'tool-result' && part.type !== 'tool-error') {
return [];
}
const entry = toolUsageByToolCallId.get(part.toolCallId);
if (!entry) {
return [];
}
toolUsageByToolCallId.delete(part.toolCallId);
return [entry];
}),
}));

// Any estimates left unclaimed belong to tool calls that executed
// before the step loop (approval continuations). Their output
// enters the context as input to this phase's first step, so nest
// them under it.
if (toolUsageByToolCallId.size > 0 && stepTokenUsage.length > 0) {
stepTokenUsage[0].tools.unshift(...toolUsageByToolCallId.values());
}

writer.write({
type: 'message-metadata',
messageMetadata: {
// Spread first so the derived fields below can't be overwritten by caller metadata.
...metadata,
totalTokens: (priorMetadata?.totalTokens ?? 0) + (totalUsage.totalTokens ?? 0),
totalInputTokens: (priorMetadata?.totalInputTokens ?? 0) + (totalUsage.inputTokens ?? 0),
totalOutputTokens: (priorMetadata?.totalOutputTokens ?? 0) + (totalUsage.outputTokens ?? 0),
totalCacheReadTokens: (priorMetadata?.totalCacheReadTokens ?? 0) + (totalUsage.inputTokenDetails?.cacheReadTokens ?? 0),
totalCacheWriteTokens: (priorMetadata?.totalCacheWriteTokens ?? 0) + (totalUsage.inputTokenDetails?.cacheWriteTokens ?? 0),
totalResponseTimeMs: (priorMetadata?.totalResponseTimeMs ?? 0) + (new Date().getTime() - startTime.getTime()),
// Concatenated (not summed) across approval-continuation
// phases so earlier phases' steps are preserved in order.
stepTokenUsage: [...(priorMetadata?.stepTokenUsage ?? []), ...stepTokenUsage],
modelName,
traceId,
...metadata,
}
});

Expand Down Expand Up @@ -430,6 +488,13 @@ const createAgentStream = async ({
logger.warn(`Tool call repair failed for "${toolCall.toolName}": ${error.message}`);
return null;
},
// Token usage collection deliberately does NOT happen here: the SDK
// awaits this callback before starting the next step, so it must
// stay cheap, and `toolResults` misses tool calls that never run
// inside a step (approval-gated tools execute before the step loop)
// as well as thrown tool errors (recorded as `tool-error` parts).
// Both are instead derived post-stream in `createMessageStream`
// from `steps` and `response.messages`.
onStepFinish: ({ toolResults }) => {
toolResults.forEach(({ output, dynamic }) => {
if (dynamic || isServiceError(output)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,33 +91,57 @@ const ChatThreadListItemComponent = forwardRef<HTMLDivElement, ChatThreadListIte
// should be visible to the user. By "steps", we mean parts that originated
// from the same LLM invocation. By "visible", we mean parts that have some
// visual representation in the UI (e.g., text, reasoning, tool calls, etc.).
const uiVisibleThinkingSteps = useMemo(() => {
const steps = groupMessageIntoSteps(assistantMessage?.parts ?? []);

// Filter out the answerPart and empty steps
return steps
.map(
(step) => step
// First, filter out any parts that are not text
.filter((part) => {
if (part.type === 'text') {
return !part.text.includes(ANSWER_TAG);
}

return true;
})
.filter((part) => {
// Only include text, reasoning, and tool parts
return (
part.type === 'text' ||
part.type === 'reasoning' ||
part.type.startsWith('tool-') ||
part.type === 'dynamic-tool'
)
})
)
//
// Each step is tagged with its stepIndex — the invocation's position in
// the turn, which indexes into `metadata.stepTokenUsage`. Indices are
// assigned by counting 'step-start' markers (one per invocation) BEFORE
// any filtering, so dropping empty or answer-only steps below cannot
// shift the indices of the steps that remain.
const { uiVisibleThinkingSteps, answerStepIndex } = useMemo(() => {
const groupedParts = groupMessageIntoSteps(assistantMessage?.parts ?? []);

// Parts written before the first step-start (e.g. data parts) don't
// belong to any step; they get stepIndex -1 and never survive the
// visibility filters below.
let stepIndex = -1;
let answerStepIndex: number | undefined = undefined;

const steps = groupedParts
.map((stepParts) => {
if (stepParts[0]?.type === 'step-start') {
stepIndex++;
}

if (stepParts.some((part) => part.type === 'text' && part.text.includes(ANSWER_TAG))) {
answerStepIndex = stepIndex;
}

return {
stepIndex,
parts: stepParts
// First, filter out the answer text
.filter((part) => {
if (part.type === 'text') {
return !part.text.includes(ANSWER_TAG);
}

return true;
})
.filter((part) => {
// Only include text, reasoning, and tool parts
return (
part.type === 'text' ||
part.type === 'reasoning' ||
part.type.startsWith('tool-') ||
part.type === 'dynamic-tool'
)
}),
};
})
// Then, filter out any steps that are empty
.filter(step => step.length > 0);
.filter((step) => step.parts.length > 0);

return { uiVisibleThinkingSteps: steps, answerStepIndex };
}, [assistantMessage?.parts]);

// "thinking" is when the agent is generating output that is not the answer.
Expand Down Expand Up @@ -379,6 +403,7 @@ const ChatThreadListItemComponent = forwardRef<HTMLDivElement, ChatThreadListIte
isNetworkActive={isNetworkActive}
isAwaitingToolApproval={isAwaitingToolApproval}
thinkingSteps={uiVisibleThinkingSteps}
answerStepIndex={answerStepIndex}
metadata={assistantMessage?.metadata}
/>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ describe('DetailsCard', () => {
isTurnInProgress={true}
isNetworkActive={false}
isAwaitingToolApproval={false}
thinkingSteps={[[failedActivationPart]]}
thinkingSteps={[{ stepIndex: 0, parts: [failedActivationPart] }]}
/>
</TooltipProvider>
);
Expand Down
Loading
Loading