From 2b105fdd419ff80392858197aea17d3e992a9991 Mon Sep 17 00:00:00 2001
From: George Ng
Date: Wed, 1 Jul 2026 01:04:10 -0700
Subject: [PATCH 01/11] feat(dispatcher): contextSelector engine (tokenizer, signal, keywords, scorer, decision)

Deterministic, LLM-free core for context-weighted collision resolution
(design: docs/architecture/collision/context-weighted-collision-resolution-design.md).
All pure/leaf modules under src/context/contextSelector/, not yet wired in.

Two extensibility seams the design centers on:
- ConversationSignalSource (context-vector creation) - v1 RingBufferSignalSource
  (raw-token ring buffer, lambda=0.9/N=20 decay) swappable for knowPro entities.
- CollisionScorer (scoring) - v1 TfIdfScorer (candidate-local IDF) swappable for
  knowPro-entity / embedding-similarity scorers; same CandidateScore contract.

Modules:
- tokenize.ts: pinned canonicalizer/tokenizer with protected patterns (C#, .NET, A1:B2).
- conversationSignal.ts: ContextVector + source seam + decayed ring buffer (history-only).
- keywordVector/keywordExtractor/keywordSidecar/keywordIndex: derived lexical floor
  plus live-tunable collision-keywords.json overrides (effective = derived + add - remove).
- scorer.ts: candidate-local IDF TF-IDF over flattened keyword sets.
- decision.ts: coverage + evidence gate + margin with quantization and total ordering.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../contextSelector/conversationSignal.ts | 103 ++++++++
 .../src/context/contextSelector/decision.ts | 104 ++++++++
 .../contextSelector/keywordExtractor.ts | 152 +++++++++++
 .../context/contextSelector/keywordIndex.ts | 126 +++++++++
 .../context/contextSelector/keywordSidecar.ts | 234 ++++++++++++++++
 .../context/contextSelector/keywordVector.ts | 37 +++
 .../src/context/contextSelector/scorer.ts | 109 ++++++++
 .../src/context/contextSelector/tokenize.ts | 250 ++++++++++++++++++
 8 files changed, 1115 insertions(+)
 create mode 100644 ts/packages/dispatcher/dispatcher/src/context/contextSelector/conversationSignal.ts
 create mode 100644 ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts
 create mode 100644 ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordExtractor.ts
 create mode 100644 ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordIndex.ts
 create mode 100644 ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordSidecar.ts
 create mode 100644 ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordVector.ts
 create mode 100644 ts/packages/dispatcher/dispatcher/src/context/contextSelector/scorer.ts
 create mode 100644 ts/packages/dispatcher/dispatcher/src/context/contextSelector/tokenize.ts

diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/conversationSignal.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/conversationSignal.ts
new file mode 100644
index 000000000..53cadc05d
--- /dev/null
+++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/conversationSignal.ts
@@ -0,0 +1,103 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+// The conversation signal (§7–8): the data half of contextSelector.
// Produces a ContextVector — a decayed keyword-frequency map of the recent
// conversation — which the scorer (§9) ranks candidates against. Kept behind a
// small interface so v1's raw-token ring buffer can be swapped for v2's knowPro
// topics/entities without touching the scorer or decision rule (the source
// seam, §7.3).

import { tokenize } from "./tokenize.js";

// { canonical token -> decay-weighted conversational frequency }. The single
// shape both the v1 raw-token source and the v2 knowPro source project into, and
// the only conversation input the scorer sees (§9).
export type ContextVector = ReadonlyMap<string, number>;

// The seam. A source owns whatever conversational state it needs and exposes it
// only as a ContextVector; the scorer never learns which implementation is
// behind it.
export interface ConversationSignalSource {
    // Ingest one completed user turn. Called once per turn at an ungated point
    // (§7.2). Implementations that read from an external store (knowPro) may
    // treat this as a no-op.
    recordRequest(request: string): void;
    // The current decayed context vector, history-only — it must reflect turns
    // *before* the request now being routed (§10). The v1 source guarantees this
    // by recording each turn only after it completes.
    getContextVector(): ContextVector;
    // Invalidation hook — `@history clear`, session switch (§7.2, §12).
    reset(): void;
}

// Windowing/decay knobs (§8). A shape, not the session type, so the source stays
// decoupled and unit-testable.
export type SignalConfig = {
    // Ring-buffer look-back N (default 20).
    windowTurns: number;
    // Per-turn recency decay lambda (default 0.9).
    decay: number;
};

// V1 source (§7.2): a contextSelector-owned ring buffer of recent raw user
// requests, tokenized + recency-decayed into the context vector on demand. The
// only user-words signal available in agent-server mode (`ChatHistory` is empty
// there, §7.1). Deterministic: a pure function of the request sequence and the
// config, with no wall-clock input (§12).
export class RingBufferSignalSource implements ConversationSignalSource {
    // Window used when the configured `windowTurns` is NaN. Mirrors the
    // documented default so a broken config still bounds the buffer.
    private static readonly fallbackWindowTurns = 20;

    private readonly buffer: string[] = [];

    // `getConfig` is read on every call so runtime `@config` edits to windowTurns
    // / decay take effect immediately.
    constructor(private readonly getConfig: () => SignalConfig) {}

    // Effective window size: the configured look-back floored to an integer and
    // clamped to at least 1. `Math.max(1, NaN)` is NaN, and every comparison
    // against NaN is false, so a NaN window would silently disable trimming;
    // fall back to the default instead.
    private cap(): number {
        const configured = Math.floor(this.getConfig().windowTurns);
        if (Number.isNaN(configured)) {
            return RingBufferSignalSource.fallbackWindowTurns;
        }
        return Math.max(1, configured);
    }

    // Append one completed turn (whitespace-only requests are ignored) and trim
    // the oldest entries so the buffer never exceeds the current window.
    public recordRequest(request: string): void {
        const trimmed = request.trim();
        if (trimmed.length === 0) {
            return;
        }
        this.buffer.push(trimmed);
        const cap = this.cap();
        if (this.buffer.length > cap) {
            this.buffer.splice(0, this.buffer.length - cap);
        }
    }

    // Sum each buffered turn's tokens weighted by lambda^age, age = turns ago.
    // The buffer holds only prior turns (the current one is recorded after it
    // completes), so the newest buffered turn is age 1 — history-only by
    // construction (§10, §14).
    public getContextVector(): ContextVector {
        const { decay } = this.getConfig();
        const cap = this.cap();
        // Re-apply the window here too: the cap may have shrunk since the last
        // recordRequest() trimmed the buffer.
        const start = Math.max(0, this.buffer.length - cap);
        const turns = this.buffer.slice(start);
        const len = turns.length;
        const vector = new Map<string, number>();
        for (let i = 0; i < len; i++) {
            const age = len - i;
            const weight = Math.pow(decay, age);
            for (const token of tokenize(turns[i])) {
                vector.set(token, (vector.get(token) ?? 0) + weight);
            }
        }
        return vector;
    }

    public reset(): void {
        this.buffer.length = 0;
    }

    // Inspection hooks (telemetry / tests). Not part of the seam.
    public get size(): number {
        return this.buffer.length;
    }

    public snapshot(): readonly string[] {
        return [...this.buffer];
    }
}

// [patch] diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts
// [patch] new file mode 100644
// [patch] index 000000000..d754f9062
// [patch] --- /dev/null
// [patch] +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts
// [patch] @@ -0,0 +1,104 @@

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// The decision rule (§10): given the scorer's per-candidate numbers, decide
// whether to resolve to the top candidate or abstain. Biased toward abstaining —
// a wrong silent reroute is worse than a missed opportunity. All comparisons run
// on quantized scores over a total ordering so the same conversation state always
// yields the same decision (§12).

import { CandidateScore } from "./scorer.js";

export type DecisionConfig = {
    // Evidence gate: minimum distinct distinguishing tokens the winner must
    // match (default 2).
    minUniqueTokens: number;
    // Evidence gate: minimum winner score (matched mass).
    minMass: number;
    // Clear-winner margin: winner must beat the runner-up by at least this.
    margin: number;
};

export type AbstainReason =
    | "coverage"
    | "no-candidates"
    | "no-signal"
    | "min-unique-tokens"
    | "min-mass"
    | "margin";

export type ContextSelectorDecision =
    | {
          kind: "resolve";
          winner: CandidateScore;
          runnerUp: CandidateScore | undefined;
          ranked: CandidateScore[];
      }
    | {
          kind: "abstain";
          reason: AbstainReason;
          ranked: CandidateScore[];
      };

// Fixed-precision quantization so float summation order can't flip a borderline
// threshold/margin comparison (§12).
const QUANTUM = 1e6;
export function quantize(x: number): number {
    return Math.round(x * QUANTUM) / QUANTUM;
}

// Sort key for one score: the quantized value, with NaN mapped to -Infinity so
// an undefined score can never outrank a real one and the comparator stays a
// consistent total order.
function rankingKey(score: number): number {
    const q = quantize(score);
    return Number.isNaN(q) ? Number.NEGATIVE_INFINITY : q;
}

// Total ordering (§12): quantized score desc, then schemaName asc, then
// actionName asc. No reliance on Map/insertion order. NaN scores sort last.
// Returns a new array; the input is not mutated.
export function rankScores(scores: CandidateScore[]): CandidateScore[] {
    return [...scores].sort((a, b) => {
        const qa = rankingKey(a.score);
        const qb = rankingKey(b.score);
        if (qa !== qb) {
            // Compare rather than subtract: (-Infinity) - (-Infinity) is NaN.
            return qb > qa ? 1 : -1;
        }
        if (a.schemaName !== b.schemaName) {
            return a.schemaName < b.schemaName ? -1 : 1;
        }
        return a.actionName < b.actionName
            ? -1
            : a.actionName > b.actionName
              ? 1
              : 0;
    });
}

// The four checks, in order (§10). `covered` is the coverage guard result
// (every colliding candidate has a non-empty keyword vector), computed by the
// caller since it needs the keyword sets. History-only (check 2) is guaranteed
// upstream by the signal source, so it is not re-checked here.
//
// Every gate is written as "abstain unless the passing condition holds"
// (`!(x >= t)`), never as "abstain if the failing condition holds" (`x < t`).
// For finite numbers the two are identical; for NaN (score or threshold) only
// the former abstains, which keeps the rule biased toward abstaining.
export function decide(
    scores: CandidateScore[],
    covered: boolean,
    config: DecisionConfig,
): ContextSelectorDecision {
    const ranked = rankScores(scores);
    if (!covered) {
        return { kind: "abstain", reason: "coverage", ranked };
    }
    if (ranked.length === 0) {
        return { kind: "abstain", reason: "no-candidates", ranked };
    }
    const winner = ranked[0];
    const runnerUp = ranked.length > 1 ? ranked[1] : undefined;

    const winnerScore = quantize(winner.score);
    if (winner.uniqueTokenCount === 0 || !(winnerScore > 0)) {
        return { kind: "abstain", reason: "no-signal", ranked };
    }
    if (!(winner.uniqueTokenCount >= config.minUniqueTokens)) {
        return { kind: "abstain", reason: "min-unique-tokens", ranked };
    }
    if (!(winnerScore >= quantize(config.minMass))) {
        return { kind: "abstain", reason: "min-mass", ranked };
    }
    // A lone candidate competes against an implicit runner-up of 0.
    const runnerUpScore = runnerUp ? quantize(runnerUp.score) : 0;
    if (!(winnerScore - runnerUpScore >= quantize(config.margin))) {
        return { kind: "abstain", reason: "margin", ranked };
    }
    return { kind: "resolve", winner, runnerUp, ranked };
}

// [patch] diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordExtractor.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordExtractor.ts
// [patch] new file mode 100644
// [patch] index 000000000..2f8ccfd3c
// [patch] --- /dev/null
// [patch] +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordExtractor.ts
// [patch] @@ -0,0 +1,152 @@

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// Lexical keyword extraction (§6.1) — the deterministic fallback floor that
// guarantees a keyword vector for every action, including runtime/dynamic agents
// where an LLM distillation pass hasn't run. Classic IR, no model call: mine the
// action's own schema text (schema description + de-camelCased action name +
// parameter names + their JSDoc), drop stopwords/generic verbs, count term
// frequency, emit the top-N. Recomputed from the live schema, so it is
// drift-proof (§6.1). LLM distillation, when available, is layered on top via the
// sidecar (§5) — it is not part of this floor.

import {
    ActionSchemaTypeDefinition,
    SchemaType,
} from "@typeagent/action-schema";
import { tokenize, tokenizeIdentifier } from "./tokenize.js";
import { KeywordVector } from "./keywordVector.js";

// Cap on emitted keywords per action. Keyword lists are tiny (~8 tokens) in
// practice; the cap only bounds a pathologically verbose schema.
export const KEYWORD_TOP_N = 32;

// Depth cap for walking nested parameter types — guards against deep or
// recursive schemas while still reaching realistic nested params.
const MAX_PARAM_DEPTH = 4;

// The text sources feeding extraction, already separated by kind so the caller
// (schema-reading, impure) stays thin and this core stays pure/testable.
export type KeywordExtractionInput = {
    // Schema/manifest description text.
    schemaDescription?: string | undefined;
    // Raw action name identifier (e.g. "addItems"); de-camelCased here.
    actionName: string;
    // JSDoc comment lines on the action definition.
    actionComments?: readonly string[] | undefined;
    // Parameter field name identifiers (de-camelCased here).
    paramNames?: readonly string[] | undefined;
    // JSDoc comment lines on parameter fields.
    paramComments?: readonly string[] | undefined;
};

// Extract a keyword vector from separated schema text. Deterministic: frequency
// desc, then token asc for a stable order before the top-N cut.
//
// `topN` is the maximum number of keywords to emit. A negative value emits
// nothing (passed straight to `slice` it would mean "all but the last |n|"),
// and NaN falls back to KEYWORD_TOP_N rather than emitting an empty vector.
export function extractKeywords(
    input: KeywordExtractionInput,
    topN: number = KEYWORD_TOP_N,
): KeywordVector {
    const counts = new Map<string, number>();
    const bump = (tokens: string[]): void => {
        for (const t of tokens) {
            counts.set(t, (counts.get(t) ?? 0) + 1);
        }
    };

    bump(tokenize(input.schemaDescription ?? ""));
    bump(tokenizeIdentifier(input.actionName));
    for (const c of input.actionComments ?? []) {
        bump(tokenize(c));
    }
    for (const p of input.paramNames ?? []) {
        bump(tokenizeIdentifier(p));
    }
    for (const c of input.paramComments ?? []) {
        bump(tokenize(c));
    }

    const ranked = [...counts.entries()].sort((a, b) => {
        if (b[1] !== a[1]) {
            return b[1] - a[1];
        }
        return a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0;
    });
    const limit = Number.isNaN(topN) ? KEYWORD_TOP_N : Math.max(0, topN);
    return new Set(ranked.slice(0, limit).map(([t]) => t));
}

// Walk a parameter type collecting field names + JSDoc comments. Follows
// references (with a visited guard) and array element types, depth-capped.
/**
 * @param type     Type node to walk; `undefined` is a no-op.
 * @param depth    Current nesting depth; the walk stops once it exceeds
 *                 MAX_PARAM_DEPTH.
 * @param visited  Names of type references already expanded. Shared across the
 *                 whole walk, so each referenced type is expanded at most once.
 * @param names    Output: field names, appended in traversal order.
 * @param comments Output: field JSDoc comment lines, appended in traversal order.
 */
function collectParamText(
    type: SchemaType | undefined,
    depth: number,
    visited: Set<string>,
    names: string[],
    comments: string[],
): void {
    if (type === undefined || depth > MAX_PARAM_DEPTH) {
        return;
    }
    switch (type.type) {
        case "object":
            for (const [name, field] of Object.entries(type.fields)) {
                names.push(name);
                if (field.comments) {
                    comments.push(...field.comments);
                }
                collectParamText(
                    field.type,
                    depth + 1,
                    visited,
                    names,
                    comments,
                );
            }
            break;
        case "array":
            collectParamText(
                type.elementType,
                depth + 1,
                visited,
                names,
                comments,
            );
            break;
        case "type-reference":
            // Unresolved references (no definition) contribute nothing.
            if (type.definition !== undefined && !visited.has(type.name)) {
                visited.add(type.name);
                collectParamText(
                    type.definition.type,
                    depth + 1,
                    visited,
                    names,
                    comments,
                );
            }
            break;
        default:
            // Other type kinds carry no field names or comments to collect.
            break;
    }
}

// Build extraction input from a parsed action definition + its schema
// description. Impure boundary kept small; the heavy lifting is in
// `extractKeywords`.
export function buildExtractionInput(
    actionName: string,
    definition: ActionSchemaTypeDefinition,
    schemaDescription?: string,
): KeywordExtractionInput {
    const names: string[] = [];
    const comments: string[] = [];
    // Only the action's `parameters` field is mined; an action without one
    // yields empty name/comment lists.
    const parametersField = definition.type.fields.parameters;
    if (parametersField !== undefined) {
        collectParamText(
            parametersField.type,
            0,
            new Set<string>(),
            names,
            comments,
        );
    }
    return {
        schemaDescription,
        actionName,
        actionComments: definition.comments,
        paramNames: names,
        paramComments: comments,
    };
}

// [patch] diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordIndex.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordIndex.ts
// [patch] new file mode 100644
// [patch] index 000000000..bab3f80be
// [patch] --- /dev/null
// [patch] +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordIndex.ts
// [patch] @@ -0,0 +1,126 @@

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// The keyword index (§5–6): resolves a candidate's *effective* keyword vector =
// derived lexical defaults (memoized, drift-proof) layered with sidecar deltas
// (live-tunable). The one place the scorer reads candidate keywords from. The
// schema-reading side is behind `ActionSchemaSource` so the index is
// unit-testable with a stub source.

import { ActionSchemaTypeDefinition } from "@typeagent/action-schema";
import { KeywordVector, applyKeywordDelta } from "./keywordVector.js";
import { buildExtractionInput, extractKeywords } from "./keywordExtractor.js";
import { KeywordSidecar, keywordId } from "./keywordSidecar.js";

// Read-only access to the live schema text an action derives its keywords from.
// The seam that keeps the index decoupled from the AppAgentManager.
export interface ActionSchemaSource {
    // Description text for a schema, or undefined when none is available.
    getSchemaDescription(schemaName: string): string | undefined;
    // Parsed definition of one action, or undefined when the schema or the
    // action cannot be found.
    getActionDefinition(
        schemaName: string,
        actionName: string,
    ): ActionSchemaTypeDefinition | undefined;
}

// Structural view of the AppAgentManager methods the production source needs —
// avoids importing the concrete manager type here.
export interface AgentSchemaProvider {
    tryGetActionConfig(
        schemaName: string,
    ): { description?: string } | undefined;
    tryGetActionSchemaFile(schemaName: string):
        | {
              parsedActionSchema: {
                  // Action name -> parsed action definition.
                  actionSchemas: Map<string, ActionSchemaTypeDefinition>;
              };
          }
        | undefined;
}

// Adapt the AppAgentManager to `ActionSchemaSource`. All reads are guarded — a
// schema that isn't loadable yet (agent failed/slow to start) yields undefined
// text, which the extractor treats as an empty (uncovered) vector.
export function agentSchemaSource(
    agents: AgentSchemaProvider,
): ActionSchemaSource {
    return {
        getSchemaDescription(schemaName: string): string | undefined {
            try {
                return agents.tryGetActionConfig(schemaName)?.description;
            } catch {
                // Deliberate best-effort: a provider that throws is treated
                // the same as "no description".
                return undefined;
            }
        },
        getActionDefinition(
            schemaName: string,
            actionName: string,
        ): ActionSchemaTypeDefinition | undefined {
            try {
                return agents
                    .tryGetActionSchemaFile(schemaName)
                    ?.parsedActionSchema.actionSchemas.get(actionName);
            } catch {
                // Deliberate best-effort: same as "action not found".
                return undefined;
            }
        },
    };
}

export class KeywordIndex {
    // Derived-only vectors, keyed by `schema.action`. Cached because extraction
    // reads/parses schema text; sidecar deltas are applied fresh on top so live
    // `@collision keywords` edits take effect without invalidating this.
    private readonly derivedMemo = new Map<string, KeywordVector>();

    constructor(
        private readonly source: ActionSchemaSource,
        private readonly getSidecar: () => KeywordSidecar,
    ) {}

    // Lexical-floor keywords for one action (memoized). An action whose
    // definition cannot be read gets an empty vector, and that empty result is
    // memoized as well — call invalidate() once the schema becomes loadable.
    public derived(schemaName: string, actionName: string): KeywordVector {
        const id = keywordId(schemaName, actionName);
        const cached = this.derivedMemo.get(id);
        if (cached !== undefined) {
            return cached;
        }
        const definition = this.source.getActionDefinition(
            schemaName,
            actionName,
        );
        const vector: KeywordVector =
            definition === undefined
                ? new Set<string>()
                : extractKeywords(
                      buildExtractionInput(
                          actionName,
                          definition,
                          this.source.getSchemaDescription(schemaName),
                      ),
                  );
        this.derivedMemo.set(id, vector);
        return vector;
    }

    // Effective keywords = derived ∪ add − remove (or replace) from the sidecar.
    public effective(schemaName: string, actionName: string): KeywordVector {
        const derived = this.derived(schemaName, actionName);
        const delta = this.getSidecar().deltaFor(schemaName, actionName);
        return applyKeywordDelta(derived, delta);
    }

    // Drop cached derived vectors when a schema may have changed (agent
    // add/remove/reload). Clears all when no schema is given. Matching is by
    // `schemaName.` prefix, so it also drops entries of any schema whose name
    // extends this one with a dot.
    public invalidate(schemaName?: string): void {
        if (schemaName === undefined) {
            this.derivedMemo.clear();
            return;
        }
        const prefix = `${schemaName}.`;
        // Snapshot the keys so entries can be deleted while iterating.
        for (const id of [...this.derivedMemo.keys()]) {
            if (id.startsWith(prefix)) {
                this.derivedMemo.delete(id);
            }
        }
    }
}

// [patch] diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordSidecar.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordSidecar.ts
// [patch] new file mode 100644
// [patch] index 000000000..266851b44
// [patch] --- /dev/null
// [patch] +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordSidecar.ts
// [patch] @@ -0,0 +1,234 @@

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// The override sidecar (§5): a small, separate `collision-keywords.json` that
// stores per-(schema, action) keyword deltas over the derived lexical defaults.
// Profile-scoped and edited live via `@collision keywords …` (§5.3) — so it
// mirrors the CollisionPreferenceStore (load + edit + save), not the read-only
// registry. Missing/malformed degrades to empty and never throws.

import fs from "node:fs";
import path from "node:path";
import registerDebug from "debug";
import { tokenize } from "./tokenize.js";

const debugSidecar = registerDebug(
    "typeagent:dispatcher:collision:contextSelector:sidecar",
);

export const COLLISION_KEYWORDS_FILE = "collision-keywords.json";
export const COLLISION_KEYWORDS_SCHEMA_VERSION = 1;

// A delta over the derived defaults for one action (§5.1). `replace` is the
// escape hatch (verbatim), otherwise `add`/`remove` layer on top.
export type KeywordDelta = {
    add?: string[];
    remove?: string[];
    replace?: string[];
};

// On-disk shape of the sidecar file: `overrides` maps a `schema.action` id to
// that action's delta.
type KeywordSidecarFile = {
    schemaVersion: number;
    overrides: Record<string, KeywordDelta>;
};

// The id under which one action's delta is stored: `schemaName.actionName`.
export function keywordId(schemaName: string, actionName: string): string {
    return `${schemaName}.${actionName}`;
}

// Canonicalize user-supplied keywords through the shared tokenizer so the
// sidecar stores the same token forms the extractor and context vector use
// (a multi-word entry like "pivot table" expands to its tokens). Duplicates are
// dropped; first-seen order is kept.
function canonicalizeKeywords(keywords: string[]): string[] {
    const out: string[] = [];
    const seen = new Set<string>();
    for (const raw of keywords) {
        for (const t of tokenize(raw)) {
            if (!seen.has(t)) {
                seen.add(t);
                out.push(t);
            }
        }
    }
    return out;
}

// Canonicalize each part of a delta that is present. Absent parts stay absent
// (never assigned `undefined`).
function canonicalizeDelta(delta: KeywordDelta): KeywordDelta {
    const out: KeywordDelta = {};
    if (delta.replace !== undefined) {
        out.replace = canonicalizeKeywords(delta.replace);
    }
    if (delta.add !== undefined) {
        out.add = canonicalizeKeywords(delta.add);
    }
    if (delta.remove !== undefined) {
        out.remove = canonicalizeKeywords(delta.remove);
    }
    return out;
}

// True when every part of the delta is either absent or an empty list.
function isEmptyDelta(delta: KeywordDelta): boolean {
    return (
        (delta.replace === undefined || delta.replace.length === 0) &&
        (delta.add === undefined || delta.add.length === 0) &&
        (delta.remove === undefined || delta.remove.length === 0)
    );
}

// Assemble a delta from mutable sets, omitting empty parts entirely (never set
// a property to `undefined` — the package compiles with exactOptionalPropertyTypes).
function buildDelta(
    replace: string[] | undefined,
    add: ReadonlySet<string>,
    remove: ReadonlySet<string>,
): KeywordDelta {
    const out: KeywordDelta = {};
    if (replace !== undefined && replace.length > 0) {
        out.replace = [...replace];
    }
    if (add.size > 0) {
        out.add = [...add];
    }
    if (remove.size > 0) {
        out.remove = [...remove];
    }
    return out;
}

// True for a non-null, non-array object.
function isPlainObject(value: unknown): value is Record<string, unknown> {
    return (
        value !== null && typeof value === "object" && !Array.isArray(value)
    );
}

// Keep only the string elements of an array; anything that is not an array
// yields undefined.
function stringList(value: unknown): string[] | undefined {
    if (!Array.isArray(value)) {
        return undefined;
    }
    return value.filter((v): v is string => typeof v === "string");
}

// Validate one untrusted sidecar entry read from disk. Returns a delta holding
// only the well-formed parts (arrays, reduced to their string elements), or
// undefined when the entry is not an object or has no well-formed part. This
// is what keeps a hand-edited file such as `{ "a.b": { "add": 5 } }` from
// throwing during canonicalization.
function sanitizeDelta(raw: unknown): KeywordDelta | undefined {
    if (!isPlainObject(raw)) {
        return undefined;
    }
    const out: KeywordDelta = {};
    const replace = stringList(raw.replace);
    if (replace !== undefined) {
        out.replace = replace;
    }
    const add = stringList(raw.add);
    if (add !== undefined) {
        out.add = add;
    }
    const remove = stringList(raw.remove);
    if (remove !== undefined) {
        out.remove = remove;
    }
    return Object.keys(out).length > 0 ? out : undefined;
}

export class KeywordSidecar {
    // `schema.action` id -> canonicalized delta.
    private readonly byId: Map<string, KeywordDelta>;

    private constructor(
        private readonly filePath: string | undefined,
        overrides: Record<string, KeywordDelta>,
    ) {
        this.byId = new Map(
            Object.entries(overrides).map(([id, d]) => [
                id,
                canonicalizeDelta(d),
            ]),
        );
    }

    public static empty(): KeywordSidecar {
        return new KeywordSidecar(undefined, {});
    }

    // Load from the instance (profile) directory. When `dir` is undefined
    // (non-persistent sessions / tests) the sidecar is in-memory only and
    // `save()` is a no-op. A missing, unreadable, or malformed file degrades
    // to an empty sidecar; malformed individual entries are dropped.
    public static load(dir: string | undefined): KeywordSidecar {
        if (dir === undefined) {
            return new KeywordSidecar(undefined, {});
        }
        const filePath = path.join(dir, COLLISION_KEYWORDS_FILE);
        const overrides: Record<string, KeywordDelta> = {};
        try {
            if (fs.existsSync(filePath)) {
                const raw = fs.readFileSync(filePath, "utf8");
                const parsed: unknown = JSON.parse(raw);
                // Accept both the wrapped form ({ schemaVersion, overrides })
                // and a bare `{ "schema.action": delta }` map (§5.1 example).
                const wrapped = isPlainObject(parsed)
                    ? parsed.overrides
                    : undefined;
                const map = wrapped ? wrapped : parsed;
                if (isPlainObject(map)) {
                    for (const [id, entry] of Object.entries(map)) {
                        if (id === "schemaVersion") {
                            continue;
                        }
                        const delta = sanitizeDelta(entry);
                        if (delta !== undefined) {
                            overrides[id] = delta;
                        }
                    }
                }
            }
        } catch (e) {
            debugSidecar(`Failed to load sidecar from ${filePath}: ${e}`);
        }
        return new KeywordSidecar(filePath, overrides);
    }

    public get isEmpty(): boolean {
        return this.byId.size === 0;
    }

    public deltaFor(
        schemaName: string,
        actionName: string,
    ): KeywordDelta | undefined {
        return this.byId.get(keywordId(schemaName, actionName));
    }

    public list(): { id: string; delta: KeywordDelta }[] {
        return [...this.byId.entries()].map(([id, delta]) => ({ id, delta }));
    }

    // `@collision keywords add …` — merge into `add`, and drop the same tokens
    // from `remove` so an add reverses a prior remove.
    public addKeywords(id: string, keywords: string[]): void {
        const canon = canonicalizeKeywords(keywords);
        const delta = this.byId.get(id) ?? {};
        const add = new Set(delta.add ?? []);
        const remove = new Set(delta.remove ?? []);
        for (const k of canon) {
            add.add(k);
            remove.delete(k);
        }
        this.writeEntry(id, buildDelta(delta.replace, add, remove));
    }

    // `@collision keywords remove …` — merge into `remove`, and drop the same
    // tokens from `add`.
    public removeKeywords(id: string, keywords: string[]): void {
        const canon = canonicalizeKeywords(keywords);
        const delta = this.byId.get(id) ?? {};
        const add = new Set(delta.add ?? []);
        const remove = new Set(delta.remove ?? []);
        for (const k of canon) {
            remove.add(k);
            add.delete(k);
        }
        this.writeEntry(id, buildDelta(delta.replace, add, remove));
    }

    // Clear a single entry (revert to derived-only). Returns true if removed.
    // The file is rewritten only when something was actually removed.
    public clearEntry(id: string): boolean {
        const removed = this.byId.delete(id);
        if (removed) {
            this.save();
        }
        return removed;
    }

    // Store (or, when the delta is empty, delete) one entry and persist.
    private writeEntry(id: string, delta: KeywordDelta): void {
        const canon = canonicalizeDelta(delta);
        if (isEmptyDelta(canon)) {
            this.byId.delete(id);
        } else {
            this.byId.set(id, canon);
        }
        this.save();
    }

    // Best-effort persistence: failures are logged to the debug channel and
    // otherwise ignored, so an edit still applies in memory.
    private save(): void {
        if (this.filePath === undefined) {
            return;
        }
        const data: KeywordSidecarFile = {
            schemaVersion: COLLISION_KEYWORDS_SCHEMA_VERSION,
            overrides: Object.fromEntries(this.byId.entries()),
        };
        try {
            fs.mkdirSync(path.dirname(this.filePath), { recursive: true });
            fs.writeFileSync(
                this.filePath,
                JSON.stringify(data, null, 2),
                "utf8",
            );
        } catch (e) {
            debugSidecar(`Failed to save sidecar to ${this.filePath}: ${e}`);
        }
    }
}

// [patch] diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordVector.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordVector.ts
// [patch] new file mode 100644
// [patch] index 000000000..dbfacfd1d
// [patch] --- /dev/null
// [patch] +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordVector.ts
// [patch] @@ -0,0 +1,37 @@

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// A candidate's keyword vector (§6). Flattened — order is ignored, so a set is
// the exact representation (§9: "each candidate's keywords are a set"). Shared by
// the extractor, sidecar, index, and scorer.

export type KeywordVector = ReadonlySet<string>;

export function emptyKeywordVector(): KeywordVector {
    return new Set<string>();
}

// Apply sidecar deltas (§5.1): effective = derived ∪ add − remove, or the
// `replace` escape hatch verbatim. Canonical tokens only (callers tokenize
// before this). Inputs are not mutated. With no delta the `derived` set itself
// is returned; otherwise the result is a new set.
export function applyKeywordDelta(
    derived: KeywordVector,
    delta:
        | { add?: string[]; remove?: string[]; replace?: string[] }
        | undefined,
): KeywordVector {
    if (delta === undefined) {
        return derived;
    }
    // `replace` wins outright: add/remove are ignored when it is present.
    if (delta.replace !== undefined) {
        return new Set(delta.replace);
    }
    const out = new Set(derived);
    for (const k of delta.add ?? []) {
        out.add(k);
    }
    // Removal runs last, so a token listed in both add and remove ends up absent.
    for (const k of delta.remove ?? []) {
        out.delete(k);
    }
    return out;
}

// [patch] diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/scorer.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/scorer.ts
// [patch] new file mode 100644
// [patch] index 000000000..4d6d7af66
// [patch] --- /dev/null
// [patch] +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/scorer.ts
// [patch] @@ -0,0 +1,109 @@

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// The scorer (§9): ranks colliding candidates by how much the recent
// conversation overlaps each one's keywords, counting most the tokens that
// uniquely point to one candidate (candidate-local IDF) and cancelling tokens
// the candidates share. Behind `CollisionScorer` so the v1 TF-IDF scorer can be
// swapped for a knowPro-entity or embedding scorer later (the scoring roadmap,
// §9) — the decision rule (§10) consumes `CandidateScore` regardless.

import { ContextVector } from "./conversationSignal.js";
import { KeywordVector } from "./keywordVector.js";

// One colliding candidate handed to the scorer — its identity plus its flattened
// (order-ignored) keyword vector.
export type ScorerCandidate = {
    schemaName: string;
    actionName: string;
    keywords: KeywordVector;
};

// A token that fired for a candidate, with the pieces of its contribution kept
// separate for explainable telemetry (§13.4).
export type MatchedToken = {
    token: string;
    // Decay-weighted conversational frequency C[token] (§8).
    contextWeight: number;
    // Candidate-local discriminativeness disc(token) ∈ [0,1] (§9).
    disc: number;
    // contextWeight × disc — what this token adds to the candidate's score.
    contribution: number;
};

export type CandidateScore = {
    schemaName: string;
    actionName: string;
    // Σ contribution over matched tokens.
    score: number;
    // Distinct matched tokens that actually distinguish this candidate
    // (disc > 0). The evidence gate's `minUniqueTokens` counts these (§10).
    uniqueTokenCount: number;
    // All firing tokens, sorted by token for stable output.
    matched: MatchedToken[];
};

export interface CollisionScorer {
    score(
        contextVector: ContextVector,
        candidates: ScorerCandidate[],
    ): CandidateScore[];
}

// V1 scorer (§9): score(a) = Σ_{t ∈ C ∩ K_a} C[t] · disc(t), with candidate-local
// IDF disc(t) = log(N/df(t)) / log(N) — 1 for a token unique to one candidate, 0
// for one shared by all N colliding candidates, graduated in between. Fully
// deterministic; the returned order mirrors the input candidate order (the
// decision rule imposes the total ordering, §10/§12).
export class TfIdfScorer implements CollisionScorer {
    public score(
        contextVector: ContextVector,
        candidates: ScorerCandidate[],
    ): CandidateScore[] {
        const n = candidates.length;
        // Document frequency: how many candidates' keyword sets contain a token.
        const df = new Map<string, number>();
        for (const candidate of candidates) {
            for (const token of candidate.keywords) {
                df.set(token, (df.get(token) ?? 0) + 1);
            }
        }
        const logN = n > 1 ? Math.log(n) : 0;
        const disc = (token: string): number => {
            // With a single candidate there is nothing to discriminate
            // against, so every token counts fully.
            if (logN <= 0) {
                return 1;
            }
            const d = df.get(token) ?? 1;
            return Math.log(n / d) / logN;
        };

        return candidates.map((candidate) => {
            const matched: MatchedToken[] = [];
            let score = 0;
            let uniqueTokenCount = 0;
            for (const token of candidate.keywords) {
                const contextWeight = contextVector.get(token);
                if (contextWeight === undefined || contextWeight === 0) {
                    continue;
                }
                const d = disc(token);
                const contribution = contextWeight * d;
                // Tokens shared by every candidate (disc 0) are still reported
                // in `matched`, but add nothing to the score or the unique count.
                matched.push({ token, contextWeight, disc: d, contribution });
                if (d > 0) {
                    score += contribution;
                    uniqueTokenCount += 1;
                }
            }
            matched.sort((a, b) =>
                a.token < b.token ? -1 : a.token > b.token ? 1 : 0,
            );
            return {
                schemaName: candidate.schemaName,
                actionName: candidate.actionName,
                score,
                uniqueTokenCount,
                matched,
            };
        });
    }
}

// [patch] diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/tokenize.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/tokenize.ts
// [patch] new file mode 100644
// [patch] index 000000000..c1daee639
// [patch] --- /dev/null
// [patch] +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/tokenize.ts
// [patch] @@ -0,0 +1,250 @@

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// Deterministic canonicalizer + tokenizer shared by keyword extraction (§6) and
// the conversation signal (§7). Pinned as part of the determinism contract (§12):
// same input string always yields the same token list, independent of locale,
// Map ordering, or wall-clock. No stemmer in v1 (avoids a heavy NLP dependency
// and a non-deterministic normalization step); parked as a tuning lever.

// Product/language names and spreadsheet refs whose punctuation would otherwise
// be stripped ("C#" -> "c", ".NET" -> "net"). Matched before the generic word
// rule so they survive as whole tokens. Lower-cased forms (input is lowercased
// first). Order within the alternation matters — longer/more-specific first.
// Product/language names and spreadsheet ranges whose punctuation the generic
// word rule would otherwise strip ("C#" -> "c", ".NET" -> "net"). They are
// tried before the word rule so they survive as whole tokens. Written in lower
// case because the input is lower-cased before matching; alternation order
// matters (longer / more specific first).
//
// The two names that END in a letter carry a `(?![a-z])` guard so they only
// match as a complete name: without it ".network" split into ".net" + "work"
// and "objective-criteria" into "objective-c" + "riteria". A trailing digit is
// still accepted (".net8" keeps ".net").
const PROTECTED_ALTERNATION = String.raw`c\+\+|objective-c(?![a-z])|f#|c#|\.net(?![a-z])|[a-z]+[0-9]+:[a-z]+[0-9]+`;

// A token consisting only of ASCII letters/digits came from the generic word
// rule; anything else was produced by a protected pattern. Stateless (no /g),
// so it is safe to share.
const PLAIN_WORD = /^[a-z0-9]+$/;

// One token = a protected pattern OR a run of letters/digits. Rebuilt per call:
// a RegExp with /g carries lastIndex state, so a compiled instance is never
// shared between calls.
function tokenRegExp(): RegExp {
    return new RegExp(`${PROTECTED_ALTERNATION}|[a-z0-9]+`, "g");
}

// English stopwords — non-topical glue that never distinguishes a candidate.
// prettier-ignore
const STOPWORDS: ReadonlySet<string> = new Set<string>([
    "a", "an", "the", "this", "that", "these", "those", "it", "its",
    "of", "to", "in", "on", "at", "by", "for", "with", "from", "into", "onto",
    "as", "is", "are", "was", "were", "be", "been", "being",
    "and", "or", "but", "not", "no", "yes",
    "do", "does", "did", "done", "have", "has", "had", "having",
    "will", "would", "shall", "should", "can", "could", "may", "might", "must",
    "my", "your", "our", "their", "his", "her",
    "me", "you", "we", "they", "he", "she", "them", "us", "i", "am",
    "if", "then", "else", "so", "than", "too", "very", "just", "now",
    "about", "over", "up", "down", "out", "off",
    "please", "let", "want", "need",
    "some", "any", "all", "each", "every", "there", "here",
    "what", "which", "who", "whom", "whose", "when", "where", "why", "how",
]);

// Generic CRUD / imperative verbs — present in most action names, so they carry
// no discriminative signal. Dropped from both keyword vectors and the context
// vector so they never dominate a match.
// prettier-ignore
const GENERIC_VERBS: ReadonlySet<string> = new Set<string>([
    "add", "create", "insert", "append", "new", "make",
    "get", "fetch", "show", "display", "list", "find", "search", "lookup",
    "query", "read", "view", "open",
    "update", "edit", "change", "modify", "set", "put",
    "remove", "delete", "clear", "drop", "erase", "cancel", "close",
    "save", "store", "load",
    "run", "start", "stop", "enable", "disable", "toggle",
    "select", "pick", "choose", "use", "do", "perform", "execute", "action",
]);

export type TokenizeOptions = {
    // Drop English stopwords (default true).
    dropStopwords?: boolean;
    // Drop generic CRUD verbs (default true).
    dropGenericVerbs?: boolean;
    // Minimum token length to keep, protected patterns exempt (default 2).
    minLength?: number;
};

/**
 * Split identifier casing/separators into space-delimited words:
 * "addItems" -> "add Items", "HTMLParser" -> "HTML Parser",
 * "add_items" / "add-items" -> "add items".
 *
 * @param identifier An action/parameter style identifier.
 * @returns The identifier with word boundaries turned into spaces; casing is
 * left untouched.
 */
export function deCamelCase(identifier: string): string {
    return (
        identifier
            // lower/digit followed by upper: "addItems" -> "add Items"
            .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
            // acronym followed by a capitalized word: "HTMLParser" -> "HTML Parser"
            .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
            // snake_case / kebab-case separators
            .replace(/[_\-]+/g, " ")
    );
}

/**
 * Canonicalize and tokenize free text.
 *
 * The text is NFKC-normalized and lower-cased, then scanned for protected
 * patterns and letter/digit runs. Stopwords, generic verbs and tokens shorter
 * than `minLength` are dropped; protected tokens ("c#", ".net", "a1:b2", ...)
 * are exempt from all three filters.
 *
 * @param text Input text; an empty string yields an empty list.
 * @param options Filter switches, see {@link TokenizeOptions}.
 * @returns Tokens in input order, duplicates preserved (callers that need
 * multiplicity get it).
 */
export function tokenize(text: string, options?: TokenizeOptions): string[] {
    const dropStopwords = options?.dropStopwords ?? true;
    const dropGenericVerbs = options?.dropGenericVerbs ?? true;
    const minLength = options?.minLength ?? 2;

    if (!text) {
        return [];
    }
    const normalized = text.normalize("NFKC").toLowerCase();
    const out: string[] = [];
    for (const match of normalized.matchAll(tokenRegExp())) {
        const token = match[0];
        // Protected tokens always contain punctuation, so they never match
        // the plain-word shape and bypass every filter below.
        if (PLAIN_WORD.test(token)) {
            if (token.length < minLength) {
                continue;
            }
            if (dropStopwords && STOPWORDS.has(token)) {
                continue;
            }
            if (dropGenericVerbs && GENERIC_VERBS.has(token)) {
                continue;
            }
        }
        out.push(token);
    }
    return out;
}

/**
 * Convenience: tokenize an identifier (de-camel-cased first). Used for
 * action/parameter names.
 *
 * @param identifier The identifier to split and tokenize.
 * @param options Forwarded unchanged to {@link tokenize}.
 */
export function tokenizeIdentifier(
    identifier: string,
    options?: TokenizeOptions,
): string[] {
    return tokenize(deCamelCase(identifier), options);
}

// Test hooks / callers that want to inspect the pinned vocabularies. Both
// expect an already lower-cased token.
export function isStopword(token: string): boolean {
    return STOPWORDS.has(token);
}

export function isGenericVerb(token: string): boolean {
    return GENERIC_VERBS.has(token);
}
- matchContextSelector.ts: orchestrator - adapts validated MatchResults to scorer candidates, runs signal -> keywords -> scorer -> decision, emits telemetry, returns the winning match + U-2 affordance note or abstains. - collisionTelemetry.ts: add "context-weight" strategy label + optional per-candidate matchedTokens for explainable events. - commandHandlerContext.ts: construct conversationSignal (ring buffer), contextSelectorSidecar, contextSelectorKeywords (KeywordIndex); reset signal + invalidate keyword cache on session switch. - matchRequest.ts: insert the tier between registry-first and the grammar strategy. - interpretRequest.ts: record each completed turn into the signal (history-only). - historyCommandHandler.ts / systemAgent.ts: reset the signal on `@history clear` and clear-deep. Off by default; detect:false preserves byte-identical legacy behavior. Builds green. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/context/collisionTelemetry.ts | 13 +- .../src/context/commandHandlerContext.ts | 31 ++++ .../dispatcher/src/context/session.ts | 30 ++++ .../system/handlers/historyCommandHandler.ts | 1 + .../src/context/system/systemAgent.ts | 1 + .../src/translation/interpretRequest.ts | 5 + .../src/translation/matchContextSelector.ts | 168 ++++++++++++++++++ .../src/translation/matchRequest.ts | 31 +++- 8 files changed, 277 insertions(+), 3 deletions(-) create mode 100644 ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts diff --git a/ts/packages/dispatcher/dispatcher/src/context/collisionTelemetry.ts b/ts/packages/dispatcher/dispatcher/src/context/collisionTelemetry.ts index 994b081eb..3550e031a 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/collisionTelemetry.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/collisionTelemetry.ts @@ -55,6 +55,12 @@ export type CollisionCandidate = { * (`collision.priorityOrder`); 0 = highest priority. 
*/ priorityRank?: number; + /** + * Optional matched context tokens and their decayed weights — populated by + * the `contextSelector` tier so a topical decision is explainable from + * counts (§13.4). Left undefined for other detection points. + */ + matchedTokens?: { token: string; weight: number }[]; }; export type CollisionEvent = { @@ -77,7 +83,12 @@ export type CollisionEvent = { * undefined for static (where there's no runtime ranking). */ firstMatchCandidate?: CollisionCandidate | undefined; - strategy: CollisionStrategy | "warn" | "error" | "downgraded"; + strategy: + | CollisionStrategy + | "warn" + | "error" + | "downgraded" + | "context-weight"; /** * For `kind="grammarMatch"`: which classifier flagged this collision * (`distinctActions` vs `tiedHeuristics`). Helps decide whether to diff --git a/ts/packages/dispatcher/dispatcher/src/context/commandHandlerContext.ts b/ts/packages/dispatcher/dispatcher/src/context/commandHandlerContext.ts index b4a04233d..0d4c532d6 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/commandHandlerContext.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/commandHandlerContext.ts @@ -94,6 +94,15 @@ import { } from "./collisionTelemetry.js"; import { CollisionPreferenceStore } from "./collisionPreferences.js"; import { CollisionRegistry } from "./collisionRegistry.js"; +import { + ConversationSignalSource, + RingBufferSignalSource, +} from "./contextSelector/conversationSignal.js"; +import { + KeywordIndex, + agentSchemaSource, +} from "./contextSelector/keywordIndex.js"; +import { KeywordSidecar } from "./contextSelector/keywordSidecar.js"; import { ChoiceManager } from "@typeagent/agent-sdk/helpers/action"; import lockfile from "proper-lockfile"; import { IndexManager } from "./indexManager.js"; @@ -238,6 +247,13 @@ export type CommandHandlerContext = { // chosen candidate; consumed on first read. Covers the "don't remember" // case (no durable preference written). 
collisionOneShotPicks: Set; + + // contextSelector (§11). The conversation signal source (produces the + // per-turn context vector), the effective-keyword index (derived floor + + // sidecar overrides), and the live-tunable keyword sidecar it reads. + conversationSignal: ConversationSignalSource; + contextSelectorKeywords: KeywordIndex; + contextSelectorSidecar: KeywordSidecar; }; export function getRequestId(context: CommandHandlerContext): RequestId { @@ -729,11 +745,22 @@ export async function initializeCommandHandlerContext( session.getConfig().collision.preference.registryPath, collisionChoiceManager: new ChoiceManager(), collisionOneShotPicks: new Set(), + conversationSignal: new RingBufferSignalSource( + () => session.getConfig().collision.contextSelector, + ), + contextSelectorSidecar: KeywordSidecar.load(instanceDir), + // Needs `context` for the sidecar getter; assigned after the literal. + contextSelectorKeywords: undefined as unknown as KeywordIndex, // Replaced below; the queue's broadcaster needs `context` to be // available so it can route through `context.clientIO`. requestQueue: undefined as unknown as RequestQueue, }; + context.contextSelectorKeywords = new KeywordIndex( + agentSchemaSource(agents), + () => context.contextSelectorSidecar, + ); + const snapshotCoalescer = createSnapshotCoalescer((snapshot) => { context.clientIO.queueStateChanged?.(snapshot); }); @@ -1184,6 +1211,10 @@ export async function setSessionOnCommandHandlerContext( ); await setAppAgentStates(context); context.translatorCache.clear(); + // Session switch (§7.2): drop the contextSelector conversation buffer and + // the derived-keyword cache (agents were closed/reloaded above). 
+ context.conversationSignal.reset(); + context.contextSelectorKeywords.invalidate(); } export async function reloadSessionOnCommandHandlerContext( diff --git a/ts/packages/dispatcher/dispatcher/src/context/session.ts b/ts/packages/dispatcher/dispatcher/src/context/session.ts index 7cf31f982..c78116290 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/session.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/session.ts @@ -261,6 +261,27 @@ export type CollisionConfig = { scorer: "placeholder" | "actionEmbedding"; strategy: CollisionStrategy; }; + // Context-weighted resolution (the `contextSelector` tier, §11.3). A + // deterministic, LLM-free tiebreaker that runs on the grammarMatch path + // before the configured strategy: confident -> resolve (no LLM); abstain -> + // fall through. Off by default; only `detect` is exposed via `@config`. + contextSelector: { + detect: boolean; + // Ring-buffer look-back N over recent user turns (default 20). + windowTurns: number; + // Per-turn recency decay lambda (default 0.9). + decay: number; + // Evidence gate: min distinct distinguishing tokens the winner must + // match (default 2). + minUniqueTokens: number; + // Evidence gate: min winner score / matched mass (default 1.0). + minMass: number; + // Clear-winner margin the winner must beat the runner-up by (default 1.0). + margin: number; + // On abstain: hand to the configured grammar strategy (default) or + // escalate the request to the LLM translation path. + abstainFallback: "defer-to-strategy" | "escalate-to-llm"; + }; // Optional explicit ranking — comma-separated agent names (e.g. "list,music,player"). // Stored as a string because the dispatcher config system rejects arrays. // Empty / unset falls back to agent registration order. 
@@ -441,6 +462,15 @@ const defaultSessionConfig: SessionConfig = { scorer: "placeholder", strategy: "first-match", }, + contextSelector: { + detect: false, + windowTurns: 20, + decay: 0.9, + minUniqueTokens: 2, + minMass: 1.0, + margin: 1.0, + abstainFallback: "defer-to-strategy", + }, priorityOrder: "", multipleActionBehavior: "downgrade-to-priority", telemetry: { diff --git a/ts/packages/dispatcher/dispatcher/src/context/system/handlers/historyCommandHandler.ts b/ts/packages/dispatcher/dispatcher/src/context/system/handlers/historyCommandHandler.ts index c7edd4c05..30064529c 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/system/handlers/historyCommandHandler.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/system/handlers/historyCommandHandler.ts @@ -46,6 +46,7 @@ class HistoryClearCommandHandler implements CommandHandler { const systemContext = context.sessionContext.agentContext; const history = systemContext.chatHistory; history.clear(); + systemContext.conversationSignal.reset(); clearClaudeReasoningSession(systemContext); clearCopilotReasoningSession(systemContext); if (param.flags.activity) { diff --git a/ts/packages/dispatcher/dispatcher/src/context/system/systemAgent.ts b/ts/packages/dispatcher/dispatcher/src/context/system/systemAgent.ts index 7a01690db..1b489c915 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/system/systemAgent.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/system/systemAgent.ts @@ -82,6 +82,7 @@ class ClearDeepCommandHandler implements CommandHandlerNoParams { public async run(context: ActionContext) { const systemContext = context.sessionContext.agentContext; systemContext.chatHistory.clear(); + systemContext.conversationSignal.reset(); clearClaudeReasoningSession(systemContext); clearCopilotReasoningSession(systemContext); setActivityContext(DispatcherActivityName, null, systemContext); diff --git a/ts/packages/dispatcher/dispatcher/src/translation/interpretRequest.ts 
b/ts/packages/dispatcher/dispatcher/src/translation/interpretRequest.ts index 711a239b9..a7cb32c8c 100644 --- a/ts/packages/dispatcher/dispatcher/src/translation/interpretRequest.ts +++ b/ts/packages/dispatcher/dispatcher/src/translation/interpretRequest.ts @@ -294,6 +294,11 @@ export async function interpretRequest( }); } + // Record this completed user turn into the contextSelector signal *after* + // resolution, so it never contributes to its own context vector + // (history-only, §10). Runs once per user turn at this ungated ingress. + systemContext.conversationSignal.recordRequest(request); + return { elapsedMs: translateResult.elapsedMs, requestAction, diff --git a/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts b/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts new file mode 100644 index 000000000..d7086d6f6 --- /dev/null +++ b/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts @@ -0,0 +1,168 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +// The contextSelector orchestrator (§11): adapts the grammar path's validated +// MatchResults into scorer candidates, runs the deterministic pipeline +// (signal -> keywords -> scorer -> decision), emits telemetry, and returns the +// winning match plus a UX affordance note — or undefined to abstain. Pure +// engine logic lives under ../context/contextSelector/; this file is the thin +// MatchResult-aware seam plus telemetry. + +import { MatchResult } from "agent-cache"; +import { CommandHandlerContext } from "../context/commandHandlerContext.js"; +import { + CollisionCandidate, + emitCollisionEvent, +} from "../context/collisionTelemetry.js"; +import { getAppAgentName } from "./agentTranslators.js"; +import { + CandidateScore, + ScorerCandidate, + TfIdfScorer, +} from "../context/contextSelector/scorer.js"; +import { decide } from "../context/contextSelector/decision.js"; + +// v1 scorer selection (§9). 
// v1 scorer selection (§9): TF-IDF. The scorer keeps no per-request state, so
// one module-level instance is shared by every call. This is the swap point
// for a future knowPro-entity / embedding scorer.
const scorer = new TfIdfScorer();

export type ContextSelectorResolution = {
    // The winning validated match to resolve to (avoids the downstream LLM).
    match: MatchResult;
    // Non-blocking UX affordance shown on a reroute (U-2, §11.2).
    note: string;
};

// Identify a match by its FIRST action. Empty strings mean the match has no
// actions; the caller skips such matches.
function primaryOf(match: MatchResult): {
    schemaName: string;
    actionName: string;
} {
    const first = match.match.actions[0]?.action;
    return {
        schemaName: first?.schemaName ?? "",
        actionName: first?.actionName ?? "",
    };
}

// A scorer candidate that remembers which validated match it stands for.
type Candidate = ScorerCandidate & { match: MatchResult };

// Project one score into the telemetry shape. `weight` carries the token's
// contribution to the score (context weight × disc).
function toTelemetryCandidate(s: CandidateScore): CollisionCandidate {
    return {
        schemaName: s.schemaName,
        actionName: s.actionName,
        score: s.score,
        matchedTokens: s.matched.map((m) => ({
            token: m.token,
            weight: m.contribution,
        })),
    };
}

function toTelemetryCandidates(scores: CandidateScore[]): CollisionCandidate[] {
    return scores.map(toTelemetryCandidate);
}

/**
 * Resolve a grammar-path collision by topical proximity, or abstain.
 *
 * Assumes the caller has already confirmed this is a collision and that
 * `contextSelector.detect` is on.
 *
 * @param validated Validated grammar matches, in the caller's order.
 * @param ctx Supplies the session config, conversation signal and keyword index.
 * @param request The user request text; recorded in telemetry only.
 * @returns The winning match plus a reroute note, or `undefined` when fewer
 * than two distinct candidates exist, the decision rule abstains, or the
 * winner cannot be mapped back to a match.
 */
export function resolveContextSelector(
    validated: MatchResult[],
    ctx: CommandHandlerContext,
    request: string,
): ContextSelectorResolution | undefined {
    const cfg = ctx.session.getConfig().collision;
    const startedAt = performance.now();

    // Distinct (schema, action) candidates, each keeping the first MatchResult
    // to resolve to. Effective keywords = derived floor + sidecar overrides.
    const byId = new Map<string, Candidate>();
    for (const match of validated) {
        const { schemaName, actionName } = primaryOf(match);
        if (schemaName === "" || actionName === "") {
            continue;
        }
        const id = `${schemaName}.${actionName}`;
        if (byId.has(id)) {
            // First match for an id wins; later duplicates are ignored.
            continue;
        }
        byId.set(id, {
            schemaName,
            actionName,
            keywords: ctx.contextSelectorKeywords.effective(
                schemaName,
                actionName,
            ),
            match,
        });
    }
    const candidates = [...byId.values()];
    if (candidates.length < 2) {
        // Nothing to choose between; no telemetry is emitted for this case.
        return undefined;
    }

    // Coverage: every candidate must have at least one keyword.
    const covered = candidates.every((c) => c.keywords.size > 0);
    const contextVector = ctx.conversationSignal.getContextVector();
    const scores = scorer.score(contextVector, candidates);
    const { minUniqueTokens, minMass, margin } = cfg.contextSelector;
    const decision = decide(scores, covered, {
        minUniqueTokens,
        minMass,
        margin,
    });

    const telemetryCandidates = toTelemetryCandidates(decision.ranked);
    const elapsedMs = performance.now() - startedAt;

    if (decision.kind === "abstain") {
        emitCollisionEvent(
            {
                kind: "grammarMatch",
                request,
                candidates: telemetryCandidates,
                classifier: cfg.grammarMatch.classifier,
                strategy: "context-weight",
                elapsedMs,
                note: `abstain:${decision.reason}`,
            },
            ctx,
        );
        return undefined;
    }

    const winner = decision.winner;
    const winning = byId.get(`${winner.schemaName}.${winner.actionName}`);
    if (winning === undefined) {
        // Defensive: winner id must be present. Treat as abstain rather than
        // resolving to the wrong match.
        return undefined;
    }

    emitCollisionEvent(
        {
            kind: "grammarMatch",
            request,
            candidates: telemetryCandidates,
            chosen: toTelemetryCandidate(winner),
            classifier: cfg.grammarMatch.classifier,
            strategy: "context-weight",
            elapsedMs,
            note: `resolve; matched ${winner.uniqueTokenCount} token(s), mass ${winner.score.toFixed(3)}`,
        },
        ctx,
    );

    // Reroute note: name up to three matched tokens, heaviest contribution
    // first. The copy keeps `winner.matched` unmodified.
    const agentName = getAppAgentName(winner.schemaName);
    const topTokens = [...winner.matched]
        .sort((a, b) => b.contribution - a.contribution)
        .slice(0, 3)
        .map((m) => m.token);
    const topicSuffix =
        topTokens.length > 0 ? ` (${topTokens.join(", ")})` : "";
    return {
        match: winning.match,
        note: `↪ routed to ${agentName} — recent topic${topicSuffix}`,
    };
}
const collisionCfg = config.collision.grammarMatch; + const contextSelectorCfg = config.collision.contextSelector; let chosen = validated[0]; // Registry-first detection runs independently of grammarMatch.detect: a @@ -293,10 +296,34 @@ export async function matchRequest( ); if ( decision === undefined && - collisionCfg.detect && + (contextSelectorCfg.detect || collisionCfg.detect) && isCollision(validated, collisionCfg.classifier) ) { - decision = resolveGrammarCollision(validated, systemContext, request); + // contextSelector tier (§11): a confident topical pick resolves here on + // the cache path (no LLM). On abstain it either falls through to the + // configured grammar strategy (default) or escalates to LLM translation. + if (contextSelectorCfg.detect) { + const resolution = resolveContextSelector( + validated, + systemContext, + request, + ); + if (resolution !== undefined) { + decision = { kind: "match", match: resolution.match }; + await displayInfo(resolution.note, context); + } else if ( + contextSelectorCfg.abstainFallback === "escalate-to-llm" + ) { + return undefined; + } + } + if (decision === undefined && collisionCfg.detect) { + decision = resolveGrammarCollision( + validated, + systemContext, + request, + ); + } } if (decision !== undefined) { if (decision.kind === "fallthrough") { From 4a9414a165d4dce0d4c1b973272db7564596cb30 Mon Sep 17 00:00:00 2001 From: George Ng Date: Wed, 1 Jul 2026 01:16:00 -0700 Subject: [PATCH 03/11] feat(dispatcher): @config collision contextSelector + @collision keywords surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - configCommandHandlers.ts: `@config collision contextSelector detect [on|off]` toggle (only detect is exposed, per §11.3); render the contextSelector row in both the HTML and text `@config collision` views; include it in the anyOn summary. 
- collisionKeywordHandlers.ts: `@collision keywords add|remove|list|clear` to inspect derived + override keywords and edit the collision-keywords.json sidecar (§5.3). `list` with no arg lists all overrides; with a target shows derived, delta, and merged effective sets. - collisionCommandHandlers.ts: register the `keywords` sub-command. The U-2 reroute affordance (displayInfo) shipped with the integration commit. Builds green. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../handlers/collisionCommandHandlers.ts | 2 + .../handlers/collisionKeywordHandlers.ts | 256 ++++++++++++++++++ .../system/handlers/configCommandHandlers.ts | 51 +++- 3 files changed, 308 insertions(+), 1 deletion(-) create mode 100644 ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionKeywordHandlers.ts diff --git a/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionCommandHandlers.ts b/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionCommandHandlers.ts index 04c47785e..2b6a9425c 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionCommandHandlers.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionCommandHandlers.ts @@ -34,6 +34,7 @@ import { getCollisionCorpusCommandHandlers } from "./collisionCorpusHandlers.js" import { CollisionNeighborhoodsCommandHandler } from "./collisionNeighborhoodHandlers.js"; import { getCollisionOptimizeCommandHandlers } from "./collisionOptimizeHandlers.js"; import { getCollisionPreferenceCommandHandlers } from "./collisionPreferenceHandlers.js"; +import { getCollisionKeywordCommandHandlers } from "./collisionKeywordHandlers.js"; // --------------------------------------------------------------------------- // `@collision events` — show recent events captured in the in-memory ring @@ -1282,6 +1283,7 @@ export function getCollisionCommandHandlers(): CommandHandlerTable { neighborhoods: new 
CollisionNeighborhoodsCommandHandler(), optimize: getCollisionOptimizeCommandHandlers(), preferences: getCollisionPreferenceCommandHandlers(), + keywords: getCollisionKeywordCommandHandlers(), "list-strategies": new CollisionSimilarListStrategiesCommandHandler(), }, diff --git a/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionKeywordHandlers.ts b/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionKeywordHandlers.ts new file mode 100644 index 000000000..0d488f286 --- /dev/null +++ b/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionKeywordHandlers.ts @@ -0,0 +1,256 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +// `@collision keywords` — inspect and edit the per-(schema, action) keyword +// overrides consumed by the contextSelector tier (design §5.3). Edits land in +// the profile-scoped `collision-keywords.json` sidecar as deltas over the +// derived lexical defaults; the merged effective set feeds the scorer (§9). + +import { ActionContext, ParsedCommandParams } from "@typeagent/agent-sdk"; +import { + CommandHandler, + CommandHandlerTable, +} from "@typeagent/agent-sdk/helpers/command"; +import { + displayResult, + displayWarn, +} from "@typeagent/agent-sdk/helpers/display"; +import { CommandHandlerContext } from "../../commandHandlerContext.js"; +import { KeywordVector } from "../../contextSelector/keywordVector.js"; + +type Target = { schemaName: string; actionName: string; id: string }; + +// Parse a `schema.action` token. The action name is the segment after the LAST +// dot (action names are single identifiers); the schema name is the remainder +// (may itself contain dots for sub-schemas). Returns undefined when malformed. 
/**
 * Parse a `schema.action` token.
 *
 * The action name is the segment after the LAST dot; everything before it is
 * the schema name, which may itself contain dots.
 *
 * @param token Raw user input; surrounding whitespace is ignored.
 * @returns The parsed target (`id` is the trimmed token), or `undefined` when
 * there is no dot or either side of the last dot is empty.
 */
function parseTarget(token: string): Target | undefined {
    const t = token.trim();
    const dot = t.lastIndexOf(".");
    if (dot <= 0 || dot >= t.length - 1) {
        return undefined;
    }
    return {
        schemaName: t.slice(0, dot),
        actionName: t.slice(dot + 1),
        id: t,
    };
}

// Parse a target and, when it is malformed, warn the user. Shared by every
// sub-command so the validation message stays identical across them.
function parseTargetOrWarn(
    context: ActionContext<CommandHandlerContext>,
    token: string,
): Target | undefined {
    const target = parseTarget(token);
    if (target === undefined) {
        displayWarn(
            `Invalid target "${token}". Expected schema.action.`,
            context,
        );
    }
    return target;
}

// Render a keyword vector as a sorted, comma-separated list.
function fmt(vector: KeywordVector): string {
    const list = [...vector].sort();
    return list.length > 0 ? list.join(", ") : "(none)";
}

// Display the derived keywords, the sidecar delta (replace, or add/remove) and
// the merged effective set for one target.
function showEffective(
    context: ActionContext<CommandHandlerContext>,
    target: Target,
): void {
    const ctx = context.sessionContext.agentContext;
    const derived = ctx.contextSelectorKeywords.derived(
        target.schemaName,
        target.actionName,
    );
    const effective = ctx.contextSelectorKeywords.effective(
        target.schemaName,
        target.actionName,
    );
    const delta = ctx.contextSelectorSidecar.deltaFor(
        target.schemaName,
        target.actionName,
    );
    const lines = [
        `keywords for ${target.id}:`,
        `  derived:   ${fmt(derived)}`,
    ];
    if (delta?.replace !== undefined) {
        // When a replace list is present, add/remove are not shown.
        lines.push(`  replace:   ${delta.replace.join(", ") || "(none)"}`);
    } else {
        if (delta?.add && delta.add.length > 0) {
            lines.push(`  + add:     ${delta.add.join(", ")}`);
        }
        if (delta?.remove && delta.remove.length > 0) {
            lines.push(`  - remove:  ${delta.remove.join(", ")}`);
        }
    }
    lines.push(`  effective: ${fmt(effective)}`);
    displayResult(lines.join("\n"), context);
}

class CollisionKeywordsListCommandHandler implements CommandHandler {
    public readonly description =
        "Show derived + override keywords, merged, for a schema.action (or list all overrides)";
    public readonly parameters = {
        args: {
            target: {
                description:
                    'A schema.action, e.g. "excel.addRow". Omit to list all overrides.',
                type: "string",
                optional: true,
            },
        },
    } as const;

    public async run(
        context: ActionContext<CommandHandlerContext>,
        params: ParsedCommandParams<typeof this.parameters>,
    ) {
        const ctx = context.sessionContext.agentContext;
        if (params.args.target === undefined) {
            // No target: summarize every override stored in the sidecar.
            const entries = ctx.contextSelectorSidecar.list();
            if (entries.length === 0) {
                displayResult(
                    "No keyword overrides. Add one with `@collision keywords add <schema.action> <keyword...>`.",
                    context,
                );
                return;
            }
            const lines = entries.map(({ id, delta }) => {
                const parts: string[] = [];
                if (delta.replace) parts.push(`replace=[${delta.replace}]`);
                if (delta.add?.length) parts.push(`add=[${delta.add}]`);
                if (delta.remove?.length)
                    parts.push(`remove=[${delta.remove}]`);
                return `- ${id}: ${parts.join(" ")}`;
            });
            displayResult(
                `Keyword overrides (${entries.length}):\n${lines.join("\n")}`,
                context,
            );
            return;
        }
        const target = parseTargetOrWarn(context, params.args.target);
        if (target !== undefined) {
            showEffective(context, target);
        }
    }
}

class CollisionKeywordsAddCommandHandler implements CommandHandler {
    public readonly description =
        "Add discriminative keywords for a schema.action (layered over the derived defaults)";
    public readonly parameters = {
        args: {
            target: {
                description: 'The schema.action to tune, e.g. "excel.addRow".',
                type: "string",
            },
            keywords: {
                description: "One or more keywords to add.",
                type: "string",
                multiple: true,
            },
        },
    } as const;

    public async run(
        context: ActionContext<CommandHandlerContext>,
        params: ParsedCommandParams<typeof this.parameters>,
    ) {
        const target = parseTargetOrWarn(context, params.args.target);
        if (target === undefined) {
            return;
        }
        context.sessionContext.agentContext.contextSelectorSidecar.addKeywords(
            target.id,
            params.args.keywords,
        );
        // Echo the merged result so the edit is visible immediately.
        showEffective(context, target);
    }
}

class CollisionKeywordsRemoveCommandHandler implements CommandHandler {
    public readonly description =
        "Remove keywords from a schema.action's effective set (masks derived + added)";
    public readonly parameters = {
        args: {
            target: {
                description: 'The schema.action to tune, e.g. "excel.addRow".',
                type: "string",
            },
            keywords: {
                description: "One or more keywords to remove.",
                type: "string",
                multiple: true,
            },
        },
    } as const;

    public async run(
        context: ActionContext<CommandHandlerContext>,
        params: ParsedCommandParams<typeof this.parameters>,
    ) {
        const target = parseTargetOrWarn(context, params.args.target);
        if (target === undefined) {
            return;
        }
        context.sessionContext.agentContext.contextSelectorSidecar.removeKeywords(
            target.id,
            params.args.keywords,
        );
        // Echo the merged result so the edit is visible immediately.
        showEffective(context, target);
    }
}

class CollisionKeywordsClearCommandHandler implements CommandHandler {
    public readonly description =
        "Clear all overrides for a schema.action (revert to derived-only)";
    public readonly parameters = {
        args: {
            target: {
                description: 'The schema.action to reset, e.g. "excel.addRow".',
                type: "string",
            },
        },
    } as const;

    public async run(
        context: ActionContext<CommandHandlerContext>,
        params: ParsedCommandParams<typeof this.parameters>,
    ) {
        const target = parseTargetOrWarn(context, params.args.target);
        if (target === undefined) {
            return;
        }
        const removed =
            context.sessionContext.agentContext.contextSelectorSidecar.clearEntry(
                target.id,
            );
        displayResult(
            removed
                ? `Cleared overrides for ${target.id} (reverted to derived-only).`
                : `No overrides for ${target.id}.`,
            context,
        );
    }
}

// Command table for `@collision keywords`; `list` runs when no sub-command is
// given.
export function getCollisionKeywordCommandHandlers(): CommandHandlerTable {
    return {
        description:
            "Inspect and tune per-action keyword vectors used by the contextSelector tier",
        defaultSubCommand: "list",
        commands: {
            list: new CollisionKeywordsListCommandHandler(),
            add: new CollisionKeywordsAddCommandHandler(),
            remove: new CollisionKeywordsRemoveCommandHandler(),
            clear: new CollisionKeywordsClearCommandHandler(),
        },
    };
}
+ contextSelector + detect ${statusPill(cfg.contextSelector.detect)} + window ${cfg.contextSelector.windowTurns} + decay ${cfg.contextSelector.decay} + minTokens ${cfg.contextSelector.minUniqueTokens} + minMass ${cfg.contextSelector.minMass} + margin ${cfg.contextSelector.margin} + abstain ${escapeHtml(cfg.contextSelector.abstainFallback)} +
`; const anyOn = cfg.static.detect || cfg.grammarMatch.detect || cfg.llmSelect.detect || - cfg.fuzzy.detect; + cfg.fuzzy.detect || + cfg.contextSelector.detect; const summary = anyOn ? `
Detection is active on at least one point. Telemetry is captured when emit=on; remote upload requires @config log db on.
` : `
All detection points are off — runtime behavior is byte-identical to legacy first-match. Opt in with @config collision <point> detect on.
`; @@ -2315,6 +2335,15 @@ function renderCollisionShowText(cfg: { registryFirst: boolean; remember: string; }; + contextSelector: { + detect: boolean; + windowTurns: number; + decay: number; + minUniqueTokens: number; + minMass: number; + margin: number; + abstainFallback: string; + }; }): string[] { const onOff = (b: boolean) => (b ? "on" : "off"); const expId = cfg.telemetry.experimentId @@ -2330,6 +2359,7 @@ function renderCollisionShowText(cfg: { ` multipleActionBehavior: ${cfg.multipleActionBehavior}`, ` telemetry: emit=${onOff(cfg.telemetry.emit)} debugLog=${onOff(cfg.telemetry.debugLog)}${expId}`, ` preference: enabled=${onOff(cfg.preference.enabled)} source=${cfg.preference.ambiguitySource} registryFirst=${onOff(cfg.preference.registryFirst)} remember=${cfg.preference.remember} registry=${cfg.preference.registryPath ? `"${cfg.preference.registryPath}"` : "(empty)"}`, + ` contextSelector: detect=${onOff(cfg.contextSelector.detect)} window=${cfg.contextSelector.windowTurns} decay=${cfg.contextSelector.decay} minTokens=${cfg.contextSelector.minUniqueTokens} minMass=${cfg.contextSelector.minMass} margin=${cfg.contextSelector.margin} abstain=${cfg.contextSelector.abstainFallback}`, ]; } @@ -2758,6 +2788,25 @@ function getCollisionCommandHandlers(): CommandHandlerTable { experimentId: new CollisionExperimentIdCommandHandler(), }, }, + contextSelector: { + description: + "Configure the context-weighted resolution tier (deterministic topical tiebreaker on the grammar path)", + commands: { + detect: getToggleHandlerTable( + "context-weighted resolution (contextSelector)", + async (context, enable) => { + await changeContextConfig( + { + collision: { + contextSelector: { detect: enable }, + }, + } as SessionOptions, + context, + ); + }, + ), + }, + }, }, }; } From aa4ab797426106dd7e489efe2f7ce2b184c7316e Mon Sep 17 00:00:00 2001 From: George Ng Date: Wed, 1 Jul 2026 01:23:22 -0700 Subject: [PATCH 04/11] test(dispatcher): unit tests for the contextSelector engine + 
orchestrator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 48 specs across 6 suites, all deterministic and LLM-free: - contextSelectorTokenize: protected patterns, stopword/verb drops, de-camel, NFKC. - contextSelectorSignal: decay math (design §14 Scenario 1), window cap, history-only, reset, within-turn multiplicity. - contextSelectorScorer: candidate-local IDF (disc=1 unique, 0 shared, graduated), matched-token detail, stable ordering. - contextSelectorDecision: coverage / no-signal / min-unique-tokens / min-mass / margin gates, total ordering, quantize; §14 Scenario 1 (resolve) + Scenario 2 (abstain). - contextSelectorKeywords: lexical extraction, param walk (arrays/refs), sidecar add/remove/clear + canonicalization + disk reload + malformed-degrades-to-empty, index derive/memoize/effective/invalidate. - contextSelectorResolve: end-to-end resolve + abstain (coverage/margin/no-signal) with telemetry assertions. Full package suite green: 66 suites / 1032 tests. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../test/contextSelectorDecision.spec.ts | 183 ++++++++++++++++ .../test/contextSelectorKeywords.spec.ts | 200 ++++++++++++++++++ .../test/contextSelectorResolve.spec.ts | 156 ++++++++++++++ .../test/contextSelectorScorer.spec.ts | 89 ++++++++ .../test/contextSelectorSignal.spec.ts | 87 ++++++++ .../test/contextSelectorTokenize.spec.ts | 70 ++++++ 6 files changed, 785 insertions(+) create mode 100644 ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts create mode 100644 ts/packages/dispatcher/dispatcher/test/contextSelectorKeywords.spec.ts create mode 100644 ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts create mode 100644 ts/packages/dispatcher/dispatcher/test/contextSelectorScorer.spec.ts create mode 100644 ts/packages/dispatcher/dispatcher/test/contextSelectorSignal.spec.ts create mode 100644 ts/packages/dispatcher/dispatcher/test/contextSelectorTokenize.spec.ts diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts new file mode 100644 index 000000000..8d57b42b4 --- /dev/null +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts @@ -0,0 +1,183 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +import { + decide, + rankScores, + quantize, + DecisionConfig, +} from "../src/context/contextSelector/decision.js"; +import { + CandidateScore, + TfIdfScorer, + ScorerCandidate, +} from "../src/context/contextSelector/scorer.js"; +import { ContextVector } from "../src/context/contextSelector/conversationSignal.js"; + +function scored( + schemaName: string, + actionName: string, + score: number, + uniqueTokenCount: number, +): CandidateScore { + return { schemaName, actionName, score, uniqueTokenCount, matched: [] }; +} + +const CONFIG: DecisionConfig = { + minUniqueTokens: 2, + minMass: 1.0, + margin: 1.0, +}; + +describe("contextSelector/decision", () => { + it("abstains when coverage fails", () => { + const d = decide([scored("a", "x", 9, 5)], false, CONFIG); + expect(d.kind).toBe("abstain"); + if (d.kind === "abstain") expect(d.reason).toBe("coverage"); + }); + + it("abstains with no signal (winner score 0)", () => { + const d = decide( + [scored("a", "x", 0, 0), scored("b", "y", 0, 0)], + true, + CONFIG, + ); + expect(d.kind).toBe("abstain"); + if (d.kind === "abstain") expect(d.reason).toBe("no-signal"); + }); + + it("abstains below minUniqueTokens", () => { + const d = decide( + [scored("a", "x", 5, 1), scored("b", "y", 0, 0)], + true, + CONFIG, + ); + expect(d.kind).toBe("abstain"); + if (d.kind === "abstain") expect(d.reason).toBe("min-unique-tokens"); + }); + + it("abstains below minMass", () => { + const d = decide( + [scored("a", "x", 0.4, 2), scored("b", "y", 0, 0)], + true, + CONFIG, + ); + expect(d.kind).toBe("abstain"); + if (d.kind === "abstain") expect(d.reason).toBe("min-mass"); + }); + + it("abstains on a genuine tie (margin)", () => { + const d = decide( + [scored("a", "x", 3.11, 4), scored("b", "y", 3.08, 4)], + true, + CONFIG, + ); + expect(d.kind).toBe("abstain"); + if (d.kind === "abstain") expect(d.reason).toBe("margin"); + }); + + it("resolves a clear topical winner", () => { + const d = decide( + [ + scored("excel", "addRow", 5.54, 
5), + scored("list", "addItems", 0, 0), + ], + true, + CONFIG, + ); + expect(d.kind).toBe("resolve"); + if (d.kind === "resolve") { + expect(d.winner.schemaName).toBe("excel"); + expect(d.runnerUp?.schemaName).toBe("list"); + } + }); + + it("rankScores imposes a total order (score desc, then schema, then action)", () => { + const ranked = rankScores([ + scored("b", "y", 1, 1), + scored("a", "z", 1, 1), + scored("a", "a", 1, 1), + scored("c", "c", 2, 1), + ]); + expect(ranked.map((r) => `${r.schemaName}.${r.actionName}`)).toEqual([ + "c.c", + "a.a", + "a.z", + "b.y", + ]); + }); + + it("quantize collapses float noise", () => { + expect(quantize(0.1 + 0.2)).toBe(0.3); + }); +}); + +// End-to-end §14 worked examples: signal-free — scores are supplied directly to +// the scorer via a synthetic context vector, then decided. +describe("contextSelector §14 worked examples (scorer + decision)", () => { + const scorer = new TfIdfScorer(); + const excel: ScorerCandidate = { + schemaName: "excel", + actionName: "addRow", + keywords: new Set([ + "excel", + "spreadsheet", + "cell", + "formula", + "workbook", + "row", + "column", + ]), + }; + const list: ScorerCandidate = { + schemaName: "list", + actionName: "addItems", + keywords: new Set([ + "list", + "item", + "todo", + "grocery", + "shopping", + "checklist", + ]), + }; + + it("Scenario 1 resolves to excel", () => { + const c: ContextVector = new Map([ + ["formula", 1.71], + ["spreadsheet", 1.63], + ["cell", 0.81], + ["excel", 0.73], + ["row", 0.66], + ]); + const d = decide(scorer.score(c, [excel, list]), true, CONFIG); + expect(d.kind).toBe("resolve"); + if (d.kind === "resolve") expect(d.winner.schemaName).toBe("excel"); + }); + + it("Scenario 2 abstains on a genuine tie", () => { + const c: ContextVector = new Map([ + ["spreadsheet", 0.9], + ["formula", 0.9], + ["grocery", 0.81], + ["shopping", 0.81], + ["todo", 0.73], + ["checklist", 0.73], + ["excel", 0.66], + ["cell", 0.66], + ]); + const d = decide(scorer.score(c, 
[excel, list]), true, CONFIG); + expect(d.kind).toBe("abstain"); + if (d.kind === "abstain") expect(d.reason).toBe("margin"); + }); + + it("abstains when the conversation matches neither candidate", () => { + const c: ContextVector = new Map([ + ["meeting", 6], + ["calendar", 3], + ]); + const d = decide(scorer.score(c, [excel, list]), true, CONFIG); + expect(d.kind).toBe("abstain"); + if (d.kind === "abstain") expect(d.reason).toBe("no-signal"); + }); +}); diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorKeywords.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorKeywords.spec.ts new file mode 100644 index 000000000..d52e5002b --- /dev/null +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorKeywords.spec.ts @@ -0,0 +1,200 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { ActionSchemaTypeDefinition } from "@typeagent/action-schema"; +import { + extractKeywords, + buildExtractionInput, +} from "../src/context/contextSelector/keywordExtractor.js"; +import { + KeywordSidecar, + COLLISION_KEYWORDS_FILE, +} from "../src/context/contextSelector/keywordSidecar.js"; +import { + KeywordIndex, + ActionSchemaSource, +} from "../src/context/contextSelector/keywordIndex.js"; + +function actionDef( + actionName: string, + comments: string[], + params: Record, +): ActionSchemaTypeDefinition { + return { + alias: false, + name: actionName, + comments, + type: { + type: "object", + fields: { + actionName: { + type: { type: "string-union", typeEnum: [actionName] }, + }, + parameters: { + type: { type: "object", fields: params }, + }, + }, + }, + } as unknown as ActionSchemaTypeDefinition; +} + +describe("contextSelector/keywordExtractor", () => { + it("extracts topical keywords and drops generic verbs / stopwords", () => { + const kw = extractKeywords({ + schemaDescription: "Grocery shopping and todo lists", + 
actionName: "addItems", + actionComments: ["Add items to the list"], + paramNames: ["itemName"], + paramComments: ["the grocery item to add"], + }); + expect(kw.has("grocery")).toBe(true); + expect(kw.has("shopping")).toBe(true); + expect(kw.has("todo")).toBe(true); + expect(kw.has("items")).toBe(true); + expect(kw.has("item")).toBe(true); + // generic verbs / stopwords / "list" (generic) are dropped + expect(kw.has("add")).toBe(false); + expect(kw.has("the")).toBe(false); + expect(kw.has("list")).toBe(false); + }); + + it("caps at topN, keeping the most frequent (deterministic tiebreak)", () => { + const kw = extractKeywords( + { + schemaDescription: "alpha alpha beta gamma delta", + actionName: "doThing", + }, + 2, + ); + // alpha (freq 2) always kept; the second slot is the alphabetically + // first among the freq-1 tokens (beta). + expect([...kw].sort()).toEqual(["alpha", "beta"]); + }); + + it("buildExtractionInput walks parameter names, comments, arrays and refs", () => { + const def = actionDef("addRow", ["Add a row to the spreadsheet"], { + rowData: { type: { type: "string" }, comments: ["the row values"] }, + tags: { + type: { type: "array", elementType: { type: "string" } }, + comments: ["category labels"], + }, + }); + const input = buildExtractionInput( + "addRow", + def, + "Spreadsheet editing", + ); + const kw = extractKeywords(input); + expect(kw.has("row")).toBe(true); + expect(kw.has("spreadsheet")).toBe(true); + expect(kw.has("values")).toBe(true); + expect(kw.has("category")).toBe(true); + expect(kw.has("labels")).toBe(true); + }); +}); + +describe("contextSelector/keywordSidecar", () => { + it("adds, removes, and canonicalizes multi-word keywords", () => { + const s = KeywordSidecar.load(undefined); + s.addKeywords("excel.addRow", ["Spreadsheet", "pivot table"]); + const d = s.deltaFor("excel", "addRow")!; + expect(new Set(d.add)).toEqual( + new Set(["spreadsheet", "pivot", "table"]), + ); + + s.removeKeywords("excel.addRow", ["spreadsheet"]); + 
const d2 = s.deltaFor("excel", "addRow")!; + expect(d2.add).not.toContain("spreadsheet"); + expect(d2.remove).toContain("spreadsheet"); + }); + + it("keys sub-schema ids by the last dot", () => { + const s = KeywordSidecar.load(undefined); + s.addKeywords("excel.chart.addSeries", ["series"]); + expect(s.deltaFor("excel.chart", "addSeries")?.add).toContain("series"); + }); + + it("clears an entry", () => { + const s = KeywordSidecar.load(undefined); + s.addKeywords("a.b", ["coupon"]); + expect(s.clearEntry("a.b")).toBe(true); + expect(s.isEmpty).toBe(true); + }); + + it("ignores sub-minimum-length keywords (canonicalized away)", () => { + const s = KeywordSidecar.load(undefined); + s.addKeywords("a.b", ["x"]); + expect(s.isEmpty).toBe(true); + }); + + it("persists to disk and reloads; degrades to empty on malformed JSON", () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "kw-sidecar-")); + try { + const s = KeywordSidecar.load(dir); + s.addKeywords("excel.addRow", ["spreadsheet"]); + const reloaded = KeywordSidecar.load(dir); + expect(reloaded.deltaFor("excel", "addRow")?.add).toContain( + "spreadsheet", + ); + + fs.writeFileSync( + path.join(dir, COLLISION_KEYWORDS_FILE), + "{ not json", + "utf8", + ); + expect(KeywordSidecar.load(dir).isEmpty).toBe(true); + } finally { + fs.rmSync(dir, { recursive: true, force: true }); + } + }); +}); + +describe("contextSelector/keywordIndex", () => { + const def = actionDef("addItems", ["Add items"], { + items: { type: { type: "string" }, comments: ["grocery items"] }, + }); + const source: ActionSchemaSource = { + getSchemaDescription: () => "grocery shopping list", + getActionDefinition: (_s, a) => (a === "addItems" ? 
def : undefined), + }; + + it("derives keywords and memoizes them", () => { + const sidecar = KeywordSidecar.load(undefined); + const index = new KeywordIndex(source, () => sidecar); + const a = index.derived("list", "addItems"); + const b = index.derived("list", "addItems"); + expect(a).toBe(b); // memoized identity + expect(a.has("grocery")).toBe(true); + expect(a.has("shopping")).toBe(true); + }); + + it("returns an empty vector for an unknown action", () => { + const index = new KeywordIndex(source, () => + KeywordSidecar.load(undefined), + ); + expect(index.derived("list", "missing").size).toBe(0); + }); + + it("merges sidecar add/remove into the effective vector", () => { + const sidecar = KeywordSidecar.load(undefined); + const index = new KeywordIndex(source, () => sidecar); + sidecar.addKeywords("list.addItems", ["coupon"]); + sidecar.removeKeywords("list.addItems", ["shopping"]); + const eff = index.effective("list", "addItems"); + expect(eff.has("coupon")).toBe(true); + expect(eff.has("shopping")).toBe(false); + expect(eff.has("grocery")).toBe(true); + }); + + it("invalidate drops the derived memo", () => { + const index = new KeywordIndex(source, () => + KeywordSidecar.load(undefined), + ); + const a = index.derived("list", "addItems"); + index.invalidate("list"); + expect(index.derived("list", "addItems")).not.toBe(a); + }); +}); diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts new file mode 100644 index 000000000..2439186a9 --- /dev/null +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts @@ -0,0 +1,156 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +import { MatchResult } from "agent-cache"; +import type { CommandHandlerContext } from "../src/context/commandHandlerContext.js"; +import { resolveContextSelector } from "../src/translation/matchContextSelector.js"; +import { ContextVector } from "../src/context/contextSelector/conversationSignal.js"; +import { CollisionEvent } from "../src/context/collisionTelemetry.js"; + +type Overrides = { + contextVector: ContextVector; + keywords: Record>; + contextSelector?: Record; +}; + +function makeCtx(o: Overrides): { + ctx: CommandHandlerContext; + events: CollisionEvent[]; +} { + const contextSelector = { + detect: true, + windowTurns: 20, + decay: 0.9, + minUniqueTokens: 2, + minMass: 1.0, + margin: 1.0, + abstainFallback: "defer-to-strategy", + ...o.contextSelector, + }; + const events: CollisionEvent[] = []; + const ctx = { + collisionEvents: events, + session: { + sessionDirPath: undefined, + getConfig: () => ({ + collision: { + contextSelector, + grammarMatch: { classifier: "distinctActions" }, + telemetry: { emit: true, debugLog: false }, + }, + }), + }, + conversationSignal: { getContextVector: () => o.contextVector }, + contextSelectorKeywords: { + effective: (s: string, a: string) => + o.keywords[`${s}.${a}`] ?? 
new Set(), + }, + } as unknown as CommandHandlerContext; + return { ctx, events }; +} + +function fakeMatch(schemaName: string, actionName: string): MatchResult { + return { + match: { actions: [{ action: { schemaName, actionName } }] }, + } as unknown as MatchResult; +} + +const excelMatch = fakeMatch("excel", "addRow"); +const listMatch = fakeMatch("list", "addItems"); + +function vector(entries: Record): ContextVector { + return new Map(Object.entries(entries)); +} + +describe("resolveContextSelector", () => { + it("resolves to the topical winner and emits a context-weight event", () => { + const { ctx, events } = makeCtx({ + contextVector: vector({ spreadsheet: 2, formula: 1.5 }), + keywords: { + "excel.addRow": new Set(["spreadsheet", "formula", "cell"]), + "list.addItems": new Set(["grocery", "shopping"]), + }, + }); + const res = resolveContextSelector( + [excelMatch, listMatch], + ctx, + "add a row", + ); + expect(res).toBeDefined(); + expect(res!.match).toBe(excelMatch); + expect(res!.note).toContain("excel"); + expect(res!.note).toContain("routed"); + expect(events).toHaveLength(1); + expect(events[0].strategy).toBe("context-weight"); + expect(events[0].chosen?.schemaName).toBe("excel"); + }); + + it("abstains (coverage) when a candidate has no keywords", () => { + const { ctx, events } = makeCtx({ + contextVector: vector({ spreadsheet: 8, formula: 5 }), + keywords: { + "excel.addRow": new Set(["spreadsheet", "formula"]), + "list.addItems": new Set(), + }, + }); + const res = resolveContextSelector( + [excelMatch, listMatch], + ctx, + "add a row", + ); + expect(res).toBeUndefined(); + expect(events[0].note).toBe("abstain:coverage"); + }); + + it("abstains (margin) on a genuine tie", () => { + const { ctx, events } = makeCtx({ + contextVector: vector({ + spreadsheet: 1, + formula: 1, + grocery: 1, + shopping: 1, + }), + keywords: { + "excel.addRow": new Set(["spreadsheet", "formula"]), + "list.addItems": new Set(["grocery", "shopping"]), + }, + }); + const 
res = resolveContextSelector( + [excelMatch, listMatch], + ctx, + "add a row", + ); + expect(res).toBeUndefined(); + expect(events[0].note).toBe("abstain:margin"); + }); + + it("abstains (no-signal) when the conversation matches neither", () => { + const { ctx, events } = makeCtx({ + contextVector: vector({ meeting: 6, calendar: 3 }), + keywords: { + "excel.addRow": new Set(["spreadsheet", "formula"]), + "list.addItems": new Set(["grocery", "shopping"]), + }, + }); + const res = resolveContextSelector( + [excelMatch, listMatch], + ctx, + "add a row", + ); + expect(res).toBeUndefined(); + expect(events[0].note).toBe("abstain:no-signal"); + }); + + it("returns undefined when there are fewer than two distinct candidates", () => { + const { ctx } = makeCtx({ + contextVector: vector({ spreadsheet: 5 }), + keywords: { "excel.addRow": new Set(["spreadsheet"]) }, + }); + const res = resolveContextSelector( + [excelMatch, fakeMatch("excel", "addRow")], + ctx, + "add a row", + ); + expect(res).toBeUndefined(); + }); +}); diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorScorer.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorScorer.spec.ts new file mode 100644 index 000000000..981f2eebb --- /dev/null +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorScorer.spec.ts @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +import { + TfIdfScorer, + ScorerCandidate, +} from "../src/context/contextSelector/scorer.js"; +import { ContextVector } from "../src/context/contextSelector/conversationSignal.js"; + +const scorer = new TfIdfScorer(); + +function cand( + schemaName: string, + actionName: string, + keywords: string[], +): ScorerCandidate { + return { schemaName, actionName, keywords: new Set(keywords) }; +} + +function vector(entries: Record): ContextVector { + return new Map(Object.entries(entries)); +} + +describe("contextSelector/scorer (TF-IDF, candidate-local IDF)", () => { + it("scores tokens unique to a candidate with disc=1", () => { + const c = vector({ spreadsheet: 2, grocery: 3 }); + const scores = scorer.score(c, [ + cand("excel", "addRow", ["spreadsheet", "cell"]), + cand("list", "addItems", ["grocery", "todo"]), + ]); + const excel = scores.find((s) => s.schemaName === "excel")!; + const list = scores.find((s) => s.schemaName === "list")!; + expect(excel.score).toBeCloseTo(2, 5); // only spreadsheet in context + expect(excel.uniqueTokenCount).toBe(1); + expect(list.score).toBeCloseTo(3, 5); + }); + + it("cancels a token shared by all candidates (disc=0)", () => { + const c = vector({ shared: 5, spreadsheet: 2, grocery: 4 }); + const scores = scorer.score(c, [ + cand("excel", "addRow", ["shared", "spreadsheet"]), + cand("list", "addItems", ["shared", "grocery"]), + ]); + const excel = scores.find((s) => s.schemaName === "excel")!; + // "shared" (disc=0) contributes nothing; only "spreadsheet". + expect(excel.score).toBeCloseTo(2, 5); + expect(excel.uniqueTokenCount).toBe(1); + // Shared token still appears in the matched detail with disc 0. 
+ const sharedMatch = excel.matched.find((m) => m.token === "shared")!; + expect(sharedMatch.disc).toBeCloseTo(0, 5); + expect(sharedMatch.contribution).toBeCloseTo(0, 5); + }); + + it("graduates disc for a token shared by 2 of 3 candidates", () => { + const c = vector({ item: 10 }); + const scores = scorer.score(c, [ + cand("a", "x", ["item"]), + cand("b", "y", ["item"]), + cand("c", "z", ["other"]), + ]); + // disc(item) = log(3/2)/log(3) + const expected = 10 * (Math.log(3 / 2) / Math.log(3)); + const a = scores.find((s) => s.schemaName === "a")!; + expect(a.score).toBeCloseTo(expected, 5); + }); + + it("ignores candidate keywords absent from the context vector", () => { + const c = vector({ spreadsheet: 1 }); + const scores = scorer.score(c, [ + cand("excel", "addRow", ["spreadsheet", "pivot", "chart"]), + cand("list", "addItems", ["grocery"]), + ]); + const excel = scores.find((s) => s.schemaName === "excel")!; + expect(excel.uniqueTokenCount).toBe(1); // only spreadsheet matched + }); + + it("sorts matched tokens by token for stable telemetry", () => { + const c = vector({ zebra: 1, apple: 1, mango: 1 }); + const [only] = scorer.score(c, [ + cand("a", "x", ["zebra", "apple", "mango"]), + cand("b", "y", ["other"]), + ]); + expect(only.matched.map((m) => m.token)).toEqual([ + "apple", + "mango", + "zebra", + ]); + }); +}); diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorSignal.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorSignal.spec.ts new file mode 100644 index 000000000..41f215440 --- /dev/null +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorSignal.spec.ts @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +import { + RingBufferSignalSource, + SignalConfig, +} from "../src/context/contextSelector/conversationSignal.js"; + +function source(config: Partial = {}) { + const cfg: SignalConfig = { windowTurns: 20, decay: 0.9, ...config }; + return new RingBufferSignalSource(() => cfg); +} + +describe("contextSelector/conversationSignal", () => { + it("recency-decays tokens by turn age (design §14 Scenario 1)", () => { + const s = source(); + // Recorded oldest -> newest; the newest prior turn is age 1. + s.recordRequest("scroll to the last row"); // age 4 + s.recordRequest("open the excel spreadsheet"); // age 3 + s.recordRequest("which cell has that formula"); // age 2 + s.recordRequest("fix the spreadsheet formula"); // age 1 + + const v = s.getContextVector(); + expect(v.get("formula")).toBeCloseTo(0.9 + 0.81, 5); // ages 1,2 + expect(v.get("spreadsheet")).toBeCloseTo(0.9 + 0.729, 5); // ages 1,3 + expect(v.get("cell")).toBeCloseTo(0.81, 5); // age 2 + expect(v.get("excel")).toBeCloseTo(0.729, 5); // age 3 + expect(v.get("row")).toBeCloseTo(0.6561, 5); // age 4 + }); + + it("is history-only — the current request is not recorded until after resolution", () => { + const s = source(); + s.recordRequest("open the spreadsheet"); + // Simulate scoring the current turn BEFORE it is recorded. 
+ const v = s.getContextVector(); + expect(v.has("spreadsheet")).toBe(true); + expect(v.has("row")).toBe(false); // "add a row" not yet recorded + }); + + it("caps the buffer at windowTurns, dropping the oldest", () => { + const s = source({ windowTurns: 3 }); + for (let i = 0; i < 10; i++) { + s.recordRequest(`topic${i}`); + } + expect(s.size).toBe(3); + expect(s.snapshot()).toEqual(["topic7", "topic8", "topic9"]); + }); + + it("honors a shrunk window at scoring time", () => { + let windowTurns = 20; + const s = new RingBufferSignalSource(() => ({ + windowTurns, + decay: 0.9, + })); + for (let i = 0; i < 5; i++) { + s.recordRequest(`alpha${i}`); + } + windowTurns = 2; + const v = s.getContextVector(); + // Only the last 2 turns contribute. + expect(v.has("alpha4")).toBe(true); + expect(v.has("alpha3")).toBe(true); + expect(v.has("alpha2")).toBe(false); + }); + + it("ignores empty / whitespace-only requests", () => { + const s = source(); + s.recordRequest(" "); + s.recordRequest(""); + expect(s.size).toBe(0); + }); + + it("reset clears the buffer", () => { + const s = source(); + s.recordRequest("spreadsheet formula"); + s.reset(); + expect(s.size).toBe(0); + expect(s.getContextVector().size).toBe(0); + }); + + it("counts within-turn multiplicity", () => { + const s = source(); + s.recordRequest("formula formula formula"); + // Single turn at age 1: three occurrences each weighted 0.9. + expect(s.getContextVector().get("formula")).toBeCloseTo(0.9 * 3, 5); + }); +}); diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorTokenize.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorTokenize.spec.ts new file mode 100644 index 000000000..9f0716cdf --- /dev/null +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorTokenize.spec.ts @@ -0,0 +1,70 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +import { + tokenize, + tokenizeIdentifier, + deCamelCase, + isStopword, + isGenericVerb, +} from "../src/context/contextSelector/tokenize.js"; + +describe("contextSelector/tokenize", () => { + it("lowercases, NFKC-normalizes, and strips punctuation", () => { + expect(tokenize("Spreadsheet, FORMULA!")).toEqual([ + "spreadsheet", + "formula", + ]); + }); + + it("drops stopwords and generic CRUD verbs", () => { + // "add" (generic verb) and "the"/"to" (stopwords) are dropped. + expect(tokenize("add the eggs to my grocery")).toEqual([ + "eggs", + "grocery", + ]); + expect(isStopword("the")).toBe(true); + expect(isGenericVerb("add")).toBe(true); + expect(isGenericVerb("spreadsheet")).toBe(false); + }); + + it("drops sub-minimum-length tokens", () => { + expect(tokenize("a i x")).toEqual([]); + }); + + it("preserves protected product / language / ref patterns", () => { + expect(tokenize("I love C# and C++ and .NET")).toEqual([ + "love", + "c#", + "c++", + ".net", + ]); + expect(tokenize("select range A1:B2")).toEqual(["range", "a1:b2"]); + }); + + it("de-camelCases identifiers", () => { + expect(deCamelCase("addItems")).toBe("add Items"); + expect(deCamelCase("HTMLParser")).toBe("HTML Parser"); + expect(deCamelCase("row_data")).toBe("row data"); + expect(deCamelCase("row-data")).toBe("row data"); + }); + + it("tokenizeIdentifier de-camels then drops generic verbs", () => { + expect(tokenizeIdentifier("addItems")).toEqual(["items"]); + expect(tokenizeIdentifier("getUserProfile")).toEqual([ + "user", + "profile", + ]); + }); + + it("is deterministic and order-preserving", () => { + const a = tokenize("spreadsheet formula spreadsheet"); + expect(a).toEqual(["spreadsheet", "formula", "spreadsheet"]); + expect(tokenize("spreadsheet formula spreadsheet")).toEqual(a); + }); + + it("returns [] for empty input", () => { + expect(tokenize("")).toEqual([]); + expect(tokenize(" ")).toEqual([]); + }); +}); From ed539c06695707c6f110c11a83fd5183737c13a5 Mon Sep 17 00:00:00 2001 
From: George Ng Date: Wed, 1 Jul 2026 01:25:04 -0700 Subject: [PATCH 05/11] docs(dispatcher): document the contextSelector collision-resolution tier Update the dispatcher README "Action Collision Detection" section: - add the contextSelector config block to the schema listing; - new "Context-weighted resolution (contextSelector)" subsection covering the per-collision pipeline (context vector -> keywords -> TF-IDF -> decide), the two extensibility seams, telemetry/affordance, and the abstain fallback; - add the `@config collision contextSelector detect` and `@collision keywords` rows to the shell command table. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ts/packages/dispatcher/dispatcher/README.md | 55 ++++++++++++++++----- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/ts/packages/dispatcher/dispatcher/README.md b/ts/packages/dispatcher/dispatcher/README.md index dbbab66ac..7a2c1f72d 100644 --- a/ts/packages/dispatcher/dispatcher/README.md +++ b/ts/packages/dispatcher/dispatcher/README.md @@ -307,6 +307,15 @@ collision: { scorer: "placeholder" | "actionEmbedding"; strategy: "first-match" | "score-rank" | "priority" | "user-clarify"; }; + contextSelector: { // context-weighted resolution tier (grammar path only) + detect: boolean; // off by default + windowTurns: number; // ring-buffer look-back N (default 20) + decay: number; // per-turn recency decay λ (default 0.9) + minUniqueTokens: number; // evidence gate (default 2) + minMass: number; // evidence gate — min winner score (default 1.0) + margin: number; // clear-winner margin over runner-up (default 1.0) + abstainFallback: "defer-to-strategy" | "escalate-to-llm"; // default defer-to-strategy + }; priorityOrder: string; // comma-separated agent names; "" = registration order multipleActionBehavior: | "downgrade-to-priority" // safest default @@ -335,17 +344,19 @@ Each event carries `kind` (detection point), `strategy`, `candidates[]` (with pe Runtime opt-in via 
`@config collision …` and ring-buffer inspection via `@collision events`: -| Command | Effect | -| ------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `@config collision` | Render the current collision config as an HTML status table. | -| `@config collision detect [on\|off]` | Toggle a detection point (`static` / `grammarMatch` / `llmSelect` / `fuzzy`). Persisted to `data.json`. | -| `@config collision strategy ` | Set the resolution strategy (`first-match` / `score-rank` / `priority` / `user-clarify`; static uses `warn` / `error`). | -| `@config collision priority []` | Set / show the comma-separated `priorityOrder` used by the `priority` strategy. | -| `@config collision telemetry emit [on\|off]` | Toggle the ring-buffer + JSONL capture. | -| `@config collision telemetry debugLog [on\|off]` | Toggle the `typeagent:dispatcher:collision` debug log. | -| `@config collision telemetry experimentId []` | Stamp every emitted event with this tag. Use to slice Cosmos queries per experiment. | -| `@collision events [-n ] [-k ]` | Show recent events from the in-memory ring buffer with kind / strategy badges and a ⚡ marker on rows where the chosen candidate diverged from `first-match`. | -| `@config log db [on\|off]` | Toggle DocumentDB upload (gates remote sink — independent of the per-session local capture). | +| Command | Effect | +| ------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `@config collision` | Render the current collision config as an HTML status table. | +| `@config collision detect [on\|off]` | Toggle a detection point (`static` / `grammarMatch` / `llmSelect` / `fuzzy`). Persisted to `data.json`. 
| +| `@config collision contextSelector detect [on\|off]` | Toggle the context-weighted resolution tier. Other `contextSelector` knobs (windowTurns / decay / evidence gates / margin / abstainFallback) are `data.json` hand-edits. | +| `@collision keywords [add\|remove\|list\|clear] …` | Inspect / tune per-action keyword vectors used by `contextSelector` (edits the `collision-keywords.json` sidecar). | +| `@config collision strategy ` | Set the resolution strategy (`first-match` / `score-rank` / `priority` / `user-clarify`; static uses `warn` / `error`). | +| `@config collision priority []` | Set / show the comma-separated `priorityOrder` used by the `priority` strategy. | +| `@config collision telemetry emit [on\|off]` | Toggle the ring-buffer + JSONL capture. | +| `@config collision telemetry debugLog [on\|off]` | Toggle the `typeagent:dispatcher:collision` debug log. | +| `@config collision telemetry experimentId []` | Stamp every emitted event with this tag. Use to slice Cosmos queries per experiment. | +| `@collision events [-n ] [-k ]` | Show recent events from the in-memory ring buffer with kind / strategy badges and a ⚡ marker on rows where the chosen candidate diverged from `first-match`. | +| `@config log db [on\|off]` | Toggle DocumentDB upload (gates remote sink — independent of the per-session local capture). | Calibration knobs (`classifier` / `topN` / `scoreDeltaThreshold` / `scorer` / `similarityThreshold`) are intentionally not exposed via `@config collision` — they're long-tail tuning, not opt-in toggles, and the same `data.json` accepts hand edits when needed. @@ -440,6 +451,28 @@ Related `@config collision preference` knobs: > (`pnpm --filter agent-dispatcher build`) you must restart the shell > (`pnpm run shell`) for code changes to take effect. 
+### Context-weighted resolution (`contextSelector`) + +A deterministic, LLM-free resolution tier on the **grammarMatch** path that ranks colliding candidates by how closely they match the recent conversation topic, then **resolves** the collision on the cache path (avoiding the downstream LLM translation) or **abstains** and falls through to the configured strategy. Off by default; opt in with `@config collision contextSelector detect on`. See the full design in [`context-weighted-collision-resolution-design.md`](../../../docs/architecture/collision/context-weighted-collision-resolution-design.md). + +How it works, per collision (all steps deterministic): + +1. **Context vector** — a decayed keyword-frequency map of the recent conversation, built from a `contextSelector`-owned ring buffer of the last `windowTurns` user requests, each weighted by `decay^age` (history-only: the current request is excluded). Sourced behind the `ConversationSignalSource` seam so it can later read knowPro topics/entities instead of raw tokens. +2. **Candidate keywords** — each `(schema, action)`'s effective keyword vector = a derived lexical floor (mined from the live schema text) layered with `collision-keywords.json` sidecar overrides. +3. **Score** — `TfIdfScorer` sums, per candidate, `contextWeight × candidate-local-IDF` over the overlapping tokens (shared tokens cancel, unique tokens distinguish). Behind the `CollisionScorer` seam so a knowPro-entity or embedding scorer can replace it. +4. **Decide** — resolve only when a coverage guard, an evidence gate (`minUniqueTokens` + `minMass`), and a clear-winner `margin` all pass; otherwise abstain. Biased toward abstaining. + +On a resolve it emits a `context-weight` telemetry event and a non-blocking affordance (`↪ routed to — recent topic …`). On abstain it emits a `context-weight` event noting the reason and, per `abstainFallback`, either defers to the configured grammar strategy (default) or escalates the request to LLM translation. 
Detection is independent of `grammarMatch.detect` — with `detect: false` everywhere, behavior is byte-identical to legacy first-match. + +Tune the discriminative keywords for the handful of actions that actually collide: + +``` +@collision keywords excel.addRow add spreadsheet formula # add discriminative keywords +@collision keywords excel.addRow remove office # mask a derived keyword +@collision keywords excel.addRow list # show derived + overrides, merged +@collision keywords excel.addRow clear # revert to derived-only +``` + ### MultipleAction interaction `multipleActionBehavior` controls what happens when `user-clarify` would fire on a sub-action inside a `MultipleAction` batch: From becabba6c28301e097750d99f6ae9ed51722306c Mon Sep 17 00:00:00 2001 From: George Ng Date: Wed, 1 Jul 2026 01:47:29 -0700 Subject: [PATCH 06/11] refactor(dispatcher): round 1 adversarial-review fixes for contextSelector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses two independent Round-1 reviews (code-review + rubber-duck). Correctness (code-review, Medium): - keywordIndex: stop memoizing a missing definition (queried-before-load actions were cached empty forever); invalidate the derived-keyword cache for an agent's schemas on in-place `reloadAgentSchema` (learn/remember-how-to flows) so the scorer re-extracts from reloaded schema text instead of stale keywords. Extensibility seam (rubber-duck, blocking): - Add ContextResolutionStrategy (strategy.ts) bundling scoring + decision policy so an embedding / knowPro scorer swaps as one unit; the orchestrator now depends only on the seam, not on TF-IDF internals. TfIdfStrategy = TfIdfScorer + the count-based evidence gate. Tuning / scope (rubber-duck, non-blocking): - Retune conservative defaults to the λ=0.9 scale: minMass 1.0->0.75, margin 1.0->0.5 (margin 1.0 pathologically abstained "2 fresh vs 1" cases); add boundary fixtures. 
- Stop dropping domain nouns that double as verbs ("list", "search") as generic verbs — "list" is the named excel↔list scenario keyword. - Document that ContextVector is the knowPro projection target (design §9 step 2) and that v1 keyword source = lexical floor + manual sidecar (distillation + auto-derived layers are follow-ups) in README + code comments. Build green; contextSelector suite 52/52. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ts/packages/dispatcher/dispatcher/README.md | 6 ++- .../dispatcher/src/context/appAgentManager.ts | 4 ++ .../contextSelector/conversationSignal.ts | 6 ++- .../context/contextSelector/keywordIndex.ts | 30 +++++++----- .../src/context/contextSelector/strategy.ts | 48 +++++++++++++++++++ .../src/context/contextSelector/tokenize.ts | 6 +-- .../dispatcher/src/context/session.ts | 8 ++-- .../src/translation/matchContextSelector.ts | 17 +++---- .../test/contextSelectorDecision.spec.ts | 39 +++++++++++++++ .../test/contextSelectorKeywords.spec.ts | 4 +- .../test/contextSelectorResolve.spec.ts | 4 +- .../test/contextSelectorTokenize.spec.ts | 7 +++ 12 files changed, 146 insertions(+), 33 deletions(-) create mode 100644 ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts diff --git a/ts/packages/dispatcher/dispatcher/README.md b/ts/packages/dispatcher/dispatcher/README.md index 7a2c1f72d..272bce757 100644 --- a/ts/packages/dispatcher/dispatcher/README.md +++ b/ts/packages/dispatcher/dispatcher/README.md @@ -312,8 +312,8 @@ collision: { windowTurns: number; // ring-buffer look-back N (default 20) decay: number; // per-turn recency decay λ (default 0.9) minUniqueTokens: number; // evidence gate (default 2) - minMass: number; // evidence gate — min winner score (default 1.0) - margin: number; // clear-winner margin over runner-up (default 1.0) + minMass: number; // evidence gate — min winner score (default 0.75) + margin: number; // clear-winner margin over runner-up (default 0.5) 
abstainFallback: "defer-to-strategy" | "escalate-to-llm"; // default defer-to-strategy }; priorityOrder: string; // comma-separated agent names; "" = registration order @@ -464,6 +464,8 @@ How it works, per collision (all steps deterministic): On a resolve it emits a `context-weight` telemetry event and a non-blocking affordance (`↪ routed to — recent topic …`). On abstain it emits a `context-weight` event noting the reason and, per `abstainFallback`, either defers to the configured grammar strategy (default) or escalates the request to LLM translation. Detection is independent of `grammarMatch.detect` — with `detect: false` everywhere, behavior is byte-identical to legacy first-match. +**v1 scope (keyword source).** This ships the design's **deterministic lexical floor** (keywords mined from the live schema text) plus the **manual sidecar** override layer. The design's other keyword layers — a preferred **LLM-distilled** baseline and the **auto-derived** sidecar layers (misroute mining, learned-preference deltas) — are follow-ups; the index/sidecar layering already accommodates them without a shape change. The thresholds (`minMass` / `margin` / `minUniqueTokens`) ship as conservative defaults to be calibrated on fixtures. 
+ Tune the discriminative keywords for the handful of actions that actually collide: ``` diff --git a/ts/packages/dispatcher/dispatcher/src/context/appAgentManager.ts b/ts/packages/dispatcher/dispatcher/src/context/appAgentManager.ts index 961ffa873..2d49dbc4b 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/appAgentManager.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/appAgentManager.ts @@ -1858,6 +1858,10 @@ export class AppAgentManager implements ActionConfigProvider { // Clear translator cache to force re-translation with new schema context.translatorCache.clear(); + // Drop cached derived keyword vectors for this agent's schemas so the + // contextSelector re-extracts from the reloaded (possibly changed) + // schema text instead of scoring against stale keywords. + context.contextSelectorKeywords.invalidate(appAgentName); // Drop construction-cache entries whose schema hash no longer matches // the reloaded schema (e.g. constructions for a deleted or edited flow), diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/conversationSignal.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/conversationSignal.ts index 53cadc05d..3ab0a76b4 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/conversationSignal.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/conversationSignal.ts @@ -11,7 +11,11 @@ import { tokenize } from "./tokenize.js"; // { canonical token -> decay-weighted conversational frequency }. The single // shape both the v1 raw-token source and the v2 knowPro source project into, and -// the only conversation input the scorer sees (§9). +// the only conversation input the scorer sees (§9). 
The v2 knowPro source +// projects its topics / entity names+types into these same canonical string keys +// (design §9 step 2 — "project into the same { key -> weight } map"); a richer +// entity-structure-aware or embedding scorer is the one flagged future that would +// extend this shape, not v1. export type ContextVector = ReadonlyMap<string, number>; // The seam. A source owns whatever conversational state it needs and exposes it diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordIndex.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordIndex.ts index bab3f80be..d28918ef2 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordIndex.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/keywordIndex.ts @@ -6,6 +6,12 @@ // (live-tunable). The one place the scorer reads candidate keywords from. The // schema-reading side is behind `ActionSchemaSource` so the index is unit- // testable with a stub source. +// +// v1 baseline = the deterministic lexical floor (keywordExtractor). LLM +// distillation (the design's preferred baseline) and auto-derived sidecar layers +// (misroute mining, learned-preference deltas) are follow-ups: they slot in as +// an alternate baseline and additional sidecar deltas without changing this +// index's shape. import { ActionSchemaTypeDefinition } from "@typeagent/action-schema"; import { KeywordVector, applyKeywordDelta } from "./keywordVector.js"; @@ -77,7 +83,9 @@ export class KeywordIndex { private readonly getSidecar: () => KeywordSidecar, ) {} - // Lexical-floor keywords for one action (memoized). + // Lexical-floor keywords for one action (memoized). A missing definition + // (schema not loaded yet / freshly-learned action) is NOT memoized, so it is + // re-read once the schema is available rather than cached empty forever.
public derived(schemaName: string, actionName: string): KeywordVector { const id = keywordId(schemaName, actionName); const cached = this.derivedMemo.get(id); @@ -88,16 +96,16 @@ export class KeywordIndex { schemaName, actionName, ); - const vector: KeywordVector = - definition === undefined - ? new Set() - : extractKeywords( - buildExtractionInput( - actionName, - definition, - this.source.getSchemaDescription(schemaName), - ), - ); + if (definition === undefined) { + return new Set(); + } + const vector = extractKeywords( + buildExtractionInput( + actionName, + definition, + this.source.getSchemaDescription(schemaName), + ), + ); this.derivedMemo.set(id, vector); return vector; } diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts new file mode 100644 index 000000000..3a3b9dfab --- /dev/null +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts @@ -0,0 +1,48 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +// The resolution-strategy seam (§9 scoring roadmap). A strategy bundles a +// *scoring* method with the *decision policy* that interprets its scores, so the +// two swap together — the count-based TF-IDF scorer pairs with the count-based +// evidence gate (§10), while a future knowPro-entity or embedding scorer would +// pair with its own (e.g. cosine-threshold) policy. The orchestrator +// (matchContextSelector.ts) depends only on this seam and the generic +// `ContextSelectorDecision`, never on TF-IDF internals — which is what makes the +// user-requested "swap TF-IDF -> knowPro -> embeddings" a drop-in. 
+ +import { ContextVector } from "./conversationSignal.js"; +import { ScorerCandidate, CollisionScorer, TfIdfScorer } from "./scorer.js"; +import { decide, ContextSelectorDecision, DecisionConfig } from "./decision.js"; + +export interface ContextResolutionStrategy { + // Score the candidates against the conversation and decide resolve/abstain. + // Deterministic and synchronous — the collision hot path is LLM-free (§12); + // a scorer needing a model call would precompute vectors, not call here. + evaluate( + contextVector: ContextVector, + candidates: ScorerCandidate[], + config: DecisionConfig, + ): ContextSelectorDecision; +} + +// v1 strategy: candidate-local IDF TF-IDF scoring (§9) + the count-based +// coverage / evidence-gate / margin decision (§10). Coverage is a strategy +// concern (a non-lexical strategy defines its own notion), computed here from +// the candidates' keyword sets. +export class TfIdfStrategy implements ContextResolutionStrategy { + private readonly scorer: CollisionScorer; + + constructor(scorer: CollisionScorer = new TfIdfScorer()) { + this.scorer = scorer; + } + + public evaluate( + contextVector: ContextVector, + candidates: ScorerCandidate[], + config: DecisionConfig, + ): ContextSelectorDecision { + const covered = candidates.every((c) => c.keywords.size > 0); + const scores = this.scorer.score(contextVector, candidates); + return decide(scores, covered, config); + } +} diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/tokenize.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/tokenize.ts index c1daee639..f254fa4eb 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/tokenize.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/tokenize.ts @@ -127,7 +127,9 @@ const STOPWORDS: ReadonlySet = new Set([ // Generic CRUD / imperative verbs — present in most action names, so they carry // no discriminative signal (§6). 
Dropped from both keyword vectors and the -// context vector so they never dominate a match. +// context vector so they never dominate a match. Deliberately excludes domain +// nouns that double as verbs (e.g. "list", "search") so an app's own topic word +// survives — this set is a calibration lever (§9), not a fixed truth. const GENERIC_VERBS: ReadonlySet<string> = new Set([ "add", "create", @@ -139,9 +141,7 @@ const GENERIC_VERBS: ReadonlySet<string> = new Set([ "fetch", "show", "display", - "list", "find", - "search", "lookup", "query", "read", diff --git a/ts/packages/dispatcher/dispatcher/src/context/session.ts b/ts/packages/dispatcher/dispatcher/src/context/session.ts index c78116290..3cb0a5c40 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/session.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/session.ts @@ -274,9 +274,9 @@ export type CollisionConfig = { // Evidence gate: min distinct distinguishing tokens the winner must // match (default 2). minUniqueTokens: number; - // Evidence gate: min winner score / matched mass (default 1.0). + // Evidence gate: min winner score / matched mass (default 0.75). minMass: number; - // Clear-winner margin the winner must beat the runner-up by (default 1.0). + // Clear-winner margin the winner must beat the runner-up by (default 0.5). margin: number; // On abstain: hand to the configured grammar strategy (default) or // escalate the request to the LLM translation path.
@@ -467,8 +467,8 @@ const defaultSessionConfig: SessionConfig = { windowTurns: 20, decay: 0.9, minUniqueTokens: 2, - minMass: 1.0, - margin: 1.0, + minMass: 0.75, + margin: 0.5, abstainFallback: "defer-to-strategy", }, priorityOrder: "", diff --git a/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts b/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts index d7086d6f6..f9eccd21c 100644 --- a/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts +++ b/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts @@ -18,13 +18,16 @@ import { getAppAgentName } from "./agentTranslators.js"; import { CandidateScore, ScorerCandidate, - TfIdfScorer, } from "../context/contextSelector/scorer.js"; -import { decide } from "../context/contextSelector/decision.js"; +import { + ContextResolutionStrategy, + TfIdfStrategy, +} from "../context/contextSelector/strategy.js"; -// v1 scorer selection (§9). Stateless — a single instance is reused. This is the -// swap point for a future knowPro-entity / embedding scorer. -const scorer = new TfIdfScorer(); +// v1 strategy selection (§9): TF-IDF scoring + count-based decision, bundled so +// a future knowPro-entity / embedding strategy swaps as one unit. Stateless — +// a single instance is reused. +const strategy: ContextResolutionStrategy = new TfIdfStrategy(); export type ContextSelectorResolution = { // The winning validated match to resolve to (avoids the downstream LLM). 
@@ -95,10 +98,8 @@ export function resolveContextSelector( return undefined; } - const covered = candidates.every((c) => c.keywords.size > 0); const contextVector = ctx.conversationSignal.getContextVector(); - const scores = scorer.score(contextVector, candidates); - const decision = decide(scores, covered, { + const decision = strategy.evaluate(contextVector, candidates, { minUniqueTokens: cfg.contextSelector.minUniqueTokens, minMass: cfg.contextSelector.minMass, margin: cfg.contextSelector.margin, diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts index 8d57b42b4..86f235f24 100644 --- a/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts @@ -112,6 +112,45 @@ describe("contextSelector/decision", () => { }); }); +// Boundary fixtures at the shipped defaults (minMass 0.75, margin 0.5) over the +// λ=0.9 decay scale — guards against pathological abstain/resolve tuning (§10). 
+describe("contextSelector/decision — default-threshold boundaries", () => { + const DEFAULTS: DecisionConfig = { + minUniqueTokens: 2, + minMass: 0.75, + margin: 0.5, + }; + + it("resolves two fresh winner tokens over one fresh runner-up token", () => { + // winner 0.9+0.9=1.8 (2 tokens) vs runner 0.9 (1 token) → margin 0.9 ≥ 0.5 + const d = decide( + [scored("a", "x", 1.8, 2), scored("b", "y", 0.9, 1)], + true, + DEFAULTS, + ); + expect(d.kind).toBe("resolve"); + }); + + it("resolves two fresh winner tokens over one older runner-up token", () => { + const d = decide( + [scored("a", "x", 1.8, 2), scored("b", "y", 0.81, 1)], + true, + DEFAULTS, + ); + expect(d.kind).toBe("resolve"); + }); + + it("still abstains when two strong candidates are close", () => { + const d = decide( + [scored("a", "x", 1.8, 2), scored("b", "y", 1.5, 2)], + true, + DEFAULTS, + ); + expect(d.kind).toBe("abstain"); + if (d.kind === "abstain") expect(d.reason).toBe("margin"); + }); +}); + // End-to-end §14 worked examples: signal-free — scores are supplied directly to // the scorer via a synthetic context vector, then decided. describe("contextSelector §14 worked examples (scorer + decision)", () => { diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorKeywords.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorKeywords.spec.ts index d52e5002b..396523c4e 100644 --- a/ts/packages/dispatcher/dispatcher/test/contextSelectorKeywords.spec.ts +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorKeywords.spec.ts @@ -55,10 +55,10 @@ describe("contextSelector/keywordExtractor", () => { expect(kw.has("todo")).toBe(true); expect(kw.has("items")).toBe(true); expect(kw.has("item")).toBe(true); - // generic verbs / stopwords / "list" (generic) are dropped + // generic verbs / stopwords are dropped; "list" is a kept domain noun. 
expect(kw.has("add")).toBe(false); expect(kw.has("the")).toBe(false); - expect(kw.has("list")).toBe(false); + expect(kw.has("list")).toBe(true); }); it("caps at topN, keeping the most frequent (deterministic tiebreak)", () => { diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts index 2439186a9..20563aea1 100644 --- a/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts @@ -22,8 +22,8 @@ function makeCtx(o: Overrides): { windowTurns: 20, decay: 0.9, minUniqueTokens: 2, - minMass: 1.0, - margin: 1.0, + minMass: 0.75, + margin: 0.5, abstainFallback: "defer-to-strategy", ...o.contextSelector, }; diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorTokenize.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorTokenize.spec.ts index 9f0716cdf..b1b4f6b03 100644 --- a/ts/packages/dispatcher/dispatcher/test/contextSelectorTokenize.spec.ts +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorTokenize.spec.ts @@ -28,6 +28,13 @@ describe("contextSelector/tokenize", () => { expect(isGenericVerb("spreadsheet")).toBe(false); }); + it("keeps domain nouns that double as verbs (list, search)", () => { + // "list" / "search" are app/topic words, not dropped as generic verbs. 
+ expect(isGenericVerb("list")).toBe(false); + expect(isGenericVerb("search")).toBe(false); + expect(tokenize("show my grocery list")).toEqual(["grocery", "list"]); + }); + it("drops sub-minimum-length tokens", () => { expect(tokenize("a i x")).toEqual([]); }); From 244756a2419fbe8246a307d26e138faa54992d24 Mon Sep 17 00:00:00 2001 From: George Ng Date: Wed, 1 Jul 2026 02:28:04 -0700 Subject: [PATCH 07/11] refactor(dispatcher): round 2 adversarial-review fixes for contextSelector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Round-2 code-review + rubber-duck. Correctness: - matchContextSelector now returns a 3-way outcome (resolve/abstain/skip). "skip" (< 2 distinct candidates — e.g. a tiedHeuristics tie between two constructions of the SAME action) no longer collapses into "abstain", so `escalate-to-llm` can't send a confident single-action match to the LLM (code-review, Medium). - matchRequest: `defer-to-strategy` on abstain now runs the configured grammar strategy even when `grammarMatch.detect` is off (design §11.1 abstain semantics); "skip" falls through to today's behavior, never escalates (rubber-duck, blocking). - Emit `firstMatchCandidate` on contextSelector events (reuse exported toCandidate) so the rollout can compare treatment vs first-match control when contextSelector short-circuits the strategy (rubber-duck, blocking). - On duplicate (schema, action), keep the heuristically-best MatchResult, not the first seen (rubber-duck). Extensibility seam (rubber-duck, blocking): - Stop the strategy leaking TF-IDF vocabulary into the orchestrator: CandidateScore `uniqueTokenCount`/`matched` are now optional evidence; the strategy returns a `winnerNote` phrase; the user affordance is generic ("↪ routed to X — recent topic"). An embedding strategy is now a clean drop-in (score-only + its own note). 
Tuning: - minMass 0.75 -> 1.0 (Round 1 over-corrected); at λ=0.9 this bounds a stale two-token turn to ~age 7 so an old topic stops silently resolving; added staleness + dedup fixtures. Build green; contextSelector 54/54; full dispatcher suite 66 suites / 1038 tests. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ts/packages/dispatcher/dispatcher/README.md | 2 +- .../src/context/contextSelector/decision.ts | 5 +- .../src/context/contextSelector/scorer.ts | 14 ++- .../src/context/contextSelector/strategy.ts | 21 +++- .../dispatcher/src/context/session.ts | 6 +- .../src/translation/matchCollision.ts | 2 +- .../src/translation/matchContextSelector.ts | 113 +++++++++++------- .../src/translation/matchRequest.ts | 31 +++-- .../test/contextSelectorDecision.spec.ts | 13 +- .../test/contextSelectorResolve.spec.ts | 73 +++++++++-- .../test/contextSelectorScorer.spec.ts | 4 +- 11 files changed, 199 insertions(+), 85 deletions(-) diff --git a/ts/packages/dispatcher/dispatcher/README.md b/ts/packages/dispatcher/dispatcher/README.md index 272bce757..7f9361b00 100644 --- a/ts/packages/dispatcher/dispatcher/README.md +++ b/ts/packages/dispatcher/dispatcher/README.md @@ -312,7 +312,7 @@ collision: { windowTurns: number; // ring-buffer look-back N (default 20) decay: number; // per-turn recency decay λ (default 0.9) minUniqueTokens: number; // evidence gate (default 2) - minMass: number; // evidence gate — min winner score (default 0.75) + minMass: number; // evidence gate — min winner score (default 1.0) margin: number; // clear-winner margin over runner-up (default 0.5) abstainFallback: "defer-to-strategy" | "escalate-to-llm"; // default defer-to-strategy }; diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts index d754f9062..9d96542c4 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts +++ 
b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts @@ -87,10 +87,11 @@ export function decide( const runnerUp = ranked.length > 1 ? ranked[1] : undefined; const winnerScore = quantize(winner.score); - if (winner.uniqueTokenCount === 0 || winnerScore <= 0) { + const winnerUnique = winner.uniqueTokenCount ?? 0; + if (winnerUnique === 0 || winnerScore <= 0) { return { kind: "abstain", reason: "no-signal", ranked }; } - if (winner.uniqueTokenCount < config.minUniqueTokens) { + if (winnerUnique < config.minUniqueTokens) { return { kind: "abstain", reason: "min-unique-tokens", ranked }; } if (winnerScore < quantize(config.minMass)) { diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/scorer.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/scorer.ts index 4d6d7af66..1aaf0df5d 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/scorer.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/scorer.ts @@ -34,13 +34,15 @@ export type MatchedToken = { export type CandidateScore = { schemaName: string; actionName: string; - // Σ contribution over matched tokens. + // Generic magnitude the decision rule ranks on — always populated. score: number; - // Distinct matched tokens that actually distinguish this candidate - // (disc > 0). The evidence gate's `minUniqueTokens` counts these (§10). - uniqueTokenCount: number; - // All firing tokens, sorted by token for stable output. - matched: MatchedToken[]; + // Lexical evidence (TF-IDF): distinct distinguishing tokens matched. A + // non-lexical scorer (embedding) may omit it — the count-based gate treats + // absent as 0. Optional so the seam isn't TF-IDF-shaped. + uniqueTokenCount?: number; + // Lexical evidence: the tokens that fired, for explainable telemetry. + // Empty/omitted for non-lexical scorers. 
+ matched?: MatchedToken[]; }; export interface CollisionScorer { diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts index 3a3b9dfab..dd355e238 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts @@ -22,9 +22,19 @@ export interface ContextResolutionStrategy { contextVector: ContextVector, candidates: ScorerCandidate[], config: DecisionConfig, - ): ContextSelectorDecision; + ): ContextSelectorEvaluation; } +// A strategy's output: the generic decision plus a one-line, strategy-specific +// evidence phrase for the resolved winner (e.g. TF-IDF "matched 3 token(s), mass +// 5.54"; a future embedding strategy "cosine 0.82"). Empty when abstaining. +// Keeping the phrasing here — not in the orchestrator — is what stops TF-IDF +// vocabulary leaking into the caller (so a non-lexical strategy is a clean swap). +export type ContextSelectorEvaluation = { + decision: ContextSelectorDecision; + winnerNote: string; +}; + // v1 strategy: candidate-local IDF TF-IDF scoring (§9) + the count-based // coverage / evidence-gate / margin decision (§10). Coverage is a strategy // concern (a non-lexical strategy defines its own notion), computed here from @@ -40,9 +50,14 @@ export class TfIdfStrategy implements ContextResolutionStrategy { contextVector: ContextVector, candidates: ScorerCandidate[], config: DecisionConfig, - ): ContextSelectorDecision { + ): ContextSelectorEvaluation { const covered = candidates.every((c) => c.keywords.size > 0); const scores = this.scorer.score(contextVector, candidates); - return decide(scores, covered, config); + const decision = decide(scores, covered, config); + const winnerNote = + decision.kind === "resolve" + ? `matched ${decision.winner.uniqueTokenCount ?? 
0} token(s), mass ${decision.winner.score.toFixed(3)}` + : ""; + return { decision, winnerNote }; } } diff --git a/ts/packages/dispatcher/dispatcher/src/context/session.ts b/ts/packages/dispatcher/dispatcher/src/context/session.ts index 3cb0a5c40..52e66bc7c 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/session.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/session.ts @@ -274,7 +274,9 @@ export type CollisionConfig = { // Evidence gate: min distinct distinguishing tokens the winner must // match (default 2). minUniqueTokens: number; - // Evidence gate: min winner score / matched mass (default 0.75). + // Evidence gate: min winner score / matched mass (default 1.0). Also + // bounds staleness — at λ=0.9 two matched tokens fall below 1.0 past + // roughly age 7, so a single old topic turn stops resolving. minMass: number; // Clear-winner margin the winner must beat the runner-up by (default 0.5). margin: number; @@ -467,7 +469,7 @@ const defaultSessionConfig: SessionConfig = { windowTurns: 20, decay: 0.9, minUniqueTokens: 2, - minMass: 0.75, + minMass: 1.0, margin: 0.5, abstainFallback: "defer-to-strategy", }, diff --git a/ts/packages/dispatcher/dispatcher/src/translation/matchCollision.ts b/ts/packages/dispatcher/dispatcher/src/translation/matchCollision.ts index 5293a93ae..4696fe3b1 100644 --- a/ts/packages/dispatcher/dispatcher/src/translation/matchCollision.ts +++ b/ts/packages/dispatcher/dispatcher/src/translation/matchCollision.ts @@ -53,7 +53,7 @@ function getPrimary(match: MatchResult): { * uses these fields to reconstruct alternative rankings (e.g. * counterfactual `score-rank` outcomes) without re-running the matcher. 
*/ -function toCandidate( +export function toCandidate( match: MatchResult, ctx?: CommandHandlerContext, ): CollisionCandidate { diff --git a/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts b/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts index f9eccd21c..8d65fbc8b 100644 --- a/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts +++ b/ts/packages/dispatcher/dispatcher/src/translation/matchContextSelector.ts @@ -3,10 +3,11 @@ // The contextSelector orchestrator (§11): adapts the grammar path's validated // MatchResults into scorer candidates, runs the deterministic pipeline -// (signal -> keywords -> scorer -> decision), emits telemetry, and returns the -// winning match plus a UX affordance note — or undefined to abstain. Pure -// engine logic lives under ../context/contextSelector/; this file is the thin -// MatchResult-aware seam plus telemetry. +// (signal -> strategy: score + decide), emits telemetry, and returns a 3-way +// outcome — resolve (with the winning match + a UX affordance note), abstain, or +// skip (not a topical collision). Pure engine logic lives under +// ../context/contextSelector/; this file is the thin MatchResult-aware seam plus +// telemetry. import { MatchResult } from "agent-cache"; import { CommandHandlerContext } from "../context/commandHandlerContext.js"; @@ -15,6 +16,7 @@ import { emitCollisionEvent, } from "../context/collisionTelemetry.js"; import { getAppAgentName } from "./agentTranslators.js"; +import { toCandidate } from "./matchCollision.js"; import { CandidateScore, ScorerCandidate, @@ -29,12 +31,17 @@ import { // a single instance is reused. const strategy: ContextResolutionStrategy = new TfIdfStrategy(); -export type ContextSelectorResolution = { - // The winning validated match to resolve to (avoids the downstream LLM). - match: MatchResult; - // Non-blocking UX affordance shown on a reroute (U-2, §11.2). 
- note: string; -}; +export type ContextSelectorOutcome = + // Confident topical pick — resolve to `match` (avoids the LLM); `note` is the + // U-2 affordance (§11.2). + | { kind: "resolve"; match: MatchResult; note: string } + // Scored >= 2 distinct candidates but the signal was weak/ambiguous. The + // caller applies the configured abstain fallback. + | { kind: "abstain" } + // Fewer than 2 distinct (schema, action) candidates — not a topical collision + // (e.g. a tiedHeuristics tie between two constructions of the SAME action). + // The caller must fall through to today's behavior, never escalate. + | { kind: "skip" }; function primaryOf(match: MatchResult): { schemaName: string; @@ -49,31 +56,44 @@ function primaryOf(match: MatchResult): { type Candidate = ScorerCandidate & { match: MatchResult }; +// Prefer the heuristically-stronger MatchResult when the same (schema, action) +// appears twice (matchedCount desc, nonOptionalCount desc, wildcardCharCount +// asc) so a resolve returns the best representative, not just the first seen. +function isBetterMatch(next: MatchResult, current: MatchResult): boolean { + if (next.matchedCount !== current.matchedCount) { + return next.matchedCount > current.matchedCount; + } + if (next.nonOptionalCount !== current.nonOptionalCount) { + return next.nonOptionalCount > current.nonOptionalCount; + } + return next.wildcardCharCount < current.wildcardCharCount; +} + function toTelemetryCandidates(scores: CandidateScore[]): CollisionCandidate[] { return scores.map((s) => ({ schemaName: s.schemaName, actionName: s.actionName, score: s.score, - matchedTokens: s.matched.map((m) => ({ + matchedTokens: (s.matched ?? []).map((m) => ({ token: m.token, weight: m.contribution, })), })); } -// Resolve a grammar-path collision by topical proximity, or abstain. Assumes the -// caller has already confirmed this is a collision and that -// `contextSelector.detect` is on. 
+// Resolve a grammar-path collision by topical proximity, abstain, or skip. +// Assumes the caller confirmed `isCollision` and that `contextSelector.detect` +// is on. export function resolveContextSelector( validated: MatchResult[], ctx: CommandHandlerContext, request: string, -): ContextSelectorResolution | undefined { +): ContextSelectorOutcome { const cfg = ctx.session.getConfig().collision; const startedAt = performance.now(); - // Distinct (schema, action) candidates, each keeping the first MatchResult - // to resolve to. Effective keywords = derived floor + sidecar overrides. + // Distinct (schema, action) candidates, keeping the best MatchResult per + // action. Effective keywords = derived floor + sidecar overrides. const byId = new Map(); for (const match of validated) { const { schemaName, actionName } = primaryOf(match); @@ -81,7 +101,8 @@ export function resolveContextSelector( continue; } const id = `${schemaName}.${actionName}`; - if (!byId.has(id)) { + const existing = byId.get(id); + if (existing === undefined) { byId.set(id, { schemaName, actionName, @@ -91,21 +112,32 @@ export function resolveContextSelector( ), match, }); + } else if (isBetterMatch(match, existing.match)) { + existing.match = match; } } const candidates = [...byId.values()]; if (candidates.length < 2) { - return undefined; + // Not a topical collision — nothing for contextSelector to weigh in on. 
+ return { kind: "skip" }; } const contextVector = ctx.conversationSignal.getContextVector(); - const decision = strategy.evaluate(contextVector, candidates, { - minUniqueTokens: cfg.contextSelector.minUniqueTokens, - minMass: cfg.contextSelector.minMass, - margin: cfg.contextSelector.margin, - }); + const { decision, winnerNote } = strategy.evaluate( + contextVector, + candidates, + { + minUniqueTokens: cfg.contextSelector.minUniqueTokens, + minMass: cfg.contextSelector.minMass, + margin: cfg.contextSelector.margin, + }, + ); const telemetryCandidates = toTelemetryCandidates(decision.ranked); + // What first-match would have picked — preserved so the rollout can compare + // treatment vs control even when contextSelector short-circuits the strategy + // (§13). validated[0] is the cache's heuristic-best. + const firstMatchCandidate = toCandidate(validated[0], ctx); const elapsedMs = performance.now() - startedAt; if (decision.kind === "abstain") { @@ -114,6 +146,7 @@ export function resolveContextSelector( kind: "grammarMatch", request, candidates: telemetryCandidates, + firstMatchCandidate, classifier: cfg.grammarMatch.classifier, strategy: "context-weight", elapsedMs, @@ -121,7 +154,7 @@ export function resolveContextSelector( }, ctx, ); - return undefined; + return { kind: "abstain" }; } const winner = decision.winner; @@ -129,41 +162,31 @@ export function resolveContextSelector( if (winning === undefined) { // Defensive: winner id must be present. Treat as abstain rather than // resolving to the wrong match. 
- return undefined; + return { kind: "abstain" }; } - const chosen: CollisionCandidate = { - schemaName: winner.schemaName, - actionName: winner.actionName, - score: winner.score, - matchedTokens: winner.matched.map((m) => ({ - token: m.token, - weight: m.contribution, - })), - }; emitCollisionEvent( { kind: "grammarMatch", request, candidates: telemetryCandidates, - chosen, + chosen: telemetryCandidates.find( + (c) => + c.schemaName === winner.schemaName && + c.actionName === winner.actionName, + ), + firstMatchCandidate, classifier: cfg.grammarMatch.classifier, strategy: "context-weight", elapsedMs, - note: `resolve; matched ${winner.uniqueTokenCount} token(s), mass ${winner.score.toFixed(3)}`, + note: `resolve; ${winnerNote}`, }, ctx, ); const agentName = getAppAgentName(winner.schemaName); - const topTokens = winner.matched - .slice() - .sort((a, b) => b.contribution - a.contribution) - .slice(0, 3) - .map((m) => m.token); - const topicSuffix = - topTokens.length > 0 ? ` (${topTokens.join(", ")})` : ""; return { + kind: "resolve", match: winning.match, - note: `↪ routed to ${agentName} — recent topic${topicSuffix}`, + note: `↪ routed to ${agentName} — recent topic`, }; } diff --git a/ts/packages/dispatcher/dispatcher/src/translation/matchRequest.ts b/ts/packages/dispatcher/dispatcher/src/translation/matchRequest.ts index f35b1ec1a..d36ca2795 100644 --- a/ts/packages/dispatcher/dispatcher/src/translation/matchRequest.ts +++ b/ts/packages/dispatcher/dispatcher/src/translation/matchRequest.ts @@ -300,24 +300,33 @@ export async function matchRequest( isCollision(validated, collisionCfg.classifier) ) { // contextSelector tier (§11): a confident topical pick resolves here on - // the cache path (no LLM). On abstain it either falls through to the - // configured grammar strategy (default) or escalates to LLM translation. + // the cache path (no LLM). On abstain it either defers to the configured + // grammar strategy or escalates the request to LLM translation. 
+ let deferToStrategy = false; if (contextSelectorCfg.detect) { - const resolution = resolveContextSelector( + const outcome = resolveContextSelector( validated, systemContext, request, ); - if (resolution !== undefined) { - decision = { kind: "match", match: resolution.match }; - await displayInfo(resolution.note, context); - } else if ( - contextSelectorCfg.abstainFallback === "escalate-to-llm" - ) { - return undefined; + if (outcome.kind === "resolve") { + decision = { kind: "match", match: outcome.match }; + await displayInfo(outcome.note, context); + } else if (outcome.kind === "abstain") { + if (contextSelectorCfg.abstainFallback === "escalate-to-llm") { + return undefined; + } + // defer-to-strategy: hand the collision to the configured grammar + // strategy below, even if grammarMatch.detect is off (§11.1). + deferToStrategy = true; } + // outcome.kind === "skip" (not a topical collision): fall through to + // today's behavior — never escalate. } - if (decision === undefined && collisionCfg.detect) { + if ( + decision === undefined && + (collisionCfg.detect || deferToStrategy) + ) { decision = resolveGrammarCollision( validated, systemContext, diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts index 86f235f24..4228f95a2 100644 --- a/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorDecision.spec.ts @@ -117,7 +117,7 @@ describe("contextSelector/decision", () => { describe("contextSelector/decision — default-threshold boundaries", () => { const DEFAULTS: DecisionConfig = { minUniqueTokens: 2, - minMass: 0.75, + minMass: 1.0, margin: 0.5, }; @@ -149,6 +149,17 @@ describe("contextSelector/decision — default-threshold boundaries", () => { expect(d.kind).toBe("abstain"); if (d.kind === "abstain") expect(d.reason).toBe("margin"); }); + + it("abstains on a single stale two-token 
turn (min-mass bounds staleness)", () => { + // Two matched tokens at age ~8 (2 * 0.9^8 ≈ 0.861) fall below minMass 1.0. + const d = decide( + [scored("a", "x", 0.861, 2), scored("b", "y", 0, 0)], + true, + DEFAULTS, + ); + expect(d.kind).toBe("abstain"); + if (d.kind === "abstain") expect(d.reason).toBe("min-mass"); + }); }); // End-to-end §14 worked examples: signal-free — scores are supplied directly to diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts index 20563aea1..1be8b1ab3 100644 --- a/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorResolve.spec.ts @@ -22,7 +22,7 @@ function makeCtx(o: Overrides): { windowTurns: 20, decay: 0.9, minUniqueTokens: 2, - minMass: 0.75, + minMass: 1.0, margin: 0.5, abstainFallback: "defer-to-strategy", ...o.contextSelector, @@ -30,6 +30,8 @@ function makeCtx(o: Overrides): { const events: CollisionEvent[] = []; const ctx = { collisionEvents: events, + // `toCandidate` (firstMatchCandidate) reads agent priority. 
+ agents: { getAgentRank: () => 0 }, session: { sessionDirPath: undefined, getConfig: () => ({ @@ -37,6 +39,7 @@ function makeCtx(o: Overrides): { contextSelector, grammarMatch: { classifier: "distinctActions" }, telemetry: { emit: true, debugLog: false }, + priorityOrder: "", }, }), }, @@ -52,6 +55,9 @@ function makeCtx(o: Overrides): { function fakeMatch(schemaName: string, actionName: string): MatchResult { return { match: { actions: [{ action: { schemaName, actionName } }] }, + matchedCount: 1, + nonOptionalCount: 1, + wildcardCharCount: 0, } as unknown as MatchResult; } @@ -76,13 +82,56 @@ describe("resolveContextSelector", () => { ctx, "add a row", ); - expect(res).toBeDefined(); - expect(res!.match).toBe(excelMatch); - expect(res!.note).toContain("excel"); - expect(res!.note).toContain("routed"); + expect(res.kind).toBe("resolve"); + if (res.kind === "resolve") { + expect(res.match).toBe(excelMatch); + expect(res.note).toContain("excel"); + expect(res.note).toContain("routed"); + } expect(events).toHaveLength(1); expect(events[0].strategy).toBe("context-weight"); expect(events[0].chosen?.schemaName).toBe("excel"); + expect(events[0].firstMatchCandidate?.schemaName).toBe("excel"); + }); + + it("resolves to the best MatchResult when the winner action is duplicated", () => { + const { ctx } = makeCtx({ + contextVector: vector({ spreadsheet: 2, formula: 1.5 }), + keywords: { + "excel.addRow": new Set(["spreadsheet", "formula", "cell"]), + "list.addItems": new Set(["grocery", "shopping"]), + }, + }); + const weak = { + match: { + actions: [ + { action: { schemaName: "excel", actionName: "addRow" } }, + ], + }, + matchedCount: 1, + nonOptionalCount: 1, + wildcardCharCount: 5, + } as unknown as MatchResult; + const strong = { + match: { + actions: [ + { action: { schemaName: "excel", actionName: "addRow" } }, + ], + }, + matchedCount: 3, + nonOptionalCount: 3, + wildcardCharCount: 0, + } as unknown as MatchResult; + // weak first, strong second — dedup must keep 
the stronger one. + const res = resolveContextSelector( + [weak, strong, listMatch], + ctx, + "add a row", + ); + expect(res.kind).toBe("resolve"); + if (res.kind === "resolve") { + expect(res.match).toBe(strong); + } }); it("abstains (coverage) when a candidate has no keywords", () => { @@ -98,7 +147,7 @@ describe("resolveContextSelector", () => { ctx, "add a row", ); - expect(res).toBeUndefined(); + expect(res.kind).toBe("abstain"); expect(events[0].note).toBe("abstain:coverage"); }); @@ -120,7 +169,7 @@ describe("resolveContextSelector", () => { ctx, "add a row", ); - expect(res).toBeUndefined(); + expect(res.kind).toBe("abstain"); expect(events[0].note).toBe("abstain:margin"); }); @@ -137,12 +186,12 @@ describe("resolveContextSelector", () => { ctx, "add a row", ); - expect(res).toBeUndefined(); + expect(res.kind).toBe("abstain"); expect(events[0].note).toBe("abstain:no-signal"); }); - it("returns undefined when there are fewer than two distinct candidates", () => { - const { ctx } = makeCtx({ + it("skips (no telemetry) with fewer than two distinct candidates", () => { + const { ctx, events } = makeCtx({ contextVector: vector({ spreadsheet: 5 }), keywords: { "excel.addRow": new Set(["spreadsheet"]) }, }); @@ -151,6 +200,8 @@ describe("resolveContextSelector", () => { ctx, "add a row", ); - expect(res).toBeUndefined(); + expect(res.kind).toBe("skip"); + // A non-collision must not emit a context-weight abstain event. 
+ expect(events).toHaveLength(0); }); }); diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorScorer.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorScorer.spec.ts index 981f2eebb..e242e7c07 100644 --- a/ts/packages/dispatcher/dispatcher/test/contextSelectorScorer.spec.ts +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorScorer.spec.ts @@ -46,7 +46,7 @@ describe("contextSelector/scorer (TF-IDF, candidate-local IDF)", () => { expect(excel.score).toBeCloseTo(2, 5); expect(excel.uniqueTokenCount).toBe(1); // Shared token still appears in the matched detail with disc 0. - const sharedMatch = excel.matched.find((m) => m.token === "shared")!; + const sharedMatch = excel.matched!.find((m) => m.token === "shared")!; expect(sharedMatch.disc).toBeCloseTo(0, 5); expect(sharedMatch.contribution).toBeCloseTo(0, 5); }); @@ -80,7 +80,7 @@ describe("contextSelector/scorer (TF-IDF, candidate-local IDF)", () => { cand("a", "x", ["zebra", "apple", "mango"]), cand("b", "y", ["other"]), ]); - expect(only.matched.map((m) => m.token)).toEqual([ + expect(only.matched!.map((m) => m.token)).toEqual([ "apple", "mango", "zebra", From 3247730107145b443a7d06f21644aafb87519173 Mon Sep 17 00:00:00 2001 From: George Ng Date: Wed, 1 Jul 2026 02:43:43 -0700 Subject: [PATCH 08/11] =?UTF-8?q?refactor(dispatcher):=20round=203=20revie?= =?UTF-8?q?w=20polish=20=E2=80=94=20strategy=20config/reason=20genericity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round-3 code-review found no feature issues; round-3 rubber-duck raised one non-blocking residual: the strategy seam was clean at the orchestrator/telemetry layer but the shared decision types still carried TF-IDF vocabulary. - ContextResolutionStrategy is now generic over its config type (`<C>`), so a non-lexical strategy defines its own thresholds rather than being handed TF-IDF's minUniqueTokens/minMass/margin.
- ContextSelectorDecision.reason widened to `string` (AbstainReason documents the count-based strategy's values) so a strategy can emit its own abstain reason. - Add contextSelectorStrategy.spec.ts proving an alternate "similarity" strategy with its own config type, reason ("similarity-floor"), and no lexical evidence fields satisfies the seam with zero changes to the engine/decision/orchestrator. contextSelector 57/57; full dispatcher suite 67 suites / 1041 tests green. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/context/contextSelector/decision.ts | 7 +- .../src/context/contextSelector/strategy.ts | 13 +- .../test/contextSelectorStrategy.spec.ts | 116 ++++++++++++++++++ 3 files changed, 132 insertions(+), 4 deletions(-) create mode 100644 ts/packages/dispatcher/dispatcher/test/contextSelectorStrategy.spec.ts diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts index 9d96542c4..28f55b0ed 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/decision.ts @@ -19,6 +19,9 @@ export type DecisionConfig = { margin: number; }; +// The count-based (TF-IDF) strategy's abstain reasons. A non-lexical strategy +// (embedding) supplies its own reason string — hence `ContextSelectorDecision` +// types `reason` as the wider `string`. export type AbstainReason = | "coverage" | "no-candidates" @@ -36,7 +39,9 @@ export type ContextSelectorDecision = } | { kind: "abstain"; - reason: AbstainReason; + // See AbstainReason for the count-based strategy's values; any strategy + // may supply its own. Surfaced in telemetry as `abstain:<reason>`.
+ reason: string; ranked: CandidateScore[]; }; diff --git a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts index dd355e238..d3abee27d 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/contextSelector/strategy.ts @@ -14,14 +14,19 @@ import { ContextVector } from "./conversationSignal.js"; import { ScorerCandidate, CollisionScorer, TfIdfScorer } from "./scorer.js"; import { decide, ContextSelectorDecision, DecisionConfig } from "./decision.js"; -export interface ContextResolutionStrategy { +// A resolution strategy is generic over its *config* type — the count-based +// TF-IDF strategy takes `DecisionConfig` (minUniqueTokens/minMass/margin), while +// a future embedding strategy would define its own (e.g. a cosine floor). The +// orchestrator maps the session config to whichever the active strategy expects, +// so the interface itself carries no TF-IDF assumptions. +export interface ContextResolutionStrategy<C = DecisionConfig> { // Score the candidates against the conversation and decide resolve/abstain. // Deterministic and synchronous — the collision hot path is LLM-free (§12); // a scorer needing a model call would precompute vectors, not call here. evaluate( contextVector: ContextVector, candidates: ScorerCandidate[], - config: DecisionConfig, + config: C, ): ContextSelectorEvaluation; } @@ -39,7 +44,9 @@ export type ContextSelectorEvaluation = { // coverage / evidence-gate / margin decision (§10). Coverage is a strategy // concern (a non-lexical strategy defines its own notion), computed here from // the candidates' keyword sets.
-export class TfIdfStrategy implements ContextResolutionStrategy { +export class TfIdfStrategy + implements ContextResolutionStrategy<DecisionConfig> +{ private readonly scorer: CollisionScorer; constructor(scorer: CollisionScorer = new TfIdfScorer()) { diff --git a/ts/packages/dispatcher/dispatcher/test/contextSelectorStrategy.spec.ts b/ts/packages/dispatcher/dispatcher/test/contextSelectorStrategy.spec.ts new file mode 100644 index 000000000..e118e6899 --- /dev/null +++ b/ts/packages/dispatcher/dispatcher/test/contextSelectorStrategy.spec.ts @@ -0,0 +1,116 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import { + ContextResolutionStrategy, + ContextSelectorEvaluation, + TfIdfStrategy, +} from "../src/context/contextSelector/strategy.js"; +import { ScorerCandidate } from "../src/context/contextSelector/scorer.js"; +import { rankScores } from "../src/context/contextSelector/decision.js"; +import { ContextVector } from "../src/context/contextSelector/conversationSignal.js"; + +function cand( + schemaName: string, + actionName: string, + keywords: string[], +): ScorerCandidate { + return { schemaName, actionName, keywords: new Set(keywords) }; +} + +function vector(entries: Record<string, number>): ContextVector { + return new Map(Object.entries(entries)); +} + +describe("contextSelector/strategy", () => { + it("TfIdfStrategy bundles scoring + the count-based decision", () => { + const strategy = new TfIdfStrategy(); + const c = vector({ spreadsheet: 2, formula: 1.5 }); + const { decision, winnerNote } = strategy.evaluate( + c, + [ + cand("excel", "addRow", ["spreadsheet", "formula", "cell"]), + cand("list", "addItems", ["grocery", "shopping"]), + ], + { minUniqueTokens: 2, minMass: 1.0, margin: 0.5 }, + ); + expect(decision.kind).toBe("resolve"); + if (decision.kind === "resolve") { + expect(decision.winner.schemaName).toBe("excel"); + } + expect(winnerNote).toContain("mass"); + }); + + it("TfIdfStrategy abstains via its own coverage guard", () => { + const
strategy = new TfIdfStrategy(); + const { decision } = strategy.evaluate( + vector({ spreadsheet: 5 }), + [ + cand("excel", "addRow", ["spreadsheet"]), + cand("list", "addItems", []), // uncovered + ], + { minUniqueTokens: 2, minMass: 1.0, margin: 0.5 }, + ); + expect(decision.kind).toBe("abstain"); + if (decision.kind === "abstain") + expect(decision.reason).toBe("coverage"); + }); + + // Requirement B: a non-TF-IDF strategy (here a stand-in "similarity" scorer + // with its own config, decision policy, evidence, and reason vocabulary) + // satisfies the seam with no change to the engine, decision, or orchestrator. + it("supports an alternate strategy with its own config and reasons", () => { + type SimConfig = { floor: number }; + const scores = new Map([ + ["excel.addRow", 0.82], + ["list.addItems", 0.31], + ]); + const embeddingLike: ContextResolutionStrategy<SimConfig> = { + evaluate(_ctx, candidates, config): ContextSelectorEvaluation { + const ranked = rankScores( + candidates.map((cd) => ({ + schemaName: cd.schemaName, + actionName: cd.actionName, + score: + scores.get(`${cd.schemaName}.${cd.actionName}`) ?? + 0, + })), + ); + const winner = ranked[0]; + if (winner.score < config.floor) { + return { + decision: { + kind: "abstain", + reason: "similarity-floor", + ranked, + }, + winnerNote: "", + }; + } + return { + decision: { + kind: "resolve", + winner, + runnerUp: ranked[1], + ranked, + }, + winnerNote: `cosine ${winner.score.toFixed(3)}`, + }; + }, + }; + + const { decision, winnerNote } = embeddingLike.evaluate( + vector({}), + [cand("excel", "addRow", []), cand("list", "addItems", [])], + { floor: 0.7 }, + ); + expect(decision.kind).toBe("resolve"); + if (decision.kind === "resolve") { + expect(decision.winner.schemaName).toBe("excel"); + // No lexical evidence fields required from a non-lexical scorer.
+ expect(decision.winner.matched).toBeUndefined(); + expect(decision.winner.uniqueTokenCount).toBeUndefined(); + } + expect(winnerNote).toBe("cosine 0.820"); + }); +}); From bc9147e7606e75c76bd7e700a245e2ef3d14bfa4 Mon Sep 17 00:00:00 2001 From: George Ng Date: Wed, 1 Jul 2026 14:45:54 -0700 Subject: [PATCH 09/11] Update documentation to cover benchmarking goals as well --- ...xt-weighted-collision-resolution-design.md | 154 ++++++++++++++++-- 1 file changed, 141 insertions(+), 13 deletions(-) diff --git a/ts/docs/architecture/collision/context-weighted-collision-resolution-design.md b/ts/docs/architecture/collision/context-weighted-collision-resolution-design.md index 6dce47eac..e36172213 100644 --- a/ts/docs/architecture/collision/context-weighted-collision-resolution-design.md +++ b/ts/docs/architecture/collision/context-weighted-collision-resolution-design.md @@ -8,10 +8,12 @@ (specific source files are cited inline where relevant) > **How to read this doc.** §1–§2 motivate the feature; §3 is the end-to-end architecture -> diagram; §4 shows how it fits the existing dispatcher code. §5–§12 specify the design one +> diagram; §4 shows how it fits the existing dispatcher code. §5–§11 specify the design one > component at a time (each opens with its locked decision, then the reasoning and the -> alternatives weighed). §13 composes them into the v1 we ship; §14 walks two worked -> examples; §15 archives the rejected alternatives. +> alternatives weighed); §12 is the determinism checklist. §13 composes them into the v1 we +> ship and defines how we prove it works — the two-tier benchmark, its control and net-gain +> scorecard, and the layer ablation (§13.4–§13.6); §14 walks two worked examples; §15 archives +> the rejected alternatives. --- @@ -239,6 +241,11 @@ contextSelector covers the first and defers the second. 
`static` is a build-time ### Existing infrastructure we reuse +- **Registry-first tiers** (`matchRequest.ts`, `collisionResolution.ts`) run _ahead of_ any + strategy, always, independent of `detect`: they short-circuit on **Tier-0** (a pending one-shot + pick from a resolved clarify card) or **Tier-1** (a learned/explicit preference), else raise a + **Tier-2** registry clarify. `contextSelector` slots in only _after_ registry-first returns + nothing — so it never overrides an explicit user choice. - **`resolvePreferenceClarify`** (`collisionResolution.ts`) is the shared resolution policy both stages already call for `preference-clarify` — the natural host if we later want both-stage coverage from a single change. @@ -989,7 +996,8 @@ keyword pipeline does **not** violate G3. All of the above ship **together** as v1: no manifest change, no onboarding-LLM in the hot path, one integration point, a fixed correctness guard, and a trust-preserving affordance. -Delivers the named excel↔list scenario. +Delivers the named excel↔list scenario. Before `contextSelector.detect` flips on, a two-tier +local benchmark must show a net gain over today's funnel with **zero regressions** (§13.4–§13.6). ### 13.2 Deferred to later (stretch goals, not in v1) @@ -1013,17 +1021,137 @@ learned-preference bootstrap** (confirm-then-learn). ### 13.4 Rollout & validation -No users / no production traffic, so there is **no real-traffic shadow phase**. Instead: +No users and no production traffic, so there is **no real-traffic shadow phase**. The gate to +ship is a local, two-tier benchmark: -1. Ship with `detect: off` (a simple on/off feature gate). -2. **Validate locally against fixtures** — labeled collision scenarios (the vampire↔list set - plus spreadsheet/calendar cases) — checking resolve/abstain behavior and calibrating the - evidence-gate thresholds (`minUniqueTokens` / `minMass` / `margin`). `λ=0.9` / `N=20` were - chosen up front (§8) and are likewise fixture-validated. -3. 
Flip `detect: on` once the local benchmarks pass. +1. **Ship dark.** Land with `contextSelector.detect: off` — a simple on/off feature gate, + invisible until flipped. +2. **Calibrate on unit fixtures (deterministic, no LLM).** Replay labeled collision scenarios — + the `excel↔list` running example (§10, §14), the `calendar↔taskflow` case (§6.2), and the + adversarial `list↔vampire` cluster from the shipped registry — to check every resolve/abstain + decision and tune the evidence-gate thresholds (`minUniqueTokens` / `minMass` / `margin`). + `λ=0.9` and `N=20` are fixed up front (§8) and re-validated here. +3. **Confirm net gain on the funnel benchmark (§13.5).** Run the end-to-end A/B/C and prove a net + gain (accuracy and/or cost) with zero regressions. +4. **Flip `contextSelector.detect: on`** once both tiers pass. -Telemetry (per-candidate score + the matched `token→weight` pairs) is emitted so the local -benchmark output is explainable and exact. +Every decision emits telemetry — per-candidate score plus the matched `token→weight` pairs — so +both tiers are explainable and exact, not merely pass/fail. + +### 13.5 Measuring net gain — the control and the scorecard + +> **The one question this benchmark must answer: does adding `contextSelector` route more +> collisions correctly (or as correctly, but cheaper) than the system does today — without ever +> making a route it already gets right worse?** + +**The control is the whole current funnel, not `first-match` alone.** A colliding request already +flows through several resolution layers before any answer comes back (§4): + +``` +grammar / cache match (≥2 validated matches → collision) + 1. registry-first (always) → Tier-0 one-shot · Tier-1 preference → resolve (cache path, no LLM) + · else Tier-2 registry clarify + · pick names an unmatched sibling → fall through ↓ + 2. 
contextSelector (if detect) → confident → resolve (cache path, no LLM) ← inserts here + · abstain → defer to strategy (default) | escalate-to-llm ↓ + 3. collision strategy (if detect) → first-match | score-rank | priority → resolve (cache path, no LLM) + · user-clarify → clarify card + ── only a registry fall-through, an escalate-to-llm abstain, or a Stage-1 miss continues: ── + cache-miss path → embedding pickInitialSchema (no LLM) → LLM translation +``` + +The shape matters for everything below: **most collisions resolve on the cache path with no +LLM.** The `first-match` / `score-rank` / `priority` strategies each return a Stage-1 match +(`matchCollision.ts`), so a collision reaches the translator only on a registry fall-through, an +`escalate-to-llm` abstain, or a Stage-1 miss (§4). + +So the **control** is this exact funnel with `contextSelector` disabled (its slot abstains +always). Because the default fallback — `first-match` — is itself LLM-free, the benchmark needs +**two baselines, one per axis**: a silent accuracy baseline and an escalate-to-llm cost baseline. +`user-clarify` is excluded from both — it interrupts the user, so it is not apples-to-apples with +a silent selector (it is the correctness _ceiling_, not a control). + +**A/B/C configuration** (`grammarMatch.detect` on for all three — measure-only; with `first-match` +the behavior is identical to legacy, `matchRequest.ts`): + +| Arm | `contextSelector` | fallback when unresolved | reaches LLM? 
| measures | +| ------------------------ | ----------------- | ------------------------ | ------------ | ----------------- | +| **Control-A (accuracy)** | off | `first-match` | no | accuracy baseline | +| **Treatment-B** | on | `first-match` on abstain | no | accuracy · cost | +| **Control-C (cost)** | off | `escalate-to-llm` | yes | cost baseline | + +Control-C has no production code path — `escalate-to-llm` exists only as a `contextSelector` +abstain mode (`matchRequest.ts`) — so arm C is a **benchmark-only harness toggle** that routes +every slot-reaching collision to the translator, purely to give Cost Δ a denominator. + +**Scope the denominator honestly.** `contextSelector` runs _after_ registry-first and _ahead of_ +the configured strategy (`matchRequest.ts`); it can never override a Tier-0 one-shot or a Tier-1 +preference — those short-circuit upstream. So the measurable denominator is **N = collisions that +reach the contextSelector slot** (they pass registry-first); report N with every result. Cases a +preference / one-shot / registry already resolves are included only to prove **non-interference** +(treatment ≡ control there by construction). + +**Ground truth** is the corpus's authored target per phrase — the same label `@collision corpus +run` uses to classify CLEAN / TIGHT / MISROUTE. The scorecard is a three-way per collision, +`groundTruth × outcome(control) × outcome(treatment)`, rolling up to three numbers: + +| Metric | Question | Baseline | Target | +| --------------- | ------------------------------------------------------------------------------------------------- | -------- | ----------------- | +| **Accuracy Δ** | Did routing get _more_ correct than the silent control? | A | ≥ 0 (ideally > 0) | +| **Cost Δ** | How many LLM translations do confident picks eliminate? 
_(0 vs `first-match`, already LLM-free.)_ | C | ≥ 0 | +| **Regressions** | Control-right → treatment-wrong (a route we used to get right, now broken) | A | **0 — hard gate** | + +**Ship if** regressions = 0 (vs A), accuracy does not drop, and at least one axis is a strict +win — more correct routes (vs A) _or_ the same routes at fewer LLM calls (vs C). Net gain may come +from **either** axis: the value proposition (§11) is a confident pick that routes well on the +cache path _instead of_ escalating to the LLM. The regression count is the release gate — the +abstain bias (§10) exists precisely to keep it at zero, so the benchmark's real job is to _prove_ +that, not merely to tally wins. + +**Controlling LLM noise.** The accuracy comparison (A vs B) is entirely LLM-free and therefore +deterministic — its gate never rests on a stochastic run. Where the LLM _is_ in the loop (the cost +arm C, registry fall-throughs, cache-miss translation), pin translation to temperature 0 with a +fixed seed and count a regression only if it reproduces across replays. The deterministic unit +tier (§13.4) carries the precise threshold calibration. + +Reuse the existing corpus pipeline (`@collision corpus run`) as the end-to-end harness, and the +`firstMatchCandidate` telemetry field — already recorded on every collision (`matchCollision.ts`) +— as the built-in `first-match` comparator. + +### 13.6 Layer ablation (secondary — apples-to-apples per layer) + +> **Lower priority than §13.5.** Net gain vs. the control is the release gate; the ablation is a +> follow-on that tells us _which layers still earn their slot_ once `contextSelector` exists — +> i.e. whether any can be simplified away. + +Because the layers form a cascade, a layer's value is **conditional on the layers downstream of +it**, and leave-one-out deltas do **not** sum. 
So the useful artifact is not a single scalar but +an **overlap matrix**: for each collision, record which layer _would_ resolve it and to what, +scored against ground truth, then read off each layer's _unique_ contribution. + +| For a case a layer resolves correctly, it is also caught by… | reading | +| ------------------------------------------------------------ | ---------------------------------- | +| a cheaper upstream layer (e.g. `first-match`) | redundant — **skip candidate** | +| only the LLM tail | pure **cost** win (saves the call) | +| **no other layer** | **unique contribution — keep** | + +**Concrete skip candidates to settle, per automatic layer:** + +| Layer | Question the ablation answers | +| ----------------------------- | ----------------------------------------------------------------------------------------------------------- | +| `score-rank` vs `first-match` | Do they ever diverge, and is `score-rank` right when they do? (the open question in `collision-rollout.md`) | +| `priority` | Does a static order still pay once `contextSelector` supplies a dynamic topical signal? | +| `contextSelector` | Unique correct routes over the LLM tail, or purely a cost optimization? | +| embedding `pickInitialSchema` | Is its pick ever consequential when the translator runs regardless? | + +**Two guardrails on the ablation:** + +- **Do not ablate the user-intent layers on accuracy.** Tier-0 one-shot and Tier-1 learned + preferences encode _explicit user intent_ ("remember this choice"), not topical correctness — + their ground truth is the user's stated preference. They are constraints, not routers; hold them + fixed and scope the ablation to the automatic layers above. +- **Results are corpus-dependent.** A layer that looks redundant on today's agent mix may matter + on another; always report the corpus and the denominator alongside any "skip" recommendation. 
--- From 807f5aa5465b15f28bd78db6669cbd005477e682 Mon Sep 17 00:00:00 2001 From: George Ng Date: Wed, 1 Jul 2026 15:57:12 -0700 Subject: [PATCH 10/11] Fix bug with workflow cli build --- ts/examples/workflow/cli/src/cli.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ts/examples/workflow/cli/src/cli.ts b/ts/examples/workflow/cli/src/cli.ts index c673fd254..88b490760 100644 --- a/ts/examples/workflow/cli/src/cli.ts +++ b/ts/examples/workflow/cli/src/cli.ts @@ -16,6 +16,7 @@ import { isGenericTask, isTypeParamRef, SchemaTemplateDefinition, + TaskTypeParameter, } from "workflow-model"; import { TaskRegistry, @@ -224,7 +225,9 @@ function cmdListTasks(): void { for (const task of allBuiltinTasks) { console.log(`${task.name}`); if (isGenericTask(task)) { - const params = task.typeParameters.map((p) => p.name).join(", "); + const params = task.typeParameters + .map((p: TaskTypeParameter) => p.name) + .join(", "); console.log(` <${params}> (generic)`); const inputTmpl = task.inputSchemaTemplate; if ( From 6ed556536bd53b656f6b662a564b350abe79bbed Mon Sep 17 00:00:00 2001 From: George Ng Date: Wed, 1 Jul 2026 17:48:21 -0700 Subject: [PATCH 11/11] Update collision keyword ordering --- .../handlers/collisionKeywordHandlers.ts | 244 +++++++----------- 1 file changed, 92 insertions(+), 152 deletions(-) diff --git a/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionKeywordHandlers.ts b/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionKeywordHandlers.ts index 0d488f286..ecdf26a20 100644 --- a/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionKeywordHandlers.ts +++ b/ts/packages/dispatcher/dispatcher/src/context/system/handlers/collisionKeywordHandlers.ts @@ -5,12 +5,17 @@ // overrides consumed by the contextSelector tier (design §5.3). 
Edits land in // the profile-scoped `collision-keywords.json` sidecar as deltas over the // derived lexical defaults; the merged effective set feeds the scorer (§9). +// +// Target-first grammar (the schema.action is the primary key, §5.3): +// @collision keywords # list all overrides +// @collision keywords <schema.action> # show derived + overrides, merged +// @collision keywords <schema.action> list # (same as above) +// @collision keywords <schema.action> add k1 k2… # add discriminative keywords +// @collision keywords <schema.action> remove k1… # mask keywords +// @collision keywords <schema.action> clear # revert to derived-only import { ActionContext, ParsedCommandParams } from "@typeagent/agent-sdk"; -import { - CommandHandler, - CommandHandlerTable, -} from "@typeagent/agent-sdk/helpers/command"; +import { CommandHandler } from "@typeagent/agent-sdk/helpers/command"; import { displayResult, displayWarn, @@ -76,73 +81,43 @@ function showEffective( displayResult(lines.join("\n"), context); } -class CollisionKeywordsListCommandHandler implements CommandHandler { - public readonly description = - "Show derived + override keywords, merged, for a schema.action (or list all overrides)"; - public readonly parameters = { - args: { - target: { - description: - 'A schema.action, e.g. "excel.addRow". Omit to list all overrides.', - type: "string", - optional: true, - }, - }, - } as const; - - public async run( - context: ActionContext, - params: ParsedCommandParams, - ) { - const ctx = context.sessionContext.agentContext; - if (params.args.target === undefined) { - const entries = ctx.contextSelectorSidecar.list(); - if (entries.length === 0) { - displayResult( - "No keyword overrides.
Add one with `@collision keywords add `.", - context, - ); - return; - } - const lines = entries.map(({ id, delta }) => { - const parts: string[] = []; - if (delta.replace) parts.push(`replace=[${delta.replace}]`); - if (delta.add?.length) parts.push(`add=[${delta.add}]`); - if (delta.remove?.length) - parts.push(`remove=[${delta.remove}]`); - return `- ${id}: ${parts.join(" ")}`; - }); - displayResult( - `Keyword overrides (${entries.length}):\n${lines.join("\n")}`, - context, - ); - return; - } - const target = parseTarget(params.args.target); - if (target === undefined) { - displayWarn( - `Invalid target "${params.args.target}". Expected schema.action.`, - context, - ); - return; - } - showEffective(context, target); +function listAllOverrides(context: ActionContext): void { + const entries = + context.sessionContext.agentContext.contextSelectorSidecar.list(); + if (entries.length === 0) { + displayResult( + "No keyword overrides. Add one with `@collision keywords add `.", + context, + ); + return; } + const lines = entries.map(({ id, delta }) => { + const parts: string[] = []; + if (delta.replace) parts.push(`replace=[${delta.replace}]`); + if (delta.add?.length) parts.push(`add=[${delta.add}]`); + if (delta.remove?.length) parts.push(`remove=[${delta.remove}]`); + return `- ${id}: ${parts.join(" ")}`; + }); + displayResult( + `Keyword overrides (${entries.length}):\n${lines.join("\n")}`, + context, + ); } -class CollisionKeywordsAddCommandHandler implements CommandHandler { +// Single target-first handler (§5.3). Tokens are parsed manually so the target +// (schema.action) can come first with an optional trailing verb — matching the +// documented syntax rather than the framework's verb-first subcommand shape. 
+class CollisionKeywordsCommandHandler implements CommandHandler { public readonly description = - "Add discriminative keywords for a schema.action (layered over the derived defaults)"; + "Inspect/tune contextSelector keyword vectors: @collision keywords [ [list|add|remove|clear] [keywords…]]"; public readonly parameters = { args: { - target: { - description: 'The schema.action to tune, e.g. "excel.addRow".', - type: "string", - }, - keywords: { - description: "One or more keywords to add.", + tokens: { + description: + 'e.g. "list.addItems", "list.addItems add grocery shopping", or omit to list all overrides.', type: "string", multiple: true, + optional: true, }, }, } as const; @@ -151,106 +126,71 @@ class CollisionKeywordsAddCommandHandler implements CommandHandler { context: ActionContext, params: ParsedCommandParams, ) { - const target = parseTarget(params.args.target); - if (target === undefined) { - displayWarn( - `Invalid target "${params.args.target}". Expected schema.action.`, - context, - ); + const tokens = params.args.tokens ?? []; + if (tokens.length === 0) { + listAllOverrides(context); return; } - context.sessionContext.agentContext.contextSelectorSidecar.addKeywords( - target.id, - params.args.keywords, - ); - showEffective(context, target); - } -} - -class CollisionKeywordsRemoveCommandHandler implements CommandHandler { - public readonly description = - "Remove keywords from a schema.action's effective set (masks derived + added)"; - public readonly parameters = { - args: { - target: { - description: 'The schema.action to tune, e.g. 
"excel.addRow".', - type: "string", - }, - keywords: { - description: "One or more keywords to remove.", - type: "string", - multiple: true, - }, - }, - } as const; - - public async run( - context: ActionContext, - params: ParsedCommandParams, - ) { - const target = parseTarget(params.args.target); + const target = parseTarget(tokens[0]); if (target === undefined) { displayWarn( - `Invalid target "${params.args.target}". Expected schema.action.`, + `Invalid target "${tokens[0]}". Expected schema.action, e.g. "list.addItems".`, context, ); return; } - context.sessionContext.agentContext.contextSelectorSidecar.removeKeywords( - target.id, - params.args.keywords, - ); - showEffective(context, target); - } -} - -class CollisionKeywordsClearCommandHandler implements CommandHandler { - public readonly description = - "Clear all overrides for a schema.action (revert to derived-only)"; - public readonly parameters = { - args: { - target: { - description: 'The schema.action to reset, e.g. "excel.addRow".', - type: "string", - }, - }, - } as const; + const verb = (tokens[1] ?? "list").toLowerCase(); + const keywords = tokens.slice(2); + const sidecar = + context.sessionContext.agentContext.contextSelectorSidecar; - public async run( - context: ActionContext, - params: ParsedCommandParams, - ) { - const target = parseTarget(params.args.target); - if (target === undefined) { - displayWarn( - `Invalid target "${params.args.target}". Expected schema.action.`, - context, - ); - return; + switch (verb) { + case "list": + case "show": + showEffective(context, target); + return; + case "add": + if (keywords.length === 0) { + displayWarn( + `Provide keywords to add, e.g. "@collision keywords ${target.id} add grocery shopping".`, + context, + ); + return; + } + sidecar.addKeywords(target.id, keywords); + showEffective(context, target); + return; + case "remove": + if (keywords.length === 0) { + displayWarn( + `Provide keywords to remove, e.g. 
"@collision keywords ${target.id} remove office".`, + context, + ); + return; + } + sidecar.removeKeywords(target.id, keywords); + showEffective(context, target); + return; + case "clear": { + const removed = sidecar.clearEntry(target.id); + displayResult( + removed + ? `Cleared overrides for ${target.id} (reverted to derived-only).` + : `No overrides for ${target.id}.`, + context, + ); + return; + } + default: + displayWarn( + `Unknown verb "${verb}". Use list, add, remove, or clear.`, + context, + ); + return; } - const removed = - context.sessionContext.agentContext.contextSelectorSidecar.clearEntry( - target.id, - ); - displayResult( - removed - ? `Cleared overrides for ${target.id} (reverted to derived-only).` - : `No overrides for ${target.id}.`, - context, - ); } } -export function getCollisionKeywordCommandHandlers(): CommandHandlerTable { - return { - description: - "Inspect and tune per-action keyword vectors used by the contextSelector tier", - defaultSubCommand: "list", - commands: { - list: new CollisionKeywordsListCommandHandler(), - add: new CollisionKeywordsAddCommandHandler(), - remove: new CollisionKeywordsRemoveCommandHandler(), - clear: new CollisionKeywordsClearCommandHandler(), - }, - }; +export function getCollisionKeywordCommandHandlers(): CommandHandler { + return new CollisionKeywordsCommandHandler(); }