diff --git a/apps/elf-api/src/routes.rs b/apps/elf-api/src/routes.rs
index 421f0488..2f6e6516 100644
--- a/apps/elf-api/src/routes.rs
+++ b/apps/elf-api/src/routes.rs
@@ -2960,9 +2960,21 @@ mod tests {
assert_eq!(ADMIN_VIEWER_PATH, "/viewer");
assert!(html.contains("/v2/admin/searches"));
assert!(html.contains("/v2/admin/traces/recent"));
+ assert!(html.contains("/v2/admin/traces/${encodeURIComponent(traceId)}/bundle"));
assert!(html.contains("/v2/admin/notes/"));
+ assert!(html.contains("mode: \"full\""));
+ assert!(html.contains("candidates_limit: 200"));
+ assert!(html.contains("Replay Candidates"));
+ assert!(html.contains("Selected Final Results"));
+ assert!(html.contains("Providers And Ranking"));
+ assert!(html.contains("Relation Context"));
+ assert!(html.contains("directTraceId"));
assert!(!html.contains("method: \"PATCH\""));
+ assert!(!html.contains("method: \"PUT\""));
assert!(!html.contains("method: \"DELETE\""));
+ assert!(!html.contains("/v2/notes/ingest"));
+ assert!(!html.contains("/v2/events/ingest"));
+ assert!(!html.contains("/publish"));
}
#[test]
diff --git a/apps/elf-api/static/viewer.html b/apps/elf-api/static/viewer.html
index 0bf852d2..f25cb956 100644
--- a/apps/elf-api/static/viewer.html
+++ b/apps/elf-api/static/viewer.html
@@ -419,6 +419,69 @@
gap: 8px;
}
+ .metrics { /* grid of summary tiles laid out in four equal-width columns */
+ display: grid;
+ gap: 8px;
+ grid-template-columns: repeat(4, minmax(0, 1fr));
+ }
+
+ .metric { /* one tile: label stacked above value */
+ background: var(--surface-alt);
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ display: grid;
+ gap: 3px;
+ min-width: 0; /* lets the tile shrink below its content width inside the grid track */
+ padding: 9px;
+ }
+
+ .metric-label { /* small, muted, upper-cased caption */
+ color: var(--muted);
+ font-size: 11px;
+ font-weight: 800;
+ text-transform: uppercase;
+ }
+
+ .metric-value { /* long values may break at any character instead of overflowing */
+ font-size: 16px;
+ font-weight: 750;
+ overflow-wrap: anywhere;
+ }
+
+ .table-wrap { /* bordered container that scrolls when the table is larger than it */
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ overflow: auto;
+ }
+
+ table { /* fills at least the wrapper width; may grow wider and scroll */
+ border-collapse: collapse;
+ min-width: 100%;
+ }
+
+ th,
+ td { /* cells stay on a single line by default */
+ border-bottom: 1px solid var(--line);
+ padding: 7px 8px;
+ text-align: left;
+ vertical-align: top;
+ white-space: nowrap;
+ }
+
+ th { /* header cells reuse the muted upper-case caption look */
+ background: var(--surface-alt);
+ color: var(--muted);
+ font-size: 11px;
+ font-weight: 800;
+ text-transform: uppercase;
+ }
+
+ td.wrap { /* opt-in wrapping for long cells, capped at 360px */
+ max-width: 360px;
+ white-space: normal;
+ overflow-wrap: anywhere;
+ }
+
+
@media (max-width: 980px) {
.app {
grid-template-columns: 1fr;
@@ -431,7 +494,8 @@
.grid-2,
.grid-3,
- .form-row {
+ .form-row,
+ .metrics {
grid-template-columns: 1fr;
}
@@ -653,6 +717,14 @@
Recent Traces
+
@@ -675,7 +747,8 @@
Recent Traces
activeTab: "searchView",
session: null,
selectedNoteId: null,
- traceBundle: null
+ traceBundle: null,
+ traceMetrics: {}
};
const $ = (selector, root = document) => root.querySelector(selector);
@@ -751,6 +824,20 @@ Recent Traces
return value.toFixed(4);
}
+ function ms(value) {
+ return typeof value === "number" ? `${value.toFixed(1)} ms` : "none";
+ }
+
+ function recordTraceMetric(traceId, key, value) {
+ if (!traceId || typeof value !== "number") {
+ return;
+ }
+ state.traceMetrics[traceId] = {
+ ...(state.traceMetrics[traceId] || {}),
+ [key]: value
+ };
+ }
+
function chip(text, variant = "") {
return make("span", { className: `chip ${variant}`.trim(), text: String(text) });
}
@@ -770,6 +857,62 @@ Recent Traces
return make("div", { className: "kv" }, rows);
}
+ function metricGrid(pairs) {
+ return make("div", { className: "metrics" }, pairs.map(([label, value]) => {
+ return make("div", { className: "metric" }, [
+ make("div", { className: "metric-label", text: label }),
+ make("div", { className: "metric-value", text: value === undefined || value === null || value === "" ? "none" : String(value) })
+ ]);
+ }));
+ }
+
+ function table(headers, rows) {
+ const head = make("thead", {}, [
+ make("tr", {}, headers.map((header) => make("th", { text: header })))
+ ]);
+ const body = make("tbody", {}, rows.map((row) => {
+ return make("tr", {}, row.map((cell) => {
+ const value = cell && typeof cell === "object" && "value" in cell ? cell.value : cell;
+ const className = cell && typeof cell === "object" && cell.wrap ? "wrap" : "";
+ return make("td", { className, text: value === undefined || value === null || value === "" ? "none" : String(value) });
+ }));
+ }));
+ return make("div", { className: "table-wrap" }, [make("table", {}, [head, body])]);
+ }
+
+ function section(title, children) {
+ return make("div", { className: "row" }, [
+ make("div", { className: "row-head" }, [make("div", { className: "title", text: title })]),
+ ...children
+ ]);
+ }
+
+ function getPath(value, path) {
+ return path.reduce((current, key) => {
+ if (current && typeof current === "object" && key in current) {
+ return current[key];
+ }
+ return undefined;
+ }, value);
+ }
+
+ function stageByName(bundle, name) {
+ return (bundle.stages || []).find((stage) => stage.stage_name === name);
+ }
+
+ function termValue(item, name) {
+ const terms = getPath(item, ["explain", "ranking", "terms"]) || [];
+ const term = terms.find((candidate) => candidate.name === name);
+ return term ? score(term.value) : "none";
+ }
+
+ function relationContexts(items) {
+ return (items || []).flatMap((item) => {
+ const contexts = getPath(item, ["explain", "relation_context"]) || [];
+ return contexts.map((context) => ({ item, context }));
+ });
+ }
+
function loadContext() {
const saved = JSON.parse(localStorage.getItem("elf.viewer.context") || "{}");
if (saved.tenantId) $("#tenantId").value = saved.tenantId;
@@ -906,10 +1049,12 @@ Recent Traces
payload_level: $("#searchPayloadLevel").value
};
try {
+ const started = performance.now();
const session = await api("/v2/admin/searches", {
method: "POST",
body: JSON.stringify(body)
});
+ recordTraceMetric(session.trace_id, "search_request_ms", performance.now() - started);
state.selectedNoteId = session.items && session.items[0] ? session.items[0].note_id : null;
renderSearchSession(session);
$("#loadSearchId").value = session.search_id;
@@ -934,7 +1079,9 @@ Recent Traces
}
setStatus("Loading session...");
try {
+ const started = performance.now();
const session = await api(`/v2/admin/searches/${encodeURIComponent(searchId)}${queryString({ top_k: $("#topK").value || 12, touch: "true" })}`);
+ recordTraceMetric(session.trace_id, "session_readback_ms", performance.now() - started);
state.selectedNoteId = session.items && session.items[0] ? session.items[0].note_id : null;
renderSearchSession(session);
await Promise.all([
@@ -1126,14 +1273,28 @@ Recent Traces
}
}
+ async function loadTraceById() {
+ const traceId = $("#directTraceId").value.trim();
+ if (!traceId) {
+ setStatus("Trace ID is required.", true);
+ return;
+ }
+ await loadTraceBundle(traceId, $("#traceBundleDetail"));
+ showTab("tracesView");
+ }
+
async function loadTraceBundle(traceId, target) {
if (!traceId) {
return;
}
const detailTarget = target || $("#traceBundleDetail");
+ $("#directTraceId").value = traceId;
detailTarget.replaceChildren(empty("Loading trace..."));
try {
- const bundle = await api(`/v2/admin/traces/${encodeURIComponent(traceId)}/bundle${queryString({ mode: "bounded", stage_items_limit: 64, candidates_limit: 0 })}`);
+ const started = performance.now();
+ const bundle = await api(`/v2/admin/traces/${encodeURIComponent(traceId)}/bundle${queryString({ mode: "full", stage_items_limit: 128, candidates_limit: 200 })}`);
+ recordTraceMetric(traceId, "trace_readback_ms", performance.now() - started);
+ bundle.viewer_metrics = state.traceMetrics[traceId] || {};
renderTraceBundle(detailTarget, bundle);
if (detailTarget === $("#traceDetail")) {
state.traceBundle = bundle;
@@ -1150,53 +1311,192 @@ Recent Traces
return;
}
const trace = bundle.trace;
- const items = bundle.items || [];
+ const metrics = bundle.viewer_metrics || {};
const stages = bundle.stages || [];
+ const candidates = bundle.candidates || [];
+ const recall = stageByName(bundle, "recall.candidates");
+ const fusion = stageByName(bundle, "fusion.merge");
+ const rerank = stageByName(bundle, "rerank.score");
+ const finalStage = stageByName(bundle, "selection.final");
target.replaceChildren(
- kvTable([
- ["trace_id", trace.trace_id],
- ["query", trace.query],
- ["agent", trace.agent_id],
- ["read_profile", trace.read_profile],
- ["expansion_mode", trace.expansion_mode],
- ["candidate_count", trace.candidate_count],
- ["top_k", trace.top_k],
- ["created_at", dateText(trace.created_at)],
- ["trace_version", trace.trace_version]
- ]),
- make("div", { className: "split-stack", style: "margin-top: 12px;" }, [
- make("div", { className: "title", text: "Expanded queries" }),
- make("div", { className: "chips" }, (trace.expanded_queries || []).map((query) => chip(query, "teal"))),
- make("div", { className: "title", text: "Config snapshot" }),
- pre(trace.config_snapshot || {}),
- make("div", { className: "title", text: "Items" }),
- items.length ? make("div", { className: "list" }, items.map(traceItemRow)) : empty("No trace items."),
- make("div", { className: "title", text: "Stages" }),
- stages.length ? make("div", { className: "list" }, stages.map(stageRow)) : empty("No stages.")
+ make("div", { className: "split-stack" }, [
+ metricGrid([
+ ["Candidates", trace.candidate_count],
+ ["Replay Rows", candidates.length],
+ ["Final Results", (bundle.items || []).length],
+ ["Top K", trace.top_k],
+ ["Search Latency", ms(metrics.search_request_ms)],
+ ["Session Readback", ms(metrics.session_readback_ms)],
+ ["Trace Readback", ms(metrics.trace_readback_ms)],
+ ["Trace Age", trace.created_at ? `${Math.max(0, (Date.now() - new Date(trace.created_at).getTime()) / 1000).toFixed(0)}s` : "none"]
+ ]),
+ section("Trace", [
+ kvTable([
+ ["trace_id", trace.trace_id],
+ ["query", trace.query],
+ ["agent", trace.agent_id],
+ ["read_profile", trace.read_profile],
+ ["expansion_mode", trace.expansion_mode],
+ ["allowed_scopes", (trace.allowed_scopes || []).join(", ")],
+ ["created_at", dateText(trace.created_at)],
+ ["generated_at", dateText(bundle.generated_at)],
+ ["trace_version", trace.trace_version]
+ ]),
+ make("div", { className: "chips" }, (trace.expanded_queries || []).map((query) => chip(query, "teal")))
+ ]),
+ renderProviderSection(trace),
+ renderStageSummarySection(stages),
+ section("Retrieval Funnel", [
+ metricGrid([
+ ["Recall Before Filter", getPath(recall, ["stage_payload", "stats", "candidate_count_before_filter"]) ?? "none"],
+ ["Recall After Filter", getPath(recall, ["stage_payload", "stats", "candidate_count_after_filter"]) ?? "none"],
+ ["Fusion Scored", getPath(fusion, ["stage_payload", "stats", "scored_count"]) ?? "none"],
+ ["Reranked", getPath(rerank, ["stage_payload", "stats", "reranked_count"]) ?? "none"],
+ ["Selected", getPath(finalStage, ["stage_payload", "stats", "selected_count"]) ?? "none"],
+ ["Snippets", getPath(recall, ["stage_payload", "stats", "snippet_count"]) ?? "none"],
+ ["Fusion Weight", getPath(fusion, ["stage_payload", "decisions", "fusion_weight"]) ?? "none"],
+ ["Structured Weight", getPath(fusion, ["stage_payload", "decisions", "structured_field_weight"]) ?? "none"]
+ ])
+ ]),
+ renderFinalResultsSection(bundle),
+ renderCandidateSection(bundle),
+ renderRelationContextSection(bundle.items || []),
+ renderStageDetailsSection(stages)
])
);
}
- function traceItemRow(item) {
- const terms = item.explain && item.explain.ranking ? item.explain.ranking.terms || [] : [];
- const termChips = terms.slice(0, 6).map((term) => chip(`${term.name}: ${score(term.value)}`));
- return make("div", { className: "row trace-item" }, [
- make("div", { className: "row-head" }, [
- make("div", { className: "title", text: `Rank ${item.rank} | ${item.note_id}` }),
- chip(item.result_handle, "indigo")
- ]),
- make("div", { className: "chips" }, termChips),
- pre(item.explain || {})
+ function renderProviderSection(trace) {
+ const cfg = trace.config_snapshot || {};
+ const embedding = getPath(cfg, ["providers", "embedding"]) || {};
+ const rerank = getPath(cfg, ["providers", "rerank"]) || {};
+ const qdrant = getPath(cfg, ["storage", "qdrant"]) || {};
+ const ranking = cfg.ranking || {};
+ const blend = ranking.blend || {};
+ const diversity = ranking.diversity || {};
+ const retrievalSources = ranking.retrieval_sources || {};
+ return section("Providers And Ranking", [
+ kvTable([
+ ["retrieval_channels", "dense + bm25 via Qdrant fusion"],
+ ["embedding", `${embedding.provider_id || "none"} / ${embedding.model || "none"} / ${embedding.dimensions || "none"} dims`],
+ ["rerank", `${rerank.provider_id || "none"} / ${rerank.model || "none"}`],
+ ["qdrant", `${qdrant.collection || "none"} / vector_dim ${qdrant.vector_dim || "none"}`],
+ ["policy_id", ranking.policy_id],
+ ["blend", `enabled ${blend.enabled ?? "none"} / ${blend.retrieval_normalization || "none"} -> ${blend.rerank_normalization || "none"}`],
+ ["diversity", `enabled ${diversity.enabled ?? "none"} / sim ${diversity.sim_threshold ?? "none"}`],
+ ["source_weights", `fusion ${retrievalSources.fusion_weight ?? "none"} / structured ${retrievalSources.structured_field_weight ?? "none"} / recursive ${retrievalSources.recursive_weight ?? "none"}`],
+ ["override", ranking.override ? "present" : "none"]
+ ])
]);
}
- function stageRow(stage) {
- return make("div", { className: "row" }, [
- make("div", { className: "row-head" }, [
- make("div", { className: "title", text: `${stage.stage_order}. ${stage.stage_name}` }),
- chip(`${(stage.items || []).length} items`, "teal")
- ]),
- pre(stage.stage_payload || stage)
+ function renderStageSummarySection(stages) {
+ if (!stages.length) {
+ return section("Stage Summary", [empty("No stages.")]);
+ }
+ return section("Stage Summary", [
+ table(
+ ["Stage", "Items", "Stats", "Decisions"],
+ stages.map((stage) => [
+ `${stage.stage_order}. ${stage.stage_name}`,
+ (stage.items || []).length,
+ { value: formatJson(getPath(stage, ["stage_payload", "stats"]) || {}), wrap: true },
+ { value: formatJson(getPath(stage, ["stage_payload", "decisions"]) || {}), wrap: true }
+ ])
+ )
+ ]);
+ }
+
+ function renderFinalResultsSection(bundle) {
+ const items = bundle.items || [];
+ if (!items.length) {
+ return section("Selected Final Results", [empty("No final items.")]);
+ }
+ return section("Selected Final Results", [
+ table(
+ ["Rank", "Note", "Chunk", "Final", "Retrieval", "Rerank", "Tie", "Scope Boost", "Relations", "Handle"],
+ items.map((item) => [
+ item.rank,
+ item.note_id,
+ item.chunk_id || "none",
+ score(getPath(item, ["explain", "ranking", "final_score"])),
+ termValue(item, "blend.retrieval"),
+ termValue(item, "blend.rerank"),
+ termValue(item, "tie_breaker"),
+ termValue(item, "context.scope_boost"),
+ (getPath(item, ["explain", "relation_context"]) || []).length,
+ item.result_handle
+ ])
+ )
+ ]);
+ }
+
+ function renderCandidateSection(bundle) {
+ const candidates = bundle.candidates || [];
+ if (!candidates.length) {
+ return section("Replay Candidates", [empty("No persisted candidate snapshots.")]);
+ }
+ const finalRanks = new Map((bundle.items || []).map((item) => [item.note_id, item.rank]));
+ return section("Replay Candidates", [
+ table(
+ ["Retrieval Rank", "Final Rank", "Note", "Chunk", "Retrieval Score", "Rerank Score", "Scope", "Importance", "Updated", "Snippet"],
+ candidates.map((candidate) => [
+ candidate.retrieval_rank,
+ finalRanks.get(candidate.note_id) || "-",
+ candidate.note_id,
+ candidate.chunk_id,
+ score(candidate.retrieval_score),
+ score(candidate.rerank_score),
+ candidate.note_scope,
+ score(candidate.note_importance),
+ dateText(candidate.note_updated_at),
+ { value: candidate.snippet, wrap: true }
+ ])
+ )
+ ]);
+ }
+
+ function renderRelationContextSection(items) {
+ const relations = relationContexts(items);
+ if (!relations.length) {
+ return section("Relation Context", [empty("No relation context attached to selected results.")]);
+ }
+ return section("Relation Context", [
+ table(
+ ["Rank", "Scope", "Subject", "Predicate", "Object", "Evidence Notes"],
+ relations.map(({ item, context }) => [
+ item.rank,
+ context.scope,
+ getPath(context, ["subject", "canonical"]) || "none",
+ context.predicate,
+ getPath(context, ["object", "entity", "canonical"]) || getPath(context, ["object", "value"]) || "none",
+ (context.evidence_note_ids || []).join(", ")
+ ])
+ )
+ ]);
+ }
+
+ function renderStageDetailsSection(stages) {
+ if (!stages.length) {
+ return section("Stage Details", [empty("No stage details.")]);
+ }
+ return section("Stage Details", [
+ make("div", { className: "list" }, stages.map((stage) => {
+ const rows = (stage.items || []).map((item) => [
+ item.note_id || "none",
+ item.chunk_id || "none",
+ item.item_id || "none",
+ { value: formatJson(item.metrics || {}), wrap: true }
+ ]);
+ return make("div", { className: "row" }, [
+ make("div", { className: "row-head" }, [
+ make("div", { className: "title", text: `${stage.stage_order}. ${stage.stage_name}` }),
+ chip(`${(stage.items || []).length} items`, "teal")
+ ]),
+ pre(stage.stage_payload || {}),
+ rows.length ? table(["Note", "Chunk", "Item", "Metrics"], rows) : empty("No stage items.")
+ ]);
+ }))
]);
}
@@ -1231,6 +1531,7 @@ Recent Traces
$("#loadTimelineButton").addEventListener("click", loadTimeline);
$("#loadNotesButton").addEventListener("click", loadNotes);
$("#loadTracesButton").addEventListener("click", loadRecentTraces);
+ $("#loadTraceByIdButton").addEventListener("click", loadTraceById);
$("#refreshActive").addEventListener("click", refreshActive);
}
diff --git a/apps/elf-eval/src/app.rs b/apps/elf-eval/src/app.rs
index 94bd819d..b5234bc9 100644
--- a/apps/elf-eval/src/app.rs
+++ b/apps/elf-eval/src/app.rs
@@ -916,6 +916,7 @@ fn decode_trace_replay_candidates(
chunk_index: row.chunk_index,
snippet: row.snippet,
retrieval_rank: u32::try_from(row.retrieval_rank).unwrap_or(0),
+ retrieval_score: None,
rerank_score: row.rerank_score,
note_scope: row.note_scope,
note_importance: row.note_importance,
@@ -1481,6 +1482,7 @@ mod tests {
chunk_index: 0,
snippet: "a".to_string(),
retrieval_rank: 1,
+ retrieval_score: None,
rerank_score: 0.1,
note_scope: "project_shared".to_string(),
note_importance: 0.1,
@@ -1502,6 +1504,7 @@ mod tests {
chunk_index: 1,
snippet: "a".to_string(),
retrieval_rank: 2,
+ retrieval_score: None,
rerank_score: 0.2,
note_scope: "project_shared".to_string(),
note_importance: 0.1,
@@ -1523,6 +1526,7 @@ mod tests {
chunk_index: 0,
snippet: "b".to_string(),
retrieval_rank: 3,
+ retrieval_score: None,
rerank_score: 0.3,
note_scope: "org_shared".to_string(),
note_importance: 0.1,
@@ -1544,6 +1548,7 @@ mod tests {
chunk_index: 0,
snippet: "c".to_string(),
retrieval_rank: 4,
+ retrieval_score: None,
rerank_score: 0.4,
note_scope: "org_shared".to_string(),
note_importance: 0.1,
diff --git a/apps/elf-eval/src/bin/live_baseline_elf.rs b/apps/elf-eval/src/bin/live_baseline_elf.rs
index 18ec7ba0..82703ad8 100644
--- a/apps/elf-eval/src/bin/live_baseline_elf.rs
+++ b/apps/elf-eval/src/bin/live_baseline_elf.rs
@@ -290,6 +290,7 @@ struct CheckResult {
struct QueryResult {
id: String,
task: Option,
+ trace_id: Uuid,
query: String,
expected_doc: String,
allowed_alternate_docs: Vec,
@@ -1924,6 +1925,7 @@ async fn run_single_query(
Ok(QueryResult {
id: case.id,
task: case.task,
+ trace_id: response.trace_id,
query: case.query,
expected_doc: case.expected_doc,
allowed_alternate_docs: case.allowed_alternate_docs,
diff --git a/apps/elf-eval/src/bin/trace_regression_gate.rs b/apps/elf-eval/src/bin/trace_regression_gate.rs
index 44dd93e4..54716bf7 100644
--- a/apps/elf-eval/src/bin/trace_regression_gate.rs
+++ b/apps/elf-eval/src/bin/trace_regression_gate.rs
@@ -191,6 +191,7 @@ fn decode_trace_replay_candidates(
chunk_index: row.chunk_index,
snippet: row.snippet,
retrieval_rank: u32::try_from(row.retrieval_rank).unwrap_or(0),
+ retrieval_score: None,
rerank_score: row.rerank_score,
note_scope: row.note_scope,
note_importance: row.note_importance,
diff --git a/docs/guide/benchmarking/2026-06-09-live-baseline-report.md b/docs/guide/benchmarking/2026-06-09-live-baseline-report.md
index ed94704f..78df93bb 100644
--- a/docs/guide/benchmarking/2026-06-09-live-baseline-report.md
+++ b/docs/guide/benchmarking/2026-06-09-live-baseline-report.md
@@ -152,6 +152,13 @@ The benchmark is intentionally stricter than a feature checklist. It exercises w
project can ingest the same corpus, return expected evidence for the same queries, and
preserve basic lifecycle behavior under the runner's encoded contract.
+## Retrieval Observability
+
+Generated live-baseline reports include per-query ELF trace IDs when the ELF service
+path runs. Open the admin viewer at `/viewer`, paste a trace ID into the Traces panel,
+and inspect the full trace bundle to compare candidates, fusion/rerank terms, relation
+context, provider metadata, and selected final results without raw SQL.
+
ELF checks covered in this run:
- production-provider embeddings through the same service path used by ELF;
diff --git a/docs/guide/benchmarking/live_baseline_benchmark.md b/docs/guide/benchmarking/live_baseline_benchmark.md
index 05108f19..e6995f00 100644
--- a/docs/guide/benchmarking/live_baseline_benchmark.md
+++ b/docs/guide/benchmarking/live_baseline_benchmark.md
@@ -204,8 +204,11 @@ synthetic or private production-corpus results. Each project record includes
that distinguishes real, mocked, unsupported, blocked, incomplete, and not-encoded
behavior surfaces. ELF project records also include an `embedding` summary so
deterministic local and production-provider runs are not confused. ELF query records
-include task, expected evidence IDs, allowed alternate evidence IDs, top evidence ID,
-wrong-result count, and per-query latency. Each project record also includes
+include task, trace ID, expected evidence IDs, allowed alternate evidence IDs, top
+evidence ID, wrong-result count, and per-query latency. Each ELF trace ID can be opened
+from the admin viewer at `/viewer` by loading it in the Traces panel; the full trace
+bundle shows stage-level candidates, rerank terms, relation context, and provider
+metadata without raw SQL. Each project record also includes
`backfill` evidence with source count, completed count, batch size, worker
concurrency, resume state, duplicate-source count, and backfill elapsed seconds. Each
project record also includes `checks` and `check_summary`; the aggregate
diff --git a/packages/elf-service/src/search.rs b/packages/elf-service/src/search.rs
index 4fbbc268..1325c00e 100644
--- a/packages/elf-service/src/search.rs
+++ b/packages/elf-service/src/search.rs
@@ -974,6 +974,9 @@ pub struct TraceReplayCandidate {
pub snippet: String,
/// 1-based retrieval rank.
pub retrieval_rank: u32,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ /// Optional merged retrieval score captured before rerank.
+ pub retrieval_score: Option,
/// Raw rerank-model score.
pub rerank_score: f32,
/// Scope key for the note.
@@ -1123,6 +1126,7 @@ struct ChunkCandidate {
note_id: Uuid,
chunk_index: i32,
retrieval_rank: u32,
+ retrieval_score: Option,
scope: Option,
updated_at: Option,
embedding_version: Option,
@@ -1277,6 +1281,7 @@ struct ChunkSnippet {
chunk: ChunkMeta,
snippet: String,
retrieval_rank: u32,
+ retrieval_score: Option,
}
#[derive(Clone, Debug, Deserialize, Serialize)]
@@ -4332,6 +4337,7 @@ ORDER BY c.note_id ASC, e.vec <=> $3::text::vector ASC",
chunk,
snippet,
retrieval_rank: candidate.retrieval_rank,
+ retrieval_score: candidate.retrieval_score,
});
}
@@ -5110,6 +5116,7 @@ fn build_trace_candidate_record(
chunk_index: scored_chunk.item.chunk.chunk_index,
snippet: scored_chunk.item.snippet.clone(),
retrieval_rank: scored_chunk.item.retrieval_rank,
+ retrieval_score: scored_chunk.item.retrieval_score,
rerank_score: scored_chunk.rerank_score,
note_scope: note.scope.clone(),
note_importance: note.importance,
@@ -5298,6 +5305,7 @@ fn build_structured_field_candidates(
note_id,
chunk_index: *chunk_index,
retrieval_rank: next_rank,
+ retrieval_score: None,
scope: None,
updated_at: None,
embedding_version: Some(embed_version.to_string()),
@@ -6574,6 +6582,7 @@ mod tests {
note_id,
chunk_index: 0,
retrieval_rank,
+ retrieval_score: None,
scope: None,
updated_at: None,
embedding_version: Some("v1".to_string()),
@@ -6636,6 +6645,7 @@ mod tests {
note_id: shared_note_id,
chunk_index: 0,
retrieval_rank: 9,
+ retrieval_score: None,
scope: None,
updated_at: None,
embedding_version: Some("v1".to_string()),
@@ -6645,6 +6655,7 @@ mod tests {
note_id: fusion_only_note_id,
chunk_index: 0,
retrieval_rank: 1,
+ retrieval_score: None,
scope: None,
updated_at: None,
embedding_version: Some("v1".to_string()),
@@ -6655,6 +6666,7 @@ mod tests {
note_id: shared_note_id,
chunk_index: 0,
retrieval_rank: 1,
+ retrieval_score: None,
scope: None,
updated_at: None,
embedding_version: Some("v1".to_string()),
@@ -6804,8 +6816,13 @@ mod tests {
};
let chunk =
ChunkMeta { chunk_id: Uuid::new_v4(), chunk_index: 0, start_offset: 0, end_offset: 10 };
- let item =
- ChunkSnippet { note, chunk, snippet: "deploy steps".to_string(), retrieval_rank: 1 };
+ let item = ChunkSnippet {
+ note,
+ chunk,
+ snippet: "deploy steps".to_string(),
+ retrieval_rank: 1,
+ retrieval_score: None,
+ };
let mut scored = ScoredChunk {
item,
final_score: 1.0,
@@ -6881,8 +6898,13 @@ mod tests {
};
let chunk =
ChunkMeta { chunk_id: Uuid::new_v4(), chunk_index: 0, start_offset: 0, end_offset: 10 };
- let item =
- ChunkSnippet { note, chunk, snippet: "deploy steps".to_string(), retrieval_rank: 1 };
+ let item = ChunkSnippet {
+ note,
+ chunk,
+ snippet: "deploy steps".to_string(),
+ retrieval_rank: 1,
+ retrieval_score: None,
+ };
let mut scored = ScoredChunk {
item,
final_score: 1.0,
@@ -6967,6 +6989,7 @@ mod tests {
chunk,
snippet: format!("snippet-{retrieval_rank}"),
retrieval_rank,
+ retrieval_score: None,
};
ScoredChunk {
@@ -7063,6 +7086,7 @@ mod tests {
chunk_index: 0,
snippet: "first".to_string(),
retrieval_rank: 2,
+ retrieval_score: None,
rerank_score: 0.2,
note_scope: "project_shared".to_string(),
note_importance: 0.1,
@@ -7084,6 +7108,7 @@ mod tests {
chunk_index: 1,
snippet: "second".to_string(),
retrieval_rank: 1,
+ retrieval_score: None,
rerank_score: 0.3,
note_scope: "project_shared".to_string(),
note_importance: 0.1,
@@ -7186,6 +7211,7 @@ mod tests {
chunk_index: 0,
snippet: "deployment steps".to_string(),
retrieval_rank: 1,
+ retrieval_score: None,
rerank_score: 0.1,
note_scope: "project_shared".to_string(),
note_importance: 0.1,
@@ -7207,6 +7233,7 @@ mod tests {
chunk_index: 0,
snippet: "deployment steps".to_string(),
retrieval_rank: 2,
+ retrieval_score: None,
rerank_score: 0.9,
note_scope: "project_shared".to_string(),
note_importance: 0.1,
@@ -7228,6 +7255,7 @@ mod tests {
chunk_index: 0,
snippet: "deployment steps".to_string(),
retrieval_rank: 3,
+ retrieval_score: None,
rerank_score: 0.2,
note_scope: "org_shared".to_string(),
note_importance: 0.1,
diff --git a/packages/elf-service/src/search/filter.rs b/packages/elf-service/src/search/filter.rs
index 2c0adfaa..7e94077e 100644
--- a/packages/elf-service/src/search/filter.rs
+++ b/packages/elf-service/src/search/filter.rs
@@ -1026,6 +1026,7 @@ mod tests {
chunk_id: Uuid::new_v4(),
chunk_index: 0,
retrieval_rank: 1,
+ retrieval_score: None,
scope: Some("project_shared".to_string()),
updated_at: None,
embedding_version: None,
@@ -1092,6 +1093,7 @@ mod tests {
chunk_id: Uuid::new_v4(),
chunk_index: 0,
retrieval_rank: 1,
+ retrieval_score: None,
scope: None,
updated_at: None,
embedding_version: None,
@@ -1101,6 +1103,7 @@ mod tests {
chunk_id: Uuid::new_v4(),
chunk_index: 1,
retrieval_rank: 2,
+ retrieval_score: None,
scope: None,
updated_at: None,
embedding_version: None,
@@ -1110,6 +1113,7 @@ mod tests {
chunk_id: Uuid::new_v4(),
chunk_index: 2,
retrieval_rank: 3,
+ retrieval_score: None,
scope: None,
updated_at: None,
embedding_version: None,
diff --git a/packages/elf-service/src/search/ranking/retrieval.rs b/packages/elf-service/src/search/ranking/retrieval.rs
index 776b0642..1f7d826f 100644
--- a/packages/elf-service/src/search/ranking/retrieval.rs
+++ b/packages/elf-service/src/search/ranking/retrieval.rs
@@ -60,6 +60,7 @@ pub fn collect_chunk_candidates(
note_id,
chunk_index,
retrieval_rank: idx as u32 + 1,
+ retrieval_score: Some(point.score),
updated_at,
embedding_version,
scope,
@@ -205,6 +206,7 @@ pub fn merge_retrieval_candidates(
for (idx, mut candidate) in merged.into_iter().take(candidate_k as usize).enumerate() {
candidate.candidate.retrieval_rank = idx as u32 + 1;
+ candidate.candidate.retrieval_score = Some(candidate.combined_score);
out.push(candidate.candidate);
}
diff --git a/packages/elf-service/tests/acceptance/trace_admin_observability.rs b/packages/elf-service/tests/acceptance/trace_admin_observability.rs
index 52fcc839..30453fe9 100644
--- a/packages/elf-service/tests/acceptance/trace_admin_observability.rs
+++ b/packages/elf-service/tests/acceptance/trace_admin_observability.rs
@@ -284,6 +284,7 @@ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)",
chunk_index: rank,
snippet: "trace candidate snippet".to_string(),
retrieval_rank: retrieval_rank as u32,
+ retrieval_score: Some(retrieval_score),
rerank_score: retrieval_score,
note_scope: "agent_private".to_string(),
note_importance: 0.6,
@@ -541,6 +542,11 @@ async fn trace_bundle_truncation_and_candidate_limits() {
assert_eq!(candidates[0].retrieval_rank, 1);
assert_eq!(candidates[1].retrieval_rank, 2);
+ assert!(
+ candidates[0].retrieval_score.is_some_and(|score| (score - 0.8_f32).abs() < 1e-6),
+ "Unexpected retrieval_score: {:?}",
+ candidates[0].retrieval_score
+ );
assert!(candidates[0].rerank_score >= candidates[1].rerank_score);
test_db.cleanup().await.expect("Failed to cleanup test database.");
diff --git a/scripts/live-baseline-report-to-md.sh b/scripts/live-baseline-report-to-md.sh
index 411fe682..6b2605db 100755
--- a/scripts/live-baseline-report-to-md.sh
+++ b/scripts/live-baseline-report-to-md.sh
@@ -117,14 +117,15 @@ render_report() {
| if ($query_projects | length) > 0 then
"## Query Evidence",
"",
- "| Project | Query | Task | Expected Evidence | Allowed Alternates | Top Evidence | Matched | Latency |",
- "| --- | --- | --- | --- | --- | --- | --- | --- |",
+ "| Project | Query | Trace ID | Task | Expected Evidence | Allowed Alternates | Top Evidence | Matched | Latency |",
+ "| --- | --- | --- | --- | --- | --- | --- | --- | --- |",
(
$query_projects[]
| .project as $project
| .queries[]
| "| " + ($project | md)
+ " | `" + (.id | md) + "`"
+ + " | `" + ((.trace_id // "-") | md) + "`"
+ " | `" + ((.task // "-") | md) + "`"
+ " | `" + (((.expected_evidence_ids // []) | join(", ")) | md) + "`"
+ " | `" + (((.allowed_alternate_evidence_ids // []) | join(", ")) | md) + "`"