From 7a729efd7928fe976afbf92ff6fec8829ebfd516 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Tue, 9 Jun 2026 15:25:53 +0800 Subject: [PATCH 1/2] {"schema":"decodex/commit/1","summary":"Add viewer trace observability panels","authority":"XY-27"} --- apps/elf-api/src/routes.rs | 12 + apps/elf-api/static/viewer.html | 383 ++++++++++++++++-- apps/elf-eval/src/app.rs | 5 + apps/elf-eval/src/bin/live_baseline_elf.rs | 2 + .../elf-eval/src/bin/trace_regression_gate.rs | 1 + .../2026-06-09-live-baseline-report.md | 7 + .../benchmarking/live_baseline_benchmark.md | 7 +- packages/elf-service/src/search.rs | 36 +- packages/elf-service/src/search/filter.rs | 4 + .../src/search/ranking/retrieval.rs | 2 + .../acceptance/trace_admin_observability.rs | 2 + scripts/live-baseline-report-to-md.sh | 5 +- 12 files changed, 417 insertions(+), 49 deletions(-) diff --git a/apps/elf-api/src/routes.rs b/apps/elf-api/src/routes.rs index 421f0488..2f6e6516 100644 --- a/apps/elf-api/src/routes.rs +++ b/apps/elf-api/src/routes.rs @@ -2960,9 +2960,21 @@ mod tests { assert_eq!(ADMIN_VIEWER_PATH, "/viewer"); assert!(html.contains("/v2/admin/searches")); assert!(html.contains("/v2/admin/traces/recent")); + assert!(html.contains("/v2/admin/traces/${encodeURIComponent(traceId)}/bundle")); assert!(html.contains("/v2/admin/notes/")); + assert!(html.contains("mode: \"full\"")); + assert!(html.contains("candidates_limit: 200")); + assert!(html.contains("Replay Candidates")); + assert!(html.contains("Selected Final Results")); + assert!(html.contains("Providers And Ranking")); + assert!(html.contains("Relation Context")); + assert!(html.contains("directTraceId")); assert!(!html.contains("method: \"PATCH\"")); + assert!(!html.contains("method: \"PUT\"")); assert!(!html.contains("method: \"DELETE\"")); + assert!(!html.contains("/v2/notes/ingest")); + assert!(!html.contains("/v2/events/ingest")); + assert!(!html.contains("/publish")); } #[test] diff --git a/apps/elf-api/static/viewer.html b/apps/elf-api/static/viewer.html index 0bf852d2..f25cb956 100644 --- a/apps/elf-api/static/viewer.html +++ b/apps/elf-api/static/viewer.html @@ -419,6 +419,69 @@ gap: 8px; } + .metrics { + display: grid; + gap: 8px; + grid-template-columns: repeat(4, minmax(0, 1fr)); + } + + .metric { + background: var(--surface-alt); + border: 1px solid var(--line); + border-radius: 8px; + display: grid; + gap: 3px; + min-width: 0; + padding: 9px; + } + + .metric-label { + color: var(--muted); + font-size: 11px; + font-weight: 800; + text-transform: uppercase; + } + + .metric-value { + font-size: 16px; + font-weight: 750; + overflow-wrap: anywhere; + } + + .table-wrap { + border: 1px solid var(--line); + border-radius: 8px; + overflow: auto; + } + + table { + border-collapse: collapse; + min-width: 100%; + } + + th, + td { + border-bottom: 1px solid var(--line); + padding: 7px 8px; + text-align: left; + vertical-align: top; + white-space: nowrap; + } + + th { + background: var(--surface-alt); + color: var(--muted); + font-size: 11px; + font-weight: 800; + text-transform: uppercase; + } + + td.wrap { + max-width: 360px; + white-space: normal; + overflow-wrap: anywhere; + } + @media (max-width: 980px) { .app { grid-template-columns: 1fr; @@ -431,7 +494,8 @@ .grid-2, .grid-3, - .form-row { + .form-row, + .metrics { grid-template-columns: 1fr; } @@ -653,6 +717,14 @@

Recent Traces

+
+ +
+
+ +
@@ -675,7 +747,8 @@

Recent Traces

activeTab: "searchView", session: null, selectedNoteId: null, - traceBundle: null + traceBundle: null, + traceMetrics: {} }; const $ = (selector, root = document) => root.querySelector(selector); @@ -751,6 +824,20 @@

Recent Traces

return value.toFixed(4); } + function ms(value) { + return typeof value === "number" ? `${value.toFixed(1)} ms` : "none"; + } + + function recordTraceMetric(traceId, key, value) { + if (!traceId || typeof value !== "number") { + return; + } + state.traceMetrics[traceId] = { + ...(state.traceMetrics[traceId] || {}), + [key]: value + }; + } + function chip(text, variant = "") { return make("span", { className: `chip ${variant}`.trim(), text: String(text) }); } @@ -770,6 +857,62 @@

Recent Traces

return make("div", { className: "kv" }, rows); } + function metricGrid(pairs) { + return make("div", { className: "metrics" }, pairs.map(([label, value]) => { + return make("div", { className: "metric" }, [ + make("div", { className: "metric-label", text: label }), + make("div", { className: "metric-value", text: value === undefined || value === null || value === "" ? "none" : String(value) }) + ]); + })); + } + + function table(headers, rows) { + const head = make("thead", {}, [ + make("tr", {}, headers.map((header) => make("th", { text: header }))) + ]); + const body = make("tbody", {}, rows.map((row) => { + return make("tr", {}, row.map((cell) => { + const value = cell && typeof cell === "object" && "value" in cell ? cell.value : cell; + const className = cell && typeof cell === "object" && cell.wrap ? "wrap" : ""; + return make("td", { className, text: value === undefined || value === null || value === "" ? "none" : String(value) }); + })); + })); + return make("div", { className: "table-wrap" }, [make("table", {}, [head, body])]); + } + + function section(title, children) { + return make("div", { className: "row" }, [ + make("div", { className: "row-head" }, [make("div", { className: "title", text: title })]), + ...children + ]); + } + + function getPath(value, path) { + return path.reduce((current, key) => { + if (current && typeof current === "object" && key in current) { + return current[key]; + } + return undefined; + }, value); + } + + function stageByName(bundle, name) { + return (bundle.stages || []).find((stage) => stage.stage_name === name); + } + + function termValue(item, name) { + const terms = getPath(item, ["explain", "ranking", "terms"]) || []; + const term = terms.find((candidate) => candidate.name === name); + return term ? score(term.value) : "none"; + } + + function relationContexts(items) { + return (items || []).flatMap((item) => { + const contexts = getPath(item, ["explain", "relation_context"]) || []; + return contexts.map((context) => ({ item, context })); + }); + } + function loadContext() { const saved = JSON.parse(localStorage.getItem("elf.viewer.context") || "{}"); if (saved.tenantId) $("#tenantId").value = saved.tenantId; @@ -906,10 +1049,12 @@

Recent Traces

payload_level: $("#searchPayloadLevel").value }; try { + const started = performance.now(); const session = await api("/v2/admin/searches", { method: "POST", body: JSON.stringify(body) }); + recordTraceMetric(session.trace_id, "search_request_ms", performance.now() - started); state.selectedNoteId = session.items && session.items[0] ? session.items[0].note_id : null; renderSearchSession(session); $("#loadSearchId").value = session.search_id; @@ -934,7 +1079,9 @@

Recent Traces

} setStatus("Loading session..."); try { + const started = performance.now(); const session = await api(`/v2/admin/searches/${encodeURIComponent(searchId)}${queryString({ top_k: $("#topK").value || 12, touch: "true" })}`); + recordTraceMetric(session.trace_id, "session_readback_ms", performance.now() - started); state.selectedNoteId = session.items && session.items[0] ? session.items[0].note_id : null; renderSearchSession(session); await Promise.all([ @@ -1126,14 +1273,28 @@

Recent Traces

} } + async function loadTraceById() { + const traceId = $("#directTraceId").value.trim(); + if (!traceId) { + setStatus("Trace ID is required.", true); + return; + } + await loadTraceBundle(traceId, $("#traceBundleDetail")); + showTab("tracesView"); + } + async function loadTraceBundle(traceId, target) { if (!traceId) { return; } const detailTarget = target || $("#traceBundleDetail"); + $("#directTraceId").value = traceId; detailTarget.replaceChildren(empty("Loading trace...")); try { - const bundle = await api(`/v2/admin/traces/${encodeURIComponent(traceId)}/bundle${queryString({ mode: "bounded", stage_items_limit: 64, candidates_limit: 0 })}`); + const started = performance.now(); + const bundle = await api(`/v2/admin/traces/${encodeURIComponent(traceId)}/bundle${queryString({ mode: "full", stage_items_limit: 128, candidates_limit: 200 })}`); + recordTraceMetric(traceId, "trace_readback_ms", performance.now() - started); + bundle.viewer_metrics = state.traceMetrics[traceId] || {}; renderTraceBundle(detailTarget, bundle); if (detailTarget === $("#traceDetail")) { state.traceBundle = bundle; @@ -1150,53 +1311,192 @@

Recent Traces

return; } const trace = bundle.trace; - const items = bundle.items || []; + const metrics = bundle.viewer_metrics || {}; const stages = bundle.stages || []; + const candidates = bundle.candidates || []; + const recall = stageByName(bundle, "recall.candidates"); + const fusion = stageByName(bundle, "fusion.merge"); + const rerank = stageByName(bundle, "rerank.score"); + const finalStage = stageByName(bundle, "selection.final"); target.replaceChildren( - kvTable([ - ["trace_id", trace.trace_id], - ["query", trace.query], - ["agent", trace.agent_id], - ["read_profile", trace.read_profile], - ["expansion_mode", trace.expansion_mode], - ["candidate_count", trace.candidate_count], - ["top_k", trace.top_k], - ["created_at", dateText(trace.created_at)], - ["trace_version", trace.trace_version] - ]), - make("div", { className: "split-stack", style: "margin-top: 12px;" }, [ - make("div", { className: "title", text: "Expanded queries" }), - make("div", { className: "chips" }, (trace.expanded_queries || []).map((query) => chip(query, "teal"))), - make("div", { className: "title", text: "Config snapshot" }), - pre(trace.config_snapshot || {}), - make("div", { className: "title", text: "Items" }), - items.length ? make("div", { className: "list" }, items.map(traceItemRow)) : empty("No trace items."), - make("div", { className: "title", text: "Stages" }), - stages.length ? make("div", { className: "list" }, stages.map(stageRow)) : empty("No stages.") + make("div", { className: "split-stack" }, [ + metricGrid([ + ["Candidates", trace.candidate_count], + ["Replay Rows", candidates.length], + ["Final Results", (bundle.items || []).length], + ["Top K", trace.top_k], + ["Search Latency", ms(metrics.search_request_ms)], + ["Session Readback", ms(metrics.session_readback_ms)], + ["Trace Readback", ms(metrics.trace_readback_ms)], + ["Trace Age", trace.created_at ? `${Math.max(0, (Date.now() - new Date(trace.created_at).getTime()) / 1000).toFixed(0)}s` : "none"] + ]), + section("Trace", [ + kvTable([ + ["trace_id", trace.trace_id], + ["query", trace.query], + ["agent", trace.agent_id], + ["read_profile", trace.read_profile], + ["expansion_mode", trace.expansion_mode], + ["allowed_scopes", (trace.allowed_scopes || []).join(", ")], + ["created_at", dateText(trace.created_at)], + ["generated_at", dateText(bundle.generated_at)], + ["trace_version", trace.trace_version] + ]), + make("div", { className: "chips" }, (trace.expanded_queries || []).map((query) => chip(query, "teal"))) + ]), + renderProviderSection(trace), + renderStageSummarySection(stages), + section("Retrieval Funnel", [ + metricGrid([ + ["Recall Before Filter", getPath(recall, ["stage_payload", "stats", "candidate_count_before_filter"]) ?? "none"], + ["Recall After Filter", getPath(recall, ["stage_payload", "stats", "candidate_count_after_filter"]) ?? "none"], + ["Fusion Scored", getPath(fusion, ["stage_payload", "stats", "scored_count"]) ?? "none"], + ["Reranked", getPath(rerank, ["stage_payload", "stats", "reranked_count"]) ?? "none"], + ["Selected", getPath(finalStage, ["stage_payload", "stats", "selected_count"]) ?? "none"], + ["Snippets", getPath(recall, ["stage_payload", "stats", "snippet_count"]) ?? "none"], + ["Fusion Weight", getPath(fusion, ["stage_payload", "decisions", "fusion_weight"]) ?? "none"], + ["Structured Weight", getPath(fusion, ["stage_payload", "decisions", "structured_field_weight"]) ?? "none"] + ]) + ]), + renderFinalResultsSection(bundle), + renderCandidateSection(bundle), + renderRelationContextSection(bundle.items || []), + renderStageDetailsSection(stages) ]) ); } - function traceItemRow(item) { - const terms = item.explain && item.explain.ranking ? item.explain.ranking.terms || [] : []; - const termChips = terms.slice(0, 6).map((term) => chip(`${term.name}: ${score(term.value)}`)); - return make("div", { className: "row trace-item" }, [ - make("div", { className: "row-head" }, [ - make("div", { className: "title", text: `Rank ${item.rank} | ${item.note_id}` }), - chip(item.result_handle, "indigo") - ]), - make("div", { className: "chips" }, termChips), - pre(item.explain || {}) + function renderProviderSection(trace) { + const cfg = trace.config_snapshot || {}; + const embedding = getPath(cfg, ["providers", "embedding"]) || {}; + const rerank = getPath(cfg, ["providers", "rerank"]) || {}; + const qdrant = getPath(cfg, ["storage", "qdrant"]) || {}; + const ranking = cfg.ranking || {}; + const blend = ranking.blend || {}; + const diversity = ranking.diversity || {}; + const retrievalSources = ranking.retrieval_sources || {}; + return section("Providers And Ranking", [ + kvTable([ + ["retrieval_channels", "dense + bm25 via Qdrant fusion"], + ["embedding", `${embedding.provider_id || "none"} / ${embedding.model || "none"} / ${embedding.dimensions || "none"} dims`], + ["rerank", `${rerank.provider_id || "none"} / ${rerank.model || "none"}`], + ["qdrant", `${qdrant.collection || "none"} / vector_dim ${qdrant.vector_dim || "none"}`], + ["policy_id", ranking.policy_id], + ["blend", `enabled ${blend.enabled ?? "none"} / ${blend.retrieval_normalization || "none"} -> ${blend.rerank_normalization || "none"}`], + ["diversity", `enabled ${diversity.enabled ?? "none"} / sim ${diversity.sim_threshold ?? "none"}`], + ["source_weights", `fusion ${retrievalSources.fusion_weight ?? "none"} / structured ${retrievalSources.structured_field_weight ?? "none"} / recursive ${retrievalSources.recursive_weight ?? "none"}`], + ["override", ranking.override ? "present" : "none"] + ]) ]); } - function stageRow(stage) { - return make("div", { className: "row" }, [ - make("div", { className: "row-head" }, [ - make("div", { className: "title", text: `${stage.stage_order}. ${stage.stage_name}` }), - chip(`${(stage.items || []).length} items`, "teal") - ]), - pre(stage.stage_payload || stage) + function renderStageSummarySection(stages) { + if (!stages.length) { + return section("Stage Summary", [empty("No stages.")]); + } + return section("Stage Summary", [ + table( + ["Stage", "Items", "Stats", "Decisions"], + stages.map((stage) => [ + `${stage.stage_order}. ${stage.stage_name}`, + (stage.items || []).length, + { value: formatJson(getPath(stage, ["stage_payload", "stats"]) || {}), wrap: true }, + { value: formatJson(getPath(stage, ["stage_payload", "decisions"]) || {}), wrap: true } + ]) + ) + ]); + } + + function renderFinalResultsSection(bundle) { + const items = bundle.items || []; + if (!items.length) { + return section("Selected Final Results", [empty("No final items.")]); + } + return section("Selected Final Results", [ + table( + ["Rank", "Note", "Chunk", "Final", "Retrieval", "Rerank", "Tie", "Scope Boost", "Relations", "Handle"], + items.map((item) => [ + item.rank, + item.note_id, + item.chunk_id || "none", + score(getPath(item, ["explain", "ranking", "final_score"])), + termValue(item, "blend.retrieval"), + termValue(item, "blend.rerank"), + termValue(item, "tie_breaker"), + termValue(item, "context.scope_boost"), + (getPath(item, ["explain", "relation_context"]) || []).length, + item.result_handle + ]) + ) + ]); + } + + function renderCandidateSection(bundle) { + const candidates = bundle.candidates || []; + if (!candidates.length) { + return section("Replay Candidates", [empty("No persisted candidate snapshots.")]); + } + const finalRanks = new Map((bundle.items || []).map((item) => [item.note_id, item.rank])); + return section("Replay Candidates", [ + table( + ["Retrieval Rank", "Final Rank", "Note", "Chunk", "Retrieval Score", "Rerank Score", "Scope", "Importance", "Updated", "Snippet"], + candidates.map((candidate) => [ + candidate.retrieval_rank, + finalRanks.get(candidate.note_id) || "-", + candidate.note_id, + candidate.chunk_id, + score(candidate.retrieval_score), + score(candidate.rerank_score), + candidate.note_scope, + score(candidate.note_importance), + dateText(candidate.note_updated_at), + { value: candidate.snippet, wrap: true } + ]) + ) + ]); + } + + function renderRelationContextSection(items) { + const relations = relationContexts(items); + if (!relations.length) { + return section("Relation Context", [empty("No relation context attached to selected results.")]); + } + return section("Relation Context", [ + table( + ["Rank", "Scope", "Subject", "Predicate", "Object", "Evidence Notes"], + relations.map(({ item, context }) => [ + item.rank, + context.scope, + getPath(context, ["subject", "canonical"]) || "none", + context.predicate, + getPath(context, ["object", "entity", "canonical"]) || getPath(context, ["object", "value"]) || "none", + (context.evidence_note_ids || []).join(", ") + ]) + ) + ]); + } + + function renderStageDetailsSection(stages) { + if (!stages.length) { + return section("Stage Details", [empty("No stage details.")]); + } + return section("Stage Details", [ + make("div", { className: "list" }, stages.map((stage) => { + const rows = (stage.items || []).map((item) => [ + item.note_id || "none", + item.chunk_id || "none", + item.item_id || "none", + { value: formatJson(item.metrics || {}), wrap: true } + ]); + return make("div", { className: "row" }, [ + make("div", { className: "row-head" }, [ + make("div", { className: "title", text: `${stage.stage_order}. ${stage.stage_name}` }), + chip(`${(stage.items || []).length} items`, "teal") + ]), + pre(stage.stage_payload || {}), + rows.length ? table(["Note", "Chunk", "Item", "Metrics"], rows) : empty("No stage items.") + ]); + })) ]); } @@ -1231,6 +1531,7 @@

Recent Traces

$("#loadTimelineButton").addEventListener("click", loadTimeline); $("#loadNotesButton").addEventListener("click", loadNotes); $("#loadTracesButton").addEventListener("click", loadRecentTraces); + $("#loadTraceByIdButton").addEventListener("click", loadTraceById); $("#refreshActive").addEventListener("click", refreshActive); } diff --git a/apps/elf-eval/src/app.rs b/apps/elf-eval/src/app.rs index 94bd819d..b5234bc9 100644 --- a/apps/elf-eval/src/app.rs +++ b/apps/elf-eval/src/app.rs @@ -916,6 +916,7 @@ fn decode_trace_replay_candidates( chunk_index: row.chunk_index, snippet: row.snippet, retrieval_rank: u32::try_from(row.retrieval_rank).unwrap_or(0), + retrieval_score: None, rerank_score: row.rerank_score, note_scope: row.note_scope, note_importance: row.note_importance, @@ -1481,6 +1482,7 @@ mod tests { chunk_index: 0, snippet: "a".to_string(), retrieval_rank: 1, + retrieval_score: None, rerank_score: 0.1, note_scope: "project_shared".to_string(), note_importance: 0.1, @@ -1502,6 +1504,7 @@ mod tests { chunk_index: 1, snippet: "a".to_string(), retrieval_rank: 2, + retrieval_score: None, rerank_score: 0.2, note_scope: "project_shared".to_string(), note_importance: 0.1, @@ -1523,6 +1526,7 @@ mod tests { chunk_index: 0, snippet: "b".to_string(), retrieval_rank: 3, + retrieval_score: None, rerank_score: 0.3, note_scope: "org_shared".to_string(), note_importance: 0.1, @@ -1544,6 +1548,7 @@ mod tests { chunk_index: 0, snippet: "c".to_string(), retrieval_rank: 4, + retrieval_score: None, rerank_score: 0.4, note_scope: "org_shared".to_string(), note_importance: 0.1, diff --git a/apps/elf-eval/src/bin/live_baseline_elf.rs b/apps/elf-eval/src/bin/live_baseline_elf.rs index 18ec7ba0..82703ad8 100644 --- a/apps/elf-eval/src/bin/live_baseline_elf.rs +++ b/apps/elf-eval/src/bin/live_baseline_elf.rs @@ -290,6 +290,7 @@ struct CheckResult { struct QueryResult { id: String, task: Option, + trace_id: Uuid, query: String, expected_doc: String, allowed_alternate_docs: Vec, @@ -1924,6 +1925,7 @@ async fn run_single_query( Ok(QueryResult { id: case.id, task: case.task, + trace_id: response.trace_id, query: case.query, expected_doc: case.expected_doc, allowed_alternate_docs: case.allowed_alternate_docs, diff --git a/apps/elf-eval/src/bin/trace_regression_gate.rs b/apps/elf-eval/src/bin/trace_regression_gate.rs index 44dd93e4..54716bf7 100644 --- a/apps/elf-eval/src/bin/trace_regression_gate.rs +++ b/apps/elf-eval/src/bin/trace_regression_gate.rs @@ -191,6 +191,7 @@ fn decode_trace_replay_candidates( chunk_index: row.chunk_index, snippet: row.snippet, retrieval_rank: u32::try_from(row.retrieval_rank).unwrap_or(0), + retrieval_score: None, rerank_score: row.rerank_score, note_scope: row.note_scope, note_importance: row.note_importance, diff --git a/docs/guide/benchmarking/2026-06-09-live-baseline-report.md b/docs/guide/benchmarking/2026-06-09-live-baseline-report.md index ed94704f..78df93bb 100644 --- a/docs/guide/benchmarking/2026-06-09-live-baseline-report.md +++ b/docs/guide/benchmarking/2026-06-09-live-baseline-report.md @@ -152,6 +152,13 @@ The benchmark is intentionally stricter than a feature checklist. It exercises w project can ingest the same corpus, return expected evidence for the same queries, and preserve basic lifecycle behavior under the runner's encoded contract. +## Retrieval Observability + +Generated live-baseline reports include per-query ELF trace IDs when the ELF service +path runs. Open the admin viewer at `/viewer`, paste a trace ID into the Traces panel, +and inspect the full trace bundle to compare candidates, fusion/rerank terms, relation +context, provider metadata, and selected final results without raw SQL. + ELF checks covered in this run: - production-provider embeddings through the same service path used by ELF; diff --git a/docs/guide/benchmarking/live_baseline_benchmark.md b/docs/guide/benchmarking/live_baseline_benchmark.md index 05108f19..e6995f00 100644 --- a/docs/guide/benchmarking/live_baseline_benchmark.md +++ b/docs/guide/benchmarking/live_baseline_benchmark.md @@ -204,8 +204,11 @@ synthetic or private production-corpus results. Each project record includes that distinguishes real, mocked, unsupported, blocked, incomplete, and not-encoded behavior surfaces. ELF project records also include an `embedding` summary so deterministic local and production-provider runs are not confused. ELF query records -include task, expected evidence IDs, allowed alternate evidence IDs, top evidence ID, -wrong-result count, and per-query latency. Each project record also includes +include task, trace ID, expected evidence IDs, allowed alternate evidence IDs, top +evidence ID, wrong-result count, and per-query latency. Each ELF trace ID can be opened +from the admin viewer at `/viewer` by loading it in the Traces panel; the full trace +bundle shows stage-level candidates, rerank terms, relation context, and provider +metadata without raw SQL. Each project record also includes `backfill` evidence with source count, completed count, batch size, worker concurrency, resume state, duplicate-source count, and backfill elapsed seconds. Each project record also includes `checks` and `check_summary`; the aggregate diff --git a/packages/elf-service/src/search.rs b/packages/elf-service/src/search.rs index 4fbbc268..1325c00e 100644 --- a/packages/elf-service/src/search.rs +++ b/packages/elf-service/src/search.rs @@ -974,6 +974,9 @@ pub struct TraceReplayCandidate { pub snippet: String, /// 1-based retrieval rank. pub retrieval_rank: u32, + #[serde(skip_serializing_if = "Option::is_none")] + /// Optional merged retrieval score captured before rerank. + pub retrieval_score: Option, /// Raw rerank-model score. pub rerank_score: f32, /// Scope key for the note. @@ -1123,6 +1126,7 @@ struct ChunkCandidate { note_id: Uuid, chunk_index: i32, retrieval_rank: u32, + retrieval_score: Option, scope: Option, updated_at: Option, embedding_version: Option, @@ -1277,6 +1281,7 @@ struct ChunkSnippet { chunk: ChunkMeta, snippet: String, retrieval_rank: u32, + retrieval_score: Option, } #[derive(Clone, Debug, Deserialize, Serialize)] @@ -4332,6 +4337,7 @@ ORDER BY c.note_id ASC, e.vec <=> $3::text::vector ASC", chunk, snippet, retrieval_rank: candidate.retrieval_rank, + retrieval_score: candidate.retrieval_score, }); } @@ -5110,6 +5116,7 @@ fn build_trace_candidate_record( chunk_index: scored_chunk.item.chunk.chunk_index, snippet: scored_chunk.item.snippet.clone(), retrieval_rank: scored_chunk.item.retrieval_rank, + retrieval_score: scored_chunk.item.retrieval_score, rerank_score: scored_chunk.rerank_score, note_scope: note.scope.clone(), note_importance: note.importance, @@ -5298,6 +5305,7 @@ fn build_structured_field_candidates( note_id, chunk_index: *chunk_index, retrieval_rank: next_rank, + retrieval_score: None, scope: None, updated_at: None, embedding_version: Some(embed_version.to_string()), @@ -6574,6 +6582,7 @@ mod tests { note_id, chunk_index: 0, retrieval_rank, + retrieval_score: None, scope: None, updated_at: None, embedding_version: Some("v1".to_string()), @@ -6636,6 +6645,7 @@ mod tests { note_id: shared_note_id, chunk_index: 0, retrieval_rank: 9, + retrieval_score: None, scope: None, updated_at: None, embedding_version: Some("v1".to_string()), @@ -6645,6 +6655,7 @@ mod tests { note_id: fusion_only_note_id, chunk_index: 0, retrieval_rank: 1, + retrieval_score: None, scope: None, updated_at: None, embedding_version: Some("v1".to_string()), @@ -6655,6 +6666,7 @@ mod tests { note_id: shared_note_id, chunk_index: 0, retrieval_rank: 1, + retrieval_score: None, scope: None, updated_at: None, embedding_version: Some("v1".to_string()), @@ -6804,8 +6816,13 @@ mod tests { }; let chunk = ChunkMeta { chunk_id: Uuid::new_v4(), chunk_index: 0, start_offset: 0, end_offset: 10 }; - let item = - ChunkSnippet { note, chunk, snippet: "deploy steps".to_string(), retrieval_rank: 1 }; + let item = ChunkSnippet { + note, + chunk, + snippet: "deploy steps".to_string(), + retrieval_rank: 1, + retrieval_score: None, + }; let mut scored = ScoredChunk { item, final_score: 1.0, @@ -6881,8 +6898,13 @@ mod tests { }; let chunk = ChunkMeta { chunk_id: Uuid::new_v4(), chunk_index: 0, start_offset: 0, end_offset: 10 }; - let item = - ChunkSnippet { note, chunk, snippet: "deploy steps".to_string(), retrieval_rank: 1 }; + let item = ChunkSnippet { + note, + chunk, + snippet: "deploy steps".to_string(), + retrieval_rank: 1, + retrieval_score: None, + }; let mut scored = ScoredChunk { item, final_score: 1.0, @@ -6967,6 +6989,7 @@ mod tests { chunk, snippet: format!("snippet-{retrieval_rank}"), retrieval_rank, + retrieval_score: None, }; ScoredChunk { @@ -7063,6 +7086,7 @@ mod tests { chunk_index: 0, snippet: "first".to_string(), retrieval_rank: 2, + retrieval_score: None, rerank_score: 0.2, note_scope: "project_shared".to_string(), note_importance: 0.1, @@ -7084,6 +7108,7 @@ mod tests { chunk_index: 1, snippet: "second".to_string(), retrieval_rank: 1, + retrieval_score: None, rerank_score: 0.3, note_scope: "project_shared".to_string(), note_importance: 0.1, @@ -7186,6 +7211,7 @@ mod tests { chunk_index: 0, snippet: "deployment steps".to_string(), retrieval_rank: 1, + retrieval_score: None, rerank_score: 0.1, note_scope: "project_shared".to_string(), note_importance: 0.1, @@ -7207,6 +7233,7 @@ mod tests { chunk_index: 0, snippet: "deployment steps".to_string(), retrieval_rank: 2, + retrieval_score: None, rerank_score: 0.9, note_scope: "project_shared".to_string(), note_importance: 0.1, @@ -7228,6 +7255,7 @@ mod tests { chunk_index: 0, snippet: "deployment steps".to_string(), retrieval_rank: 3, + retrieval_score: None, rerank_score: 0.2, note_scope: "org_shared".to_string(), note_importance: 0.1, diff --git a/packages/elf-service/src/search/filter.rs b/packages/elf-service/src/search/filter.rs index 2c0adfaa..7e94077e 100644 --- a/packages/elf-service/src/search/filter.rs +++ b/packages/elf-service/src/search/filter.rs @@ -1026,6 +1026,7 @@ mod tests { chunk_id: Uuid::new_v4(), chunk_index: 0, retrieval_rank: 1, + retrieval_score: None, scope: Some("project_shared".to_string()), updated_at: None, embedding_version: None, @@ -1092,6 +1093,7 @@ mod tests { chunk_id: Uuid::new_v4(), chunk_index: 0, retrieval_rank: 1, + retrieval_score: None, scope: None, updated_at: None, embedding_version: None, @@ -1101,6 +1103,7 @@ mod tests { chunk_id: Uuid::new_v4(), chunk_index: 1, retrieval_rank: 2, + retrieval_score: None, scope: None, updated_at: None, embedding_version: None, @@ -1110,6 +1113,7 @@ mod tests { chunk_id: Uuid::new_v4(), chunk_index: 2, retrieval_rank: 3, + retrieval_score: None, scope: None, updated_at: None, embedding_version: None, diff --git a/packages/elf-service/src/search/ranking/retrieval.rs b/packages/elf-service/src/search/ranking/retrieval.rs index 776b0642..1f7d826f 100644 --- a/packages/elf-service/src/search/ranking/retrieval.rs +++ b/packages/elf-service/src/search/ranking/retrieval.rs @@ -60,6 +60,7 @@ pub fn collect_chunk_candidates( note_id, chunk_index, retrieval_rank: idx as u32 + 1, + retrieval_score: Some(point.score), updated_at, embedding_version, scope, @@ -205,6 +206,7 @@ pub fn merge_retrieval_candidates( for (idx, mut candidate) in merged.into_iter().take(candidate_k as usize).enumerate() { candidate.candidate.retrieval_rank = idx as u32 + 1; + candidate.candidate.retrieval_score = Some(candidate.combined_score); out.push(candidate.candidate); } diff --git a/packages/elf-service/tests/acceptance/trace_admin_observability.rs b/packages/elf-service/tests/acceptance/trace_admin_observability.rs index 52fcc839..86838128 100644 --- a/packages/elf-service/tests/acceptance/trace_admin_observability.rs +++ b/packages/elf-service/tests/acceptance/trace_admin_observability.rs @@ -284,6 +284,7 @@ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)", chunk_index: rank, snippet: "trace candidate snippet".to_string(), retrieval_rank: retrieval_rank as u32, + retrieval_score: Some(retrieval_score), rerank_score: retrieval_score, note_scope: "agent_private".to_string(), note_importance: 0.6, @@ -541,6 +542,7 @@ async fn trace_bundle_truncation_and_candidate_limits() { assert_eq!(candidates[0].retrieval_rank, 1); assert_eq!(candidates[1].retrieval_rank, 2); + assert_eq!(candidates[0].retrieval_score, Some(0.8_f32)); assert!(candidates[0].rerank_score >= candidates[1].rerank_score); test_db.cleanup().await.expect("Failed to cleanup test database."); diff --git a/scripts/live-baseline-report-to-md.sh b/scripts/live-baseline-report-to-md.sh index 411fe682..6b2605db 100755 --- a/scripts/live-baseline-report-to-md.sh +++ b/scripts/live-baseline-report-to-md.sh @@ -117,14 +117,15 @@ render_report() { | if ($query_projects | length) > 0 then "## Query Evidence", "", - "| Project | Query | Task | Expected Evidence | Allowed Alternates | Top Evidence | Matched | Latency |", - "| --- | --- | --- | --- | --- | --- | --- | --- |", + "| Project | Query | Trace ID | Task | Expected Evidence | Allowed Alternates | Top Evidence | Matched | Latency |", + "| --- | --- | --- | --- | --- | --- | --- | --- | --- |", ( $query_projects[] | .project as $project | .queries[] | "| " + ($project | md) + " | `" + (.id | md) + "`" + + " | `" + ((.trace_id // "-") | md) + "`" + " | `" + ((.task // "-") | md) + "`" + " | `" + (((.expected_evidence_ids // []) | join(", ")) | md) + "`" + " | `" + (((.allowed_alternate_evidence_ids // []) | join(", ")) | md) + "`" From 40023771cb5c7606e5b5eee55db5a304a5200d6f Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Tue, 9 Jun 2026 15:50:37 +0800 Subject: [PATCH 2/2] {"schema":"decodex/commit/1","summary":"Tolerate trace candidate score precision","authority":"XY-27"} --- .../tests/acceptance/trace_admin_observability.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/elf-service/tests/acceptance/trace_admin_observability.rs b/packages/elf-service/tests/acceptance/trace_admin_observability.rs index 86838128..30453fe9 100644 --- a/packages/elf-service/tests/acceptance/trace_admin_observability.rs +++ b/packages/elf-service/tests/acceptance/trace_admin_observability.rs @@ -542,7 +542,11 @@ async fn trace_bundle_truncation_and_candidate_limits() { assert_eq!(candidates[0].retrieval_rank, 1); assert_eq!(candidates[1].retrieval_rank, 2); - assert_eq!(candidates[0].retrieval_score, Some(0.8_f32)); + assert!( + candidates[0].retrieval_score.is_some_and(|score| (score - 0.8_f32).abs() < 1e-6), + "Unexpected retrieval_score: {:?}", + candidates[0].retrieval_score + ); assert!(candidates[0].rerank_score >= candidates[1].rerank_score); test_db.cleanup().await.expect("Failed to cleanup test database.");