From a0c1ca6685480c9f8b71c0fe6b3525f7bc91c14e Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 00:14:03 +0800
Subject: [PATCH 1/7] {"schema":"decodex/commit/1","summary":"Add Letta-style
 core archival benchmark","authority":"XY-927"}

---
 Makefile.toml                                 |  52 ++++
 README.md                                     |  17 +-
 .../memory_projects_manifest.json             |  88 ++++++-
 .../archival_fallback.json                    | 192 +++++++++++++++
 .../core_block_attachment.json                | 192 +++++++++++++++
 .../core_block_provenance.json                | 192 +++++++++++++++
 .../core_block_scope.json                     | 192 +++++++++++++++
 .../project_decision_recovery.json            | 230 ++++++++++++++++++
 .../stale_core_detection.json                 | 206 ++++++++++++++++
 .../src/bin/real_world_job_benchmark.rs       |   1 +
 .../tests/real_world_job_benchmark.rs         | 178 +++++++++++---
 ...-11-competitor-strength-adoption-report.md |  29 ++-
 .../2026-06-11-measurement-coverage-audit.md  |  55 +++--
 docs/guide/benchmarking/index.md              |  13 +-
 .../real_world_agent_memory_benchmark.md      |  10 +-
 ...1-competitor-strength-adoption-report.json |  30 ++-
 ...2026-06-11-measurement-coverage-audit.json |  34 +--
 .../real_world_agent_memory_benchmark_v1.md   |   1 +
 18 files changed, 1590 insertions(+), 122 deletions(-)
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/core_archival_memory/archival_fallback.json
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_attachment.json
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_provenance.json
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_scope.json
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/core_archival_memory/project_decision_recovery.json
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/core_archival_memory/stale_core_detection.json

diff --git a/Makefile.toml b/Makefile.toml
index 42b2033c..33dc2044 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -428,6 +428,9 @@ args = [
 # | real-world-memory-production-ops        | composite |     |
 # | real-world-memory-production-ops-json   | command   |     |
 # | real-world-memory-production-ops-report | command   |     |
+# | real-world-memory-core-archival         | composite |     |
+# | real-world-memory-core-archival-json    | command   |     |
+# | real-world-memory-core-archival-report  | command   |     |
 # | real-world-memory-live-adapters         | command   |     |
 
 [tasks.real-world-job-smoke]
@@ -824,6 +827,55 @@ args = [
 	"tmp/real-world-memory/consolidation/report.md",
 ]
 
+[tasks.real-world-memory-core-archival]
+workspace = false
+dependencies = [
+	"real-world-memory-core-archival-report",
+]
+
+[tasks.real-world-memory-core-archival-json]
+workspace = false
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"run",
+	"--fixtures",
+	"apps/elf-eval/fixtures/real_world_memory/core_archival_memory",
+	"--out",
+	"tmp/real-world-memory/core-archival/report.json",
+	"--run-id",
+	"real-world-memory-core-archival",
+	"--adapter-id",
+	"fixture_core_archival_memory",
+	"--adapter-name",
+	"ELF core and archival memory fixture",
+]
+
+[tasks.real-world-memory-core-archival-report]
+workspace = false
+dependencies = [
+	"real-world-memory-core-archival-json",
+]
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"publish",
+	"--report",
+	"tmp/real-world-memory/core-archival/report.json",
+	"--out",
+	"tmp/real-world-memory/core-archival/report.md",
+]
+
 [tasks.real-world-memory-live-adapters]
 workspace = false
 command = "bash"
diff --git a/README.md b/README.md
index 8261bf13..f2480a25 100644
--- a/README.md
+++ b/README.md
@@ -149,13 +149,18 @@ provider-backed ELF evidence was required.
   mem0, OpenViking, and claude-mem remained typed non-pass states. OpenViking now
   reaches its pinned Docker local embedding path and is reported as `wrong_result`
   when same-corpus evidence terms are missed; setup failures remain `incomplete`.
-- Real-world agent memory aggregate after the P1 benchmark batch: 38 fixture-backed
-  jobs across 11 suites, 36 pass, 0 incomplete, 2 blocked, 0 wrong-result,
-  0 not-encoded, and 0 unsupported-claim results. The remaining non-pass jobs are
-  production-ops operator boundaries, not hidden benchmark wins.
+- Real-world agent memory aggregate after the P1 benchmark batch and XY-927
+  core-vs-archival fixture update: 44 fixture-backed jobs across 12 suites, 42 pass,
+  0 incomplete, 2 blocked, 0 wrong-result, 0 not-encoded, and 0 unsupported-claim
+  results. The remaining non-pass jobs are production-ops operator boundaries, not
+  hidden benchmark wins. The new `core_archival_memory` suite passes 6 fixture jobs
+  for core block attachment, scope, provenance, stale-core detection, archival
+  fallback, and project-decision recovery; it does not create an ELF-over-Letta
+  claim.
 - Full-suite live real-world adapter sweep after XY-899: ELF and qmd emit
-  Docker-isolated `live_real_world` records for all 38 encoded jobs across 11 suites
-  through `cargo make real-world-memory-live-adapters`. Both keep the original
+  Docker-isolated `live_real_world` records for the previously measured 38 encoded
+  jobs across 11 suites through `cargo make real-world-memory-live-adapters`. Both
+  keep the original
   targeted `work_resume`, `retrieval`, and `project_decisions` slice passing, but the
   full sweep is not a full-suite pass. The fresh ELF sweep reports 18 pass,
   5 wrong_result, 2 blocked, and 13 not_encoded jobs. The fresh qmd sweep reports
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index 2832b202..8cc03e41 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -2088,24 +2088,24 @@
       "evidence_class": "research_gate",
       "docker_default": true,
       "host_global_installs_required": false,
-      "overall_status": "not_encoded",
+      "overall_status": "blocked",
       "setup": {
-        "status": "not_encoded",
-        "evidence": "Letta is D1 reviewed as a core/archival memory reference, but no Docker real_world_job adapter is implemented."
+        "status": "blocked",
+        "evidence": "Letta is D1 reviewed as a core/archival memory reference. The contained comparison contract is a Docker-only benchmark-created agent export that must return core block JSON, archival search readback, and source ids before any scenario claim is scored."
       },
       "run": {
         "status": "not_encoded",
-        "evidence": "No Letta core block, archival memory, or shared-memory job is encoded."
+        "evidence": "No Letta materializer currently creates the benchmark agent, imports the ELF core_archival_memory fixture corpus, or exports comparable core and archival evidence."
       },
       "result": {
         "status": "not_encoded",
-        "evidence": "No Letta personalization or project-decision suite result is claimed."
+        "evidence": "No Letta core block, archival fallback, stale-core, scope, provenance, or project-decision result is claimed."
       },
       "capabilities": [
         {
           "capability": "core_archival_memory",
-          "status": "not_encoded",
-          "evidence": "Core blocks and archival memory are reference semantics but not scored."
+          "status": "blocked",
+          "evidence": "ELF fixture jobs now score core block attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery separately from archival note search; Letta remains blocked until its export maps equivalent source ids."
         },
         {
           "capability": "docker_embedding_configuration",
@@ -2133,6 +2133,67 @@
           "suite_id": "work_resume",
           "status": "not_encoded",
           "evidence": "Agent resumption through Letta memory blocks is not encoded."
+        },
+        {
+          "suite_id": "core_archival_memory",
+          "status": "blocked",
+          "evidence": "ELF fixture coverage exists, but Letta has no contained export/readback artifact for the same core-vs-archival jobs."
+        }
+      ],
+      "scenarios": [
+        {
+          "scenario_id": "core_block_attachment_readback",
+          "suite_id": "core_archival_memory",
+          "status": "not_encoded",
+          "elf_position": "untested",
+          "comparison_outcome": "not_tested",
+          "evidence": "ELF fixture core-archival-core-block-attachment-001 scores exact core block attachment and keeps core readback out of Qdrant-backed archival search. Letta has no comparable exported core block attachment evidence.",
+          "artifact": "apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_attachment.json"
+        },
+        {
+          "scenario_id": "core_block_scope_readback",
+          "suite_id": "core_archival_memory",
+          "status": "not_encoded",
+          "elf_position": "untested",
+          "comparison_outcome": "not_tested",
+          "evidence": "ELF fixture core-archival-core-block-scope-001 scores read_profile, shared scope, and private-owner boundaries. Letta scope behavior remains unscored without a contained export of agent, block, and visibility metadata.",
+          "artifact": "apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_scope.json"
+        },
+        {
+          "scenario_id": "core_block_provenance_readback",
+          "suite_id": "core_archival_memory",
+          "status": "not_encoded",
+          "elf_position": "untested",
+          "comparison_outcome": "not_tested",
+          "evidence": "ELF fixture core-archival-core-block-provenance-001 scores source_ref and audit_history readback. Letta provenance remains not_tested until exported core memory includes stable source ids and audit-equivalent events.",
+          "artifact": "apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_provenance.json"
+        },
+        {
+          "scenario_id": "stale_core_detection",
+          "suite_id": "core_archival_memory",
+          "status": "blocked",
+          "elf_position": "untested",
+          "comparison_outcome": "blocked",
+          "evidence": "ELF fixture core-archival-stale-core-detection-001 scores archival evidence superseding a stale core block. Letta stale-core comparison is blocked until core export and archival readback can be joined by source ids.",
+          "artifact": "apps/elf-eval/fixtures/real_world_memory/core_archival_memory/stale_core_detection.json"
+        },
+        {
+          "scenario_id": "archival_fallback_readback",
+          "suite_id": "core_archival_memory",
+          "status": "blocked",
+          "elf_position": "untested",
+          "comparison_outcome": "blocked",
+          "evidence": "ELF fixture core-archival-archival-fallback-001 scores fallback from insufficient core memory to archival note search. Letta fallback comparison is blocked until archival search output can be exported with source ids.",
+          "artifact": "apps/elf-eval/fixtures/real_world_memory/core_archival_memory/archival_fallback.json"
+        },
+        {
+          "scenario_id": "core_archival_project_decision_recovery",
+          "suite_id": "core_archival_memory",
+          "status": "not_encoded",
+          "elf_position": "untested",
+          "comparison_outcome": "not_tested",
+          "evidence": "ELF fixture core-archival-project-decision-recovery-001 scores core routing plus archival decision rationale. Letta project-decision recovery remains not_tested until the contained export/readback contract exists.",
+          "artifact": "apps/elf-eval/fixtures/real_world_memory/core_archival_memory/project_decision_recovery.json"
         }
       ],
       "evidence": [
@@ -2160,14 +2221,15 @@
             "evidence": "Official Docker deployment guide and embedding configuration boundary."
           }
         ],
-        "setup_path": "Define Docker server setup, embedding model configuration, and a core/archival memory fixture flow.",
-        "runtime_boundary": "Docker-only Letta server or CLI flow with benchmark-created agents and no host-global state.",
-        "resource_expectation": "Embedding model and agent server state must be explicit; record storage and provider boundaries.",
+        "setup_path": "Use a Docker-only Letta server or CLI flow that creates a benchmark-owned agent, loads the checked-in core_archival_memory fixture corpus, writes core memory and archival memory with fixture source ids, then exports core block JSON plus archival search/readback JSON.",
+        "runtime_boundary": "Docker-only Letta server or CLI flow with benchmark-created agents, benchmark-owned storage, no host-global state, and no unstated hosted service dependency.",
+        "resource_expectation": "Embedding model, agent server state, exported core memory, archival search output, and provider boundaries must be explicit in the artifact.",
         "retry_guidance": [
-          "Create a tiny Docker agent with archival memory search.",
-          "Score core-versus-archival retrieval only after source evidence can be exported."
+          "Create a tiny Docker agent with core memory and archival memory loaded from the ELF core_archival_memory fixtures.",
+          "Export core block readback, archival search results, source ids, and any audit-equivalent metadata as JSON before scoring.",
+          "Score core-versus-archival scenarios only after source evidence can be exported and mapped to the fixture evidence ids."
         ],
-        "research_depth": "D1 feasibility verdict: research_only (XY-882); core/archival reference, adapter not encoded"
+        "research_depth": "D1 feasibility verdict: research_only (XY-882); XY-927 selects the contained export/readback contract, but the Letta adapter remains blocked until that artifact exists"
       }
     },
     {
diff --git a/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/archival_fallback.json b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/archival_fallback.json
new file mode 100644
index 00000000..b1928711
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/archival_fallback.json
@@ -0,0 +1,192 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "core-archival-archival-fallback-001",
+  "suite": "core_archival_memory",
+  "title": "Fall back to archival notes when core memory is insufficient",
+  "corpus": {
+    "corpus_id": "real-world-memory-core-archival-2026-06-11",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "fallback-core-insufficient",
+        "kind": "core_block",
+        "text": "Core block summary: a rollback runbook exists for single-user production, but this core block intentionally omits the rollback steps.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "archival_fallback",
+            "evidence_id": "fallback-core-insufficient"
+          },
+          "locator": {
+            "quote": "intentionally omits the rollback steps"
+          }
+        },
+        "created_at": "2026-06-11T04:40:00Z"
+      },
+      {
+        "evidence_id": "fallback-archival-runbook",
+        "kind": "runbook",
+        "text": "Archival rollback note: restore the Postgres backup, rebuild Qdrant from Postgres chunk vectors, and verify search recovers the restored note.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "archival_fallback",
+            "evidence_id": "fallback-archival-runbook"
+          },
+          "locator": {
+            "quote": "restore the Postgres backup, rebuild Qdrant from Postgres chunk vectors"
+          }
+        },
+        "created_at": "2026-06-11T04:41:00Z"
+      },
+      {
+        "evidence_id": "fallback-core-only-trap",
+        "kind": "unsupported_claim",
+        "text": "Unsupported shortcut: answer the rollback steps from the core block alone without archival note search.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "archival_fallback",
+            "evidence_id": "fallback-core-only-trap"
+          }
+        },
+        "created_at": "2026-06-11T04:39:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_core_archival_memory",
+      "answer": {
+        "content": "The core block is insufficient because it says the rollback runbook exists but omits the steps. Fall back to archival note search: restore the Postgres backup, rebuild Qdrant from Postgres chunk vectors, and verify search recovers the restored note.",
+        "claims": [
+          {
+            "claim_id": "core_memory_insufficient",
+            "text": "The core block is insufficient because it omits the rollback steps.",
+            "evidence_ids": ["fallback-core-insufficient"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "archival_fallback_steps",
+            "text": "The archival rollback steps are restore Postgres, rebuild Qdrant from Postgres vectors, and verify search recovers the restored note.",
+            "evidence_ids": ["fallback-archival-runbook"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["fallback-core-insufficient", "fallback-archival-runbook"],
+        "latency_ms": 1.3,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "core-rollback-summary-attached",
+      "ts": "2026-06-11T04:40:00Z",
+      "actor": "agent",
+      "action": "attachment_added",
+      "evidence_ids": ["fallback-core-insufficient"],
+      "summary": "A core block pointed at the rollback runbook but did not include the steps."
+    },
+    {
+      "event_id": "archival-rollback-note-recorded",
+      "ts": "2026-06-11T04:41:00Z",
+      "actor": "agent",
+      "action": "recorded_runbook",
+      "evidence_ids": ["fallback-archival-runbook"],
+      "summary": "The detailed rollback steps were recorded as archival note evidence."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "The attached core block only says a rollback runbook exists. What are the rollback steps?",
+    "job_mode": "answer",
+    "constraints": ["cite_evidence", "use_archival_fallback", "avoid_core_only_hallucination"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "core_memory_insufficient",
+        "text": "The core block is insufficient because it omits the rollback steps."
+      },
+      {
+        "claim_id": "archival_fallback_steps",
+        "text": "The archival rollback steps are restore Postgres, rebuild Qdrant from Postgres vectors, and verify search recovers the restored note."
+      }
+    ],
+    "must_not_include": [
+      "answer the rollback steps from the core block alone"
+    ],
+    "evidence_links": {
+      "core_memory_insufficient": ["fallback-core-insufficient"],
+      "archival_fallback_steps": ["fallback-archival-runbook"]
+    },
+    "answer_type": "archival_fallback_answer",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "fallback-core-insufficient",
+      "claim_id": "core_memory_insufficient",
+      "requirement": "explain",
+      "quote": "intentionally omits the rollback steps"
+    },
+    {
+      "evidence_id": "fallback-archival-runbook",
+      "claim_id": "archival_fallback_steps",
+      "requirement": "cite",
+      "quote": "restore the Postgres backup, rebuild Qdrant from Postgres chunk vectors"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "core-only-rollback-hallucination",
+      "type": "unsupported_claim",
+      "evidence_ids": ["fallback-core-only-trap"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "answer_correctness": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Provides the archival rollback steps."
+      },
+      "evidence_grounding": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Cites both insufficient core memory and archival fallback evidence."
+      },
+      "trap_avoidance": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Avoids core-only hallucination."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Makes the fallback path explicit."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["synthetic", "core_archival_memory", "archival_fallback", "rollback", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_attachment.json b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_attachment.json
new file mode 100644
index 00000000..c1f34487
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_attachment.json
@@ -0,0 +1,192 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "core-archival-core-block-attachment-001",
+  "suite": "core_archival_memory",
+  "title": "Read an explicitly attached core block without treating it as archival search",
+  "corpus": {
+    "corpus_id": "real-world-memory-core-archival-2026-06-11",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "core-attachment-active",
+        "kind": "core_block",
+        "text": "Core block attachment: key project_style has an active attachment for tenant local-tenant project ELF agent local-agent read_profile private_plus_project.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "core_block_attachment",
+            "evidence_id": "core-attachment-active"
+          },
+          "locator": {
+            "quote": "active attachment for tenant local-tenant project ELF agent local-agent read_profile private_plus_project"
+          }
+        },
+        "created_at": "2026-06-11T04:00:00Z"
+      },
+      {
+        "evidence_id": "core-attachment-not-search",
+        "kind": "core_block_contract",
+        "text": "Core block readback is not archival search; it does not embed, rerank, search Qdrant, create a search session, or record note hits.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "core_block_attachment",
+            "evidence_id": "core-attachment-not-search"
+          },
+          "locator": {
+            "quote": "does not embed, rerank, search Qdrant"
+          }
+        },
+        "created_at": "2026-06-11T04:01:00Z"
+      },
+      {
+        "evidence_id": "core-attachment-qdrant-trap",
+        "kind": "stale_claim",
+        "text": "Stale shortcut: core blocks are indexed into Qdrant and returned as normal archival note search hits.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "core_block_attachment",
+            "evidence_id": "core-attachment-qdrant-trap"
+          }
+        },
+        "created_at": "2026-06-11T03:59:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_core_archival_memory",
+      "answer": {
+        "content": "Return the project_style core block because it has an active attachment for the exact tenant, project, agent, and private_plus_project read profile. Keep that readback separate from archival search because core blocks do not embed, rerank, search Qdrant, create search sessions, or record note hits.",
+        "claims": [
+          {
+            "claim_id": "attached_core_block_readback",
+            "text": "The project_style core block is returned through its exact active attachment.",
+            "evidence_ids": ["core-attachment-active"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "core_not_archival_search",
+            "text": "Core block readback is separate from archival search and Qdrant-derived note retrieval.",
+            "evidence_ids": ["core-attachment-not-search"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["core-attachment-active", "core-attachment-not-search"],
+        "latency_ms": 1.0,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "core-project-style-attached",
+      "ts": "2026-06-11T04:00:00Z",
+      "actor": "agent",
+      "action": "attachment_added",
+      "evidence_ids": ["core-attachment-active"],
+      "summary": "The project_style core block was attached for the exact read profile."
+    },
+    {
+      "event_id": "core-archival-boundary-recorded",
+      "ts": "2026-06-11T04:01:00Z",
+      "actor": "agent",
+      "action": "recorded_contract",
+      "evidence_ids": ["core-attachment-not-search"],
+      "summary": "The core block readback boundary was recorded separately from archival search."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Which always-loaded project style block is attached for this agent, and should it appear as a normal archival search hit?",
+    "job_mode": "answer",
+    "constraints": ["cite_evidence", "separate_core_from_archival_search", "avoid_qdrant_core_block_claims"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "attached_core_block_readback",
+        "text": "The project_style core block is returned through its exact active attachment."
+      },
+      {
+        "claim_id": "core_not_archival_search",
+        "text": "Core block readback is separate from archival search and Qdrant-derived note retrieval."
+      }
+    ],
+    "must_not_include": [
+      "core blocks are indexed into Qdrant and returned as normal archival note search hits"
+    ],
+    "evidence_links": {
+      "attached_core_block_readback": ["core-attachment-active"],
+      "core_not_archival_search": ["core-attachment-not-search"]
+    },
+    "answer_type": "direct_answer",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "core-attachment-active",
+      "claim_id": "attached_core_block_readback",
+      "requirement": "cite",
+      "quote": "active attachment for tenant local-tenant project ELF agent local-agent read_profile private_plus_project"
+    },
+    {
+      "evidence_id": "core-attachment-not-search",
+      "claim_id": "core_not_archival_search",
+      "requirement": "cite",
+      "quote": "does not embed, rerank, search Qdrant"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "qdrant-core-block-search-hit",
+      "type": "stale_fact",
+      "evidence_ids": ["core-attachment-qdrant-trap"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "answer_correctness": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Identifies the attached core block."
+      },
+      "evidence_grounding": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Cites attachment and core-search boundary evidence."
+      },
+      "trap_avoidance": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Avoids indexing core blocks into Qdrant-backed archival search."
+      },
+      "lifecycle_behavior": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Preserves explicit attachment semantics."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["synthetic", "core_archival_memory", "core_block", "attachment", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_provenance.json b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_provenance.json
new file mode 100644
index 00000000..f1fd4f92
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_provenance.json
@@ -0,0 +1,192 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "core-archival-core-block-provenance-001",
+  "suite": "core_archival_memory",
+  "title": "Return source refs and audit events for core block assertions",
+  "corpus": {
+    "corpus_id": "real-world-memory-core-archival-2026-06-11",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "core-provenance-source-ref",
+        "kind": "core_block",
+        "text": "Provenance evidence: core block release_policy returns source_ref schema source_ref/v1 with resolver real_world_job_fixture/v1 and locator quote retained for reviewer inspection.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "core_block_provenance",
+            "evidence_id": "core-provenance-source-ref"
+          },
+          "locator": {
+            "quote": "source_ref schema source_ref/v1"
+          }
+        },
+        "created_at": "2026-06-11T04:20:00Z"
+      },
+      {
+        "evidence_id": "core-provenance-audit-events",
+        "kind": "core_block_event",
+        "text": "Audit evidence: release_policy has append-only events block_created, block_updated, and attachment_added returned in audit_history.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "core_block_provenance",
+            "evidence_id": "core-provenance-audit-events"
+          },
+          "locator": {
+            "quote": "block_created, block_updated, and attachment_added"
+          }
+        },
+        "created_at": "2026-06-11T04:21:00Z"
+      },
+      {
+        "evidence_id": "core-provenance-trusted-memory-trap",
+        "kind": "stale_claim",
+        "text": "Stale shortcut: always-loaded core memory is trusted without returning source_ref or audit_history.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "core_block_provenance",
+            "evidence_id": "core-provenance-trusted-memory-trap"
+          }
+        },
+        "created_at": "2026-06-11T04:19:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_core_archival_memory",
+      "answer": {
+        "content": "The release_policy core block must return its source_ref with source_ref/v1 resolver data and retain the locator quote for inspection. Its provenance also includes append-only block_created, block_updated, and attachment_added events in audit_history.",
+        "claims": [
+          {
+            "claim_id": "core_source_ref_returned",
+            "text": "The release_policy core block returns source_ref/v1 provenance.",
+            "evidence_ids": ["core-provenance-source-ref"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "core_audit_history_returned",
+            "text": "The release_policy core block returns block_created, block_updated, and attachment_added audit events.",
+            "evidence_ids": ["core-provenance-audit-events"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["core-provenance-source-ref", "core-provenance-audit-events"],
+        "latency_ms": 1.0,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "core-release-policy-created",
+      "ts": "2026-06-11T04:20:00Z",
+      "actor": "agent",
+      "action": "block_created",
+      "evidence_ids": ["core-provenance-source-ref"],
+      "summary": "The release_policy block was created with a source_ref pointer."
+    },
+    {
+      "event_id": "core-release-policy-attached",
+      "ts": "2026-06-11T04:21:00Z",
+      "actor": "agent",
+      "action": "attachment_added",
+      "evidence_ids": ["core-provenance-audit-events"],
+      "summary": "The release_policy block attachment event was added to audit history."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "What provenance should a returned core release_policy block include?",
+    "job_mode": "answer",
+    "constraints": ["cite_evidence", "include_source_ref", "include_audit_history"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "core_source_ref_returned",
+        "text": "The release_policy core block returns source_ref/v1 provenance."
+      },
+      {
+        "claim_id": "core_audit_history_returned",
+        "text": "The release_policy core block returns block_created, block_updated, and attachment_added audit events."
+      }
+    ],
+    "must_not_include": [
+      "always-loaded core memory is trusted without returning source_ref or audit_history"
+    ],
+    "evidence_links": {
+      "core_source_ref_returned": ["core-provenance-source-ref"],
+      "core_audit_history_returned": ["core-provenance-audit-events"]
+    },
+    "answer_type": "provenance_bundle",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "core-provenance-source-ref",
+      "claim_id": "core_source_ref_returned",
+      "requirement": "cite",
+      "quote": "source_ref schema source_ref/v1"
+    },
+    {
+      "evidence_id": "core-provenance-audit-events",
+      "claim_id": "core_audit_history_returned",
+      "requirement": "cite",
+      "quote": "block_created, block_updated, and attachment_added"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "trusted-core-no-provenance",
+      "type": "unsupported_claim",
+      "evidence_ids": ["core-provenance-trusted-memory-trap"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "answer_correctness": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "States the returned provenance fields."
+      },
+      "evidence_grounding": {
+        "weight": 0.35,
+        "max_points": 1.0,
+        "criteria": "Cites source_ref and audit-history evidence."
+      },
+      "trap_avoidance": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Avoids trusted-without-provenance claims."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Answers in a reviewer-usable provenance bundle shape."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["synthetic", "core_archival_memory", "provenance", "audit_history", "source_ref"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_scope.json b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_scope.json
new file mode 100644
index 00000000..3b379b85
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/core_block_scope.json
@@ -0,0 +1,192 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "core-archival-core-block-scope-001",
+  "suite": "core_archival_memory",
+  "title": "Apply core block scope and private-owner checks before readback",
+  "corpus": {
+    "corpus_id": "real-world-memory-core-archival-2026-06-11",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "core-scope-project-shared-readable",
+        "kind": "core_block",
+        "text": "Scope evidence: project_shared block release_gate is readable for tenant local-tenant project ELF agent local-agent only when the active attachment and read_profile all_scopes allow project_shared.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "core_block_scope",
+            "evidence_id": "core-scope-project-shared-readable"
+          },
+          "locator": {
+            "quote": "active attachment and read_profile all_scopes allow project_shared"
+          }
+        },
+        "created_at": "2026-06-11T04:10:00Z"
+      },
+      {
+        "evidence_id": "core-scope-private-owner",
+        "kind": "core_block",
+        "text": "Private owner evidence: agent_private block agent_a_workflow belongs to agent-a and must not be returned to agent-b even if agent-b has a matching read_profile label.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "core_block_scope",
+            "evidence_id": "core-scope-private-owner"
+          },
+          "locator": {
+            "quote": "must not be returned to agent-b"
+          }
+        },
+        "created_at": "2026-06-11T04:11:00Z"
+      },
+      {
+        "evidence_id": "core-scope-bypass-trap",
+        "kind": "stale_claim",
+        "text": "Stale shortcut: a core block attachment bypasses read_profile scope checks, private-owner checks, and shared grants.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "core_block_scope",
+            "evidence_id": "core-scope-bypass-trap"
+          }
+        },
+        "created_at": "2026-06-11T04:09:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_core_archival_memory",
+      "answer": {
+        "content": "Return the release_gate core block only when the active attachment and all_scopes read profile allow project_shared. Do not return agent_a_workflow to agent-b, because private-owner checks still apply to agent_private core blocks.",
+        "claims": [
+          {
+            "claim_id": "shared_core_scope_allowed",
+            "text": "The project_shared release_gate block is readable only when attachment and read_profile allow project_shared.",
+            "evidence_ids": ["core-scope-project-shared-readable"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "private_core_scope_denied",
+            "text": "The agent_private agent_a_workflow block must not be returned to agent-b.",
+            "evidence_ids": ["core-scope-private-owner"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["core-scope-project-shared-readable", "core-scope-private-owner"],
+        "latency_ms": 1.1,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "core-release-gate-shared",
+      "ts": "2026-06-11T04:10:00Z",
+      "actor": "agent",
+      "action": "attachment_added",
+      "evidence_ids": ["core-scope-project-shared-readable"],
+      "summary": "The release_gate block was attached with project_shared scope."
+    },
+    {
+      "event_id": "core-agent-a-private",
+      "ts": "2026-06-11T04:11:00Z",
+      "actor": "agent-a",
+      "action": "block_created",
+      "evidence_ids": ["core-scope-private-owner"],
+      "summary": "The agent_a_workflow block remained private to agent-a."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "For core memory readback, which shared block can this agent see, and can agent-b also see agent-a's private block?",
+    "job_mode": "answer",
+    "constraints": ["cite_evidence", "enforce_scope", "avoid_private_owner_leakage"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "shared_core_scope_allowed",
+        "text": "The project_shared release_gate block is readable only when attachment and read_profile allow project_shared."
+      },
+      {
+        "claim_id": "private_core_scope_denied",
+        "text": "The agent_private agent_a_workflow block must not be returned to agent-b."
+      }
+    ],
+    "must_not_include": [
+      "a core block attachment bypasses read_profile scope checks"
+    ],
+    "evidence_links": {
+      "shared_core_scope_allowed": ["core-scope-project-shared-readable"],
+      "private_core_scope_denied": ["core-scope-private-owner"]
+    },
+    "answer_type": "direct_answer",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "core-scope-project-shared-readable",
+      "claim_id": "shared_core_scope_allowed",
+      "requirement": "cite",
+      "quote": "active attachment and read_profile all_scopes allow project_shared"
+    },
+    {
+      "evidence_id": "core-scope-private-owner",
+      "claim_id": "private_core_scope_denied",
+      "requirement": "cite",
+      "quote": "must not be returned to agent-b"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "core-attachment-bypasses-scope",
+      "type": "scope_leak",
+      "evidence_ids": ["core-scope-bypass-trap"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "answer_correctness": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Applies readable shared scope and denied private owner scope."
+      },
+      "evidence_grounding": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Cites scope and private-owner evidence."
+      },
+      "trap_avoidance": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Avoids scope-bypass claims."
+      },
+      "ownership_correctness": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Does not leak private core blocks across agents."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["synthetic", "core_archival_memory", "scope", "private_owner", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/project_decision_recovery.json b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/project_decision_recovery.json
new file mode 100644
index 00000000..229ecc34
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/project_decision_recovery.json
@@ -0,0 +1,230 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "core-archival-project-decision-recovery-001",
+  "suite": "core_archival_memory",
+  "title": "Recover a project decision from core routing and archival rationale",
+  "corpus": {
+    "corpus_id": "real-world-memory-core-archival-2026-06-11",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "decision-core-routing-block",
+        "kind": "core_block",
+        "text": "Core decision routing block: keep the benchmark outcome policy always attached and route detailed rationale to archival notes.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "project_decision_recovery",
+            "evidence_id": "decision-core-routing-block"
+          },
+          "locator": {
+            "quote": "route detailed rationale to archival notes"
+          }
+        },
+        "created_at": "2026-06-11T04:50:00Z"
+      },
+      {
+        "evidence_id": "decision-archival-outcome-policy",
+        "kind": "decision",
+        "text": "Archival decision record: scenario outcomes use win, tie, loss, not_tested, blocked, or non_goal only when scenario evidence supports them.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "project_decision_recovery",
+            "evidence_id": "decision-archival-outcome-policy"
+          },
+          "locator": {
+            "quote": "use win, tie, loss, not_tested, blocked, or non_goal only when scenario evidence supports them"
+          }
+        },
+        "created_at": "2026-06-11T04:51:00Z"
+      },
+      {
+        "evidence_id": "decision-archival-core-search-boundary",
+        "kind": "decision",
+        "text": "Archival project decision: core blocks stay separate from archival note search and Qdrant-derived retrieval.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "project_decision_recovery",
+            "evidence_id": "decision-archival-core-search-boundary"
+          },
+          "locator": {
+            "quote": "core blocks stay separate from archival note search"
+          }
+        },
+        "created_at": "2026-06-11T04:52:00Z"
+      },
+      {
+        "evidence_id": "decision-letta-win-trap",
+        "kind": "unsupported_claim",
+        "text": "Wrong claim: Letta comparison can be scored as an ELF win because ELF has core blocks.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "project_decision_recovery",
+            "evidence_id": "decision-letta-win-trap"
+          }
+        },
+        "created_at": "2026-06-11T04:49:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_core_archival_memory",
+      "answer": {
+        "content": "Use the always-attached core routing block to find the benchmark outcome policy, then cite archival notes for the detailed decision. The archival decision says to use win, tie, loss, not_tested, blocked, or non_goal only when scenario evidence supports them. It also says core blocks stay separate from archival note search and Qdrant-derived retrieval, so no ELF-over-Letta claim follows from ELF having core blocks.",
+        "claims": [
+          {
+            "claim_id": "core_routes_to_archival_rationale",
+            "text": "The core routing block points detailed decision rationale to archival notes.",
+            "evidence_ids": ["decision-core-routing-block"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "outcomes_require_evidence",
+            "text": "Scenario outcomes use win, tie, loss, not_tested, blocked, or non_goal only when evidence supports them.",
+            "evidence_ids": ["decision-archival-outcome-policy"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "core_archival_boundary_preserved",
+            "text": "Core blocks stay separate from archival note search and Qdrant-derived retrieval.",
+            "evidence_ids": ["decision-archival-core-search-boundary"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": [
+          "decision-core-routing-block",
+          "decision-archival-outcome-policy",
+          "decision-archival-core-search-boundary"
+        ],
+        "latency_ms": 1.4,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "decision-routing-core-attached",
+      "ts": "2026-06-11T04:50:00Z",
+      "actor": "agent",
+      "action": "attachment_added",
+      "evidence_ids": ["decision-core-routing-block"],
+      "summary": "A core block kept the outcome-policy routing pointer always attached."
+    },
+    {
+      "event_id": "decision-outcome-policy-archived",
+      "ts": "2026-06-11T04:51:00Z",
+      "actor": "agent",
+      "action": "recorded_decision",
+      "evidence_ids": ["decision-archival-outcome-policy", "decision-archival-core-search-boundary"],
+      "summary": "Archival notes recorded the detailed outcome policy and core-search boundary."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "What is the benchmark outcome policy, and does having ELF core blocks make Letta a measured loss?",
+    "job_mode": "decide",
+    "constraints": ["cite_evidence", "recover_project_decision", "avoid_unsupported_letta_claims"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "core_routes_to_archival_rationale",
+        "text": "The core routing block points detailed decision rationale to archival notes."
+      },
+      {
+        "claim_id": "outcomes_require_evidence",
+        "text": "Scenario outcomes use win, tie, loss, not_tested, blocked, or non_goal only when evidence supports them."
+      },
+      {
+        "claim_id": "core_archival_boundary_preserved",
+        "text": "Core blocks stay separate from archival note search and Qdrant-derived retrieval."
+      }
+    ],
+    "must_not_include": [
+      "Letta comparison can be scored as an ELF win because ELF has core blocks"
+    ],
+    "evidence_links": {
+      "core_routes_to_archival_rationale": ["decision-core-routing-block"],
+      "outcomes_require_evidence": ["decision-archival-outcome-policy"],
+      "core_archival_boundary_preserved": ["decision-archival-core-search-boundary"]
+    },
+    "answer_type": "decision_record",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "decision-core-routing-block",
+      "claim_id": "core_routes_to_archival_rationale",
+      "requirement": "cite",
+      "quote": "route detailed rationale to archival notes"
+    },
+    {
+      "evidence_id": "decision-archival-outcome-policy",
+      "claim_id": "outcomes_require_evidence",
+      "requirement": "cite",
+      "quote": "use win, tie, loss, not_tested, blocked, or non_goal only when scenario evidence supports them"
+    },
+    {
+      "evidence_id": "decision-archival-core-search-boundary",
+      "claim_id": "core_archival_boundary_preserved",
+      "requirement": "cite",
+      "quote": "core blocks stay separate from archival note search"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "unsupported-letta-loss-from-elf-core",
+      "type": "unsupported_claim",
+      "evidence_ids": ["decision-letta-win-trap"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "answer_correctness": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Recovers the benchmark outcome policy."
+      },
+      "evidence_grounding": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Cites core routing and archival decision evidence."
+      },
+      "trap_avoidance": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Avoids an unsupported Letta win or loss claim."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Explains how core memory and archival decision evidence work together."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["synthetic", "core_archival_memory", "project_decisions", "letta_boundary", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/stale_core_detection.json b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/stale_core_detection.json
new file mode 100644
index 00000000..084c26cb
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/stale_core_detection.json
@@ -0,0 +1,206 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "core-archival-stale-core-detection-001",
+  "suite": "core_archival_memory",
+  "title": "Detect a stale core block when archival evidence supersedes it",
+  "corpus": {
+    "corpus_id": "real-world-memory-core-archival-2026-06-11",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "stale-core-validation-gate",
+        "kind": "core_block",
+        "text": "Stale core block: the validation gate is cargo make lint and cargo make test.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "stale_core_detection",
+            "evidence_id": "stale-core-validation-gate"
+          }
+        },
+        "created_at": "2026-06-10T10:00:00Z"
+      },
+      {
+        "evidence_id": "archival-current-validation-gate",
+        "kind": "decision",
+        "text": "Archival decision update: before pushing a refreshed PR head, run cargo make fmt, cargo make lint-fix, and cargo make checks.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "stale_core_detection",
+            "evidence_id": "archival-current-validation-gate"
+          },
+          "locator": {
+            "quote": "cargo make fmt, cargo make lint-fix, and cargo make checks"
+          }
+        },
+        "created_at": "2026-06-11T04:30:00Z"
+      },
+      {
+        "evidence_id": "archival-supersedes-core-rationale",
+        "kind": "decision",
+        "text": "Rationale: archival note evidence supersedes the attached core block until the core block is updated from source-of-truth state.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "stale_core_detection",
+            "evidence_id": "archival-supersedes-core-rationale"
+          },
+          "locator": {
+            "quote": "supersedes the attached core block"
+          }
+        },
+        "created_at": "2026-06-11T04:31:00Z"
+      },
+      {
+        "evidence_id": "stale-core-answer-trap",
+        "kind": "stale_claim",
+        "text": "Wrong answer trap: cite the core block as current and skip archival evidence.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "stale_core_detection",
+            "evidence_id": "stale-core-answer-trap"
+          }
+        },
+        "created_at": "2026-06-11T04:29:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_core_archival_memory",
+      "answer": {
+        "content": "Treat the attached validation-gate core block as stale. The current archival decision says to run cargo make fmt, cargo make lint-fix, and cargo make checks before pushing a refreshed PR head, and the archival rationale says that evidence supersedes the core block until it is updated from source-of-truth state.",
+        "claims": [
+          {
+            "claim_id": "stale_core_detected",
+            "text": "The attached validation-gate core block is stale.",
+            "evidence_ids": ["archival-supersedes-core-rationale"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "archival_current_gate",
+            "text": "The current archival validation gate is cargo make fmt, cargo make lint-fix, and cargo make checks.",
+            "evidence_ids": ["archival-current-validation-gate"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["archival-current-validation-gate", "archival-supersedes-core-rationale"],
+        "latency_ms": 1.2,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "old-core-gate-attached",
+      "ts": "2026-06-10T10:00:00Z",
+      "actor": "agent",
+      "action": "block_created",
+      "evidence_ids": ["stale-core-validation-gate"],
+      "summary": "A core block recorded an old validation gate."
+    },
+    {
+      "event_id": "archival-gate-updated",
+      "ts": "2026-06-11T04:30:00Z",
+      "actor": "agent",
+      "action": "updated_decision",
+      "evidence_ids": ["archival-current-validation-gate", "archival-supersedes-core-rationale"],
+      "summary": "Archival evidence superseded the old core validation gate."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "The attached core block says the gate is lint and test. Is that still current before a refreshed PR push?",
+    "job_mode": "answer",
+    "constraints": ["cite_evidence", "detect_stale_core", "prefer_current_archival_evidence"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "stale_core_detected",
+        "text": "The attached validation-gate core block is stale."
+      },
+      {
+        "claim_id": "archival_current_gate",
+        "text": "The current archival validation gate is cargo make fmt, cargo make lint-fix, and cargo make checks."
+      }
+    ],
+    "must_not_include": [
+      "the validation gate is cargo make lint and cargo make test"
+    ],
+    "evidence_links": {
+      "stale_core_detected": ["archival-supersedes-core-rationale"],
+      "archival_current_gate": ["archival-current-validation-gate"]
+    },
+    "answer_type": "current_state_with_stale_core_caveat",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "archival-current-validation-gate",
+      "claim_id": "archival_current_gate",
+      "requirement": "cite",
+      "quote": "cargo make fmt, cargo make lint-fix, and cargo make checks"
+    },
+    {
+      "evidence_id": "archival-supersedes-core-rationale",
+      "claim_id": "stale_core_detected",
+      "requirement": "explain",
+      "quote": "supersedes the attached core block"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "stale-core-current-answer",
+      "type": "stale_fact",
+      "evidence_ids": ["stale-core-validation-gate", "stale-core-answer-trap"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "answer_correctness": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "States that the attached core block is stale."
+      },
+      "evidence_grounding": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Cites current archival evidence and supersession rationale."
+      },
+      "trap_avoidance": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Avoids answering from stale core memory."
+      },
+      "lifecycle_behavior": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Detects stale core state when archival evidence supersedes it."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["synthetic", "core_archival_memory", "stale_core", "archival_supersession", "no_live_claim"]
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
index a167d2bd..a8bd3973 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -54,6 +54,7 @@ const SUITES: &[&str] = &[
 	"capture_integration",
 	"production_ops",
 	"personalization",
+	"core_archival_memory",
 ];
 
 #[derive(Debug, Parser)]
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index a8c7e927..2300565b 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -60,6 +60,10 @@ fn production_ops_fixture_dir() -> PathBuf {
 	real_world_memory_fixture_dir().join("production_ops")
 }
 
+fn core_archival_memory_fixture_dir() -> PathBuf {
+	real_world_memory_fixture_dir().join("core_archival_memory") // suite's fixture directory
+}
+
 fn workspace_root() -> Result<PathBuf> {
 	let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
 	let root = manifest_dir
@@ -373,7 +377,7 @@ fn external_adapter_run_summarizes_nonzero_scenario_losses() -> Result<()> {
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/untested")
 			.and_then(Value::as_u64),
-		Some(10)
+		Some(16)
 	);
 	assert_eq!(
 		report
@@ -385,7 +389,7 @@ fn external_adapter_run_summarizes_nonzero_scenario_losses() -> Result<()> {
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/not_tested")
 			.and_then(Value::as_u64),
-		Some(7)
+		Some(11)
 	);
 
 	let adapters = array_at(&report, "/external_adapters/adapters")?;
@@ -472,13 +476,13 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(5)
+		Some(6)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/not_encoded")
 			.and_then(Value::as_u64),
-		Some(7)
+		Some(6)
 	);
 	assert_eq!(
 		report
@@ -496,7 +500,7 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/suite_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(13)
+		Some(14)
 	);
 	assert_eq!(
 		report
@@ -531,7 +535,7 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(2)
+		Some(4)
 	);
 	assert_eq!(
 		report
@@ -561,7 +565,7 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/not_encoded")
 			.and_then(Value::as_u64),
-		Some(3)
+		Some(7)
 	);
 	assert_eq!(
 		report
@@ -585,7 +589,7 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/untested")
 			.and_then(Value::as_u64),
-		Some(11)
+		Some(17)
 	);
 	assert_eq!(
 		report
@@ -609,13 +613,13 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/not_tested")
 			.and_then(Value::as_u64),
-		Some(8)
+		Some(12)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(1)
+		Some(3)
 	);
 	assert_eq!(
 		report
@@ -645,6 +649,7 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
 	let graphify = find_by_field(adapters, "/adapter_id", "graphify_docker_smoke")?;
 	let qmd_deep = find_by_field(adapters, "/adapter_id", "qmd_deep_profile_gate")?;
 	let openviking_deep = find_by_field(adapters, "/adapter_id", "openviking_deep_profile_gate")?;
+	let letta = find_by_field(adapters, "/adapter_id", "letta_research_gate")?;
 
 	assert_eq!(elf.pointer("/evidence_class").and_then(Value::as_str), Some("fixture_backed"));
 	assert_eq!(elf.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
@@ -678,6 +683,36 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
 	assert_first_generation_adapter_records(agentmemory, mem0, memsearch, claude_mem);
 
 	assert_eq!(openviking.pointer("/overall_status").and_then(Value::as_str), Some("wrong_result"));
+
+	assert_graph_rag_research_gate_records(ragflow, lightrag, graphrag);
+	assert_graphiti_zep_adapter(graphiti_zep);
+	assert_graphify_adapter(graphify)?;
+	assert_letta_core_archival_gate(letta)?;
+
+	assert_eq!(
+		qmd_deep.pointer("/capabilities/2/status").and_then(Value::as_str),
+		Some("unsupported")
+	);
+	assert_eq!(
+		qmd_deep.pointer("/result/artifact").and_then(Value::as_str),
+		Some("docs/research/2026-06-11-qmd-openviking-strength-profile-report.json")
+	);
+	assert_eq!(
+		openviking_deep.pointer("/adapter_kind").and_then(Value::as_str),
+		Some("docker_local_embed_context_trajectory_gate")
+	);
+
+	assert_openviking_deep_profile_gate(openviking_deep);
+
+	assert_eq!(
+		openviking_deep.pointer("/result/artifact").and_then(Value::as_str),
+		Some("docs/research/2026-06-11-qmd-openviking-strength-profile-report.json")
+	);
+
+	Ok(())
+}
+
+fn assert_graph_rag_research_gate_records(ragflow: &Value, lightrag: &Value, graphrag: &Value) {
 	assert_eq!(ragflow.pointer("/evidence_class").and_then(Value::as_str), Some("research_gate"));
 	assert_eq!(ragflow.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
 	assert_eq!(
@@ -718,29 +753,54 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
 		Some("cargo make graphrag-docker-smoke")
 	);
 	assert_eq!(graphrag.pointer("/suites/1/status").and_then(Value::as_str), Some("not_encoded"));
+}
 
-	assert_graphiti_zep_adapter(graphiti_zep);
-	assert_graphify_adapter(graphify)?;
-
-	assert_eq!(
-		qmd_deep.pointer("/capabilities/2/status").and_then(Value::as_str),
-		Some("unsupported")
-	);
-	assert_eq!(
-		qmd_deep.pointer("/result/artifact").and_then(Value::as_str),
-		Some("docs/research/2026-06-11-qmd-openviking-strength-profile-report.json")
-	);
-	assert_eq!(
-		openviking_deep.pointer("/adapter_kind").and_then(Value::as_str),
-		Some("docker_local_embed_context_trajectory_gate")
+fn assert_letta_core_archival_gate(adapter: &Value) -> Result<()> { // Letta record stays blocked
+	assert_eq!(adapter.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
+	assert!(
+		adapter
+			.pointer("/setup/evidence")
+			.and_then(Value::as_str)
+			.is_some_and(|evidence| evidence.contains("Docker-only benchmark-created agent export"))
 	);
+	assert!(adapter.pointer("/execution_metadata/setup_path").and_then(Value::as_str).is_some_and(
+		|setup| setup.contains("exports core block JSON plus archival search/readback JSON")
+	));
 
-	assert_openviking_deep_profile_gate(openviking_deep);
+	let suites = array_at(adapter, "/suites")?; // suite-level status for this adapter
+	let core_suite = find_by_field(suites, "/suite_id", "core_archival_memory")?;
+
+	assert_eq!(core_suite.pointer("/status").and_then(Value::as_str), Some("blocked"));
+
+	let scenarios = array_at(adapter, "/scenarios")?;
+	let attachment = find_by_field(scenarios, "/scenario_id", "core_block_attachment_readback")?;
+	let scope = find_by_field(scenarios, "/scenario_id", "core_block_scope_readback")?;
+	let provenance = find_by_field(scenarios, "/scenario_id", "core_block_provenance_readback")?;
+	let stale = find_by_field(scenarios, "/scenario_id", "stale_core_detection")?;
+	let fallback = find_by_field(scenarios, "/scenario_id", "archival_fallback_readback")?;
+	let decision =
+		find_by_field(scenarios, "/scenario_id", "core_archival_project_decision_recovery")?;
+
+	assert_eq!(scenarios.len(), 6); // no scenarios beyond the six looked up above
+
+	for scenario in [attachment, scope, provenance, stale, fallback, decision] { // shared checks
+		assert_eq!(scenario.pointer("/elf_position").and_then(Value::as_str), Some("untested"));
+		assert!(
+			["not_tested", "blocked"].contains(
+				&scenario
+					.pointer("/comparison_outcome")
+					.and_then(Value::as_str)
+					.ok_or_else(|| eyre::eyre!("missing Letta comparison_outcome"))?
+			)
+		);
+	}
 
 	assert_eq!(
-		openviking_deep.pointer("/result/artifact").and_then(Value::as_str),
-		Some("docs/research/2026-06-11-qmd-openviking-strength-profile-report.json")
+		attachment.pointer("/comparison_outcome").and_then(Value::as_str), // exact outcomes
+		Some("not_tested")
 	);
+	assert_eq!(stale.pointer("/comparison_outcome").and_then(Value::as_str), Some("blocked"));
+	assert_eq!(fallback.pointer("/comparison_outcome").and_then(Value::as_str), Some("blocked"));
 
 	Ok(())
 }
@@ -1320,7 +1380,7 @@ fn assert_live_sweep_record(adapter: &Value, production_ops_status: &str) -> Res
 fn runner_discovers_nested_fixture_layout() -> Result<()> {
 	let report = run_json_report_from(fixture_root())?;
 
-	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(38));
+	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(44));
 
 	Ok(())
 }
@@ -2497,9 +2557,9 @@ fn generated_json_report_renders_markdown() -> Result<()> {
 	assert!(markdown.contains("xy844-current-worktree"));
 	assert!(markdown.contains("Existing live-baseline reports remain valid"));
 	assert!(markdown.contains("### Adapter Scenario Judgments"));
-	assert!(markdown.contains("ELF scenario positions: `wins=8, ties=8, loses=1, untested=11`"));
+	assert!(markdown.contains("ELF scenario positions: `wins=8, ties=8, loses=1, untested=17`"));
 	assert!(markdown.contains(
-		"Scenario comparison outcomes: `win=8, tie=8, loss=1, not_tested=8, blocked=1, non_goal=2`"
+		"Scenario comparison outcomes: `win=8, tie=8, loss=1, not_tested=12, blocked=3, non_goal=2`"
 	));
 	assert!(markdown.contains("| `claude_mem_live_baseline` | `same_corpus_retrieval`"));
 	assert!(markdown.contains("| `memsearch_live_baseline` | `ttl_expiry_lifecycle`"));
@@ -2776,6 +2836,46 @@ fn production_ops_fixtures_report_bounded_typed_states() -> Result<()> {
 	Ok(())
 }
 
+#[test] // core_archival_memory fixtures in isolation: all 6 jobs pass.
+fn core_archival_memory_fixtures_score_separate_core_and_archival_jobs() -> Result<()> {
+	let report = run_json_report_from(core_archival_memory_fixture_dir())?; // suite fixtures only
+
+	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(6));
+	assert_eq!(report.pointer("/summary/encoded_suite_count").and_then(Value::as_u64), Some(1));
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(6));
+	assert_eq!(report.pointer("/summary/wrong_result").and_then(Value::as_u64), Some(0));
+	assert_eq!(report.pointer("/summary/blocked").and_then(Value::as_u64), Some(0));
+	assert_eq!(
+		report.pointer("/summary/expected_evidence_recall").and_then(Value::as_f64),
+		Some(1.0)
+	);
+	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(1.0));
+
+	let suites = array_at(&report, "/suites")?; // suite-level rollup
+	let core = find_by_field(suites, "/suite_id", "core_archival_memory")?;
+
+	assert_eq!(core.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(core.pointer("/encoded_job_count").and_then(Value::as_u64), Some(6));
+
+	let jobs = array_at(&report, "/jobs")?;
+
+	for job_id in [ // each listed job must belong to the suite and pass
+		"core-archival-core-block-attachment-001",
+		"core-archival-core-block-scope-001",
+		"core-archival-core-block-provenance-001",
+		"core-archival-stale-core-detection-001",
+		"core-archival-archival-fallback-001",
+		"core-archival-project-decision-recovery-001",
+	] {
+		let job = find_by_field(jobs, "/job_id", job_id)?;
+
+		assert_eq!(job.pointer("/suite_id").and_then(Value::as_str), Some("core_archival_memory"));
+		assert_eq!(job.pointer("/status").and_then(Value::as_str), Some("pass"));
+	}
+
+	Ok(())
+}
+
 fn assert_root_knowledge_summary(report: &Value) {
 	assert_eq!(report.pointer("/summary/knowledge/job_count").and_then(Value::as_u64), Some(2));
 	assert_eq!(report.pointer("/summary/knowledge/page_count").and_then(Value::as_u64), Some(4));
@@ -2786,8 +2886,8 @@ fn assert_root_knowledge_summary(report: &Value) {
 }
 
 fn assert_root_aggregate_summary(report: &Value) {
-	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(38));
-	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(36));
+	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(44));
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(42));
 	assert_eq!(report.pointer("/summary/wrong_result").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/incomplete").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/blocked").and_then(Value::as_u64), Some(2));
@@ -2830,9 +2930,9 @@ fn assert_root_aggregate_summary(report: &Value) {
 	);
 	assert_eq!(
 		report.pointer("/summary/evidence_required_count").and_then(Value::as_u64),
-		Some(84)
+		Some(97)
 	);
-	assert_eq!(report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64), Some(84));
+	assert_eq!(report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64), Some(97));
 	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(1.0));
 	assert_eq!(report.pointer("/summary/source_ref_coverage").and_then(Value::as_f64), Some(1.0));
 	assert_eq!(report.pointer("/summary/quote_coverage").and_then(Value::as_f64), Some(1.0));
@@ -2876,6 +2976,7 @@ fn assert_root_aggregate_suites(report: &Value) -> Result<()> {
 		"knowledge_compilation",
 		"operator_debugging_ux",
 		"memory_evolution",
+		"core_archival_memory",
 	] {
 		let suite = find_by_field(suites, "/suite_id", suite_id)?;
 
@@ -2898,6 +2999,11 @@ fn assert_root_aggregate_suites(report: &Value) -> Result<()> {
 
 	assert_eq!(debug_suite.pointer("/status").and_then(Value::as_str), Some("pass"));
 
+	let core_suite = find_by_field(suites, "/suite_id", "core_archival_memory")?;
+
+	assert_eq!(core_suite.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(core_suite.pointer("/encoded_job_count").and_then(Value::as_u64), Some(6));
+
 	let production_ops = find_by_field(suites, "/suite_id", "production_ops")?;
 
 	assert_eq!(production_ops.pointer("/status").and_then(Value::as_str), Some("blocked"));
@@ -2915,6 +3021,8 @@ fn assert_root_aggregate_jobs(report: &Value) -> Result<()> {
 	let stage_job = find_by_field(jobs, "/job_id", "operator-debug-stage-attribution-001")?;
 	let production_restore =
 		find_by_field(jobs, "/job_id", "production-ops-restore-cold-start-001")?;
+	let core_fallback = find_by_field(jobs, "/job_id", "core-archival-archival-fallback-001")?;
+	let stale_core = find_by_field(jobs, "/job_id", "core-archival-stale-core-detection-001")?;
 
 	assert_eq!(rebuild.pointer("/qdrant_rebuild_case").and_then(Value::as_bool), Some(true));
 	assert_eq!(
@@ -2926,6 +3034,8 @@ fn assert_root_aggregate_jobs(report: &Value) -> Result<()> {
 	assert_eq!(personalization.pointer("/scope_correct_count").and_then(Value::as_u64), Some(1));
 	assert_eq!(stage_job.pointer("/status").and_then(Value::as_str), Some("pass"));
 	assert_eq!(relation_job.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(core_fallback.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(stale_core.pointer("/status").and_then(Value::as_str), Some("pass"));
 	assert_eq!(
 		stage_job.pointer("/trace_explainability/failure_stage").and_then(Value::as_str),
 		Some("rerank.score")
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
index 120c6b3d..d3f19cce 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
@@ -38,14 +38,15 @@ The remaining caveats are material:
   setup exists.
 - Several competitor strengths remain `not_tested` or blocked: OpenMemory
   UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform
-  behavior remains a non-goal, and OpenViking trajectory, Letta core-vs-archival
-  memory, and graph/RAG navigation remain unproven. mem0 local OSS preference history
-  is measured separately and is an ELF loss on the current correction history
-  scenario. The XY-923 follow-up also scores qmd's immediate top-10/replay artifact
-  ergonomics as stronger than ELF's default stress report, while expansion, fusion,
-  and rerank remain untested. XY-932 adds a narrow live operator-debug slice where
-  ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory
-  UI/export and claude-mem viewer workflows remain blocked or not encoded.
+  behavior remains a non-goal, OpenViking trajectory and graph/RAG navigation remain
+  unproven, and Letta core-vs-archival comparison is blocked until the selected
+  contained export/readback path exists. mem0 local OSS preference history is
+  measured separately and is an ELF loss on the current correction history scenario.
+  The XY-923 follow-up also scores qmd's immediate top-10/replay artifact ergonomics
+  as stronger than ELF's default stress report, while expansion, fusion, and rerank
+  remain untested. XY-932 adds a narrow live operator-debug slice where ELF beats qmd
+  on trace hydration and candidate-drop visibility, but OpenMemory UI/export and
+  claude-mem viewer workflows remain blocked or not encoded.
 
 ## Evidence Classes
 
@@ -70,7 +71,8 @@ results, or lifecycle failures into one aggregate leaderboard.
 
 | Command or run | Artifact | Supported claim |
 | --- | --- | --- |
-| `cargo make real-world-memory` | `2026-06-11-measurement-coverage-audit.md` | ELF fixture aggregate covers 38 jobs across 11 suites with 36 pass and 2 blocked production-ops operator boundaries. |
+| `cargo make real-world-memory` | `2026-06-11-measurement-coverage-audit.md` | ELF fixture aggregate covers 44 jobs across 12 suites with 42 pass, including 6 passing `core_archival_memory` jobs, and 2 blocked production-ops operator boundaries. |
+| `cargo make real-world-memory-core-archival` | `tmp/real-world-memory/core-archival/report.json` | ELF core-block behavior is scored separately from archival note search for attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery. |
 | `cargo make real-world-memory-live-adapters` | `2026-06-11-measurement-coverage-audit.md` | ELF live service adapter reports 18 pass, 5 wrong_result, 2 blocked, and 13 not_encoded jobs; qmd reports 17 pass, 6 wrong_result, 2 blocked, and 13 not_encoded jobs. |
 | `cargo make real-world-job-operator-ux-live-adapters` | `tmp/real-world-job/operator-ux-live-adapters/summary.json` | The narrow live operator-debug slice scores ELF as pass and qmd as wrong_result: ELF wins trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence; both systems expose replay commands and repair-action guidance. |
 | `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker` | `2026-06-11-first-generation-oss-adapter-promotion-report.md` | mem0/OpenMemory and memsearch pass basic local baseline smokes; agentmemory remains lifecycle_fail and claude-mem remains wrong_result. |
@@ -86,7 +88,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 | --- | --- | --- | --- | --- |
 | Source-of-truth rebuild and evidence-bound writes | `win` | `fixture_backed`, `live_real_world`, `live_baseline_only` | ELF has the strongest measured source-of-truth and rebuild story: Postgres is authoritative, Qdrant is rebuildable, trust-source jobs pass, and production restore/rebuild proof exists. | None |
 | Work resume and coding-agent continuity | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `blocked`, `not_encoded` | ELF and qmd both pass encoded live `work_resume` jobs; agentmemory, claude-mem, and OpenViking continuity strengths remain blocked or not encoded. | XY-925, XY-928 |
-| Project decisions and reversals | `tie` | `fixture_backed`, `live_real_world`, `research_gate`, `not_encoded` | ELF and qmd both pass encoded `project_decisions` jobs; Letta-style core/archival decision memory is not tested. | XY-927 |
+| Project decisions and reversals | `tie` | `fixture_backed`, `live_real_world`, `research_gate`, `not_encoded` | ELF and qmd both pass encoded `project_decisions` jobs. The ELF `core_archival_memory` fixture also scores project-decision recovery through core routing plus archival rationale, but Letta-style comparison remains blocked without contained export evidence. | XY-927 |
 | Retrieval quality | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only` | ELF and qmd both pass encoded live retrieval and stress/same-corpus retrieval evidence. | XY-923 |
 | Retrieval quality and local debug UX | `loss` | `live_baseline_only`, `research_gate`, `wrong_result`, `not_encoded` | The XY-923 trace/replay report scores qmd stronger on immediate top-10 candidate artifacts and short CLI replay commands. ELF keeps useful service trace/admin replay surfaces, and expansion, fusion, rerank-on, and candidate-drop diagnostics remain untested. | XY-923 |
 | Memory evolution and temporal history | `loss` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `wrong_result`, `blocked` | ELF fixture memory evolution passes, but live ELF passes only delete/TTL and reports five wrong_result jobs where current-vs-historical state is not reconciled. The mem0 local OSS preference-correction history scenario is now measured and is also an ELF loss. | XY-905 |
@@ -98,7 +100,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 | Private corpus and provider boundaries | `blocked` | `blocked` | Private production profile fails closed without an operator-owned manifest; provider-backed production-ops gates require explicit credentials. | XY-930 |
 | Personalization and scoped preferences | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `not_encoded` | ELF and qmd both pass the single encoded live personalization job. mem0 local OSS now passes entity-scoped personalization, so scoped preference behavior is a measured tie; preference correction history remains a separate ELF loss. | XY-927 |
 | Context trajectory and hierarchical retrieval | `not_tested` | `live_baseline_only`, `research_gate`, `wrong_result`, `not_encoded` | OpenViking reaches the pinned Docker local embedding path but misses expected same-corpus evidence; staged trajectory/hierarchy scoring is not encoded. | XY-928 |
-| Core-vs-archival memory | `not_tested` | `research_gate`, `not_encoded` | ELF has core block semantics in the service contract, but comparable core-vs-archival jobs and a contained Letta export path are not encoded. | XY-927 |
+| Core-vs-archival memory | `blocked` | `fixture_backed`, `research_gate`, `blocked`, `not_encoded` | ELF now has 6 fixture-backed `core_archival_memory` jobs that score core block attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery separately from archival note search. Letta remains blocked or not tested until its contained export/readback artifact maps core and archival source ids. | XY-927 |
 | Graph/RAG navigation and citations | `not_tested` | `smoke_only`, `research_gate`, `blocked`, `wrong_result`, `not_encoded` | Graph/RAG smokes produce scored or typed non-pass adapter reports where possible, but broad graph/RAG navigation and citation quality are not tested. | XY-929 |
 
 ## Follow-Up Queue
@@ -110,7 +112,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 | XY-924/XY-931 | P0 | Encoded local OSS history; UI/export setup blocker measured | mem0/OpenMemory local OSS history and SDK export-style readback are measured; OpenMemory UI/export has a blocked export-helper setup probe and still needs a dedicated compose/import path before any product-UX comparison. |
 | XY-925 | P1 | Backlog | First-generation OSS continuity and source-store adapters. |
 | XY-926 | P1 | Backlog | Live operator-debugging, capture, consolidation, and knowledge-page suites. |
-| XY-927 | P1 | Backlog | Letta-style core-vs-archival memory comparison. |
+| XY-927 | P1 | Fixture encoded; Letta export blocked | ELF core-vs-archival fixture coverage is encoded; a contained Letta export/readback adapter remains future work before win/tie/loss claims. |
 | XY-928 | P1 | Backlog | OpenViking context-trajectory and hierarchy benchmark. |
 | XY-929 | P2 | Backlog | Graph/RAG adapters beyond scored smokes. |
 | XY-930 | P1 | Backlog | Private-corpus and credentialed production gates after operator inputs exist. |
@@ -123,6 +125,9 @@ results, or lifecycle failures into one aggregate leaderboard.
   evidence among the tracked systems.
 - ELF ties qmd on encoded live retrieval, work-resume, project-decisions, and
   personalization slices.
+- ELF fixture-backed `core_archival_memory` coverage passes attachment, scope,
+  provenance, stale-core detection, archival fallback, and project-decision recovery
+  jobs separately from archival search.
 - ELF has a narrow live operator-debug win over qmd for trace hydration,
   candidate-drop visibility, and selected-but-not-narrated evidence, with
   replay-command availability and repair-action clarity tied.
diff --git a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
index e10ce945..ee4d9de0 100644
--- a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
+++ b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
@@ -5,9 +5,9 @@ not comparable, and which measurement reports should guide future ELF iteration.
 Read this when: You need to answer whether ELF has enough empirical evidence to
 claim a win, tie, loss, or non-claim against tracked memory, RAG, graph, and
 agent-continuity projects.
-Inputs: Fresh local runs of `cargo make real-world-memory` and
-`cargo make real-world-memory-live-adapters` in the current XY-898 lane after
-adapter-report consistency repairs, plus
+Inputs: Fresh local runs of `cargo make real-world-memory-core-archival` and
+`cargo make real-world-memory`, the earlier `cargo make real-world-memory-live-adapters`
+measurement in the current benchmark lanes after adapter-report consistency repairs, plus
 `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`,
 `2026-06-11-competitor-strength-evidence-matrix.md`, and
 `2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md`.
@@ -22,8 +22,11 @@ tracked project's strongest scenario.
 
 What is proven today:
 
-- ELF has a strong fixture-backed real-world benchmark contract: 38 jobs, 36 pass,
-  2 blocked operator boundaries, and no wrong results in the fixture aggregate.
+- ELF has a strong fixture-backed real-world benchmark contract: 44 jobs across 12
+  suites, 42 pass, 2 blocked operator boundaries, and no wrong results in the
+  fixture aggregate. The new `core_archival_memory` suite contributes 6 passing jobs
+  for core block attachment, scope, provenance, stale-core detection, archival
+  fallback, and project-decision recovery.
 - ELF and qmd have comparable full-suite live real-world sweeps, but neither has a
   full-suite live pass. ELF is one pass ahead in the fresh aggregate because qmd
   misses the memory-evolution delete/TTL tombstone job.
@@ -31,9 +34,10 @@ What is proven today:
   checked-in provider synthetic, stress, backfill, backup/restore, and Qdrant rebuild
   evidence.
 - The current comparison still undermeasures most competitor strengths. OpenViking
-  trajectory, mem0/OpenMemory entity history and UI, Letta core-vs-archival memory,
-  Graphiti/Zep temporal graph behavior, graph/RAG navigation, agentmemory and
-  claude-mem capture/continuity, and knowledge-page workflows remain non-claims.
+  trajectory, mem0/OpenMemory entity history and UI, Letta product export/readback
+  for core-vs-archival memory, Graphiti/Zep temporal graph behavior, graph/RAG
+  navigation, agentmemory and claude-mem capture/continuity, and knowledge-page
+  workflows remain non-claims.
   The separate XY-932 operator-debug live slice now scores ELF against qmd for trace
   hydration and candidate-drop visibility, but does not cover OpenMemory or
   claude-mem UI flows.
@@ -43,12 +47,13 @@ production," but the competitiveness objective remains open.
 
 ## Fresh Runs
 
-These commands were run in the current XY-898 lane after adapter-report consistency
-repairs:
+These commands were run in the current benchmark lanes after adapter-report
+consistency repairs and the XY-927 core-vs-archival fixture update:
 
 | Command | Result | Runtime |
 | --- | --- | ---: |
-| `cargo make real-world-memory` | pass | 11.91 seconds |
+| `cargo make real-world-memory-core-archival` | pass | 57.01 seconds |
+| `cargo make real-world-memory` | pass | 8.94 seconds |
 | `cargo make real-world-memory-live-adapters` | pass | 121.51 seconds |
 
 The live adapter run emitted repeated Qdrant client/server compatibility warnings, but
@@ -62,21 +67,21 @@ failure.
 
 | Metric | Value |
 | --- | ---: |
-| Jobs | `38` |
-| Encoded suites | `11` |
-| Pass | `36` |
+| Jobs | `44` |
+| Encoded suites | `12` |
+| Pass | `42` |
 | Blocked | `2` |
 | Wrong result | `0` |
 | Lifecycle fail | `0` |
 | Incomplete | `0` |
 | Not encoded | `0` |
 | Unsupported claim | `0` |
-| Mean score | `0.947` |
-| Mean latency | `4.411 ms` |
-| Expected evidence recall | `77/77` |
-| Evidence coverage | `84/84` |
-| Source-ref coverage | `84/84` |
-| Quote coverage | `84/84` |
+| Mean score | `0.955` |
+| Mean latency | `3.958 ms` |
+| Expected evidence recall | `90/90` |
+| Evidence coverage | `97/97` |
+| Source-ref coverage | `97/97` |
+| Quote coverage | `97/97` |
 
 This proves fixture contract breadth and scoring behavior. It does not prove every
 live adapter or competitor runtime can complete those jobs.
@@ -136,8 +141,8 @@ The checked-in manifest records 23 adapter records across 17 unique project name
 | `pass` | `4` |
 | `wrong_result` | `6` |
 | `lifecycle_fail` | `1` |
-| `blocked` | `5` |
-| `not_encoded` | `7` |
+| `blocked` | `6` |
+| `not_encoded` | `6` |
 
 The generated JSON report emits `external_project_count: 16`, matching the unique
 non-ELF project-name count from the manifest. The companion audit JSON separately
@@ -158,7 +163,7 @@ records `unique_project_names: 17` for the full project list including ELF.
 | LightRAG | `research_gate` | `blocked`. | Graph/RAG context export with source-path citations. | Docker context-export report with explicit provider config and source citation mapping. |
 | GraphRAG | `research_gate` | `blocked`. | Graph summaries and document/text-unit evidence tables. | Cost-bounded Docker adapter report over a tiny corpus. |
 | Graphiti/Zep | `research_gate` | `blocked`. | Temporal graph facts and validity windows. | Docker-local temporal graph adapter report for current and historical facts. |
-| Letta | `research_gate` | `not_encoded`. | Core memory blocks versus archival memory. | Contained export contract, then core-vs-archival and decision-memory report. |
+| Letta | `research_gate` | `blocked` for the selected contained export/readback path; scenario rows remain `not_tested` or `blocked`. | Core memory blocks versus archival memory. | Implement the Docker-only export/readback adapter before any Letta win/tie/loss claim. |
 | LangGraph | `research_gate` | `not_encoded`; direct memory backend is unsupported. | Checkpoint replay and fork/regression debugging. | Treat as benchmark-infra reference unless a memory-output contract emerges. |
 | nanograph | `research_gate` | `not_encoded`; full memory backend is unsupported. | Typed graph schema and query ergonomics. | Typed relation query report only if evidence ids can be emitted. |
 | llm-wiki | `research_gate` | `not_encoded`. | Wiki/page generation, query-save, lint and repair loops. | Contained page-generation report with citation and unsupported-claim lint. |
@@ -171,7 +176,7 @@ records `unique_project_names: 17` for the full project list including ELF.
 | --- | --- | --- | --- |
 | Retrieval/debug | ELF and qmd live retrieval pass; qmd same-corpus baseline passes. | Tie on encoded live retrieval; no ELF-over-qmd UX claim. | qmd/ELF deep trace replay and debug ergonomics scoring. |
 | Work resume | ELF and qmd live pass. | ELF is credible on encoded work resume. | agentmemory, claude-mem, and OpenViking comparable continuity adapters. |
-| Project decisions | ELF and qmd live pass. | ELF is credible on encoded project-decision recovery. | Letta core/archival decision memory comparison. |
+| Project decisions | ELF and qmd live pass; the ELF `core_archival_memory` fixture also passes project-decision recovery through core routing plus archival rationale. | ELF is credible on encoded project-decision recovery. | Letta core/archival decision memory export and scoring. |
 | Source of truth | ELF and qmd live pass; ELF has stronger production restore/rebuild evidence. | ELF has strongest measured source-of-truth discipline. | memsearch source-of-truth reindex/reload evidence. |
 | Memory evolution | ELF live fails 5/6 jobs; qmd live fails 6/6 jobs after missing the delete/TTL tombstone evidence; fixture aggregate passes. | No broad live superiority claim. | Historical conflict evidence links and Graphiti/Zep temporal comparison. |
 | Consolidation | Fixture aggregate passes; live adapters are not encoded. | Fixture-only claim. | Live proposal generation with lineage, confidence, and review-action audit. |
@@ -181,7 +186,7 @@ records `unique_project_names: 17` for the full project list including ELF.
 | Production ops | ELF has separate production-provider/backfill/restore evidence; live sweep is not a full production-ops pass. | Bounded personal-production adoption claim with caveats. | Private corpus manifest and credentialed provider gates. |
 | Personalization | ELF and qmd live pass one scoped preference job. | Narrow encoded pass only. | mem0/OpenMemory and Letta entity/preference history comparison. |
 | Context trajectory | Not comparable. | No claim. | OpenViking staged hierarchy/trajectory scoring. |
-| Core-vs-archival memory | Not comparable. | No claim. | Letta contained export and ELF core-block benchmark. |
+| Core-vs-archival memory | ELF fixture suite passes 6/6; Letta comparison is blocked until export/readback evidence exists. | Fixture-only ELF core-block claim; no ELF-over-Letta claim. | Letta contained export/readback artifact with core block JSON, archival search/readback JSON, and source ids. |
 | Graph/RAG navigation | RAGFlow, LightRAG, GraphRAG, and Graphiti/Zep remain typed research gates; graphify has a tiny scored `wrong_result` smoke. | No graph/RAG parity claim; only graphify's bounded non-pass smoke can be cited. | Larger contained RAG/graph adapters with evidence-linked outputs before any ELF graph/RAG win, tie, or loss claim. |
 
 ## Next Measurement Reports
diff --git a/docs/guide/benchmarking/index.md b/docs/guide/benchmarking/index.md
index 6030af7b..7e17b183 100644
--- a/docs/guide/benchmarking/index.md
+++ b/docs/guide/benchmarking/index.md
@@ -55,9 +55,9 @@ cleanup, use `docs/guide/single_user_production.md`.
   optimization-direction report that translates measured benchmark data and competitor
   strengths into prioritized ELF iteration themes and explicit non-claims.
 - `2026-06-11-measurement-coverage-audit.md`: fresh coverage audit that separates
-  current measured ELF/qmd data, fixture evidence, external adapter ledger coverage,
-  scenario non-claims, and the next measurement reports needed before stronger
-  competitor claims.
+  current measured ELF/qmd data, fixture evidence including the XY-927
+  `core_archival_memory` suite, external adapter ledger coverage, scenario non-claims,
+  and the next measurement reports needed before stronger competitor claims.
 - `2026-06-11-elf-qmd-retrieval-debug-profile.md`: fresh ELF/qmd retrieval-debug
   profile with real-world retrieval-suite evidence, 480-document stress baseline
   evidence, qmd top-10 artifact inspection, and explicit rerank/fusion non-claims.
@@ -89,9 +89,10 @@ cleanup, use `docs/guide/single_user_production.md`.
   `real_world_job` adapter reports without converting smoke evidence into quality
   claims.
 - `2026-06-11-competitor-strength-adoption-report.md`: XY-901 final
-  competitor-strength adoption report with the bounded personal-production decision,
-  scenario-level win/tie/loss/not-tested matrix, claim boundaries, and optimization
-  issue queue.
+  competitor-strength adoption report, updated by XY-927 with fixture-backed
+  core-vs-archival coverage and a blocked Letta export/readback boundary, plus the
+  bounded personal-production decision, scenario-level win/tie/loss/not-tested
+  matrix, claim boundaries, and optimization issue queue.
 - `2026-06-11-mem0-openmemory-history-ui-export-report.md`: XY-924 plus XY-931
   mem0/OpenMemory local OSS history, preference-correction, deletion-audit,
   personalization, and export-readback comparison with normalized
diff --git a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
index e4745d72..7cae59a3 100644
--- a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
+++ b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
@@ -58,6 +58,7 @@ compile knowledge, and state honest uncertainty.
 | Capture/integration | Accuracy of hooks, imports, exclusions, and write policies. | Capture a session decision while excluding private spans. |
 | Production ops | Backfill, restore, cold start, resource, and bounded-failure behavior. | Resume interrupted import without duplicate source notes. |
 | Personalization | Scoped preferences without cross-tenant leakage. | Apply the user's current preference and ignore another project's note. |
+| Core/archival memory | Always-loaded core memory behavior kept separate from archival note search. | Detect a stale core block and fall back to archival evidence. |
 
 ## External Reference Mapping
 
@@ -163,6 +164,9 @@ including the retrieval-quality slice below. The suite currently encodes:
   classification, and provider credential boundary `blocked` classification.
 - `personalization`: scoped stable preference correction without temporary or
   cross-project preference leakage.
+- `core_archival_memory`: core block attachment, scope, provenance, stale-core
+  detection, archival fallback, and project-decision recovery through core routing
+  plus archival rationale.
 
 The generated report includes evidence coverage, source-ref coverage, quote coverage,
 unsupported-claim count, stale retrieval count, stale-answer count, conflict detection
@@ -221,8 +225,10 @@ research gates. Its `external_adapters` report section distinguishes:
   future adapter path, not fixture-backed or live execution evidence.
 
 Current state: the `elf_live_real_world` and `qmd_live_real_world` adapters run a full
-encoded-suite sweep through `cargo make real-world-memory-live-adapters`. Each adapter
-materializes generated runtime answers for 38 jobs across 11 suites before scoring.
+encoded-suite sweep through `cargo make real-world-memory-live-adapters`. The latest
+recorded live sweep materializes generated runtime answers for 38 jobs across 11
+suites before scoring; the newer fixture-only `core_archival_memory` suite is not yet
+included in that live sweep.
 The original targeted `work_resume`, `retrieval`, and `project_decisions` slice still
 passes, but the full sweep is not a full-suite pass: memory_evolution is
 `wrong_result`, production_ops remains typed `incomplete`/`blocked`/`not_encoded`, and
diff --git a/docs/research/2026-06-11-competitor-strength-adoption-report.json b/docs/research/2026-06-11-competitor-strength-adoption-report.json
index 56ec65a5..7a9d9d85 100644
--- a/docs/research/2026-06-11-competitor-strength-adoption-report.json
+++ b/docs/research/2026-06-11-competitor-strength-adoption-report.json
@@ -12,7 +12,7 @@
       "Live temporal reconciliation remains wrong_result for five of six memory_evolution jobs.",
       "Private-corpus production quality is blocked until an operator-owned manifest exists.",
       "Credentialed provider production-ops gates are blocked until explicit provider setup exists.",
-      "Several competitor strengths remain not_tested or blocked: OpenMemory UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform behavior remains a non-goal, and OpenViking trajectory, Letta core-vs-archival memory, and graph/RAG navigation remain unproven. mem0 local OSS preference history is measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, and rerank remain untested. XY-932 adds a narrow live operator-debug slice where ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory UI/export and claude-mem viewer workflows remain blocked or not encoded."
+      "Several competitor strengths remain not_tested or blocked: OpenMemory UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform behavior remains a non-goal, OpenViking trajectory and graph/RAG navigation remain unproven, and Letta core-vs-archival comparison is blocked until the selected contained export/readback path exists. mem0 local OSS preference history is measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, and rerank remain untested. XY-932 adds a narrow live operator-debug slice where ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory UI/export and claude-mem viewer workflows remain blocked or not encoded."
     ]
   },
   "evidence_class_terms": [
@@ -39,7 +39,12 @@
     {
       "command": "cargo make real-world-memory",
       "artifact": "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
-      "claim": "ELF fixture aggregate covers 38 jobs across 11 suites with 36 pass and 2 blocked production-ops operator boundaries."
+      "claim": "ELF fixture aggregate covers 44 jobs across 12 suites with 42 pass and 2 blocked production-ops operator boundaries, including 6 passing core_archival_memory jobs."
+    },
+    {
+      "command": "cargo make real-world-memory-core-archival",
+      "artifact": "tmp/real-world-memory/core-archival/report.json",
+      "claim": "ELF core_archival_memory fixture coverage scores core block attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery separately from archival note search."
     },
     {
       "command": "cargo make real-world-memory-live-adapters",
@@ -132,14 +137,14 @@
         "research_gate",
         "not_encoded"
       ],
-      "measured_claim": "ELF and qmd both pass encoded project_decisions jobs. Letta-style core/archival decision memory is not tested.",
+      "measured_claim": "ELF and qmd both pass encoded project_decisions jobs. The new ELF core_archival_memory fixture also scores project-decision recovery through core routing plus archival rationale, but Letta-style comparison remains blocked without contained export evidence.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md"
       ],
       "follow_up_issues": [
         "XY-927"
       ],
-      "caveat": "No Letta comparison exists until a contained export path is selected."
+      "caveat": "No Letta comparison exists until the selected contained export/readback path produces source-id-mapped evidence."
     },
     {
       "scenario_id": "retrieval_quality",
@@ -361,20 +366,24 @@
     {
       "scenario_id": "core_vs_archival_memory",
       "title": "Core-vs-archival memory",
-      "outcome": "not_tested",
+      "outcome": "blocked",
       "evidence_classes": [
+        "fixture_backed",
         "research_gate",
+        "blocked",
         "not_encoded"
       ],
-      "measured_claim": "ELF has core block semantics in the service contract, but comparable core-vs-archival benchmark jobs and a contained Letta export path are not encoded.",
+      "measured_claim": "ELF now has 6 fixture-backed core_archival_memory jobs that score core block attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery separately from archival note search. Letta remains blocked or not_tested until its contained export/readback artifact maps core and archival source ids.",
       "command_artifacts": [
         "docs/spec/system_elf_memory_service_v2.md",
-        "docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md"
+        "apps/elf-eval/fixtures/real_world_memory/core_archival_memory",
+        "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json",
+        "tmp/real-world-memory/core-archival/report.json"
       ],
       "follow_up_issues": [
         "XY-927"
       ],
-      "caveat": "No ELF-over-Letta claim is allowed."
+      "caveat": "No ELF-over-Letta claim is allowed; the selected Letta path must export core block JSON, archival search/readback JSON, and source ids before scoring."
     },
     {
       "scenario_id": "graph_rag_navigation_citations",
@@ -431,8 +440,8 @@
     {
       "issue": "XY-927",
       "priority": "P1",
-      "state": "Backlog",
-      "gap": "Letta-style core-vs-archival memory comparison."
+      "state": "Fixture encoded; Letta export blocked",
+      "gap": "ELF core_archival_memory fixture coverage is encoded; a contained Letta export/readback adapter remains future work before win/tie/loss claims."
     },
     {
       "issue": "XY-928",
@@ -464,6 +473,7 @@
       "ELF is adoptable for bounded personal production use with caveats.",
       "ELF has the strongest measured source-of-truth, rebuild, restore, and backfill evidence among the tracked systems.",
       "ELF ties qmd on encoded live retrieval, work_resume, project_decisions, and personalization slices.",
+      "ELF fixture-backed core_archival_memory coverage passes attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery jobs separately from archival search.",
       "ELF has a live temporal reconciliation loss against the benchmark expectation: five memory_evolution jobs remain wrong_result.",
       "Most competitor strengths outside qmd retrieval are not_tested, blocked, smoke_only, or research_gate.",
       "ELF has a narrow live operator-debug win over qmd for trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence, with replay-command availability and repair-action clarity tied."
diff --git a/docs/research/2026-06-11-measurement-coverage-audit.json b/docs/research/2026-06-11-measurement-coverage-audit.json
index ab71c30e..0ebe1ec9 100644
--- a/docs/research/2026-06-11-measurement-coverage-audit.json
+++ b/docs/research/2026-06-11-measurement-coverage-audit.json
@@ -1,14 +1,20 @@
 {
   "schema": "elf.benchmark_measurement_coverage_audit/v2",
   "run_id": "2026-06-11-measurement-coverage-audit",
-  "source_revision": "current XY-898 lane after adapter-report consistency repairs",
+  "source_revision": "current benchmark lane after adapter-report consistency repairs and XY-927 core-vs-archival fixture update",
   "created_at": "2026-06-11",
   "scope": "ELF memory-system competitiveness measurement coverage, external competitor comparison evidence, and next report directions",
   "commands": [
+    {
+      "command": "cargo make real-world-memory-core-archival",
+      "status": "pass",
+      "runtime_seconds": 57.01,
+      "artifact": "tmp/real-world-memory/core-archival/report.json"
+    },
     {
       "command": "cargo make real-world-memory",
       "status": "pass",
-      "runtime_seconds": 11.91,
+      "runtime_seconds": 8.94,
       "artifact": "tmp/real-world-memory/real-world-memory-report.json"
     },
     {
@@ -19,21 +25,21 @@
     }
   ],
   "fixture_aggregate": {
-    "job_count": 38,
-    "encoded_suite_count": 11,
-    "pass": 36,
+    "job_count": 44,
+    "encoded_suite_count": 12,
+    "pass": 42,
     "wrong_result": 0,
     "lifecycle_fail": 0,
     "incomplete": 0,
     "blocked": 2,
     "not_encoded": 0,
     "unsupported_claim": 0,
-    "mean_score": 0.947,
-    "mean_latency_ms": 4.411,
-    "expected_evidence_total": 77,
-    "expected_evidence_matched": 77,
-    "evidence_required_count": 84,
-    "evidence_covered_count": 84
+    "mean_score": 0.955,
+    "mean_latency_ms": 3.958,
+    "expected_evidence_total": 90,
+    "expected_evidence_matched": 90,
+    "evidence_required_count": 97,
+    "evidence_covered_count": 97
   },
   "live_real_world_adapters": [
     {
@@ -197,8 +203,8 @@
       "pass": 4,
       "wrong_result": 6,
       "lifecycle_fail": 1,
-      "blocked": 5,
-      "not_encoded": 7
+      "blocked": 6,
+      "not_encoded": 6
     },
     "xy900_update_note": "XY-900 promotes graphify from research_gate/blocked to a tiny scored live_real_world wrong_result smoke; broad graph/RAG quality remains unproven.",
     "xy932_update_note": "XY-932 adds narrow ELF/qmd operator-debug live_real_world records: ELF pass and qmd wrong_result for trace hydration/candidate-drop visibility, with OpenMemory and claude-mem UI still unmeasured."
@@ -212,7 +218,7 @@
       "OpenViking_context_trajectory",
       "mem0_OpenMemory_entity_history_ui",
       "agentmemory_claude_mem_capture_continuity",
-      "Letta_core_vs_archival_memory",
+      "Letta_core_vs_archival_export_path",
       "Graphiti_Zep_temporal_graph",
       "RAG_graph_navigation",
       "llm_wiki_gbrain_graphify_knowledge_workflows"
diff --git a/docs/spec/real_world_agent_memory_benchmark_v1.md b/docs/spec/real_world_agent_memory_benchmark_v1.md
index 5bb56574..aa5c78c3 100644
--- a/docs/spec/real_world_agent_memory_benchmark_v1.md
+++ b/docs/spec/real_world_agent_memory_benchmark_v1.md
@@ -525,6 +525,7 @@ Suite ids are stable public names. Each suite MUST contain at least one
 | `capture_integration` | Evaluate how accurately work observations become usable memory across agents and tools. | Capture a session decision; exclude private spans; import external agent observations. | Hook/import logs, write policy audits, excluded spans, resulting note ids. | answer_correctness, evidence_grounding, trap_avoidance, lifecycle_behavior. | agentmemory, claude-mem, memsearch, mem0. |
 | `production_ops` | Prove safe operation under backup, restore, backfill, cold start, resource, and credential boundaries. | Resume interrupted import; restore from backup; report missing private manifest as bounded caveat. | Command/report artifacts, resource envelope, checkpoint state, failure guard evidence. | lifecycle_behavior, latency_resource, uncertainty_handling, evidence_grounding. | ELF, qmd, memsearch, LangGraph. |
 | `personalization` | Apply user/project preferences correctly without leaking across scopes or overfitting stale preferences. | Remember preferred response style; avoid using another project tenant's note; update a preference. | Scoped memory ids, preference versions, tenant/project/agent context, negative cross-scope traps. | personalization_fit, trap_avoidance, evidence_grounding, answer_correctness. | mem0, Letta, agentmemory, ELF. |
+| `core_archival_memory` | Verify always-loaded core memory behavior separately from archival note search and derived retrieval indexes. | Read an attached core block; enforce core block scope; detect stale core state from archival evidence; fall back to archival notes; recover a decision from core routing plus archival rationale. | Core block ids, attachment ids, read_profile/scope metadata, source_ref and audit history, archival note evidence ids, stale-core traps, and explicit no-Qdrant-core-block boundary evidence. | answer_correctness, evidence_grounding, trap_avoidance, lifecycle_behavior, workflow_helpfulness. | Letta, ELF. |
 
 ## Report Semantics
 

From 0ff95b0201a5ea139762bb2ffee5d9af0cb55612 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 00:47:57 +0800
Subject: [PATCH 2/7] {"schema":"decodex/commit/1","summary":"Repair Letta
 benchmark review drift","authority":"XY-927"}

---
 .../memory_projects_manifest.json             |  7 +++-
 .../tests/real_world_job_benchmark.rs         | 15 +++++++
 ...-11-competitor-strength-evidence-matrix.md | 21 ++++++----
 ...on-direction-from-competitor-benchmarks.md | 22 ++++++----
 .../research/research_projects_inventory.md   |  2 +-
 ...-11-xy-897-competitor-strength-matrix.json | 42 ++++++++++---------
 6 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index a5822e69..e10585a8 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -29,7 +29,7 @@
       },
       "run": {
         "status": "blocked",
-        "evidence": "The current fixture set reports 40 jobs, 38 pass, 0 incomplete, 2 blocked, 0 wrong_result, 0 not_encoded, and 0 unsupported_claim.",
+        "evidence": "The current fixture set reports 46 jobs across 12 suites: 44 pass, 0 incomplete, 2 blocked, 0 wrong_result, 0 not_encoded, and 0 unsupported_claim. The six core_archival_memory jobs pass as ELF fixture evidence, not as live Letta comparison evidence.",
         "command": "cargo make real-world-memory",
         "artifact": "tmp/real-world-memory/real-world-memory-report.json"
       },
@@ -101,6 +101,11 @@
           "status": "pass",
           "evidence": "Four redaction, exclusion, source-id, evidence-binding, and capture-boundary fixtures are encoded and passing."
         },
+        {
+          "suite_id": "core_archival_memory",
+          "status": "pass",
+          "evidence": "Six fixture jobs score core block attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery separately from archival note search."
+        },
         {
           "suite_id": "production_ops",
           "status": "blocked",
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index fa20dc07..d7d5eae7 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -705,6 +705,21 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
 
 	assert_eq!(elf.pointer("/evidence_class").and_then(Value::as_str), Some("fixture_backed"));
 	assert_eq!(elf.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
+	assert!(elf.pointer("/run/evidence").and_then(Value::as_str).is_some_and(|evidence| {
+		evidence.contains("46 jobs across 12 suites")
+			&& evidence.contains("44 pass")
+			&& evidence.contains("core_archival_memory")
+	}));
+
+	let elf_suites = array_at(elf, "/suites")?;
+	let elf_core_archival = find_by_field(elf_suites, "/suite_id", "core_archival_memory")?;
+
+	assert_eq!(elf_core_archival.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert!(elf_core_archival.pointer("/evidence").and_then(Value::as_str).is_some_and(
+		|evidence| evidence.contains("core block attachment")
+			&& evidence.contains("project-decision recovery")
+			&& evidence.contains("archival note search")
+	));
 	assert_eq!(
 		elf_live.pointer("/evidence_class").and_then(Value::as_str),
 		Some("live_real_world")
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
index d042d0ec..58692226 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
@@ -7,6 +7,8 @@ non-claim against a tracked memory, RAG, or graph project.
 Inputs: `docs/guide/benchmarking/2026-06-10-production-adoption-refresh.md`,
 `docs/guide/benchmarking/2026-06-10-real-world-comparison-report.md`,
 `docs/guide/benchmarking/2026-06-10-live-real-world-sweep-report.md`,
+`docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md`,
+`docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md`,
 `docs/guide/research/external_memory_improvement_plan.md`,
 `docs/guide/research/research_projects_inventory.md`,
 `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`,
@@ -29,9 +31,10 @@ Current boundary:
   live pass. The fresh ELF sweep produced 40 jobs with 22 pass, 5 wrong_result,
   0 incomplete, 2 blocked, and 11 not_encoded; the fresh qmd sweep produced 17 pass,
   6 wrong_result, 0 incomplete, 2 blocked, and 15 not_encoded.
-- ELF fixture evidence is strong: `cargo make real-world-memory` reports 40 jobs
-  across 11 suites with 38 pass and 2 blocked production-ops operator boundaries.
-  That proves the fixture contract, not live-service parity.
+- ELF fixture evidence is strong: `cargo make real-world-memory` reports 46 jobs
+  across 12 suites with 44 pass and 2 blocked production-ops operator boundaries.
+  The added `core_archival_memory` suite contributes 6 fixture-only passes for ELF
+  core-block behavior; it does not create an ELF-over-Letta claim.
 - qmd is the strongest measured local retrieval-debug comparison, but the current
   evidence still separates its same-corpus/live-retrieval strengths from the full-suite
   live non-pass sweep.
@@ -45,7 +48,7 @@ Current boundary:
 The current manifest has 23 adapter records across 16 external projects plus ELF.
 Evidence-class counts: 1 `fixture_backed`, 6 `live_baseline_only`, 5
 `live_real_world`, and 11 `research_gate`. Overall adapter-status counts: 4 `pass`,
-6 `wrong_result`, 1 `lifecycle_fail`, 5 `blocked`, and 7 `not_encoded`.
+6 `wrong_result`, 1 `lifecycle_fail`, 6 `blocked`, and 6 `not_encoded`.
 
 ## State Taxonomy
 
@@ -83,7 +86,7 @@ lifecycle-fail -> `lifecycle_fail`, and not-encoded -> `not_encoded`.
 | LightRAG | Lightweight graph/RAG context export with source file-path citation shape. | `research_gate`. | `blocked`: `ELF_LIGHTRAG_CONTEXT_START=1 cargo make lightrag-docker-context-smoke`, `tmp/real-world-memory/lightrag-context/summary.json`. | `blocked`: Docker service setup and context export are not proven. | XY-886 Docker context-export adapter with explicit provider config and source citation mapping. | Context-only query modes, graph-aware retrieval layout, and file-path citation readback. |
 | GraphRAG | GraphRAG indexing, graph summaries, and document/text-unit evidence tables. | `research_gate`. | `blocked`: `ELF_GRAPHRAG_SMOKE_RUN=1 cargo make graphrag-docker-smoke`, `tmp/real-world-memory/graphrag-smoke/summary.json`. | `blocked`: indexing resource envelope and source citation mapping are not proven. | XY-887 cost-bounded Docker adapter over a tiny corpus and scored output tables. | Graph summary artifacts, local/global search separation, and source table evidence mapping. |
 | Graphiti/Zep | Temporal graph memory with current, historical, and future fact validity windows. | `research_gate`. | `blocked`: `ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke`, `tmp/real-world-memory/graphiti-zep-smoke/summary.json`. | `blocked`: Docker graph-store and temporal adapter are not proven. | XY-888 Docker-local temporal graph adapter scoring current/historical fact validity. | Temporal fact windows, invalidation/supersession semantics, and graph fact provenance. |
-| Letta | Core memory blocks versus archival memory with explicit operating-context surfaces. | `research_gate`. | `not_encoded`: `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json`. | `blocked`: contained evidence export path is not selected. | Select contained export contract, then encode core-vs-archival, personalization, and project-decision jobs. | Core memory block ergonomics, archival separation, and shared operating context readback. |
+| Letta | Core memory blocks versus archival memory with explicit operating-context surfaces. | `research_gate`. | `blocked`: the selected comparison contract is a Docker-only benchmark-created agent export that returns core block JSON, archival search/readback JSON, and source ids; no materialized export exists yet. | `blocked`: no Letta materializer currently creates the benchmark agent, imports the ELF `core_archival_memory` fixture corpus, or exports comparable core and archival evidence. | Implement and run the contained export/readback adapter before any Letta win, tie, or loss claim; keep personalization and project-decision scenarios blocked or not tested until that evidence exists. | Core memory block ergonomics, archival separation, and shared operating context readback. |
 | LangGraph | Checkpoint/replay regression workflow and durable state replay for agent runs. | `research_gate`. | `not_encoded`: `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json`. | `unsupported`: not a standalone memory backend adapter. | Non-goal for direct win/loss until a standalone memory output contract exists; use replay jobs as benchmark infrastructure reference. | Checkpoint replay, deterministic regression, and state-diff evaluation patterns. |
 | nanograph | Typed graph schema and query ergonomics for graph-lite developer experience. | `research_gate`. | `not_encoded`: `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json`. | `unsupported`: not a memory backend comparison target. | Non-goal for direct win/loss unless a contained memory-backed target emerges; measure ELF graph-lite DX instead. | Typed relation schema, query ergonomics, and small graph developer experience. |
 | llm-wiki | LLM-maintained wiki or knowledge-page workflow with query-save and lint loops. | `research_gate`. | `not_encoded`: `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json`. | `unsupported`: no live service runtime for adapter proof. | Select contained plugin or instruction harness, then score knowledge pages for citations, unsupported claims, rebuild, and stale-source lint. | Maintained wiki workflows, page lint, query-save loops, and topic-scoped navigation. |
@@ -96,7 +99,7 @@ lifecycle-fail -> `lifecycle_fail`, and not-encoded -> `not_encoded`.
 | --- | --- | --- | --- | --- |
 | Retrieval/debug | Fixture retrieval passes; live retrieval passes. | qmd. | qmd live retrieval passes and live baseline passes, but full-suite live status is `wrong_result`. | Run qmd deep profile and ELF/qmd trace-level replay with expansion, fusion, rerank, and candidate-drop diagnostics. |
 | Work resume | Fixture and live work_resume pass. | agentmemory, claude-mem, OpenViking. | agentmemory `lifecycle_fail`, claude-mem `wrong_result`, OpenViking work_resume `not_encoded`. | Encode durable work_resume adapters or keep each blocked with lifecycle/setup evidence. |
-| Project decisions | Fixture and live project_decisions pass. | qmd, Letta. | qmd live project_decisions pass; Letta is `research_gate` `not_encoded`. | Add Letta core/archival decision jobs only after a contained export path exists. |
+| Project decisions | Fixture and live project_decisions pass; the ELF core-archival fixture also scores project-decision recovery through core routing plus archival rationale. | qmd, Letta. | qmd live project_decisions pass; Letta project-decision recovery is `research_gate` `not_tested` or `blocked` until the contained export path exists. | Run the Letta core/archival export/readback contract before treating project-decision recovery as a comparable scenario. |
 | Source-of-truth | Fixture and live trust_source_of_truth pass. | memsearch. | memsearch canonical-store, reindex, delete, and reload smoke now passes, but source-of-truth real_world_job prompts are `not_encoded`. | Score memsearch source-of-truth rebuild/reload jobs before any suite-level win/loss claim. |
 | Temporal/current-vs-historical memory | Fixture memory_evolution passes; live memory_evolution is `wrong_result`. | Graphiti/Zep, mem0/OpenMemory. | Graphiti/Zep is `research_gate` `blocked`; mem0/OpenMemory local OSS preference history, entity scope, deletion audit, and SDK `get_all` now pass; OpenMemory UI/export is blocked by the export-helper setup probe; graph-memory scenarios are `not_encoded`. | Fix ELF/qmd live memory_evolution evidence links, add OpenMemory product app import/export readback, and run XY-888. |
 | Consolidation | Fixture consolidation passes; live consolidation is `not_encoded`. | agentmemory, managed-memory references, llm-wiki. | No manifest project has live consolidation scoring. | Run reviewable consolidation proposal generation with source refs, unsupported-claim flags, and audit transitions. |
@@ -104,9 +107,9 @@ lifecycle-fail -> `lifecycle_fail`, and not-encoded -> `not_encoded`.
 | Operator debugging | Fixture operator_debugging_ux passes, and the narrow live operator-debug slice passes for trace hydration, candidate-drop visibility, selected-but-not-narrated evidence, replay-command availability, and repair-action clarity. | qmd, claude-mem, OpenMemory. | qmd ties replay-command availability and repair-action clarity but is `wrong_result` for trace hydration, candidate-drop stage visibility, and selected-but-not-narrated evidence; claude-mem and OpenMemory UX remain `not_encoded` or blocked. | Add bounded OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
 | Capture/write policy | Fixture capture_integration passes; ELF live capture_integration passes 4/4 with zero redaction leaks, source ids, write-policy audit, and evidence binding. | agentmemory, claude-mem. | agentmemory capture is `blocked` by mocked/in-memory storage; claude-mem hook/viewer capture is `not_encoded`. | Run durable agentmemory and claude-mem capture-hook jobs proving redaction, exclusion, evidence binding, source ids, and no secret leakage. |
 | Production ops | Fixture production_ops has 4 pass and 2 blocked; live production_ops is `blocked`; production adoption has provider/backfill/restore evidence. | ELF production gate, qmd, RAG/RAGFlow resource gates. | qmd live production_ops is `blocked`; RAG/resource gates are `research_gate` `blocked`. | Rerun private-corpus and credentialed gates only when operator-owned manifest and credentials exist. |
-| Personalization | Fixture and live personalization pass. | mem0/OpenMemory, Letta. | mem0/OpenMemory and Letta personalization are `not_encoded`. | Encode scoped preference readback for mem0/OpenMemory and Letta before personalization superiority claims. |
+| Personalization | Fixture and live personalization pass. | mem0/OpenMemory, Letta. | mem0/OpenMemory personalization is `not_encoded`; Letta scoped preference readback remains `not_tested` until the contained core/archival export path exists. | Encode scoped preference readback for mem0/OpenMemory and Letta before personalization superiority claims. |
 | Context trajectory | ELF has trace direction but no comparable staged trajectory scenario. | OpenViking. | OpenViking setup is pinned, same-corpus retrieval is `wrong_result`, and hierarchy trajectory is `not_encoded`. | Make OpenViking evidence-bearing retrieval pass, then score staged context trajectory outputs. |
-| Core-vs-archival memory | ELF core-block semantics exist in the service contract, but comparative benchmark coverage is not encoded here. | Letta. | Letta is `research_gate` `not_encoded` until contained export proof exists. | Add ELF core-block versus archival-search jobs; compare Letta only after contained export proof. |
+| Core-vs-archival memory | Fixture `core_archival_memory` passes 6/6 and scores core block attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery separately from archival note search. | Letta. | Letta is `research_gate` `blocked`/`not_tested` until the selected contained export/readback artifact exists. | Implement the Letta export/readback adapter, then compare only scenarios whose core block JSON, archival search/readback JSON, and source ids are present. |
 | Graph/RAG navigation | ELF relation context is not enough to claim graph/RAG navigation parity. | RAGFlow, LightRAG, GraphRAG, Graphiti/Zep, graphify. | RAGFlow, LightRAG, GraphRAG, and Graphiti/Zep remain `research_gate` blocked/incomplete without explicit setup; graphify has only a tiny scored smoke `wrong_result`. | Run larger contained graph/RAG adapters with evidence-linked outputs before any ELF graph/RAG win, tie, or loss claim. |
 
 ## Parallelizable Benchmark Follow-Ups
@@ -129,7 +132,7 @@ now explicit:
 | Graphiti/Zep temporal graph adapter | XY-888 | yes | Docker-local graph store setup. | Current/historical/future fact validity and evidence ids. |
 | graphify graph report adapter | XY-889 plus post-XY-900 expansion | yes | Representative graph/RAG jobs beyond the tiny scored smoke. | `graph.json` and `GRAPH_REPORT` evidence mapped to scored graph navigation and knowledge synthesis ids. |
 | Private corpus and credentialed production ops | Operator-owned benchmark gates | no | Sanitized private manifest and routed provider credentials. | Private-corpus retrieval quality and credentialed production-ops evidence. |
-| Letta, LangGraph, nanograph, llm-wiki direct adapters | Research-only until output contract | no | Contained evidence export or non-memory-backend comparability contract. | Run only after each has a comparable output contract; otherwise keep as product-reference evidence. |
+| Letta, LangGraph, nanograph, llm-wiki direct adapters | Letta export artifact blocked; others research-only until output contract | no | Letta needs the selected contained export/readback artifact; the others need a non-memory-backend comparability contract. | Run only after comparable output exists; otherwise keep as product-reference evidence. |
 
 ## Validation Contract
 
diff --git a/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md b/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
index 5948ba26..1363d3f0 100644
--- a/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
+++ b/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
@@ -116,8 +116,8 @@ Overall adapter statuses:
 | `pass` | `4` |
 | `wrong_result` | `6` |
 | `lifecycle_fail` | `1` |
-| `blocked` | `5` |
-| `not_encoded` | `7` |
+| `blocked` | `6` |
+| `not_encoded` | `6` |
 
 The ledger is intentionally not a leaderboard. It prevents fixture evidence,
 same-corpus checks, research gates, and live real-world runs from being collapsed into
@@ -129,7 +129,7 @@ one misleading score.
 | --- | --- | --- |
 | Retrieval/debug | ELF and qmd are tied on encoded live retrieval; qmd remains the stronger debug UX reference. | Add trace-level replay, expansion/fusion/rerank knobs, candidate-drop diagnosis, and command-line replay. |
 | Work resume | ELF live work-resume passes; continuity-oriented competitors are undermeasured. | Borrow agentmemory/claude-mem capture breadth and OpenViking staged context, but require durable adapter proof. |
-| Project decisions | ELF and qmd live project-decision suites pass; Letta is not encoded. | Add core-vs-archival decision-memory scenarios before comparing Letta. |
+| Project decisions | ELF and qmd live project-decision suites pass; ELF fixture-backed `core_archival_memory` also scores project-decision recovery, while Letta remains blocked without export evidence. | Run the Letta core/archival export/readback contract before treating project-decision recovery as comparable. |
 | Source of truth | ELF has the strongest measured source-of-truth evidence. | Borrow memsearch's local canonical-store ergonomics without making files or vectors authoritative. |
 | Temporal memory | ELF fixture passes, but live memory evolution is wrong_result. | Prioritize current-vs-historical evidence links and Graphiti/Zep-style validity windows. |
 | Consolidation | ELF fixture passes, but live proposal generation is not encoded. | Build reviewable derived proposals with source refs, confidence, unsupported-claim flags, and apply/defer/discard audit. |
@@ -137,9 +137,9 @@ one misleading score.
 | Operator debugging | Fixture UX passes and the narrow live trace/viewer slice is scored: ELF passes, qmd ties replay/repair clarity but is wrong_result for trace hydration and candidate-drop visibility. | Expand coverage to OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
 | Capture/write policy | ELF live capture/write-policy self-check passes with zero redaction leaks; qmd is `not_encoded`; agentmemory is `blocked`; claude-mem is `not_encoded`. | Borrow agentmemory/claude-mem capture breadth only after durable local hook/viewer evidence exists, while preserving redaction and evidence binding. |
 | Production ops | ELF has the strongest checked-in evidence, with private/credential gates blocked. | Keep Docker-first production proof and add private corpus only when an operator-owned manifest exists. |
-| Personalization | ELF live personalization passes; mem0/OpenMemory and Letta are not encoded. | Add entity-scoped preference history and UI readback before claiming stronger personalization. |
+| Personalization | ELF live personalization passes; mem0/OpenMemory is not encoded and Letta scoped preference readback remains not tested until its contained export path exists. | Add entity-scoped preference history and UI readback before claiming stronger personalization. |
 | Context trajectory | Not comparable yet; OpenViking remains the reference. | Score staged retrieval, hierarchy expansion, and trajectory readback. |
-| Core-vs-archival | Product gap, not a measured comparison yet. | Borrow Letta's core memory block shape with explicit scope, provenance, and read-only attachment. |
+| Core-vs-archival | ELF fixture-backed `core_archival_memory` passes 6/6, but Letta remains blocked/not tested because no contained export artifact exists. | Borrow Letta's core memory block shape while keeping any win/tie/loss claim gated on exported core block, archival readback, and source-id evidence. |
 | Graph/RAG navigation | RAGFlow, LightRAG, GraphRAG, and Graphiti/Zep remain research gates; graphify has a tiny scored `wrong_result` smoke. | Run larger contained graph/RAG adapters before any broad graph-navigation claim. |
 
 ## Project Guidance Matrix
@@ -157,7 +157,7 @@ one misleading score.
 | LightRAG | `research_gate`; current status is `blocked`. | Lightweight graph/RAG context export and source-path citation shape. | Borrow context-export ideas for graph/RAG navigation after Docker proof. |
 | GraphRAG | `research_gate`; current status is `blocked`. | Graph summaries, document/text-unit tables, local/global search separation. | Borrow graph summary artifacts for knowledge pages and graph navigation after cost-bounded output proof. |
 | Graphiti/Zep | `research_gate`; current status is `blocked`. | Temporal graph facts, validity windows, current-vs-historical answers. | Use as the semantic model for ELF temporal memory and relation validity benchmarks. |
-| Letta | `research_gate`; current status is `not_encoded`. | Core memory blocks versus archival memory. | Add explicit scoped core blocks in ELF, but compare Letta only after a contained export path exists. |
+| Letta | `research_gate`; current status is `blocked` until the selected contained export/readback artifact exists. | Core memory blocks versus archival memory. | Keep ELF's fixture-backed core block coverage separate from Letta comparison claims; compare Letta only after exported core and archival evidence exists. |
 | LangGraph | `research_gate`; current status is `not_encoded` or `unsupported` as a direct memory backend. | Checkpoint, replay, fork, and regression debugging for agent state. | Borrow replay/regression patterns for benchmark infrastructure, not as direct memory parity. |
 | nanograph | `research_gate`; current status is `not_encoded` or `unsupported` as a full memory backend. | Typed graph schema and query ergonomics. | Borrow graph-lite DX and typed relation query ideas. |
 | llm-wiki | `research_gate`; current status is `not_encoded`. | Maintained wiki pages, query-save, lint, and repair loops. | Use as a reference for rebuildable, cited knowledge pages. |
@@ -225,8 +225,10 @@ These improve day-to-day usefulness while preserving ELF's evidence-bound core.
    - Borrow from: Letta core memory versus archival memory.
    - ELF shape: scoped read-only blocks with provenance and attachment rules, separate
      from archival search.
-   - Benchmark gate: core-vs-archival jobs prove correct attachment, sharing, and
-     fallback to search.
+   - Benchmark gate: ELF fixture jobs now prove attachment, scope, provenance,
+     stale-core detection, archival fallback, and project-decision recovery; Letta
+     comparison remains gated on exported core block, archival readback, and source-id
+     evidence.
 
 ### P2 - Expand External Comparison Without Fake Wins
 
@@ -265,7 +267,9 @@ Do not claim:
 - ELF beats mem0/OpenMemory on hosted memory, entity history, UI, or optional graph
   memory. Those scenarios are not encoded; the operator-debug win is only against
   qmd on a narrow trace/replay slice.
-- ELF beats Letta on core-vs-archival memory. That scenario is not encoded.
+- ELF beats Letta on core-vs-archival memory. ELF has fixture-backed coverage, but
+  Letta remains blocked/not tested until the selected contained export/readback path
+  produces comparable source-id-mapped evidence.
 - ELF beats RAGFlow, LightRAG, GraphRAG, Graphiti/Zep, or graphify on graph/RAG
   navigation. Current evidence is research-gate or blocked except graphify's tiny
   non-pass smoke.
diff --git a/docs/guide/research/research_projects_inventory.md b/docs/guide/research/research_projects_inventory.md
index 2f1cb9c0..be322238 100644
--- a/docs/guide/research/research_projects_inventory.md
+++ b/docs/guide/research/research_projects_inventory.md
@@ -31,7 +31,7 @@ Last updated: June 11, 2026.
 | [gbrain](https://github.com/garrytan/gbrain) | D1 | Reviewed; XY-882 verdict `blocked` | `rw.knowledge-synthesis`, `rw.operator-continuity` | Operational knowledge brain, `compiled_truth` + timeline pages, enrichment and maintenance loops; blocked on Docker-local brain repo and database proof | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json`; `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json` |
 | [Always-On Memory Agent](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/gemini/agents/always-on-memory-agent) | D1 | Reviewed | `rw.consolidation-review`, `rw.operator-continuity` | Always-on multimodal ingest + scheduled consolidation loop with simple local ops surface | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json` |
 | [graphify](https://github.com/safishamsi/graphify) | D1 | Reviewed; XY-882 verdict `adapter_candidate`; XY-889 adds Docker graph/report smoke | `rw.graph-navigation`, `rw.knowledge-synthesis`, `rw.resume-evidence` | Multimodal graph compression, deterministic code extraction, and graph/report outputs with source-file/source-location references; current ELF evidence is a generated-corpus Docker smoke, not broad graph-quality proof | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json`; `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json`; `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json` |
-| [Letta](https://github.com/letta-ai/letta) | D1 | Reviewed; XY-882 verdict `research_only` | `rw.core-archival`, `rw.operator-continuity` | Core vs archival memory split, shared blocks; not an implementation candidate until a supported contained server path can export evidence | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json`; `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json` |
+| [Letta](https://github.com/letta-ai/letta) | D1 | Reviewed; XY-882 verdict `research_only`; XY-927 selects blocked contained export/readback path | `rw.core-archival`, `rw.operator-continuity` | Core vs archival memory split, shared blocks; compare only after a Docker-only benchmark-created agent export returns core block JSON, archival readback JSON, and source ids | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json`; `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json`; `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json` |
 | [LangGraph](https://docs.langchain.com/oss/python/langgraph/persistence) | D1 | Reviewed; XY-882 verdict `research_only` | `rw.replay-regression`, `rw.resume-evidence` | Checkpoint/replay mindset for quality regression workflows; not a standalone memory backend adapter | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json`; `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json` |
 | [Graphiti / Zep](https://help.getzep.com/graphiti/core-concepts/temporal-awareness) | D1 | Reviewed; XY-882 verdict `adapter_candidate` | `rw.graph-temporal`, `rw.resume-evidence` | Temporal fact validity model with Docker-local graph-store options and UUID/fact/validity-window output | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json`; `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json` |
 | [nanograph](https://github.com/nanograph/nanograph) | D1 | Reviewed; XY-882 verdict `research_only` | `rw.graph-temporal`, `rw.retrieval-debug` | Typed schema + typed query ergonomics for graph-lite developer experience; official shape is no server/no Docker | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json`; `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`; `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json` |
diff --git a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
index 528fc057..558fa520 100644
--- a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
+++ b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
@@ -8,6 +8,8 @@
     "docs/guide/benchmarking/2026-06-10-production-adoption-refresh.md",
     "docs/guide/benchmarking/2026-06-10-real-world-comparison-report.md",
     "docs/guide/benchmarking/2026-06-10-live-real-world-sweep-report.md",
+    "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
+    "docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md",
     "docs/guide/research/external_memory_improvement_plan.md",
     "docs/guide/research/research_projects_inventory.md",
     "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json",
@@ -30,8 +32,8 @@
     },
     "overall_status_counts": {
       "lifecycle_fail": 1,
-      "blocked": 5,
-      "not_encoded": 7,
+      "blocked": 6,
+      "not_encoded": 6,
       "pass": 4,
       "wrong_result": 6
     }
@@ -310,17 +312,17 @@
       "supporting_evidence_classes": [
         "research_gate"
       ],
-      "measured_status": "not_encoded",
+      "measured_status": "blocked",
       "proof": {
-        "command": null,
-        "artifact": "docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json"
+        "command": "blocked until a Docker-only benchmark-created agent export is implemented",
+        "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
       },
       "unsupported_or_blocked_status": {
         "state": "blocked",
-        "typed_reason": "contained_evidence_export_path_not_selected",
-        "details": "Research-only until a supported contained server path can export core/archival evidence without relying on unsupported setup."
+        "typed_reason": "contained_export_readback_artifact_missing",
+        "details": "The selected contract requires a benchmark-created Letta agent export with core block JSON, archival search/readback JSON, and source ids before any scenario claim can be scored."
       },
-      "benchmark_before_claim": "Select a contained evidence export contract, then encode core-vs-archival memory, personalization, and project-decision jobs.",
+      "benchmark_before_claim": "Implement and run the contained export/readback adapter before any Letta win, tie, or loss claim; keep personalization and project-decision scenarios blocked or not tested until that evidence exists.",
       "borrow_if_stronger": "Borrow explicit core memory block ergonomics, archival separation, and shared operating context readback."
     },
     {
@@ -446,11 +448,11 @@
     {
       "scenario_id": "project_decisions",
       "scenario": "project decisions",
-      "current_elf_evidence": "ELF fixture-backed and live_real_world project_decisions suites pass.",
+      "current_elf_evidence": "ELF fixture-backed and live_real_world project_decisions suites pass; the ELF core_archival_memory fixture also scores project-decision recovery through core routing plus archival rationale.",
       "strongest_competitor_or_reference": "qmd, Letta",
-      "current_competitor_evidence": "qmd live_real_world project_decisions passes; Letta project_decisions is research_gate not_encoded.",
-      "current_state": "ELF and qmd are the only measured live competitors for this scenario.",
-      "next_measurement": "Add core/archival decision-memory jobs for Letta only after a contained export path exists; otherwise keep Letta as design reference."
+      "current_competitor_evidence": "qmd live_real_world project_decisions passes; Letta project-decision recovery is research_gate not_tested or blocked until the contained export path exists.",
+      "current_state": "ELF and qmd are the only measured live competitors for this scenario; Letta remains a product-reference comparison target.",
+      "next_measurement": "Run the Letta core/archival export/readback contract before treating project-decision recovery as a comparable scenario."
     },
     {
       "scenario_id": "source_of_truth",
@@ -520,7 +522,7 @@
       "scenario": "personalization",
       "current_elf_evidence": "ELF fixture-backed personalization passes and ELF live_real_world personalization passes.",
       "strongest_competitor_or_reference": "mem0/OpenMemory, Letta",
-      "current_competitor_evidence": "mem0/OpenMemory personalization is not_encoded and Letta personalization is research_gate not_encoded.",
+      "current_competitor_evidence": "mem0/OpenMemory personalization is not_encoded and Letta scoped preference readback remains not_tested until the contained core/archival export path exists.",
       "current_state": "ELF and qmd have live encoded evidence; personalization-specialized competitors are not yet comparable.",
       "next_measurement": "Encode mem0/OpenMemory and Letta scoped-preference readback jobs before making personalization superiority claims."
     },
@@ -536,11 +538,11 @@
     {
       "scenario_id": "core_vs_archival_memory",
       "scenario": "core-vs-archival memory",
-      "current_elf_evidence": "ELF spec and admin surfaces define core blocks, but comparative benchmark coverage is not yet encoded here.",
+      "current_elf_evidence": "ELF fixture core_archival_memory passes 6/6 and scores core block attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery separately from archival note search.",
       "strongest_competitor_or_reference": "Letta",
-      "current_competitor_evidence": "Letta is research_gate not_encoded until a contained evidence export path is selected.",
-      "current_state": "Scenario is a product gap measurement target, not a current win/loss surface.",
-      "next_measurement": "Add core-block versus archival-search jobs for ELF and only compare Letta after contained export proof exists."
+      "current_competitor_evidence": "Letta is research_gate blocked/not_tested until the selected contained export/readback artifact exists.",
+      "current_state": "ELF has fixture-only core-block evidence; Letta remains unscored, so no win, tie, or loss claim is allowed.",
+      "next_measurement": "Implement the Letta export/readback adapter, then compare only scenarios whose core block JSON, archival search/readback JSON, and source ids are present."
     },
     {
       "scenario_id": "graph_rag_navigation",
@@ -646,10 +648,10 @@
     },
     {
       "workstream": "Letta, LangGraph, nanograph, llm-wiki direct adapters",
-      "issue_or_candidate": "research-only until output contract",
+      "issue_or_candidate": "Letta export artifact blocked; others research-only until output contract",
       "parallelizable": false,
-      "blocked_by": "Contained evidence export or non-memory-backend comparability contract.",
-      "measurement": "Only run after each has a comparable output contract; otherwise treat as product-reference evidence."
+      "blocked_by": "Letta needs the selected contained export/readback artifact; the others need a non-memory-backend comparability contract.",
+      "measurement": "Only run after comparable output exists; otherwise treat as product-reference evidence."
     }
   ]
 }

From 2e0b926183f58f3230ac36750c727f6afb0cc1db Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 01:01:41 +0800
Subject: [PATCH 3/7] {"schema":"decodex/commit/1","summary":"Repair Letta
 benchmark report drift","authority":"XY-927"}

---
 .../tests/real_world_job_benchmark.rs         | 38 +++++++++++++++++++
 ...-11-competitor-strength-evidence-matrix.md |  2 +-
 ...on-direction-from-competitor-benchmarks.md | 14 +++----
 ...-11-xy-897-competitor-strength-matrix.json |  6 +--
 4 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index d7d5eae7..44d94368 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -1923,6 +1923,7 @@ fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> {
 	let competitor_matrix_json = serde_json::from_str::<Value>(&fs::read_to_string(
 		competitor_strength_matrix_json_path()?,
 	)?)?;
+	let iteration_direction = fs::read_to_string(iteration_direction_report_path()?)?;
 	let external_manifest = fs::read_to_string(external_adapter_manifest_path())?;
 	let retrieval_debug_profile =
 		serde_json::from_str::<Value>(&fs::read_to_string(retrieval_debug_profile_json_path()?)?)?;
@@ -1949,6 +1950,16 @@ fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> {
 	assert!(external_manifest.contains(
 		"The qmd live real-world sweep covers the current encoded fixture corpus; expanded retrieval-debug strength suites still need their own materialized adapter run."
 	));
+	assert!(iteration_direction.contains("| Jobs | `46` |"));
+	assert!(iteration_direction.contains("| Encoded suites | `12` |"));
+	assert!(iteration_direction.contains("| Pass | `44` |"));
+	assert!(iteration_direction.contains("| Evidence coverage | `101/101` |"));
+	assert!(iteration_direction.contains("| Expected evidence recall | `93/93` |"));
+	assert!(competitor_matrix.contains("scenario-level `live_baseline_only` tie"));
+	assert!(
+		competitor_matrix
+			.contains("broader real-world personalization prompt adapter remains `not_encoded`")
+	);
 
 	for stale_phrase in [
 		"same live sweep shape as ELF",
@@ -1957,9 +1968,13 @@ fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> {
 		"wrong_result, incomplete, blocked, and not_encoded states remain visible",
 		"broader live suites remain `wrong_result`, `incomplete`, or `not_encoded`",
 		"The qmd live real-world slice covers representative jobs only",
+		"| Jobs | `40` |",
+		"| Encoded suites | `11` |",
+		"| Pass | `38` |",
 	] {
 		assert!(!measurement_audit.contains(stale_phrase));
 		assert!(!competitor_matrix.contains(stale_phrase));
+		assert!(!iteration_direction.contains(stale_phrase));
 		assert!(!external_manifest.contains(stale_phrase));
 	}
 
@@ -2243,6 +2258,7 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 	let scenarios = array_at(matrix, "/scenario_matrix")?;
 	let retrieval_debug = find_by_field(scenarios, "/scenario_id", "retrieval_debug")?;
 	let operator_debug = find_by_field(scenarios, "/scenario_id", "operator_debugging")?;
+	let personalization = find_by_field(scenarios, "/scenario_id", "personalization")?;
 	let context_trajectory = find_by_field(scenarios, "/scenario_id", "context_trajectory")?;
 
 	assert_competitor_strength_matrix_manifest_counts(matrix);
@@ -2330,6 +2346,9 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 			.and_then(Value::as_str)
 			.is_some_and(|claim| claim.contains("OpenMemory and claude-mem UI/export"))
 	);
+
+	assert_personalization_matrix_record(personalization);
+
 	assert!(
 		context_trajectory
 			.pointer("/current_state")
@@ -2346,6 +2365,25 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 	Ok(())
 }
 
+/// Asserts the personalization scenario row carries the scenario-level tie wording in
+/// `/current_competitor_evidence` and the broader-suite caveat in `/current_state`.
+fn assert_personalization_matrix_record(personalization: &Value) {
+	let evidence = personalization.pointer("/current_competitor_evidence").and_then(Value::as_str);
+	let summary = personalization.pointer("/current_state").and_then(Value::as_str);
+	let evidence_phrases = [
+		"scenario-level live_baseline_only tie",
+		"broader real_world_job personalization prompt adapter remains not_encoded",
+	];
+	let summary_phrases = [
+		"ties the scoped-personalization smoke",
+		"not yet comparable across the broader suite",
+	];
+
+	// A missing or non-string field fails the assertion, same as a missing phrase.
+	assert!(evidence.is_some_and(|text| evidence_phrases.iter().all(|p| text.contains(*p))));
+	assert!(summary.is_some_and(|text| summary_phrases.iter().all(|p| text.contains(*p))));
+}
+
 fn assert_competitor_strength_matrix_manifest_counts(matrix: &Value) {
 	assert_eq!(
 		matrix.pointer("/manifest_summary/adapter_records").and_then(Value::as_u64),
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
index 58692226..8ce82a39 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
@@ -107,7 +107,7 @@ lifecycle-fail -> `lifecycle_fail`, and not-encoded -> `not_encoded`.
 | Operator debugging | Fixture operator_debugging_ux passes, and the narrow live operator-debug slice passes for trace hydration, candidate-drop visibility, selected-but-not-narrated evidence, replay-command availability, and repair-action clarity. | qmd, claude-mem, OpenMemory. | qmd ties replay-command availability and repair-action clarity but is `wrong_result` for trace hydration, candidate-drop stage visibility, and selected-but-not-narrated evidence; claude-mem and OpenMemory UX remain `not_encoded` or blocked. | Add bounded OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
 | Capture/write policy | Fixture capture_integration passes; ELF live capture_integration passes 4/4 with zero redaction leaks, source ids, write-policy audit, and evidence binding. | agentmemory, claude-mem. | agentmemory capture is `blocked` by mocked/in-memory storage; claude-mem hook/viewer capture is `not_encoded`. | Run durable agentmemory and claude-mem capture-hook jobs proving redaction, exclusion, evidence binding, source ids, and no secret leakage. |
 | Production ops | Fixture production_ops has 4 pass and 2 blocked; live production_ops is `blocked`; production adoption has provider/backfill/restore evidence. | ELF production gate, qmd, RAG/RAGFlow resource gates. | qmd live production_ops is `blocked`; RAG/resource gates are `research_gate` `blocked`. | Rerun private-corpus and credentialed gates only when operator-owned manifest and credentials exist. |
-| Personalization | Fixture and live personalization pass. | mem0/OpenMemory, Letta. | mem0/OpenMemory personalization is `not_encoded`; Letta scoped preference readback remains `not_tested` until the contained core/archival export path exists. | Encode scoped preference readback for mem0/OpenMemory and Letta before personalization superiority claims. |
+| Personalization | Fixture and live personalization pass. | mem0/OpenMemory, Letta. | mem0/OpenMemory has a scenario-level `live_baseline_only` tie for entity-scoped personalization, while the broader real-world personalization prompt adapter remains `not_encoded`; Letta scoped preference readback remains `not_tested` until the contained core/archival export path exists. | Encode broader mem0/OpenMemory real-world personalization prompts and Letta scoped preference readback before personalization superiority claims. |
 | Context trajectory | ELF has trace direction but no comparable staged trajectory scenario. | OpenViking. | OpenViking setup is pinned, same-corpus retrieval is `wrong_result`, and hierarchy trajectory is `not_encoded`. | Make OpenViking evidence-bearing retrieval pass, then score staged context trajectory outputs. |
 | Core-vs-archival memory | Fixture `core_archival_memory` passes 6/6 and scores core block attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery separately from archival note search. | Letta. | Letta is `research_gate` `blocked`/`not_tested` until the selected contained export/readback artifact exists. | Implement the Letta export/readback adapter, then compare only scenarios whose core block JSON, archival search/readback JSON, and source ids are present. |
 | Graph/RAG navigation | ELF relation context is not enough to claim graph/RAG navigation parity. | RAGFlow, LightRAG, GraphRAG, Graphiti/Zep, graphify. | RAGFlow, LightRAG, GraphRAG, and Graphiti/Zep remain `research_gate` blocked/incomplete without explicit setup; graphify has only a tiny scored smoke `wrong_result`. | Run larger contained graph/RAG adapters with evidence-linked outputs before any ELF graph/RAG win, tie, or loss claim. |
diff --git a/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md b/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
index 1363d3f0..cffe4849 100644
--- a/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
+++ b/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
@@ -44,18 +44,18 @@ The strongest current statement is:
 
 | Metric | Value |
 | --- | ---: |
-| Jobs | `40` |
-| Encoded suites | `11` |
-| Pass | `38` |
+| Jobs | `46` |
+| Encoded suites | `12` |
+| Pass | `44` |
 | Blocked | `2` |
 | Wrong result | `0` |
 | Lifecycle fail | `0` |
 | Incomplete | `0` |
 | Not encoded | `0` |
 | Unsupported claim | `0` |
-| Mean score | `0.950` |
-| Evidence coverage | `88/88` |
-| Expected evidence recall | `80/80` |
+| Mean score | `0.957` |
+| Evidence coverage | `101/101` |
+| Expected evidence recall | `93/93` |
 
 This proves the fixture contract is broad and well controlled. It does not prove that
 every live adapter or every competitor runtime passes those scenarios.
@@ -137,7 +137,7 @@ one misleading score.
 | Operator debugging | Fixture UX passes and the narrow live trace/viewer slice is scored: ELF passes, qmd ties replay/repair clarity but is wrong_result for trace hydration and candidate-drop visibility. | Expand coverage to OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
 | Capture/write policy | ELF live capture/write-policy self-check passes with zero redaction leaks; qmd is `not_encoded`; agentmemory is `blocked`; claude-mem is `not_encoded`. | Borrow agentmemory/claude-mem capture breadth only after durable local hook/viewer evidence exists, while preserving redaction and evidence binding. |
 | Production ops | ELF has the strongest checked-in evidence, with private/credential gates blocked. | Keep Docker-first production proof and add private corpus only when an operator-owned manifest exists. |
-| Personalization | ELF live personalization passes; mem0/OpenMemory is not encoded and Letta scoped preference readback remains not tested until its contained export path exists. | Add entity-scoped preference history and UI readback before claiming stronger personalization. |
+| Personalization | ELF live personalization passes; mem0/OpenMemory ties the entity-scoped personalization smoke but still lacks a broader real-world prompt adapter, and Letta scoped preference readback remains not tested until its contained export path exists. | Add broader entity/preference history and UI readback before claiming stronger personalization. |
 | Context trajectory | Not comparable yet; OpenViking remains the reference. | Score staged retrieval, hierarchy expansion, and trajectory readback. |
 | Core-vs-archival | ELF fixture-backed `core_archival_memory` passes 6/6, but Letta remains blocked/not tested because no contained export artifact exists. | Borrow Letta's core memory block shape while keeping any win/tie/loss claim gated on exported core block, archival readback, and source-id evidence. |
 | Graph/RAG navigation | RAGFlow, LightRAG, GraphRAG, and Graphiti/Zep remain research gates; graphify has a tiny scored `wrong_result` smoke. | Run larger contained graph/RAG adapters before any broad graph-navigation claim. |
diff --git a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
index 558fa520..d7dd1938 100644
--- a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
+++ b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
@@ -522,9 +522,9 @@
       "scenario": "personalization",
       "current_elf_evidence": "ELF fixture-backed personalization passes and ELF live_real_world personalization passes.",
       "strongest_competitor_or_reference": "mem0/OpenMemory, Letta",
-      "current_competitor_evidence": "mem0/OpenMemory personalization is not_encoded and Letta scoped preference readback remains not_tested until the contained core/archival export path exists.",
-      "current_state": "ELF and qmd have live encoded evidence; personalization-specialized competitors are not yet comparable.",
-      "next_measurement": "Encode mem0/OpenMemory and Letta scoped-preference readback jobs before making personalization superiority claims."
+      "current_competitor_evidence": "mem0/OpenMemory has a scenario-level live_baseline_only tie for entity_scoped_personalization, while the broader real_world_job personalization prompt adapter remains not_encoded; Letta scoped preference readback remains not_tested until the contained core/archival export path exists.",
+      "current_state": "ELF and qmd have live encoded personalization evidence; mem0/OpenMemory ties the scoped-personalization smoke but is not yet comparable across the broader suite, and Letta remains unscored.",
+      "next_measurement": "Encode broader mem0/OpenMemory real_world_job personalization prompts and Letta scoped-preference readback jobs before making personalization superiority claims."
     },
     {
       "scenario_id": "context_trajectory",

From 69617e455579415649601431cb979bd4cd7a32ea Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 02:08:10 +0800
Subject: [PATCH 4/7] {"schema":"decodex/commit/1","summary":"Repair core
 archival benchmark guide aggregate","authority":"XY-927"}

---
 .../real_world_agent_memory_benchmark.md          | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
index 9d6f279d..a5fb2eca 100644
--- a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
+++ b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
@@ -229,16 +229,19 @@ research gates. Its `external_adapters` report section distinguishes:
 - `research_gate`: checked-in source/setup/runtime/resource/retry metadata for a
   future adapter path, not fixture-backed or live execution evidence.
 
-Current fixture state: `cargo make real-world-memory` covers 43 jobs across 12 suites,
-with 38 pass and 5 blocked. The blocked jobs are production-ops operator boundaries
-plus the XY-928 OpenViking `context_trajectory` gates for staged retrieval, hierarchy
-selection, and recursive/context expansion.
+Current fixture state: `cargo make real-world-memory` covers 49 jobs across 13 suites,
+with 44 pass and 5 blocked. The added `core_archival_memory` suite contributes six
+passing fixture jobs for core block attachment, scope, provenance, stale-core
+detection, archival fallback, and project-decision recovery. The blocked jobs are
+production-ops operator boundaries plus the XY-928 OpenViking `context_trajectory`
+gates for staged retrieval, hierarchy selection, and recursive/context expansion.
 
 Current live-adapter state: the `elf_live_real_world` and `qmd_live_real_world` adapters run a full
 encoded-suite sweep through `cargo make real-world-memory-live-adapters`. Each adapter
 materializes generated runtime answers for 40 jobs across 11 suites before scoring.
-The newer fixture-only `core_archival_memory` suite is scored separately and is not yet
-included in that live sweep.
+The fixture-only `core_archival_memory` suite can also be run through
+`cargo make real-world-memory-core-archival`; it is not yet included in that live
+sweep.
 The original targeted `work_resume`, `retrieval`, and `project_decisions` slice still
 passes, and ELF now passes the live `capture_integration` self-checks for redaction,
 exclusions, source ids, evidence binding, and no secret leakage. The full sweep is

From c82e9f7e2a2a24a996a1fe73b20784ffc0069784 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 02:50:43 +0800
Subject: [PATCH 5/7] {"schema":"decodex/commit/1","summary":"Enforce Letta
 core archival benchmark boundaries","authority":"XY-927"}

---
 .../project_decision_recovery.json            | 53 ++++++++--
 .../src/bin/real_world_job_benchmark.rs       | 47 ++++++++-
 .../tests/real_world_job_benchmark.rs         | 99 +++++++++++++++++--
 ...on-direction-from-competitor-benchmarks.md |  8 +-
 .../2026-06-11-measurement-coverage-audit.md  |  8 +-
 ...2026-06-11-measurement-coverage-audit.json |  8 +-
 6 files changed, 192 insertions(+), 31 deletions(-)

diff --git a/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/project_decision_recovery.json b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/project_decision_recovery.json
index 229ecc34..423db375 100644
--- a/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/project_decision_recovery.json
+++ b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/project_decision_recovery.json
@@ -58,10 +58,27 @@
         },
         "created_at": "2026-06-11T04:52:00Z"
       },
+      {
+        "evidence_id": "decision-letta-export-boundary",
+        "kind": "comparison_boundary",
+        "text": "Letta comparison boundary: no contained export/readback artifact maps core block JSON, archival search/readback JSON, and source ids, so Letta remains blocked or not_tested and no win, tie, or loss claim is allowed.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "project_decision_recovery",
+            "evidence_id": "decision-letta-export-boundary"
+          },
+          "locator": {
+            "quote": "no contained export/readback artifact maps core block JSON"
+          }
+        },
+        "created_at": "2026-06-11T04:53:00Z"
+      },
       {
         "evidence_id": "decision-letta-win-trap",
         "kind": "unsupported_claim",
-        "text": "Wrong claim: Letta comparison can be scored as an ELF win because ELF has core blocks.",
+        "text": "Wrong claim: Letta comparison can be scored as an ELF win or measured loss because ELF has core blocks.",
         "source_ref": {
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
@@ -76,7 +93,7 @@
     "adapter_response": {
       "adapter_id": "fixture_core_archival_memory",
       "answer": {
-        "content": "Use the always-attached core routing block to find the benchmark outcome policy, then cite archival notes for the detailed decision. The archival decision says to use win, tie, loss, not_tested, blocked, or non_goal only when scenario evidence supports them. It also says core blocks stay separate from archival note search and Qdrant-derived retrieval, so no ELF-over-Letta claim follows from ELF having core blocks.",
+        "content": "Use the always-attached core routing block to find the benchmark outcome policy, then cite archival notes for the detailed decision. The archival decision says to use win, tie, loss, not_tested, blocked, or non_goal only when scenario evidence supports them. It also says core blocks stay separate from archival note search and Qdrant-derived retrieval. Letta remains blocked or not_tested until a contained export/readback artifact maps core and archival source ids, so no ELF-over-Letta claim follows from ELF having core blocks.",
         "claims": [
           {
             "claim_id": "core_routes_to_archival_rationale",
@@ -95,12 +112,19 @@
             "text": "Core blocks stay separate from archival note search and Qdrant-derived retrieval.",
             "evidence_ids": ["decision-archival-core-search-boundary"],
             "confidence": "high"
+          },
+          {
+            "claim_id": "letta_comparison_requires_export",
+            "text": "Letta remains blocked or not_tested until a contained export/readback artifact maps core and archival source ids.",
+            "evidence_ids": ["decision-letta-export-boundary"],
+            "confidence": "high"
           }
         ],
         "evidence_ids": [
           "decision-core-routing-block",
           "decision-archival-outcome-policy",
-          "decision-archival-core-search-boundary"
+          "decision-archival-core-search-boundary",
+          "decision-letta-export-boundary"
         ],
         "latency_ms": 1.4,
         "cost": {
@@ -126,7 +150,11 @@
       "ts": "2026-06-11T04:51:00Z",
       "actor": "agent",
       "action": "recorded_decision",
-      "evidence_ids": ["decision-archival-outcome-policy", "decision-archival-core-search-boundary"],
+      "evidence_ids": [
+        "decision-archival-outcome-policy",
+        "decision-archival-core-search-boundary",
+        "decision-letta-export-boundary"
+      ],
       "summary": "Archival notes recorded the detailed outcome policy and core-search boundary."
     }
   ],
@@ -149,15 +177,22 @@
       {
         "claim_id": "core_archival_boundary_preserved",
         "text": "Core blocks stay separate from archival note search and Qdrant-derived retrieval."
+      },
+      {
+        "claim_id": "letta_comparison_requires_export",
+        "text": "Letta remains blocked or not_tested until a contained export/readback artifact maps core and archival source ids."
       }
     ],
     "must_not_include": [
-      "Letta comparison can be scored as an ELF win because ELF has core blocks"
+      "Letta comparison can be scored as an ELF win",
+      "Letta is a measured loss",
+      "Letta comparison can be scored as a measured loss"
     ],
     "evidence_links": {
       "core_routes_to_archival_rationale": ["decision-core-routing-block"],
       "outcomes_require_evidence": ["decision-archival-outcome-policy"],
-      "core_archival_boundary_preserved": ["decision-archival-core-search-boundary"]
+      "core_archival_boundary_preserved": ["decision-archival-core-search-boundary"],
+      "letta_comparison_requires_export": ["decision-letta-export-boundary"]
     },
     "answer_type": "decision_record",
     "accepted_alternates": [],
@@ -182,6 +217,12 @@
       "claim_id": "core_archival_boundary_preserved",
       "requirement": "cite",
       "quote": "core blocks stay separate from archival note search"
+    },
+    {
+      "evidence_id": "decision-letta-export-boundary",
+      "claim_id": "letta_comparison_requires_export",
+      "requirement": "cite",
+      "quote": "no contained export/readback artifact maps core block JSON"
     }
   ],
   "negative_traps": [
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
index 11ed5106..8590b5ae 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -3111,9 +3111,15 @@ fn job_metrics(job: &RealWorldJob, answer: &ProducedAnswer) -> JobMetrics {
 		.filter(|evidence| produced_evidence.contains(&evidence.evidence_id))
 		.count();
 	let stale_retrieval_count = trap_use_count(job, &produced_evidence, "stale_fact", answer);
-	let scope_violation_count = trap_use_count(job, &produced_evidence, "near_duplicate", answer);
-	let scope_check_count =
-		job.negative_traps.iter().filter(|trap| trap.trap_type == "near_duplicate").count();
+	let scope_violation_count = ["near_duplicate", "scope_leak"]
+		.into_iter()
+		.map(|trap_type| trap_use_count(job, &produced_evidence, trap_type, answer))
+		.sum();
+	let scope_check_count = job
+		.negative_traps
+		.iter()
+		.filter(|trap| is_scope_trap_type(trap.trap_type.as_str()))
+		.count();
 	let redaction_leak_count = trap_use_count(job, &produced_evidence, "privacy_leak", answer);
 	let scope_correct_count = scope_check_count.saturating_sub(scope_violation_count);
 	let qdrant_rebuild_case = job.tags.iter().any(|tag| tag == "qdrant_rebuild");
@@ -3138,6 +3144,10 @@ fn source_ref_by_evidence(job: &RealWorldJob) -> BTreeMap<&str, &Value> {
 	job.corpus.items.iter().map(|item| (item.evidence_id.as_str(), &item.source_ref)).collect()
 }
 
+fn is_scope_trap_type(trap_type: &str) -> bool {
+	matches!(trap_type, "near_duplicate" | "scope_leak")
+}
+
 fn trap_use_count(
 	job: &RealWorldJob,
 	produced_evidence: &BTreeSet<String>,
@@ -3933,11 +3943,42 @@ fn validate_adapter_scenarios(path: &Path, adapter: &ExternalAdapterReport) -> R
 				suite_id
 			));
 		}
+
+		let outcome = scenario_comparison_outcome(scenario);
+
+		if unmeasured_status_has_measured_outcome(scenario.status, outcome) {
+			return Err(eyre::eyre!(
+				"{} adapter {} scenario {} uses {} status with {} outcome.",
+				path.display(),
+				adapter.adapter_id,
+				scenario.scenario_id,
+				adapter_status_str(scenario.status),
+				scenario_comparison_outcome_str(outcome)
+			));
+		}
 	}
 
 	Ok(())
 }
 
+fn unmeasured_status_has_measured_outcome(
+	status: AdapterCoverageStatus,
+	outcome: ScenarioComparisonOutcome,
+) -> bool {
+	matches!(
+		status,
+		AdapterCoverageStatus::Blocked
+			| AdapterCoverageStatus::Incomplete
+			| AdapterCoverageStatus::NotEncoded
+			| AdapterCoverageStatus::Unsupported
+	) && matches!(
+		outcome,
+		ScenarioComparisonOutcome::Win
+			| ScenarioComparisonOutcome::Tie
+			| ScenarioComparisonOutcome::Loss
+	)
+}
+
 fn validate_adapter_evidence(path: &Path, adapter: &ExternalAdapterReport) -> Result<()> {
 	for evidence in &adapter.evidence {
 		if evidence.kind.trim().is_empty() || evidence.reference.trim().is_empty() {
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 7fe90f1a..26e50498 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -1428,6 +1428,59 @@ fn operator_debug_live_adapter_task_is_docker_scoped() -> Result<()> {
 	Ok(())
 }
 
+#[test]
+fn external_adapter_manifest_rejects_unmeasured_win_loss_scenario_outcomes() -> Result<()> {
+	let mut manifest =
+		serde_json::from_str::<Value>(&fs::read_to_string(external_adapter_manifest_path())?)?;
+	let adapters = manifest
+		.pointer_mut("/adapters")
+		.and_then(Value::as_array_mut)
+		.ok_or_else(|| eyre::eyre!("missing manifest adapters"))?;
+	let letta = adapters
+		.iter_mut()
+		.find(|adapter| {
+			adapter.pointer("/adapter_id").and_then(Value::as_str) == Some("letta_research_gate")
+		})
+		.ok_or_else(|| eyre::eyre!("missing Letta adapter"))?;
+	let scenarios = letta
+		.pointer_mut("/scenarios")
+		.and_then(Value::as_array_mut)
+		.ok_or_else(|| eyre::eyre!("missing Letta scenarios"))?;
+	let attachment = scenarios
+		.iter_mut()
+		.find(|scenario| {
+			scenario.pointer("/scenario_id").and_then(Value::as_str)
+				== Some("core_block_attachment_readback")
+		})
+		.ok_or_else(|| eyre::eyre!("missing Letta attachment scenario"))?;
+
+	set_json_pointer(attachment, "/comparison_outcome", serde_json::json!("win"))?;
+
+	let temp_dir =
+		env::temp_dir().join(format!("elf-real-world-invalid-scenario-test-{}", process::id()));
+
+	fs::create_dir_all(&temp_dir)?;
+
+	let manifest_path = temp_dir.join("memory_projects_manifest.json");
+
+	fs::write(&manifest_path, serde_json::to_vec_pretty(&manifest)?)?;
+
+	let output = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"))
+		.arg("run")
+		.arg("--fixtures")
+		.arg(fixture_dir())
+		.arg("--external-adapter-manifest")
+		.arg(&manifest_path)
+		.output()?;
+
+	assert!(!output.status.success(), "invalid scenario outcome unexpectedly passed");
+	assert!(
+		String::from_utf8_lossy(&output.stderr).contains("not_encoded status with win outcome")
+	);
+
+	Ok(())
+}
+
 #[test]
 fn live_adapter_supports_elf_capture_write_policy_without_external_hook_claims() -> Result<()> {
 	let workspace = workspace_root()?;
@@ -2060,8 +2113,8 @@ fn assert_current_report_text_boundaries(
 	assert!(iteration_direction.contains("| Jobs | `49` |"));
 	assert!(iteration_direction.contains("| Encoded suites | `13` |"));
 	assert!(iteration_direction.contains("| Pass | `44` |"));
-	assert!(iteration_direction.contains("| Evidence coverage | `110/110` |"));
-	assert!(iteration_direction.contains("| Expected evidence recall | `99/99` |"));
+	assert!(iteration_direction.contains("| Evidence coverage | `111/111` |"));
+	assert!(iteration_direction.contains("| Expected evidence recall | `100/100` |"));
 
 	for stale_phrase in [
 		"same live sweep shape as ELF",
@@ -2850,10 +2903,10 @@ fn assert_iteration_direction_current_measurement_counts(markdown: &str) {
 		"| Encoded suites | `13` |",
 		"| Blocked | `5` |",
 		"| Mean score | `0.898` |",
-		"| Evidence coverage | `110/110` |",
-		"| Source-ref coverage | `110/110` |",
-		"| Quote coverage | `110/110` |",
-		"| Expected evidence recall | `99/99` |",
+		"| Evidence coverage | `111/111` |",
+		"| Source-ref coverage | `111/111` |",
+		"| Quote coverage | `111/111` |",
+		"| Expected evidence recall | `100/100` |",
 		"| `blocked` | `7` |",
 		"| `not_encoded` | `5` |",
 		"`live_baseline_only`, `fixture_backed`, and `research_gate`",
@@ -3211,6 +3264,14 @@ fn core_archival_memory_fixtures_score_separate_core_and_archival_jobs() -> Resu
 		Some(1.0)
 	);
 	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(1.0));
+	assert_eq!(
+		report.pointer("/summary/evidence_required_count").and_then(Value::as_u64),
+		Some(14)
+	);
+	assert_eq!(report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64), Some(14));
+	assert_eq!(report.pointer("/summary/scope_check_count").and_then(Value::as_u64), Some(1));
+	assert_eq!(report.pointer("/summary/scope_correct_count").and_then(Value::as_u64), Some(1));
+	assert_eq!(report.pointer("/summary/scope_violation_count").and_then(Value::as_u64), Some(0));
 
 	let suites = array_at(&report, "/suites")?;
 	let core = find_by_field(suites, "/suite_id", "core_archival_memory")?;
@@ -3234,6 +3295,24 @@ fn core_archival_memory_fixtures_score_separate_core_and_archival_jobs() -> Resu
 		assert_eq!(job.pointer("/status").and_then(Value::as_str), Some("pass"));
 	}
 
+	let scope = find_by_field(jobs, "/job_id", "core-archival-core-block-scope-001")?;
+	let decision = find_by_field(jobs, "/job_id", "core-archival-project-decision-recovery-001")?;
+
+	assert_eq!(scope.pointer("/scope_check_count").and_then(Value::as_u64), Some(1));
+	assert_eq!(scope.pointer("/scope_correct_count").and_then(Value::as_u64), Some(1));
+	assert_eq!(scope.pointer("/scope_violation_count").and_then(Value::as_u64), Some(0));
+	assert!(
+		decision
+			.pointer("/produced_answer")
+			.and_then(Value::as_str)
+			.is_some_and(|content| content.contains("Letta remains blocked or not_tested"))
+	);
+	assert!(
+		array_at(decision, "/produced_evidence")?
+			.iter()
+			.any(|id| id.as_str() == Some("decision-letta-export-boundary"))
+	);
+
 	Ok(())
 }
 
@@ -3319,8 +3398,8 @@ fn assert_root_aggregate_summary(report: &Value) {
 		Some(0)
 	);
 	assert_eq!(report.pointer("/summary/redaction_leak_count").and_then(Value::as_u64), Some(0));
-	assert_eq!(report.pointer("/summary/scope_check_count").and_then(Value::as_u64), Some(2));
-	assert_eq!(report.pointer("/summary/scope_correct_count").and_then(Value::as_u64), Some(2));
+	assert_eq!(report.pointer("/summary/scope_check_count").and_then(Value::as_u64), Some(3));
+	assert_eq!(report.pointer("/summary/scope_correct_count").and_then(Value::as_u64), Some(3));
 	assert_eq!(report.pointer("/summary/scope_violation_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(
 		report.pointer("/summary/qdrant_rebuild_case_count").and_then(Value::as_u64),
@@ -3332,11 +3411,11 @@ fn assert_root_aggregate_summary(report: &Value) {
 	);
 	assert_eq!(
 		report.pointer("/summary/evidence_required_count").and_then(Value::as_u64),
-		Some(110)
+		Some(111)
 	);
 	assert_eq!(
 		report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64),
-		Some(110)
+		Some(111)
 	);
 	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(1.0));
 	assert_eq!(report.pointer("/summary/source_ref_coverage").and_then(Value::as_f64), Some(1.0));
diff --git a/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md b/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
index e32910a1..6fa05a45 100644
--- a/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
+++ b/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
@@ -54,10 +54,10 @@ The strongest current statement is:
 | Not encoded | `0` |
 | Unsupported claim | `0` |
 | Mean score | `0.898` |
-| Evidence coverage | `110/110` |
-| Source-ref coverage | `110/110` |
-| Quote coverage | `110/110` |
-| Expected evidence recall | `99/99` |
+| Evidence coverage | `111/111` |
+| Source-ref coverage | `111/111` |
+| Quote coverage | `111/111` |
+| Expected evidence recall | `100/100` |
 
 This proves the fixture contract is broad and well controlled. It does not prove that
 every live adapter or every competitor runtime passes those scenarios.
diff --git a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
index 90cd444c..c4e8381a 100644
--- a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
+++ b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
@@ -90,10 +90,10 @@ failure.
 | Unsupported claim | `0` |
 | Mean score | `0.898` |
 | Mean latency | `3.940 ms` |
-| Expected evidence recall | `99/99` |
-| Evidence coverage | `110/110` |
-| Source-ref coverage | `110/110` |
-| Quote coverage | `110/110` |
+| Expected evidence recall | `100/100` |
+| Evidence coverage | `111/111` |
+| Source-ref coverage | `111/111` |
+| Quote coverage | `111/111` |
 
 This proves fixture contract breadth and scoring behavior. It does not prove every
 live adapter or competitor runtime can complete those jobs.
diff --git a/docs/research/2026-06-11-measurement-coverage-audit.json b/docs/research/2026-06-11-measurement-coverage-audit.json
index bd7637f0..397f781e 100644
--- a/docs/research/2026-06-11-measurement-coverage-audit.json
+++ b/docs/research/2026-06-11-measurement-coverage-audit.json
@@ -36,10 +36,10 @@
     "unsupported_claim": 0,
     "mean_score": 0.898,
     "mean_latency_ms": 3.94,
-    "expected_evidence_total": 99,
-    "expected_evidence_matched": 99,
-    "evidence_required_count": 110,
-    "evidence_covered_count": 110
+    "expected_evidence_total": 100,
+    "expected_evidence_matched": 100,
+    "evidence_required_count": 111,
+    "evidence_covered_count": 111
   },
   "live_real_world_adapters": [
     {

From 5534191f2b3ed8ddf04cff84cbf7d56e767bff18 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 03:06:56 +0800
Subject: [PATCH 6/7] {"schema":"decodex/commit/1","summary":"Guard unmeasured
 adapter scenario positions","authority":"XY-927"}

---
 .../src/bin/real_world_job_benchmark.rs       |  35 +++++
 .../tests/real_world_job_benchmark.rs         | 126 +++++++++++-------
 ...-11-competitor-strength-evidence-matrix.md |   2 +-
 .../2026-06-11-measurement-coverage-audit.md  |   4 +-
 ...2026-06-11-measurement-coverage-audit.json |   4 +-
 ...-11-xy-897-competitor-strength-matrix.json |   4 +-
 6 files changed, 121 insertions(+), 54 deletions(-)

diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
index 8590b5ae..81cda7c7 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -3956,6 +3956,16 @@ fn validate_adapter_scenarios(path: &Path, adapter: &ExternalAdapterReport) -> R
 				scenario_comparison_outcome_str(outcome)
 			));
 		}
+		if unmeasured_status_has_measured_position(scenario.status, scenario.elf_position) {
+			return Err(eyre::eyre!(
+				"{} adapter {} scenario {} uses {} status with {} position.",
+				path.display(),
+				adapter.adapter_id,
+				scenario.scenario_id,
+				adapter_status_str(scenario.status),
+				scenario_position_str(scenario.elf_position)
+			));
+		}
 	}
 
 	Ok(())
@@ -3979,6 +3989,22 @@ fn unmeasured_status_has_measured_outcome(
 	)
 }
 
+fn unmeasured_status_has_measured_position(
+	status: AdapterCoverageStatus,
+	position: ElfScenarioPosition,
+) -> bool {
+	matches!(
+		status,
+		AdapterCoverageStatus::Blocked
+			| AdapterCoverageStatus::Incomplete
+			| AdapterCoverageStatus::NotEncoded
+			| AdapterCoverageStatus::Unsupported
+	) && matches!(
+		position,
+		ElfScenarioPosition::Wins | ElfScenarioPosition::Ties | ElfScenarioPosition::Loses
+	)
+}
+
 fn validate_adapter_evidence(path: &Path, adapter: &ExternalAdapterReport) -> Result<()> {
 	for evidence in &adapter.evidence {
 		if evidence.kind.trim().is_empty() || evidence.reference.trim().is_empty() {
@@ -5036,6 +5062,15 @@ fn scenario_comparison_outcome_str(outcome: ScenarioComparisonOutcome) -> &'stat
 	}
 }
 
+fn scenario_position_str(position: ElfScenarioPosition) -> &'static str {
+	match position {
+		ElfScenarioPosition::Wins => "wins",
+		ElfScenarioPosition::Ties => "ties",
+		ElfScenarioPosition::Loses => "loses",
+		ElfScenarioPosition::Untested => "untested",
+	}
+}
+
 fn adapter_status_counts_display(counts: &AdapterStatusCounts) -> String {
 	[
 		("real", counts.real),
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 26e50498..5ae959a7 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -5,7 +5,7 @@
 use std::{
 	env, fs,
 	path::{Path, PathBuf},
-	process::{self, Command},
+	process::{self, Command, Output},
 };
 
 use color_eyre::{Result, eyre};
@@ -267,6 +267,56 @@ fn set_json_pointer(value: &mut Value, pointer: &str, replacement: Value) -> Res
 	Ok(())
 }
 
+fn run_external_manifest_with_letta_attachment_mutation<F>(
+	slug: &str,
+	mutation: F,
+) -> Result<Output>
+where
+	F: FnOnce(&mut Value) -> Result<()>,
+{
+	let mut manifest =
+		serde_json::from_str::<Value>(&fs::read_to_string(external_adapter_manifest_path())?)?;
+	let adapters = manifest
+		.pointer_mut("/adapters")
+		.and_then(Value::as_array_mut)
+		.ok_or_else(|| eyre::eyre!("missing manifest adapters"))?;
+	let letta = adapters
+		.iter_mut()
+		.find(|adapter| {
+			adapter.pointer("/adapter_id").and_then(Value::as_str) == Some("letta_research_gate")
+		})
+		.ok_or_else(|| eyre::eyre!("missing Letta adapter"))?;
+	let scenarios = letta
+		.pointer_mut("/scenarios")
+		.and_then(Value::as_array_mut)
+		.ok_or_else(|| eyre::eyre!("missing Letta scenarios"))?;
+	let attachment = scenarios
+		.iter_mut()
+		.find(|scenario| {
+			scenario.pointer("/scenario_id").and_then(Value::as_str)
+				== Some("core_block_attachment_readback")
+		})
+		.ok_or_else(|| eyre::eyre!("missing Letta attachment scenario"))?;
+
+	mutation(attachment)?;
+
+	let temp_dir = env::temp_dir().join(format!("elf-real-world-{slug}-{}", process::id()));
+
+	fs::create_dir_all(&temp_dir)?;
+
+	let manifest_path = temp_dir.join("memory_projects_manifest.json");
+
+	fs::write(&manifest_path, serde_json::to_vec_pretty(&manifest)?)?;
+
+	Ok(Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"))
+		.arg("run")
+		.arg("--fixtures")
+		.arg(fixture_dir())
+		.arg("--external-adapter-manifest")
+		.arg(&manifest_path)
+		.output()?)
+}
+
 #[test]
 fn smoke_fixture_produces_typed_json_report() -> Result<()> {
 	let report = run_json_report()?;
@@ -1430,52 +1480,34 @@ fn operator_debug_live_adapter_task_is_docker_scoped() -> Result<()> {
 
 #[test]
 fn external_adapter_manifest_rejects_unmeasured_win_loss_scenario_outcomes() -> Result<()> {
-	let mut manifest =
-		serde_json::from_str::<Value>(&fs::read_to_string(external_adapter_manifest_path())?)?;
-	let adapters = manifest
-		.pointer_mut("/adapters")
-		.and_then(Value::as_array_mut)
-		.ok_or_else(|| eyre::eyre!("missing manifest adapters"))?;
-	let letta = adapters
-		.iter_mut()
-		.find(|adapter| {
-			adapter.pointer("/adapter_id").and_then(Value::as_str) == Some("letta_research_gate")
-		})
-		.ok_or_else(|| eyre::eyre!("missing Letta adapter"))?;
-	let scenarios = letta
-		.pointer_mut("/scenarios")
-		.and_then(Value::as_array_mut)
-		.ok_or_else(|| eyre::eyre!("missing Letta scenarios"))?;
-	let attachment = scenarios
-		.iter_mut()
-		.find(|scenario| {
-			scenario.pointer("/scenario_id").and_then(Value::as_str)
-				== Some("core_block_attachment_readback")
-		})
-		.ok_or_else(|| eyre::eyre!("missing Letta attachment scenario"))?;
-
-	set_json_pointer(attachment, "/comparison_outcome", serde_json::json!("win"))?;
-
-	let temp_dir =
-		env::temp_dir().join(format!("elf-real-world-invalid-scenario-test-{}", process::id()));
-
-	fs::create_dir_all(&temp_dir)?;
+	let output = run_external_manifest_with_letta_attachment_mutation(
+		"invalid-scenario-outcome-test",
+		|scenario| set_json_pointer(scenario, "/comparison_outcome", serde_json::json!("win")),
+	)?;
 
-	let manifest_path = temp_dir.join("memory_projects_manifest.json");
+	assert!(!output.status.success(), "invalid scenario outcome unexpectedly passed");
+	assert!(
+		String::from_utf8_lossy(&output.stderr).contains("not_encoded status with win outcome")
+	);
 
-	fs::write(&manifest_path, serde_json::to_vec_pretty(&manifest)?)?;
+	Ok(())
+}
 
-	let output = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"))
-		.arg("run")
-		.arg("--fixtures")
-		.arg(fixture_dir())
-		.arg("--external-adapter-manifest")
-		.arg(&manifest_path)
-		.output()?;
+#[test]
+fn external_adapter_manifest_rejects_unmeasured_win_loss_scenario_positions() -> Result<()> {
+	let output = run_external_manifest_with_letta_attachment_mutation(
+		"invalid-scenario-position-test",
+		|scenario| {
+			set_json_pointer(scenario, "/status", serde_json::json!("not_encoded"))?;
+			set_json_pointer(scenario, "/elf_position", serde_json::json!("wins"))?;
+
+			set_json_pointer(scenario, "/comparison_outcome", serde_json::json!("not_tested"))
+		},
+	)?;
 
-	assert!(!output.status.success(), "invalid scenario outcome unexpectedly passed");
+	assert!(!output.status.success(), "invalid scenario position unexpectedly passed");
 	assert!(
-		String::from_utf8_lossy(&output.stderr).contains("not_encoded status with win outcome")
+		String::from_utf8_lossy(&output.stderr).contains("not_encoded status with wins position")
 	);
 
 	Ok(())
@@ -2500,13 +2532,13 @@ fn assert_competitor_strength_matrix_manifest_counts(matrix: &Value) {
 	);
 	assert_eq!(
 		matrix.pointer("/manifest_summary/overall_status_counts/blocked").and_then(Value::as_u64),
-		Some(6)
+		Some(7)
 	);
 	assert_eq!(
 		matrix
 			.pointer("/manifest_summary/overall_status_counts/not_encoded")
 			.and_then(Value::as_u64),
-		Some(6)
+		Some(5)
 	);
 	assert_eq!(
 		matrix
@@ -2886,13 +2918,13 @@ fn assert_operator_facing_strength_profile_boundaries(
 
 fn assert_measurement_audit_adapter_status_counts(markdown: &str) {
 	for expected in [
-		"| `blocked` | `6` |",
-		"| `not_encoded` | `6` |",
+		"| `blocked` | `7` |",
+		"| `not_encoded` | `5` |",
 		"The generated JSON report emits `external_project_count: 16`",
 	] {
 		assert!(markdown.contains(expected), "missing measurement audit text: {expected}");
 	}
-	for stale in ["| `blocked` | `5` |", "| `not_encoded` | `7` |"] {
+	for stale in ["| `blocked` | `6` |", "| `not_encoded` | `6` |"] {
 		assert!(!markdown.contains(stale), "stale measurement audit text: {stale}");
 	}
 }
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
index d5c9200a..06680c4e 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
@@ -49,7 +49,7 @@ Current boundary:
 The current manifest has 23 adapter records across 16 external projects plus ELF.
 Evidence-class counts: 1 `fixture_backed`, 6 `live_baseline_only`, 5
 `live_real_world`, and 11 `research_gate`. Overall adapter-status counts: 4 `pass`,
-6 `wrong_result`, 1 `lifecycle_fail`, 6 `blocked`, and 6 `not_encoded`.
+6 `wrong_result`, 1 `lifecycle_fail`, 7 `blocked`, and 5 `not_encoded`.
 
 ## State Taxonomy
 
diff --git a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
index c4e8381a..67c26673 100644
--- a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
+++ b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
@@ -156,8 +156,8 @@ The checked-in manifest records 23 adapter records across 17 unique project name
 | `pass` | `4` |
 | `wrong_result` | `6` |
 | `lifecycle_fail` | `1` |
-| `blocked` | `6` |
-| `not_encoded` | `6` |
+| `blocked` | `7` |
+| `not_encoded` | `5` |
 
 The generated JSON report emits `external_project_count: 16`, matching the unique
 non-ELF project-name count from the manifest. The companion audit JSON separately
diff --git a/docs/research/2026-06-11-measurement-coverage-audit.json b/docs/research/2026-06-11-measurement-coverage-audit.json
index 397f781e..ff2405b1 100644
--- a/docs/research/2026-06-11-measurement-coverage-audit.json
+++ b/docs/research/2026-06-11-measurement-coverage-audit.json
@@ -203,8 +203,8 @@
       "pass": 4,
       "wrong_result": 6,
       "lifecycle_fail": 1,
-      "blocked": 6,
-      "not_encoded": 6
+      "blocked": 7,
+      "not_encoded": 5
     },
     "xy900_update_note": "XY-900 promotes graphify from research_gate/blocked to a tiny scored live_real_world wrong_result smoke; broad graph/RAG quality remains unproven.",
     "xy932_update_note": "XY-932 adds narrow ELF/qmd operator-debug live_real_world records: ELF pass and qmd wrong_result for trace hydration/candidate-drop visibility, with OpenMemory and claude-mem UI still unmeasured.",
diff --git a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
index 92665fdb..93e23158 100644
--- a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
+++ b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
@@ -32,8 +32,8 @@
     },
     "overall_status_counts": {
       "lifecycle_fail": 1,
-      "blocked": 6,
-      "not_encoded": 6,
+      "blocked": 7,
+      "not_encoded": 5,
       "pass": 4,
       "wrong_result": 6
     }

From 05232fbad4e3ad5f96cfe0757181135278b9cbda Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 03:44:56 +0800
Subject: [PATCH 7/7] {"schema":"decodex/commit/1","summary":"Align Letta core
 archival comparison contract","authority":"XY-927"}

---
 README.md                                     |  11 +-
 .../memory_projects_manifest.json             |   1 +
 .../src/bin/real_world_job_benchmark.rs       |  48 +++++++
 .../tests/real_world_job_benchmark.rs         | 128 +++++++++++++++---
 ...-11-competitor-strength-adoption-report.md |   7 +-
 .../2026-06-11-measurement-coverage-audit.md  |   2 +-
 .../real_world_agent_memory_benchmark.md      |  16 ++-
 .../research/comparison_external_projects.md  |  16 ++-
 ...1-competitor-strength-adoption-report.json |   2 +-
 .../real_world_agent_memory_benchmark_v1.md   |  14 +-
 10 files changed, 202 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index 08c35d00..5bcef8ee 100644
--- a/README.md
+++ b/README.md
@@ -145,10 +145,13 @@ provider-backed ELF evidence was required.
   rebuild returned `rebuilt_count=1`, `missing_vector_count=0`, `error_count=0`, and
   search recovered the restored note.
 - Fresh all-project smoke run: ELF and qmd passed every encoded check. agentmemory
-  passed same-corpus retrieval but failed lifecycle/cold-start coverage. memsearch,
-  mem0, OpenViking, and claude-mem remained typed non-pass states. OpenViking now
-  reaches its pinned Docker local embedding path and is reported as `wrong_result`
-  when same-corpus evidence terms are missed; setup failures remain `incomplete`.
+  passed same-corpus retrieval but failed lifecycle/cold-start coverage. mem0/OpenMemory
+  and memsearch now pass their scoped local baseline smokes, while OpenMemory
+  UI/export, hosted mem0 Platform, optional graph memory, and broader memsearch prompt
+  and TTL coverage remain blocked, unsupported, or not encoded. OpenViking now reaches
+  its pinned Docker local embedding path and is reported as `wrong_result` when
+  same-corpus evidence terms are missed; claude-mem and OpenViking non-retrieval
+  coverage remain typed non-pass states.
 - Real-world agent memory aggregate after XY-927 and XY-928: 49 fixture-backed
   jobs across 13 suites, 44 pass, 0 incomplete, 5 blocked, 0 wrong-result,
   0 not-encoded, and 0 unsupported-claim results. The remaining non-pass jobs are
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index 66813da7..42d3ab15 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -858,6 +858,7 @@
           "suite_id": "work_resume",
           "status": "blocked",
           "elf_position": "untested",
+          "comparison_outcome": "blocked",
           "evidence": "agentmemory's relevant strength is durable coding-agent continuity and capture, but the Docker harness has not proven a persistent session/capture path. Keep work_resume and capture claims blocked until a durable local adapter path exists.",
           "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
         },
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
index 81cda7c7..d4d0c6ac 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -3946,6 +3946,14 @@ fn validate_adapter_scenarios(path: &Path, adapter: &ExternalAdapterReport) -> R
 
 		let outcome = scenario_comparison_outcome(scenario);
 
+		if blocked_status_missing_blocked_outcome(scenario.status, scenario.comparison_outcome) {
+			return Err(eyre::eyre!(
+				"{} adapter {} scenario {} uses blocked status without blocked comparison outcome.",
+				path.display(),
+				adapter.adapter_id,
+				scenario.scenario_id
+			));
+		}
 		if unmeasured_status_has_measured_outcome(scenario.status, outcome) {
 			return Err(eyre::eyre!(
 				"{} adapter {} scenario {} uses {} status with {} outcome.",
@@ -3966,11 +3974,28 @@ fn validate_adapter_scenarios(path: &Path, adapter: &ExternalAdapterReport) -> R
 				scenario_position_str(scenario.elf_position)
 			));
 		}
+		if explicit_outcome_conflicts_with_position(scenario) {
+			return Err(eyre::eyre!(
+				"{} adapter {} scenario {} uses {} position with {} outcome.",
+				path.display(),
+				adapter.adapter_id,
+				scenario.scenario_id,
+				scenario_position_str(scenario.elf_position),
+				scenario_comparison_outcome_str(outcome)
+			));
+		}
 	}
 
 	Ok(())
 }
 
+fn blocked_status_missing_blocked_outcome(
+	status: AdapterCoverageStatus,
+	outcome: Option<ScenarioComparisonOutcome>,
+) -> bool {
+	status == AdapterCoverageStatus::Blocked && outcome != Some(ScenarioComparisonOutcome::Blocked)
+}
+
 fn unmeasured_status_has_measured_outcome(
 	status: AdapterCoverageStatus,
 	outcome: ScenarioComparisonOutcome,
@@ -4005,6 +4030,29 @@ fn unmeasured_status_has_measured_position(
 	)
 }
 
+/// Returns `true` when a scenario's explicit `comparison_outcome` is one its `elf_position`
+/// does not support; a scenario with no explicit outcome is never treated as conflicting.
+fn explicit_outcome_conflicts_with_position(scenario: &AdapterScenarioJudgment) -> bool {
+	scenario
+		.comparison_outcome
+		.is_some_and(|explicit| !position_supports_outcome(scenario.elf_position, explicit))
+}
+
+fn position_supports_outcome(
+	position: ElfScenarioPosition,
+	outcome: ScenarioComparisonOutcome,
+) -> bool {
+	matches!(
+		(position, outcome),
+		(ElfScenarioPosition::Wins, ScenarioComparisonOutcome::Win)
+			| (ElfScenarioPosition::Ties, ScenarioComparisonOutcome::Tie)
+			| (ElfScenarioPosition::Loses, ScenarioComparisonOutcome::Loss)
+			| (ElfScenarioPosition::Untested, ScenarioComparisonOutcome::NotTested)
+			| (ElfScenarioPosition::Untested, ScenarioComparisonOutcome::Blocked)
+			| (ElfScenarioPosition::Untested, ScenarioComparisonOutcome::NonGoal)
+	)
+}
+
 fn validate_adapter_evidence(path: &Path, adapter: &ExternalAdapterReport) -> Result<()> {
 	for evidence in &adapter.evidence {
 		if evidence.kind.trim().is_empty() || evidence.reference.trim().is_empty() {
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 5ae959a7..024a0697 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -190,6 +190,14 @@ fn readme_path() -> Result<PathBuf> {
 	Ok(workspace_root()?.join("README.md"))
 }
 
+/// Builds the path to `docs/guide/research/comparison_external_projects.md` under the
+/// directory returned by `workspace_root()`, propagating that helper's error if it fails.
+fn comparison_external_projects_path() -> Result<PathBuf> {
+	let research_dir = workspace_root()?.join("docs").join("guide").join("research");
+
+	Ok(research_dir.join("comparison_external_projects.md"))
+}
+
 fn benchmarking_index_path() -> Result<PathBuf> {
 	Ok(workspace_root()?.join("docs").join("guide").join("benchmarking").join("index.md"))
 }
@@ -271,6 +279,23 @@ fn run_external_manifest_with_letta_attachment_mutation<F>(
 	slug: &str,
 	mutation: F,
 ) -> Result<Output>
+where
+	F: FnOnce(&mut Value) -> Result<()>,
+{
+	run_external_manifest_scenario_mutation(
+		slug,
+		"letta_research_gate",
+		"core_block_attachment_readback",
+		mutation,
+	)
+}
+
+fn run_external_manifest_scenario_mutation<F>(
+	slug: &str,
+	adapter_id: &str,
+	scenario_id: &str,
+	mutation: F,
+) -> Result<Output>
 where
 	F: FnOnce(&mut Value) -> Result<()>,
 {
@@ -280,25 +305,22 @@ where
 		.pointer_mut("/adapters")
 		.and_then(Value::as_array_mut)
 		.ok_or_else(|| eyre::eyre!("missing manifest adapters"))?;
-	let letta = adapters
+	let adapter = adapters
 		.iter_mut()
-		.find(|adapter| {
-			adapter.pointer("/adapter_id").and_then(Value::as_str) == Some("letta_research_gate")
-		})
-		.ok_or_else(|| eyre::eyre!("missing Letta adapter"))?;
-	let scenarios = letta
+		.find(|adapter| adapter.pointer("/adapter_id").and_then(Value::as_str) == Some(adapter_id))
+		.ok_or_else(|| eyre::eyre!("missing {adapter_id} adapter"))?;
+	let scenarios = adapter
 		.pointer_mut("/scenarios")
 		.and_then(Value::as_array_mut)
-		.ok_or_else(|| eyre::eyre!("missing Letta scenarios"))?;
-	let attachment = scenarios
+		.ok_or_else(|| eyre::eyre!("missing {adapter_id} scenarios"))?;
+	let scenario = scenarios
 		.iter_mut()
 		.find(|scenario| {
-			scenario.pointer("/scenario_id").and_then(Value::as_str)
-				== Some("core_block_attachment_readback")
+			scenario.pointer("/scenario_id").and_then(Value::as_str) == Some(scenario_id)
 		})
-		.ok_or_else(|| eyre::eyre!("missing Letta attachment scenario"))?;
+		.ok_or_else(|| eyre::eyre!("missing {scenario_id} scenario"))?;
 
-	mutation(attachment)?;
+	mutation(scenario)?;
 
 	let temp_dir = env::temp_dir().join(format!("elf-real-world-{slug}-{}", process::id()));
 
@@ -495,7 +517,7 @@ fn external_adapter_run_summarizes_nonzero_scenario_losses() -> Result<()> {
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/not_tested")
 			.and_then(Value::as_u64),
-		Some(11)
+		Some(10)
 	);
 
 	let adapters = array_at(&report, "/external_adapters/adapters")?;
@@ -719,13 +741,13 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/not_tested")
 			.and_then(Value::as_u64),
-		Some(12)
+		Some(11)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(4)
+		Some(5)
 	);
 	assert_eq!(
 		report
@@ -1097,6 +1119,10 @@ fn assert_first_generation_adapter_records(
 		Some("wins")
 	);
 	assert_eq!(agentmemory.pointer("/scenarios/2/status").and_then(Value::as_str), Some("blocked"));
+	assert_eq!(
+		agentmemory.pointer("/scenarios/2/comparison_outcome").and_then(Value::as_str),
+		Some("blocked")
+	);
 	assert_eq!(
 		mem0.pointer("/capabilities/2/capability").and_then(Value::as_str),
 		Some("local_lifecycle_update_delete_reload")
@@ -1513,6 +1539,49 @@ fn external_adapter_manifest_rejects_unmeasured_win_loss_scenario_positions() ->
 	Ok(())
 }
 
+#[test]
+fn external_adapter_manifest_rejects_blocked_status_without_blocked_outcome() -> Result<()> {
+	let output = run_external_manifest_scenario_mutation(
+		"invalid-blocked-scenario-outcome-test",
+		"letta_research_gate",
+		"stale_core_detection",
+		|scenario| {
+			scenario
+				.as_object_mut()
+				.ok_or_else(|| eyre::eyre!("scenario is not an object"))?
+				.remove("comparison_outcome");
+
+			Ok(())
+		},
+	)?;
+
+	assert!(!output.status.success(), "invalid blocked scenario unexpectedly passed");
+	assert!(
+		String::from_utf8_lossy(&output.stderr)
+			.contains("blocked status without blocked comparison outcome")
+	);
+
+	Ok(())
+}
+
+#[test]
+fn external_adapter_manifest_rejects_conflicting_scenario_position_and_outcome() -> Result<()> {
+	let output = run_external_manifest_with_letta_attachment_mutation(
+		"invalid-scenario-position-outcome-test",
+		|scenario| {
+			set_json_pointer(scenario, "/status", serde_json::json!("pass"))?;
+			set_json_pointer(scenario, "/elf_position", serde_json::json!("ties"))?;
+
+			set_json_pointer(scenario, "/comparison_outcome", serde_json::json!("loss"))
+		},
+	)?;
+
+	assert!(!output.status.success(), "conflicting scenario unexpectedly passed");
+	assert!(String::from_utf8_lossy(&output.stderr).contains("ties position with loss outcome"));
+
+	Ok(())
+}
+
 #[test]
 fn live_adapter_supports_elf_capture_write_policy_without_external_hook_claims() -> Result<()> {
 	let workspace = workspace_root()?;
@@ -1648,6 +1717,8 @@ fn capture_write_policy_live_report_preserves_competitor_boundaries() -> Result<
 	assert!(markdown.contains("Do not claim ELF broadly beats agentmemory or claude-mem"));
 	assert!(benchmarking_index.contains("2026-06-11-capture-write-policy-live-report.md"));
 	assert!(readme.contains("Capture/Write-Policy Live Report - June 11, 2026"));
+	assert!(readme.contains("mem0/OpenMemory"));
+	assert!(readme.contains("and memsearch now pass their scoped local baseline"));
 
 	Ok(())
 }
@@ -2039,6 +2110,7 @@ fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> {
 	)?)?;
 	let iteration_direction = fs::read_to_string(iteration_direction_report_path()?)?;
 	let external_manifest = fs::read_to_string(external_adapter_manifest_path())?;
+	let comparison_external_projects = fs::read_to_string(comparison_external_projects_path()?)?;
 	let retrieval_debug_profile =
 		serde_json::from_str::<Value>(&fs::read_to_string(retrieval_debug_profile_json_path()?)?)?;
 	let temporal_history = serde_json::from_str::<Value>(&fs::read_to_string(
@@ -2050,6 +2122,7 @@ fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> {
 		&competitor_matrix,
 		&iteration_direction,
 		&external_manifest,
+		&comparison_external_projects,
 	);
 
 	let qmd_live = find_by_field(
@@ -2114,6 +2187,7 @@ fn assert_current_report_text_boundaries(
 	competitor_matrix: &str,
 	iteration_direction: &str,
 	external_manifest: &str,
+	comparison_external_projects: &str,
 ) {
 	assert!(
 		measurement_audit.contains(
@@ -2124,6 +2198,7 @@ fn assert_current_report_text_boundaries(
 		measurement_audit
 			.contains("qmd live fails 6/6 jobs after missing the delete/TTL tombstone evidence")
 	);
+	assert!(measurement_audit.contains("Basic local smoke and local OSS history/readback pass"));
 
 	assert_measurement_audit_adapter_status_counts(measurement_audit);
 
@@ -2142,6 +2217,14 @@ fn assert_current_report_text_boundaries(
 	assert!(external_manifest.contains(
 		"The qmd live real-world sweep covers the current encoded fixture corpus; expanded retrieval-debug strength suites still need their own materialized adapter run."
 	));
+	assert!(
+		comparison_external_projects
+			.contains("Benchmark-grounded for scoped local OSS same-corpus retrieval")
+	);
+	assert!(
+		comparison_external_projects
+			.contains("Benchmark-grounded for local same-corpus retrieval, reindex/update/delete")
+	);
 	assert!(iteration_direction.contains("| Jobs | `49` |"));
 	assert!(iteration_direction.contains("| Encoded suites | `13` |"));
 	assert!(iteration_direction.contains("| Pass | `44` |"));
@@ -2158,11 +2241,15 @@ fn assert_current_report_text_boundaries(
 		"| Jobs | `40` |",
 		"| Encoded suites | `11` |",
 		"| Pass | `38` |",
+		"history/UI/hosted/graph behavior remains",
+		"current local adapter is incomplete/wrong-result",
+		"current adapter is incomplete/invalid-result",
 	] {
 		assert!(!measurement_audit.contains(stale_phrase));
 		assert!(!competitor_matrix.contains(stale_phrase));
 		assert!(!iteration_direction.contains(stale_phrase));
 		assert!(!external_manifest.contains(stale_phrase));
+		assert!(!comparison_external_projects.contains(stale_phrase));
 	}
 }
 
@@ -2187,10 +2274,19 @@ fn qmd_trace_replay_diagnostics_report_preserves_claim_boundaries() -> Result<()
 	assert!(benchmarking_index.contains("qmd top-10/replay artifact"));
 	assert!(benchmarking_index.contains("ELF trace/admin surfaces"));
 	assert!(adoption_report.contains("| Retrieval quality and local debug UX | `loss` |"));
+	assert!(adoption_report.contains("Letta scenario rows remain"));
+	assert!(adoption_report.contains("blocked or `not_tested`"));
 	assert!(
 		adoption_report
 			.contains("Do not claim qmd's trace/replay artifact win is a broad qmd-over-ELF")
 	);
+	assert!(array_at(&adoption_json, "/adoption_decision/remaining_caveats")?.iter().any(
+		|caveat| {
+			caveat.as_str().is_some_and(|text| {
+				text.contains("Letta scenario rows remain blocked or not_tested")
+			})
+		}
+	));
 
 	assert_trace_replay_adoption_json(&adoption_json)?;
 
@@ -3005,7 +3101,7 @@ fn generated_json_report_renders_markdown() -> Result<()> {
 	assert!(markdown.contains("### Adapter Scenario Judgments"));
 	assert!(markdown.contains("ELF scenario positions: `wins=8, ties=9, loses=1, untested=18`"));
 	assert!(markdown.contains(
-		"Scenario comparison outcomes: `win=8, tie=9, loss=1, not_tested=12, blocked=4, non_goal=2`"
+		"Scenario comparison outcomes: `win=8, tie=9, loss=1, not_tested=11, blocked=5, non_goal=2`"
 	));
 	assert!(markdown.contains("| `claude_mem_live_baseline` | `same_corpus_retrieval`"));
 	assert!(markdown.contains("| `memsearch_live_baseline` | `ttl_expiry_lifecycle`"));
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
index f12b52ae..ef6eafb1 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
@@ -42,9 +42,10 @@ The remaining caveats are material:
   memory, and graph/RAG navigation remain unproven. XY-928 encodes OpenViking staged
   trajectory, hierarchy selection, and recursive/context expansion as blocked fixtures
   behind same-corpus evidence output and missing staged artifacts. XY-927 adds
-  fixture-only `core_archival_memory` coverage, but Letta comparison remains blocked
-  until the selected contained export/readback path exists. mem0 local OSS preference
-  history is measured separately and is an ELF loss on the current correction history
+  fixture-only `core_archival_memory` coverage, but Letta scenario rows remain
+  blocked or `not_tested` until the selected contained export/readback path exists.
+  mem0 local OSS preference history is measured separately and is an ELF loss on the
+  current correction history
   scenario. The XY-923 follow-up also scores qmd's immediate top-10/replay artifact
   ergonomics as stronger than ELF's default stress report, while expansion, fusion,
   and rerank remain untested. XY-932 adds a narrow live operator-debug slice where
diff --git a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
index 67c26673..66cd69b6 100644
--- a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
+++ b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
@@ -170,7 +170,7 @@ records `unique_project_names: 17` for the full project list including ELF.
 | ELF | `fixture_backed` plus `live_real_world` | Fixture aggregate passes except 5 blocked operator or measurement-gate boundaries; live full sweep is `wrong_result`; live capture/write-policy and narrow operator-debug slices pass. | Full live memory evolution, live consolidation, live knowledge pages, live production ops, competitor capture hooks, OpenViking staged trajectory artifacts, and broader operator UI runners. | Memory-evolution diagnostic report, then consolidation/knowledge reports plus agentmemory/claude-mem capture, OpenViking staged trajectory artifacts, and OpenMemory/claude-mem UI runners. |
 | qmd | `live_real_world` plus `live_baseline_only` | Fresh full sweep is five passes behind ELF because qmd misses the delete/TTL tombstone job and keeps capture/write-policy jobs typed `not_encoded`; same-corpus baseline passes; narrow operator-debug live slice ties replay commands but is `wrong_result` for trace hydration and candidate-drop visibility. | Deep retrieval-debug ergonomics and trace replay beyond the narrow operator-debug slice. | qmd/ELF deep retrieval-debug profile with expansion, fusion, rerank, and dropped-candidate traces. |
 | agentmemory | `live_baseline_only` | `lifecycle_fail`; capture comparison is `blocked` because the Docker baseline uses a process-local StateKV Map and in-memory index, with no durable local session/capture path for source ids, exclusions, write-policy audit, or evidence-bound output. | Durable coding-agent continuity and capture hooks. | Durable lifecycle and work-resume/capture adapter report. |
-| mem0/OpenMemory | `live_baseline_only` | Basic local smoke now passes; history/UI/hosted/graph behavior remains `not_encoded`. | Entity history, lifecycle UI, OpenMemory inspection. | Entity-history, deletion-audit, and UI/export readback report. |
+| mem0/OpenMemory | `live_baseline_only` | Basic local smoke and local OSS history/readback pass; OpenMemory UI/export is blocked, hosted Platform export is a non-goal, and optional graph plus broader prompt coverage remain `not_encoded`. | Entity history, lifecycle UI, OpenMemory inspection. | Entity-history, deletion-audit, and UI/export readback report. |
 | memsearch | `live_baseline_only` | Basic canonical Markdown reindex/reload smoke now passes; real-world prompt coverage remains `not_encoded`. | Markdown canonical store and local reindex clarity. | Source-of-truth and retrieval-debug real-world adapter report. |
 | OpenViking | `live_baseline_only` plus `fixture_backed` and `research_gate` | Same-corpus retrieval is `wrong_result`; staged retrieval, hierarchy selection, and recursive/context expansion are encoded as blocked fixtures. | Hierarchical staged context trajectory. | Evidence-bearing retrieval fix, then materialized staged trajectory report. |
 | claude-mem | `live_baseline_only` | `wrong_result`; capture breadth is `not_encoded` because hooks, timeline, observations, viewer capture, and automatic capture review were not run against real-world jobs. | Progressive disclosure and automatic capture review. | Work-resume, operator-debugging, and capture/write-policy report. |
diff --git a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
index a5fb2eca..4e6bd18d 100644
--- a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
+++ b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
@@ -252,12 +252,16 @@ operator_debugging_ux remain `not_encoded` for this live adapter path. qmd keeps
 `live_baseline_only` same-corpus record for update/delete/cold-start checks; that
 record is not a real-world suite win. agentmemory is blocked on durable upstream
 storage for lifecycle proof and capture breadth. mem0/OpenMemory, memsearch, and
-claude-mem currently retain wrong-result, not-encoded, or incomplete live-baseline
-states for the checked-in adapter evidence. OpenViking now reaches its pinned Docker
-local embedding setup but remains a same-corpus `wrong_result` until it returns
-evidence-bearing retrieval output. The checked-in `context_trajectory` fixtures keep
-OpenViking staged retrieval, hierarchy selection, and recursive/context expansion
-blocked until same-corpus evidence ids match and staged artifacts are materialized.
+claude-mem no longer share one live-baseline boundary: mem0/OpenMemory and memsearch
+now pass scoped local baseline paths, while OpenMemory product UI/export, hosted
+Platform behavior, optional graph memory, memsearch real-world prompt/TTL coverage,
+and claude-mem hook/viewer capture remain blocked, unsupported, not encoded, or
+wrong-result for the checked-in adapter evidence. OpenViking now reaches its pinned
+Docker local embedding setup but remains a same-corpus `wrong_result` until it
+returns evidence-bearing retrieval output. The checked-in `context_trajectory`
+fixtures keep OpenViking staged retrieval, hierarchy selection, and recursive/context
+expansion blocked until same-corpus evidence ids match and staged artifacts are
+materialized.
 The expanded RAG and graph-memory records for
 RAGFlow, LightRAG, GraphRAG, Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki,
 gbrain, graphify, and deeper qmd/OpenViking profiles are `research_gate` records until
diff --git a/docs/guide/research/comparison_external_projects.md b/docs/guide/research/comparison_external_projects.md
index 05e12a0d..7173ecb1 100644
--- a/docs/guide/research/comparison_external_projects.md
+++ b/docs/guide/research/comparison_external_projects.md
@@ -50,10 +50,14 @@ Use the evidence class before making claims:
   until a deep dive or adapter run exists.
 
 Current benchmark-grounded scope is narrow. The June 9, 2026 all-project smoke run
-proved encoded same-corpus/lifecycle behavior only for the current adapters: ELF and qmd
-passed their encoded smoke checks; agentmemory passed same-corpus retrieval but failed
-or could not prove durable lifecycle behavior; memsearch, mem0, OpenViking, and
-claude-mem retained `incomplete`, wrong-result, or not-encoded states. All broader suite
+proved encoded same-corpus/lifecycle behavior only for the then-current adapters: ELF
+and qmd passed their encoded smoke checks; agentmemory passed same-corpus retrieval but
+failed or could not prove durable lifecycle behavior; memsearch, mem0, OpenViking, and
+claude-mem retained `incomplete`, wrong-result, or not-encoded states. Later June 11
+follow-ups promoted scoped local mem0/OpenMemory and memsearch baseline paths to passing, while
+OpenMemory UI/export, hosted Platform behavior, optional graph memory, broader
+memsearch prompt/TTL coverage, OpenViking staged trajectory, and claude-mem hook/viewer
+capture remain blocked, unsupported, not encoded, or wrong-result. All broader suite
 fit below is research guidance, not a benchmark result.
 
 The real-world job runner now carries a separate external adapter coverage manifest:
@@ -100,8 +104,8 @@ Project-to-suite map:
 | agentmemory | `rw.operator-continuity`, `rw.resume-evidence`, `rw.lifecycle-staleness` | Cross-agent hooks, MCP/REST packaging, viewer, lifecycle/consolidation claims, and coding-agent continuity focus make it the right reference for daily agent memory ergonomics. | Use durable upstream storage rather than the current in-memory mock; ingest realistic agent sessions through the public hook/API path; prove restart, update/supersede, delete, and viewer/trace readback. | Mixed: benchmark-grounded only for current same-corpus retrieval; current lifecycle evidence is a failure/blocker, while hooks/viewer/consolidation are docs-grounded. Confidence: medium for suite fit, low for durable adapter quality. | ELF is stronger on evidence-bound writes and source-of-truth discipline; agentmemory remains the reference for capture breadth and agent-continuity UX. |
 | qmd | `rw.retrieval-debug`, `rw.lifecycle-staleness`, `rw.resume-evidence` | Its local CLI, structured JSON query output, expansion modes, hybrid routing, weighted fusion, rerank, update, delete, and cold-start path make it the strongest local retrieval-debug baseline. | Run `qmd` over the real-world corpus, capture query JSON, then rewrite/delete corpus files and rerun update/embed/query in fresh processes. | Benchmark-grounded for current smoke retrieval/update/delete/cold-start pass; docs-grounded for deeper query planning ergonomics. Confidence: high for local adapter baseline. | ELF is not yet stronger on local CLI debug ergonomics; treat qmd as the retrieval-debug reference while keeping ELF's service/provenance model. |
 | claude-mem | `rw.operator-continuity`, `rw.resume-evidence`, `rw.retrieval-debug` | Progressive-disclosure search, auto-capture hooks, local viewer, and observation/timeline workflows are directly aligned with real agent resumption jobs. | Exercise a real local repository with hook-driven capture, then evaluate `search -> timeline -> observations` behavior after restart; do not rely on mocked storage. | Docs-grounded for progressive disclosure/viewer; current benchmark adapter evidence is incomplete/wrong-result and mostly not encoded for lifecycle. Confidence: medium for product reference, low for current adapter claims. | ELF has stronger provenance and service boundaries, but claude-mem remains a reference for operator workflow and progressive disclosure UX. |
-| mem0 / OpenMemory | `rw.lifecycle-staleness`, `rw.graph-temporal`, `rw.operator-continuity`, `rw.resume-evidence` | Entity-scoped memory, memory history, expiration, hosted/OSS surfaces, OpenMemory UI, and optional graph memory make it the broadest lifecycle and ecosystem comparison target. | Separate OSS local FastEmbed/Qdrant evidence from hosted Platform claims; prove add/update/delete/history, entity-scoped retrieval, expiration exclusion, OpenMemory UI readback, and optional graph context on the same corpus. | Docs-grounded for lifecycle/entity/graph/UI claims; current local adapter is incomplete/wrong-result for same-corpus retrieval and delete remains not encoded. Confidence: medium for suite fit, low for current adapter quality. | ELF is stronger on deterministic evidence-bound writes; mem0/OpenMemory is the reference for ecosystem reach, entity-scoped history, hosted option, and optional graph UX. |
-| memsearch | `rw.lifecycle-staleness`, `rw.retrieval-debug`, `rw.resume-evidence` | Markdown as canonical memory plus incremental/content-addressed reindexing is a useful model for source transparency and rebuildable derived indexes. | Index a real-world Markdown corpus, mutate/delete files, rerun index/search from fresh processes, and record Milvus mode so Lite/Server/Cloud behavior is not conflated. | Docs-grounded for architecture; current adapter is incomplete/invalid-result, so no pass/fail quality claim is allowed. Confidence: medium for design pattern, low for current adapter evidence. | ELF already owns source-of-truth plus rebuildable index at service level; memsearch remains a reference for simple local canonical-store ergonomics. |
+| mem0 / OpenMemory | `rw.lifecycle-staleness`, `rw.graph-temporal`, `rw.operator-continuity`, `rw.resume-evidence` | Entity-scoped memory, memory history, expiration, hosted/OSS surfaces, OpenMemory UI, and optional graph memory make it the broadest lifecycle and ecosystem comparison target. | Separate OSS local FastEmbed/Qdrant evidence from hosted Platform claims; prove add/update/delete/history, entity-scoped retrieval, expiration exclusion, OpenMemory UI readback, and optional graph context on the same corpus. | Benchmark-grounded for scoped local OSS same-corpus retrieval, update/delete/reload, history, entity filters, local `get_all` readback, and deletion audit; OpenMemory product UI/export remains blocked, hosted Platform is a non-goal, and optional graph plus broader prompt coverage remain not encoded. Confidence: medium for suite fit and scoped local adapter quality, low for product UI/hosted/graph claims. | ELF is stronger on deterministic evidence-bound writes; mem0/OpenMemory remains the reference for ecosystem reach, entity-scoped history, hosted option, and optional graph UX, with local preference-correction history currently measured as an ELF loss. |
+| memsearch | `rw.lifecycle-staleness`, `rw.retrieval-debug`, `rw.resume-evidence` | Markdown as canonical memory plus incremental/content-addressed reindexing is a useful model for source transparency and rebuildable derived indexes. | Index a real-world Markdown corpus, mutate/delete files, rerun index/search from fresh processes, and record Milvus mode so Lite/Server/Cloud behavior is not conflated. | Benchmark-grounded for local same-corpus retrieval, reindex/update/delete, and cold-start reload smoke; no real-world prompt adapter is encoded, so Markdown-first behavior remains baseline scenario evidence rather than suite pass evidence. Confidence: medium for design pattern and scoped local adapter evidence, low for broad real-world adapter coverage. | ELF already owns source-of-truth plus rebuildable index at service level; memsearch remains a reference for simple local canonical-store ergonomics and transparent local reindexing. |
 | OpenViking | `rw.context-trajectory`, `rw.resume-evidence`, `rw.retrieval-debug` | `viking://` context organization, intent analysis, hierarchical retrieval, staged find/search behavior, and session compression are relevant to multi-hop agent context jobs. | Use the pinned Docker local embedding path, then evaluate `add_resource`/`find`/`search` over multi-stage jobs with stage output, hierarchy, and session memory evidence. | Docs-grounded for mechanism; current benchmark adapter reaches local embedding setup and `add_resource`/`find`, but remains `wrong_result` because same-corpus evidence terms are missed. Confidence: medium for architecture reference, low for runnable adapter quality. | ELF has first-class traces and evidence-bound notes, but OpenViking is the reference for hierarchical context trajectory and filesystem-like organization. |
 | llm-wiki | `rw.knowledge-synthesis`, `rw.resume-evidence` | Query/save/lint flows and topic-scoped wiki pages are a useful reference for turning retrieved memory into maintained project knowledge. | Run a corpus-to-wiki job, ask resume/decision questions, require page citations back to source memory, then mutate a stale source and prove lint/repair catches it. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for derived-knowledge fit. | ELF is not yet stronger on derived knowledge pages; llm-wiki should inform rebuildable, evidence-cited dossiers rather than core storage. |
 | gbrain | `rw.knowledge-synthesis`, `rw.operator-continuity` | `compiled_truth`, timeline sections, backlinks, primary-home routing, and enrichment workflows model a living operational brain for project work. | Build or update pages from the real-world corpus, require current-truth plus timeline answers, and prove enrichment/backlink maintenance does not hide unsupported claims. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for operator knowledge UX. | ELF should keep source notes authoritative; gbrain is a reference for presentation, enrichment, and maintenance loops. |
diff --git a/docs/research/2026-06-11-competitor-strength-adoption-report.json b/docs/research/2026-06-11-competitor-strength-adoption-report.json
index 71ad0918..abc0fc70 100644
--- a/docs/research/2026-06-11-competitor-strength-adoption-report.json
+++ b/docs/research/2026-06-11-competitor-strength-adoption-report.json
@@ -12,7 +12,7 @@
       "Live temporal reconciliation remains wrong_result for five of six memory_evolution jobs.",
       "Private-corpus production quality is blocked until an operator-owned manifest exists.",
       "Credentialed provider production-ops gates are blocked until explicit provider setup exists.",
-      "Several competitor strengths remain not_tested or blocked: OpenMemory UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform behavior remains a non-goal, and OpenViking trajectory, Letta core-vs-archival memory, and graph/RAG navigation remain unproven. XY-928 encodes OpenViking staged trajectory, hierarchy selection, and recursive/context expansion as blocked fixtures behind same-corpus evidence output and missing staged artifacts. XY-927 adds six ELF fixture-backed core_archival_memory jobs, but the Letta comparison remains blocked until the selected contained export/readback path exists. mem0 local OSS preference history is measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, and rerank remain untested. XY-932 adds a narrow live operator-debug slice where ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory UI/export and claude-mem viewer workflows remain blocked or not encoded. XY-933 adds an ELF live capture/write-policy self-check, but agentmemory capture breadth is blocked by mocked/in-memory storage and claude-mem hook/viewer capture remains untested."
+      "Several competitor strengths remain not_tested or blocked: OpenMemory UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform behavior remains a non-goal, and OpenViking trajectory, Letta core-vs-archival memory, and graph/RAG navigation remain unproven. XY-928 encodes OpenViking staged trajectory, hierarchy selection, and recursive/context expansion as blocked fixtures behind same-corpus evidence output and missing staged artifacts. XY-927 adds six ELF fixture-backed core_archival_memory jobs, but Letta scenario rows remain blocked or not_tested until the selected contained export/readback path exists. mem0 local OSS preference history is measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, and rerank remain untested. XY-932 adds a narrow live operator-debug slice where ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory UI/export and claude-mem viewer workflows remain blocked or not encoded. XY-933 adds an ELF live capture/write-policy self-check, but agentmemory capture breadth is blocked by mocked/in-memory storage and claude-mem hook/viewer capture remains untested."
     ]
   },
   "evidence_class_terms": [
diff --git a/docs/spec/real_world_agent_memory_benchmark_v1.md b/docs/spec/real_world_agent_memory_benchmark_v1.md
index 459f6972..059a14d8 100644
--- a/docs/spec/real_world_agent_memory_benchmark_v1.md
+++ b/docs/spec/real_world_agent_memory_benchmark_v1.md
@@ -190,12 +190,14 @@ Each `adapters[]` record MUST include:
   optional `suite_id`, `status`, `elf_position`, optional `comparison_outcome`,
   `evidence`, and optional `command` and `artifact`. `elf_position` MUST be one of
   `wins`, `ties`, `loses`, or `untested`. `comparison_outcome`, when present, MUST be
-  one of `win`, `tie`, `loss`, `not_tested`, `blocked`, or `non_goal`. Reports SHOULD
-  derive `comparison_outcome` from `elf_position` when omitted, but SHOULD use the
-  explicit field for scenarios where the legacy ELF-relative position is less precise
-  than the report outcome. Scenario judgments are report inputs for dimension-level
-  comparison; they MUST NOT convert live-baseline-only evidence into real-world suite
-  pass claims.
+  one of `win`, `tie`, `loss`, `not_tested`, `blocked`, or `non_goal`. Scenario rows
+  with `status = "blocked"` MUST set `comparison_outcome = "blocked"` explicitly so the
+  outcome of a blocked evidence path is not derived from `elf_position = "untested"` as
+  `not_tested`. Reports SHOULD derive `comparison_outcome` from `elf_position` when
+  it is omitted on non-blocked rows, but SHOULD use the explicit field for scenarios where
+  the legacy ELF-relative position is less precise than the report outcome. Scenario
+  judgments are report inputs for dimension-level comparison; they MUST NOT convert
+  live-baseline-only evidence into real-world suite pass claims.
 - `evidence`: array of evidence pointers with `kind`, `ref`, and `status`.
 - `notes`: optional bounded explanatory strings.
 - `follow_up`: optional `title` and `reason`.