From fb6e47c307f22e9e04bef753d1c620265b4828d3 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Thu, 11 Jun 2026 19:03:25 +0800
Subject: [PATCH 1/4] {"schema":"decodex/commit/1","summary":"Publish mem0
 history and export evidence","authority":"XY-924"}

---
 README.md                                     |  10 +
 .../memory_projects_manifest.json             | 118 +++++--
 .../src/bin/real_world_job_benchmark.rs       |  90 +++++-
 .../tests/real_world_job_benchmark.rs         | 148 +++++++--
 ...-11-competitor-strength-adoption-report.md |  30 +-
 ...generation-oss-adapter-promotion-report.md |   6 +
 ...em0-openmemory-history-ui-export-report.md | 148 +++++++++
 ...-temporal-history-competitor-gap-report.md |   8 +
 docs/guide/benchmarking/index.md              |   5 +
 .../real_world_agent_memory_benchmark_v1.md   |  17 +-
 scripts/live-baseline-benchmark.sh            | 288 +++++++++++++++++-
 11 files changed, 797 insertions(+), 71 deletions(-)
 create mode 100644 docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md

diff --git a/README.md b/README.md
index 51452873..0d3fd2ef 100644
--- a/README.md
+++ b/README.md
@@ -176,6 +176,14 @@ provider-backed ELF evidence was required.
   typed blocked or incomplete without explicit service, resource, or provider setup.
   These reports preserve the smoke-only boundary and do not create an ELF win claim
   against graph/RAG strengths.
+- mem0/OpenMemory history follow-up after XY-924: the local OSS mem0 adapter now
+  passes encoded preference correction history, entity-scoped personalization, local
+  `get_all` export-style readback, and deletion audit history in
+  `live-baseline-20260611105855`. The comparison records an ELF loss on preference
+  correction history, ties on scoped personalization and delete audit, `not_tested`
+  for local SDK export-style parity, `blocked` for OpenMemory UI/export, and
+  `non_goal` for hosted Platform export and optional graph memory in the local OSS
+  lane.
 - The benchmark runner and report publisher are checked in and Docker-isolated:
   `cargo make baseline-live-docker`, `cargo make baseline-backfill-docker`,
   `cargo make baseline-production-private-addendum`,
@@ -197,6 +205,7 @@ Detailed evidence and interpretation:
 - [qmd and OpenViking Strength-Profile Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-qmd-openviking-strength-profile-report.md)
 - [ELF/qmd Trace Replay Diagnostics Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md)
 - [Graph/RAG Scored Smoke Adapter Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md)
+- [mem0/OpenMemory History and UI Export Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md)
 - [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
 - [Single-User Production Runbook](docs/guide/single_user_production.md)
 - Benchmark contract:
@@ -272,6 +281,7 @@ Detailed comparison, mechanism-level analysis, and source map:
 - [Temporal History Competitor Gap Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md)
 - [ELF/qmd Trace Replay Diagnostics Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md)
 - [Graph/RAG Scored Smoke Adapter Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md)
+- [mem0/OpenMemory History and UI Export Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md)
 - [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
 - [Real-World Agent Memory Benchmark](docs/guide/benchmarking/real_world_agent_memory_benchmark.md)
 - [External Memory Improvement Plan](docs/guide/research/external_memory_improvement_plan.md)
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index 3c023fe2..cfc54fb4 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -1,6 +1,6 @@
 {
   "schema": "elf.real_world_external_adapter_manifest/v1",
-  "manifest_id": "real-world-memory-project-adapters-2026-06-11",
+  "manifest_id": "real-world-memory-project-adapters-2026-06-11-mem0-history",
   "docker_isolation": {
     "default": true,
     "compose_file": "docker-compose.baseline.yml",
@@ -608,12 +608,13 @@
       },
       "run": {
         "status": "pass",
-        "evidence": "Fresh comparable baseline run live-baseline-20260611061612 exercises local OSS mem0 with FastEmbed, Qdrant path storage, Memory.update, Memory.delete, and cold-start reload; mem0 passed 4/4 encoded checks.",
+        "evidence": "Fresh scoped baseline run live-baseline-20260611105855 exercises local OSS mem0 with FastEmbed, Qdrant path storage, Memory.update, Memory.delete, Memory.history, Memory.get_all, entity filters, and cold-start reload; mem0 passed 8/8 encoded checks.",
+        "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
         "artifact": "tmp/live-baseline/live-baseline-report.json"
       },
       "result": {
         "status": "pass",
-        "evidence": "The local OSS mem0 baseline now passes basic same-corpus/update/delete/reload smoke. No real_world_job mem0/OpenMemory adapter, OpenMemory UI, hosted Platform, entity-history, or graph-memory behavior is encoded.",
+        "evidence": "The local OSS mem0 baseline now passes same-corpus retrieval, update/delete/reload, preference correction history, entity-scoped personalization, local get_all export-style readback, and deletion audit history. It still does not launch the OpenMemory UI, hosted Platform export flow, optional graph memory, or a real_world_job prompt adapter.",
         "artifact": "tmp/live-baseline/live-baseline-report.json"
       },
       "capabilities": [
@@ -625,44 +626,69 @@
         {
           "capability": "same_corpus_retrieval",
           "status": "pass",
-          "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports mem0 retrieval_pass with 3/3 same-corpus retrieval checks."
+          "evidence": "Fresh scoped baseline run live-baseline-20260611105855 reports mem0 retrieval_pass with 3/3 same-corpus retrieval checks."
         },
         {
           "capability": "local_lifecycle_update_delete_reload",
           "status": "pass",
-          "evidence": "The Docker runner exercises public Memory.update, Memory.delete, and a new Memory.from_config over the same local Qdrant/history paths; the fresh scoped run reports 4/4 encoded checks passing."
+          "evidence": "The Docker runner exercises public Memory.update, Memory.delete, and a new Memory.from_config over the same local Qdrant/history paths; the fresh scoped run reports those lifecycle checks passing."
+        },
+        {
+          "capability": "preference_correction_history",
+          "status": "pass",
+          "evidence": "The fresh scoped run reports preference_correction_history as pass: Memory.history preserved ADD and UPDATE records with old and current preference text, and search returned only the current correction."
+        },
+        {
+          "capability": "entity_scoped_personalization",
+          "status": "pass",
+          "evidence": "The fresh scoped run reports entity_scoped_personalization as pass: user_id, agent_id, and run_id filters returned the ELF scoped preference and omitted a PubFi scoped preference."
+        },
+        {
+          "capability": "local_get_all_export_readback",
+          "status": "pass",
+          "evidence": "The fresh scoped run reports local_get_all_export_readback as pass: Memory.get_all returned the current scoped preference and omitted the other scope."
+        },
+        {
+          "capability": "deletion_audit_history",
+          "status": "pass",
+          "evidence": "The fresh scoped run reports delete_history_audit_readback as pass: Memory.history exposed a DELETE event and search suppressed the deleted memory."
         },
         {
           "capability": "openmemory_ui_readback",
-          "status": "not_encoded",
-          "evidence": "OpenMemory UI readback is not encoded in the Docker baseline or real-world job runner."
+          "status": "blocked",
+          "evidence": "The Docker live-baseline runner does not launch the OpenMemory web UI, dashboard authentication, or browser export flow. Local SDK get_all readback is measured separately and must not be reused as UI evidence."
         },
         {
           "capability": "hosted_managed_memory_claims",
-          "status": "not_encoded",
-          "evidence": "Hosted mem0 Platform behavior is outside the local OSS Docker adapter and is not counted as a local pass."
+          "status": "unsupported",
+          "evidence": "Hosted mem0 Platform behavior and Platform UI export are outside the local OSS Docker adapter and are non-goals for this local evidence record."
         },
         {
           "capability": "real_world_job_adapter",
           "status": "not_encoded",
           "evidence": "No mem0/OpenMemory adapter currently executes real_world_job prompts and answer scoring."
+        },
+        {
+          "capability": "optional_graph_memory",
+          "status": "not_encoded",
+          "evidence": "Optional graph memory is not enabled in the default local OSS path and remains an opt-in scenario gate rather than a default pass/fail claim."
         }
       ],
       "suites": [
         {
           "suite_id": "memory_evolution",
           "status": "not_encoded",
-          "evidence": "Basic local lifecycle checks now pass in Docker, but real_world_job memory-evolution prompts, preference history, deletion audit readback, and entity history are not encoded for mem0/OpenMemory."
+          "evidence": "Scenario-level local OSS checks now measure preference correction history and deletion audit readback, but no mem0 real_world_job memory_evolution prompt adapter is encoded."
         },
         {
           "suite_id": "personalization",
           "status": "not_encoded",
-          "evidence": "Entity-scoped personalization is not encoded as a real_world_job adapter run."
+          "evidence": "Scenario-level local OSS checks now measure entity-scoped personalization, but no mem0 real_world_job personalization prompt adapter is encoded."
         },
         {
           "suite_id": "operator_debugging_ux",
-          "status": "not_encoded",
-          "evidence": "OpenMemory inspection is not encoded in this runner."
+          "status": "blocked",
+          "evidence": "Local SDK get_all inspection is measured, but OpenMemory UI/export readback is blocked because the Docker runner does not launch the web UI or hosted export flow."
         }
       ],
       "scenarios": [
@@ -671,25 +697,77 @@
           "suite_id": "memory_evolution",
           "status": "pass",
           "elf_position": "ties",
-          "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports ELF passing 8/8 local lifecycle checks and mem0 passing 4/4 same-corpus retrieval, update, delete, and cold-start reload checks. This is a basic local lifecycle tie at the encoded smoke surface, not a claim about OpenMemory UI, hosted behavior, entity history, or graph memory.",
+          "comparison_outcome": "tie",
+          "evidence": "Prior comparable baseline run live-baseline-20260611061612 reports ELF passing 8/8 local lifecycle checks and mem0 passing basic same-corpus retrieval, update, delete, and cold-start reload checks. This remains a basic local lifecycle tie at the encoded smoke surface and is not reused as history/UI evidence.",
           "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
           "artifact": "tmp/live-baseline/live-baseline-report.json"
         },
         {
-          "scenario_id": "preference_entity_history",
+          "scenario_id": "preference_correction_history",
           "suite_id": "personalization",
-          "status": "not_encoded",
+          "status": "pass",
+          "elf_position": "loses",
+          "comparison_outcome": "loss",
+          "evidence": "Fresh scoped baseline run live-baseline-20260611105855 reports mem0 preference_correction_history as pass. The June 11 temporal report records ELF live memory-evolution preference as wrong_result, so the current measured comparison is an ELF loss on this history dimension until ELF temporal reconciliation is fixed.",
+          "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/mem0-checks.json"
+        },
+        {
+          "scenario_id": "entity_scoped_personalization",
+          "suite_id": "personalization",
+          "status": "pass",
+          "elf_position": "ties",
+          "comparison_outcome": "tie",
+          "evidence": "Fresh scoped baseline run live-baseline-20260611105855 reports mem0 entity_scoped_personalization as pass. Existing live real-world evidence records ELF and qmd passing the encoded personalization slice, so this is a measured tie on the current scoped-preference surface.",
+          "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/mem0-checks.json"
+        },
+        {
+          "scenario_id": "delete_audit_readback",
+          "suite_id": "memory_evolution",
+          "status": "pass",
+          "elf_position": "ties",
+          "comparison_outcome": "tie",
+          "evidence": "Fresh scoped baseline run live-baseline-20260611105855 reports mem0 delete_history_audit_readback as pass. The June 11 temporal report records ELF passing the delete/TTL tombstone job, so the current measured delete-audit comparison is a tie.",
+          "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/mem0-checks.json"
+        },
+        {
+          "scenario_id": "local_get_all_export_readback",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
           "elf_position": "untested",
-          "evidence": "mem0/OpenMemory's strongest next comparison is preference and entity-scoped history. The current local OSS Docker baseline does not inspect memory history events, correction chains, or entity-scoped readback under real_world_job scoring.",
-          "artifact": "docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md"
+          "comparison_outcome": "not_tested",
+          "evidence": "Fresh scoped baseline run live-baseline-20260611105855 reports mem0 local_get_all_export_readback as pass. This is local SDK inspection/export-style readback, not OpenMemory UI evidence; ELF has no directly comparable live UI/export scoring row in this run.",
+          "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/mem0-checks.json"
         },
         {
           "scenario_id": "openmemory_ui_export_readback",
           "suite_id": "operator_debugging_ux",
+          "status": "blocked",
+          "elf_position": "untested",
+          "comparison_outcome": "blocked",
+          "evidence": "The local Docker runner does not launch OpenMemory UI/dashboard export, and hosted Platform export remains outside local OSS evidence. Basic lifecycle and local get_all readback are not reused as UI/export proof.",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+        },
+        {
+          "scenario_id": "hosted_platform_export",
+          "suite_id": "operator_debugging_ux",
+          "status": "unsupported",
+          "elf_position": "untested",
+          "comparison_outcome": "non_goal",
+          "evidence": "Hosted mem0 Platform export is explicitly outside the local OSS Docker comparison and is not counted as a local pass, loss, or blocker.",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+        },
+        {
+          "scenario_id": "optional_graph_memory",
+          "suite_id": "memory_evolution",
           "status": "not_encoded",
           "elf_position": "untested",
-          "evidence": "OpenMemory UI/export readback is not exercised by the local OSS Docker baseline and hosted Platform behavior remains out of scope for local OSS evidence.",
-          "artifact": "docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md"
+          "comparison_outcome": "non_goal",
+          "evidence": "Optional graph memory is kept as an opt-in scenario gate. It is not enabled in the default mem0 local OSS run and is not part of the default pass/fail comparison.",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
         }
       ],
       "evidence": [
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
index 7635c0bd..7f0c74e8 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -647,6 +647,17 @@ enum ElfScenarioPosition {
 	Untested,
 }
 
+#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Deserialize, Serialize)]
+#[serde(rename_all = "snake_case")]
+enum ScenarioComparisonOutcome {
+	Win,
+	Tie,
+	Loss,
+	NotTested,
+	Blocked,
+	NonGoal,
+}
+
 #[derive(Debug, Deserialize)]
 struct ExternalAdapterManifest {
 	schema: String,
@@ -736,6 +747,8 @@ struct AdapterScenarioJudgment {
 	suite_id: Option<String>,
 	status: AdapterCoverageStatus,
 	elf_position: ElfScenarioPosition,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	comparison_outcome: Option<ScenarioComparisonOutcome>,
 	evidence: String,
 	#[serde(skip_serializing_if = "Option::is_none")]
 	command: Option<String>,
@@ -789,6 +802,8 @@ struct ExternalAdapterSummary {
 	scenario_status_counts: AdapterStatusCounts,
 	#[serde(default)]
 	scenario_position_counts: ScenarioPositionCounts,
+	#[serde(default)]
+	scenario_outcome_counts: ScenarioOutcomeCounts,
 }
 
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
@@ -812,6 +827,16 @@ struct ScenarioPositionCounts {
 	untested: usize,
 }
 
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+struct ScenarioOutcomeCounts {
+	win: usize,
+	tie: usize,
+	loss: usize,
+	not_tested: usize,
+	blocked: usize,
+	non_goal: usize,
+}
+
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 struct CaptureIntegrationReport {
 	#[serde(default)]
@@ -3993,6 +4018,10 @@ fn accumulate_adapter_summary(
 			&mut summary.scenario_position_counts,
 			scenario.elf_position,
 		);
+		increment_scenario_outcome_count(
+			&mut summary.scenario_outcome_counts,
+			scenario_comparison_outcome(scenario),
+		);
 	}
 }
 
@@ -4022,6 +4051,29 @@ fn increment_scenario_position_count(
 	}
 }
 
+fn scenario_comparison_outcome(scenario: &AdapterScenarioJudgment) -> ScenarioComparisonOutcome {
+	scenario.comparison_outcome.unwrap_or(match scenario.elf_position {
+		ElfScenarioPosition::Wins => ScenarioComparisonOutcome::Win,
+		ElfScenarioPosition::Ties => ScenarioComparisonOutcome::Tie,
+		ElfScenarioPosition::Loses => ScenarioComparisonOutcome::Loss,
+		ElfScenarioPosition::Untested => ScenarioComparisonOutcome::NotTested,
+	})
+}
+
+fn increment_scenario_outcome_count(
+	counts: &mut ScenarioOutcomeCounts,
+	outcome: ScenarioComparisonOutcome,
+) {
+	match outcome {
+		ScenarioComparisonOutcome::Win => counts.win += 1,
+		ScenarioComparisonOutcome::Tie => counts.tie += 1,
+		ScenarioComparisonOutcome::Loss => counts.loss += 1,
+		ScenarioComparisonOutcome::NotTested => counts.not_tested += 1,
+		ScenarioComparisonOutcome::Blocked => counts.blocked += 1,
+		ScenarioComparisonOutcome::NonGoal => counts.non_goal += 1,
+	}
+}
+
 fn capture_integration_report(jobs: &[RealWorldJob]) -> CaptureIntegrationReport {
 	let mut report = CaptureIntegrationReport::default();
 
@@ -4192,6 +4244,10 @@ fn render_markdown_external_adapters(out: &mut String, report: &RealWorldReport)
 			"- ELF scenario positions: `{}`\n",
 			scenario_position_counts_display(&summary.scenario_position_counts)
 		));
+		out.push_str(&format!(
+			"- Scenario comparison outcomes: `{}`\n",
+			scenario_outcome_counts_display(&summary.scenario_outcome_counts)
+		));
 	}
 
 	out.push('\n');
@@ -4242,7 +4298,7 @@ fn render_markdown_adapter_scenarios(out: &mut String, adapters: &[ExternalAdapt
 	}
 
 	out.push_str("\n### Adapter Scenario Judgments\n\n");
-	out.push_str("| Adapter | Scenario | Suite | Status | ELF Position | Evidence |\n");
+	out.push_str("| Adapter | Scenario | Suite | Status | Outcome | Evidence |\n");
 	out.push_str("| --- | --- | --- | --- | --- | --- |\n");
 
 	for adapter in adapters {
@@ -4257,7 +4313,7 @@ fn render_markdown_adapter_scenarios(out: &mut String, adapters: &[ExternalAdapt
 					.map(|suite| format!("`{}`", md_inline(suite)))
 					.unwrap_or_else(|| "`none`".to_string()),
 				adapter_status_str(scenario.status),
-				scenario_position_str(scenario.elf_position),
+				scenario_comparison_outcome_str(scenario_comparison_outcome(scenario)),
 				adapter_scenario_evidence_cell(scenario)
 			));
 		}
@@ -4906,12 +4962,14 @@ fn adapter_status_str(status: AdapterCoverageStatus) -> &'static str {
 	}
 }
 
-fn scenario_position_str(position: ElfScenarioPosition) -> &'static str {
-	match position {
-		ElfScenarioPosition::Wins => "wins",
-		ElfScenarioPosition::Ties => "ties",
-		ElfScenarioPosition::Loses => "loses",
-		ElfScenarioPosition::Untested => "untested",
+fn scenario_comparison_outcome_str(outcome: ScenarioComparisonOutcome) -> &'static str {
+	match outcome {
+		ScenarioComparisonOutcome::Win => "win",
+		ScenarioComparisonOutcome::Tie => "tie",
+		ScenarioComparisonOutcome::Loss => "loss",
+		ScenarioComparisonOutcome::NotTested => "not_tested",
+		ScenarioComparisonOutcome::Blocked => "blocked",
+		ScenarioComparisonOutcome::NonGoal => "non_goal",
 	}
 }
 
@@ -4948,6 +5006,22 @@ fn scenario_position_counts_display(counts: &ScenarioPositionCounts) -> String {
 	.join(", ")
 }
 
+fn scenario_outcome_counts_display(counts: &ScenarioOutcomeCounts) -> String {
+	[
+		("win", counts.win),
+		("tie", counts.tie),
+		("loss", counts.loss),
+		("not_tested", counts.not_tested),
+		("blocked", counts.blocked),
+		("non_goal", counts.non_goal),
+	]
+	.into_iter()
+	.filter(|(_, count)| *count > 0)
+	.map(|(outcome, count)| format!("{outcome}={count}"))
+	.collect::<Vec<_>>()
+	.join(", ")
+}
+
 fn adapter_suite_cell(suites: &[AdapterSuiteCoverage]) -> String {
 	if suites.is_empty() {
 		return "`none`".to_string();
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index bf0b0bbc..6ef0f0d3 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -360,13 +360,25 @@ fn external_adapter_run_summarizes_nonzero_scenario_losses() -> Result<()> {
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/loses")
 			.and_then(Value::as_u64),
-		Some(1)
+		Some(2)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/untested")
 			.and_then(Value::as_u64),
-		Some(8)
+		Some(10)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/scenario_outcome_counts/loss")
+			.and_then(Value::as_u64),
+		Some(2)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/scenario_outcome_counts/not_tested")
+			.and_then(Value::as_u64),
+		Some(7)
 	);
 
 	let adapters = array_at(&report, "/external_adapters/adapters")?;
@@ -387,7 +399,7 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/manifest_id").and_then(Value::as_str),
-		Some("real-world-memory-project-adapters-2026-06-11")
+		Some("real-world-memory-project-adapters-2026-06-11-mem0-history")
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/docker_isolation/default").and_then(Value::as_bool),
@@ -471,13 +483,13 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/capability_status_counts/unsupported")
 			.and_then(Value::as_u64),
-		Some(5)
+		Some(6)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/suite_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(12)
+		Some(13)
 	);
 	assert_eq!(
 		report
@@ -506,13 +518,13 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/unsupported")
 			.and_then(Value::as_u64),
-		Some(1)
+		Some(2)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(1)
+		Some(2)
 	);
 	assert_eq!(
 		report
@@ -536,13 +548,13 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/pass")
 			.and_then(Value::as_u64),
-		Some(5)
+		Some(9)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/not_encoded")
 			.and_then(Value::as_u64),
-		Some(4)
+		Some(3)
 	);
 	assert_eq!(
 		report
@@ -554,19 +566,55 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/ties")
 			.and_then(Value::as_u64),
-		Some(2)
+		Some(4)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/loses")
 			.and_then(Value::as_u64),
-		Some(0)
+		Some(1)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/untested")
 			.and_then(Value::as_u64),
-		Some(9)
+		Some(11)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/scenario_outcome_counts/win")
+			.and_then(Value::as_u64),
+		Some(2)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/scenario_outcome_counts/tie")
+			.and_then(Value::as_u64),
+		Some(4)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/scenario_outcome_counts/loss")
+			.and_then(Value::as_u64),
+		Some(1)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/scenario_outcome_counts/not_tested")
+			.and_then(Value::as_u64),
+		Some(8)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/scenario_outcome_counts/blocked")
+			.and_then(Value::as_u64),
+		Some(1)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/scenario_outcome_counts/non_goal")
+			.and_then(Value::as_u64),
+		Some(2)
 	);
 }
 
@@ -733,14 +781,41 @@ fn assert_first_generation_adapter_records(
 		Some("local_lifecycle_update_delete_reload")
 	);
 	assert_eq!(mem0.pointer("/capabilities/2/status").and_then(Value::as_str), Some("pass"));
-	assert_eq!(mem0.pointer("/capabilities/4/status").and_then(Value::as_str), Some("not_encoded"));
+	assert_eq!(
+		mem0.pointer("/capabilities/3/capability").and_then(Value::as_str),
+		Some("preference_correction_history")
+	);
+	assert_eq!(mem0.pointer("/capabilities/3/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		mem0.pointer("/capabilities/7/capability").and_then(Value::as_str),
+		Some("openmemory_ui_readback")
+	);
+	assert_eq!(mem0.pointer("/capabilities/7/status").and_then(Value::as_str), Some("blocked"));
+	assert_eq!(
+		mem0.pointer("/capabilities/8/capability").and_then(Value::as_str),
+		Some("hosted_managed_memory_claims")
+	);
+	assert_eq!(mem0.pointer("/capabilities/8/status").and_then(Value::as_str), Some("unsupported"));
 	assert_eq!(mem0.pointer("/scenarios/0/status").and_then(Value::as_str), Some("pass"));
 	assert_eq!(mem0.pointer("/scenarios/0/elf_position").and_then(Value::as_str), Some("ties"));
 	assert_eq!(
-		mem0.pointer("/scenarios/2/scenario_id").and_then(Value::as_str),
+		mem0.pointer("/scenarios/1/scenario_id").and_then(Value::as_str),
+		Some("preference_correction_history")
+	);
+	assert_eq!(mem0.pointer("/scenarios/1/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		mem0.pointer("/scenarios/1/comparison_outcome").and_then(Value::as_str),
+		Some("loss")
+	);
+	assert_eq!(
+		mem0.pointer("/scenarios/5/scenario_id").and_then(Value::as_str),
 		Some("openmemory_ui_export_readback")
 	);
-	assert_eq!(mem0.pointer("/scenarios/2/status").and_then(Value::as_str), Some("not_encoded"));
+	assert_eq!(mem0.pointer("/scenarios/5/status").and_then(Value::as_str), Some("blocked"));
+	assert_eq!(
+		mem0.pointer("/scenarios/6/comparison_outcome").and_then(Value::as_str),
+		Some("non_goal")
+	);
 	assert_eq!(
 		memsearch.pointer("/capabilities/2/capability").and_then(Value::as_str),
 		Some("reindex_update_delete_reload")
@@ -2073,7 +2148,10 @@ fn generated_json_report_renders_markdown() -> Result<()> {
 	assert!(markdown.contains("xy844-current-worktree"));
 	assert!(markdown.contains("Existing live-baseline reports remain valid"));
 	assert!(markdown.contains("### Adapter Scenario Judgments"));
-	assert!(markdown.contains("ELF scenario positions: `wins=2, ties=2, untested=9`"));
+	assert!(markdown.contains("ELF scenario positions: `wins=2, ties=4, loses=1, untested=11`"));
+	assert!(markdown.contains(
+		"Scenario comparison outcomes: `win=2, tie=4, loss=1, not_tested=8, blocked=1, non_goal=2`"
+	));
 	assert!(markdown.contains("| `claude_mem_live_baseline` | `same_corpus_retrieval`"));
 	assert!(markdown.contains("| `memsearch_live_baseline` | `ttl_expiry_lifecycle`"));
 
@@ -2101,9 +2179,21 @@ fn external_adapter_markdown_renders_nonzero_scenario_losses() -> Result<()> {
 		"/external_adapters/summary/scenario_position_counts",
 		serde_json::json!({
 			"wins": 2,
-			"ties": 2,
-			"loses": 1,
-			"untested": 8
+			"ties": 4,
+			"loses": 2,
+			"untested": 10
+		}),
+	)?;
+	set_json_pointer(
+		&mut report,
+		"/external_adapters/summary/scenario_outcome_counts",
+		serde_json::json!({
+			"win": 2,
+			"tie": 4,
+			"loss": 2,
+			"not_tested": 7,
+			"blocked": 1,
+			"non_goal": 2
 		}),
 	)?;
 
@@ -2133,9 +2223,12 @@ fn external_adapter_markdown_renders_nonzero_scenario_losses() -> Result<()> {
 
 	let markdown = fs::read_to_string(markdown_path)?;
 
-	assert!(markdown.contains("ELF scenario positions: `wins=2, ties=2, loses=1, untested=8`"));
+	assert!(markdown.contains("ELF scenario positions: `wins=2, ties=4, loses=2, untested=10`"));
 	assert!(markdown.contains(
-		"| `agentmemory_live_baseline` | `basic_same_corpus_retrieval` | `retrieval` | `pass` | `loses` |"
+		"Scenario comparison outcomes: `win=2, tie=4, loss=2, not_tested=7, blocked=1, non_goal=2`"
+	));
+	assert!(markdown.contains(
+		"| `agentmemory_live_baseline` | `basic_same_corpus_retrieval` | `retrieval` | `pass` | `loss` |"
 	));
 
 	Ok(())
@@ -2178,6 +2271,18 @@ fn external_adapter_markdown_omits_scenario_summary_when_manifest_has_no_scenari
 			"untested": 0
 		}),
 	)?;
+	set_json_pointer(
+		&mut report,
+		"/external_adapters/summary/scenario_outcome_counts",
+		serde_json::json!({
+			"win": 0,
+			"tie": 0,
+			"loss": 0,
+			"not_tested": 0,
+			"blocked": 0,
+			"non_goal": 0
+		}),
+	)?;
 
 	let temp_dir =
 		env::temp_dir().join(format!("elf-real-world-no-scenario-test-{}", process::id()));
@@ -2208,6 +2313,7 @@ fn external_adapter_markdown_omits_scenario_summary_when_manifest_has_no_scenari
 	assert!(markdown.contains("External Adapter Coverage"));
 	assert!(!markdown.contains("Scenario coverage statuses:"));
 	assert!(!markdown.contains("ELF scenario positions:"));
+	assert!(!markdown.contains("Scenario comparison outcomes:"));
 	assert!(!markdown.contains("### Adapter Scenario Judgments"));
 
 	Ok(())
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
index 1bf607f7..db01c063 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
@@ -8,7 +8,8 @@ Inputs: `2026-06-11-measurement-coverage-audit.md`,
 `2026-06-11-first-generation-oss-adapter-promotion-report.md`,
 `2026-06-11-qmd-openviking-strength-profile-report.md`,
 `2026-06-11-temporal-history-competitor-gap-report.md`,
-`2026-06-11-graph-rag-scored-smoke-adapter-report.md`, and
+`2026-06-11-graph-rag-scored-smoke-adapter-report.md`,
+`2026-06-11-mem0-openmemory-history-ui-export-report.md`, and
 `2026-06-10-production-adoption-refresh.md`.
 Depends on: `docs/spec/real_world_agent_memory_benchmark_v1.md` and the current
 external adapter manifest.
@@ -35,11 +36,13 @@ The remaining caveats are material:
   exists.
 - Credentialed provider production-ops gates are blocked until explicit provider
   setup exists.
-- Several competitor strengths remain `not_tested`: mem0/OpenMemory history/UI,
-  OpenViking trajectory, Letta core-vs-archival memory, and graph/RAG navigation.
-  The XY-923 follow-up now scores qmd's immediate top-10/replay artifact ergonomics
-  as stronger than ELF's default stress report, while expansion, fusion, rerank, and
-  candidate-drop diagnosis remain untested.
+- Several competitor strengths remain `not_tested`, `blocked`, or `non_goal`:
+  OpenMemory UI/export, hosted mem0 Platform behavior, OpenViking trajectory, Letta
+  core-vs-archival memory, and graph/RAG navigation. mem0 local OSS preference
+  history is now measured separately and is an ELF loss on the current correction
+  history scenario. The XY-923 follow-up also scores qmd's immediate top-10/replay
+  artifact ergonomics as stronger than ELF's default stress report, while
+  expansion, fusion, rerank, and candidate-drop diagnosis remain untested.
 
 ## Evidence Classes
 
@@ -67,6 +70,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 | `cargo make real-world-memory` | `2026-06-11-measurement-coverage-audit.md` | ELF fixture aggregate covers 38 jobs across 11 suites with 36 pass and 2 blocked production-ops operator boundaries. |
 | `cargo make real-world-memory-live-adapters` | `2026-06-11-measurement-coverage-audit.md` | ELF live service adapter reports 18 pass, 5 wrong_result, 2 blocked, and 13 not_encoded jobs; qmd reports 17 pass, 6 wrong_result, 2 blocked, and 13 not_encoded jobs. |
 | `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker` | `2026-06-11-first-generation-oss-adapter-promotion-report.md` | mem0/OpenMemory and memsearch pass basic local baseline smokes; agentmemory remains lifecycle_fail and claude-mem remains wrong_result. |
+| `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `2026-06-11-mem0-openmemory-history-ui-export-report.md` | mem0 local OSS passes preference correction history, entity-scoped personalization, local `get_all` export-style readback, and deletion audit history; OpenMemory UI/export remains blocked and hosted Platform export remains non-goal. |
 | `ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke` | `2026-06-11-temporal-history-competitor-gap-report.md` | Graphiti/Zep temporal smoke remains blocked by `provider_api_key_missing`. |
 | `cargo make graphify-docker-graph-report-smoke` | `2026-06-11-graph-rag-scored-smoke-adapter-report.md` | graphify reaches tiny Docker graph/report scoring but remains wrong_result. |
 | `cargo make baseline-production-synthetic`, `cargo make baseline-backfill-docker`, backup/restore, Qdrant rebuild proof | `2026-06-10-production-adoption-refresh.md` | ELF has provider synthetic, stress, backfill, restore, and rebuild evidence; private-corpus proof is blocked by missing operator-owned manifest. |
@@ -81,14 +85,14 @@ results, or lifecycle failures into one aggregate leaderboard.
 | Project decisions and reversals | `tie` | `fixture_backed`, `live_real_world`, `research_gate`, `not_encoded` | ELF and qmd both pass encoded `project_decisions` jobs; Letta-style core/archival decision memory is not tested. | XY-927 |
 | Retrieval quality | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only` | ELF and qmd both pass encoded live retrieval and stress/same-corpus retrieval evidence. | XY-923 |
 | Retrieval quality and local debug UX | `loss` | `live_baseline_only`, `research_gate`, `wrong_result`, `not_encoded` | The XY-923 trace/replay report scores qmd stronger on immediate top-10 candidate artifacts and short CLI replay commands. ELF keeps useful service trace/admin replay surfaces, and expansion, fusion, rerank-on, and candidate-drop diagnostics remain untested. | XY-923 |
-| Memory evolution and temporal history | `loss` | `fixture_backed`, `live_real_world`, `wrong_result`, `blocked` | ELF fixture memory evolution passes, but live ELF passes only delete/TTL and reports five wrong_result jobs where current-vs-historical state is not reconciled. | XY-905 |
+| Memory evolution and temporal history | `loss` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `wrong_result`, `blocked` | ELF fixture memory evolution passes, but live ELF passes only delete/TTL and reports five wrong_result jobs where current-vs-historical state is not reconciled. The mem0 local OSS preference-correction history scenario is now measured and is also an ELF loss. | XY-905 |
 | Consolidation/proposal review | `not_tested` | `fixture_backed`, `not_encoded` | ELF fixture consolidation passes, but live consolidation proposal generation and review-action scoring are not encoded. | XY-926 |
 | Knowledge page compilation | `not_tested` | `fixture_backed`, `live_real_world`, `wrong_result`, `research_gate`, `not_encoded` | ELF fixture knowledge pages pass, but live knowledge compilation is not encoded; graphify reaches a tiny scored smoke and remains wrong_result. | XY-926, XY-929 |
-| Operator debugging/viewer UX | `not_tested` | `fixture_backed`, `not_encoded`, `research_gate` | ELF fixture operator-debugging UX passes, but live trace/viewer scoring and qmd/OpenMemory/claude-mem UX comparisons are unscored. | XY-923, XY-926 |
+| Operator debugging/viewer UX | `not_tested` | `fixture_backed`, `live_baseline_only`, `blocked`, `not_encoded`, `research_gate` | ELF fixture operator-debugging UX passes. mem0 local SDK `get_all` readback is measured, but OpenMemory UI/export remains blocked and must not be inferred from SDK readback. Live trace/viewer scoring and qmd/OpenMemory/claude-mem UX comparisons remain unscored. | XY-923, XY-926 |
 | Capture/write policy and redaction | `not_tested` | `fixture_backed`, `live_baseline_only`, `blocked`, `not_encoded` | ELF fixture capture/write-policy jobs pass, but live capture integration and agentmemory/claude-mem capture hooks are not comparable yet. | XY-925, XY-926 |
 | Production ops, restore, backfill, and rebuild | `win` | `live_baseline_only`, `blocked` | ELF has the strongest measured local production-operation story: provider synthetic, stress, resumable backfill, backup/restore, and Qdrant rebuild evidence. | XY-930 |
 | Private corpus and provider boundaries | `blocked` | `blocked` | Private production profile fails closed without an operator-owned manifest; provider-backed production-ops gates require explicit credentials. | XY-930 |
-| Personalization and scoped preferences | `tie` | `fixture_backed`, `live_real_world`, `not_encoded` | ELF and qmd both pass the single encoded live personalization job; mem0/OpenMemory and Letta personalization/history are not encoded. | XY-924, XY-927 |
+| Personalization and scoped preferences | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `not_encoded` | ELF and qmd both pass the single encoded live personalization job. mem0 local OSS now passes entity-scoped personalization, so scoped preference behavior is a measured tie; preference correction history remains a separate ELF loss. | XY-927 |
 | Context trajectory and hierarchical retrieval | `not_tested` | `live_baseline_only`, `research_gate`, `wrong_result`, `not_encoded` | OpenViking reaches the pinned Docker local embedding path but misses expected same-corpus evidence; staged trajectory/hierarchy scoring is not encoded. | XY-928 |
 | Core-vs-archival memory | `not_tested` | `research_gate`, `not_encoded` | ELF has core block semantics in the service contract, but comparable core-vs-archival jobs and a contained Letta export path are not encoded. | XY-927 |
 | Graph/RAG navigation and citations | `not_tested` | `smoke_only`, `research_gate`, `blocked`, `wrong_result`, `not_encoded` | Graph/RAG smokes produce scored or typed non-pass adapter reports where possible, but broad graph/RAG navigation and citation quality are not tested. | XY-929 |
@@ -99,7 +103,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 | --- | --- | --- | --- |
 | XY-905 | P0 | Backlog | Live temporal reconciliation answer and trace contract. |
 | XY-923 | P0 | Backlog | qmd trace-level replay and wrong-result diagnostics. |
-| XY-924 | P0 | Backlog | mem0/OpenMemory history and UI-export comparison. |
+| XY-924 | P0 | Encoded local OSS history; UI/export still gated | mem0/OpenMemory local OSS history and SDK export-style readback are measured; OpenMemory UI/export still needs a UI runner before any product-UX claim. |
 | XY-925 | P1 | Backlog | First-generation OSS continuity and source-store adapters. |
 | XY-926 | P1 | Backlog | Live operator-debugging, capture, consolidation, and knowledge-page suites. |
 | XY-927 | P1 | Backlog | Letta-style core-vs-archival memory comparison. |
@@ -125,8 +129,10 @@ results, or lifecycle failures into one aggregate leaderboard.
 - Do not claim ELF broadly beats qmd.
 - Do not claim qmd's trace/replay artifact win is a broad qmd-over-ELF memory-system
   or retrieval-quality win.
-- Do not claim ELF beats mem0/OpenMemory on history, UI/export, hosted behavior, or
-  graph memory.
+- Do not claim ELF beats mem0/OpenMemory on preference history, UI/export, hosted
+  behavior, or graph memory. The local OSS correction-history scenario is currently
+  an ELF loss, while OpenMemory UI/export, hosted behavior, and graph memory remain
+  outside measured local OSS evidence.
 - Do not claim ELF beats OpenViking on staged context trajectory.
 - Do not claim ELF beats Letta on core-vs-archival memory.
 - Do not claim graph/RAG parity from smoke-only evidence.
diff --git a/docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md b/docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md
index 368bbb86..63b44b2b 100644
--- a/docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md
+++ b/docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md
@@ -14,6 +14,12 @@ gates.
 This is benchmark/report evidence only. No ELF retrieval, ranking, memory-quality, or
 service behavior optimization is implemented here.
 
+Update after XY-924: mem0/OpenMemory history and local SDK export-style readback are
+now measured in
+`2026-06-11-mem0-openmemory-history-ui-export-report.md`. The basic lifecycle result
+in this report remains valid, but the mem0 history/UI rows below are historical
+pre-XY-924 gaps and must not be treated as the current complete mem0 comparison.
+
 The updated external adapter manifest now includes scenario-level judgments for the
 first-generation OSS memory projects. These judgments are intentionally narrower than
 suite passes:
diff --git a/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md b/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md
new file mode 100644
index 00000000..7ccef030
--- /dev/null
+++ b/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md
@@ -0,0 +1,148 @@
+# mem0/OpenMemory History and UI Export Report - June 11, 2026
+
+Goal: Add scenario-level mem0/OpenMemory history, personalization, deletion-audit,
+and export-readback evidence without promoting basic lifecycle smoke into UI or
+hosted Platform claims.
+Read this when: You need the current XY-924 comparison between ELF and
+mem0/OpenMemory for entity-scoped history, preference correction, deletion audit,
+personalization, OpenMemory inspection/export, hosted Platform export, or optional
+graph memory.
+Inputs: Fresh scoped mem0 Docker baseline run, refreshed real-world external adapter
+manifest, generated real-world memory report, and the June 11 first-generation,
+temporal/history, and competitor-strength reports.
+Depends on: `docs/spec/real_world_agent_memory_benchmark_v1.md`,
+`scripts/live-baseline-benchmark.sh`, and
+`apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`.
+Outputs: Per-scenario outcomes using `win`, `tie`, `loss`, `not_tested`, `blocked`,
+and `non_goal`, plus command and artifact evidence for each measured claim.
+
+## Executive Judgment
+
+The XY-924 objective is now encoded for the reproducible local OSS surface.
+
+mem0/OpenMemory now has fresh local OSS evidence for behavior beyond the basic
+lifecycle smoke:
+
+- `preference_correction_history`: `pass`
+- `entity_scoped_personalization`: `pass`
+- `local_get_all_export_readback`: `pass`
+- `delete_history_audit_readback`: `pass`
+
+The comparison is intentionally narrower than a hosted/OpenMemory product verdict.
+The local run measures the mem0 OSS SDK and local FastEmbed/Qdrant/history paths in
+Docker. It does not launch the OpenMemory web UI, does not exercise hosted mem0
+Platform export jobs, and does not enable optional graph memory.
+
+## Fresh Evidence
+
+| Command | Result | Runtime | Artifact |
+| --- | --- | ---: | --- |
+| `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `pass`; mem0 `8/8` encoded checks pass | 42.89 seconds wall; 41 seconds project runtime | `tmp/live-baseline/live-baseline-report.json`, `tmp/live-baseline/mem0-checks.json` |
+| `cargo make real-world-memory` | `pass`; refreshed external adapter report published | 220.57 seconds | `tmp/real-world-memory/real-world-memory-report.json`, `tmp/real-world-memory/real-world-memory-report.md` |
+
+Fresh mem0 run id: `live-baseline-20260611105855`.
+
+Generated external adapter summary:
+
+- Scenario statuses: `unsupported=2`, `blocked=2`, `wrong_result=1`,
+  `lifecycle_fail=1`, `pass=9`, `not_encoded=3`.
+- Legacy ELF positions: `wins=2`, `ties=4`, `loses=1`, `untested=11`.
+- Normalized comparison outcomes: `win=2`, `tie=4`, `loss=1`,
+  `not_tested=8`, `blocked=1`, `non_goal=2`.
+
+## Scenario Outcomes
+
+| Scenario | mem0/OpenMemory evidence | ELF comparison outcome | Status | Command | Artifact |
+| --- | --- | --- | --- | --- | --- |
+| Basic local lifecycle | mem0 passes same-corpus retrieval, update, delete, and cold-start reload in the prior first-generation baseline. | `tie` | `pass` | `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker` | `tmp/live-baseline/live-baseline-report.json` |
+| Preference correction history | `Memory.history` preserves old and current preference records; search returns only the current correction. | `loss` | `pass` | `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `tmp/live-baseline/mem0-checks.json` |
+| Entity-scoped personalization | `search()` with `user_id`, `agent_id`, and `run_id` filters returns the ELF-scoped preference and omits a PubFi-scoped preference. | `tie` | `pass` | `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `tmp/live-baseline/mem0-checks.json` |
+| Delete audit readback | `Memory.history` exposes a `DELETE` event and post-delete search suppresses the deleted memory. | `tie` | `pass` | `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `tmp/live-baseline/mem0-checks.json` |
+| Local SDK export-style readback | `Memory.get_all` returns the current scoped preference and omits the other scope. | `not_tested` | `pass` | `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `tmp/live-baseline/mem0-checks.json` |
+| OpenMemory UI/export readback | No local UI/dashboard export flow is launched by the Docker runner. | `blocked` | `blocked` | Not run; outside current local runner. | `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json` |
+| Hosted mem0 Platform export | Hosted Platform export is outside local OSS evidence. | `non_goal` | `unsupported` | Not run; local OSS comparison only. | `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json` |
+| Optional graph memory | Graph memory is not enabled in the default local OSS run. | `non_goal` | `not_encoded` | Not run; opt-in scenario gate. | `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json` |
+
+## Evidence Details
+
+The fresh mem0 check artifact records eight passing checks:
+
+- `same_corpus_retrieval`
+- `update_replaces_note_text`
+- `preference_correction_history`
+- `entity_scoped_personalization`
+- `local_get_all_export_readback`
+- `delete_suppresses_retrieval`
+- `delete_history_audit_readback`
+- `cold_start_recovery_search`
+
+The `preference_correction_history` check verifies all of:
+
+- history is available;
+- history contains the original preference;
+- history contains the corrected preference;
+- search contains the corrected preference;
+- search omits the old preference.
+
+The `delete_history_audit_readback` check verifies all of:
+
+- history is available;
+- history contains a delete event;
+- search suppresses the deleted memory.
+
+The local SDK export-style readback check is intentionally named separately from UI
+export. It only proves local `get_all` scoped readback through the OSS SDK.
+
+## Source And Product Boundary
+
+Official mem0 documentation distinguishes the OSS/self-hosted surface from hosted
+Platform API paths. The OSS REST page documents CRUD/search/update/delete/reset
+operations by `user_id`, `agent_id`, or `run_id`, an OpenAPI explorer at `/docs`, and
+memory history endpoints. The export guide distinguishes bulk `get_all()`, semantic
+search, structured exports, and Platform UI exports.
+
+This report uses those docs only to set the claim boundary:
+
+- local OSS SDK `history`, `search`, and `get_all` behavior is measurable here;
+- OpenMemory browser/dashboard export is not measured here;
+- hosted Platform export is a `non_goal` for this local OSS lane;
+- optional graph memory remains an opt-in scenario, not a default pass/fail claim.
+
+References:
+
+- Mem0 OSS REST API Server: `https://docs.mem0.ai/open-source/features/rest-api`
+- Mem0 Export Stored Memories: `https://docs.mem0.ai/cookbooks/essentials/exporting-memories`
+
+## Claim Boundaries
+
+Allowed:
+
+- mem0/OpenMemory local OSS passes the new encoded preference-correction history,
+  entity-scoped personalization, deletion-audit history, and local `get_all`
+  readback checks in run `live-baseline-20260611105855`.
+- ELF currently has a measured `loss` against mem0 on the preference correction
+  history dimension because the June 11 temporal/history report records ELF's live
+  memory-evolution preference job as `wrong_result`.
+- ELF and mem0 currently `tie` on the encoded entity-scoped personalization and
+  delete-audit surfaces.
+- OpenMemory UI/export readback is `blocked` until the runner launches and inspects
+  the UI/export flow.
+- Hosted mem0 Platform export and optional graph memory are `non_goal` for this
+  local OSS comparison.
+
+Not allowed:
+
+- Do not reuse the basic lifecycle pass as history, UI, hosted, or graph-memory
+  evidence.
+- Do not claim OpenMemory UI/export quality from local SDK `get_all`.
+- Do not claim hosted mem0 Platform behavior from the local OSS run.
+- Do not treat optional graph memory as a default mem0 pass or ELF loss.
+- Do not convert `blocked`, `unsupported`, `not_encoded`, or `non_goal` scenarios
+  into wins or losses.
+
+## Follow-Up Gate
+
+The next fair UI/export comparison requires a bounded runner that starts OpenMemory,
+loads the same local memories, captures authenticated inspection/export readback, and
+publishes a browser/API artifact. That is separate from the local SDK `get_all`
+export-style readback added here.
diff --git a/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md b/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md
index dd86fde4..d0749918 100644
--- a/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md
+++ b/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md
@@ -17,6 +17,14 @@ The overall goal is not complete. ELF does not yet have complete, comparable
 benchmark wins across all tracked memory projects and all user-important memory
 scenarios.
 
+Update after XY-924: mem0/OpenMemory local OSS history and local SDK export-style
+readback are now measured in
+`2026-06-11-mem0-openmemory-history-ui-export-report.md`. That report records mem0
+passes for preference correction history, entity-scoped personalization, deletion
+audit history, and local `get_all` readback, while keeping OpenMemory UI/export
+blocked and hosted Platform export plus optional graph memory as local-lane
+non-goals.
+
 The current evidence supports a narrower judgment:
 
 - ELF remains a strong personal-production foundation because its core source of
diff --git a/docs/guide/benchmarking/index.md b/docs/guide/benchmarking/index.md
index efab4bb0..f6795dfb 100644
--- a/docs/guide/benchmarking/index.md
+++ b/docs/guide/benchmarking/index.md
@@ -92,6 +92,11 @@ cleanup, use `docs/guide/single_user_production.md`.
   competitor-strength adoption report with the bounded personal-production decision,
   scenario-level win/tie/loss/not-tested matrix, claim boundaries, and optimization
   issue queue.
+- `2026-06-11-mem0-openmemory-history-ui-export-report.md`: XY-924
+  mem0/OpenMemory local OSS history, preference-correction, deletion-audit,
+  personalization, and export-readback comparison with normalized
+  win/tie/loss/not-tested/blocked/non-goal outcomes and explicit hosted/UI/graph
+  non-claims.
 - `real_world_agent_memory_benchmark.md`: operator overview for the v1 real-world
   agent memory benchmark contract, including suite taxonomy, typed report states,
   knowledge-compilation fixture tasks, and the production-ops fixture target.
diff --git a/docs/spec/real_world_agent_memory_benchmark_v1.md b/docs/spec/real_world_agent_memory_benchmark_v1.md
index fdc2f571..5bb56574 100644
--- a/docs/spec/real_world_agent_memory_benchmark_v1.md
+++ b/docs/spec/real_world_agent_memory_benchmark_v1.md
@@ -175,10 +175,15 @@ Each `adapters[]` record MUST include:
 - `suites`: array of real-world suite coverage records with `suite_id`, `status`, and
   `evidence`.
 - `scenarios`: optional array of scenario judgment records with `scenario_id`,
-  optional `suite_id`, `status`, `elf_position`, `evidence`, and optional `command`
-  and `artifact`. `elf_position` MUST be one of `wins`, `ties`, `loses`, or
-  `untested`. Scenario judgments are report inputs for dimension-level comparison;
-  they MUST NOT convert live-baseline-only evidence into real-world suite pass claims.
+  optional `suite_id`, `status`, `elf_position`, optional `comparison_outcome`,
+  `evidence`, and optional `command` and `artifact`. `elf_position` MUST be one of
+  `wins`, `ties`, `loses`, or `untested`. `comparison_outcome`, when present, MUST be
+  one of `win`, `tie`, `loss`, `not_tested`, `blocked`, or `non_goal`. Reports SHOULD
+  derive `comparison_outcome` from `elf_position` when omitted, but SHOULD use the
+  explicit field for scenarios where the legacy ELF-relative position is less precise
+  than the report outcome. Scenario judgments are report inputs for dimension-level
+  comparison; they MUST NOT convert live-baseline-only evidence into real-world suite
+  pass claims.
 - `evidence`: array of evidence pointers with `kind`, `ref`, and `status`.
 - `notes`: optional bounded explanatory strings.
 - `follow_up`: optional `title` and `reason`.
@@ -580,7 +585,9 @@ Reports MUST include:
 - external adapter coverage when an external adapter manifest is loaded, preserving
   `fixture_backed`, `live_baseline_only`, `live_real_world`, `research_gate`,
   `real`, `mocked`, `unsupported`, `blocked`, `incomplete`, `wrong_result`,
-  `lifecycle_fail`, `pass`, and `not_encoded` distinctions.
+  `lifecycle_fail`, `pass`, and `not_encoded` distinctions. Scenario summaries MUST
+  preserve status counts, legacy `elf_position` counts, and normalized
+  `comparison_outcome` counts when scenario judgments are present.
 
 Reports that encode `memory_evolution` jobs SHOULD also include stale-answer counts,
 conflict detection counts, update rationale availability, and temporal-validity
diff --git a/scripts/live-baseline-benchmark.sh b/scripts/live-baseline-benchmark.sh
index fe607648..15365610 100755
--- a/scripts/live-baseline-benchmark.sh
+++ b/scripts/live-baseline-benchmark.sh
@@ -2073,6 +2073,26 @@ project_mem0() {
       "status": "real",
       "surface": "new Memory.from_config over the same local Qdrant/history paths"
     },
+    "preference_history": {
+      "status": "real",
+      "surface": "Memory.history after a local preference correction update"
+    },
+    "entity_scope_personalization": {
+      "status": "real",
+      "surface": "Memory.add/search with user_id, agent_id, and run_id filters"
+    },
+    "deletion_audit": {
+      "status": "real",
+      "surface": "Memory.history after Memory.delete"
+    },
+    "local_export_readback": {
+      "status": "real",
+      "surface": "Memory.get_all over local OSS storage for inspection/export-style readback"
+    },
+    "openmemory_ui_export": {
+      "status": "blocked",
+      "surface": "the Docker live-baseline runner does not launch the OpenMemory web UI or hosted Platform export flow"
+    },
     "scale_stress_profile": {
       "status": "incomplete",
       "surface": "smoke lifecycle path is encoded; scale/stress timing and resource thresholds are not yet calibrated"
@@ -2170,21 +2190,103 @@ for text, source in docs:
 
 
 def result_entries(search):
-    return search.get("results", []) if isinstance(search, dict) else []
+    if isinstance(search, dict):
+        for key in ("results", "memories"):
+            entries = search.get(key)
+            if isinstance(entries, list):
+                return entries
+    if isinstance(search, list):
+        return search
+    return []
 
 
-def search_memory(memory_instance, query_text):
+def search_memory(memory_instance, query_text, filters=None):
     return memory_instance.search(
         query_text,
-        filters={"user_id": "elf-bench"},
+        filters=filters or {"user_id": "elf-bench"},
         top_k=top_k,
         threshold=0.0,
     )
 
 
+def json_lower(value):
+    return json.dumps(value, default=str).lower()
+
+
+def contains_terms(value, terms):
+    text = json_lower(value)
+    return all(term.lower() in text for term in terms)
+
+
+def first_memory_id(add_result):
+    results = add_result.get("results", []) if isinstance(add_result, dict) else []
+    if results and isinstance(results[0], dict):
+        return results[0].get("id")
+    return None
+
+
+def memory_history(memory_instance, memory_id):
+    if not hasattr(memory_instance, "history"):
+        return {
+            "available": False,
+            "history": None,
+            "error": "Memory.history is unavailable",
+        }
+    try:
+        return {
+            "available": True,
+            "history": memory_instance.history(memory_id),
+            "error": None,
+        }
+    except Exception as exc:
+        return {
+            "available": False,
+            "history": None,
+            "error": repr(exc),
+        }
+
+
+def get_all_memories(memory_instance, filters):
+    if not hasattr(memory_instance, "get_all"):
+        return {
+            "available": False,
+            "memories": None,
+            "error": "Memory.get_all is unavailable",
+        }
+    try:
+        return {
+            "available": True,
+            "memories": memory_instance.get_all(filters=filters),
+            "error": None,
+        }
+    except TypeError:
+        try:
+            return {
+                "available": True,
+                "memories": memory_instance.get_all(
+                    user_id=filters.get("user_id"),
+                    agent_id=filters.get("agent_id"),
+                    run_id=filters.get("run_id"),
+                ),
+                "error": None,
+            }
+        except Exception as exc:
+            return {
+                "available": False,
+                "memories": None,
+                "error": repr(exc),
+            }
+    except Exception as exc:
+        return {
+            "available": False,
+            "memories": None,
+            "error": repr(exc),
+        }
+
+
 def matches_expected(search, expected_doc, expected_terms):
     for entry in result_entries(search):
-        entry_text = json.dumps(entry, default=str).lower()
+        entry_text = json_lower(entry)
         source = ((entry.get("metadata") or {}).get("source") or "")
         if source == expected_doc and all(
             term.lower() in entry_text for term in expected_terms
@@ -2304,6 +2406,152 @@ else:
         )
     )
 
+history_filters = {  # entity scope reused by the preference, personalization, and get_all checks below
+    "user_id": "elf-history-user",
+    "agent_id": "elf-history-agent",
+    "run_id": "elf-project",
+}
+old_preference = (
+    "Preference v1 for ELF: provide verbose tutorial explanations for every answer."
+)
+current_preference = (
+    "Preference v2 for ELF: answer concisely with evidence-linked bullets."
+)
+preference_add = memory.add(
+    old_preference,
+    user_id=history_filters["user_id"],
+    agent_id=history_filters["agent_id"],
+    run_id=history_filters["run_id"],
+    metadata={"source": "preference-history", "kind": "preference"},
+    infer=False,  # NOTE(review): presumably stores the text as given, without LLM fact extraction - confirm
+)
+preference_id = first_memory_id(preference_add)
+if not preference_id:  # without an id the update/history steps below cannot run, so record "incomplete"
+    checks.append(
+        make_check(
+            "preference_correction_history",
+            "incomplete",
+            "The preference memory id was not returned, so correction history could not be inspected.",
+            {"add_result": preference_add},
+        )
+    )
+else:
+    preference_update = memory.update(  # replaces v1 with v2 under the same id; the history check expects both texts
+        preference_id,
+        current_preference,
+        metadata={"source": "preference-history", "kind": "preference"},
+    )
+    preference_history = memory_history(memory, preference_id)
+    preference_search = search_memory(
+        memory,
+        "How should answers be written for the ELF project?",
+        history_filters,
+    )
+    history_has_old = contains_terms(preference_history["history"], ["verbose tutorial"])
+    history_has_current = contains_terms(
+        preference_history["history"],
+        ["concise", "evidence-linked"],
+    )
+    search_has_current = contains_terms(
+        result_entries(preference_search),
+        ["concise", "evidence-linked"],
+    )
+    search_omits_old = "verbose tutorial" not in json_lower(result_entries(preference_search))
+    if not preference_history["available"]:
+        preference_status = "blocked"
+        preference_reason = "Memory.history could not be read for the updated preference memory."
+    elif history_has_old and history_has_current and search_has_current and search_omits_old:
+        preference_status = "pass"
+        preference_reason = "mem0 history preserved the old and current preference while search returned only the current correction."
+    else:
+        preference_status = "lifecycle_fail"
+        preference_reason = "mem0 did not expose a clean preference correction chain with current-only search readback."
+    checks.append(
+        make_check(
+            "preference_correction_history",
+            preference_status,
+            preference_reason,
+            {
+                "memory_id": preference_id,
+                "add_result": preference_add,
+                "update_result": preference_update,
+                "history_available": preference_history["available"],
+                "history_error": preference_history["error"],
+                "history_has_old": history_has_old,
+                "history_has_current": history_has_current,
+                "search_has_current": search_has_current,
+                "search_omits_old": search_omits_old,
+                "history": preference_history["history"],
+                "search": preference_search,
+            },
+        )
+    )
+
+other_scope_add = memory.add(  # decoy preference: same user and agent, different run_id
+    "Preference for PubFi: answer in long-form Chinese prose with no bullets.",
+    user_id=history_filters["user_id"],
+    agent_id=history_filters["agent_id"],
+    run_id="pubfi-project",
+    metadata={"source": "pubfi-preference", "kind": "preference"},
+    infer=False,
+)
+entity_search = search_memory(
+    memory,
+    "What answer style preference applies here?",
+    history_filters,
+)
+entity_search_text = json_lower(result_entries(entity_search))
+entity_has_current = "evidence-linked bullets" in entity_search_text  # v2 text exists only if the preference update above ran
+entity_omits_other = "long-form chinese" not in entity_search_text  # the decoy's text must not appear in the scoped results
+checks.append(
+    make_check(
+        "entity_scoped_personalization",
+        "pass" if entity_has_current and entity_omits_other else "lifecycle_fail",
+        "mem0 search respected user_id, agent_id, and run_id filters for the current preference scope."
+        if entity_has_current and entity_omits_other
+        else "mem0 entity-scoped search did not isolate the current preference from another run/project scope.",
+        {
+            "current_memory_id": preference_id,
+            "other_scope_add": other_scope_add,
+            "filters": history_filters,
+            "has_current": entity_has_current,
+            "omits_other_scope": entity_omits_other,
+            "search": entity_search,
+        },
+    )
+)
+
+export_readback = get_all_memories(memory, history_filters)  # failures come back in ["error"] rather than raising
+export_has_current = contains_terms(
+    export_readback["memories"],
+    ["concise", "evidence-linked"],
+)
+export_omits_other = "long-form chinese" not in json_lower(export_readback["memories"])
+if not export_readback["available"]:  # "blocked" = get_all missing or raised; kept distinct from a wrong readback
+    export_status = "blocked"
+    export_reason = "Memory.get_all could not be read for local OSS inspection/export-style evidence."
+elif export_has_current and export_omits_other:
+    export_status = "pass"
+    export_reason = "mem0 get_all returned local export-style readback for the current scoped preference without the other scope."
+else:
+    export_status = "lifecycle_fail"
+    export_reason = "mem0 get_all did not return the current scoped preference cleanly for local export-style readback."
+checks.append(
+    make_check(
+        "local_get_all_export_readback",
+        export_status,
+        export_reason,
+        {
+            "available": export_readback["available"],
+            "error": export_readback["error"],
+            "filters": history_filters,
+            "has_current": export_has_current,
+            "omits_other_scope": export_omits_other,
+            "memories": export_readback["memories"],
+        },
+    )
+)
+
 delete_query = next(
     (
         query
@@ -2352,6 +2600,36 @@ else:
             },
         )
     )
+    delete_history = memory_history(memory, delete_id)
+    delete_history_has_event = delete_history["available"] and contains_terms(
+        delete_history["history"],
+        ["delete"],
+    )
+    if not delete_history["available"]:
+        delete_audit_status = "blocked"
+        delete_audit_reason = "Memory.history could not be read after delete, so deletion audit readback is blocked."
+    elif delete_history_has_event and not deleted_still_matched:
+        delete_audit_status = "pass"
+        delete_audit_reason = "mem0 history exposed a delete event and search suppressed the deleted memory."
+    else:
+        delete_audit_status = "lifecycle_fail"
+        delete_audit_reason = "mem0 did not expose a delete audit event while suppressing the deleted memory."
+    checks.append(
+        make_check(
+            "delete_history_audit_readback",
+            delete_audit_status,
+            delete_audit_reason,
+            {
+                "memory_id": delete_id,
+                "source": delete_source,
+                "history_available": delete_history["available"],
+                "history_error": delete_history["error"],
+                "history_has_delete_event": delete_history_has_event,
+                "deleted_still_matched": deleted_still_matched,
+                "history": delete_history["history"],
+            },
+        )
+    )
 
 del memory
 gc.collect()
@@ -2429,7 +2707,7 @@ PY
       else
         retrieval_status="retrieval_wrong_result"
       fi
-      json_record "${project}" "${repo}" "${head}" "${typed_status}" "${retrieval_status}" "$(typed_status_reason "${project}" "${typed_status}")" "${project}.log" "pip install -e . fastembed ollama; Memory.from_config; add/update/delete/search"
+      json_record "${project}" "${repo}" "${head}" "${typed_status}" "${retrieval_status}" "$(typed_status_reason "${project}" "${typed_status}")" "${project}.log" "pip install -e . fastembed ollama; Memory.from_config; add/update/delete/history/get_all/search"
       return
     fi
     json_record "${project}" "${repo}" "${head}" "incomplete" "invalid_json_result" "mem0 command completed, but did not produce a valid benchmark result" "${project}.log" "pip install -e . fastembed ollama; Memory.from_config; add infer=false; search"

From 5af613ac567d0bff214c7ffdcbfae15dece8bd62 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Thu, 11 Jun 2026 19:15:18 +0800
Subject: [PATCH 2/4] {"schema":"decodex/commit/1","summary":"Tighten mem0
 deletion-audit evidence","authority":"XY-924"}

---
 README.md                                     |  2 +-
 .../memory_projects_manifest.json             | 24 +++++++++----------
 .../tests/real_world_job_benchmark.rs         | 21 ++++++++++++++++
 ...em0-openmemory-history-ui-export-report.md | 14 +++++------
 scripts/live-baseline-benchmark.sh            | 23 ++++++++++++++++--
 5 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 0d3fd2ef..c79a217b 100644
--- a/README.md
+++ b/README.md
@@ -179,7 +179,7 @@ provider-backed ELF evidence was required.
 - mem0/OpenMemory history follow-up after XY-924: the local OSS mem0 adapter now
   passes encoded preference correction history, entity-scoped personalization, local
   `get_all` export-style readback, and deletion audit history in
-  `live-baseline-20260611105855`. The comparison records ELF as a loss on preference
+  `live-baseline-20260611111119`. The comparison records ELF as a loss on preference
   correction history, ties on scoped personalization and delete audit, `not_tested`
   for local SDK export-style parity, `blocked` for OpenMemory UI/export, and
   `non_goal` for hosted Platform export and optional graph memory in the local OSS
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index cfc54fb4..9812feae 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -608,7 +608,7 @@
       },
       "run": {
         "status": "pass",
-        "evidence": "Fresh scoped baseline run live-baseline-20260611105855 exercises local OSS mem0 with FastEmbed, Qdrant path storage, Memory.update, Memory.delete, Memory.history, Memory.get_all, entity filters, and cold-start reload; mem0 passed 8/8 encoded checks.",
+        "evidence": "Fresh scoped baseline run live-baseline-20260611111119 exercises local OSS mem0 with FastEmbed, Qdrant path storage, Memory.update, Memory.delete, Memory.history, Memory.get_all, entity filters, and cold-start reload; mem0 passed 8/8 encoded checks.",
         "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
         "artifact": "tmp/live-baseline/live-baseline-report.json"
       },
@@ -626,7 +626,7 @@
         {
           "capability": "same_corpus_retrieval",
           "status": "pass",
-          "evidence": "Fresh scoped baseline run live-baseline-20260611105855 reports mem0 retrieval_pass with 3/3 same-corpus retrieval checks."
+          "evidence": "Fresh scoped baseline run live-baseline-20260611111119 reports mem0 retrieval_pass with 3/3 same-corpus retrieval checks."
         },
         {
           "capability": "local_lifecycle_update_delete_reload",
@@ -708,9 +708,9 @@
           "status": "pass",
           "elf_position": "loses",
           "comparison_outcome": "loss",
-          "evidence": "Fresh scoped baseline run live-baseline-20260611105855 reports mem0 preference_correction_history as pass. The June 11 temporal report records ELF live memory-evolution preference as wrong_result, so the current measured comparison is an ELF loss on this history dimension until ELF temporal reconciliation is fixed.",
-          "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
-          "artifact": "tmp/live-baseline/mem0-checks.json"
+          "evidence": "Fresh scoped baseline run live-baseline-20260611111119 reports mem0 preference_correction_history as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md, which records ELF live memory-evolution preference as wrong_result. The current measured comparison is therefore an ELF loss on this history dimension until ELF temporal reconciliation is fixed.",
+          "command": "mem0: ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker; ELF: cargo make real-world-memory-live-adapters",
+          "artifact": "mem0: tmp/live-baseline/mem0-checks.json; ELF: tmp/real-world-memory/live-adapters/ and docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md"
         },
         {
           "scenario_id": "entity_scoped_personalization",
@@ -718,9 +718,9 @@
           "status": "pass",
           "elf_position": "ties",
           "comparison_outcome": "tie",
-          "evidence": "Fresh scoped baseline run live-baseline-20260611105855 reports mem0 entity_scoped_personalization as pass. Existing live real-world evidence records ELF and qmd passing the encoded personalization slice, so this is a measured tie on the current scoped-preference surface.",
-          "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
-          "artifact": "tmp/live-baseline/mem0-checks.json"
+          "evidence": "Fresh scoped baseline run live-baseline-20260611111119 reports mem0 entity_scoped_personalization as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md, which records ELF and qmd passing the encoded personalization slice. This is a measured tie on the current scoped-preference surface.",
+          "command": "mem0: ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker; ELF: cargo make real-world-memory-live-adapters",
+          "artifact": "mem0: tmp/live-baseline/mem0-checks.json; ELF: tmp/real-world-memory/live-adapters/ and docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md"
         },
         {
           "scenario_id": "delete_audit_readback",
@@ -728,9 +728,9 @@
           "status": "pass",
           "elf_position": "ties",
           "comparison_outcome": "tie",
-          "evidence": "Fresh scoped baseline run live-baseline-20260611105855 reports mem0 delete_history_audit_readback as pass. The June 11 temporal report records ELF passing the delete/TTL tombstone job, so the current measured delete-audit comparison is a tie.",
-          "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
-          "artifact": "tmp/live-baseline/mem0-checks.json"
+          "evidence": "Fresh scoped baseline run live-baseline-20260611111119 reports mem0 delete_history_audit_readback as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md, which records ELF passing the delete/TTL tombstone job. The current measured delete-audit comparison is a tie.",
+          "command": "mem0: ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker; ELF: cargo make real-world-memory-live-adapters",
+          "artifact": "mem0: tmp/live-baseline/mem0-checks.json; ELF: tmp/real-world-memory/live-adapters/ and docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md"
         },
         {
           "scenario_id": "local_get_all_export_readback",
@@ -738,7 +738,7 @@
           "status": "pass",
           "elf_position": "untested",
           "comparison_outcome": "not_tested",
-          "evidence": "Fresh scoped baseline run live-baseline-20260611105855 reports mem0 local_get_all_export_readback as pass. This is local SDK inspection/export-style readback, not OpenMemory UI evidence; ELF has no directly comparable live UI/export scoring row in this run.",
+          "evidence": "Fresh scoped baseline run live-baseline-20260611111119 reports mem0 local_get_all_export_readback as pass. This is local SDK inspection/export-style readback, not OpenMemory UI evidence; ELF has no directly comparable live UI/export scoring row in this run.",
           "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
           "artifact": "tmp/live-baseline/mem0-checks.json"
         },
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 6ef0f0d3..402fafff 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -2319,6 +2319,27 @@ fn external_adapter_markdown_omits_scenario_summary_when_manifest_has_no_scenari
 	Ok(())
 }
 
+#[test]
+fn mem0_delete_audit_probe_requires_explicit_delete_history_event() -> Result<()> { // asserts on the script's source text; the script is not run
+	let script =
+		fs::read_to_string(workspace_root()?.join("scripts").join("live-baseline-benchmark.sh"))?;
+
+	assert!(script.contains("def history_has_event"));
+	assert!(script.contains("str(entry.get(\"event\", \"\")).upper() == expected"));
+	assert!(
+		script.contains(
+			"history_has_event(\n        delete_history[\"history\"],\n        \"DELETE\","
+		)
+	);
+	assert!(
+		!script.contains(
+			"contains_terms(\n        delete_history[\"history\"],\n        [\"delete\"],"
+		)
+	);
+
+	Ok(())
+}
+
 #[test]
 fn knowledge_json_report_renders_markdown_metrics() -> Result<()> {
 	let report = run_json_report_from(knowledge_fixture_dir())?;
diff --git a/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md b/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md
index 7ccef030..627465b2 100644
--- a/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md
+++ b/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md
@@ -37,10 +37,10 @@ Platform export jobs, and does not enable optional graph memory.
 
 | Command | Result | Runtime | Artifact |
 | --- | --- | ---: | --- |
-| `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `pass`; mem0 `8/8` encoded checks pass | 42.89 seconds wall; 41 seconds project runtime | `tmp/live-baseline/live-baseline-report.json`, `tmp/live-baseline/mem0-checks.json` |
-| `cargo make real-world-memory` | `pass`; refreshed external adapter report published | 220.57 seconds | `tmp/real-world-memory/real-world-memory-report.json`, `tmp/real-world-memory/real-world-memory-report.md` |
+| `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `pass`; mem0 `8/8` encoded checks pass | 35.50 seconds wall; 33 seconds project runtime | `tmp/live-baseline/live-baseline-report.json`, `tmp/live-baseline/mem0-checks.json` |
+| `cargo make real-world-memory` | `pass`; refreshed external adapter report published | 10.18 seconds | `tmp/real-world-memory/real-world-memory-report.json`, `tmp/real-world-memory/real-world-memory-report.md` |
 
-Fresh mem0 run id: `live-baseline-20260611105855`.
+Fresh mem0 run id: `live-baseline-20260611111119`.
 
 Generated external adapter summary:
 
@@ -55,9 +55,9 @@ Generated external adapter summary:
 | Scenario | mem0/OpenMemory evidence | ELF comparison outcome | Status | Command | Artifact |
 | --- | --- | --- | --- | --- | --- |
 | Basic local lifecycle | mem0 passes same-corpus retrieval, update, delete, and cold-start reload in the prior first-generation baseline. | `tie` | `pass` | `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker` | `tmp/live-baseline/live-baseline-report.json` |
-| Preference correction history | `Memory.history` preserves old and current preference records; search returns only the current correction. | `loss` | `pass` | `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `tmp/live-baseline/mem0-checks.json` |
-| Entity-scoped personalization | `search()` with `user_id`, `agent_id`, and `run_id` filters returns the ELF-scoped preference and omits a PubFi-scoped preference. | `tie` | `pass` | `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `tmp/live-baseline/mem0-checks.json` |
-| Delete audit readback | `Memory.history` exposes a `DELETE` event and post-delete search suppresses the deleted memory. | `tie` | `pass` | `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `tmp/live-baseline/mem0-checks.json` |
+| Preference correction history | `Memory.history` preserves old and current preference records; search returns only the current correction. | `loss` | `pass` | mem0: `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker`; ELF: `cargo make real-world-memory-live-adapters` | mem0: `tmp/live-baseline/mem0-checks.json`; ELF: `tmp/real-world-memory/live-adapters/`, `docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md` |
+| Entity-scoped personalization | `search()` with `user_id`, `agent_id`, and `run_id` filters returns the ELF-scoped preference and omits a PubFi-scoped preference. | `tie` | `pass` | mem0: `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker`; ELF: `cargo make real-world-memory-live-adapters` | mem0: `tmp/live-baseline/mem0-checks.json`; ELF: `tmp/real-world-memory/live-adapters/`, `docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md` |
+| Delete audit readback | `Memory.history` exposes a `DELETE` event and post-delete search suppresses the deleted memory. | `tie` | `pass` | mem0: `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker`; ELF: `cargo make real-world-memory-live-adapters` | mem0: `tmp/live-baseline/mem0-checks.json`; ELF: `tmp/real-world-memory/live-adapters/`, `docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md` |
 | Local SDK export-style readback | `Memory.get_all` returns the current scoped preference and omits the other scope. | `not_tested` | `pass` | `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `tmp/live-baseline/mem0-checks.json` |
 | OpenMemory UI/export readback | No local UI/dashboard export flow is launched by the Docker runner. | `blocked` | `blocked` | Not run; outside current local runner. | `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json` |
 | Hosted mem0 Platform export | Hosted Platform export is outside local OSS evidence. | `non_goal` | `unsupported` | Not run; local OSS comparison only. | `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json` |
@@ -119,7 +119,7 @@ Allowed:
 
 - mem0/OpenMemory local OSS passes the new encoded history, correction,
   personalization, deletion-audit, and local `get_all` readback checks in run
-  `live-baseline-20260611105855`.
+  `live-baseline-20260611111119`.
 - ELF currently has a measured `loss` against mem0 on the preference correction
   history dimension because the June 11 temporal/history report records ELF's live
   memory-evolution preference job as `wrong_result`.
diff --git a/scripts/live-baseline-benchmark.sh b/scripts/live-baseline-benchmark.sh
index 15365610..d899677b 100755
--- a/scripts/live-baseline-benchmark.sh
+++ b/scripts/live-baseline-benchmark.sh
@@ -2218,6 +2218,25 @@ def contains_terms(value, terms):
     return all(term.lower() in text for term in terms)
 
 
+def history_entries(history):
+    """Return the entry list of a history payload, or [] when none is found."""
+    if isinstance(history, list):
+        return history
+    wrapped = history if isinstance(history, dict) else {}
+    for entries in map(wrapped.get, ("results", "history", "memories")):
+        if isinstance(entries, list):
+            return entries
+    return []
+
+
+def history_has_event(history, expected_event):
+    expected = expected_event.upper()  # both sides are upper-cased, so the comparison ignores case
+    return any(
+        isinstance(entry, dict) and str(entry.get("event", "")).upper() == expected  # exact match on "event"; non-dict entries never match
+        for entry in history_entries(history)
+    )
+
+
 def first_memory_id(add_result):
     results = add_result.get("results", []) if isinstance(add_result, dict) else []
     if results and isinstance(results[0], dict):
@@ -2601,9 +2620,9 @@ else:
         )
     )
     delete_history = memory_history(memory, delete_id)
-    delete_history_has_event = delete_history["available"] and contains_terms(
+    delete_history_has_event = delete_history["available"] and history_has_event(
         delete_history["history"],
-        ["delete"],
+        "DELETE",
     )
     if not delete_history["available"]:
         delete_audit_status = "blocked"

From 6b405e933ad371996b779e8a027c86fe3a84871a Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Thu, 11 Jun 2026 19:25:06 +0800
Subject: [PATCH 3/4] {"schema":"decodex/commit/1","summary":"Make chunking
 tests offline deterministic","authority":"XY-924"}

---
 packages/elf-chunking/src/lib.rs | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/packages/elf-chunking/src/lib.rs b/packages/elf-chunking/src/lib.rs
index f1209da2..00c25670 100644
--- a/packages/elf-chunking/src/lib.rs
+++ b/packages/elf-chunking/src/lib.rs
@@ -128,10 +128,14 @@ fn overlap_tail(text: &str, overlap_tokens: u32, tokenizer: &Tokenizer) -> Strin
 mod tests {
 	use crate::ChunkingConfig;
 
+	fn local_dev_tokenizer_path() -> std::path::PathBuf {
+		let crate_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
+		crate_dir.join("../../config/local/tokenizer.wordlevel.json") // fixture sits two levels above this crate's manifest dir
+	}
+
 	#[test]
 	fn loads_local_dev_tokenizer_fixture() {
-		let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
-			.join("../../config/local/tokenizer.wordlevel.json");
+		let path = local_dev_tokenizer_path();
 		let tokenizer = crate::load_tokenizer(path.to_str().expect("Path must be valid UTF-8"))
 			.expect("Local dev tokenizer must load.");
 		let cfg = ChunkingConfig { max_tokens: 10, overlap_tokens: 2 };
@@ -143,11 +147,14 @@ mod tests {
 
 	#[test]
 	fn splits_into_chunks_with_overlap() {
-		let cfg = ChunkingConfig { max_tokens: 10, overlap_tokens: 2 };
-		let tokenizer = crate::load_tokenizer("Qwen/Qwen3-Embedding-8B").unwrap();
+		let cfg = ChunkingConfig { max_tokens: 2, overlap_tokens: 1 }; // tiny budget; the assertions below require more than one chunk
+		let path = local_dev_tokenizer_path(); // repo-local fixture file replaces the "Qwen/Qwen3-Embedding-8B" identifier
+		let tokenizer = crate::load_tokenizer(path.to_str().expect("Path must be valid UTF-8"))
+			.expect("Local dev tokenizer must load.");
 		let chunks = crate::split_text("One. Two. Three. Four.", &cfg, &tokenizer);
 
-		assert!(!chunks.is_empty());
+		assert!(chunks.len() > 1);
 		assert!(chunks[0].text.contains("One"));
+		assert!(chunks.last().expect("Chunk should exist").text.contains("Four"));
 	}
 }

From 843f100877ded409083c5fcbac5b3f618c61e59b Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Thu, 11 Jun 2026 19:33:55 +0800
Subject: [PATCH 4/4] {"schema":"decodex/commit/1","summary":"Sync mem0 history
 review evidence","authority":"XY-924"}

---
 README.md                                     |  2 +-
 .../memory_projects_manifest.json             | 14 ++++----
 .../tests/real_world_job_benchmark.rs         |  6 ++++
 ...em0-openmemory-history-ui-export-report.md | 16 ++++++----
 ...-temporal-history-competitor-gap-report.md | 18 ++++++-----
 ...1-competitor-strength-adoption-report.json | 32 +++++++++++--------
 ...emporal-history-competitor-gap-report.json | 20 ++++++++----
 scripts/live-baseline-benchmark.sh            | 21 ++++++++++--
 8 files changed, 86 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index c79a217b..1ec443f3 100644
--- a/README.md
+++ b/README.md
@@ -179,7 +179,7 @@ provider-backed ELF evidence was required.
 - mem0/OpenMemory history follow-up after XY-924: the local OSS mem0 adapter now
   passes encoded preference correction history, entity-scoped personalization, local
   `get_all` export-style readback, and deletion audit history in
-  `live-baseline-20260611111119`. The comparison records ELF as a loss on preference
+  `live-baseline-20260611113003`. The comparison records ELF as a loss on preference
   correction history, ties on scoped personalization and delete audit, `not_tested`
   for local SDK export-style parity, `blocked` for OpenMemory UI/export, and
   `non_goal` for hosted Platform export and optional graph memory in the local OSS
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index 9812feae..7bcdef8d 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -608,7 +608,7 @@
       },
       "run": {
         "status": "pass",
-        "evidence": "Fresh scoped baseline run live-baseline-20260611111119 exercises local OSS mem0 with FastEmbed, Qdrant path storage, Memory.update, Memory.delete, Memory.history, Memory.get_all, entity filters, and cold-start reload; mem0 passed 8/8 encoded checks.",
+        "evidence": "Fresh scoped baseline run live-baseline-20260611113003 exercises local OSS mem0 with FastEmbed, Qdrant path storage, Memory.update, Memory.delete, Memory.history, Memory.get_all, entity filters, and cold-start reload; mem0 passed 8/8 encoded checks.",
         "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
         "artifact": "tmp/live-baseline/live-baseline-report.json"
       },
@@ -626,7 +626,7 @@
         {
           "capability": "same_corpus_retrieval",
           "status": "pass",
-          "evidence": "Fresh scoped baseline run live-baseline-20260611111119 reports mem0 retrieval_pass with 3/3 same-corpus retrieval checks."
+          "evidence": "Fresh scoped baseline run live-baseline-20260611113003 reports mem0 retrieval_pass with 3/3 same-corpus retrieval checks."
         },
         {
           "capability": "local_lifecycle_update_delete_reload",
@@ -636,7 +636,7 @@
         {
           "capability": "preference_correction_history",
           "status": "pass",
-          "evidence": "The fresh scoped run reports preference_correction_history as pass: Memory.history preserved ADD and UPDATE records with old and current preference text, and search returned only the current correction."
+          "evidence": "The fresh scoped run reports preference_correction_history as pass: Memory.history preserved explicit ADD and UPDATE records with old and current preference text, and search returned only the current correction."
         },
         {
           "capability": "entity_scoped_personalization",
@@ -708,7 +708,7 @@
           "status": "pass",
           "elf_position": "loses",
           "comparison_outcome": "loss",
-          "evidence": "Fresh scoped baseline run live-baseline-20260611111119 reports mem0 preference_correction_history as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md, which records ELF live memory-evolution preference as wrong_result. The current measured comparison is therefore an ELF loss on this history dimension until ELF temporal reconciliation is fixed.",
+          "evidence": "Fresh scoped baseline run live-baseline-20260611113003 reports mem0 preference_correction_history as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md, which records ELF live memory-evolution preference as wrong_result. The current measured comparison is therefore an ELF loss on this history dimension until ELF temporal reconciliation is fixed.",
           "command": "mem0: ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker; ELF: cargo make real-world-memory-live-adapters",
           "artifact": "mem0: tmp/live-baseline/mem0-checks.json; ELF: tmp/real-world-memory/live-adapters/ and docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md"
         },
@@ -718,7 +718,7 @@
           "status": "pass",
           "elf_position": "ties",
           "comparison_outcome": "tie",
-          "evidence": "Fresh scoped baseline run live-baseline-20260611111119 reports mem0 entity_scoped_personalization as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md, which records ELF and qmd passing the encoded personalization slice. This is a measured tie on the current scoped-preference surface.",
+          "evidence": "Fresh scoped baseline run live-baseline-20260611113003 reports mem0 entity_scoped_personalization as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md, which records ELF and qmd passing the encoded personalization slice. This is a measured tie on the current scoped-preference surface.",
           "command": "mem0: ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker; ELF: cargo make real-world-memory-live-adapters",
           "artifact": "mem0: tmp/live-baseline/mem0-checks.json; ELF: tmp/real-world-memory/live-adapters/ and docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md"
         },
@@ -728,7 +728,7 @@
           "status": "pass",
           "elf_position": "ties",
           "comparison_outcome": "tie",
-          "evidence": "Fresh scoped baseline run live-baseline-20260611111119 reports mem0 delete_history_audit_readback as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md, which records ELF passing the delete/TTL tombstone job. The current measured delete-audit comparison is a tie.",
+          "evidence": "Fresh scoped baseline run live-baseline-20260611113003 reports mem0 delete_history_audit_readback as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md, which records ELF passing the delete/TTL tombstone job. The current measured delete-audit comparison is a tie.",
           "command": "mem0: ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker; ELF: cargo make real-world-memory-live-adapters",
           "artifact": "mem0: tmp/live-baseline/mem0-checks.json; ELF: tmp/real-world-memory/live-adapters/ and docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md"
         },
@@ -738,7 +738,7 @@
           "status": "pass",
           "elf_position": "untested",
           "comparison_outcome": "not_tested",
-          "evidence": "Fresh scoped baseline run live-baseline-20260611111119 reports mem0 local_get_all_export_readback as pass. This is local SDK inspection/export-style readback, not OpenMemory UI evidence; ELF has no directly comparable live UI/export scoring row in this run.",
+          "evidence": "Fresh scoped baseline run live-baseline-20260611113003 reports mem0 local_get_all_export_readback as pass. This is local SDK inspection/export-style readback, not OpenMemory UI evidence; ELF has no directly comparable live UI/export scoring row in this run.",
           "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
           "artifact": "tmp/live-baseline/mem0-checks.json"
         },
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 402fafff..b76a1ff2 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -2326,6 +2326,12 @@ fn mem0_delete_audit_probe_requires_explicit_delete_history_event() -> Result<()
 
 	assert!(script.contains("def history_has_event"));
 	assert!(script.contains("str(entry.get(\"event\", \"\")).upper() == expected"));
+	assert!(script.contains(
+		"history_has_event(\n        preference_history[\"history\"],\n        \"ADD\","
+	));
+	assert!(script.contains(
+		"history_has_event(\n        preference_history[\"history\"],\n        \"UPDATE\","
+	));
 	assert!(
 		script.contains(
 			"history_has_event(\n        delete_history[\"history\"],\n        \"DELETE\","
diff --git a/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md b/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md
index 627465b2..91d5dc15 100644
--- a/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md
+++ b/docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md
@@ -37,12 +37,12 @@ Platform export jobs, and does not enable optional graph memory.
 
 | Command | Result | Runtime | Artifact |
 | --- | --- | ---: | --- |
-| `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `pass`; mem0 `8/8` encoded checks pass | 35.50 seconds wall; 33 seconds project runtime | `tmp/live-baseline/live-baseline-report.json`, `tmp/live-baseline/mem0-checks.json` |
-| `cargo make real-world-memory` | `pass`; refreshed external adapter report published | 10.18 seconds | `tmp/real-world-memory/real-world-memory-report.json`, `tmp/real-world-memory/real-world-memory-report.md` |
+| `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `pass`; mem0 `8/8` encoded checks pass | 39.17 seconds wall; 36 seconds project runtime | `tmp/live-baseline/live-baseline-report.json`, `tmp/live-baseline/mem0-checks.json` |
+| `cargo make real-world-memory` | `pass`; refreshed external adapter report published | 8.88 seconds | `tmp/real-world-memory/real-world-memory-report.json`, `tmp/real-world-memory/real-world-memory-report.md` |
 
-Fresh mem0 run id: `live-baseline-20260611111119`.
+Fresh mem0 run id: `live-baseline-20260611113003`.
 
-Generated external adapter summary:
+Generated external adapter summary for all external adapter manifest rows:
 
 - Scenario statuses: `unsupported=2`, `blocked=2`, `wrong_result=1`,
   `lifecycle_fail=1`, `pass=9`, `not_encoded=3`.
@@ -50,12 +50,15 @@ Generated external adapter summary:
 - Normalized comparison outcomes: `win=2`, `tie=4`, `loss=1`,
   `not_tested=8`, `blocked=1`, `non_goal=2`.
 
+mem0/OpenMemory rows in this report contain eight scenarios: `loss=1`,
+`tie=3`, `not_tested=1`, `blocked=1`, and `non_goal=2`.
+
 ## Scenario Outcomes
 
 | Scenario | mem0/OpenMemory evidence | ELF comparison outcome | Status | Command | Artifact |
 | --- | --- | --- | --- | --- | --- |
 | Basic local lifecycle | mem0 passes same-corpus retrieval, update, delete, and cold-start reload in the prior first-generation baseline. | `tie` | `pass` | `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker` | `tmp/live-baseline/live-baseline-report.json` |
-| Preference correction history | `Memory.history` preserves old and current preference records; search returns only the current correction. | `loss` | `pass` | mem0: `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker`; ELF: `cargo make real-world-memory-live-adapters` | mem0: `tmp/live-baseline/mem0-checks.json`; ELF: `tmp/real-world-memory/live-adapters/`, `docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md` |
+| Preference correction history | `Memory.history` exposes explicit `ADD` and `UPDATE` preference records; search returns only the current correction. | `loss` | `pass` | mem0: `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker`; ELF: `cargo make real-world-memory-live-adapters` | mem0: `tmp/live-baseline/mem0-checks.json`; ELF: `tmp/real-world-memory/live-adapters/`, `docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md` |
 | Entity-scoped personalization | `search()` with `user_id`, `agent_id`, and `run_id` filters returns the ELF-scoped preference and omits a PubFi-scoped preference. | `tie` | `pass` | mem0: `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker`; ELF: `cargo make real-world-memory-live-adapters` | mem0: `tmp/live-baseline/mem0-checks.json`; ELF: `tmp/real-world-memory/live-adapters/`, `docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md` |
 | Delete audit readback | `Memory.history` exposes a `DELETE` event and post-delete search suppresses the deleted memory. | `tie` | `pass` | mem0: `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker`; ELF: `cargo make real-world-memory-live-adapters` | mem0: `tmp/live-baseline/mem0-checks.json`; ELF: `tmp/real-world-memory/live-adapters/`, `docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md` |
 | Local SDK export-style readback | `Memory.get_all` returns the current scoped preference and omits the other scope. | `not_tested` | `pass` | `ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker` | `tmp/live-baseline/mem0-checks.json` |
@@ -81,6 +84,7 @@ The `preference_correction_history` check verifies all of:
 - history is available;
 - history contains the original preference;
 - history contains the corrected preference;
+- history contains explicit `ADD` and `UPDATE` events;
 - search contains the corrected preference;
 - search omits the old preference.
 
@@ -119,7 +123,7 @@ Allowed:
 
 - mem0/OpenMemory local OSS passes the new encoded history, correction,
   personalization, deletion-audit, and local `get_all` readback checks in run
-  `live-baseline-20260611111119`.
+  `live-baseline-20260611113003`.
 - ELF currently has a measured `loss` against mem0 on the preference correction
   history dimension because the June 11 temporal/history report records ELF's live
   memory-evolution preference job as `wrong_result`.
diff --git a/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md b/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md
index d0749918..c93ebea8 100644
--- a/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md
+++ b/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md
@@ -134,9 +134,9 @@ the right snippets.
 | --- | --- | --- | --- |
 | Basic local lifecycle | mem0 update/delete/reload | Fresh Docker baseline: ELF `8/8`, mem0 `4/4`, combined `12/12` | ELF ties or exceeds the encoded smoke surface, but does not beat OpenMemory UI/history/hosted claims. |
 | Retrieval/debug | qmd transparent CLI, expansion/fusion/rerank/replay ergonomics | ELF/qmd live adapters pass retrieval suites; previous qmd debug profile exists | ELF is not clearly stronger. qmd remains the debug-UX bar. |
-| Current-vs-historical memory | Graphiti/Zep temporal validity; mem0 history surfaces | ELF/qmd live memory-evolution wrong_result; Graphiti/Zep blocked; mem0 real-world history not encoded | ELF has a measured gap. It only narrowly beats qmd's current run. |
+| Current-vs-historical memory | Graphiti/Zep temporal validity; mem0 history surfaces | ELF/qmd live memory-evolution wrong_result; Graphiti/Zep blocked; mem0 local OSS preference correction history now passes, but mem0 real-world prompt history is not encoded | ELF has a measured gap. It only narrowly beats qmd's current run and loses the local OSS preference-correction history scenario to mem0. |
 | Delete/tombstone lifecycle | ELF production ops and qmd local replay | ELF passes delete/TTL job; qmd misses tombstone | ELF has a narrow measured win over qmd on this job. |
-| Entity preference history | mem0/OpenMemory | Only basic mem0 lifecycle smoke passed | Not comparable. Need mem0/OpenMemory history and UI/export benchmark. |
+| Entity preference history | mem0/OpenMemory | XY-924 local OSS run passes mem0 preference correction history and entity-scoped personalization; OpenMemory UI/export remains blocked | ELF loses the preference-correction history scenario and ties the scoped-personalization scenario; no OpenMemory UI/export claim is allowed. |
 | Core-vs-archival memory | Letta core memory blocks versus archival memory | Research-only, no contained live output | Not comparable. Borrow design only. |
 | Context trajectory | OpenViking staged context and hierarchy | Existing adapter remains not encoded or wrong_result for trajectory | Not comparable. Need staged trajectory benchmark. |
 | Capture and continuity | agentmemory, claude-mem hooks/viewers | Existing adapters are baseline-only and undermeasured | Not comparable. Need capture/write-policy and work-resume adapters. |
@@ -148,7 +148,7 @@ the right snippets.
 | Source | Best idea to absorb | Benchmark gate before any claim |
 | --- | --- | --- |
 | Graphiti/Zep | Validity windows, `valid_at`/`invalid_at`, current/historical/future fact separation, temporal relation provenance | Provider-backed Docker temporal smoke must map current, historical, and rationale facts to scored evidence ids. |
-| mem0/OpenMemory | Entity-scoped memory history, user-visible lifecycle inspection, update/delete ergonomics | mem0/OpenMemory adapter must score preference history, correction, deletion, and UI/export readback. |
+| mem0/OpenMemory | Entity-scoped memory history, user-visible lifecycle inspection, update/delete ergonomics | Local OSS history, correction, deletion, and SDK `get_all` readback are now scored; UI/export readback still needs a bounded OpenMemory runner. |
 | Letta | Always-loaded core memory blocks separated from archival search | Add core-vs-archival jobs for attachment scope, provenance, fallback, and stale-core avoidance. |
 | qmd | Local replay, candidate inspection, expansion/fusion/rerank debug knobs | ELF trace artifacts must show candidate generation, rerank, dropped evidence, conflict candidates, and replay commands. |
 | OpenViking | Staged context trajectory and hierarchy | Encode trajectory jobs after evidence-bearing same-corpus output passes. |
@@ -176,17 +176,19 @@ claim that ELF has solved temporal memory.
 
 ### P0 - mem0/OpenMemory History Comparison
 
-The fresh mem0 pass means the next useful comparison is no longer basic update/delete.
-It should move to the product behavior users actually care about:
+XY-924 moves the reproducible local OSS comparison past basic update/delete into
+the product behavior users actually care about:
 
 1. preference history across correction events;
 2. entity-scoped memory lookup and update;
-3. user-visible inspection/export of memory lifecycle;
+3. local SDK inspection/export-style readback of memory lifecycle;
 4. deletion versus historical audit readback;
 5. optional graph-memory behavior only if the OSS path is reproducible in Docker.
 
-Target benchmark: mem0/OpenMemory and ELF both run comparable history jobs; claims are
-made per scenario, not per project brand.
+Target benchmark status: local OSS history jobs are now encoded with per-scenario
+claims. OpenMemory UI/export readback remains blocked until a UI runner exists, and
+hosted Platform export plus optional graph memory remain non-goals for the local OSS
+lane.
 
 ### P0 - qmd-Level Debugging And Replay
 
diff --git a/docs/research/2026-06-11-competitor-strength-adoption-report.json b/docs/research/2026-06-11-competitor-strength-adoption-report.json
index 9226f5ca..11871923 100644
--- a/docs/research/2026-06-11-competitor-strength-adoption-report.json
+++ b/docs/research/2026-06-11-competitor-strength-adoption-report.json
@@ -12,7 +12,7 @@
       "Live temporal reconciliation remains wrong_result for five of six memory_evolution jobs.",
       "Private-corpus production quality is blocked until an operator-owned manifest exists.",
       "Credentialed provider production-ops gates are blocked until explicit provider setup exists.",
-      "Several competitor strengths remain not_tested: mem0/OpenMemory history/UI, OpenViking trajectory, Letta core-vs-archival memory, and graph/RAG navigation. The XY-923 follow-up now scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, rerank, and candidate-drop diagnosis remain untested."
+      "Several competitor strengths remain not_tested, blocked, or non_goal: OpenMemory UI/export, hosted mem0 Platform behavior, OpenViking trajectory, Letta core-vs-archival memory, and graph/RAG navigation. mem0 local OSS preference history is now measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up now scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, rerank, and candidate-drop diagnosis remain untested."
     ]
   },
   "evidence_class_terms": [
@@ -51,6 +51,11 @@
       "artifact": "docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md",
       "claim": "mem0/OpenMemory and memsearch pass basic local baseline smokes; agentmemory remains lifecycle_fail and claude-mem remains wrong_result on same-corpus retrieval."
     },
+    {
+      "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
+      "artifact": "docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md",
+      "claim": "mem0 local OSS passes preference correction history, entity-scoped personalization, local get_all export-style readback, and deletion audit history; OpenMemory UI/export remains blocked and hosted Platform export remains non-goal."
+    },
     {
       "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke",
       "artifact": "docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md",
@@ -142,8 +147,8 @@
       "scenario_id": "memory_evolution_temporal_history",
       "title": "Memory evolution and temporal history",
       "outcome": "loss",
-      "evidence_classes": ["fixture_backed", "live_real_world", "wrong_result", "blocked"],
-      "measured_claim": "ELF fixture memory_evolution passes, but live ELF passes only the delete/TTL job and reports five wrong_result jobs where evidence is retrieved but current-vs-historical state is not reconciled.",
+      "evidence_classes": ["fixture_backed", "live_real_world", "live_baseline_only", "wrong_result", "blocked"],
+      "measured_claim": "ELF fixture memory_evolution passes, but live ELF passes only the delete/TTL job and reports five wrong_result jobs where evidence is retrieved but current-vs-historical state is not reconciled. The mem0 local OSS preference-correction history scenario is now measured and is also an ELF loss.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md",
         "docs/research/2026-06-11-temporal-history-competitor-gap-report.json"
@@ -180,8 +185,8 @@
       "scenario_id": "operator_debugging_viewer_ux",
       "title": "Operator debugging/viewer UX",
       "outcome": "not_tested",
-      "evidence_classes": ["fixture_backed", "not_encoded", "research_gate"],
-      "measured_claim": "ELF fixture operator-debugging UX passes, but live trace/viewer scoring is not encoded and qmd/OpenMemory/claude-mem UX comparisons are unscored.",
+      "evidence_classes": ["fixture_backed", "live_baseline_only", "blocked", "not_encoded", "research_gate"],
+      "measured_claim": "ELF fixture operator-debugging UX passes. mem0 local SDK get_all readback is measured, but OpenMemory UI/export remains blocked and must not be inferred from SDK readback. Live trace/viewer scoring and qmd/OpenMemory/claude-mem UX comparisons remain unscored.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
         "docs/guide/benchmarking/2026-06-11-qmd-openviking-strength-profile-report.md"
@@ -232,13 +237,14 @@
       "scenario_id": "personalization_scoped_preferences",
       "title": "Personalization and scoped preferences",
       "outcome": "tie",
-      "evidence_classes": ["fixture_backed", "live_real_world", "not_encoded"],
-      "measured_claim": "ELF and qmd both pass the single encoded live personalization job. mem0/OpenMemory and Letta personalization/history are not encoded.",
+      "evidence_classes": ["fixture_backed", "live_real_world", "live_baseline_only", "not_encoded"],
+      "measured_claim": "ELF and qmd both pass the single encoded live personalization job. mem0 local OSS now passes entity-scoped personalization, so scoped preference behavior is a measured tie; preference correction history remains a separate ELF loss.",
       "command_artifacts": [
-        "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md"
+        "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
+        "docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md"
       ],
-      "follow_up_issues": ["XY-924", "XY-927"],
-      "caveat": "The tie does not prove entity history, UI readback, or long-term preference evolution."
+      "follow_up_issues": ["XY-927"],
+      "caveat": "The tie is scoped to encoded personalization and local OSS entity filters; OpenMemory UI readback and long-term preference evolution remain separate surfaces."
     },
     {
       "scenario_id": "context_trajectory_hierarchical_retrieval",
@@ -294,8 +300,8 @@
     {
       "issue": "XY-924",
       "priority": "P0",
-      "state": "Backlog",
-      "gap": "mem0/OpenMemory history and UI-export comparison."
+      "state": "Encoded local OSS history; UI/export still gated",
+      "gap": "mem0/OpenMemory local OSS history and SDK export-style readback are measured; OpenMemory UI/export still needs a UI runner before any product-UX claim."
     },
     {
       "issue": "XY-925",
@@ -351,7 +357,7 @@
     "not_allowed": [
       "Do not claim ELF broadly beats qmd.",
       "Do not claim qmd's trace/replay artifact win is a broad qmd-over-ELF memory-system or retrieval-quality win.",
-      "Do not claim ELF beats mem0/OpenMemory on history, UI/export, hosted behavior, or graph memory.",
+      "Do not claim ELF beats mem0/OpenMemory on preference history, UI/export, hosted behavior, or graph memory. The local OSS correction-history scenario is currently an ELF loss, while OpenMemory UI/export, hosted behavior, and graph memory remain outside measured local OSS evidence.",
       "Do not claim ELF beats OpenViking on staged context trajectory.",
       "Do not claim ELF beats Letta on core-vs-archival memory.",
       "Do not claim graph/RAG parity from smoke-only evidence.",
diff --git a/docs/research/2026-06-11-temporal-history-competitor-gap-report.json b/docs/research/2026-06-11-temporal-history-competitor-gap-report.json
index fe95e723..d9129ec7 100644
--- a/docs/research/2026-06-11-temporal-history-competitor-gap-report.json
+++ b/docs/research/2026-06-11-temporal-history-competitor-gap-report.json
@@ -19,6 +19,13 @@
       "runtime_seconds": 50.14,
       "artifact": "tmp/live-baseline/live-baseline-report.json"
     },
+    {
+      "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
+      "status": "pass",
+      "runtime_seconds": 39.17,
+      "artifact": "tmp/live-baseline/mem0-checks.json",
+      "claim": "XY-924 local OSS mem0 history run passes preference correction history, entity-scoped personalization, local get_all readback, and deletion audit history while keeping OpenMemory UI/export blocked."
+    },
     {
       "command": "cargo make real-world-memory-evolution",
       "status": "pass",
@@ -99,7 +106,7 @@
         "not_measured": [
           "OpenMemory UI",
           "hosted ecosystem behavior",
-          "entity history quality",
+          "OpenMemory UI/export quality",
           "optional graph memory",
           "real-world memory_evolution jobs"
         ]
@@ -248,7 +255,7 @@
       "scenario": "basic_local_lifecycle",
       "current_judgment": "elf_and_mem0_both_pass_encoded_smoke",
       "claim_strength": "limited_tie_or_elf_broader_smoke_surface",
-      "next_gate": "mem0/OpenMemory history and UI/export readback benchmark"
+      "next_gate": "OpenMemory UI/export readback runner; hosted Platform export and optional graph memory remain non-goals for the local OSS lane"
     },
     {
       "scenario": "retrieval_debug",
@@ -291,8 +298,8 @@
     {
       "priority": "P0",
       "direction": "mem0_openmemory_history_comparison",
-      "description": "Move past basic update/delete smoke into preference history, entity memory, lifecycle inspection, deletion audit, and UI/export readback.",
-      "benchmark_gate": "Comparable ELF and mem0/OpenMemory history jobs with typed evidence classes."
+      "description": "Local OSS comparison has moved past basic update/delete smoke into preference history, entity memory, lifecycle inspection, deletion audit, and SDK export-style readback.",
+      "benchmark_gate": "Local OSS history jobs are encoded with per-scenario claims; OpenMemory UI/export still needs a bounded UI runner."
     },
     {
       "priority": "P0",
@@ -322,6 +329,7 @@
   "claim_boundaries": {
     "allowed": [
       "ELF+mem0 basic local lifecycle smoke passed in the fresh Docker baseline.",
+      "mem0 local OSS history, entity-scoped personalization, deletion audit, and SDK get_all readback are measured by the XY-924 report.",
       "ELF narrowly outperformed qmd on the fresh memory-evolution slice because ELF passed delete/TTL and qmd did not.",
       "ELF still failed five of six live memory-evolution jobs.",
       "Graphiti/Zep temporal smoke is typed blocked due missing explicit provider key.",
@@ -330,7 +338,7 @@
     "not_allowed": [
       "All goals are complete.",
       "ELF beats all tracked memory projects.",
-      "ELF beats mem0/OpenMemory on UI, hosted behavior, entity history, or graph memory.",
+      "ELF beats mem0/OpenMemory on preference history, UI/export, hosted behavior, or graph memory.",
       "ELF beats Graphiti/Zep on temporal validity.",
       "ELF beats Letta on core-vs-archival memory.",
       "Fixture pass, baseline smoke pass, and live real-world pass are interchangeable evidence classes."
@@ -338,7 +346,7 @@
   },
   "next_issue_directions": [
     "P0 ELF live temporal reconciliation and trace contract",
-    "P0 mem0/OpenMemory history and UI/export readback benchmark",
+    "P0 OpenMemory UI/export readback runner after the local OSS history benchmark",
     "P0 ELF/qmd trace-level replay and wrong-result diagnosis",
     "P1 Letta-style core-vs-archival memory benchmark",
     "P2 Graphiti/Zep provider-backed temporal smoke after explicit provider credentials exist",
diff --git a/scripts/live-baseline-benchmark.sh b/scripts/live-baseline-benchmark.sh
index d899677b..d1a65f31 100755
--- a/scripts/live-baseline-benchmark.sh
+++ b/scripts/live-baseline-benchmark.sh
@@ -2471,6 +2471,14 @@ else:
         preference_history["history"],
         ["concise", "evidence-linked"],
     )
+    history_has_add_event = preference_history["available"] and history_has_event(
+        preference_history["history"],
+        "ADD",
+    )
+    history_has_update_event = preference_history["available"] and history_has_event(
+        preference_history["history"],
+        "UPDATE",
+    )
     search_has_current = contains_terms(
         result_entries(preference_search),
         ["concise", "evidence-linked"],
@@ -2479,9 +2487,16 @@ else:
     if not preference_history["available"]:
         preference_status = "blocked"
         preference_reason = "Memory.history could not be read for the updated preference memory."
-    elif history_has_old and history_has_current and search_has_current and search_omits_old:
+    elif (
+        history_has_old
+        and history_has_current
+        and history_has_add_event
+        and history_has_update_event
+        and search_has_current
+        and search_omits_old
+    ):
         preference_status = "pass"
-        preference_reason = "mem0 history preserved the old and current preference while search returned only the current correction."
+        preference_reason = "mem0 history preserved ADD and UPDATE preference events while search returned only the current correction."
     else:
         preference_status = "lifecycle_fail"
         preference_reason = "mem0 did not expose a clean preference correction chain with current-only search readback."
@@ -2498,6 +2513,8 @@ else:
                 "history_error": preference_history["error"],
                 "history_has_old": history_has_old,
                 "history_has_current": history_has_current,
+                "history_has_add_event": history_has_add_event,
+                "history_has_update_event": history_has_update_event,
                 "search_has_current": search_has_current,
                 "search_omits_old": search_omits_old,
                 "history": preference_history["history"],