hack-ink · yvette-carlisle · Jun 16, 2026 · Jun 16, 2026
diff --git a/README.md b/README.md
@@ -167,12 +167,20 @@ provider-backed ELF evidence was required.
   targeted `work_resume`, `retrieval`, and `project_decisions` slice passing, but the
   full sweep is not a full-suite pass. ELF now live-scores capture/write-policy,
   consolidation proposal review, knowledge-page rebuild/lint, and operator-debugging
-  fixtures. The remaining ELF non-pass boundaries are memory-evolution wrong results,
-  production-ops operator boundaries, the core/archival live adapter gap, and blocked
-  context-trajectory measurement. qmd remains the local retrieval-debug UX reference;
+  fixtures. The remaining ELF non-pass boundaries are production-ops operator
+  boundaries, the core/archival live adapter gap, and blocked context-trajectory
+  measurement. qmd remains the local retrieval-debug UX reference;
   it keeps consolidation, knowledge, capture, and core/archival typed non-pass states
   and is `wrong_result` for operator-debug trace hydration, so no broad ELF-over-qmd
   claim is allowed.
+- Live temporal reconciliation after XY-905: `cargo make real-world-memory-live-adapters`
+  now reports ELF live `memory_evolution` as 6/6 pass, score mean `1.000`,
+  conflict detection count `5`, update rationale count `6`, and zero
+  selected-but-not-narrated conflict evidence. The report adds current, historical,
+  rationale, tombstone, invalidation, selected, dropped, and lifecycle-demoted
+  evidence fields. qmd remains `wrong_result` on the same slice, but this is not a
+  broad qmd, Graphiti/Zep, mem0/OpenMemory, Letta, hosted-memory, or private-corpus
+  superiority claim.
 - Live operator-debugging slice after XY-932: `cargo make
   real-world-job-operator-ux-live-adapters` emits narrow Docker-isolated
   `live_real_world` records for ELF and qmd over the operator-debugging fixtures.
@@ -248,6 +256,7 @@ Detailed evidence and interpretation:
 - [mem0/OpenMemory History and UI Export Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md)
 - [Capture/Write-Policy Live Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md)
 - [First-Generation OSS Continuity and Source-Store Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md)
+- [Live Temporal Reconciliation Report - June 16, 2026](docs/guide/benchmarking/2026-06-16-live-temporal-reconciliation-report.md)
 - [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
 - [Single-User Production Runbook](docs/guide/single_user_production.md)
 - Benchmark contract:
@@ -260,7 +269,7 @@ Detailed evidence and interpretation:
   live sweep, but that sweep still contains typed non-pass states and is not
   full-suite parity.
 
-Evidence-backed position after the June 11 real-world reports:
+Evidence-backed position after the June 16 temporal reconciliation report:
 
 - ELF is better evidenced than the tested alternatives on evidence-bound writes,
   deterministic ingestion boundaries, Postgres source-of-truth plus rebuildable Qdrant
@@ -327,6 +336,7 @@ Detailed comparison, mechanism-level analysis, and source map:
 - [mem0/OpenMemory History and UI Export Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md)
 - [Capture/Write-Policy Live Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md)
 - [First-Generation OSS Continuity and Source-Store Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md)
+- [Live Temporal Reconciliation Report - June 16, 2026](docs/guide/benchmarking/2026-06-16-live-temporal-reconciliation-report.md)
 - [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
 - [Real-World Agent Memory Benchmark](docs/guide/benchmarking/real_world_agent_memory_benchmark.md)
 - [External Memory Improvement Plan](docs/guide/research/external_memory_improvement_plan.md)
@@ -336,7 +346,7 @@ Detailed comparison, mechanism-level analysis, and source map:
 - [Real-World Benchmark Dimension Research Run](docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json)
 - [RAG/Graph Adapter Feasibility Research Run](docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json)
 
-Latest real-world benchmark report: June 11, 2026. Latest external research refresh:
+Latest real-world benchmark report: June 16, 2026. Latest external research refresh:
 June 11, 2026.
 
 ## Documentation

diff --git a/apps/elf-eval/fixtures/real_world_memory/memory_evolution/delete_ttl_staleness.json b/apps/elf-eval/fixtures/real_world_memory/memory_evolution/delete_ttl_staleness.json
@@ -196,5 +196,23 @@
     "acceptable_phrases": [],
     "fallback_action": "state_blocker"
   },
+  "memory_evolution": {
+    "current_evidence_ids": ["current-benchmark-plan"],
+    "historical_evidence_ids": [],
+    "tombstone_evidence_ids": ["delete-tombstone"],
+    "invalidation_evidence_ids": ["delete-tombstone"],
+    "stale_trap_ids": ["stale-deleted-plan"],
+    "conflicts": [],
+    "update_rationale": {
+      "claim_id": "deleted_fact_suppressed",
+      "evidence_ids": ["delete-tombstone"],
+      "available": true
+    },
+    "temporal_validity": {
+      "required": false,
+      "encoded": false,
+      "follow_up": null
+    }
+  },
   "tags": ["synthetic", "ttl", "delete", "stale_fact", "no_live_claim"]
 }
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -311,6 +311,10 @@ struct MemoryEvolution {
 	#[serde(default)]
 	historical_evidence_ids: Vec<String>,
 	#[serde(default)]
+	tombstone_evidence_ids: Vec<String>,
+	#[serde(default)]
+	invalidation_evidence_ids: Vec<String>,
+	#[serde(default)]
 	stale_trap_ids: Vec<String>,
 	#[serde(default)]
 	conflicts: Vec<EvolutionConflict>,
@@ -1170,6 +1174,16 @@ struct EvolutionSummary {
 struct EvolutionJobReport {
 	current_evidence: Vec<String>,
 	historical_evidence: Vec<String>,
+	tombstone_evidence: Vec<String>,
+	invalidation_evidence: Vec<String>,
+	selected_current_evidence: Vec<String>,
+	selected_historical_evidence: Vec<String>,
+	selected_rationale_evidence: Vec<String>,
+	selected_tombstone_evidence: Vec<String>,
+	selected_invalidation_evidence: Vec<String>,
+	conflict_candidate_evidence: Vec<String>,
+	retrieved_but_dropped_evidence: Vec<String>,
+	selected_but_not_narrated_evidence: Vec<String>,
 	stale_trap_ids_used: Vec<String>,
 	stale_answer_count: usize,
 	conflict_count: usize,
@@ -1858,8 +1872,12 @@ fn validate_memory_evolution(job: &RealWorldJob, path: &Path) -> Result<()> {
 	let trap_ids =
 		job.negative_traps.iter().map(|trap| trap.trap_id.as_str()).collect::<BTreeSet<_>>();
 
-	for evidence_id in
-		evolution.current_evidence_ids.iter().chain(evolution.historical_evidence_ids.iter())
+	for evidence_id in evolution
+		.current_evidence_ids
+		.iter()
+		.chain(evolution.historical_evidence_ids.iter())
+		.chain(evolution.tombstone_evidence_ids.iter())
+		.chain(evolution.invalidation_evidence_ids.iter())
 	{
 		ensure_known_evidence(path, &evidence_ids, evidence_id)?;
 	}
@@ -2381,6 +2399,7 @@ fn evolution_job_report(
 	forbidden_claim_count: usize,
 ) -> Option<EvolutionJobReport> {
 	let evolution = job.memory_evolution.as_ref()?;
+	let produced = produced_evidence_ids(answer);
 	let stale_trap_ids_used = stale_trap_ids_used(job, evolution, trap_ids_used);
 	let stale_answer_count =
 		stale_answer_count(job, evolution, &stale_trap_ids_used, forbidden_claim_count);
@@ -2417,6 +2436,28 @@ fn evolution_job_report(
 	Some(EvolutionJobReport {
 		current_evidence: evolution.current_evidence_ids.clone(),
 		historical_evidence: evolution.historical_evidence_ids.clone(),
+		tombstone_evidence: evolution.tombstone_evidence_ids.clone(),
+		invalidation_evidence: evolution.invalidation_evidence_ids.clone(),
+		selected_current_evidence: selected_evolution_evidence(
+			&evolution.current_evidence_ids,
+			&produced,
+		),
+		selected_historical_evidence: selected_evolution_evidence(
+			&evolution.historical_evidence_ids,
+			&produced,
+		),
+		selected_rationale_evidence: selected_rationale_evidence(evolution, &produced),
+		selected_tombstone_evidence: selected_evolution_evidence(
+			&evolution.tombstone_evidence_ids,
+			&produced,
+		),
+		selected_invalidation_evidence: selected_evolution_evidence(
+			&evolution.invalidation_evidence_ids,
+			&produced,
+		),
+		conflict_candidate_evidence: selected_conflict_candidate_evidence(evolution, &produced),
+		retrieved_but_dropped_evidence: trace_dropped_evidence(answer),
+		selected_but_not_narrated_evidence: selected_but_not_narrated_evidence(answer),
 		stale_answer_count,
 		stale_trap_ids_used,
 		conflict_count: evolution.conflicts.len(),
@@ -2448,6 +2489,77 @@ fn stale_answer_count(
 	stale_trap_ids_used.len().max(stale_forbidden_claims)
 }
 
+/// Returns the entries of `evidence_ids` that are also present in `produced`.
+///
+/// The input order is preserved and matching IDs are cloned into the result.
+/// No de-duplication is performed: an ID repeated in `evidence_ids` is
+/// repeated in the output.
+fn selected_evolution_evidence(
+	evidence_ids: &[String],
+	produced: &BTreeSet<String>,
+) -> Vec<String> {
+	evidence_ids.iter().filter(|evidence_id| produced.contains(*evidence_id)).cloned().collect()
+}
+
+/// Returns the update-rationale evidence IDs that are present in `produced`.
+///
+/// Yields an empty vector when `evolution.update_rationale` is `None`;
+/// otherwise filters the rationale's `evidence_ids` through
+/// `selected_evolution_evidence`.
+fn selected_rationale_evidence(
+	evolution: &MemoryEvolution,
+	produced: &BTreeSet<String>,
+) -> Vec<String> {
+	evolution.update_rationale.as_ref().map_or_else(Vec::new, |rationale| {
+		selected_evolution_evidence(&rationale.evidence_ids, produced)
+	})
+}
+
+/// Collects the conflict-related evidence IDs that are present in `produced`.
+///
+/// For every entry in `evolution.conflicts` this considers, in order, the
+/// current evidence ID, the historical evidence ID, and the resolving
+/// evidence ID when one is set. Each ID is appended at most once, so the
+/// result is de-duplicated and keeps first-seen order.
+fn selected_conflict_candidate_evidence(
+	evolution: &MemoryEvolution,
+	produced: &BTreeSet<String>,
+) -> Vec<String> {
+	let mut evidence_ids = Vec::new();
+
+	for conflict in &evolution.conflicts {
+		push_if_produced(&mut evidence_ids, conflict.current_evidence_id.as_str(), produced);
+		push_if_produced(&mut evidence_ids, conflict.historical_evidence_id.as_str(), produced);
+
+		// The resolving evidence is optional; skip it when the conflict has none.
+		if let Some(evidence_id) = &conflict.resolved_by_evidence_id {
+			push_if_produced(&mut evidence_ids, evidence_id.as_str(), produced);
+		}
+	}
+
+	evidence_ids
+}
+
+/// Appends `evidence_id` to `out` when it is in `produced` and not already in `out`.
+///
+/// The duplicate check is a linear scan of `out`, which keeps insertion order.
+fn push_if_produced(out: &mut Vec<String>, evidence_id: &str, produced: &BTreeSet<String>) {
+	if produced.contains(evidence_id) && !out.iter().any(|id| id == evidence_id) {
+		out.push(evidence_id.to_string());
+	}
+}
+
+/// Gathers the evidence IDs listed as dropped across all trace stages of `answer`.
+///
+/// Returns an empty vector when `answer.trace_explainability` is `None`.
+/// IDs are de-duplicated across stages and kept in first-seen order.
+fn trace_dropped_evidence(answer: &ProducedAnswer) -> Vec<String> {
+	let mut evidence = Vec::new();
+
+	if let Some(trace) = &answer.trace_explainability {
+		for stage in &trace.stages {
+			for evidence_id in &stage.dropped_evidence {
+				// Linear membership check: report each dropped ID only once.
+				if !evidence.iter().any(|id| id == evidence_id) {
+					evidence.push(evidence_id.clone());
+				}
+			}
+		}
+	}
+
+	evidence
+}
+
+/// Returns the IDs in `answer.evidence_ids` that no claim in `answer.claims` cites.
+///
+/// The order of `answer.evidence_ids` is preserved and the result is not
+/// de-duplicated: an uncited ID repeated in `answer.evidence_ids` is
+/// repeated in the output.
+fn selected_but_not_narrated_evidence(answer: &ProducedAnswer) -> Vec<String> {
+	// Set of every evidence ID referenced by at least one claim.
+	let narrated = answer
+		.claims
+		.iter()
+		.flat_map(|claim| claim.evidence_ids.iter().map(String::as_str))
+		.collect::<BTreeSet<_>>();
+
+	answer
+		.evidence_ids
+		.iter()
+		.filter(|evidence_id| !narrated.contains(evidence_id.as_str()))
+		.cloned()
+		.collect()
+}
+
 fn stale_trap_ids_used(
 	job: &RealWorldJob,
 	evolution: &MemoryEvolution,
@@ -4831,20 +4943,44 @@ fn render_markdown_evolution(out: &mut String, report: &RealWorldReport) {
 		"- History readback encoded: `{}`\n\n",
 		report.evolution.history_readback_encoded_count
 	));
-	out.push_str("| Suite | Job | Current Evidence | Historical Evidence | Stale Traps Used | Conflict Count | Detected | Update Rationale | Temporal Validity | History Readback | Follow-up |\n");
-	out.push_str("| --- | --- | --- | --- | --- | ---: | ---: | --- | --- | --- | --- |\n");
+	out.push_str("| Suite | Job | Current Evidence | Historical Evidence | Tombstone/Invalidation | Selected Current | Selected Historical | Selected Rationale | Selected Tombstone/Invalidation | Selected But Not Narrated | Stale Traps Used | Conflict Count | Detected | Update Rationale | Temporal Validity | History Readback | Follow-up |\n");
+	out.push_str("| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | ---: | ---: | --- | --- | --- | --- |\n");
 
 	for job in &report.jobs {
 		let Some(evolution) = &job.evolution else {
 			continue;
 		};
 
 		out.push_str(&format!(
-			"| {} | {} | `{}` | `{}` | `{}` | {} | {} | `{}` | `{}` | `{}` | {} |\n",
+			"| {} | {} | `{}` | `{}` | `{}` | `{}` | `{}` | `{}` | `{}` | `{}` | `{}` | {} | {} | `{}` | `{}` | `{}` | {} |\n",
 			md_cell(job.suite_id.as_str()),
 			md_cell(job.job_id.as_str()),
 			md_inline(evolution.current_evidence.join(", ").as_str()),
 			md_inline(evolution.historical_evidence.join(", ").as_str()),
+			md_inline(
+				evolution
+					.tombstone_evidence
+					.iter()
+					.chain(evolution.invalidation_evidence.iter())
+					.cloned()
+					.collect::<Vec<_>>()
+					.join(", ")
+					.as_str()
+			),
+			md_inline(evolution.selected_current_evidence.join(", ").as_str()),
+			md_inline(evolution.selected_historical_evidence.join(", ").as_str()),
+			md_inline(evolution.selected_rationale_evidence.join(", ").as_str()),
+			md_inline(
+				evolution
+					.selected_tombstone_evidence
+					.iter()
+					.chain(evolution.selected_invalidation_evidence.iter())
+					.cloned()
+					.collect::<Vec<_>>()
+					.join(", ")
+					.as_str()
+			),
+			md_inline(evolution.selected_but_not_narrated_evidence.join(", ").as_str()),
 			md_inline(evolution.stale_trap_ids_used.join(", ").as_str()),
 			evolution.conflict_count,
 			evolution.conflict_detection_count,