hack-ink · yvette-carlisle · Jun 10, 2026 · Jun 10, 2026
diff --git a/apps/elf-api/static/viewer.html b/apps/elf-api/static/viewer.html
@@ -1463,13 +1463,14 @@ <h2>Recent Traces</h2>
 			}
 			return section("Relation Context", [
 				table(
-					["Rank", "Scope", "Subject", "Predicate", "Object", "Evidence Notes"],
+					["Rank", "Scope", "Subject", "Predicate", "Object", "Temporal", "Evidence Notes"],
 					relations.map(({ item, context }) => [
 						item.rank,
 						context.scope,
 						getPath(context, ["subject", "canonical"]) || "none",
 						context.predicate,
 						getPath(context, ["object", "entity", "canonical"]) || getPath(context, ["object", "value"]) || "none",
+						context.temporal_status || "current",
 						(context.evidence_note_ids || []).join(", ")
 					])
 				)

diff --git a/...lation_temporal_validity_not_encoded.json → ...evolution/relation_temporal_validity.json b/...lation_temporal_validity_not_encoded.json → ...evolution/relation_temporal_validity.json
@@ -2,15 +2,8 @@
   "schema": "elf.real_world_job/v1",
   "job_id": "memory-evolution-relation-temporal-001",
   "suite": "memory_evolution",
-  "title": "Mark temporal relation validity as not encoded instead of faking a graph pass",
-  "encoding": {
-    "status": "not_encoded",
-    "reason": "ELF graph-lite currently returns bounded relation context, but this runner does not yet encode current-only versus historical temporal validity for relation facts.",
-    "follow_up": {
-      "title": "[ELF graph P1] Add temporal validity to graph-lite facts",
-      "reason": "Relation facts need valid_from and invalidated_at semantics before this job can claim a current-versus-historical graph pass."
-    }
-  },
+  "title": "Distinguish current and historical relation validity in graph-lite context",
+  "encoding": {},
   "corpus": {
     "corpus_id": "real-world-memory-evolution-2026-06-09",
     "profile": "synthetic",
@@ -23,7 +16,7 @@
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
           "ref": {
-            "fixture": "relation_temporal_validity_not_encoded",
+            "fixture": "relation_temporal_validity",
             "evidence_id": "relation-old-owner"
           }
         },
@@ -37,7 +30,7 @@
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
           "ref": {
-            "fixture": "relation_temporal_validity_not_encoded",
+            "fixture": "relation_temporal_validity",
             "evidence_id": "relation-current-owner"
           }
         },
@@ -51,13 +44,49 @@
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
           "ref": {
-            "fixture": "relation_temporal_validity_not_encoded",
+            "fixture": "relation_temporal_validity",
             "evidence_id": "relation-owner-rationale"
           }
         },
         "created_at": "2026-06-08T00:05:00Z"
       }
-    ]
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_memory_evolution",
+      "answer": {
+        "content": "Team Echo currently owns deployment method review. Team Delta owned deployment method review historically. The ownership moved after the single-user production runbook scope changed.",
+        "claims": [
+          {
+            "claim_id": "relation_current_owner",
+            "text": "Team Echo currently owns deployment method review.",
+            "evidence_ids": [
+              "relation-current-owner",
+              "relation-old-owner",
+              "relation-owner-rationale"
+            ],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "relation_historical_owner",
+            "text": "Team Delta owned deployment method review historically.",
+            "evidence_ids": ["relation-old-owner"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "relation_owner_update_rationale",
+            "text": "Ownership moved after single-user production runbook scope changed.",
+            "evidence_ids": ["relation-owner-rationale"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": [
+          "relation-current-owner",
+          "relation-old-owner",
+          "relation-owner-rationale"
+        ]
+      },
+      "consolidation": null
+    }
   },
   "timeline": [
     {
@@ -101,7 +130,8 @@
         "relation-old-owner",
         "relation-owner-rationale"
       ],
-      "relation_historical_owner": ["relation-old-owner"]
+      "relation_historical_owner": ["relation-old-owner"],
+      "relation_owner_update_rationale": ["relation-owner-rationale"]
     },
     "answer_type": "direct_answer",
     "accepted_alternates": [],
@@ -160,9 +190,9 @@
     ]
   },
   "allowed_uncertainty": {
-    "can_answer_unknown": true,
-    "acceptable_phrases": ["Temporal relation validity is not encoded in this runner."],
-    "fallback_action": "state_blocker"
+    "can_answer_unknown": false,
+    "acceptable_phrases": [],
+    "fallback_action": "score_temporal_relation_behavior"
   },
   "memory_evolution": {
     "current_evidence_ids": ["relation-current-owner"],
@@ -180,20 +210,19 @@
     "update_rationale": {
       "claim_id": "relation_owner_update_rationale",
       "evidence_ids": ["relation-owner-rationale"],
-      "available": false
+      "available": true
     },
     "temporal_validity": {
       "required": true,
-      "encoded": false,
-      "follow_up": "[ELF graph P1] Add temporal validity to graph-lite facts"
+      "encoded": true
     }
   },
   "tags": [
     "synthetic",
     "memory_evolution",
     "reference_graphiti_zep_temporal",
     "reference_nanograph_typed_query",
-    "not_encoded",
+    "graph_temporal_encoded",
     "no_live_claim"
   ]
 }
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -689,16 +689,16 @@ fn assert_root_knowledge_summary(report: &Value) {
 
 fn assert_root_aggregate_summary(report: &Value) {
 	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(38));
-	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(34));
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(35));
 	assert_eq!(report.pointer("/summary/wrong_result").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/incomplete").and_then(Value::as_u64), Some(1));
 	assert_eq!(report.pointer("/summary/blocked").and_then(Value::as_u64), Some(2));
-	assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(1));
+	assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/wrong_result_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(
 		report.pointer("/summary/expected_evidence_recall").and_then(Value::as_f64),
-		Some(0.973)
+		Some(1.0)
 	);
 	assert_eq!(
 		report.pointer("/summary/irrelevant_context_ratio").and_then(Value::as_f64),
@@ -708,15 +708,15 @@ fn assert_root_aggregate_summary(report: &Value) {
 	assert_eq!(report.pointer("/summary/stale_answer_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(
 		report.pointer("/summary/conflict_detection_count").and_then(Value::as_u64),
-		Some(6)
+		Some(7)
 	);
 	assert_eq!(
 		report.pointer("/summary/update_rationale_available_count").and_then(Value::as_u64),
-		Some(9)
+		Some(10)
 	);
 	assert_eq!(
 		report.pointer("/summary/temporal_validity_not_encoded_count").and_then(Value::as_u64),
-		Some(1)
+		Some(0)
 	);
 	assert_eq!(report.pointer("/summary/redaction_leak_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/scope_check_count").and_then(Value::as_u64), Some(2));
@@ -734,10 +734,10 @@ fn assert_root_aggregate_summary(report: &Value) {
 		report.pointer("/summary/evidence_required_count").and_then(Value::as_u64),
 		Some(82)
 	);
-	assert_eq!(report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64), Some(80));
-	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(0.976));
-	assert_eq!(report.pointer("/summary/source_ref_coverage").and_then(Value::as_f64), Some(0.976));
-	assert_eq!(report.pointer("/summary/quote_coverage").and_then(Value::as_f64), Some(0.976));
+	assert_eq!(report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64), Some(82));
+	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(1.0));
+	assert_eq!(report.pointer("/summary/source_ref_coverage").and_then(Value::as_f64), Some(1.0));
+	assert_eq!(report.pointer("/summary/quote_coverage").and_then(Value::as_f64), Some(1.0));
 	assert_eq!(
 		report.pointer("/summary/trace_explainability_count").and_then(Value::as_u64),
 		Some(1)
@@ -777,6 +777,7 @@ fn assert_root_aggregate_suites(report: &Value) -> Result<()> {
 		"consolidation",
 		"knowledge_compilation",
 		"operator_debugging_ux",
+		"memory_evolution",
 	] {
 		let suite = find_by_field(suites, "/suite_id", suite_id)?;
 
@@ -785,7 +786,7 @@ fn assert_root_aggregate_suites(report: &Value) -> Result<()> {
 
 	let memory_evolution = find_by_field(suites, "/suite_id", "memory_evolution")?;
 
-	assert_eq!(memory_evolution.pointer("/status").and_then(Value::as_str), Some("not_encoded"));
+	assert_eq!(memory_evolution.pointer("/status").and_then(Value::as_str), Some("pass"));
 
 	let project_decisions = find_by_field(suites, "/suite_id", "project_decisions")?;
 
@@ -812,6 +813,7 @@ fn assert_root_aggregate_jobs(report: &Value) -> Result<()> {
 	let rebuild = find_by_field(jobs, "/job_id", "trust-sot-rebuild-001")?;
 	let redaction = find_by_field(jobs, "/job_id", "capture-redaction-exclusion-001")?;
 	let personalization = find_by_field(jobs, "/job_id", "personalization-scoped-preference-001")?;
+	let relation_job = find_by_field(jobs, "/job_id", "memory-evolution-relation-temporal-001")?;
 	let stage_job = find_by_field(jobs, "/job_id", "operator-debug-stage-attribution-001")?;
 	let production_restore =
 		find_by_field(jobs, "/job_id", "production-ops-restore-cold-start-001")?;
@@ -825,6 +827,7 @@ fn assert_root_aggregate_jobs(report: &Value) -> Result<()> {
 	assert_eq!(personalization.pointer("/scope_check_count").and_then(Value::as_u64), Some(1));
 	assert_eq!(personalization.pointer("/scope_correct_count").and_then(Value::as_u64), Some(1));
 	assert_eq!(stage_job.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(relation_job.pointer("/status").and_then(Value::as_str), Some("pass"));
 	assert_eq!(
 		stage_job.pointer("/trace_explainability/failure_stage").and_then(Value::as_str),
 		Some("rerank.score")
@@ -992,54 +995,51 @@ fn memory_evolution_fixtures_report_temporal_and_staleness_metrics() -> Result<(
 
 	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(5));
 	assert_eq!(report.pointer("/summary/encoded_suite_count").and_then(Value::as_u64), Some(1));
-	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(4));
-	assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(1));
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(5));
+	assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/stale_answer_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(
 		report.pointer("/summary/conflict_detection_count").and_then(Value::as_u64),
-		Some(4)
+		Some(5)
 	);
 	assert_eq!(
 		report.pointer("/summary/update_rationale_available_count").and_then(Value::as_u64),
-		Some(4)
+		Some(5)
 	);
 	assert_eq!(
 		report.pointer("/summary/temporal_validity_not_encoded_count").and_then(Value::as_u64),
-		Some(1)
+		Some(0)
 	);
 	assert_eq!(
 		report.pointer("/evolution/temporal_validity_not_encoded_count").and_then(Value::as_u64),
-		Some(1)
+		Some(0)
 	);
 
 	let suites = array_at(&report, "/suites")?;
 	let memory_evolution = find_by_field(suites, "/suite_id", "memory_evolution")?;
 
-	assert_eq!(memory_evolution.pointer("/status").and_then(Value::as_str), Some("not_encoded"));
+	assert_eq!(memory_evolution.pointer("/status").and_then(Value::as_str), Some("pass"));
 	assert_eq!(
 		memory_evolution.pointer("/temporal_validity_not_encoded_count").and_then(Value::as_u64),
-		Some(1)
+		Some(0)
 	);
 
 	let jobs = array_at(&report, "/jobs")?;
 	let relation_job = find_by_field(jobs, "/job_id", "memory-evolution-relation-temporal-001")?;
 
-	assert_eq!(relation_job.pointer("/status").and_then(Value::as_str), Some("not_encoded"));
+	assert_eq!(relation_job.pointer("/status").and_then(Value::as_str), Some("pass"));
 	assert_eq!(
 		relation_job.pointer("/evolution/temporal_validity_not_encoded").and_then(Value::as_bool),
+		Some(false)
+	);
+	assert_eq!(
+		relation_job.pointer("/evolution/temporal_validity_encoded").and_then(Value::as_bool),
 		Some(true)
 	);
 
 	let follow_ups = array_at(&report, "/follow_ups")?;
 
-	assert_eq!(follow_ups.len(), 1);
-	assert_eq!(
-		follow_ups
-			.first()
-			.and_then(|follow_up| follow_up.pointer("/title"))
-			.and_then(Value::as_str),
-		Some("[ELF graph P1] Add temporal validity to graph-lite facts")
-	);
+	assert!(follow_ups.is_empty());
 
 	Ok(())
 }
@@ -1163,8 +1163,9 @@ fn memory_evolution_report_renders_markdown_counters() -> Result<()> {
 	let markdown = fs::read_to_string(markdown_path)?;
 
 	assert!(markdown.contains("## Memory Evolution"));
-	assert!(markdown.contains("Temporal validity not encoded: `1`"));
-	assert!(markdown.contains("[ELF graph P1] Add temporal validity to graph-lite facts"));
+	assert!(markdown.contains("Temporal validity not encoded: `0`"));
+	assert!(markdown.contains("| memory_evolution | memory-evolution-relation-temporal-001"));
+	assert!(markdown.contains("`encoded`"));
 
 	Ok(())
 }

diff --git a/docs/guide/benchmarking/live_baseline_benchmark.md b/docs/guide/benchmarking/live_baseline_benchmark.md
@@ -353,8 +353,8 @@ cargo make real-world-memory-evolution
 
 It lives under `apps/elf-eval/fixtures/real_world_memory/evolution/` and reports
 stale-answer count, conflict detection count, update rationale availability, temporal
-validity gaps, and unsupported claims. Its relation-temporal fixture is deliberately
-`not_encoded` until graph-lite temporal validity is implemented.
+validity encoding, and unsupported claims. Its relation-temporal fixture is encoded as
+a normal pass/fail check for current versus historical graph-lite relation context.
 
 To run the checked-in retrieval-quality real-world fixtures:
 

diff --git a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
@@ -166,7 +166,7 @@ including the retrieval-quality slice below. The suite currently encodes:
 
 The generated report includes evidence coverage, source-ref coverage, quote coverage,
 unsupported-claim count, stale retrieval count, stale-answer count, conflict detection
-count, update rationale availability, temporal validity `not_encoded` count, scope
+count, update rationale availability, temporal validity `not_encoded` count, scope
 correctness, redaction leak count, capture/integration behavior classes, Qdrant
 rebuild case/pass counts, expected evidence recall, irrelevant context ratio,
 latency/cost, answer-type plus caveat/refusal/uncertainty flags, and trace
@@ -262,8 +262,8 @@ tmp/real-world-memory/evolution-report.md
 
 This parses `apps/elf-eval/fixtures/real_world_memory/evolution/` and reports only
 the cases added for current-versus-historical interpretation and temporal staleness.
-The relation temporal-validity fixture is deliberately `not_encoded` and declares the
-graph follow-up instead of claiming a fake graph pass.
+The relation temporal-validity fixture is encoded and scores current owner,
+historical owner, update rationale, and stale-owner trap behavior.
 
 Current checked-in retrieval-quality increment:
 

diff --git a/docs/guide/benchmarking/real_world_memory_evolution.md b/docs/guide/benchmarking/real_world_memory_evolution.md
@@ -2,7 +2,7 @@
 
 Goal: Run and interpret the checked-in memory evolution real-world job fixtures.
 Read this when: You need to test current facts, historical facts, stale facts,
-conflicts, corrected memories, and temporal validity limitations.
+conflicts, corrected memories, and temporal relation validity.
 Inputs: `apps/elf-eval/fixtures/real_world_memory/evolution/`,
 `apps/elf-eval/src/bin/real_world_job_benchmark.rs`, and `Makefile.toml`.
 Depends on: `docs/spec/real_world_agent_memory_benchmark_v1.md`,
@@ -23,13 +23,12 @@ The checked-in fixture set covers:
 - Issue state evolution from blocked to done.
 - Production deployment guidance superseding a local smoke quickstart.
 - Benchmark adoption verdict reversal with a bounded private-corpus caveat.
-- Relation fact current-versus-historical ownership, encoded as `not_encoded`
-  because temporal graph validity is not yet implemented in the runner.
+- Relation fact current-versus-historical ownership with graph-lite temporal
+  validity encoded as a normal pass/fail fixture.
 
 The relation case borrows from Graphiti/Zep temporal validity and nanograph typed
-query ergonomics. It intentionally does not fake a pass for graph temporal behavior.
-The report declares the follow-up `[ELF graph P1] Add temporal validity to graph-lite
-facts`.
+query ergonomics while preserving ELF's Postgres source-of-truth and evidence-link
+requirements.
 
 ## Run
 
@@ -55,10 +54,11 @@ The runner reports memory evolution counters at summary, suite, and job levels:
 - `update_rationale_available_count`: jobs where the produced answer cites the
   update rationale.
 - `temporal_validity_not_encoded_count`: jobs that require temporal graph validity
-  but are deliberately declared `not_encoded`.
+  but are deliberately declared `not_encoded`. The checked-in evolution fixture set
+  should report a count of `0`.
 - `unsupported_claim_count`: existing real-world job unsupported claim counter.
 
 Runnable jobs should have `stale_answer_count = 0`, nonzero conflict detection, and
-an update rationale when the fixture provides one. A temporal validity gap should
-remain `not_encoded` until graph-lite facts can model current-only and historical
-relation validity.
+an update rationale when the fixture provides one. The relation temporal-validity job
+should report temporal validity as encoded and pass only when the answer distinguishes
+current relation evidence from historical relation evidence.