diff --git a/README.md b/README.md
index 564a3be7..dde8c179 100644
--- a/README.md
+++ b/README.md
@@ -141,10 +141,11 @@ with the production embedding provider path, `Qwen3-Embedding-8B`, and
   search recovered the restored note.
 - Fresh all-project smoke run: ELF and qmd passed every encoded check. agentmemory
   passed same-corpus retrieval but failed lifecycle/cold-start coverage. memsearch,
-  mem0, OpenViking, and claude-mem remained `incomplete` or wrong-result typed states;
-  those states are reported as limitations, not hidden as proof.
+  mem0, OpenViking, and claude-mem remained in typed non-pass states. OpenViking now
+  reaches its pinned Docker local embedding path and is reported as `wrong_result`
+  when same-corpus evidence terms are missed; setup failures remain `incomplete`.
 - Real-world agent memory aggregate after the P1 benchmark batch: 38 fixture-backed
-  jobs across 11 suites, 35 pass, 1 incomplete, 2 blocked, 0 wrong-result,
+  jobs across 11 suites, 36 pass, 0 incomplete, 2 blocked, 0 wrong-result,
   0 not-encoded, and 0 unsupported-claim results. The remaining non-pass jobs are
   production-ops operator boundaries, not hidden benchmark wins.
 - Full-suite live real-world adapter sweep after XY-880: ELF and qmd now emit
@@ -157,8 +158,8 @@ with the production embedding provider path, `Qwen3-Embedding-8B`, and
   manifest now includes `research_gate` records for RAGFlow, LightRAG, GraphRAG,
   Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, graphify, and deeper
   qmd/OpenViking profiles. These records carry source/setup/runtime/resource/retry
-  metadata and typed `blocked`, `incomplete`, or `not_encoded` states; they are not
-  fixture-backed or live adapter pass evidence.
+  metadata and typed `blocked`, `incomplete`, `wrong_result`, or `not_encoded` states;
+  they are not fixture-backed or live adapter pass evidence.
 - The benchmark runner and report publisher are checked in and Docker-isolated:
   `cargo make baseline-live-docker`, `cargo make baseline-backfill-docker`,
   `cargo make baseline-production-private-addendum`,
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index 0c0c0a69..e49d67ae 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -20,7 +20,7 @@
       "evidence_class": "fixture_backed",
       "docker_default": true,
       "host_global_installs_required": false,
-      "overall_status": "incomplete",
+      "overall_status": "blocked",
       "setup": {
         "status": "pass",
         "evidence": "The checked-in real_world_memory fixtures parse and score through the ELF fixture runner.",
@@ -28,13 +28,13 @@
         "artifact": "tmp/real-world-memory/real-world-memory-report.json"
       },
       "run": {
-        "status": "incomplete",
-        "evidence": "The current fixture set reports 38 jobs, 35 pass, 1 incomplete, 2 blocked, 0 wrong_result, 0 not_encoded, and 0 unsupported_claim.",
+        "status": "blocked",
+        "evidence": "The current fixture set reports 38 jobs, 36 pass, 0 incomplete, 2 blocked, 0 wrong_result, 0 not_encoded, and 0 unsupported_claim.",
         "command": "cargo make real-world-memory",
         "artifact": "tmp/real-world-memory/real-world-memory-report.json"
       },
       "result": {
-        "status": "incomplete",
+        "status": "blocked",
         "evidence": "This is fixture-backed ELF scoring, not a live external adapter result.",
         "artifact": "tmp/real-world-memory/real-world-memory-report.md"
       },
@@ -103,8 +103,8 @@
         },
         {
           "suite_id": "production_ops",
-          "status": "incomplete",
-          "evidence": "Production-ops fixtures encode restore, Qdrant rebuild, backfill resume, resource-envelope interpretation, plus typed incomplete and blocked operator boundaries."
+          "status": "blocked",
+          "evidence": "Production-ops fixtures encode restore, Qdrant rebuild, backfill resume, resource-envelope interpretation, OpenViking wrong-result classification, plus typed blocked operator boundaries."
         },
         {
           "suite_id": "personalization",
@@ -126,7 +126,7 @@
       ],
       "notes": [
         "This adapter record exists to keep ELF fixture results separate from live external adapter results.",
-        "The remaining non-pass ELF fixture states are production-ops operator boundaries: a Docker local-embedding dependency, provider credentials, and an operator-owned private corpus manifest.",
+        "The remaining non-pass ELF fixture states are production-ops operator boundaries: provider credentials and an operator-owned private corpus manifest.",
         "Use elf_live_real_world for service-runtime real_world_job evidence; this fixture-backed record must not imply live-service behavior."
       ]
     },
@@ -714,28 +714,33 @@
       "evidence_class": "live_baseline_only",
       "docker_default": true,
       "host_global_installs_required": false,
-      "overall_status": "incomplete",
+      "overall_status": "wrong_result",
       "setup": {
-        "status": "incomplete",
-        "evidence": "OpenViking local-embed setup can fail in Docker while building or importing local embedding dependencies.",
+        "status": "pass",
+        "evidence": "OpenViking local-embed setup installed and imported pinned llama-cpp-python==0.3.28 from the CPU wheel index in Docker.",
         "command": "ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker",
         "artifact": "tmp/live-baseline/OpenViking.log"
       },
       "run": {
-        "status": "incomplete",
-        "evidence": "The adapter cannot reliably reach same-corpus add_resource/find behavior until local embedding setup is pinned for Docker.",
+        "status": "wrong_result",
+        "evidence": "The adapter reached same-corpus add_resource/find, but returned 0 of 3 expected evidence-term matches in the smoke run.",
         "artifact": "tmp/live-baseline/live-baseline-report.json"
       },
       "result": {
-        "status": "incomplete",
-        "evidence": "No real_world_job OpenViking adapter is encoded; current blocker is dependency setup, not a quality claim.",
+        "status": "wrong_result",
+        "evidence": "The current OpenViking Docker evidence is a behavioral wrong_result, not a local embedding setup blocker and not a real_world_job pass.",
         "artifact": "docs/guide/benchmarking/live_baseline_benchmark.md"
       },
       "capabilities": [
         {
           "capability": "local_embed_setup",
-          "status": "incomplete",
-          "evidence": "Docker local embedding dependency setup is not reliable in the current adapter."
+          "status": "pass",
+          "evidence": "Docker local embedding dependency setup is pinned to llama-cpp-python==0.3.28 from https://abetlen.github.io/llama-cpp-python/whl/cpu and reached import/runtime in the smoke run."
+        },
+        {
+          "capability": "same_corpus_retrieval",
+          "status": "wrong_result",
+          "evidence": "OpenViking add_resource/find returned resources but missed expected evidence-term matches for every smoke query."
         },
         {
           "capability": "context_trajectory",
@@ -751,8 +756,8 @@
       "suites": [
         {
           "suite_id": "retrieval",
-          "status": "incomplete",
-          "evidence": "The local embedding install blocker prevents a fair retrieval job run."
+          "status": "wrong_result",
+          "evidence": "The Docker-local setup reached add_resource/find, but the retrieval check returned 0/3 expected evidence-term matches."
         },
         {
           "suite_id": "work_resume",
@@ -769,15 +774,37 @@
         {
           "kind": "runner",
           "ref": "scripts/live-baseline-benchmark.sh",
-          "status": "incomplete"
+          "status": "wrong_result"
         }
       ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "OpenViking repository",
+            "url": "https://github.com/volcengine/OpenViking/",
+            "evidence": "Official source for OpenViking local context database, resource, and retrieval APIs."
+          },
+          {
+            "label": "llama-cpp-python CPU wheel index",
+            "url": "https://abetlen.github.io/llama-cpp-python/whl/cpu",
+            "evidence": "Official prebuilt CPU wheel index used by the Docker-local embedding pin."
+          }
+        ],
+        "setup_path": "Run ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker. The runner installs llama-cpp-python==0.3.28 with --only-binary llama-cpp-python from the CPU wheel index before OpenViking add_resource/find.",
+        "runtime_boundary": "docker-compose.baseline.yml baseline-runner container; no host-global OpenViking, llama-cpp-python, or model service install is required.",
+        "resource_expectation": "Local embedding setup may download a CPU wheel and model assets; record OpenViking.log, elapsed time, and cache size before claiming adapter quality.",
+        "retry_guidance": [
+          "Use the default pinned CPU wheel path first.",
+          "Override ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_VERSION or ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_INDEX only when the default wheel is unavailable for the Docker platform.",
+          "Treat install/import failure as incomplete, not wrong_result; treat add_resource/find evidence misses as wrong_result."
+        ]
+      },
       "notes": [
-        "Record OpenViking as incomplete until Docker-compatible local embeddings are pinned; do not treat setup weight as a negative quality result."
+        "Record OpenViking as wrong_result now that the pinned Docker local embedding path reaches add_resource/find but misses expected evidence."
       ],
       "follow_up": {
-        "title": "[ELF benchmark adapter] Pin OpenViking Docker local embedding dependency path",
-        "reason": "The current adapter must reach add_resource/find before real-world job suites can be scored."
+        "title": "[ELF benchmark adapter] Fix OpenViking evidence-bearing same-corpus retrieval output",
+        "reason": "The current adapter reaches add_resource/find but must return evidence-bearing content before real-world job suites can be scored."
       }
     },
     {
@@ -940,26 +967,26 @@
       "evidence_class": "research_gate",
       "docker_default": true,
       "host_global_installs_required": false,
-      "overall_status": "incomplete",
+      "overall_status": "not_encoded",
       "setup": {
-        "status": "incomplete",
-        "evidence": "OpenViking deep-profile work is blocked at the same Docker local-embedding dependency boundary as the current live-baseline adapter.",
+        "status": "pass",
+        "evidence": "The default pinned OpenViking local embedding dependency path reaches runtime in Docker.",
         "command": "ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker",
         "artifact": "tmp/live-baseline/OpenViking.log"
       },
       "run": {
-        "status": "incomplete",
-        "evidence": "The adapter cannot fairly exercise hierarchical trajectory behavior until add_resource/find reaches execution in Docker."
+        "status": "not_encoded",
+        "evidence": "The adapter cannot fairly exercise hierarchical trajectory behavior until same-corpus add_resource/find returns evidence-bearing results."
       },
       "result": {
-        "status": "incomplete",
-        "evidence": "No OpenViking deep context-trajectory result is claimed from a setup-blocked run."
+        "status": "not_encoded",
+        "evidence": "No OpenViking deep context-trajectory result is claimed from the current wrong-result smoke run."
       },
       "capabilities": [
         {
           "capability": "docker_local_embed_setup",
-          "status": "incomplete",
-          "evidence": "The local embedding setup must be pinned before deep profile runs can execute."
+          "status": "pass",
+          "evidence": "The local embedding setup is pinned and reaches import/runtime in Docker."
         },
         {
           "capability": "hierarchical_context_trajectory",
@@ -975,8 +1002,8 @@
       "suites": [
         {
           "suite_id": "retrieval",
-          "status": "incomplete",
-          "evidence": "Same-corpus retrieval setup remains incomplete in Docker."
+          "status": "not_encoded",
+          "evidence": "Deep retrieval scoring is deferred until the smoke adapter returns evidence-bearing same-corpus output."
         },
         {
           "suite_id": "work_resume",
@@ -998,7 +1025,7 @@
         {
           "kind": "runner",
           "ref": "scripts/live-baseline-benchmark.sh",
-          "status": "incomplete"
+          "status": "wrong_result"
         }
       ],
       "execution_metadata": {
@@ -1009,17 +1036,18 @@
             "evidence": "Official source for OpenViking local context database, resource, and retrieval APIs."
           }
         ],
-        "setup_path": "Pin a Docker-compatible local embedding path, then run OpenViking add_resource/find before any deep profile scoring.",
+        "setup_path": "Use the pinned Docker local embedding path from scripts/live-baseline-benchmark.sh, then run OpenViking add_resource/find before any deep profile scoring.",
         "runtime_boundary": "docker-compose.baseline.yml baseline-runner container; no host model or compiler setup outside Docker.",
-        "resource_expectation": "Local embedding builds can be native-toolchain and model heavy; record build logs, model cache size, and elapsed time.",
+        "resource_expectation": "Local embedding setup can download CPU wheels and model assets; record build/import logs, model cache size, and elapsed time.",
         "retry_guidance": [
-          "Pin or prebuild the local embedding dependency in the baseline image.",
-          "Only then add context-trajectory real_world_job scoring for hierarchical retrieval."
+          "Run the default pinned llama-cpp-python==0.3.28 CPU wheel path first.",
+          "Override the OpenViking llama-cpp-python version or index only when the default wheel is unavailable for the Docker platform.",
+          "Fix evidence-bearing same-corpus output before adding context-trajectory real_world_job scoring for hierarchical retrieval."
         ],
-        "research_depth": "D2 reviewed; runtime setup incomplete"
+        "research_depth": "D2 reviewed; local embedding setup pinned; deep profile not encoded"
       },
       "notes": [
-        "OpenViking remains a context-trajectory reference, but this gate prevents setup failure from becoming a quality judgment."
+        "OpenViking remains a context-trajectory reference, but this gate prevents a smoke wrong_result from becoming a deep-profile claim."
       ]
     },
     {
diff --git a/apps/elf-eval/fixtures/real_world_memory/production_ops/cold_start_missing_dependency_incomplete.json b/apps/elf-eval/fixtures/real_world_memory/production_ops/cold_start_missing_dependency_incomplete.json
index 8fcbfc39..5ff0912d 100644
--- a/apps/elf-eval/fixtures/real_world_memory/production_ops/cold_start_missing_dependency_incomplete.json
+++ b/apps/elf-eval/fixtures/real_world_memory/production_ops/cold_start_missing_dependency_incomplete.json
@@ -2,35 +2,62 @@
   "schema": "elf.real_world_job/v1",
   "job_id": "production-ops-cold-start-dependency-001",
   "suite": "production_ops",
-  "title": "Preserve cold-start dependency failure as incomplete instead of pass",
-  "encoding": {
-    "status": "incomplete",
-    "reason": "The fixture records a cold-start dependency failure path that could not reach the behavioral check; this must remain incomplete rather than a silent pass.",
-    "follow_up": {
-      "title": "[ELF benchmark P0] Pin Docker-compatible local embedding dependency for cold-start adapter checks",
-      "reason": "The adapter cannot fairly test cold-start recovery until its local embedding dependency can build or import in Docker."
-    }
-  },
+  "title": "Report pinned OpenViking cold-start path reaching behavioral wrong-result",
+  "encoding": {},
   "corpus": {
     "corpus_id": "real-world-memory-production-ops-2026-06-10",
     "profile": "external_adapter",
     "items": [
       {
-        "evidence_id": "local-embed-install-failure",
+        "evidence_id": "pinned-local-embed-runtime-reached",
         "kind": "adapter_state",
-        "text": "OpenViking cold-start check could not run because the Docker platform could not build or import llama-cpp-python for the local embedding path; the adapter status is incomplete with retrieval_status=local_embed_install_failed.",
+        "text": "The pinned OpenViking Docker local embedding path installed and imported llama-cpp-python==0.3.28, then reached OpenViking add_resource/find in the baseline runner.",
         "source_ref": {
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
           "ref": {
             "fixture": "cold_start_missing_dependency_incomplete",
-            "evidence_id": "local-embed-install-failure"
+            "evidence_id": "pinned-local-embed-runtime-reached"
           },
           "locator": {
-            "quote": "could not build or import llama-cpp-python"
+            "quote": "installed and imported llama-cpp-python==0.3.28"
           }
         },
-        "created_at": "2026-06-09T08:38:14Z"
+        "created_at": "2026-06-10T08:38:58Z"
+      },
+      {
+        "evidence_id": "pinned-local-embed-retry",
+        "kind": "runbook",
+        "text": "The Docker retry path is ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker; the runner installs llama-cpp-python==0.3.28 from https://abetlen.github.io/llama-cpp-python/whl/cpu with --only-binary llama-cpp-python before OpenViking add_resource/find.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "cold_start_missing_dependency_incomplete",
+            "evidence_id": "pinned-local-embed-retry"
+          },
+          "locator": {
+            "quote": "llama-cpp-python==0.3.28"
+          }
+        },
+        "created_at": "2026-06-10T00:00:00Z"
+      },
+      {
+        "evidence_id": "openviking-wrong-result-behavior",
+        "kind": "adapter_state",
+        "text": "OpenViking now records status=wrong_result and retrieval_status=retrieval_wrong_result because add_resource/find returned 0 of 3 expected evidence-term matches after the pinned local embedding setup succeeded.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "cold_start_missing_dependency_incomplete",
+            "evidence_id": "openviking-wrong-result-behavior"
+          },
+          "locator": {
+            "quote": "status=wrong_result"
+          }
+        },
+        "created_at": "2026-06-10T08:38:58Z"
       },
       {
         "evidence_id": "typed-incomplete-policy",
@@ -52,7 +79,7 @@
       {
         "evidence_id": "dependency-pass-decoy",
         "kind": "adapter_state",
-        "text": "Decoy: missing local embedding dependency should be reported as pass because no retrieval mismatch occurred.",
+        "text": "Decoy: the pinned OpenViking run should be reported as pass because the dependency installed even though retrieval missed expected evidence terms.",
         "source_ref": {
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
@@ -67,16 +94,36 @@
     "adapter_response": {
       "adapter_id": "fixture_production_ops",
       "answer": {
-        "content": "The cold-start dependency failure is incomplete, not pass. The adapter could not build or import llama-cpp-python, so the behavioral cold-start check did not run and needs dependency repair before a pass claim.",
+        "content": "The pinned OpenViking Docker local embedding path reached add_resource/find. OpenViking now reports wrong_result/retrieval_wrong_result because all three smoke queries missed expected evidence terms. If the pinned llama-cpp-python install or import fails on another Docker platform, classify that setup boundary as incomplete, not pass.",
         "claims": [
           {
-            "claim_id": "cold_start_dependency_incomplete",
-            "text": "The cold-start dependency failure is incomplete, not pass.",
-            "evidence_ids": ["local-embed-install-failure", "typed-incomplete-policy"],
+            "claim_id": "pinned_openviking_runtime_reached",
+            "text": "The pinned OpenViking Docker local embedding path reached add_resource/find.",
+            "evidence_ids": [
+              "pinned-local-embed-runtime-reached",
+              "pinned-local-embed-retry"
+            ],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "openviking_wrong_result_after_runtime",
+            "text": "OpenViking now reports wrong_result/retrieval_wrong_result because all three smoke queries missed expected evidence terms.",
+            "evidence_ids": ["openviking-wrong-result-behavior"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "setup_failure_stays_incomplete",
+            "text": "If the pinned llama-cpp-python install or import fails on another Docker platform, classify that setup boundary as incomplete, not pass.",
+            "evidence_ids": ["typed-incomplete-policy"],
             "confidence": "high"
           }
         ],
-        "evidence_ids": ["local-embed-install-failure", "typed-incomplete-policy"],
+        "evidence_ids": [
+          "pinned-local-embed-runtime-reached",
+          "pinned-local-embed-retry",
+          "openviking-wrong-result-behavior",
+          "typed-incomplete-policy"
+        ],
         "latency_ms": 1.8,
         "cost": {
           "currency": "USD",
@@ -89,12 +136,28 @@
   },
   "timeline": [
     {
-      "event_id": "cold-start-dependency-failed",
-      "ts": "2026-06-09T08:38:14Z",
+      "event_id": "pinned-local-embed-runtime-reached",
+      "ts": "2026-06-10T08:38:58Z",
+      "actor": "tool",
+      "action": "reached_behavior_check",
+      "evidence_ids": ["pinned-local-embed-runtime-reached"],
+      "summary": "The pinned local embedding dependency installed and imported, and OpenViking add_resource/find executed."
+    },
+    {
+      "event_id": "pinned-local-embed-retry-recorded",
+      "ts": "2026-06-10T00:00:00Z",
+      "actor": "agent",
+      "action": "recorded_retry_path",
+      "evidence_ids": ["pinned-local-embed-retry"],
+      "summary": "The fixture records the Docker-local pinned llama-cpp-python retry command and wheel index."
+    },
+    {
+      "event_id": "openviking-wrong-result-recorded",
+      "ts": "2026-06-10T08:38:58Z",
       "actor": "tool",
-      "action": "hit_dependency_failure",
-      "evidence_ids": ["local-embed-install-failure"],
-      "summary": "The cold-start adapter path stopped before behavioral scoring because a native dependency could not build or import."
+      "action": "classified_behavior",
+      "evidence_ids": ["openviking-wrong-result-behavior"],
+      "summary": "The OpenViking adapter reached retrieval behavior and missed all expected evidence-term checks."
     },
     {
       "event_id": "typed-incomplete-retained",
@@ -107,20 +170,33 @@
   ],
   "prompt": {
     "role": "user",
-    "content": "How should the production-ops suite classify a cold-start check that cannot run because a dependency is missing?",
+    "content": "How should the production-ops suite classify the OpenViking cold-start local embedding path after the pinned Docker retry reaches add_resource/find but misses expected evidence?",
     "job_mode": "operate",
     "constraints": ["cite_evidence", "preserve_typed_status", "do_not_claim_pass"]
   },
   "expected_answer": {
     "must_include": [
       {
-        "claim_id": "cold_start_dependency_incomplete",
-        "text": "The cold-start dependency failure is incomplete, not pass."
+        "claim_id": "pinned_openviking_runtime_reached",
+        "text": "The pinned OpenViking Docker local embedding path reached add_resource/find."
+      },
+      {
+        "claim_id": "openviking_wrong_result_after_runtime",
+        "text": "OpenViking now reports wrong_result/retrieval_wrong_result because all three smoke queries missed expected evidence terms."
+      },
+      {
+        "claim_id": "setup_failure_stays_incomplete",
+        "text": "If the pinned llama-cpp-python install or import fails on another Docker platform, classify that setup boundary as incomplete, not pass."
       }
     ],
-    "must_not_include": ["reported as pass"],
+    "must_not_include": ["reported as pass", "dependency failure is incomplete, not pass"],
     "evidence_links": {
-      "cold_start_dependency_incomplete": ["local-embed-install-failure", "typed-incomplete-policy"]
+      "pinned_openviking_runtime_reached": [
+        "pinned-local-embed-runtime-reached",
+        "pinned-local-embed-retry"
+      ],
+      "openviking_wrong_result_after_runtime": ["openviking-wrong-result-behavior"],
+      "setup_failure_stays_incomplete": ["typed-incomplete-policy"]
     },
     "answer_type": "direct_answer",
     "accepted_alternates": [],
@@ -129,14 +205,26 @@
   },
   "required_evidence": [
     {
-      "evidence_id": "local-embed-install-failure",
-      "claim_id": "cold_start_dependency_incomplete",
+      "evidence_id": "pinned-local-embed-runtime-reached",
+      "claim_id": "pinned_openviking_runtime_reached",
+      "requirement": "cite",
+      "quote": "installed and imported llama-cpp-python==0.3.28"
+    },
+    {
+      "evidence_id": "pinned-local-embed-retry",
+      "claim_id": "pinned_openviking_runtime_reached",
+      "requirement": "cite",
+      "quote": "llama-cpp-python==0.3.28"
+    },
+    {
+      "evidence_id": "openviking-wrong-result-behavior",
+      "claim_id": "openviking_wrong_result_after_runtime",
       "requirement": "cite",
-      "quote": "could not build or import llama-cpp-python"
+      "quote": "status=wrong_result"
     },
     {
       "evidence_id": "typed-incomplete-policy",
-      "claim_id": "cold_start_dependency_incomplete",
+      "claim_id": "setup_failure_stays_incomplete",
       "requirement": "cite",
       "quote": "Use incomplete when install, import, build"
     }
@@ -154,17 +242,17 @@
       "lifecycle_behavior": {
         "weight": 0.35,
         "max_points": 1.0,
-        "criteria": "Would test cold-start behavior only after dependency setup succeeds."
+        "criteria": "Distinguishes dependency setup reaching runtime from the remaining behavioral retrieval result."
       },
       "evidence_grounding": {
         "weight": 0.3,
         "max_points": 1.0,
-        "criteria": "Cites dependency failure and typed-incomplete policy."
+        "criteria": "Cites the pinned runtime success, wrong-result behavior, and typed-incomplete fallback policy."
       },
       "uncertainty_handling": {
         "weight": 0.2,
         "max_points": 1.0,
-        "criteria": "States that no pass claim is allowed."
+        "criteria": "States that setup failure would remain incomplete, but the current reached-runtime result is wrong_result."
       },
       "trap_avoidance": {
         "weight": 0.15,
@@ -180,8 +268,8 @@
   },
   "allowed_uncertainty": {
     "can_answer_unknown": true,
-    "acceptable_phrases": ["incomplete, not pass"],
-    "fallback_action": "state_blocker"
+    "acceptable_phrases": ["wrong_result/retrieval_wrong_result"],
+    "fallback_action": "state_current_wrong_result"
   },
-  "tags": ["external_adapter", "production_ops", "cold_start", "dependency_boundary", "no_live_claim"]
+  "tags": ["external_adapter", "production_ops", "cold_start", "dependency_boundary", "wrong_result", "no_live_claim"]
 }
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 414e28fa..fe994564 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -239,7 +239,7 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/wrong_result")
 			.and_then(Value::as_u64),
-		Some(5)
+		Some(6)
 	);
 	assert_eq!(
 		report
@@ -251,19 +251,19 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/incomplete")
 			.and_then(Value::as_u64),
-		Some(3)
+		Some(0)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(3)
+		Some(4)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/not_encoded")
 			.and_then(Value::as_u64),
-		Some(8)
+		Some(9)
 	);
 	assert_eq!(
 		report
@@ -281,7 +281,7 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/suite_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(10)
+		Some(11)
 	);
 }
 
@@ -297,7 +297,7 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
 	let qmd_deep = find_by_field(adapters, "/adapter_id", "qmd_deep_profile_gate")?;
 
 	assert_eq!(elf.pointer("/evidence_class").and_then(Value::as_str), Some("fixture_backed"));
-	assert_eq!(elf.pointer("/overall_status").and_then(Value::as_str), Some("incomplete"));
+	assert_eq!(elf.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
 	assert_eq!(
 		elf_live.pointer("/evidence_class").and_then(Value::as_str),
 		Some("live_real_world")
@@ -320,7 +320,7 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
 		agentmemory.pointer("/capabilities/1/status").and_then(Value::as_str),
 		Some("mocked")
 	);
-	assert_eq!(openviking.pointer("/overall_status").and_then(Value::as_str), Some("incomplete"));
+	assert_eq!(openviking.pointer("/overall_status").and_then(Value::as_str), Some("wrong_result"));
 	assert_eq!(ragflow.pointer("/evidence_class").and_then(Value::as_str), Some("research_gate"));
 	assert_eq!(ragflow.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
 	assert_eq!(
@@ -733,8 +733,8 @@ fn production_ops_fixtures_report_bounded_typed_states() -> Result<()> {
 	let report = run_json_report_from(production_ops_fixture_dir())?;
 
 	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(6));
-	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(3));
-	assert_eq!(report.pointer("/summary/incomplete").and_then(Value::as_u64), Some(1));
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(4));
+	assert_eq!(report.pointer("/summary/incomplete").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/blocked").and_then(Value::as_u64), Some(2));
 	assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(1.0));
@@ -750,7 +750,7 @@ fn production_ops_fixtures_report_bounded_typed_states() -> Result<()> {
 	let suites = array_at(&report, "/suites")?;
 	let production_ops = find_by_field(suites, "/suite_id", "production_ops")?;
 
-	assert_eq!(production_ops.pointer("/status").and_then(Value::as_str), Some("incomplete"));
+	assert_eq!(production_ops.pointer("/status").and_then(Value::as_str), Some("blocked"));
 	assert_eq!(production_ops.pointer("/encoded_job_count").and_then(Value::as_u64), Some(6));
 
 	let jobs = array_at(&report, "/jobs")?;
@@ -766,7 +766,7 @@ fn production_ops_fixtures_report_bounded_typed_states() -> Result<()> {
 	assert_eq!(restore.pointer("/qdrant_rebuild_case").and_then(Value::as_bool), Some(true));
 	assert_eq!(private_manifest.pointer("/status").and_then(Value::as_str), Some("blocked"));
 	assert_eq!(credentials.pointer("/status").and_then(Value::as_str), Some("blocked"));
-	assert_eq!(dependency.pointer("/status").and_then(Value::as_str), Some("incomplete"));
+	assert_eq!(dependency.pointer("/status").and_then(Value::as_str), Some("pass"));
 
 	Ok(())
 }
@@ -782,9 +782,9 @@ fn assert_root_knowledge_summary(report: &Value) {
 
 fn assert_root_aggregate_summary(report: &Value) {
 	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(38));
-	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(35));
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(36));
 	assert_eq!(report.pointer("/summary/wrong_result").and_then(Value::as_u64), Some(0));
-	assert_eq!(report.pointer("/summary/incomplete").and_then(Value::as_u64), Some(1));
+	assert_eq!(report.pointer("/summary/incomplete").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/blocked").and_then(Value::as_u64), Some(2));
 	assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0));
@@ -825,9 +825,9 @@ fn assert_root_aggregate_summary(report: &Value) {
 	);
 	assert_eq!(
 		report.pointer("/summary/evidence_required_count").and_then(Value::as_u64),
-		Some(82)
+		Some(84)
 	);
-	assert_eq!(report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64), Some(82));
+	assert_eq!(report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64), Some(84));
 	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(1.0));
 	assert_eq!(report.pointer("/summary/source_ref_coverage").and_then(Value::as_f64), Some(1.0));
 	assert_eq!(report.pointer("/summary/quote_coverage").and_then(Value::as_f64), Some(1.0));
@@ -895,7 +895,7 @@ fn assert_root_aggregate_suites(report: &Value) -> Result<()> {
 
 	let production_ops = find_by_field(suites, "/suite_id", "production_ops")?;
 
-	assert_eq!(production_ops.pointer("/status").and_then(Value::as_str), Some("incomplete"));
+	assert_eq!(production_ops.pointer("/status").and_then(Value::as_str), Some("blocked"));
 	assert_eq!(production_ops.pointer("/encoded_job_count").and_then(Value::as_u64), Some(6));
 
 	Ok(())
diff --git a/docker-compose.baseline.yml b/docker-compose.baseline.yml
index 1495166a..5793f66c 100644
--- a/docker-compose.baseline.yml
+++ b/docker-compose.baseline.yml
@@ -62,6 +62,8 @@ services:
       ELF_BASELINE_BACKFILL_RESUME_PROBE: ${ELF_BASELINE_BACKFILL_RESUME_PROBE:-}
       ELF_BASELINE_MAX_ELF_RSS_KB: ${ELF_BASELINE_MAX_ELF_RSS_KB:-1500000}
       ELF_BASELINE_MAX_ELF_SECONDS: ${ELF_BASELINE_MAX_ELF_SECONDS:-600}
+      ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_INDEX: ${ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_INDEX:-}
+      ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_VERSION: ${ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_VERSION:-}
       ELF_BASELINE_PROFILE: ${ELF_BASELINE_PROFILE:-smoke}
       ELF_BASELINE_PROJECTS: ${ELF_BASELINE_PROJECTS:-all}
       ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST: ${ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST:-}
diff --git a/docs/guide/benchmarking/2026-06-10-real-world-comparison-report.md b/docs/guide/benchmarking/2026-06-10-real-world-comparison-report.md
index 632f1536..2868b4b8 100644
--- a/docs/guide/benchmarking/2026-06-10-real-world-comparison-report.md
+++ b/docs/guide/benchmarking/2026-06-10-real-world-comparison-report.md
@@ -10,10 +10,11 @@ and the live-baseline reports linked from this guide.
 Depends on: `docs/spec/real_world_agent_memory_benchmark_v1.md`,
 `docs/guide/benchmarking/real_world_agent_memory_benchmark.md`, and
 `docs/guide/benchmarking/live_baseline_benchmark.md`.
-Verification: The commands listed below were run from branch `y/elf-xy-865`. The
-generated reports used runner version
-`0.2.0-89d30dc04a854771f2a62f607e1d13498ccb3073-aarch64-apple-darwin`; the working
-tree also contained the adapter manifest refresh recorded here.
+Verification: The original commands listed below were run from branch `y/elf-xy-865`.
+XY-881 refreshed `cargo make real-world-memory`, `cargo make real-world-memory-production-ops`,
+and `ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker` from branch
+`y/elf-xy-881`. Tables below include that refresh where the OpenViking cold-start
+dependency boundary is discussed.
 
 Postscript: XY-880 superseded the live-adapter state in this report for ELF and qmd.
 The successor evidence is
@@ -43,14 +44,14 @@ paths remain typed `blocked` boundaries, not passes.
 
 | Command | Generated artifact | Run ID | Generated at |
 | --- | --- | --- | --- |
-| `cargo make real-world-memory` | `tmp/real-world-memory/real-world-memory-report.{json,md}` | `real-world-memory` | `2026-06-10T04:21:32.545027Z` |
+| `cargo make real-world-memory` | `tmp/real-world-memory/real-world-memory-report.{json,md}` | `real-world-memory` | `2026-06-10T08:47:44.086502Z` |
 | `cargo make real-world-memory-project-decisions` | `tmp/real-world-memory/project-decisions/report.{json,md}` | `real-world-memory-project-decisions` | `2026-06-10T04:21:52.403238Z` |
-| `cargo make real-world-memory-production-ops` | `tmp/real-world-memory/production-ops-report.{json,md}` | `real-world-memory-production-ops` | `2026-06-10T04:21:59.520163Z` |
+| `cargo make real-world-memory-production-ops` | `tmp/real-world-memory/production-ops-report.{json,md}` | `real-world-memory-production-ops` | `2026-06-10T08:47:18.205778Z` |
 | `cargo make real-world-memory-evolution` | `tmp/real-world-memory/evolution-report.{json,md}` | `real-world-memory-evolution` | `2026-06-10T04:22:06.325152Z` |
 | `cargo make real-world-job-operator-ux` | `tmp/real-world-job/real-world-job-operator-ux-report.{json,md}` | `real-world-job-operator-ux` | `2026-06-10T04:22:12.28938Z` |
 
-All generated reports used runner version
-`0.2.0-89d30dc04a854771f2a62f607e1d13498ccb3073-aarch64-apple-darwin`.
+The refreshed real-world-memory reports used runner version
+`0.2.0-a8b25d00880bd3cf04707c3b2b328cd20a585396-aarch64-apple-darwin`.
 
 ## Aggregate Result
 
@@ -59,18 +60,18 @@ suites:
 
 | Metric | Value |
 | --- | ---: |
-| Pass | `35` |
-| Incomplete | `1` |
+| Pass | `36` |
+| Incomplete | `0` |
 | Blocked | `2` |
 | Wrong result | `0` |
 | Lifecycle fail | `0` |
 | Not encoded | `0` |
 | Unsupported claim | `0` |
-| Mean score | `0.921` |
-| Evidence coverage | `82/82` (`1.000`) |
-| Source-ref coverage | `82/82` (`1.000`) |
-| Quote coverage | `82/82` (`1.000`) |
-| Expected evidence recall | `75/75` (`1.000`) |
+| Mean score | `0.947` |
+| Evidence coverage | `84/84` (`1.000`) |
+| Source-ref coverage | `84/84` (`1.000`) |
+| Quote coverage | `84/84` (`1.000`) |
+| Expected evidence recall | `77/77` (`1.000`) |
 | Redaction leaks | `0` |
 | Scope violations | `0` |
 | Temporal validity gaps | `0` |
@@ -89,7 +90,7 @@ Suite-level outcomes:
 | `knowledge_compilation` | 2 | `pass` | `1.000` | Derived page fixtures passed with citation/rebuild checks. |
 | `operator_debugging_ux` | 1 | `pass` | `1.000` | Aggregate stage-attribution fixture passed. |
 | `capture_integration` | 2 | `pass` | `1.000` | Redaction and capture-boundary fixtures passed. |
-| `production_ops` | 6 | `incomplete` | `0.500` | Three jobs passed, one is a typed dependency `incomplete`, and two are typed operator `blocked`. |
+| `production_ops` | 6 | `blocked` | `0.667` | Four jobs passed, including the pinned OpenViking cold-start classification, and two operator-owned boundaries remain `blocked`. |
 | `personalization` | 1 | `pass` | `1.000` | Scoped preference correction passed. |
 
 ## Focused P1 Slices
@@ -99,7 +100,7 @@ Suite-level outcomes:
 | `cargo make real-world-memory-project-decisions` | 5 | `5` pass | Current decision, historical/reversed decision, validation gate, tradeoff rationale, and private-manifest caveat all passed. |
 | `cargo make real-world-memory-evolution` | 5 | `5` pass | Temporal relation validity is now encoded and passing; stale answers `0`, conflict detections `5`, update rationales `5`. |
 | `cargo make real-world-job-operator-ux` | 5 | `5` pass | Dropped evidence, rerank promotion, provider latency, rebuild change, and misleading relation-context debug cases passed with raw SQL needed `0`. |
-| `cargo make real-world-memory-production-ops` | 6 | `3` pass, `1` incomplete, `2` blocked | Restore/Qdrant rebuild, interrupted backfill resume, and resource envelope passed; local embedding dependency, provider credentials, and private manifest remain typed non-pass boundaries. |
+| `cargo make real-world-memory-production-ops` | 6 | `4` pass, `0` incomplete, `2` blocked | Restore/Qdrant rebuild, interrupted backfill resume, resource envelope, and pinned OpenViking cold-start classification passed; provider credentials and private manifest remain typed non-pass boundaries. |
 
 ## External Adapter Evidence
 
@@ -112,7 +113,7 @@ separate:
 | --- | ---: | --- |
 | `fixture_backed` | 1 | ELF fixture scoring through checked-in real-world jobs. |
 | `live_baseline_only` | 6 | Docker same-corpus/lifecycle evidence from the live-baseline runner only. |
-| `live_real_world` | 2 | Targeted ELF and qmd adapters execute representative `real_world_job` prompts and scoring. |
+| `live_real_world` | 2 | ELF and qmd adapters execute the full encoded-suite `real_world_job` sweep with typed non-pass states preserved. |
 | `research_gate` | 12 | Source/setup/runtime/resource/retry metadata for future adapter paths; not fixture-backed or live execution evidence. |
 
 XY-882 added D1/D2 feasibility verdicts inside the research-gate lane. RAGFlow
@@ -135,25 +136,28 @@ Adapter-level status after refreshing the manifest:
 
 | Project | Evidence class | Overall status | What is proven | What is not proven |
 | --- | --- | --- | --- | --- |
-| ELF | `fixture_backed` | `incomplete` | Fixture-backed real-world scoring passes 10 of 11 suites, with production-ops typed boundaries preserved. | Fixture-backed scoring is not live-service behavior; cite `elf_live_real_world` for the targeted live slice. |
-| ELF | `live_real_world` | `pass` | The targeted Docker slice materializes real_world_job answers through ElfService, worker indexing, and search_raw for work_resume, retrieval, and project_decisions. | This is not yet a full 11-suite live-service run or private-corpus proof. |
-| qmd | `live_baseline_only` | `pass` | Docker same-corpus retrieval, update, delete, and cold-start live-baseline checks pass. | Same-corpus checks are not real-world job scoring; cite `qmd_live_real_world` for the targeted live slice. |
-| qmd | `live_real_world` | `pass` | The targeted Docker slice indexes real_world_job corpora through qmd collection add/update/embed/query and scores generated answers. | This is not yet broad RAG/graph adapter coverage or full-suite external parity. |
+| ELF | `fixture_backed` | `blocked` | Fixture-backed real-world scoring passes every non-operator-owned suite and preserves the production-ops credential/private-manifest boundaries. | Fixture-backed scoring is not live-service behavior; cite `elf_live_real_world` for service-runtime sweep evidence. |
+| ELF | `live_real_world` | `wrong_result` | The Docker live sweep materializes all encoded real_world_job records through ElfService, worker indexing, and search_raw; the original targeted answer-retrieval slice still passes. | This is not a full-suite live pass or private-corpus proof; typed wrong_result, incomplete, blocked, and not_encoded states remain visible. |
+| qmd | `live_baseline_only` | `pass` | Docker same-corpus retrieval, update, delete, and cold-start live-baseline checks pass. | Same-corpus checks are not real-world job scoring; cite `qmd_live_real_world` for service-runtime sweep evidence. |
+| qmd | `live_real_world` | `wrong_result` | The Docker live sweep indexes the encoded real_world_job corpora through qmd collection add/update/embed/query and preserves per-suite scoring evidence. | This is not a full-suite live pass or broad RAG/graph adapter coverage; typed wrong_result, incomplete, blocked, and not_encoded states remain visible. |
 | agentmemory | `live_baseline_only` | `lifecycle_fail` | Same-corpus retrieval can run through current adapter. | Durable storage/cold-start lifecycle and real-world suites are blocked by the current in-memory adapter path. |
 | mem0/OpenMemory | `live_baseline_only` | `wrong_result` | Local OSS setup is represented separately from hosted/OpenMemory claims. | Same-corpus retrieval was not a clean pass and no real-world job adapter is encoded. |
 | memsearch | `live_baseline_only` | `wrong_result` | Markdown-first design remains a source-of-truth ergonomics reference. | Same-corpus retrieval was not a clean pass and real-world suites are incomplete/not encoded. |
-| OpenViking | `live_baseline_only` | `incomplete` | Hierarchical context trajectory remains a reference direction. | Docker local-embedding setup must be pinned before fair retrieval or real-world jobs can run. |
+| OpenViking | `live_baseline_only` | `wrong_result` | The Docker local-embedding setup is pinned and reaches `add_resource`/`find`. | The same-corpus smoke still misses expected evidence terms; no real-world job adapter or context-trajectory suite is claimed. |
 | claude-mem | `live_baseline_only` | `wrong_result` | Progressive disclosure and local viewer remain UX references. | Current Docker evidence is not a clean same-corpus pass and progressive disclosure jobs are not encoded. |
 | qmd deep profile | `research_gate` | `not_encoded` | The stress-profile command path and source metadata are recorded for a future deeper retrieval-debug run. | No expanded qmd stress artifact or broader real-world suite pass is checked in. |
-| OpenViking deep profile | `research_gate` | `incomplete` | The deeper context-trajectory gate inherits the current Docker local-embedding setup blocker. | No hierarchical trajectory suite result is claimed. |
+| OpenViking deep profile | `research_gate` | `not_encoded` | The deeper context-trajectory gate can reuse the pinned Docker local-embedding setup path. | No hierarchical trajectory suite result is claimed until evidence-bearing same-corpus output is fixed. |
 | RAGFlow, LightRAG, GraphRAG | `research_gate` | `blocked` | Official sources, setup/resource/retry expectations, and XY-882 adapter-candidate verdicts are recorded. | Docker runtime proof and real_world_job evidence-output mapping are still required before any live adapter claim. |
 | Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, graphify | `research_gate` | `not_encoded` | XY-882 records Graphiti/Zep and graphify as adapter candidates, Letta/LangGraph/nanograph/llm-wiki as research-only, and gbrain as blocked. | No Docker-isolated `real_world_job` adapter has run for these projects. |
 
 External summary counters: `21` adapter records, `19` non-ELF adapter records,
 `21` Docker-default, `0` host-global-install requirements, `2` live real-world
-adapters, and `12` research-gate records. Overall adapter statuses are `3` pass,
-`3` wrong_result, `1` lifecycle_fail, `3` incomplete, `3` blocked, and
-`8` not_encoded.
+adapters, and `12` research-gate records. Overall adapter statuses are `1` pass,
+`6` wrong_result, `1` lifecycle_fail, `0` incomplete, `4` blocked, and
+`9` not_encoded.
+Real-world suite statuses are tracked separately as `20` pass, `3` wrong_result,
+`7` incomplete, `11` blocked, and `40` not_encoded, so a setup boundary is not hidden
+behind an aggregate status.
 
 ## Remaining Gaps
 
@@ -162,15 +166,15 @@ report:
 
 | Gap | Status | Follow-up or non-goal |
 | --- | --- | --- |
-| ELF production-ops cold-start dependency fixture | `incomplete` | `[ELF benchmark P0] Pin Docker-compatible local embedding dependency for cold-start adapter checks`. |
+| ELF production-ops cold-start dependency fixture | `pass` | XY-881 pins the Docker OpenViking local embedding path and preserves setup failures as `incomplete` if the wheel/import boundary fails on another platform. |
 | ELF provider-backed production-ops gate | `blocked` | Run only with routed operator credentials; credentials were not supplied for this report. |
 | ELF private production corpus | `blocked` | Supply an operator-owned sanitized private manifest; private-corpus checks were a non-goal without that manifest. |
-| Full ELF live-service real-world sweep | `not_encoded` beyond targeted slice | Expand `elf_live_real_world` beyond representative work_resume, retrieval, and project_decisions jobs before claiming full live-service suite coverage. |
-| Full qmd real-world job sweep | `not_encoded` beyond targeted slice | Expand `qmd_live_real_world` beyond the representative targeted slice before claiming broad real-world suite parity. |
+| Full ELF live-service real-world sweep | `wrong_result` | XY-880 expanded `elf_live_real_world` to the full encoded suite corpus; the result is intentionally typed non-pass rather than a full-suite live pass. |
+| Full qmd real-world job sweep | `wrong_result` | XY-880 expanded `qmd_live_real_world` to the full encoded suite corpus; the result is intentionally typed non-pass rather than broad real-world suite parity. |
 | agentmemory durable lifecycle | `lifecycle_fail` / `blocked` | `[ELF benchmark P0] Make agentmemory adapter lifecycle-durable and fail-typed`. |
 | mem0/OpenMemory same-corpus and real-world coverage | `wrong_result` / `not_encoded` | Add/fix a local OSS adapter before claiming lifecycle, personalization, or OpenMemory UI parity. |
 | memsearch same-corpus and real-world coverage | `wrong_result` / `incomplete` | Fix Docker same-corpus retrieval/reindex evidence before scoring Markdown-first real-world jobs. |
-| OpenViking Docker local embedding path | `incomplete` | `[ELF benchmark adapter] Pin OpenViking Docker local embedding dependency path`. |
+| OpenViking Docker local embedding path | `wrong_result` | The pinned dependency path reaches `add_resource`/`find`; the remaining follow-up is evidence-bearing retrieval output, not setup. |
 | claude-mem durable/progressive-disclosure adapter | `wrong_result` / `not_encoded` | Add durable local repository and progressive-disclosure job coverage before UX parity claims. |
 | RAGFlow, LightRAG, GraphRAG, Graphiti/Zep, and graphify adapters | `research_gate` adapter candidates | Follow-up issues [XY-885](https://linear.app/hack-ink/issue/XY-885/elf-benchmark-adapter-implement-ragflow-docker-evidence-smoke-adapter), [XY-886](https://linear.app/hack-ink/issue/XY-886/elf-benchmark-adapter-implement-lightrag-docker-context-export-adapter), [XY-887](https://linear.app/hack-ink/issue/XY-887/elf-benchmark-adapter-implement-graphrag-cost-bounded-docker-adapter), [XY-888](https://linear.app/hack-ink/issue/XY-888/elf-benchmark-adapter-implement-graphitizep-temporal-graph-adapter), and [XY-889](https://linear.app/hack-ink/issue/XY-889/elf-benchmark-adapter-implement-graphify-docker-graph-report-adapter) must run only Docker-contained adapter smokes that emit evidence-linked outputs before any live result claim. |
 | Letta, LangGraph, nanograph, and llm-wiki adapters | `research_only` research gates | Keep as architecture or workflow references until a contained output contract is selected. |
diff --git a/docs/guide/benchmarking/live_baseline_benchmark.md b/docs/guide/benchmarking/live_baseline_benchmark.md
index d757b304..d1d08e6d 100644
--- a/docs/guide/benchmarking/live_baseline_benchmark.md
+++ b/docs/guide/benchmarking/live_baseline_benchmark.md
@@ -157,11 +157,18 @@ Current deeper checks:
   stress default is a bounded 60-second signal.
 
 OpenViking attempts the official `.[local-embed]` path plus `OpenViking.add_resource`
-and `OpenViking.find`. If the Docker platform cannot build or import
-`llama-cpp-python`, the project is recorded as `incomplete` with
+and `OpenViking.find`. The Docker runner first pins the local embedding dependency to
+`llama-cpp-python==0.3.28` from the official CPU wheel index
+`https://abetlen.github.io/llama-cpp-python/whl/cpu` and installs it with
+`--only-binary llama-cpp-python`. Override
+`ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_VERSION` or
+`ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_INDEX` only when the pinned wheel is
+unavailable for the Docker platform. If the pinned wheel cannot install or import, the
+project is recorded as `incomplete` with
 `retrieval_status = "local_embed_install_failed"` rather than as a retrieval failure.
-The adapter metadata includes retry guidance to pin or provide a Docker-compatible
-local embedding dependency before scaling the OpenViking profile.
+When the pinned dependency reaches `add_resource`/`find`, evidence misses are recorded
+as `wrong_result`/`retrieval_wrong_result`. This local dependency check is separate
+from provider-backed ELF/Qwen3 embedding evidence.
 
 ## Checked-In Reports
 
diff --git a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
index 77277c5a..e4745d72 100644
--- a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
+++ b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
@@ -158,9 +158,9 @@ including the retrieval-quality slice below. The suite currently encodes:
 - `capture_integration`: write-policy audit behavior for redaction/private exclusion
   and fixture-backed capture/integration boundary classification.
 - `production_ops`: interrupted generated backfill resume, backup/restore plus
-  cold-start readback, resource-envelope interpretation, missing dependency
-  `incomplete` classification, missing private manifest `blocked` classification, and
-  provider credential boundary `blocked` classification.
+  cold-start readback, resource-envelope interpretation, pinned OpenViking local
+  embedding runtime/wrong-result classification, missing private manifest `blocked`
+  classification, and provider credential boundary `blocked` classification.
 - `personalization`: scoped stable preference correction without temporary or
   cross-project preference leakage.
 
@@ -170,7 +170,7 @@ count, update rationale availability, temporal validity encoding count, scope
 correctness, redaction leak count, capture/integration behavior classes, Qdrant
 rebuild case/pass counts, expected evidence recall, irrelevant context ratio,
 latency/cost, answer-type plus caveat/refusal/uncertainty flags, and trace
-explainability counters, production-ops blocked/incomplete job states, and
+explainability counters, production-ops blocked/wrong-result job states, and
 private-corpus redaction policy. The fixtures include negative traps for stale
 blockers, unsupported prior claims, stale deleted facts, stale historical facts,
 cross-project preference leakage, private/redacted text leakage, obsolete retrieval
@@ -232,8 +232,9 @@ remain `not_encoded` for this live adapter path. qmd still also keeps its separa
 record is not a real-world suite win. agentmemory is blocked on durable upstream
 storage for lifecycle proof. mem0/OpenMemory, memsearch, and claude-mem currently
 retain wrong-result or incomplete live-baseline states for the checked-in adapter
-evidence. OpenViking is incomplete until its local embedding setup is reliable inside
-Docker. The expanded RAG and graph-memory records for RAGFlow, LightRAG, GraphRAG,
+evidence. OpenViking now reaches its pinned Docker local embedding setup but remains a
+same-corpus `wrong_result` until it returns evidence-bearing retrieval output. The
+expanded RAG and graph-memory records for RAGFlow, LightRAG, GraphRAG,
 Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, graphify, and deeper
 qmd/OpenViking profiles are `research_gate` records until their Docker-isolated
 adapter runs are implemented. These typed states describe benchmark coverage; do not
@@ -392,8 +393,12 @@ interpretation.
 
 The same slice deliberately keeps non-pass boundaries typed. A missing private
 production manifest is `blocked`, unavailable provider credentials are `blocked`, and
-a cold-start adapter dependency failure is `incomplete`. These states are evidence for
-operator caveats, not proof of private-corpus or provider-backed production success.
+the OpenViking cold-start dependency fixture now passes as a classification job: it
+records a pinned Docker-local embedding path that reaches `OpenViking.add_resource`
+and `OpenViking.find` but returns `wrong_result` evidence for the smoke queries. If the
+pinned wheel cannot install or import on a Docker platform, that setup boundary remains
+`incomplete`. These states are evidence for operator caveats, not proof of
+private-corpus, provider-backed production, or external-adapter quality success.
 
 This suite does not run private corpus data, does not require or publish credentials,
 does not perform live Docker restore/backfill work, and does not reinterpret older
diff --git a/docs/guide/research/comparison_external_projects.md b/docs/guide/research/comparison_external_projects.md
index a13f33e5..f969544c 100644
--- a/docs/guide/research/comparison_external_projects.md
+++ b/docs/guide/research/comparison_external_projects.md
@@ -102,7 +102,7 @@ Project-to-suite map:
 | claude-mem | `rw.operator-continuity`, `rw.resume-evidence`, `rw.retrieval-debug` | Progressive-disclosure search, auto-capture hooks, local viewer, and observation/timeline workflows are directly aligned with real agent resumption jobs. | Exercise a real local repository with hook-driven capture, then evaluate `search -> timeline -> observations` behavior after restart; do not rely on mocked storage. | Docs-grounded for progressive disclosure/viewer; current benchmark adapter evidence is incomplete/wrong-result and mostly not encoded for lifecycle. Confidence: medium for product reference, low for current adapter claims. | ELF has stronger provenance and service boundaries, but claude-mem remains a reference for operator workflow and progressive disclosure UX. |
 | mem0 / OpenMemory | `rw.lifecycle-staleness`, `rw.graph-temporal`, `rw.operator-continuity`, `rw.resume-evidence` | Entity-scoped memory, memory history, expiration, hosted/OSS surfaces, OpenMemory UI, and optional graph memory make it the broadest lifecycle and ecosystem comparison target. | Separate OSS local FastEmbed/Qdrant evidence from hosted Platform claims; prove add/update/delete/history, entity-scoped retrieval, expiration exclusion, OpenMemory UI readback, and optional graph context on the same corpus. | Docs-grounded for lifecycle/entity/graph/UI claims; current local adapter is incomplete/wrong-result for same-corpus retrieval and delete remains not encoded. Confidence: medium for suite fit, low for current adapter quality. | ELF is stronger on deterministic evidence-bound writes; mem0/OpenMemory is the reference for ecosystem reach, entity-scoped history, hosted option, and optional graph UX. |
 | memsearch | `rw.lifecycle-staleness`, `rw.retrieval-debug`, `rw.resume-evidence` | Markdown as canonical memory plus incremental/content-addressed reindexing is a useful model for source transparency and rebuildable derived indexes. | Index a real-world Markdown corpus, mutate/delete files, rerun index/search from fresh processes, and record Milvus mode so Lite/Server/Cloud behavior is not conflated. | Docs-grounded for architecture; current adapter is incomplete/invalid-result, so no pass/fail quality claim is allowed. Confidence: medium for design pattern, low for current adapter evidence. | ELF already owns source-of-truth plus rebuildable index at service level; memsearch remains a reference for simple local canonical-store ergonomics. |
-| OpenViking | `rw.context-trajectory`, `rw.resume-evidence`, `rw.retrieval-debug` | `viking://` context organization, intent analysis, hierarchical retrieval, staged find/search behavior, and session compression are relevant to multi-hop agent context jobs. | Pin or provide a Docker-compatible local embedding path, then evaluate `add_resource`/`find`/`search` over multi-stage jobs with stage output, hierarchy, and session memory evidence. | Docs-grounded for mechanism; current benchmark adapter is incomplete due local embedding install failure. Confidence: medium for architecture reference, low for runnable adapter quality. | ELF has first-class traces and evidence-bound notes, but OpenViking is the reference for hierarchical context trajectory and filesystem-like organization. |
+| OpenViking | `rw.context-trajectory`, `rw.resume-evidence`, `rw.retrieval-debug` | `viking://` context organization, intent analysis, hierarchical retrieval, staged find/search behavior, and session compression are relevant to multi-hop agent context jobs. | Use the pinned Docker local embedding path, then evaluate `add_resource`/`find`/`search` over multi-stage jobs with stage output, hierarchy, and session memory evidence. | Docs-grounded for mechanism; current benchmark adapter reaches local embedding setup and `add_resource`/`find`, but remains `wrong_result` because same-corpus evidence terms are missed. Confidence: medium for architecture reference, low for runnable adapter quality. | ELF has first-class traces and evidence-bound notes, but OpenViking is the reference for hierarchical context trajectory and filesystem-like organization. |
 | llm-wiki | `rw.knowledge-synthesis`, `rw.resume-evidence` | Query/save/lint flows and topic-scoped wiki pages are a useful reference for turning retrieved memory into maintained project knowledge. | Run a corpus-to-wiki job, ask resume/decision questions, require page citations back to source memory, then mutate a stale source and prove lint/repair catches it. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for derived-knowledge fit. | ELF is not yet stronger on derived knowledge pages; llm-wiki should inform rebuildable, evidence-cited dossiers rather than core storage. |
 | gbrain | `rw.knowledge-synthesis`, `rw.operator-continuity` | `compiled_truth`, timeline sections, backlinks, primary-home routing, and enrichment workflows model a living operational brain for project work. | Build or update pages from the real-world corpus, require current-truth plus timeline answers, and prove enrichment/backlink maintenance does not hide unsupported claims. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for operator knowledge UX. | ELF should keep source notes authoritative; gbrain is a reference for presentation, enrichment, and maintenance loops. |
 | Always-On Memory Agent | `rw.consolidation-review`, `rw.operator-continuity` | The file/API/dashboard ingest loop and timer-based consolidation show how background memory formation becomes a user-visible product surface. | Run scheduled consolidation on a fixed corpus, record source rows and output insights, then score whether consolidation is reviewable, repeatable, and bounded against unsupported claims. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for consolidation workflow reference. | ELF should borrow scheduling and operator controls while keeping deterministic writes and reviewable derived outputs. |
diff --git a/docs/guide/research/external_memory_improvement_plan.md b/docs/guide/research/external_memory_improvement_plan.md
index 2e2e53a8..6ad45be2 100644
--- a/docs/guide/research/external_memory_improvement_plan.md
+++ b/docs/guide/research/external_memory_improvement_plan.md
@@ -33,7 +33,10 @@ Current encoded result:
 - ELF and qmd passed every encoded smoke check.
 - agentmemory passed same-corpus retrieval but failed or could not complete lifecycle checks.
 - mem0, memsearch, and claude-mem returned wrong same-corpus retrieval results in the encoded smoke.
-- OpenViking was incomplete because its local embedding dependency could not complete inside the Docker runner.
+- OpenViking was incomplete in the June 9 run because its local embedding dependency
+  could not complete inside the Docker runner. XY-881 later pinned the Docker path to
+  a CPU `llama-cpp-python` wheel, so the current OpenViking state is reported as
+  `wrong_result` when `add_resource`/`find` miss the expected evidence terms.
 
 What this proves:
 
@@ -83,7 +86,7 @@ Use these terms in future benchmark reports and Linear issues:
 | `pass` | Encoded check completed and returned expected result. | ELF same-corpus retrieval and lifecycle checks pass. |
 | `wrong_result` | The system completed but returned an incorrect memory or missed the expected evidence. | mem0/memsearch/claude-mem smoke retrieval mismatch. |
 | `lifecycle_fail` | Retrieval may work, but update/delete/cold-start/persistence behavior is wrong or incomplete. | agentmemory adapter passing retrieval but not lifecycle. |
-| `incomplete` | The benchmark could not reach the behavioral check due to install/runtime/dependency failure. | OpenViking local embedding install failure in Docker. |
+| `incomplete` | The benchmark could not reach the behavioral check due to install/runtime/dependency failure. | A pinned local embedding wheel/import failure before OpenViking `add_resource`/`find`. |
 | `not_encoded` | Capability is not currently covered by the benchmark, so no pass/fail claim is allowed. | Viewer quality and batch backfill UX. |
 | `blocked` | A safe test cannot run without external credentials, manual setup, or a dependency outside the issue scope. | Private corpus evaluation before sanitized corpus exists. |
 
@@ -240,7 +243,9 @@ Implementation shape:
 Acceptance:
 
 - agentmemory adapter either passes durable lifecycle checks or is explicitly marked blocked with evidence.
-- OpenViking incomplete state records a pinned dependency failure and retry path.
+- OpenViking records a pinned Docker local embedding retry path; install/import
+  failure remains `incomplete`, while evidence misses after `add_resource`/`find`
+  are `wrong_result`.
 - qmd smoke pass remains covered and gains scale/stress profiles.
 - Real-world reports include adapter coverage counters before any external adapter is
   allowed to claim a real-world suite pass.
diff --git a/scripts/live-baseline-benchmark.sh b/scripts/live-baseline-benchmark.sh
index 63f62465..d6f96758 100755
--- a/scripts/live-baseline-benchmark.sh
+++ b/scripts/live-baseline-benchmark.sh
@@ -2431,23 +2431,28 @@ project_openviking() {
   local config_path="${REPORT_DIR}/${project}-ov.conf"
   local result_path="${REPORT_DIR}/${project}-search.json"
   local driver_path="${REPOS_DIR}/${project}/elf-live-baseline-openviking.py"
-  local local_embed_failure_pattern="llama-cpp-python|target specific option mismatch|failed-wheel-build-for-install|Failed building wheel|Failed to build llama-cpp-python|No module named 'llama_cpp'|Local embedding is enabled but 'llama-cpp-python' is not installed"
+  local constraints_path="${REPORT_DIR}/${project}-constraints.txt"
+  local llama_cpp_python_version="${ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_VERSION:-0.3.28}"
+  local llama_cpp_python_index="${ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_INDEX:-https://abetlen.github.io/llama-cpp-python/whl/cpu}"
+  local local_embed_failure_pattern="target specific option mismatch|failed-wheel-build-for-install|Failed building wheel for llama-cpp-python|Failed to build llama-cpp-python|Could not build wheels for llama-cpp-python|No module named 'llama_cpp'|Local embedding is enabled but 'llama-cpp-python' is not installed|No matching distribution found|Could not find a version that satisfies|not a supported wheel"
+  local local_embed_install_reason="OpenViking local-embed install failed in Docker for pinned llama-cpp-python==${llama_cpp_python_version} from the CPU wheel index, so same-corpus local retrieval could not be run"
+  local local_embed_command_summary="pip install -e .; openviking/ov --help; pip install llama-cpp-python==${llama_cpp_python_version} --extra-index-url ${llama_cpp_python_index} --only-binary llama-cpp-python; pip install -e .[local-embed]; OpenViking.add_resource/find"
   local head
   mkdir -p "${home}"
-  cat >"${REPORT_DIR}/${project}-adapter.json" <<'JSON'
+  cat >"${REPORT_DIR}/${project}-adapter.json" <<JSON
 {
   "schema": "elf.live_baseline.adapter_metadata/v1",
   "project": "OpenViking",
   "storage": {
-    "status": "incomplete",
-    "detail": "The adapter attempts OpenViking local storage, but Docker local-embed setup can fail before retrieval is reached."
+    "status": "real",
+    "detail": "The adapter uses OpenViking local storage after pinning the Docker local embedding dependency path."
   },
   "behaviors": {
     "same_corpus_retrieval": {
-      "status": "incomplete",
-      "surface": "OpenViking.add_resource and OpenViking.find after installing .[local-embed]",
-      "evidence": "The known Docker failure is llama-cpp-python build/import failure during local embedding setup.",
-      "retry": "Retry after pinning or providing a Docker-compatible llama-cpp-python/local embedding dependency."
+      "status": "real",
+      "surface": "OpenViking.add_resource and OpenViking.find after installing .[local-embed] with llama-cpp-python==${llama_cpp_python_version} from the CPU wheel index",
+      "evidence": "The Docker dependency boundary is the local llama-cpp-python wheel/import path, not provider-backed ELF embeddings. Once setup reaches add_resource/find, misses are reported as wrong_result.",
+      "retry": "Retry with ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker; override ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_VERSION or ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_INDEX only when the pinned CPU wheel is unavailable for the Docker platform. Treat wheel install/import failures as incomplete, not wrong_result."
     },
     "update": {
       "status": "not_encoded",
@@ -2467,7 +2472,7 @@ project_openviking() {
     },
     "scale_stress_profile": {
       "status": "blocked",
-      "surface": "scale/stress is blocked until local-embed setup is reliable in Docker"
+      "surface": "scale/stress is blocked until smoke same-corpus retrieval returns evidence-bearing results"
     }
   }
 }
@@ -2661,18 +2666,22 @@ finally:
     client.close()
 PY
 
-  if ! run_cmd "${project}: install local embedding extras" 900 "${log_path}" \
-    "export HOME='${home}'; cd '${REPOS_DIR}/${project}' && .venv/bin/pip install -e '.[local-embed]'"; then
+  if ! run_cmd "${project}: install pinned local embedding extras" 900 "${log_path}" \
+    "export HOME='${home}'; cd '${REPOS_DIR}/${project}' && printf 'llama-cpp-python==${llama_cpp_python_version}\n' > '${constraints_path}' && .venv/bin/pip install --extra-index-url '${llama_cpp_python_index}' --only-binary llama-cpp-python -c '${constraints_path}' 'llama-cpp-python==${llama_cpp_python_version}' && .venv/bin/pip install --extra-index-url '${llama_cpp_python_index}' --only-binary llama-cpp-python -c '${constraints_path}' -e '.[local-embed]' && .venv/bin/python - <<'PY'
+import llama_cpp
+
+print('llama_cpp_import_ok', getattr(llama_cpp, '__version__', 'unknown'))
+PY"; then
     if rg -q "${local_embed_failure_pattern}" "${log_path}"; then
-      json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local-embed install failed in Docker while building llama-cpp-python for aarch64, so same-corpus local retrieval could not be run" "${project}.log" "pip install -e .; openviking/ov --help; pip install -e .[local-embed]"
+      json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "${local_embed_install_reason}" "${project}.log" "${local_embed_command_summary}"
       return
     fi
-    json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local-embed install failed in Docker, so same-corpus local retrieval could not be run" "${project}.log" "pip install -e .; openviking/ov --help; pip install -e .[local-embed]"
+    json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "${local_embed_install_reason}" "${project}.log" "${local_embed_command_summary}"
     return
   fi
 
   if rg -q "${local_embed_failure_pattern}" "${log_path}"; then
-    json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local-embed install returned success but the log contains llama-cpp-python build/import failure, so same-corpus local retrieval could not be run" "${project}.log" "pip install -e .; openviking/ov --help; pip install -e .[local-embed]"
+    json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking pinned local-embed install returned success but the log contains llama-cpp-python wheel/import failure, so same-corpus local retrieval could not be run" "${project}.log" "${local_embed_command_summary}"
     return
   fi
 
@@ -2682,11 +2691,11 @@ PY
       jq '{check_summary, checks}' "${result_path}" >"${REPORT_DIR}/${project}-checks.json"
     fi
     if rg -q "${local_embed_failure_pattern}" "${log_path}"; then
-      json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local add_resource/find hit llama-cpp-python build/import failure, so same-corpus local retrieval could not be run" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+      json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local add_resource/find hit pinned llama-cpp-python wheel/import failure, so same-corpus local retrieval could not be run" "${project}.log" "${local_embed_command_summary}"
       return
     fi
     if [[ ! -s "${result_path}" ]] || ! jq -e . "${result_path}" >/dev/null 2>&1; then
-      json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "OpenViking local add_resource/find returned success but did not write a valid result JSON" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+      json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "OpenViking local add_resource/find returned success but did not write a valid result JSON" "${project}.log" "${local_embed_command_summary}"
       return
     fi
     if jq -e --argjson query_count "${QUERY_COUNT}" '
@@ -2701,19 +2710,19 @@ PY
       else
         retrieval_status="retrieval_wrong_result"
       fi
-      json_record "${project}" "${repo}" "${head}" "${typed_status}" "${retrieval_status}" "$(typed_status_reason "${project}" "${typed_status}")" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+      json_record "${project}" "${repo}" "${head}" "${typed_status}" "${retrieval_status}" "$(typed_status_reason "${project}" "${typed_status}")" "${project}.log" "${local_embed_command_summary}"
       return
     fi
-    json_record "${project}" "${repo}" "${head}" "incomplete" "invalid_json_result" "OpenViking local add_resource/find did not produce a valid benchmark result" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+    json_record "${project}" "${repo}" "${head}" "incomplete" "invalid_json_result" "OpenViking local add_resource/find did not produce a valid benchmark result" "${project}.log" "${local_embed_command_summary}"
     return
   fi
 
   if rg -q "${local_embed_failure_pattern}" "${log_path}"; then
-    json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local add_resource/find failed because llama-cpp-python was unavailable in Docker" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+    json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local add_resource/find failed because pinned llama-cpp-python was unavailable in Docker" "${project}.log" "${local_embed_command_summary}"
     return
   fi
 
-  json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "OpenViking local-embed installed, but same-corpus add_resource/find failed in Docker" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+  json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "OpenViking pinned local-embed installed, but same-corpus add_resource/find failed in Docker" "${project}.log" "${local_embed_command_summary}"
 }
 
 project_claude_mem() {