From 2707f7c344d7d5f26f73104b85c6f8c1f79e0b37 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Wed, 10 Jun 2026 16:03:45 +0800
Subject: [PATCH] {"schema":"decodex/commit/1","summary":"Add expanded RAG and
 graph-memory adapter research gates","authority":"XY-834"}

---
 README.md                                     |   14 +-
 .../memory_projects_manifest.json             | 1010 +++++++++++++++++
 .../src/bin/real_world_job_benchmark.rs       |  120 +-
 .../tests/real_world_job_benchmark.rs         |   63 +-
 ...2026-06-10-real-world-comparison-report.md |   17 +-
 .../benchmarking/live_baseline_benchmark.md   |    4 +-
 .../real_world_agent_memory_benchmark.md      |   15 +-
 .../research/comparison_external_projects.md  |   23 +-
 .../external_memory_improvement_plan.md       |    2 +
 .../research/research_projects_inventory.md   |   10 +-
 .../real_world_agent_memory_benchmark_v1.md   |   39 +-
 11 files changed, 1273 insertions(+), 44 deletions(-)

diff --git a/README.md b/README.md
index 4fc5cf10..e306299d 100644
--- a/README.md
+++ b/README.md
@@ -152,6 +152,12 @@ with the production embedding provider path, `Qwen3-Embedding-8B`, and
   `retrieval`, and `project_decisions` jobs through
   `cargo make real-world-memory-live-adapters`. This does not imply full-suite
   live-service parity, broad adapter parity, or private-corpus production proof.
+- Expanded adapter-pack coverage after XY-834: the real-world external adapter
+  manifest now includes `research_gate` records for RAGFlow, LightRAG, GraphRAG,
+  Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, graphify, and deeper
+  qmd/OpenViking profiles. These records carry source/setup/runtime/resource/retry
+  metadata and typed `blocked`, `incomplete`, or `not_encoded` states; they are not
+  fixture-backed or live adapter pass evidence.
 - The benchmark runner and report publisher are checked in and Docker-isolated:
   `cargo make baseline-live-docker`, `cargo make baseline-backfill-docker`,
   `cargo make baseline-production-private-addendum`,
@@ -174,10 +180,10 @@ Detailed evidence and interpretation:
   [Real-World Agent Memory Benchmark v1](docs/spec/real_world_agent_memory_benchmark_v1.md).
   This contract defines job-level suites for agent work. `cargo make real-world-memory`
   now reports fixture-backed ELF evidence plus the external adapter coverage manifest
-  for ELF, qmd, agentmemory, mem0/OpenMemory, claude-mem, memsearch, and OpenViking.
-  The report still distinguishes fixture-backed and live-baseline-only evidence from
-  true live real-world adapter runs; only the targeted ELF and qmd live adapter slice
-  currently executes `real_world_job` prompts and scoring.
+  for the first memory-project set plus expanded RAG and graph-memory research gates.
+  The report still distinguishes fixture-backed, live-baseline-only, research-gate,
+  and true live real-world adapter evidence; only the targeted ELF and qmd live
+  adapter slice currently executes `real_world_job` prompts and scoring.
 
 Evidence-backed position after the June 10 real-world report:
 
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index 8b9f0f61..9ee1acb6 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -734,6 +734,1016 @@
       "notes": [
         "claude-mem remains a UX reference; current Docker evidence is not a real-world progressive-disclosure pass."
       ]
+    },
+    {
+      "adapter_id": "qmd_deep_profile_gate",
+      "project": "qmd",
+      "adapter_kind": "docker_cli_deep_profile_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "not_encoded",
+      "setup": {
+        "status": "pass",
+        "evidence": "qmd already has a Docker CLI live-baseline adapter; this gate records the deeper profile extension before a separate scaled run is claimed.",
+        "command": "ELF_BASELINE_PROJECTS=qmd ELF_BASELINE_PROFILE=stress cargo make baseline-live-docker",
+        "artifact": "tmp/live-baseline/qmd.log"
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No expanded qmd stress or real_world_job deep-profile artifact is checked in for this adapter-pack gate."
+      },
+      "result": {
+        "status": "not_encoded",
+        "evidence": "qmd deep retrieval-debug evidence remains a planned profile, not a new pass claim."
+      },
+      "capabilities": [
+        {
+          "capability": "stress_profile_retrieval_debug",
+          "status": "not_encoded",
+          "evidence": "The stress command path exists, but this adapter-pack gate has not published a deep qmd profile result."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "The qmd live real-world slice covers representative jobs only; expanded retrieval-debug suites need their own materialized adapter run."
+        },
+        {
+          "capability": "host_global_install_boundary",
+          "status": "unsupported",
+          "evidence": "Repository-supported qmd benchmark runs must stay inside docker-compose.baseline.yml and must not require host-global installs."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "retrieval",
+          "status": "not_encoded",
+          "evidence": "A deeper stress retrieval-debug report is not checked in for this gate."
+        },
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "not_encoded",
+          "evidence": "qmd query planning and score readback are not yet scored as operator-debugging real_world_job outputs."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/tobi/qmd",
+          "status": "real"
+        },
+        {
+          "kind": "runner",
+          "ref": "scripts/live-baseline-benchmark.sh",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "qmd repository",
+            "url": "https://github.com/tobi/qmd",
+            "evidence": "Official qmd source for local hybrid search, CLI setup, and query behavior."
+          }
+        ],
+        "setup_path": "Use the existing Docker baseline qmd install, collection add, update, embed, and query flow with scale or stress profiles.",
+        "runtime_boundary": "docker-compose.baseline.yml baseline-runner container with project files and caches inside Docker volumes.",
+        "resource_expectation": "CPU local embedding and rerank costs scale with corpus size; record elapsed time and qmd log artifacts before claims.",
+        "retry_guidance": [
+          "Run qmd stress profile in Docker and publish the artifact path.",
+          "Map qmd JSON output to retrieval-debug real_world_job scoring before suite claims."
+        ],
+        "research_depth": "D2 reviewed; deep profile not encoded"
+      },
+      "notes": [
+        "This gate deepens qmd planning without changing the existing qmd pass evidence from the smoke live baseline."
+      ]
+    },
+    {
+      "adapter_id": "openviking_deep_profile_gate",
+      "project": "OpenViking",
+      "adapter_kind": "docker_local_embed_deep_profile_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "incomplete",
+      "setup": {
+        "status": "incomplete",
+        "evidence": "OpenViking deep-profile work is stalled at the same Docker local-embedding dependency boundary as the current live-baseline adapter, so setup is recorded as incomplete.",
+        "command": "ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker",
+        "artifact": "tmp/live-baseline/OpenViking.log"
+      },
+      "run": {
+        "status": "incomplete",
+        "evidence": "The adapter cannot fairly exercise hierarchical trajectory behavior until add_resource/find reaches execution in Docker."
+      },
+      "result": {
+        "status": "incomplete",
+        "evidence": "No OpenViking deep context-trajectory result is claimed from a run whose setup is incomplete."
+      },
+      "capabilities": [
+        {
+          "capability": "docker_local_embed_setup",
+          "status": "incomplete",
+          "evidence": "The local embedding setup must be pinned before deep profile runs can execute."
+        },
+        {
+          "capability": "hierarchical_context_trajectory",
+          "status": "not_encoded",
+          "evidence": "Stage trajectory scoring is not encoded until setup reaches runnable OpenViking APIs."
+        },
+        {
+          "capability": "host_global_install_boundary",
+          "status": "unsupported",
+          "evidence": "The adapter pack must not ask operators to install OpenViking dependencies globally on the host."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "retrieval",
+          "status": "incomplete",
+          "evidence": "Same-corpus retrieval setup remains incomplete in Docker."
+        },
+        {
+          "suite_id": "work_resume",
+          "status": "not_encoded",
+          "evidence": "No OpenViking resume or context trajectory real_world_job run is encoded."
+        },
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "not_encoded",
+          "evidence": "Trajectory readback is a reference feature but not a scored adapter output."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/volcengine/OpenViking/",
+          "status": "real"
+        },
+        {
+          "kind": "runner",
+          "ref": "scripts/live-baseline-benchmark.sh",
+          "status": "incomplete"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "OpenViking repository",
+            "url": "https://github.com/volcengine/OpenViking/",
+            "evidence": "Official source for OpenViking local context database, resource, and retrieval APIs."
+          }
+        ],
+        "setup_path": "Pin a Docker-compatible local embedding path, then run OpenViking add_resource/find before any deep profile scoring.",
+        "runtime_boundary": "docker-compose.baseline.yml baseline-runner container; no host model or compiler setup outside Docker.",
+        "resource_expectation": "Local embedding builds can be native-toolchain-heavy and model-heavy; record build logs, model cache size, and elapsed time.",
+        "retry_guidance": [
+          "Pin or prebuild the local embedding dependency in the baseline image.",
+          "Only then add context-trajectory real_world_job scoring for hierarchical retrieval."
+        ],
+        "research_depth": "D2 reviewed; runtime setup incomplete"
+      },
+      "notes": [
+        "OpenViking remains a context-trajectory reference, but this gate prevents setup failure from becoming a quality judgment."
+      ]
+    },
+    {
+      "adapter_id": "ragflow_research_gate",
+      "project": "RAGFlow",
+      "adapter_kind": "research_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "blocked",
+      "setup": {
+        "status": "blocked",
+        "evidence": "RAGFlow remains a large RAG system watch item; D1/D2 research must prove a Docker-safe corpus ingest and query path before adapter implementation."
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No RAGFlow real_world_job or live-baseline adapter is encoded."
+      },
+      "result": {
+        "status": "blocked",
+        "evidence": "No quality result is claimed until deployability, resource envelope, and output mapping are researched."
+      },
+      "capabilities": [
+        {
+          "capability": "d1_d2_research_before_adapter",
+          "status": "blocked",
+          "evidence": "The inventory marks RAGFlow as D0 pending deep dive."
+        },
+        {
+          "capability": "docker_service_setup",
+          "status": "blocked",
+          "evidence": "The adapter must size the multi-service Docker setup and avoid host-global installs before running."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No job prompt, answer, evidence, or trap mapping is implemented."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "retrieval",
+          "status": "blocked",
+          "evidence": "Corpus ingestion, query output, and evidence citation mapping need D1/D2 research."
+        },
+        {
+          "suite_id": "knowledge_compilation",
+          "status": "not_encoded",
+          "evidence": "RAGFlow knowledge output is not mapped to real_world_job page or citation scoring."
+        },
+        {
+          "suite_id": "production_ops",
+          "status": "blocked",
+          "evidence": "Resource envelope and service startup retry guidance must be documented first."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/infiniflow/ragflow",
+          "status": "real"
+        },
+        {
+          "kind": "source",
+          "ref": "https://ragflow.io/docs/",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "RAGFlow repository",
+            "url": "https://github.com/infiniflow/ragflow",
+            "evidence": "Official source for RAGFlow service code and Docker Compose setup."
+          },
+          {
+            "label": "RAGFlow docs",
+            "url": "https://ragflow.io/docs/",
+            "evidence": "Official deployment and setup documentation."
+          }
+        ],
+        "setup_path": "Research the official Docker deployment, corpus ingest API, query API, and artifact export before adding a runner.",
+        "runtime_boundary": "Future runs must use docker-compose.baseline.yml or a nested Docker-isolated service profile without host-global installs.",
+        "resource_expectation": "Large multi-service RAG stack; record CPU/GPU mode, memory, disk, startup time, and provider credential needs before scoring.",
+        "retry_guidance": [
+          "Complete a D1/D2 setup and API deep dive.",
+          "Prototype a tiny Docker smoke that reaches ingest and query before adding quality checks."
+        ],
+        "research_depth": "D0 watch item; D1/D2 required"
+      },
+      "follow_up": {
+        "title": "[ELF benchmark adapter] Research RAGFlow Docker adapter feasibility",
+        "reason": "The project is too large to score fairly without setup, resource, and API mapping research."
+      }
+    },
+    {
+      "adapter_id": "lightrag_research_gate",
+      "project": "LightRAG",
+      "adapter_kind": "research_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "blocked",
+      "setup": {
+        "status": "blocked",
+        "evidence": "LightRAG requires D1/D2 research on Docker setup, LLM/embedding configuration, persistence, and context output before adapter implementation."
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No LightRAG real_world_job adapter is encoded."
+      },
+      "result": {
+        "status": "blocked",
+        "evidence": "No graph-RAG quality claim is allowed until a Docker-safe adapter reaches query output."
+      },
+      "capabilities": [
+        {
+          "capability": "graph_augmented_rag_setup",
+          "status": "blocked",
+          "evidence": "The inventory marks LightRAG as D0 pending deep dive."
+        },
+        {
+          "capability": "retrieved_context_export",
+          "status": "blocked",
+          "evidence": "The adapter must prove it can extract evidence-bearing retrieved contexts for scoring."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No LightRAG fixture materializer or scorer mapping exists."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "retrieval",
+          "status": "blocked",
+          "evidence": "Graph/vector retrieval output mapping needs research."
+        },
+        {
+          "suite_id": "memory_evolution",
+          "status": "blocked",
+          "evidence": "Stale/corrected fact update behavior is not yet audited."
+        },
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "not_encoded",
+          "evidence": "Trace or context-debug output is not mapped to benchmark scoring."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/HKUDS/LightRAG",
+          "status": "real"
+        },
+        {
+          "kind": "source",
+          "ref": "https://github.com/HKUDS/LightRAG/blob/main/docs/DockerDeployment.md",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "LightRAG repository",
+            "url": "https://github.com/HKUDS/LightRAG",
+            "evidence": "Official source for LightRAG server, Docker, and retrieval modes."
+          },
+          {
+            "label": "LightRAG Docker docs",
+            "url": "https://github.com/HKUDS/LightRAG/blob/main/docs/DockerDeployment.md",
+            "evidence": "Official Docker deployment reference."
+          }
+        ],
+        "setup_path": "Research Docker Compose with explicit LLM, embedding, rerank, and storage configuration before adding a benchmark runner.",
+        "runtime_boundary": "Docker-only service profile with generated corpus mounted as container-local input.",
+        "resource_expectation": "Graph extraction and local model choices may dominate runtime; record backend choices, cache sizes, and provider needs.",
+        "retry_guidance": [
+          "Run a tiny Docker ingest/query smoke with deterministic or local providers.",
+          "Verify returned contexts can be mapped to required evidence IDs."
+        ],
+        "research_depth": "D0 watch item; D1/D2 required"
+      },
+      "follow_up": {
+        "title": "[ELF benchmark adapter] Research LightRAG graph-RAG adapter feasibility",
+        "reason": "Graph extraction, persistence, and context output must be understood before fair scoring."
+      }
+    },
+    {
+      "adapter_id": "graphrag_research_gate",
+      "project": "GraphRAG",
+      "adapter_kind": "research_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "blocked",
+      "setup": {
+        "status": "blocked",
+        "evidence": "GraphRAG indexing cost and source-citation mapping require D1/D2 research before adapter implementation."
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No GraphRAG real_world_job adapter is encoded."
+      },
+      "result": {
+        "status": "blocked",
+        "evidence": "No graph-navigation or knowledge-synthesis result is claimed from docs-only research."
+      },
+      "capabilities": [
+        {
+          "capability": "indexing_resource_envelope",
+          "status": "blocked",
+          "evidence": "Official docs warn that indexing can be expensive; the benchmark must start small and record costs."
+        },
+        {
+          "capability": "source_citation_mapping",
+          "status": "blocked",
+          "evidence": "The adapter must map graph summaries and query output back to benchmark evidence IDs."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No GraphRAG materializer or scorer mapping exists."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "knowledge_compilation",
+          "status": "blocked",
+          "evidence": "Community summaries and graph reports need source coverage checks before scoring."
+        },
+        {
+          "suite_id": "retrieval",
+          "status": "blocked",
+          "evidence": "Query output and expected-evidence mapping are not researched."
+        },
+        {
+          "suite_id": "production_ops",
+          "status": "blocked",
+          "evidence": "Indexing resource envelope is not established."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/microsoft/graphrag",
+          "status": "real"
+        },
+        {
+          "kind": "source",
+          "ref": "https://microsoft.github.io/graphrag/",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "GraphRAG repository",
+            "url": "https://github.com/microsoft/graphrag",
+            "evidence": "Official Microsoft GraphRAG source and setup reference."
+          },
+          {
+            "label": "GraphRAG docs",
+            "url": "https://microsoft.github.io/graphrag/",
+            "evidence": "Official documentation for indexing and querying."
+          }
+        ],
+        "setup_path": "Research a tiny CLI index/query path with explicit model configuration and source mapping.",
+        "runtime_boundary": "Docker-only Python CLI run with generated corpus and container-local artifacts.",
+        "resource_expectation": "Indexing may be expensive; record model calls, cache size, elapsed time, and maximum corpus size used.",
+        "retry_guidance": [
+          "Complete D1/D2 indexing and query-output research.",
+          "Add a cost-bounded smoke before any scale or quality claim."
+        ],
+        "research_depth": "D0 watch item; D1/D2 required"
+      },
+      "follow_up": {
+        "title": "[ELF benchmark adapter] Research GraphRAG cost-bounded adapter path",
+        "reason": "Indexing cost, graph summaries, and citation guarantees need proof before scoring."
+      }
+    },
+    {
+      "adapter_id": "graphiti_zep_research_gate",
+      "project": "Graphiti/Zep",
+      "adapter_kind": "research_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "not_encoded",
+      "setup": {
+        "status": "not_encoded",
+        "evidence": "Graphiti/Zep is D1 reviewed as a temporal graph-memory reference, but no Docker adapter is implemented."
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No temporal graph fact add/query job is encoded."
+      },
+      "result": {
+        "status": "not_encoded",
+        "evidence": "No current-versus-historical real_world_job pass is claimed."
+      },
+      "capabilities": [
+        {
+          "capability": "temporal_graph_memory",
+          "status": "not_encoded",
+          "evidence": "Temporal fact validity is a reference dimension but not an executable adapter output."
+        },
+        {
+          "capability": "docker_graph_store_setup",
+          "status": "blocked",
+          "evidence": "A safe local graph store, embedding, and LLM configuration must be documented before execution."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No Graphiti/Zep materializer or scorer mapping exists."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "memory_evolution",
+          "status": "not_encoded",
+          "evidence": "Current/historical fact validity jobs are not encoded for Graphiti/Zep."
+        },
+        {
+          "suite_id": "retrieval",
+          "status": "not_encoded",
+          "evidence": "Hybrid graph retrieval output is not mapped to evidence IDs."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/getzep/graphiti",
+          "status": "real"
+        },
+        {
+          "kind": "source",
+          "ref": "https://www.getzep.com/platform/graphiti/",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "Graphiti repository",
+            "url": "https://github.com/getzep/graphiti",
+            "evidence": "Official open-source temporal context graph engine."
+          },
+          {
+            "label": "Zep Graphiti overview",
+            "url": "https://www.getzep.com/platform/graphiti/",
+            "evidence": "Official product documentation for temporal context graph behavior."
+          }
+        ],
+        "setup_path": "Define a Docker-local graph store and provider configuration, then encode add/query current-versus-historical fact jobs.",
+        "runtime_boundary": "Docker-only service or SDK run with graph store state under benchmark artifacts.",
+        "resource_expectation": "Requires graph store plus LLM/embedding configuration; record service startup, storage size, and provider boundaries.",
+        "retry_guidance": [
+          "Prototype a tiny temporal fact add/query run.",
+          "Map valid_at/invalid_at evidence to memory_evolution scoring."
+        ],
+        "research_depth": "D1 reviewed; adapter not encoded"
+      }
+    },
+    {
+      "adapter_id": "letta_research_gate",
+      "project": "Letta",
+      "adapter_kind": "research_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "not_encoded",
+      "setup": {
+        "status": "not_encoded",
+        "evidence": "Letta is D1 reviewed as a core/archival memory reference, but no Docker real_world_job adapter is implemented."
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No Letta core block, archival memory, or shared-memory job is encoded."
+      },
+      "result": {
+        "status": "not_encoded",
+        "evidence": "No Letta personalization or project-decision suite result is claimed."
+      },
+      "capabilities": [
+        {
+          "capability": "core_archival_memory",
+          "status": "not_encoded",
+          "evidence": "Core blocks and archival memory are reference semantics but not scored."
+        },
+        {
+          "capability": "docker_embedding_configuration",
+          "status": "blocked",
+          "evidence": "Docker setup requires explicit embedding configuration before archival retrieval can be tested."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No Letta materializer or scorer mapping exists."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "personalization",
+          "status": "not_encoded",
+          "evidence": "Core memory preference application is not encoded for Letta."
+        },
+        {
+          "suite_id": "project_decisions",
+          "status": "not_encoded",
+          "evidence": "Archival memory decision retrieval is not encoded for Letta."
+        },
+        {
+          "suite_id": "work_resume",
+          "status": "not_encoded",
+          "evidence": "Agent resumption through Letta memory blocks is not encoded."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/letta-ai/letta",
+          "status": "real"
+        },
+        {
+          "kind": "source",
+          "ref": "https://docs.letta.com/guides/docker/",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "Letta repository",
+            "url": "https://github.com/letta-ai/letta",
+            "evidence": "Official source for Letta stateful agents and memory."
+          },
+          {
+            "label": "Letta Docker docs",
+            "url": "https://docs.letta.com/guides/docker/",
+            "evidence": "Official Docker deployment guide and embedding configuration boundary."
+          }
+        ],
+        "setup_path": "Define Docker server setup, embedding model configuration, and a core/archival memory fixture flow.",
+        "runtime_boundary": "Docker-only Letta server or CLI flow with benchmark-created agents and no host-global state.",
+        "resource_expectation": "Embedding model and agent server state must be explicit; record storage and provider boundaries.",
+        "retry_guidance": [
+          "Create a tiny Docker agent with archival memory search.",
+          "Score core-versus-archival retrieval only after source evidence can be exported."
+        ],
+        "research_depth": "D1 reviewed; adapter not encoded"
+      }
+    },
+    {
+      "adapter_id": "langgraph_research_gate",
+      "project": "LangGraph",
+      "adapter_kind": "research_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "not_encoded",
+      "setup": {
+        "status": "not_encoded",
+        "evidence": "LangGraph is D1 reviewed as a replay/checkpoint reference, not a direct memory backend adapter."
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No checkpoint replay real_world_job harness is encoded."
+      },
+      "result": {
+        "status": "not_encoded",
+        "evidence": "No production-ops or resume suite result is claimed."
+      },
+      "capabilities": [
+        {
+          "capability": "checkpoint_replay_regression",
+          "status": "not_encoded",
+          "evidence": "Replay/fork behavior needs an agent graph harness before scoring."
+        },
+        {
+          "capability": "standalone_memory_backend",
+          "status": "unsupported",
+          "evidence": "LangGraph persistence is an agent-state/checkpoint layer, not a drop-in memory retrieval backend."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No LangGraph benchmark materializer exists."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "production_ops",
+          "status": "not_encoded",
+          "evidence": "Checkpoint recovery and replay regression are not encoded."
+        },
+        {
+          "suite_id": "work_resume",
+          "status": "not_encoded",
+          "evidence": "Resume from checkpoint with memory reads is not encoded."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://docs.langchain.com/oss/python/langgraph/persistence",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "LangGraph persistence docs",
+            "url": "https://docs.langchain.com/oss/python/langgraph/persistence",
+            "evidence": "Official documentation for checkpoints, replay, fork, and persistence behavior."
+          }
+        ],
+        "setup_path": "Build a tiny LangGraph agent with a checkpointer and explicit memory read/write steps before scoring.",
+        "runtime_boundary": "Docker-only Python harness with checkpoint store under the artifact directory.",
+        "resource_expectation": "Small runtime expected, but LLM calls and side effects must be stubbed or deterministic before replay claims.",
+        "retry_guidance": [
+          "Encode one replay/fork failure recovery job.",
+          "Keep LangGraph classified as a replay reference unless memory retrieval is actually exercised."
+        ],
+        "research_depth": "D1 reviewed; adapter not encoded"
+      }
+    },
+    {
+      "adapter_id": "nanograph_research_gate",
+      "project": "nanograph",
+      "adapter_kind": "research_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "not_encoded",
+      "setup": {
+        "status": "not_encoded",
+        "evidence": "nanograph is D1 reviewed as a typed-graph developer-experience reference, but no Docker adapter is implemented."
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No typed graph schema/query real_world_job run is encoded."
+      },
+      "result": {
+        "status": "not_encoded",
+        "evidence": "No graph temporal or retrieval-debug result is claimed."
+      },
+      "capabilities": [
+        {
+          "capability": "typed_graph_schema",
+          "status": "not_encoded",
+          "evidence": "Schema-as-code and typed query ergonomics need a benchmark harness."
+        },
+        {
+          "capability": "memory_backend_comparison",
+          "status": "unsupported",
+          "evidence": "nanograph is a graph database reference, not a complete agent memory service."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No nanograph materializer exists."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "memory_evolution",
+          "status": "not_encoded",
+          "evidence": "Typed current/historical fact jobs are not encoded."
+        },
+        {
+          "suite_id": "retrieval",
+          "status": "not_encoded",
+          "evidence": "Typed query explainability is not scored."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/nanograph/nanograph",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "nanograph repository",
+            "url": "https://github.com/nanograph/nanograph",
+            "evidence": "Official source for on-device typed property graph behavior."
+          }
+        ],
+        "setup_path": "Build or install nanograph inside Docker and load a typed graph fixture from generated corpus facts.",
+        "runtime_boundary": "Docker-only CLI run with graph folder under benchmark artifacts.",
+        "resource_expectation": "Light local graph runtime expected; record binary build/install time and graph artifact size.",
+        "retry_guidance": [
+          "Define a minimal schema for memory_evolution facts.",
+          "Score typed query output only if it cites fixture evidence IDs."
+        ],
+        "research_depth": "D1 reviewed; adapter not encoded"
+      }
+    },
+    {
+      "adapter_id": "llm_wiki_research_gate",
+      "project": "llm-wiki",
+      "adapter_kind": "research_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "not_encoded",
+      "setup": {
+        "status": "not_encoded",
+        "evidence": "llm-wiki is D1 reviewed as a knowledge-compilation reference, but no plugin or generated-page adapter is implemented."
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No llm-wiki corpus-to-page run is encoded."
+      },
+      "result": {
+        "status": "not_encoded",
+        "evidence": "No knowledge page citation or lint result is claimed."
+      },
+      "capabilities": [
+        {
+          "capability": "knowledge_page_compilation",
+          "status": "not_encoded",
+          "evidence": "Wiki generation and citation lint are not executed by the runner."
+        },
+        {
+          "capability": "live_service_runtime",
+          "status": "unsupported",
+          "evidence": "llm-wiki is a plugin/workflow reference rather than a service adapter."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No page materializer or scorer mapping exists."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "knowledge_compilation",
+          "status": "not_encoded",
+          "evidence": "Corpus-to-wiki output is not encoded."
+        },
+        {
+          "suite_id": "work_resume",
+          "status": "not_encoded",
+          "evidence": "Resume answers from wiki pages are not encoded."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/nvk/llm-wiki",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "llm-wiki repository",
+            "url": "https://github.com/nvk/llm-wiki",
+            "evidence": "Official source for the LLM Wiki plugin and knowledge-base workflow."
+          }
+        ],
+        "setup_path": "Research plugin bootstrap inside a Docker-contained Codex or file-based harness, then materialize page artifacts.",
+        "runtime_boundary": "Docker-only plugin or fixture materializer; no user-global Codex plugin install.",
+        "resource_expectation": "LLM generation cost depends on page build; record provider boundary and generated artifact size.",
+        "retry_guidance": [
+          "Prototype a fixture-only page build with explicit citations.",
+          "Do not score until generated sections can be mapped to evidence IDs."
+        ],
+        "research_depth": "D1 reviewed; adapter not encoded"
+      }
+    },
+    {
+      "adapter_id": "gbrain_research_gate",
+      "project": "gbrain",
+      "adapter_kind": "research_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "not_encoded",
+      "setup": {
+        "status": "not_encoded",
+        "evidence": "gbrain is D1 reviewed as a compiled-truth and timeline reference, but no Docker adapter is implemented."
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No gbrain brain-repo import or compiled-truth run is encoded."
+      },
+      "result": {
+        "status": "not_encoded",
+        "evidence": "No knowledge-synthesis or operator-continuity result is claimed."
+      },
+      "capabilities": [
+        {
+          "capability": "compiled_truth_timeline",
+          "status": "not_encoded",
+          "evidence": "Compiled truth plus timeline output is a reference pattern but not scored."
+        },
+        {
+          "capability": "postgres_backed_brain_repo",
+          "status": "blocked",
+          "evidence": "A Docker-local brain repo and Postgres setup path must be proven before execution."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No gbrain materializer exists."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "knowledge_compilation",
+          "status": "not_encoded",
+          "evidence": "Compiled truth and timeline pages are not scored."
+        },
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "not_encoded",
+          "evidence": "Operator continuity through brain pages is not encoded."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/garrytan/gbrain",
+          "status": "real"
+        },
+        {
+          "kind": "source",
+          "ref": "https://github.com/garrytan/gbrain/blob/master/docs/guides/compiled-truth.md",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "gbrain repository",
+            "url": "https://github.com/garrytan/gbrain",
+            "evidence": "Official source for brain repo and retrieval workflow."
+          },
+          {
+            "label": "compiled truth guide",
+            "url": "https://github.com/garrytan/gbrain/blob/master/docs/guides/compiled-truth.md",
+            "evidence": "Official guide for compiled truth plus timeline behavior."
+          }
+        ],
+        "setup_path": "Create a Docker-local brain repo fixture, run import/sync, and export compiled truth plus timeline evidence.",
+        "runtime_boundary": "Docker-only repository and database state with no operator-owned brain repo.",
+        "resource_expectation": "Postgres-backed sync and embedding choices must be explicit; record DB size and import time.",
+        "retry_guidance": [
+          "Prototype a tiny brain repo with one current-truth page and timeline.",
+          "Score only if compiled truth cites the source timeline evidence."
+        ],
+        "research_depth": "D1 reviewed; adapter not encoded"
+      }
+    },
+    {
+      "adapter_id": "graphify_research_gate",
+      "project": "graphify",
+      "adapter_kind": "research_gate",
+      "evidence_class": "research_gate",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "not_encoded",
+      "setup": {
+        "status": "not_encoded",
+        "evidence": "graphify is D1 reviewed as a graph-navigation reference, but no Docker adapter is implemented."
+      },
+      "run": {
+        "status": "not_encoded",
+        "evidence": "No graphify graph/report build is encoded."
+      },
+      "result": {
+        "status": "not_encoded",
+        "evidence": "No graph-navigation or knowledge-compilation result is claimed."
+      },
+      "capabilities": [
+        {
+          "capability": "graph_report_generation",
+          "status": "not_encoded",
+          "evidence": "Graph reports and assistant query flows are not executed by the runner."
+        },
+        {
+          "capability": "multimodal_code_graph",
+          "status": "not_encoded",
+          "evidence": "Multimodal graph extraction is a reference capability but not scored."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No graphify materializer exists."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "knowledge_compilation",
+          "status": "not_encoded",
+          "evidence": "Graph report citation and lint behavior are not scored."
+        },
+        {
+          "suite_id": "retrieval",
+          "status": "not_encoded",
+          "evidence": "Graph-guided query output is not mapped to required evidence."
+        },
+        {
+          "suite_id": "work_resume",
+          "status": "not_encoded",
+          "evidence": "Resume answers from graph context are not encoded."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "source",
+          "ref": "https://github.com/safishamsi/graphify",
+          "status": "real"
+        }
+      ],
+      "execution_metadata": {
+        "sources": [
+          {
+            "label": "graphify repository",
+            "url": "https://github.com/safishamsi/graphify",
+            "evidence": "Official source for graphify graph extraction and query workflow."
+          }
+        ],
+        "setup_path": "Install graphify inside Docker, build a graph/report from a generated corpus, and export query evidence.",
+        "runtime_boundary": "Docker-only CLI or skill run over mounted benchmark corpus.",
+        "resource_expectation": "Graph build cost scales with corpus and model choices; record build time, graph size, and generated report size.",
+        "retry_guidance": [
+          "Start with a generated public code/document corpus.",
+          "Score graph-guided answers only when report nodes cite source evidence IDs."
+        ],
+        "research_depth": "D1 reviewed; adapter not encoded"
+      }
     }
   ]
 }
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
index c80f749c..e987986b 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -686,6 +686,8 @@ struct ExternalAdapterReport {
 	suites: Vec<AdapterSuiteCoverage>,
 	#[serde(default)]
 	evidence: Vec<AdapterEvidencePointer>,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	execution_metadata: Option<AdapterExecutionMetadata>,
 	#[serde(default)]
 	notes: Vec<String>,
 	#[serde(skip_serializing_if = "Option::is_none")]
@@ -724,6 +726,26 @@ struct AdapterEvidencePointer {
 	status: AdapterCoverageStatus,
 }
 
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct AdapterExecutionMetadata {
+	#[serde(default)]
+	sources: Vec<AdapterSource>, // validation rejects an empty list
+	setup_path: String, // validation rejects blank text
+	runtime_boundary: String, // validation rejects blank text
+	resource_expectation: String, // validation rejects blank text
+	#[serde(default)]
+	retry_guidance: Vec<String>, // may be empty; blank entries are rejected
+	#[serde(skip_serializing_if = "Option::is_none")]
+	research_depth: Option<String>, // report shows "not recorded" when absent
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct AdapterSource {
+	label: String, // link text in the report's Sources cell; must not be blank
+	url: String, // link target in the report's Sources cell; must not be blank
+	evidence: String, // note printed after the link; must not be blank
+}
+
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 struct ExternalAdapterSummary {
 	adapter_count: usize,
@@ -733,6 +755,8 @@ struct ExternalAdapterSummary {
 	fixture_backed_count: usize,
 	live_baseline_only_count: usize,
 	live_real_world_count: usize,
+	#[serde(default)]
+	research_gate_count: usize,
 	overall_status_counts: AdapterStatusCounts,
 	capability_status_counts: AdapterStatusCounts,
 	suite_status_counts: AdapterStatusCounts,
@@ -3719,7 +3743,7 @@ fn validate_external_adapter(path: &Path, adapter: &ExternalAdapterReport) -> Re
 	}
 	if !matches!(
 		adapter.evidence_class.as_str(),
-		"fixture_backed" | "live_baseline_only" | "live_real_world"
+		"fixture_backed" | "live_baseline_only" | "live_real_world" | "research_gate"
 	) {
 		return Err(eyre::eyre!(
 			"{} adapter {} has unsupported evidence_class {}.",
@@ -3740,6 +3764,7 @@ fn validate_external_adapter(path: &Path, adapter: &ExternalAdapterReport) -> Re
 	validate_adapter_capabilities(path, adapter)?;
 	validate_adapter_suites(path, adapter)?;
 	validate_adapter_evidence(path, adapter)?;
+	validate_adapter_execution_metadata(path, adapter)?;
 
 	if let Some(follow_up) = &adapter.follow_up
 		&& (follow_up.title.trim().is_empty() || follow_up.reason.trim().is_empty())
@@ -3822,6 +3847,63 @@ fn validate_adapter_evidence(path: &Path, adapter: &ExternalAdapterReport) -> Re
 	Ok(())
 }
 
+/// Validates the `execution_metadata` block of an external adapter record.
+///
+/// `research_gate` records must carry the block; other evidence classes may omit it.
+/// When it is present, the setup path, runtime boundary, and resource expectation must
+/// be non-blank, at least one source is required, and no retry guidance entry, research
+/// depth, or source field may be blank.
+///
+/// Returns an error naming the manifest path and adapter id on the first violation.
+fn validate_adapter_execution_metadata(path: &Path, adapter: &ExternalAdapterReport) -> Result<()> {
+	let Some(metadata) = &adapter.execution_metadata else {
+		// A research gate exists only to record source/setup/runtime/resource/retry
+		// metadata, so a gate without the block carries no evidence at all.
+		if adapter.evidence_class == "research_gate" {
+			return Err(eyre::eyre!(
+				"{} adapter {} is a research_gate record without execution metadata.",
+				path.display(),
+				adapter.adapter_id
+			));
+		}
+
+		return Ok(());
+	};
+
+	// A present-but-blank depth would bypass the "not recorded" fallback used for `None`.
+	let blank_research_depth =
+		metadata.research_depth.as_deref().is_some_and(|depth| depth.trim().is_empty());
+
+	if metadata.setup_path.trim().is_empty()
+		|| metadata.runtime_boundary.trim().is_empty()
+		|| metadata.resource_expectation.trim().is_empty()
+		|| metadata.retry_guidance.iter().any(|guidance| guidance.trim().is_empty())
+		|| metadata.sources.is_empty()
+		|| blank_research_depth
+	{
+		return Err(eyre::eyre!(
+			"{} adapter {} has incomplete execution metadata.",
+			path.display(),
+			adapter.adapter_id
+		));
+	}
+
+	for source in &metadata.sources {
+		if source.label.trim().is_empty()
+			|| source.url.trim().is_empty()
+			|| source.evidence.trim().is_empty()
+		{
+			return Err(eyre::eyre!(
+				"{} adapter {} has incomplete source metadata.",
+				path.display(),
+				adapter.adapter_id
+			));
+		}
+	}
+
+	Ok(())
+}
+
 fn external_adapter_summary(adapters: &[ExternalAdapterReport]) -> ExternalAdapterSummary {
 	let mut summary = ExternalAdapterSummary {
 		adapter_count: adapters.len(),
@@ -3846,6 +3905,7 @@ fn accumulate_adapter_summary(
 	summary.fixture_backed_count += usize::from(adapter.evidence_class == "fixture_backed");
 	summary.live_baseline_only_count += usize::from(adapter.evidence_class == "live_baseline_only");
 	summary.live_real_world_count += usize::from(adapter.evidence_class == "live_real_world");
+	summary.research_gate_count += usize::from(adapter.evidence_class == "research_gate");
 
 	increment_adapter_status_count(&mut summary.overall_status_counts, adapter.overall_status);
 
@@ -4013,10 +4073,11 @@ fn render_markdown_external_adapters(out: &mut String, report: &RealWorldReport)
 		summary.host_global_install_required_count
 	));
 	out.push_str(&format!(
-		"- Evidence classes: `{}` fixture-backed, `{}` live-baseline-only, `{}` live real-world\n",
+		"- Evidence classes: `{}` fixture-backed, `{}` live-baseline-only, `{}` live real-world, `{}` research-gate\n",
 		summary.fixture_backed_count,
 		summary.live_baseline_only_count,
-		summary.live_real_world_count
+		summary.live_real_world_count,
+		summary.research_gate_count
 	));
 	out.push_str(&format!(
 		"- Overall statuses: `{}`\n",
@@ -4065,9 +4126,43 @@ fn render_markdown_external_adapters(out: &mut String, report: &RealWorldReport)
 		}
 	}
 
+	render_markdown_adapter_execution_metadata(out, report.external_adapters.adapters.as_slice());
+
 	out.push('\n');
 }
 
+fn render_markdown_adapter_execution_metadata(
+	out: &mut String,
+	adapters: &[ExternalAdapterReport],
+) {
+	// Only adapters that carry execution metadata contribute a table row.
+	let mut rows = adapters
+		.iter()
+		.filter_map(|adapter| adapter.execution_metadata.as_ref().map(|meta| (adapter, meta)))
+		.peekable();
+	// Emit nothing, not even the heading, when no adapter has metadata.
+	if rows.peek().is_none() {
+		return;
+	}
+	out.push_str("\n### Adapter Execution Metadata\n\n");
+	out.push_str("| Adapter | Sources | Setup Path | Runtime Boundary | Resource Expectation | Retry Guidance | Research Depth |\n");
+	out.push_str("| --- | --- | --- | --- | --- | --- | --- |\n");
+	for (adapter, meta) in rows {
+		let depth = meta.research_depth.as_deref().unwrap_or("not recorded");
+		let row = format!(
+			"| `{}` | {} | {} | {} | {} | {} | {} |\n",
+			md_inline(adapter.adapter_id.as_str()),
+			adapter_sources_cell(meta.sources.as_slice()),
+			md_cell(meta.setup_path.as_str()),
+			md_cell(meta.runtime_boundary.as_str()),
+			md_cell(meta.resource_expectation.as_str()),
+			md_list(meta.retry_guidance.as_slice()),
+			md_cell(depth)
+		);
+		out.push_str(&row);
+	}
+}
+
 fn render_markdown_header(out: &mut String, report: &RealWorldReport, report_path: &str) {
 	out.push_str("# Real-World Job Benchmark Report\n\n");
 	out.push_str(
@@ -4728,6 +4823,25 @@ fn adapter_evidence_cell(adapter: &ExternalAdapterReport) -> String {
 	format!("setup: `{}`<br>result: `{}`", md_inline(setup), md_inline(result))
 }
 
+fn adapter_sources_cell(sources: &[AdapterSource]) -> String {
+	// An empty source list renders as an explicit placeholder instead of a blank cell.
+	if sources.is_empty() {
+		return "`none`".to_string();
+	}
+
+	let mut links = Vec::with_capacity(sources.len());
+	for source in sources {
+		// Each source becomes one Markdown link followed by its evidence note.
+		let label = md_cell(source.label.as_str());
+		let url = md_url(source.url.as_str());
+		let note = md_cell(source.evidence.as_str());
+		links.push(format!("[{}]({}): {}", label, url, note));
+	}
+
+	// Entries are separated with `<br>` so they stay within a single cell.
+	links.join("<br>")
+}
+
 fn trace_failure_stage(trace: Option<&TraceExplainability>) -> Option<&str> {
 	trace.and_then(|trace| trace.failure_stage.as_deref())
 }
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 1f9fb61b..45ac5b1f 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -122,12 +122,16 @@ fn smoke_fixture_produces_typed_json_report() -> Result<()> {
 	assert_eq!(report.pointer("/summary/wrong_result_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(
 		report.pointer("/external_adapters/summary/adapter_count").and_then(Value::as_u64),
-		Some(9)
+		Some(21)
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/summary/live_real_world_count").and_then(Value::as_u64),
 		Some(2)
 	);
+	assert_eq!(
+		report.pointer("/external_adapters/summary/research_gate_count").and_then(Value::as_u64),
+		Some(12)
+	);
 
 	let jobs = array_at(&report, "/jobs")?;
 	let job = find_by_field(jobs, "/job_id", "work-resume-stale-worktree-001")?;
@@ -174,6 +178,13 @@ fn smoke_fixture_produces_typed_json_report() -> Result<()> {
 fn real_world_report_includes_external_adapter_coverage_manifest() -> Result<()> {
 	let report = run_json_report_from(real_world_memory_fixture_dir())?;
 
+	assert_external_adapter_manifest_summary(&report);
+	assert_external_adapter_manifest_records(&report)?;
+
+	Ok(())
+}
+
+fn assert_external_adapter_manifest_summary(report: &Value) {
 	assert_eq!(
 		report.pointer("/external_adapters/schema").and_then(Value::as_str),
 		Some("elf.real_world_external_adapter_report/v1")
@@ -194,11 +205,11 @@ fn real_world_report_includes_external_adapter_coverage_manifest() -> Result<()>
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/summary/adapter_count").and_then(Value::as_u64),
-		Some(9)
+		Some(21)
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/summary/external_project_count").and_then(Value::as_u64),
-		Some(7)
+		Some(19)
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/summary/fixture_backed_count").and_then(Value::as_u64),
@@ -214,6 +225,10 @@ fn real_world_report_includes_external_adapter_coverage_manifest() -> Result<()>
 		report.pointer("/external_adapters/summary/live_real_world_count").and_then(Value::as_u64),
 		Some(2)
 	);
+	assert_eq!(
+		report.pointer("/external_adapters/summary/research_gate_count").and_then(Value::as_u64),
+		Some(12)
+	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/pass")
@@ -236,7 +251,19 @@ fn real_world_report_includes_external_adapter_coverage_manifest() -> Result<()>
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/incomplete")
 			.and_then(Value::as_u64),
-		Some(2)
+		Some(3)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/overall_status_counts/blocked")
+			.and_then(Value::as_u64),
+		Some(3)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/overall_status_counts/not_encoded")
+			.and_then(Value::as_u64),
+		Some(8)
 	);
 	assert_eq!(
 		report
@@ -244,20 +271,30 @@ fn real_world_report_includes_external_adapter_coverage_manifest() -> Result<()>
 			.and_then(Value::as_u64),
 		Some(2)
 	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/capability_status_counts/unsupported")
+			.and_then(Value::as_u64),
+		Some(5)
+	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/suite_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(3)
+		Some(10)
 	);
+}
 
-	let adapters = array_at(&report, "/external_adapters/adapters")?;
+fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
+	let adapters = array_at(report, "/external_adapters/adapters")?;
 	let elf = find_by_field(adapters, "/adapter_id", "elf_real_world_memory_fixture")?;
 	let elf_live = find_by_field(adapters, "/adapter_id", "elf_live_real_world")?;
 	let qmd = find_by_field(adapters, "/adapter_id", "qmd_live_baseline")?;
 	let qmd_live = find_by_field(adapters, "/adapter_id", "qmd_live_real_world")?;
 	let agentmemory = find_by_field(adapters, "/adapter_id", "agentmemory_live_baseline")?;
 	let openviking = find_by_field(adapters, "/adapter_id", "openviking_live_baseline")?;
+	let ragflow = find_by_field(adapters, "/adapter_id", "ragflow_research_gate")?;
+	let qmd_deep = find_by_field(adapters, "/adapter_id", "qmd_deep_profile_gate")?;
 
 	assert_eq!(elf.pointer("/evidence_class").and_then(Value::as_str), Some("fixture_backed"));
 	assert_eq!(elf.pointer("/overall_status").and_then(Value::as_str), Some("incomplete"));
@@ -280,6 +317,20 @@ fn real_world_report_includes_external_adapter_coverage_manifest() -> Result<()>
 		Some("mocked")
 	);
 	assert_eq!(openviking.pointer("/overall_status").and_then(Value::as_str), Some("incomplete"));
+	assert_eq!(ragflow.pointer("/evidence_class").and_then(Value::as_str), Some("research_gate"));
+	assert_eq!(ragflow.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
+	assert_eq!(
+		ragflow.pointer("/execution_metadata/research_depth").and_then(Value::as_str),
+		Some("D0 watch item; D1/D2 required")
+	);
+	assert_eq!(
+		ragflow.pointer("/execution_metadata/sources/0/url").and_then(Value::as_str),
+		Some("https://github.com/infiniflow/ragflow")
+	);
+	assert_eq!(
+		qmd_deep.pointer("/capabilities/2/status").and_then(Value::as_str),
+		Some("unsupported")
+	);
 
 	Ok(())
 }
diff --git a/docs/guide/benchmarking/2026-06-10-real-world-comparison-report.md b/docs/guide/benchmarking/2026-06-10-real-world-comparison-report.md
index e35aee54..490fecfb 100644
--- a/docs/guide/benchmarking/2026-06-10-real-world-comparison-report.md
+++ b/docs/guide/benchmarking/2026-06-10-real-world-comparison-report.md
@@ -100,7 +100,7 @@ Suite-level outcomes:
 
 The real-world runner loads
 `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`.
-That manifest is an evidence ledger, not a leaderboard. It keeps three evidence classes
+That manifest is an evidence ledger, not a leaderboard. It keeps four evidence classes
 separate:
 
 | Evidence class | Count | Meaning |
@@ -108,6 +108,7 @@ separate:
 | `fixture_backed` | 1 | ELF fixture scoring through checked-in real-world jobs. |
 | `live_baseline_only` | 6 | Docker same-corpus/lifecycle evidence from the live-baseline runner only. |
 | `live_real_world` | 2 | Targeted ELF and qmd adapters execute representative `real_world_job` prompts and scoring. |
+| `research_gate` | 12 | Source/setup/runtime/resource/retry metadata for future adapter paths; not fixture-backed or live execution evidence. |
 
 Adapter-level status after refreshing the manifest:
 
@@ -122,10 +123,16 @@ Adapter-level status after refreshing the manifest:
 | memsearch | `live_baseline_only` | `wrong_result` | Markdown-first design remains a source-of-truth ergonomics reference. | Same-corpus retrieval was not a clean pass and real-world suites are incomplete/not encoded. |
 | OpenViking | `live_baseline_only` | `incomplete` | Hierarchical context trajectory remains a reference direction. | Docker local-embedding setup must be pinned before fair retrieval or real-world jobs can run. |
 | claude-mem | `live_baseline_only` | `wrong_result` | Progressive disclosure and local viewer remain UX references. | Current Docker evidence is not a clean same-corpus pass and progressive disclosure jobs are not encoded. |
+| qmd deep profile | `research_gate` | `not_encoded` | The stress-profile command path and source metadata are recorded for a future deeper retrieval-debug run. | No expanded qmd stress artifact or broader real-world suite pass is checked in. |
+| OpenViking deep profile | `research_gate` | `incomplete` | The deeper context-trajectory gate inherits the current Docker local-embedding setup blocker. | No hierarchical trajectory suite result is claimed. |
+| RAGFlow, LightRAG, GraphRAG | `research_gate` | `blocked` | Official sources and setup/resource/retry expectations are recorded. | D1/D2 research, Docker runtime proof, and evidence-output mapping are required before adapter implementation. |
+| Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, graphify | `research_gate` | `not_encoded` | D1/D2-inspired adapter directions have source/setup/runtime/resource/retry metadata. | No Docker-isolated `real_world_job` adapter has run for these projects. |
 
-External summary counters: `9` adapter records, `7` external project records, `9` Docker-default,
-`0` host-global-install requirements, `2` live real-world adapters, `3` external
-wrong-result overall states, `1` lifecycle-fail state, and `1` external incomplete state.
+External summary counters: `21` adapter records, `19` non-ELF adapter records,
+`21` Docker-default, `0` host-global-install requirements, `2` live real-world
+adapters, and `12` research-gate records. Overall adapter statuses are `3` pass,
+`3` wrong_result, `1` lifecycle_fail, `3` incomplete, `3` blocked, and
+`8` not_encoded.
 
 ## Remaining Gaps
 
@@ -144,6 +151,8 @@ report:
 | memsearch same-corpus and real-world coverage | `wrong_result` / `incomplete` | Fix Docker same-corpus retrieval/reindex evidence before scoring Markdown-first real-world jobs. |
 | OpenViking Docker local embedding path | `incomplete` | `[ELF benchmark adapter] Pin OpenViking Docker local embedding dependency path`. |
 | claude-mem durable/progressive-disclosure adapter | `wrong_result` / `not_encoded` | Add durable local repository and progressive-disclosure job coverage before UX parity claims. |
+| RAGFlow, LightRAG, and GraphRAG adapter feasibility | `blocked` research gates | Run D1/D2 research on setup, resource envelope, corpus ingest, query output, source mapping, and Docker retry path before implementation. |
+| Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, and graphify adapters | `not_encoded` research gates | Implement only after a scoped Docker path can emit evidence-linked outputs for the relevant real-world suites. |
 
 ## Adoption Implications
 
diff --git a/docs/guide/benchmarking/live_baseline_benchmark.md b/docs/guide/benchmarking/live_baseline_benchmark.md
index e71ade85..3b6a1997 100644
--- a/docs/guide/benchmarking/live_baseline_benchmark.md
+++ b/docs/guide/benchmarking/live_baseline_benchmark.md
@@ -355,7 +355,9 @@ by default and records live-baseline-only external adapter evidence under
 `external_adapters`; those records preserve the typed setup/run evidence but still
 leave real-world suites as `not_encoded`, `blocked`, `incomplete`, `wrong_result`, or
 `lifecycle_fail` until an adapter actually executes `real_world_job` prompts and
-scoring.
+scoring. The same manifest can also contain `research_gate` records for future adapter
+packs; those records provide source/setup/runtime/resource/retry guidance but are not
+live-baseline evidence.
 
 The targeted live real-world adapter slice for ELF and qmd is separate from the
 same-corpus live baseline:
diff --git a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
index d721a24d..61872397 100644
--- a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
+++ b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
@@ -208,9 +208,8 @@ The report also loads the checked-in external adapter coverage manifest by defau
 apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
 ```
 
-That manifest records the first memory-project set: ELF, qmd, agentmemory,
-mem0/OpenMemory, claude-mem, memsearch, and OpenViking. Its `external_adapters`
-report section distinguishes:
+That manifest records the first memory-project set plus expanded RAG and graph-memory
+research gates. Its `external_adapters` report section distinguishes:
 
 - `fixture_backed`: checked-in real-world fixture scoring, such as the ELF fixture
   response path.
@@ -218,6 +217,8 @@ report section distinguishes:
   a real-world suite win.
 - `live_real_world`: external adapters that actually execute `real_world_job`
   prompts and scoring.
+- `research_gate`: checked-in source/setup/runtime/resource/retry metadata for a
+  future adapter path, not fixture-backed or live execution evidence.
 
 Current state: the targeted `elf_live_real_world` and `qmd_live_real_world` adapter
 slice is encoded through `cargo make real-world-memory-live-adapters`. It materializes
@@ -228,8 +229,12 @@ record is not a real-world suite win. agentmemory is blocked on durable upstream
 storage for lifecycle proof. mem0/OpenMemory, memsearch, and claude-mem currently
 retain wrong-result or incomplete live-baseline states for the checked-in adapter
 evidence. OpenViking is incomplete until its local embedding setup is reliable inside
-Docker. These typed states describe benchmark coverage; do not treat them as broad
-project quality rankings.
+Docker. The expanded RAG and graph-memory records for RAGFlow, LightRAG, GraphRAG,
+Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, graphify, and deeper
+qmd/OpenViking profiles are `research_gate` records until their Docker-isolated
+adapter runs are implemented. These typed states describe benchmark coverage; do not
+treat setup cost, missing research, or unencoded suites as broad project quality
+rankings.
 
 To run the targeted live adapter slice for ELF and qmd:
 
diff --git a/docs/guide/research/comparison_external_projects.md b/docs/guide/research/comparison_external_projects.md
index a61030a6..8e549544 100644
--- a/docs/guide/research/comparison_external_projects.md
+++ b/docs/guide/research/comparison_external_projects.md
@@ -63,9 +63,13 @@ projects only have `live_baseline_only` Docker retrieval/lifecycle evidence, whi
 capabilities are `mocked`, `blocked`, `unsupported`, `incomplete`, `wrong_result`, or
 `lifecycle_fail`, and which real-world suites remain `not_encoded`. The manifest now
 includes targeted `live_real_world` records for ELF and qmd through
-`cargo make real-world-memory-live-adapters`; other external projects remain
-live-baseline-only, incomplete, blocked, or not encoded until their own
-`real_world_job` adapters run.
+`cargo make real-world-memory-live-adapters`; it also includes `research_gate` records
+for RAGFlow, LightRAG, GraphRAG, Graphiti/Zep, Letta, LangGraph, nanograph,
+llm-wiki, gbrain, graphify, and deeper qmd/OpenViking profiles. Research gates carry
+source/setup/runtime/resource/retry metadata for future adapter work, but they are not
+fixture-backed, live-baseline-only, or live-real-world evidence. Other external
+projects remain live-baseline-only, incomplete, blocked, or not encoded until their
+own `real_world_job` adapters run.
 
 Benchmark suite labels:
 
@@ -102,8 +106,9 @@ Project-to-suite map:
 | Graphiti / Zep | `rw.graph-temporal`, `rw.resume-evidence` | Temporal entities, relations, fact triples, validity windows, and graph search directly target stale/contradictory factual memory. | Add fact triples with validity changes, query current and historical answers, and score invalidation/append behavior under contradiction traps. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium-high for temporal-graph dimension. | ELF graph-lite covers evidence-linked validity windows and current/historical relation context; Graphiti/Zep remains the reference for broader temporal graph workflows. |
 | nanograph | `rw.graph-temporal`, `rw.retrieval-debug` | Typed schema and typed query ergonomics are relevant to making ELF graph-lite interactions inspectable and hard to misuse. | Define typed graph schemas and queries for the same fact set, then score developer-visible validation, query shape, and explainability rather than retrieval quality alone. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for DX reference, low for memory-system comparison. | ELF should borrow typed graph ergonomics without treating nanograph as a full memory backend. |
 
-Pending watch items remain D0. Keep them out of benchmark strength claims until current
-evidence is gathered:
+Pending watch items remain D0 even when they have checked-in `research_gate` adapter
+records. Keep them out of benchmark strength claims until current D1/D2 evidence is
+gathered and a Docker-isolated adapter actually runs:
 
 | Watch item | Candidate suite if promoted | Minimum evidence needed before adapter or quality claims |
 | ---------- | --------------------------- | ------------------------------------------------------- |
@@ -282,7 +287,7 @@ Capability notes:
 - [gbrain](https://github.com/garrytan/gbrain): Strong operational knowledge-brain shape with primary-home routing, `compiled_truth` + timeline pages, and explicit maintenance/enrichment workflows. Trade-off: page-first ontology and personal-brain workflow assumptions would over-couple ELF core to one UI/content model if copied directly.
 - [Always-On Memory Agent](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/gemini/agents/always-on-memory-agent): Strong always-on ingest/consolidate/query loop with multimodal inbox, timer-driven consolidation, simple SQLite persistence, and a lightweight dashboard/API. Trade-off: memory formation is LLM-first, so it does not preserve ELF-style deterministic write boundaries or evidence-bound fact contracts.
 - [graphify](https://github.com/safishamsi/graphify): Strong multimodal graph compression with deterministic AST extraction for code, explicit `EXTRACTED`/`INFERRED`/`AMBIGUOUS` relation tagging, and always-on assistant hooks. Trade-off: it is closer to a graph-guided corpus understanding skill than a multi-tenant memory service, so its graph artifact should be treated as a derived operator surface rather than a source-of-truth memory backend.
-- [nanograph](https://github.com/aaltshuler/nanograph): Strong typed schema + typed query developer ergonomics. Trade-off: focuses on graph-first DX patterns rather than ELF's evidence-bound notes + multi-tenant service contract.
+- [nanograph](https://github.com/nanograph/nanograph): Strong typed schema + typed query developer ergonomics. Trade-off: focuses on graph-first DX patterns rather than ELF's evidence-bound notes + multi-tenant service contract.
 
 ## nanograph Snapshot (New)
 
@@ -293,9 +298,9 @@ Snapshot date for this subsection: March 4, 2026.
 
 Primary references:
 
-- [nanograph](https://github.com/aaltshuler/nanograph)
-- [Schema docs](https://github.com/aaltshuler/nanograph/blob/main/docs/user/schema.md)
-- [Query docs](https://github.com/aaltshuler/nanograph/blob/main/docs/user/queries.md)
+- [nanograph](https://github.com/nanograph/nanograph)
+- [Schema docs](https://github.com/nanograph/nanograph/blob/main/docs/user/schema.md)
+- [Query docs](https://github.com/nanograph/nanograph/blob/main/docs/user/queries.md)
 
 ## LLM Wiki And Operational Brain Snapshot (New)
 
diff --git a/docs/guide/research/external_memory_improvement_plan.md b/docs/guide/research/external_memory_improvement_plan.md
index 508bfab2..2e2e53a8 100644
--- a/docs/guide/research/external_memory_improvement_plan.md
+++ b/docs/guide/research/external_memory_improvement_plan.md
@@ -229,6 +229,8 @@ Implementation shape:
 
 - Replace mock/in-memory external adapters with durable local modes where feasible.
 - For every external adapter, mark which behaviors are real, mocked, unsupported, or blocked.
+- For expanded RAG and graph-memory systems, use `research_gate` records until D1/D2
+  research is complete and resource sizing and Docker runtime boundaries are proven.
 - Add lifecycle checks: update, delete/expire, cold-start reload, and same-corpus retrieval.
 - Keep failures typed with the terms in this document.
 - Use `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`
diff --git a/docs/guide/research/research_projects_inventory.md b/docs/guide/research/research_projects_inventory.md
index c84ddab6..23c6f565 100644
--- a/docs/guide/research/research_projects_inventory.md
+++ b/docs/guide/research/research_projects_inventory.md
@@ -6,7 +6,7 @@ Inputs: Existing research notes, open architecture questions, and tracked adopti
 Depends on: `docs/guide/research/comparison_external_projects.md`.
 Outputs: A current inventory of reviewed and pending external projects.
 
-Last updated: June 9, 2026.
+Last updated: June 10, 2026.
 
 ## Legend
 
@@ -34,10 +34,10 @@ Last updated: June 9, 2026.
 | [Letta](https://github.com/letta-ai/letta) | D1 | Reviewed | `rw.core-archival`, `rw.operator-continuity` | Core vs archival memory split, shared blocks | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json` |
 | [LangGraph](https://docs.langchain.com/oss/python/langgraph/persistence) | D1 | Reviewed | `rw.replay-regression`, `rw.resume-evidence` | Checkpoint/replay mindset for quality regression workflows | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json` |
 | [Graphiti / Zep](https://help.getzep.com/graphiti/core-concepts/temporal-awareness) | D1 | Reviewed | `rw.graph-temporal`, `rw.resume-evidence` | Temporal fact validity model for graph-like memory evolution | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json` |
-| [nanograph](https://github.com/aaltshuler/nanograph) | D1 | Reviewed | `rw.graph-temporal`, `rw.retrieval-debug` | Typed schema + typed query ergonomics for graph-lite developer experience | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json` |
-| [RAGFlow](https://github.com/infiniflow/ragflow) | D0 | Watch item; pending deep dive | Candidate `rw.resume-evidence`, `rw.graph-navigation`, `rw.retrieval-debug`; no strength claim | Potential framework integration discussion; not yet audited to adoption level | Discussion history only; see watch-item evidence requirements in `docs/guide/research/comparison_external_projects.md` |
-| [LightRAG](https://github.com/HKUDS/LightRAG) | D0 | Watch item; pending deep dive | Candidate `rw.graph-navigation`, `rw.graph-temporal`, `rw.retrieval-debug`; no strength claim | Graph-augmented RAG strategy relevance; not yet audited to adoption level | Discussion history only; see watch-item evidence requirements in `docs/guide/research/comparison_external_projects.md` |
-| [GraphRAG](https://www.microsoft.com/en-us/research/project/graphrag/) | D0 | Watch item; pending deep dive | Candidate `rw.graph-navigation`, `rw.knowledge-synthesis`, `rw.retrieval-debug`; no strength claim | Graph-based retrieval concepts; not yet audited to implementation decision level | Discussion history only; see watch-item evidence requirements in `docs/guide/research/comparison_external_projects.md` |
+| [nanograph](https://github.com/nanograph/nanograph) | D1 | Reviewed; research gate added | `rw.graph-temporal`, `rw.retrieval-debug` | Typed schema + typed query ergonomics for graph-lite developer experience | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-09-xy-841-external-memory-benchmark-dimensions.json`; `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json` |
+| [RAGFlow](https://github.com/infiniflow/ragflow) | D0 | Research gate added; D1/D2 still required before adapter | Candidate `rw.resume-evidence`, `rw.graph-navigation`, `rw.retrieval-debug`; no strength claim | Potential framework integration discussion; not yet audited to adoption level | `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`; see watch-item evidence requirements in `docs/guide/research/comparison_external_projects.md` |
+| [LightRAG](https://github.com/HKUDS/LightRAG) | D0 | Research gate added; D1/D2 still required before adapter | Candidate `rw.graph-navigation`, `rw.graph-temporal`, `rw.retrieval-debug`; no strength claim | Graph-augmented RAG strategy relevance; not yet audited to adoption level | `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`; see watch-item evidence requirements in `docs/guide/research/comparison_external_projects.md` |
+| [GraphRAG](https://github.com/microsoft/graphrag) | D0 | Research gate added; D1/D2 still required before adapter | Candidate `rw.graph-navigation`, `rw.knowledge-synthesis`, `rw.retrieval-debug`; no strength claim | Graph-based retrieval concepts; not yet audited to implementation decision level | `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`; see watch-item evidence requirements in `docs/guide/research/comparison_external_projects.md` |
 
 ## June 2026 Activity Snapshot
 
diff --git a/docs/spec/real_world_agent_memory_benchmark_v1.md b/docs/spec/real_world_agent_memory_benchmark_v1.md
index b48a0f97..bb0a4b82 100644
--- a/docs/spec/real_world_agent_memory_benchmark_v1.md
+++ b/docs/spec/real_world_agent_memory_benchmark_v1.md
@@ -162,9 +162,9 @@ Each `adapters[]` record MUST include:
 - `adapter_id`: stable id unique within the manifest.
 - `project`: display name such as `qmd`, `agentmemory`, or `mem0/OpenMemory`.
 - `adapter_kind`: local execution shape, for example `docker_cli_same_corpus`,
-  `docker_sdk_same_corpus`, or `offline_fixture_response`.
-- `evidence_class`: one of `fixture_backed`, `live_baseline_only`, or
-  `live_real_world`.
+  `docker_sdk_same_corpus`, `offline_fixture_response`, or `research_gate`.
+- `evidence_class`: one of `fixture_backed`, `live_baseline_only`,
+  `live_real_world`, or `research_gate`.
 - `docker_default`: boolean.
 - `host_global_installs_required`: boolean.
 - `overall_status`: one adapter status from the table below.
@@ -177,6 +177,30 @@ Each `adapters[]` record MUST include:
 - `evidence`: array of evidence pointers with `kind`, `ref`, and `status`.
 - `notes`: optional bounded explanatory strings.
 - `follow_up`: optional `title` and `reason`.
+- `execution_metadata`: optional object used by expanded adapter packs and research
+  gates. When present, it MUST include `sources`, `setup_path`,
+  `runtime_boundary`, `resource_expectation`, and `retry_guidance`. It MAY include
+  `research_depth`.
+
+`research_gate` evidence class means the adapter record is a checked-in gating record
+for future implementation, not a benchmark execution result. It is used when a project
+needs D1/D2 research, resource sizing, credentials, Docker runtime proof, or source
+mapping before a fair adapter can run. A `research_gate` record MUST NOT be counted as
+fixture-backed, live-baseline-only, or live-real-world evidence.
+
+`execution_metadata.sources[]` entries MUST include:
+
+- `label`: short source label.
+- `url`: official source, docs, or repository URL.
+- `evidence`: bounded description of why the source matters.
+
+Other `execution_metadata` fields:
+
+- `setup_path`: intended setup path or the setup blocker to resolve.
+- `runtime_boundary`: Docker/service/CLI/process boundary expected for safe runs.
+- `resource_expectation`: expected resource or credential envelope, including unknowns.
+- `retry_guidance`: one or more concrete next checks before claiming pass/fail.
+- `research_depth`: optional `D0`, `D1`, or `D2` research state.
 
 Adapter coverage status terms:
 
@@ -198,7 +222,8 @@ metadata, per-adapter records, and summary counters for:
 
 - adapter count, external project count, Docker-default count, host-global-install
   count;
-- `fixture_backed`, `live_baseline_only`, and `live_real_world` evidence classes;
+- `fixture_backed`, `live_baseline_only`, `live_real_world`, and `research_gate`
+  evidence classes;
 - overall adapter statuses;
 - capability coverage statuses;
 - real-world suite coverage statuses.
@@ -542,9 +567,9 @@ Reports MUST include:
   preserving the `real`, `fixture_backed`, `mocked`, `blocked`, and `not_encoded`
   distinction.
 - external adapter coverage when an external adapter manifest is loaded, preserving
-  `fixture_backed`, `live_baseline_only`, `live_real_world`, `real`, `mocked`,
-  `unsupported`, `blocked`, `incomplete`, `wrong_result`, `lifecycle_fail`, `pass`,
-  and `not_encoded` distinctions.
+  `fixture_backed`, `live_baseline_only`, `live_real_world`, `research_gate`,
+  `real`, `mocked`, `unsupported`, `blocked`, `incomplete`, `wrong_result`,
+  `lifecycle_fail`, `pass`, and `not_encoded` distinctions.
 
 Reports that encode `memory_evolution` jobs SHOULD also include stale-answer counts,
 conflict detection counts, update rationale availability, and temporal-validity