diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 28ac002b..b448c8c7 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -109,7 +109,7 @@ jobs:
       - name: Run context misranking harness
         run: |
           mkdir -p tmp
-          cargo make e2e
+          cargo make test-e2e
 
       - name: Upload harness outputs
         if: always()
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 31adcc87..0e409287 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -91,4 +91,4 @@ jobs:
           exit 1
 
       - name: Run integration tests
-        run: cargo make test-all
+        run: cargo make test-rust-all
diff --git a/.github/workflows/language.yml b/.github/workflows/language.yml
index 7fd3cdcb..6385bd46 100644
--- a/.github/workflows/language.yml
+++ b/.github/workflows/language.yml
@@ -30,8 +30,8 @@ concurrency:
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
 jobs:
-  rust:
-    name: Rust checks
+  repo:
+    name: Repository checks
     runs-on: ubuntu-latest
     steps:
       - name: Fetch latest code
@@ -72,37 +72,10 @@ jobs:
         with:
           tool: nextest
 
-      - name: Run lint
-        run: cargo make lint
-
-      - name: Run Rust format checks
-        run: cargo make fmt-rust-check
-
-      - name: Run tests
-        run: cargo make test-rust
-
-  toml:
-    name: TOML checks
-    runs-on: ubuntu-latest
-    steps:
-      - name: Fetch latest code
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10
-
-      - name: Set up Rust toolchain
-        uses: actions-rust-lang/setup-rust-toolchain@46268bd060767258de96ed93c1251119784f2ab6
-        with:
-          cache: true
-          rustflags: ''
-
-      - name: Install cargo-make
-        uses: taiki-e/install-action@15449e3094499af05d8d964a1c884208e4b8b595
-        with:
-          tool: cargo-make
-
       - name: Install taplo
         uses: taiki-e/install-action@15449e3094499af05d8d964a1c884208e4b8b595
         with:
           tool: taplo
 
-      - name: Run TOML format checks
-        run: cargo make fmt-toml-check
+      - name: Run repository checks
+        run: cargo make check
diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
index 745a0c1e..210114fb 100644
--- a/.github/workflows/quality.yml
+++ b/.github/workflows/quality.yml
@@ -59,6 +59,11 @@ jobs:
           cache: true
           rustflags: ''
 
+      - name: Install cargo-make
+        uses: taiki-e/install-action@15449e3094499af05d8d964a1c884208e4b8b595
+        with:
+          tool: cargo-make
+
       - name: Install Postgres client
         run: |
           sudo apt-get update
@@ -73,39 +78,8 @@ jobs:
           echo "Postgres did not become ready in time."
           exit 1
 
-      - name: Create schema
-        run: |
-          python3 - <<'PY' > tmp.schema.sql
-          from pathlib import Path
-          
-          vector_dim = 4
-          root = Path(".")
-          sql_dir = root / "sql"
-          
-          out = []
-          for raw_line in (sql_dir / "init.sql").read_text(encoding="utf-8").splitlines():
-            line = raw_line.strip()
-            if line.startswith(r"\ir "):
-              rel = line[len(r"\ir ") :].strip()
-              out.append((sql_dir / rel).read_text(encoding="utf-8"))
-            else:
-              out.append(raw_line)
-          
-          expanded = "\n".join(out) + "\n"
-          print(expanded.replace("<VECTOR_DIM>", str(vector_dim)), end="")
-          PY
-          
-          psql "${PG_DSN}" -v ON_ERROR_STOP=1 -f tmp.schema.sql
-
-      - name: Load trace gate fixture
-        run: psql "${PG_DSN}" -v ON_ERROR_STOP=1 -f .github/fixtures/trace_gate/fixture.sql
-
       - name: Run trace regression gate
-        run: |
-          cargo run -p elf-eval --bin trace_regression_gate -- \
-            --config .github/fixtures/trace_gate/config.toml \
-            --gate .github/fixtures/trace_gate/gate.json \
-            --out trace_gate.report.json
+        run: TRACE_GATE_REPORT_PATH=trace_gate.report.json cargo make check-trace-gate
 
       - name: Upload trace gate report
         if: always()
diff --git a/Makefile.toml b/Makefile.toml
index 7513eb0d..02654763 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -1,272 +1,144 @@
 # Rust workspace tasks.
 
-# Lint
-# | task          | type      | cwd |
-# | ------------- | --------- | --- |
-# | lint          | composite |     |
-# | lint-fix      | composite |     |
-# | lint-rust     | command   |     |
-# | lint-fix-rust | extend    |     |
-# | lint-vstyle   | command   |     |
-# | lint-fix-vstyle | command |     |
-
-[tasks.lint]
-workspace = false
-dependencies = [
-	"lint-rust",
-	"lint-vstyle",
-]
-
-[tasks.lint-fix]
-workspace = false
-dependencies = [
-	"lint-fix-rust",
-	"lint-fix-vstyle",
-]
+# Benchmark
+# | task                                       | type      | cwd |
+# | ------------------------------------------ | --------- | --- |
+# | baseline-backfill-100k-docker              | command   |     |
+# | baseline-backfill-10k-docker               | command   |     |
+# | baseline-backfill-docker                   | command   |     |
+# | baseline-live-docker                       | command   |     |
+# | baseline-live-report                       | command   |     |
+# | baseline-production-private                | command   |     |
+# | baseline-production-private-addendum       | command   |     |
+# | baseline-production-synthetic              | command   |     |
+# | baseline-soak-docker                       | command   |     |
+# | openmemory-ui-export-readback              | command   |     |
+# | parity-docker                              | command   |     |
+# | real-world-first-generation-oss            | composite |     |
+# | real-world-first-generation-oss-json       | command   |     |
+# | real-world-first-generation-oss-report     | command   |     |
+# | real-world-job-operator-ux                 | composite |     |
+# | real-world-job-operator-ux-json            | command   |     |
+# | real-world-job-operator-ux-live-adapters   | command   |     |
+# | real-world-job-operator-ux-report          | command   |     |
+# | real-world-memory                          | composite |     |
+# | real-world-memory-consolidation            | composite |     |
+# | real-world-memory-consolidation-json       | command   |     |
+# | real-world-memory-consolidation-report     | command   |     |
+# | real-world-memory-core-archival            | composite |     |
+# | real-world-memory-core-archival-json       | command   |     |
+# | real-world-memory-core-archival-report     | command   |     |
+# | real-world-memory-evolution                | composite |     |
+# | real-world-memory-evolution-json           | command   |     |
+# | real-world-memory-evolution-report         | command   |     |
+# | real-world-memory-graph-rag                | composite |     |
+# | real-world-memory-graph-rag-json           | command   |     |
+# | real-world-memory-graph-rag-report         | command   |     |
+# | real-world-memory-json                     | command   |     |
+# | real-world-memory-knowledge                | composite |     |
+# | real-world-memory-knowledge-json           | command   |     |
+# | real-world-memory-knowledge-report         | command   |     |
+# | real-world-memory-live-adapters            | command   |     |
+# | real-world-memory-live-consolidation       | command   |     |
+# | real-world-memory-proactive-brief          | composite |     |
+# | real-world-memory-proactive-brief-json     | command   |     |
+# | real-world-memory-proactive-brief-report   | command   |     |
+# | real-world-memory-production-ops           | composite |     |
+# | real-world-memory-production-ops-json      | command   |     |
+# | real-world-memory-production-ops-report    | command   |     |
+# | real-world-memory-project-decisions        | composite |     |
+# | real-world-memory-project-decisions-json   | command   |     |
+# | real-world-memory-project-decisions-report | command   |     |
+# | real-world-memory-report                   | command   |     |
+# | real-world-memory-retrieval                | composite |     |
+# | real-world-memory-retrieval-json           | command   |     |
+# | real-world-memory-retrieval-report         | command   |     |
+# | real-world-memory-scheduled                | composite |     |
+# | real-world-memory-scheduled-json           | command   |     |
+# | real-world-memory-scheduled-report         | command   |     |
+# | real-world-memory-summary                  | composite |     |
+# | real-world-memory-summary-json             | command   |     |
+# | real-world-memory-summary-report           | command   |     |
 
-[tasks.lint-rust]
-workspace = false
-command = "cargo"
-args = [
-	"clippy",
-	"--all-features",
-	"--all-targets",
-	"--workspace",
-	"--",
-	"-D",
-	"clippy::all",
-	"-D",
-	"clippy::too_many_lines",
-	"-D",
-	"clippy::unwrap_used",
-	"-D",
-	"clippy::use_self",
-	"-D",
-	"clippy::wildcard_imports",
-	"-D",
-	"missing-docs",
-	"-D",
-	"unused-crate-dependencies",
-	"-D",
-	"warnings",
-]
-
-[tasks.lint-fix-rust]
-extend = "lint-rust"
-args = [
-	"clippy",
-	"--fix",
-	"--allow-dirty",
-	"--all-features",
-	"--all-targets",
-	"--workspace",
-	"--",
-	"-D",
-	"clippy::all",
-	"-D",
-	"clippy::too_many_lines",
-	"-D",
-	"clippy::unwrap_used",
-	"-D",
-	"clippy::use_self",
-	"-D",
-	"clippy::wildcard_imports",
-	"-D",
-	"missing-docs",
-	"-D",
-	"unused-crate-dependencies",
-	"-D",
-	"warnings",
-]
-
-[tasks.lint-vstyle]
+[tasks.baseline-backfill-100k-docker]
 workspace = false
-command = "cargo"
+command = "bash"
 args = [
-	"vstyle",
-	"curate",
-	"--language",
-	"rust",
-	"--workspace",
-	"--all-features"
+	"scripts/baseline-docker.sh",
+	"backfill-100k",
 ]
 
-[tasks.lint-fix-vstyle]
+[tasks.baseline-backfill-10k-docker]
 workspace = false
-command = "cargo"
+command = "bash"
 args = [
-	"vstyle",
-	"tune",
-	"--language",
-	"rust",
-	"--workspace",
-	"--all-features",
-	"--strict",
-]
-
-
-# Test
-# | task      | type      | cwd       |
-# | --------- | --------- | ---       |
-# | test      | composite |           |
-# | test-rust | command   |           |
-# | test-all  | composite |           |
-# | test-rust-all | command |         |
-# | test-integration      | composite |
-# | test-integration-rust | command   |
-
-[tasks.test]
-workspace = false
-dependencies = [
-	"test-rust",
+	"scripts/baseline-docker.sh",
+	"backfill-10k",
 ]
 
-[tasks.test-rust]
+[tasks.baseline-backfill-docker]
 workspace = false
-command = "cargo"
+command = "bash"
 args = [
-	"nextest",
-	"run",
-	"--workspace",
-	"--all-targets",
-	"--all-features",
+	"scripts/baseline-docker.sh",
+	"backfill",
 ]
 
-[tasks.test-all]
-workspace = false
-dependencies = [
-	"test-rust-all",
-]
-
-[tasks.test-rust-all]
+[tasks.baseline-live-docker]
 workspace = false
-command = "cargo"
+command = "bash"
 args = [
-	"nextest",
-	"run",
-	"--workspace",
-	"--all-targets",
-	"--all-features",
-	"--run-ignored",
-	"all",
+	"scripts/baseline-docker.sh",
+	"live",
 ]
 
-[tasks.test-integration]
-workspace = false
-dependencies = [
-	"test-integration-rust",
-]
-
-[tasks.test-integration-rust]
+[tasks.baseline-live-report]
 workspace = false
-command = "cargo"
+command = "bash"
 args = [
-	"nextest",
-	"run",
-	"--workspace",
-	"--all-targets",
-	"--all-features",
-	"--run-ignored",
-	"only",
-]
-
-
-# Format
-# | task           | type      | cwd |
-# | -------------- | --------- | --- |
-# | fmt            | composite |     |
-# | fmt-check      | composite |     |
-# | fmt-rust       | command   |     |
-# | fmt-rust-check | extend    |     |
-# | fmt-toml       | command   |     |
-# | fmt-toml-check | extend    |     |
-
-[tasks.fmt]
-workspace = false
-dependencies = [
-	"fmt-rust",
-	"fmt-toml",
-]
-
-[tasks.fmt-check]
-workspace = false
-dependencies = [
-	"fmt-rust-check",
-	"fmt-toml-check",
+	"scripts/live-baseline-report-to-md.sh",
 ]
 
-[tasks.fmt-rust]
+[tasks.baseline-production-private]
 workspace = false
-command = "rustup"
+command = "bash"
 args = [
-	"run",
-	"nightly",
-	"cargo",
-	"fmt",
-	"--all",
+	"scripts/baseline-docker.sh",
+	"production-private",
 ]
 
-[tasks.fmt-rust-check]
+[tasks.baseline-production-private-addendum]
 workspace = false
-command = "rustup"
+command = "bash"
 args = [
-	"run",
-	"nightly",
-	"cargo",
-	"fmt",
-	"--all",
-	"--",
-	"--check",
+	"scripts/baseline-docker.sh",
+	"production-private-addendum",
 ]
 
-[tasks.fmt-toml]
+[tasks.baseline-production-synthetic]
 workspace = false
-command = "taplo"
-args = [
-	"fmt",
-]
-
-[tasks.fmt-toml-check]
-extend = "fmt-toml"
+command = "bash"
 args = [
-	"fmt",
-	"--check",
-]
-
-# E2E
-# | task                           | type      | cwd |
-# | ------------------------------ | --------- | --- |
-# | e2e                            | composite |     |
-# | e2e-context-misranking-harness | command   |     |
-# | e2e-consolidation-harness      | command   |     |
-
-[tasks.e2e]
-workspace = false
-dependencies = [
-	"e2e-context-misranking-harness",
+	"scripts/baseline-docker.sh",
+	"production-synthetic",
 ]
 
-[tasks.e2e-context-misranking-harness]
+[tasks.baseline-soak-docker]
 workspace = false
 command = "bash"
 args = [
-	"scripts/context-misranking-harness.sh",
+	"scripts/baseline-docker.sh",
+	"soak",
 ]
 
-[tasks.e2e-consolidation-harness]
+[tasks.openmemory-ui-export-readback]
 workspace = false
 command = "bash"
 args = [
-	"scripts/consolidation-harness.sh",
+	"scripts/baseline-docker.sh",
+	"openmemory-ui-export-readback",
 ]
 
-
-# Competitive parity
-# | task                | type    | cwd |
-# | ------------------- | ------- | --- |
-# | parity-docker       | command |     |
-# | parity-docker-clean | command |     |
-
 [tasks.parity-docker]
 workspace = false
 command = "docker"
@@ -280,179 +152,125 @@ args = [
 	"parity-runner",
 ]
 
-[tasks.parity-docker-clean]
-workspace = false
-command = "docker"
-args = [
-	"compose",
-	"-f",
-	"docker-compose.parity.yml",
-	"down",
-	"-v",
-	"--remove-orphans",
-]
-
-
-# Live external baseline benchmark
-# | task                       | type    | cwd |
-# | -------------------------- | ------- | --- |
-# | baseline-live-docker       | command |     |
-# | baseline-backfill-docker   | command |     |
-# | baseline-live-report       | command |     |
-# | baseline-live-docker-clean | command |     |
-# | baseline-production-synthetic | command | |
-# | baseline-production-private | command |   |
-# | baseline-production-private-addendum | command | |
-# | baseline-backfill-10k-docker | command |  |
-# | baseline-backfill-100k-docker | command | |
-# | baseline-soak-docker       | command |     |
-# | openmemory-ui-export-readback | command |   |
-
-[tasks.baseline-live-docker]
+[tasks.real-world-first-generation-oss]
 workspace = false
-command = "bash"
-args = [
-	"-lc",
-	"set -euo pipefail; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
+dependencies = [
+	"real-world-first-generation-oss-report",
 ]
 
-[tasks.baseline-backfill-docker]
+[tasks.real-world-first-generation-oss-json]
 workspace = false
-command = "bash"
+command = "cargo"
 args = [
-	"-lc",
-	"set -euo pipefail; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; selected_projects=\"$(printenv ELF_BASELINE_PROJECTS || true)\"; if [ -z \"$selected_projects\" ]; then selected_projects=\"ELF\"; fi; selected_profile=\"$(printenv ELF_BASELINE_PROFILE || true)\"; if [ -z \"$selected_profile\" ]; then selected_profile=\"backfill\"; fi; backfill_docs=\"$(printenv ELF_BASELINE_BACKFILL_DOCS || true)\"; if [ -z \"$backfill_docs\" ]; then backfill_docs=\"2000\"; fi; elf_timeout=\"$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)\"; if [ -z \"$elf_timeout\" ]; then elf_timeout=\"3600\"; fi; max_elf_seconds=\"$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)\"; if [ -z \"$max_elf_seconds\" ]; then max_elf_seconds=\"3600\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=\"$selected_projects\"; export ELF_BASELINE_PROFILE=\"$selected_profile\"; export ELF_BASELINE_BACKFILL_DOCS=\"$backfill_docs\"; export ELF_BASELINE_ELF_TIMEOUT_SECONDS=\"$elf_timeout\"; export ELF_BASELINE_MAX_ELF_SECONDS=\"$max_elf_seconds\"; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"run",
+	"--fixtures",
+	"apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss",
+	"--out",
+	"tmp/real-world-memory/first-generation-oss/report.json",
+	"--run-id",
+	"first-generation-oss-continuity-source-store",
+	"--adapter-id",
+	"fixture_first_generation_oss",
+	"--adapter-name",
+	"First-generation OSS fixture coverage",
 ]
 
-[tasks.baseline-live-report]
+[tasks.real-world-first-generation-oss-report]
 workspace = false
-command = "bash"
-args = [
-	"scripts/live-baseline-report-to-md.sh",
+dependencies = [
+	"real-world-first-generation-oss-json",
 ]
-
-[tasks.baseline-live-docker-clean]
-workspace = false
-command = "docker"
+command = "cargo"
 args = [
-	"compose",
-	"-f",
-	"docker-compose.baseline.yml",
-	"down",
-	"-v",
-	"--remove-orphans",
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"publish",
+	"--report",
+	"tmp/real-world-memory/first-generation-oss/report.json",
+	"--out",
+	"tmp/real-world-memory/first-generation-oss/report.md",
 ]
 
-[tasks.openmemory-ui-export-readback]
+[tasks.real-world-job-operator-ux]
 workspace = false
-command = "bash"
-args = [
-	"-lc",
-	"set -euo pipefail; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=mem0; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
+dependencies = [
+	"real-world-job-operator-ux-report",
 ]
 
-[tasks.baseline-production-synthetic]
+[tasks.real-world-job-operator-ux-json]
 workspace = false
-command = "bash"
+command = "cargo"
 args = [
-	"-lc",
-	"set -euo pipefail; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; selected_projects=\"$(printenv ELF_BASELINE_PROJECTS || true)\"; if [ -z \"$selected_projects\" ]; then selected_projects=\"ELF\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=\"$selected_projects\"; export ELF_BASELINE_PROFILE=production-synthetic; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"run",
+	"--fixtures",
+	"apps/elf-eval/fixtures/real_world_job/operator_debugging_ux",
+	"--out",
+	"tmp/real-world-job/real-world-job-operator-ux-report.json",
+	"--run-id",
+	"real-world-job-operator-ux",
+	"--adapter-id",
+	"fixture_operator_ux",
+	"--adapter-name",
+	"ELF operator UX fixture",
 ]
 
-[tasks.baseline-production-private]
+[tasks.real-world-job-operator-ux-live-adapters]
 workspace = false
 command = "bash"
 args = [
-	"-lc",
-	"set -euo pipefail; manifest=\"$(printenv ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST || true)\"; if [ -z \"$manifest\" ]; then echo \"ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST is required for baseline-production-private\" >&2; exit 1; fi; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; selected_projects=\"$(printenv ELF_BASELINE_PROJECTS || true)\"; if [ -z \"$selected_projects\" ]; then selected_projects=\"ELF\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=\"$selected_projects\"; export ELF_BASELINE_PROFILE=production-private; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
+	"scripts/real-world-docker.sh",
+	"job-operator-ux-live-adapters",
 ]
 
-[tasks.baseline-production-private-addendum]
+[tasks.real-world-job-operator-ux-report]
 workspace = false
-command = "bash"
-args = [
-	"-lc",
-	"set -euo pipefail; manifest=\"$(printenv ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST || true)\"; if [ -z \"$manifest\" ]; then echo \"ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST is required for baseline-production-private-addendum\" >&2; exit 1; fi; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; selected_projects=\"$(printenv ELF_BASELINE_PROJECTS || true)\"; if [ -z \"$selected_projects\" ]; then selected_projects=\"ELF\"; fi; addendum=\"$(printenv ELF_BASELINE_PRIVATE_ADDENDUM || true)\"; if [ -z \"$addendum\" ]; then addendum=\"tmp/live-baseline/private-production-addendum.md\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=\"$selected_projects\"; export ELF_BASELINE_PROFILE=production-private; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner; ELF_BASELINE_MARKDOWN_REPORT=\"$addendum\" cargo make baseline-live-report; echo \"Private production addendum: $addendum\"",
+dependencies = [
+	"real-world-job-operator-ux-json",
 ]
-
-[tasks.baseline-backfill-10k-docker]
-workspace = false
-command = "bash"
+command = "cargo"
 args = [
-	"-lc",
-	"set -euo pipefail; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; backfill_docs=\"$(printenv ELF_BASELINE_BACKFILL_DOCS || true)\"; if [ -z \"$backfill_docs\" ]; then backfill_docs=\"10000\"; fi; elf_timeout=\"$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)\"; if [ -z \"$elf_timeout\" ]; then elf_timeout=\"14400\"; fi; max_elf_seconds=\"$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)\"; if [ -z \"$max_elf_seconds\" ]; then max_elf_seconds=\"$elf_timeout\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=ELF; export ELF_BASELINE_PROFILE=backfill; export ELF_BASELINE_BACKFILL_DOCS=\"$backfill_docs\"; export ELF_BASELINE_ELF_TIMEOUT_SECONDS=\"$elf_timeout\"; export ELF_BASELINE_MAX_ELF_SECONDS=\"$max_elf_seconds\"; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"publish",
+	"--report",
+	"tmp/real-world-job/real-world-job-operator-ux-report.json",
+	"--out",
+	"tmp/real-world-job/real-world-job-operator-ux-report.md",
 ]
 
-[tasks.baseline-backfill-100k-docker]
+[tasks.real-world-memory]
 workspace = false
-command = "bash"
-args = [
-	"-lc",
-	"set -euo pipefail; enabled=\"$(printenv ELF_BASELINE_ENABLE_EXPENSIVE || true)\"; if [ \"$enabled\" != \"1\" ]; then echo \"ELF_BASELINE_ENABLE_EXPENSIVE=1 is required for baseline-backfill-100k-docker\" >&2; exit 1; fi; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; backfill_docs=\"$(printenv ELF_BASELINE_BACKFILL_DOCS || true)\"; if [ -z \"$backfill_docs\" ]; then backfill_docs=\"100000\"; fi; elf_timeout=\"$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)\"; if [ -z \"$elf_timeout\" ]; then elf_timeout=\"86400\"; fi; max_elf_seconds=\"$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)\"; if [ -z \"$max_elf_seconds\" ]; then max_elf_seconds=\"$elf_timeout\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=ELF; export ELF_BASELINE_PROFILE=backfill; export ELF_BASELINE_BACKFILL_DOCS=\"$backfill_docs\"; export ELF_BASELINE_ELF_TIMEOUT_SECONDS=\"$elf_timeout\"; export ELF_BASELINE_MAX_ELF_SECONDS=\"$max_elf_seconds\"; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
+dependencies = [
+	"real-world-memory-report",
 ]
 
-[tasks.baseline-soak-docker]
-workspace = false
-command = "bash"
-args = [
-	"-lc",
-	"set -euo pipefail; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; soak_seconds=\"$(printenv ELF_BASELINE_SOAK_SECONDS || true)\"; if [ -z \"$soak_seconds\" ]; then soak_seconds=\"3600\"; fi; elf_timeout=\"$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)\"; if [ -z \"$elf_timeout\" ]; then elf_timeout=\"$((soak_seconds + 1800))\"; fi; max_elf_seconds=\"$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)\"; if [ -z \"$max_elf_seconds\" ]; then max_elf_seconds=\"$elf_timeout\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=ELF; export ELF_BASELINE_PROFILE=stress; export ELF_BASELINE_SOAK_SECONDS=\"$soak_seconds\"; export ELF_BASELINE_ELF_TIMEOUT_SECONDS=\"$elf_timeout\"; export ELF_BASELINE_MAX_ELF_SECONDS=\"$max_elf_seconds\"; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
-]
-
-
-# Real-world job benchmark smoke
-# | task                                   | type      | cwd |
-# | -------------------------------------- | --------- | --- |
-# | real-world-job-smoke                   | composite |     |
-# | real-world-job-smoke-json              | command   |     |
-# | real-world-job-smoke-report            | command   |     |
-# | real-world-memory                      | composite |     |
-# | real-world-memory-json                 | command   |     |
-# | real-world-memory-report               | command   |     |
-# | real-world-memory-project-decisions        | composite | |
-# | real-world-memory-project-decisions-json   | command   | |
-# | real-world-memory-project-decisions-report | command   | |
-# | real-world-memory-evolution            | composite |     |
-# | real-world-memory-evolution-json       | command   |     |
-# | real-world-memory-evolution-report     | command   |     |
-# | real-world-memory-consolidation        | composite |     |
-# | real-world-memory-consolidation-json   | command   |     |
-# | real-world-memory-consolidation-report | command   |     |
-# | real-world-memory-summary              | composite |     |
-# | real-world-memory-summary-json         | command   |     |
-# | real-world-memory-summary-report       | command   |     |
-# | real-world-memory-proactive-brief      | composite |     |
-# | real-world-memory-proactive-brief-json | command   |     |
-# | real-world-memory-proactive-brief-report | command |     |
-# | real-world-memory-scheduled            | composite |     |
-# | real-world-memory-scheduled-json       | command   |     |
-# | real-world-memory-scheduled-report     | command   |     |
-# | real-world-memory-live-consolidation   | command   |     |
-# | real-world-job-operator-ux             | composite |     |
-# | real-world-job-operator-ux-json        | command   |     |
-# | real-world-job-operator-ux-report      | command   |     |
-# | real-world-job-operator-ux-live-adapters | command |     |
-# | real-world-memory-retrieval            | composite |     |
-# | real-world-memory-retrieval-json       | command   |     |
-# | real-world-memory-retrieval-report     | command   |     |
-# | real-world-memory-production-ops        | composite |     |
-# | real-world-memory-production-ops-json   | command   |     |
-# | real-world-memory-production-ops-report | command   |     |
-# | real-world-memory-core-archival         | composite |     |
-# | real-world-memory-core-archival-json    | command   |     |
-# | real-world-memory-core-archival-report  | command   |     |
-# | real-world-memory-graph-rag             | composite |     |
-# | real-world-memory-graph-rag-json        | command   |     |
-# | real-world-memory-graph-rag-report      | command   |     |
-# | real-world-memory-live-adapters         | command   |     |
-
-[tasks.real-world-job-smoke]
+[tasks.real-world-memory-consolidation]
 workspace = false
 dependencies = [
-	"real-world-job-smoke-report",
+	"real-world-memory-consolidation-report",
 ]
 
-[tasks.real-world-job-smoke-json]
+[tasks.real-world-memory-consolidation-json]
 workspace = false
 command = "cargo"
 args = [
@@ -464,15 +282,21 @@ args = [
 	"--",
 	"run",
 	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_memory/work_resume",
+	"apps/elf-eval/fixtures/real_world_memory/consolidation",
 	"--out",
-	"tmp/real-world-job/real-world-job-smoke-report.json",
+	"tmp/real-world-memory/consolidation/report.json",
+	"--run-id",
+	"real-world-memory-consolidation",
+	"--adapter-id",
+	"fixture_consolidation",
+	"--adapter-name",
+	"ELF consolidation fixture",
 ]
 
-[tasks.real-world-job-smoke-report]
+[tasks.real-world-memory-consolidation-report]
 workspace = false
 dependencies = [
-	"real-world-job-smoke-json",
+	"real-world-memory-consolidation-json",
 ]
 command = "cargo"
 args = [
@@ -484,18 +308,18 @@ args = [
 	"--",
 	"publish",
 	"--report",
-	"tmp/real-world-job/real-world-job-smoke-report.json",
+	"tmp/real-world-memory/consolidation/report.json",
 	"--out",
-	"tmp/real-world-job/real-world-job-smoke-report.md",
+	"tmp/real-world-memory/consolidation/report.md",
 ]
 
-[tasks.real-world-memory]
+[tasks.real-world-memory-core-archival]
 workspace = false
 dependencies = [
-	"real-world-memory-report",
+	"real-world-memory-core-archival-report",
 ]
 
-[tasks.real-world-memory-json]
+[tasks.real-world-memory-core-archival-json]
 workspace = false
 command = "cargo"
 args = [
@@ -507,21 +331,21 @@ args = [
 	"--",
 	"run",
 	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_memory",
+	"apps/elf-eval/fixtures/real_world_memory/core_archival_memory",
 	"--out",
-	"tmp/real-world-memory/real-world-memory-report.json",
+	"tmp/real-world-memory/core-archival/report.json",
 	"--run-id",
-	"real-world-memory",
+	"real-world-memory-core-archival",
 	"--adapter-id",
-	"elf_real_world_memory_fixture",
+	"fixture_core_archival_memory",
 	"--adapter-name",
-	"ELF real-world memory fixture",
+	"ELF core and archival memory fixture",
 ]
 
-[tasks.real-world-memory-report]
+[tasks.real-world-memory-core-archival-report]
 workspace = false
 dependencies = [
-	"real-world-memory-json",
+	"real-world-memory-core-archival-json",
 ]
 command = "cargo"
 args = [
@@ -533,18 +357,18 @@ args = [
 	"--",
 	"publish",
 	"--report",
-	"tmp/real-world-memory/real-world-memory-report.json",
+	"tmp/real-world-memory/core-archival/report.json",
 	"--out",
-	"tmp/real-world-memory/real-world-memory-report.md",
+	"tmp/real-world-memory/core-archival/report.md",
 ]
 
-[tasks.real-world-memory-project-decisions]
+[tasks.real-world-memory-evolution]
 workspace = false
 dependencies = [
-	"real-world-memory-project-decisions-report",
+	"real-world-memory-evolution-report",
 ]
 
-[tasks.real-world-memory-project-decisions-json]
+[tasks.real-world-memory-evolution-json]
 workspace = false
 command = "cargo"
 args = [
@@ -556,21 +380,21 @@ args = [
 	"--",
 	"run",
 	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_memory/project_decisions",
+	"apps/elf-eval/fixtures/real_world_memory/evolution",
 	"--out",
-	"tmp/real-world-memory/project-decisions/report.json",
+	"tmp/real-world-memory/evolution-report.json",
 	"--run-id",
-	"real-world-memory-project-decisions",
+	"real-world-memory-evolution",
 	"--adapter-id",
-	"fixture_project_decisions",
+	"fixture_memory_evolution",
 	"--adapter-name",
-	"ELF project decision fixture",
+	"ELF fixture memory evolution",
 ]
 
-[tasks.real-world-memory-project-decisions-report]
+[tasks.real-world-memory-evolution-report]
 workspace = false
 dependencies = [
-	"real-world-memory-project-decisions-json",
+	"real-world-memory-evolution-json",
 ]
 command = "cargo"
 args = [
@@ -582,18 +406,18 @@ args = [
 	"--",
 	"publish",
 	"--report",
-	"tmp/real-world-memory/project-decisions/report.json",
+	"tmp/real-world-memory/evolution-report.json",
 	"--out",
-	"tmp/real-world-memory/project-decisions/report.md",
+	"tmp/real-world-memory/evolution-report.md",
 ]
 
-[tasks.real-world-memory-evolution]
+[tasks.real-world-memory-graph-rag]
 workspace = false
 dependencies = [
-	"real-world-memory-evolution-report",
+	"real-world-memory-graph-rag-report",
 ]
 
-[tasks.real-world-memory-evolution-json]
+[tasks.real-world-memory-graph-rag-json]
 workspace = false
 command = "cargo"
 args = [
@@ -605,21 +429,21 @@ args = [
 	"--",
 	"run",
 	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_memory/evolution",
+	"apps/elf-eval/fixtures/real_world_external_adapters/graph_rag",
 	"--out",
-	"tmp/real-world-memory/evolution-report.json",
+	"tmp/real-world-memory/graph-rag/report.json",
 	"--run-id",
-	"real-world-memory-evolution",
+	"real-world-memory-graph-rag",
 	"--adapter-id",
-	"fixture_memory_evolution",
+	"fixture_graph_rag_external_adapters",
 	"--adapter-name",
-	"ELF fixture memory evolution",
+	"Graph/RAG representative external-adapter fixtures",
 ]
 
-[tasks.real-world-memory-evolution-report]
+[tasks.real-world-memory-graph-rag-report]
 workspace = false
 dependencies = [
-	"real-world-memory-evolution-json",
+	"real-world-memory-graph-rag-json",
 ]
 command = "cargo"
 args = [
@@ -631,18 +455,41 @@ args = [
 	"--",
 	"publish",
 	"--report",
-	"tmp/real-world-memory/evolution-report.json",
+	"tmp/real-world-memory/graph-rag/report.json",
 	"--out",
-	"tmp/real-world-memory/evolution-report.md",
+	"tmp/real-world-memory/graph-rag/report.md",
 ]
 
-[tasks.real-world-job-operator-ux]
+[tasks.real-world-memory-json]
+workspace = false
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"run",
+	"--fixtures",
+	"apps/elf-eval/fixtures/real_world_memory",
+	"--out",
+	"tmp/real-world-memory/real-world-memory-report.json",
+	"--run-id",
+	"real-world-memory",
+	"--adapter-id",
+	"elf_real_world_memory_fixture",
+	"--adapter-name",
+	"ELF real-world memory fixture",
+]
+
+[tasks.real-world-memory-knowledge]
 workspace = false
 dependencies = [
-	"real-world-job-operator-ux-report",
+	"real-world-memory-knowledge-report",
 ]
 
-[tasks.real-world-job-operator-ux-json]
+[tasks.real-world-memory-knowledge-json]
 workspace = false
 command = "cargo"
 args = [
@@ -654,21 +501,21 @@ args = [
 	"--",
 	"run",
 	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_job/operator_debugging_ux",
+	"apps/elf-eval/fixtures/real_world_memory/knowledge",
 	"--out",
-	"tmp/real-world-job/real-world-job-operator-ux-report.json",
+	"tmp/real-world-memory/knowledge-report.json",
 	"--run-id",
-	"real-world-job-operator-ux",
+	"real-world-memory-knowledge",
 	"--adapter-id",
-	"fixture_operator_ux",
+	"fixture_knowledge",
 	"--adapter-name",
-	"ELF operator UX fixture",
+	"ELF knowledge fixture",
 ]
 
-[tasks.real-world-job-operator-ux-report]
+[tasks.real-world-memory-knowledge-report]
 workspace = false
 dependencies = [
-	"real-world-job-operator-ux-json",
+	"real-world-memory-knowledge-json",
 ]
 command = "cargo"
 args = [
@@ -680,26 +527,34 @@ args = [
 	"--",
 	"publish",
 	"--report",
-	"tmp/real-world-job/real-world-job-operator-ux-report.json",
+	"tmp/real-world-memory/knowledge-report.json",
 	"--out",
-	"tmp/real-world-job/real-world-job-operator-ux-report.md",
+	"tmp/real-world-memory/knowledge-report.md",
 ]
 
-[tasks.real-world-job-operator-ux-live-adapters]
+[tasks.real-world-memory-live-adapters]
 workspace = false
 command = "bash"
 args = [
-	"-lc",
-	"docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_OPERATOR_DEBUG_LIVE_REPORT_DIR -e ELF_OPERATOR_DEBUG_LIVE_FIXTURES -e ELF_OPERATOR_DEBUG_LIVE_WORK_DIR -e ELF_OPERATOR_DEBUG_QMD_DIR baseline-runner bash scripts/real-world-operator-debug-live-adapters.sh",
+	"scripts/real-world-docker.sh",
+	"memory-live-adapters",
 ]
 
-[tasks.real-world-memory-retrieval]
+[tasks.real-world-memory-live-consolidation]
+workspace = false
+command = "bash"
+args = [
+	"scripts/real-world-docker.sh",
+	"memory-live-consolidation",
+]
+
+[tasks.real-world-memory-proactive-brief]
 workspace = false
 dependencies = [
-	"real-world-memory-retrieval-report",
+	"real-world-memory-proactive-brief-report",
 ]
 
-[tasks.real-world-memory-retrieval-json]
+[tasks.real-world-memory-proactive-brief-json]
 workspace = false
 command = "cargo"
 args = [
@@ -711,21 +566,21 @@ args = [
 	"--",
 	"run",
 	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_memory/retrieval",
+	"apps/elf-eval/fixtures/real_world_memory/proactive_brief",
+	"--out",
+	"tmp/real-world-memory/proactive-brief/report.json",
 	"--run-id",
-	"real-world-memory-retrieval",
+	"real-world-memory-proactive-brief",
 	"--adapter-id",
-	"fixture_retrieval",
+	"fixture_proactive_brief",
 	"--adapter-name",
-	"ELF fixture retrieval cases",
-	"--out",
-	"tmp/real-world-memory/retrieval-report.json",
+	"ELF proactive brief fixture",
 ]
 
-[tasks.real-world-memory-retrieval-report]
+[tasks.real-world-memory-proactive-brief-report]
 workspace = false
 dependencies = [
-	"real-world-memory-retrieval-json",
+	"real-world-memory-proactive-brief-json",
 ]
 command = "cargo"
 args = [
@@ -737,9 +592,9 @@ args = [
 	"--",
 	"publish",
 	"--report",
-	"tmp/real-world-memory/retrieval-report.json",
+	"tmp/real-world-memory/proactive-brief/report.json",
 	"--out",
-	"tmp/real-world-memory/retrieval-report.md",
+	"tmp/real-world-memory/proactive-brief/report.md",
 ]
 
 [tasks.real-world-memory-production-ops]
@@ -791,13 +646,13 @@ args = [
 	"tmp/real-world-memory/production-ops-report.md",
 ]
 
-[tasks.real-world-memory-consolidation]
+[tasks.real-world-memory-project-decisions]
 workspace = false
 dependencies = [
-	"real-world-memory-consolidation-report",
+	"real-world-memory-project-decisions-report",
 ]
 
-[tasks.real-world-memory-consolidation-json]
+[tasks.real-world-memory-project-decisions-json]
 workspace = false
 command = "cargo"
 args = [
@@ -809,21 +664,21 @@ args = [
 	"--",
 	"run",
 	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_memory/consolidation",
+	"apps/elf-eval/fixtures/real_world_memory/project_decisions",
 	"--out",
-	"tmp/real-world-memory/consolidation/report.json",
+	"tmp/real-world-memory/project-decisions/report.json",
 	"--run-id",
-	"real-world-memory-consolidation",
+	"real-world-memory-project-decisions",
 	"--adapter-id",
-	"fixture_consolidation",
+	"fixture_project_decisions",
 	"--adapter-name",
-	"ELF consolidation fixture",
+	"ELF project decision fixture",
 ]
 
-[tasks.real-world-memory-consolidation-report]
+[tasks.real-world-memory-project-decisions-report]
 workspace = false
 dependencies = [
-	"real-world-memory-consolidation-json",
+	"real-world-memory-project-decisions-json",
 ]
 command = "cargo"
 args = [
@@ -835,44 +690,15 @@ args = [
 	"--",
 	"publish",
 	"--report",
-	"tmp/real-world-memory/consolidation/report.json",
-	"--out",
-	"tmp/real-world-memory/consolidation/report.md",
-]
-
-[tasks.real-world-memory-summary]
-workspace = false
-dependencies = [
-	"real-world-memory-summary-report",
-]
-
-[tasks.real-world-memory-summary-json]
-workspace = false
-command = "cargo"
-args = [
-	"run",
-	"-p",
-	"elf-eval",
-	"--bin",
-	"real_world_job_benchmark",
-	"--",
-	"run",
-	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_memory/memory_summary",
+	"tmp/real-world-memory/project-decisions/report.json",
 	"--out",
-	"tmp/real-world-memory/memory-summary/report.json",
-	"--run-id",
-	"real-world-memory-summary",
-	"--adapter-id",
-	"fixture_memory_summary",
-	"--adapter-name",
-	"ELF memory summary fixture",
+	"tmp/real-world-memory/project-decisions/report.md",
 ]
 
-[tasks.real-world-memory-summary-report]
+[tasks.real-world-memory-report]
 workspace = false
 dependencies = [
-	"real-world-memory-summary-json",
+	"real-world-memory-json",
 ]
 command = "cargo"
 args = [
@@ -884,18 +710,18 @@ args = [
 	"--",
 	"publish",
 	"--report",
-	"tmp/real-world-memory/memory-summary/report.json",
+	"tmp/real-world-memory/real-world-memory-report.json",
 	"--out",
-	"tmp/real-world-memory/memory-summary/report.md",
+	"tmp/real-world-memory/real-world-memory-report.md",
 ]
 
-[tasks.real-world-memory-proactive-brief]
+[tasks.real-world-memory-retrieval]
 workspace = false
 dependencies = [
-	"real-world-memory-proactive-brief-report",
+	"real-world-memory-retrieval-report",
 ]
 
-[tasks.real-world-memory-proactive-brief-json]
+[tasks.real-world-memory-retrieval-json]
 workspace = false
 command = "cargo"
 args = [
@@ -907,21 +733,21 @@ args = [
 	"--",
 	"run",
 	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_memory/proactive_brief",
-	"--out",
-	"tmp/real-world-memory/proactive-brief/report.json",
+	"apps/elf-eval/fixtures/real_world_memory/retrieval",
 	"--run-id",
-	"real-world-memory-proactive-brief",
+	"real-world-memory-retrieval",
 	"--adapter-id",
-	"fixture_proactive_brief",
+	"fixture_retrieval",
 	"--adapter-name",
-	"ELF proactive brief fixture",
+	"ELF fixture retrieval cases",
+	"--out",
+	"tmp/real-world-memory/retrieval-report.json",
 ]
 
-[tasks.real-world-memory-proactive-brief-report]
+[tasks.real-world-memory-retrieval-report]
 workspace = false
 dependencies = [
-	"real-world-memory-proactive-brief-json",
+	"real-world-memory-retrieval-json",
 ]
 command = "cargo"
 args = [
@@ -933,9 +759,9 @@ args = [
 	"--",
 	"publish",
 	"--report",
-	"tmp/real-world-memory/proactive-brief/report.json",
+	"tmp/real-world-memory/retrieval-report.json",
 	"--out",
-	"tmp/real-world-memory/proactive-brief/report.md",
+	"tmp/real-world-memory/retrieval-report.md",
 ]
 
 [tasks.real-world-memory-scheduled]
@@ -987,21 +813,13 @@ args = [
 	"tmp/real-world-memory/scheduled/report.md",
 ]
 
-[tasks.real-world-memory-live-consolidation]
-workspace = false
-command = "bash"
-args = [
-	"-lc",
-	"docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_CONSOLIDATION_LIVE_REPORT_DIR -e ELF_CONSOLIDATION_LIVE_FIXTURES baseline-runner bash scripts/real-world-consolidation-live-adapter.sh",
-]
-
-[tasks.real-world-memory-core-archival]
+[tasks.real-world-memory-summary]
 workspace = false
 dependencies = [
-	"real-world-memory-core-archival-report",
+	"real-world-memory-summary-report",
 ]
 
-[tasks.real-world-memory-core-archival-json]
+[tasks.real-world-memory-summary-json]
 workspace = false
 command = "cargo"
 args = [
@@ -1013,21 +831,21 @@ args = [
 	"--",
 	"run",
 	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_memory/core_archival_memory",
+	"apps/elf-eval/fixtures/real_world_memory/memory_summary",
 	"--out",
-	"tmp/real-world-memory/core-archival/report.json",
+	"tmp/real-world-memory/memory-summary/report.json",
 	"--run-id",
-	"real-world-memory-core-archival",
+	"real-world-memory-summary",
 	"--adapter-id",
-	"fixture_core_archival_memory",
+	"fixture_memory_summary",
 	"--adapter-name",
-	"ELF core and archival memory fixture",
+	"ELF memory summary fixture",
 ]
 
-[tasks.real-world-memory-core-archival-report]
+[tasks.real-world-memory-summary-report]
 workspace = false
 dependencies = [
-	"real-world-memory-core-archival-json",
+	"real-world-memory-summary-json",
 ]
 command = "cargo"
 args = [
@@ -1039,233 +857,250 @@ args = [
 	"--",
 	"publish",
 	"--report",
-	"tmp/real-world-memory/core-archival/report.json",
+	"tmp/real-world-memory/memory-summary/report.json",
 	"--out",
-	"tmp/real-world-memory/core-archival/report.md",
+	"tmp/real-world-memory/memory-summary/report.md",
 ]
 
-[tasks.real-world-memory-graph-rag]
+# Check
+# | task             | type      | cwd |
+# | ---------------- | --------- | --- |
+# | check            | composite |     |
+# | check-docs       | command   |     |
+# | check-rust       | command   |     |
+# | check-trace-gate | command   |     |
+
+[tasks.check]
+clear = true
 workspace = false
 dependencies = [
-	"real-world-memory-graph-rag-report",
+	"fmt-check",
+	"check-docs",
+	"check-rust",
+	"lint",
+	"test",
 ]
 
-[tasks.real-world-memory-graph-rag-json]
+[tasks.check-docs]
 workspace = false
-command = "cargo"
+command = "python3"
 args = [
-	"run",
-	"-p",
-	"elf-eval",
-	"--bin",
-	"real_world_job_benchmark",
-	"--",
-	"run",
-	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_external_adapters/graph_rag",
-	"--out",
-	"tmp/real-world-memory/graph-rag/report.json",
-	"--run-id",
-	"real-world-memory-graph-rag",
-	"--adapter-id",
-	"fixture_graph_rag_external_adapters",
-	"--adapter-name",
-	"Graph/RAG representative external-adapter fixtures",
+	"scripts/check-docs.py",
 ]
 
-[tasks.real-world-memory-graph-rag-report]
+[tasks.check-rust]
 workspace = false
-dependencies = [
-	"real-world-memory-graph-rag-json",
-]
 command = "cargo"
 args = [
-	"run",
-	"-p",
-	"elf-eval",
-	"--bin",
-	"real_world_job_benchmark",
-	"--",
-	"publish",
-	"--report",
-	"tmp/real-world-memory/graph-rag/report.json",
-	"--out",
-	"tmp/real-world-memory/graph-rag/report.md",
+	"check",
+	"--workspace",
+	"--all-targets",
+	"--all-features",
 ]
 
-[tasks.real-world-memory-live-adapters]
+[tasks.check-trace-gate]
 workspace = false
 command = "bash"
 args = [
-	"-lc",
-	"set -euo pipefail; lightrag_start=\"$(printenv ELF_LIGHTRAG_CONTEXT_START || true)\"; graphiti_start=\"$(printenv ELF_GRAPHITI_ZEP_SMOKE_START || true)\"; status=0; if [ \"$lightrag_start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile lightrag up -d lightrag; fi; if [ \"$graphiti_start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile graphiti-zep up -d graphiti-falkordb; fi; docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_REAL_WORLD_LIVE_ENABLE_RAGFLOW -e ELF_REAL_WORLD_LIVE_ENABLE_LIGHTRAG -e ELF_REAL_WORLD_LIVE_ENABLE_GRAPHRAG -e ELF_REAL_WORLD_LIVE_ENABLE_GRAPHITI_ZEP -e ELF_REAL_WORLD_LIVE_ENABLE_GRAPHIFY -e ELF_RAGFLOW_SMOKE_START -e ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE -e ELF_RAGFLOW_SMOKE_ALLOW_ARM -e ELF_RAGFLOW_SMOKE_PULL_IMAGE -e ELF_RAGFLOW_SMOKE_CLEANUP -e ELF_RAGFLOW_SMOKE_DEVICE -e ELF_RAGFLOW_API_PORT -e ELF_RAGFLOW_API_BASE -e ELF_RAGFLOW_API_KEY -e RAGFLOW_API_KEY -e ELF_RAGFLOW_SMOKE_STARTUP_ATTEMPTS -e ELF_RAGFLOW_SMOKE_STARTUP_INTERVAL_SECONDS -e ELF_RAGFLOW_SMOKE_COMPOSE_TIMEOUT_SECONDS -e ELF_RAGFLOW_REPO_URL -e ELF_RAGFLOW_REF -e ELF_RAGFLOW_IMAGE -e ELF_RAGFLOW_COMPOSE_PROJECT -e ELF_LIGHTRAG_CONTEXT_START -e ELF_LIGHTRAG_API_BASE -e ELF_LIGHTRAG_ADAPTER_ID -e ELF_LIGHTRAG_ADAPTER_NAME -e ELF_LIGHTRAG_STARTUP_ATTEMPTS -e ELF_LIGHTRAG_STARTUP_INTERVAL_SECONDS -e ELF_LIGHTRAG_INDEX_ATTEMPTS -e ELF_LIGHTRAG_INDEX_INTERVAL_SECONDS -e ELF_GRAPHRAG_SMOKE_RUN -e ELF_GRAPHRAG_SMOKE_WORK_DIR -e ELF_GRAPHRAG_SMOKE_INSTALL -e ELF_GRAPHRAG_VERSION -e ELF_GRAPHRAG_PACKAGE -e ELF_GRAPHRAG_REF -e ELF_GRAPHRAG_CHAT_MODEL -e ELF_GRAPHRAG_EMBEDDING_MODEL -e ELF_GRAPHRAG_API_BASE -e ELF_GRAPHRAG_API_KEY -e ELF_GRAPHRAG_INDEX_METHOD -e ELF_GRAPHRAG_QUERY_METHOD -e ELF_GRAPHRAG_TIMEOUT_SECONDS -e ELF_GRAPHRAG_MAX_DOCS -e ELF_GRAPHRAG_MAX_INPUT_CHARS -e ELF_GRAPHITI_ZEP_SMOKE_START -e ELF_GRAPHITI_ZEP_SMOKE_RUN -e ELF_GRAPHITI_ZEP_SMOKE_WORK_DIR -e ELF_GRAPHITI_ZEP_SMOKE_INSTALL -e 
ELF_GRAPHITI_ZEP_VERSION -e ELF_GRAPHITI_ZEP_PACKAGE -e ELF_GRAPHITI_ZEP_REF -e ELF_GRAPHITI_ZEP_API_BASE -e ELF_GRAPHITI_ZEP_API_KEY -e ELF_GRAPHITI_ZEP_LLM_MODEL -e ELF_GRAPHITI_ZEP_EMBEDDING_MODEL -e ELF_GRAPHITI_ZEP_FALKORDB_HOST -e ELF_GRAPHITI_ZEP_FALKORDB_PORT -e ELF_GRAPHITI_ZEP_FALKORDB_DATABASE -e ELF_GRAPHITI_ZEP_TIMEOUT_SECONDS -e ELF_GRAPHITI_ZEP_STARTUP_ATTEMPTS -e ELF_GRAPHITI_ZEP_STARTUP_INTERVAL_SECONDS -e ELF_GRAPHIFY_SMOKE_RUN -e ELF_GRAPHIFY_SMOKE_WORK_DIR -e ELF_GRAPHIFY_SMOKE_INSTALL -e ELF_GRAPHIFY_PACKAGE -e ELF_GRAPHIFY_REF -e ELF_GRAPHIFY_TIMEOUT_SECONDS -e ELF_GRAPHIFY_QUERY_BUDGET baseline-runner bash scripts/real-world-live-adapters.sh || status=$?; if [ \"$lightrag_start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile lightrag stop lightrag lightrag-mock-provider >/dev/null 2>&1 || true; fi; if [ \"$graphiti_start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile graphiti-zep stop graphiti-falkordb >/dev/null 2>&1 || true; fi; exit \"$status\"",
+	"scripts/trace-gate.sh",
 ]
 
+# Clean
+# | task                       | type    | cwd |
+# | -------------------------- | ------- | --- |
+# | clean-baseline-live-docker | command |     |
+# | clean-parity-docker        | command |     |
 
-# Real-world memory knowledge benchmark
-# | task                           | type      | cwd |
-# | ------------------------------ | --------- | --- |
-# | real-world-memory-knowledge        | composite | |
-# | real-world-memory-knowledge-json   | command   | |
-# | real-world-memory-knowledge-report | command   | |
-# | real-world-first-generation-oss        | composite | |
-# | real-world-first-generation-oss-json   | command   | |
-# | real-world-first-generation-oss-report | command   | |
-# | ragflow-docker-smoke                    | command   | |
-# | lightrag-docker-context-smoke           | command   | |
-# | graphrag-docker-smoke                   | command   | |
-# | graphiti-zep-docker-temporal-smoke      | command   | |
-# | graphify-docker-graph-report-smoke      | command   | |
-
-[tasks.ragflow-docker-smoke]
+[tasks.clean-baseline-live-docker]
 workspace = false
-command = "bash"
+command = "docker"
 args = [
-	"scripts/ragflow-docker-evidence-smoke.sh",
+	"compose",
+	"-f",
+	"docker-compose.baseline.yml",
+	"down",
+	"-v",
+	"--remove-orphans",
 ]
 
-[tasks.lightrag-docker-context-smoke]
+[tasks.clean-parity-docker]
 workspace = false
-command = "bash"
+command = "docker"
 args = [
-	"-lc",
-	"set -euo pipefail; start=\"$(printenv ELF_LIGHTRAG_CONTEXT_START || true)\"; status=0; if [ \"$start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile lightrag up -d lightrag; fi; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner bash scripts/lightrag-docker-context-smoke.sh || status=$?; if [ \"$start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile lightrag stop lightrag lightrag-mock-provider >/dev/null 2>&1 || true; fi; exit \"$status\"",
+	"compose",
+	"-f",
+	"docker-compose.parity.yml",
+	"down",
+	"-v",
+	"--remove-orphans",
 ]
 
-[tasks.graphrag-docker-smoke]
+# Format
+# | task           | type      | cwd |
+# | -------------- | --------- | --- |
+# | fmt            | composite |     |
+# | fmt-check      | composite |     |
+# | fmt-rust       | command   |     |
+# | fmt-rust-check | extend    |     |
+# | fmt-toml       | command   |     |
+# | fmt-toml-check | extend    |     |
+
+[tasks.fmt]
 workspace = false
-command = "bash"
-args = [
-	"-lc",
-	"set -euo pipefail; docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_GRAPHRAG_SMOKE_RUN -e ELF_GRAPHRAG_SMOKE_REPORT_DIR -e ELF_GRAPHRAG_SMOKE_WORK_DIR -e ELF_GRAPHRAG_SMOKE_INSTALL -e ELF_GRAPHRAG_VERSION -e ELF_GRAPHRAG_PACKAGE -e ELF_GRAPHRAG_REF -e ELF_GRAPHRAG_CHAT_MODEL -e ELF_GRAPHRAG_EMBEDDING_MODEL -e ELF_GRAPHRAG_API_BASE -e ELF_GRAPHRAG_API_KEY -e ELF_GRAPHRAG_INDEX_METHOD -e ELF_GRAPHRAG_QUERY_METHOD -e ELF_GRAPHRAG_TIMEOUT_SECONDS -e ELF_GRAPHRAG_MAX_DOCS -e ELF_GRAPHRAG_MAX_INPUT_CHARS baseline-runner python3 scripts/graphrag-docker-smoke.py",
+dependencies = [
+	"fmt-rust",
+	"fmt-toml",
+]
+
+[tasks.fmt-check]
+workspace = false
+dependencies = [
+	"fmt-rust-check",
+	"fmt-toml-check",
 ]
 
-[tasks.graphiti-zep-docker-temporal-smoke]
+[tasks.fmt-rust]
+workspace = false
+script = "cargo +nightly fmt --all"
+
+[tasks.fmt-rust-check]
+extend = "fmt-rust"
+script = "cargo +nightly fmt --all -- --check"
+
+[tasks.fmt-toml]
 workspace = false
-command = "bash"
+command = "taplo"
 args = [
-	"-lc",
-	"set -euo pipefail; start=\"$(printenv ELF_GRAPHITI_ZEP_SMOKE_START || true)\"; status=0; if [ \"$start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile graphiti-zep up -d graphiti-falkordb; fi; docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_GRAPHITI_ZEP_SMOKE_RUN -e ELF_GRAPHITI_ZEP_SMOKE_REPORT_DIR -e ELF_GRAPHITI_ZEP_SMOKE_WORK_DIR -e ELF_GRAPHITI_ZEP_SMOKE_INSTALL -e ELF_GRAPHITI_ZEP_VERSION -e ELF_GRAPHITI_ZEP_PACKAGE -e ELF_GRAPHITI_ZEP_REF -e ELF_GRAPHITI_ZEP_API_BASE -e ELF_GRAPHITI_ZEP_API_KEY -e ELF_GRAPHITI_ZEP_LLM_MODEL -e ELF_GRAPHITI_ZEP_EMBEDDING_MODEL -e ELF_GRAPHITI_ZEP_FALKORDB_HOST -e ELF_GRAPHITI_ZEP_FALKORDB_PORT -e ELF_GRAPHITI_ZEP_FALKORDB_DATABASE -e ELF_GRAPHITI_ZEP_TIMEOUT_SECONDS -e ELF_GRAPHITI_ZEP_STARTUP_ATTEMPTS -e ELF_GRAPHITI_ZEP_STARTUP_INTERVAL_SECONDS baseline-runner python3 scripts/graphiti-zep-docker-temporal-smoke.py || status=$?; if [ \"$start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile graphiti-zep stop graphiti-falkordb >/dev/null 2>&1 || true; fi; exit \"$status\"",
+	"fmt",
 ]
 
-[tasks.graphify-docker-graph-report-smoke]
-workspace = false
-command = "bash"
+[tasks.fmt-toml-check]
+extend = "fmt-toml"
 args = [
-	"-lc",
-	"set -euo pipefail; docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_GRAPHIFY_SMOKE_RUN -e ELF_GRAPHIFY_SMOKE_REPORT_DIR -e ELF_GRAPHIFY_SMOKE_WORK_DIR -e ELF_GRAPHIFY_SMOKE_INSTALL -e ELF_GRAPHIFY_PACKAGE -e ELF_GRAPHIFY_REF -e ELF_GRAPHIFY_TIMEOUT_SECONDS -e ELF_GRAPHIFY_QUERY_BUDGET baseline-runner python3 scripts/graphify-docker-graph-report-smoke.py",
+	"fmt",
+	"--check",
 ]
 
-[tasks.real-world-memory-knowledge]
+# Lint
+# | task        | type      | cwd |
+# | ----------- | --------- | --- |
+# | lint        | composite |     |
+# | lint-rust   | command   |     |
+# | lint-vstyle | command   |     |
+
+[tasks.lint]
 workspace = false
 dependencies = [
-	"real-world-memory-knowledge-report",
+	"lint-rust",
+	"lint-vstyle",
 ]
 
-[tasks.real-world-memory-knowledge-json]
+[tasks.lint-rust]
 workspace = false
 command = "cargo"
 args = [
-	"run",
-	"-p",
-	"elf-eval",
-	"--bin",
-	"real_world_job_benchmark",
+	"clippy",
+	"--all-features",
+	"--all-targets",
+	"--workspace",
 	"--",
-	"run",
-	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_memory/knowledge",
-	"--out",
-	"tmp/real-world-memory/knowledge-report.json",
-	"--run-id",
-	"real-world-memory-knowledge",
-	"--adapter-id",
-	"fixture_knowledge",
-	"--adapter-name",
-	"ELF knowledge fixture",
+	"-D",
+	"clippy::all",
+	"-D",
+	"clippy::too_many_lines",
+	"-D",
+	"clippy::unwrap_used",
+	"-D",
+	"clippy::use_self",
+	"-D",
+	"clippy::wildcard_imports",
+	"-D",
+	"missing-docs",
+	"-D",
+	"unused-crate-dependencies",
+	"-D",
+	"warnings",
 ]
 
-[tasks.real-world-memory-knowledge-report]
+[tasks.lint-vstyle]
 workspace = false
-dependencies = [
-	"real-world-memory-knowledge-json",
-]
 command = "cargo"
 args = [
-	"run",
-	"-p",
-	"elf-eval",
-	"--bin",
-	"real_world_job_benchmark",
-	"--",
-	"publish",
-	"--report",
-	"tmp/real-world-memory/knowledge-report.json",
-	"--out",
-	"tmp/real-world-memory/knowledge-report.md",
+	"vstyle",
+	"curate",
+	"--language",
+	"rust",
+	"--workspace",
+	"--all-features",
 ]
 
-[tasks.real-world-first-generation-oss]
+# Lint Fix
+# | task            | type      | cwd |
+# | --------------- | --------- | --- |
+# | lint-fix        | composite |     |
+# | lint-fix-rust   | command   |     |
+# | lint-fix-vstyle | command   |     |
+
+[tasks.lint-fix]
 workspace = false
 dependencies = [
-	"real-world-first-generation-oss-report",
+	"lint-fix-rust",
+	"lint-fix-vstyle",
 ]
 
-[tasks.real-world-first-generation-oss-json]
+[tasks.lint-fix-rust]
 workspace = false
 command = "cargo"
 args = [
-	"run",
-	"-p",
-	"elf-eval",
-	"--bin",
-	"real_world_job_benchmark",
+	"clippy",
+	"--fix",
+	"--allow-dirty",
+	"--all-features",
+	"--all-targets",
+	"--workspace",
 	"--",
-	"run",
-	"--fixtures",
-	"apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss",
-	"--out",
-	"tmp/real-world-memory/first-generation-oss/report.json",
-	"--run-id",
-	"first-generation-oss-continuity-source-store",
-	"--adapter-id",
-	"fixture_first_generation_oss",
-	"--adapter-name",
-	"First-generation OSS fixture coverage",
+	"-D",
+	"clippy::all",
+	"-D",
+	"clippy::too_many_lines",
+	"-D",
+	"clippy::unwrap_used",
+	"-D",
+	"clippy::use_self",
+	"-D",
+	"clippy::wildcard_imports",
+	"-D",
+	"missing-docs",
+	"-D",
+	"unused-crate-dependencies",
+	"-D",
+	"warnings",
 ]
 
-[tasks.real-world-first-generation-oss-report]
+[tasks.lint-fix-vstyle]
 workspace = false
-dependencies = [
-	"real-world-first-generation-oss-json",
-]
 command = "cargo"
 args = [
-	"run",
-	"-p",
-	"elf-eval",
-	"--bin",
-	"real_world_job_benchmark",
-	"--",
-	"publish",
-	"--report",
-	"tmp/real-world-memory/first-generation-oss/report.json",
-	"--out",
-	"tmp/real-world-memory/first-generation-oss/report.md",
+	"vstyle",
+	"tune",
+	"--language",
+	"rust",
+	"--workspace",
+	"--all-features",
+	"--strict",
 ]
 
-
-# External memory pattern radar
-# | task                               | type      | cwd |
-# | ---------------------------------- | --------- | --- |
-# | external-memory-radar              | command   |     |
-# | external-memory-radar-artifact     | composite |     |
-# | external-memory-radar-artifact-json | command  |     |
-# | external-memory-radar-artifact-validate | command | |
-# | external-memory-radar-dry-run      | composite |     |
-# | external-memory-radar-dry-run-json | command   |     |
-# | external-memory-radar-dry-run-validate | command |  |
-# | external-memory-radar-validate     | command   |     |
+# Research
+# | task                                    | type      | cwd |
+# | --------------------------------------- | --------- | --- |
+# | external-memory-radar                   | command   |     |
+# | external-memory-radar-artifact          | composite |     |
+# | external-memory-radar-artifact-json     | command   |     |
+# | external-memory-radar-artifact-validate | command   |     |
+# | external-memory-radar-dry-run           | composite |     |
+# | external-memory-radar-dry-run-json      | command   |     |
+# | external-memory-radar-dry-run-validate  | command   |     |
+# | external-memory-radar-validate          | command   |     |
 
 [tasks.external-memory-radar]
 workspace = false
@@ -1383,30 +1218,156 @@ args = [
 	"docs/research/external_memory_pattern_radar/cursor.json",
 ]
 
+# Smoke
+# | task                               | type      | cwd |
+# | ---------------------------------- | --------- | --- |
+# | smoke-graphify-docker-graph-report | command   |     |
+# | smoke-graphiti-zep-docker-temporal | command   |     |
+# | smoke-graphrag-docker              | command   |     |
+# | smoke-lightrag-docker-context      | command   |     |
+# | smoke-ragflow-docker               | command   |     |
+# | smoke-real-world-job               | composite |     |
+# | smoke-real-world-job-json          | command   |     |
+# | smoke-real-world-job-report        | command   |     |
+
+[tasks.smoke-graphify-docker-graph-report]
+workspace = false
+command = "bash"
+args = [
+	"scripts/smoke-docker.sh",
+	"graphify-docker-graph-report",
+]
+
+[tasks.smoke-graphiti-zep-docker-temporal]
+workspace = false
+command = "bash"
+args = [
+	"scripts/smoke-docker.sh",
+	"graphiti-zep-docker-temporal",
+]
+
+[tasks.smoke-graphrag-docker]
+workspace = false
+command = "bash"
+args = [
+	"scripts/smoke-docker.sh",
+	"graphrag-docker",
+]
+
+[tasks.smoke-lightrag-docker-context]
+workspace = false
+command = "bash"
+args = [
+	"scripts/smoke-docker.sh",
+	"lightrag-docker-context",
+]
 
-# Meta
-# | task   | type      | cwd |
-# | ------ | --------- | --- |
-# | checks | composite |     |
+[tasks.smoke-ragflow-docker]
+workspace = false
+command = "bash"
+args = [
+	"scripts/ragflow-docker-evidence-smoke.sh",
+]
 
-[tasks.checks]
+[tasks.smoke-real-world-job]
 workspace = false
 dependencies = [
-	"lint",
-	"test",
-	"fmt-check",
+	"smoke-real-world-job-report",
+]
+
+[tasks.smoke-real-world-job-json]
+workspace = false
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"run",
+	"--fixtures",
+	"apps/elf-eval/fixtures/real_world_memory/work_resume",
+	"--out",
+	"tmp/real-world-job/real-world-job-smoke-report.json",
+]
+
+[tasks.smoke-real-world-job-report]
+workspace = false
+dependencies = [
+	"smoke-real-world-job-json",
+]
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"publish",
+	"--report",
+	"tmp/real-world-job/real-world-job-smoke-report.json",
+	"--out",
+	"tmp/real-world-job/real-world-job-smoke-report.md",
 ]
 
+# Test
+# | task                  | type      | cwd |
+# | --------------------- | --------- | --- |
+# | test                  | composite |     |
+# | test-e2e              | command   |     |
+# | test-rust             | command   |     |
+# | test-rust-all         | command   |     |
+# | test-rust-integration | command   |     |
 
-# Quality utilities
-# | task      | type    | cwd |
-# | --------- | ------- | --- |
-# | trace-gate | command |     |
+[tasks.test]
+clear = true
+workspace = false
+dependencies = [
+	"test-rust",
+]
 
-[tasks.trace-gate]
+[tasks.test-e2e]
 workspace = false
 command = "bash"
 args = [
-	"-lc",
-	"set -euo pipefail; DSN=\"${TRACE_GATE_PG_DSN:-postgres://postgres:postgres@127.0.0.1:5432/elf}\"; psql \"${DSN}\" -v ON_ERROR_STOP=1 -f sql/init.sql; psql \"${DSN}\" -v ON_ERROR_STOP=1 -f .github/fixtures/trace_gate/fixture.sql; cargo run -p elf-eval --bin trace_regression_gate -- --config .github/fixtures/trace_gate/config.toml --gate .github/fixtures/trace_gate/gate.json --out tmp/trace_gate.report.json",
+	"scripts/context-misranking-harness.sh",
+]
+
+[tasks.test-rust]
+workspace = false
+command = "cargo"
+args = [
+	"nextest",
+	"run",
+	"--workspace",
+	"--all-targets",
+	"--all-features",
+]
+
+[tasks.test-rust-all]
+workspace = false
+command = "cargo"
+args = [
+	"nextest",
+	"run",
+	"--workspace",
+	"--all-targets",
+	"--all-features",
+	"--run-ignored",
+	"all",
+]
+
+[tasks.test-rust-integration]
+workspace = false
+command = "cargo"
+args = [
+	"nextest",
+	"run",
+	"--workspace",
+	"--all-targets",
+	"--all-features",
+	"--run-ignored",
+	"only",
 ]
diff --git a/README.md b/README.md
index 13de0803..5649d0d6 100644
--- a/README.md
+++ b/README.md
@@ -254,7 +254,7 @@ provider-backed ELF evidence was required.
   `cargo make baseline-soak-docker`, `cargo make baseline-live-report`,
   `cargo make real-world-memory-live-adapters`,
   `cargo make real-world-first-generation-oss`, and
-  `cargo make baseline-live-docker-clean`. Expensive 100k and long-soak profiles
+  `cargo make clean-baseline-live-docker`. Expensive 100k and long-soak profiles
   are opt-in and do not run in normal checks.
 
 Detailed evidence and interpretation:
@@ -390,8 +390,8 @@ self-check evidence, and fixture-backed scheduled-memory task scoring.
 
 ```sh
 cargo make fmt
-cargo make lint
-cargo make test
+cargo make check
+cargo make test-rust
 ```
 
 For integration and E2E workflows, use `docs/guide/getting_started.md` and `docs/guide/integration-testing.md`.
diff --git a/apps/elf-eval/fixtures/production_corpus/synthetic_coding_agent_manifest.json b/apps/elf-eval/fixtures/production_corpus/synthetic_coding_agent_manifest.json
index d627b627..62873c40 100644
--- a/apps/elf-eval/fixtures/production_corpus/synthetic_coding_agent_manifest.json
+++ b/apps/elf-eval/fixtures/production_corpus/synthetic_coding_agent_manifest.json
@@ -13,13 +13,13 @@
       "evidence_id": "pr-110-review",
       "category": "pr",
       "title": "PR 110 Review Status",
-      "text": "PR #110 is review-ready for the ELF viewer lane. It passed `cargo make checks` and waits for the non-draft review handoff."
+      "text": "PR #110 is review-ready for the ELF viewer lane. It passed `cargo make check` and waits for the non-draft review handoff."
     },
     {
       "evidence_id": "worktree-xy791-repair",
       "category": "worktree",
       "title": "XY-791 Strict Config Repair",
-      "text": "Worktree XY-791 recovered strict-config repair after rebase. The exact gate was `cargo make fmt && cargo make lint-fix && cargo make checks`."
+      "text": "Worktree XY-791 recovered strict-config repair after rebase. The exact gate was `cargo make fmt && cargo make lint-fix && cargo make check`."
     },
     {
       "evidence_id": "runbook-live-baseline",
@@ -67,7 +67,7 @@
       "query": "Recover the exact repair gate command for XY-791 strict config.",
       "expected_evidence_ids": ["worktree-xy791-repair"],
       "allowed_alternate_evidence_ids": ["runbook-live-baseline"],
-      "expected_terms": ["XY-791", "cargo make fmt && cargo make lint-fix && cargo make checks"]
+      "expected_terms": ["XY-791", "cargo make fmt && cargo make lint-fix && cargo make check"]
     },
     {
       "query_id": "q-explain-stale-blocker",
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index afd789bc..0ba49733 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -1759,13 +1759,13 @@
       "setup": {
         "status": "blocked",
         "evidence": "XY-900 promotes the Docker-safe tiny-corpus evidence smoke into a generated real_world_job report while the checked-in row remains smoke-only research_gate evidence.",
-        "command": "cargo make ragflow-docker-smoke",
+        "command": "cargo make smoke-ragflow-docker",
         "artifact": "tmp/real-world-memory/ragflow-smoke/ragflow-smoke.json"
       },
       "run": {
         "status": "blocked",
         "evidence": "The live path requires explicit resource-envelope opt-in and a local self-hosted RAGFlow API key; setup failures stay typed in the generated smoke artifact.",
-        "command": "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make ragflow-docker-smoke",
+        "command": "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker",
         "artifact": "tmp/real-world-memory/ragflow-smoke/memory_projects_manifest.ragflow-smoke.json"
       },
       "result": {
@@ -1877,7 +1877,7 @@
         "runtime_boundary": "Run scripts/ragflow-docker-evidence-smoke.sh through cargo make; the live path uses the official RAGFlow Docker Compose service boundary without host-global RAGFlow installs.",
         "resource_expectation": "Large multi-service RAG stack; generated artifacts record CPU/GPU mode, memory, disk, image size, expanded disk notes, startup time, vm.max_map_count handling, and provider boundaries before scoring.",
         "retry_guidance": [
-          "Run cargo make ragflow-docker-smoke first to produce a typed preflight artifact.",
+          "Run cargo make smoke-ragflow-docker first to produce a typed preflight artifact.",
           "Start the live path only with ELF_RAGFLOW_SMOKE_START=1 and ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1.",
           "Keep private corpora and operator-owned provider credentials out of this smoke; map only generated public corpus reference chunks to evidence ids."
         ],
@@ -1903,13 +1903,13 @@
       "setup": {
         "status": "blocked",
         "evidence": "XY-886 adds a Docker-profile context-export smoke command, and XY-900 keeps its generated retrieval fixtures scored through real_world_job_benchmark. The checked-in row remains smoke-only research_gate evidence.",
-        "command": "cargo make lightrag-docker-context-smoke",
+        "command": "cargo make smoke-lightrag-docker-context",
         "artifact": "tmp/real-world-memory/lightrag-context/lightrag-materialization.json"
       },
       "run": {
         "status": "blocked",
         "evidence": "The default smoke records a typed setup/runtime failure if the LightRAG API is unavailable; set ELF_LIGHTRAG_CONTEXT_START=1 to start the opt-in Docker service profile.",
-        "command": "ELF_LIGHTRAG_CONTEXT_START=1 cargo make lightrag-docker-context-smoke",
+        "command": "ELF_LIGHTRAG_CONTEXT_START=1 cargo make smoke-lightrag-docker-context",
         "artifact": "tmp/real-world-memory/lightrag-context/summary.json"
       },
       "result": {
@@ -1990,7 +1990,7 @@
         },
         {
           "kind": "command",
-          "ref": "cargo make lightrag-docker-context-smoke",
+          "ref": "cargo make smoke-lightrag-docker-context",
           "status": "blocked"
         },
         {
@@ -2027,11 +2027,11 @@
             "evidence": "Official source-id and file-path citation reference."
           }
         ],
-        "setup_path": "Run cargo make lightrag-docker-context-smoke for a typed preflight artifact; set ELF_LIGHTRAG_CONTEXT_START=1 to start the opt-in LightRAG Docker profile and attempt live context export.",
+        "setup_path": "Run cargo make smoke-lightrag-docker-context for a typed preflight artifact; set ELF_LIGHTRAG_CONTEXT_START=1 to start the opt-in LightRAG Docker profile and attempt live context export.",
         "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus opt-in lightrag and lightrag-mock-provider services; generated source files and LightRAG data stay in Docker-mounted artifact paths and Docker volumes.",
         "resource_expectation": "The default profile uses the official LightRAG image, a local OpenAI-compatible mock provider, 64-dimensional embeddings, rerank disabled for context queries, cargo/pip/Hugging Face caches, and Docker volumes for rag_storage, inputs, and prompts.",
         "retry_guidance": [
-          "Run cargo make lightrag-docker-context-smoke first; a missing API must remain a typed incomplete artifact, not a pass claim.",
+          "Run cargo make smoke-lightrag-docker-context first; a missing API must remain a typed incomplete artifact, not a pass claim.",
           "Set ELF_LIGHTRAG_CONTEXT_START=1 only when Docker may pull/start the LightRAG service profile.",
           "Score retrieval only when returned context, references.file_path, or references.content map to required evidence ids."
         ],
@@ -2057,13 +2057,13 @@
       "setup": {
         "status": "blocked",
         "evidence": "XY-900 promotes the Docker-safe generated-corpus GraphRAG smoke into a scored knowledge_compilation report while the checked-in row remains smoke-only research_gate evidence.",
-        "command": "cargo make graphrag-docker-smoke",
+        "command": "cargo make smoke-graphrag-docker",
         "artifact": "tmp/real-world-memory/graphrag-smoke/graphrag-smoke.json"
       },
       "run": {
         "status": "blocked",
         "evidence": "The default smoke records a typed blocked artifact without model calls; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration to attempt live GraphRAG index/query.",
-        "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make graphrag-docker-smoke",
+        "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make smoke-graphrag-docker",
         "artifact": "tmp/real-world-memory/graphrag-smoke/summary.json"
       },
       "result": {
@@ -2149,7 +2149,7 @@
         },
         {
           "kind": "command",
-          "ref": "cargo make graphrag-docker-smoke",
+          "ref": "cargo make smoke-graphrag-docker",
           "status": "blocked"
         },
         {
@@ -2191,11 +2191,11 @@
             "evidence": "Official local-search context and graph traversal reference."
           }
         ],
-        "setup_path": "Run cargo make graphrag-docker-smoke for a typed preflight artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live GraphRAG index/query attempt.",
+        "setup_path": "Run cargo make smoke-graphrag-docker for a typed preflight artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live GraphRAG index/query attempt.",
         "runtime_boundary": "docker-compose.baseline.yml baseline-runner, container-local Python venv, generated public corpus, and report artifacts under tmp/real-world-memory/graphrag-smoke.",
         "resource_expectation": "The default profile uses a generated public corpus capped by ELF_GRAPHRAG_MAX_DOCS and ELF_GRAPHRAG_MAX_INPUT_CHARS, pins GraphRAG through ELF_GRAPHRAG_PACKAGE, and records elapsed time, cache size, output size, and observed cache entries.",
         "retry_guidance": [
-          "Run cargo make graphrag-docker-smoke first; missing provider configuration must remain a typed blocked artifact, not a pass claim.",
+          "Run cargo make smoke-graphrag-docker first; missing provider configuration must remain a typed blocked artifact, not a pass claim.",
           "Enable ELF_GRAPHRAG_SMOKE_RUN=1 only for generated public corpus indexing with explicit provider configuration.",
           "Fail typed if source document or text_unit identifiers cannot be mapped to expected evidence IDs."
         ],
@@ -2221,13 +2221,13 @@
       "setup": {
         "status": "blocked",
         "evidence": "XY-900 promotes the Docker-contained Graphiti/Zep temporal smoke into a scored memory_evolution report while the checked-in row remains smoke-only research_gate evidence.",
-        "command": "cargo make graphiti-zep-docker-temporal-smoke",
+        "command": "cargo make smoke-graphiti-zep-docker-temporal",
         "artifact": "tmp/real-world-memory/graphiti-zep-smoke/graphiti-zep-smoke.json"
       },
       "run": {
         "status": "blocked",
         "evidence": "The default smoke records a typed setup/runtime failure if live execution is not explicitly enabled. Set ELF_GRAPHITI_ZEP_SMOKE_START=1 and ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration to start Docker-local FalkorDB and run Graphiti.",
-        "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke",
+        "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal",
         "artifact": "tmp/real-world-memory/graphiti-zep-smoke/summary.json"
       },
       "result": {
@@ -2308,7 +2308,7 @@
         },
         {
           "kind": "command",
-          "ref": "cargo make graphiti-zep-docker-temporal-smoke",
+          "ref": "cargo make smoke-graphiti-zep-docker-temporal",
           "status": "blocked"
         },
         {
@@ -2350,11 +2350,11 @@
             "evidence": "Official manual fact-triple ingest contract."
           }
         ],
-        "setup_path": "Run cargo make graphiti-zep-docker-temporal-smoke for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt.",
+        "setup_path": "Run cargo make smoke-graphiti-zep-docker-temporal for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt.",
         "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus graphiti-zep FalkorDB profile, container-local Python venv, generated public temporal facts, and report artifacts under tmp/real-world-memory/graphiti-zep-smoke.",
         "resource_expectation": "Requires Docker-local FalkorDB plus LLM/embedding configuration; generated artifacts record service startup, storage size, provider boundaries, fact count, and timeout before scoring.",
         "retry_guidance": [
-          "Run cargo make graphiti-zep-docker-temporal-smoke first to produce a typed blocked artifact.",
+          "Run cargo make smoke-graphiti-zep-docker-temporal first to produce a typed blocked artifact.",
           "Start the live path only with ELF_GRAPHITI_ZEP_SMOKE_START=1, ELF_GRAPHITI_ZEP_SMOKE_RUN=1, and explicit provider configuration.",
           "Treat missing validity windows or unmapped current/historical facts as wrong_result, not pass."
         ],
@@ -2859,13 +2859,13 @@
       "setup": {
         "status": "pass",
         "evidence": "XY-900 validation reached the Docker-only graph/report smoke setup inside the baseline runner without host-global assistant hooks.",
-        "command": "cargo make graphify-docker-graph-report-smoke",
+        "command": "cargo make smoke-graphify-docker-graph-report",
         "artifact": "tmp/real-world-memory/graphify-smoke/graphify-smoke.json"
       },
       "run": {
         "status": "pass",
         "evidence": "The smoke installed graphify in a container-local venv, ran over a generated public corpus, and produced graph/report/query output for scoring.",
-        "command": "cargo make graphify-docker-graph-report-smoke",
+        "command": "cargo make smoke-graphify-docker-graph-report",
         "artifact": "tmp/real-world-memory/graphify-smoke/summary.json"
       },
       "result": {
@@ -2946,7 +2946,7 @@
         },
         {
           "kind": "command",
-          "ref": "cargo make graphify-docker-graph-report-smoke",
+          "ref": "cargo make smoke-graphify-docker-graph-report",
           "status": "wrong_result"
         },
         {
@@ -2973,11 +2973,11 @@
             "evidence": "Official CLI, output artifact, query, and source-location contract."
           }
         ],
-        "setup_path": "Run cargo make graphify-docker-graph-report-smoke to install graphify in Docker, build graph/report artifacts from a generated public corpus, and export query evidence without installing host-global assistant hooks.",
+        "setup_path": "Run cargo make smoke-graphify-docker-graph-report to install graphify in Docker, build graph/report artifacts from a generated public corpus, and export query evidence without installing host-global assistant hooks.",
         "runtime_boundary": "docker-compose.baseline.yml baseline-runner, container-local Python venv, isolated HOME/config paths, generated public corpus, and artifacts under tmp/real-world-memory/graphify-smoke.",
         "resource_expectation": "Graph build cost scales with corpus and model choices; generated artifacts record package reference, provider/model boundary, build time, graph size, report size, cache size, timeout, and retry behavior.",
         "retry_guidance": [
-          "Run cargo make graphify-docker-graph-report-smoke first; setup/runtime failures must remain typed artifacts, not pass claims.",
+          "Run cargo make smoke-graphify-docker-graph-report first; setup/runtime failures must remain typed artifacts, not pass claims.",
           "Do not use graphify host assistant hook installs or operator-owned assistant configuration as proof.",
           "Score graph-guided answers only when graph.json, GRAPH_REPORT.md, and graphify query output map to generated evidence ids."
         ],
diff --git a/apps/elf-eval/fixtures/real_world_live_adapters/work_resume_exact_next_action.json b/apps/elf-eval/fixtures/real_world_live_adapters/work_resume_exact_next_action.json
index 66128882..d3dd6d44 100644
--- a/apps/elf-eval/fixtures/real_world_live_adapters/work_resume_exact_next_action.json
+++ b/apps/elf-eval/fixtures/real_world_live_adapters/work_resume_exact_next_action.json
@@ -10,7 +10,7 @@
       {
         "evidence_id": "xy868-current-next-action",
         "kind": "runbook",
-        "text": "Exact next action for XY-868: run `cargo make real-world-memory-live-adapters`, then run `cargo make fmt`, `cargo make lint-fix`, and `cargo make checks` before pushing branch y/elf-xy-868.",
+        "text": "Exact next action for XY-868: run `cargo make real-world-memory-live-adapters`, then run `cargo make fmt`, `cargo make lint-fix`, and `cargo make check` before pushing branch y/elf-xy-868.",
         "source_ref": {
           "schema": "source_ref/v1",
           "resolver": "real_world_live_adapter_fixture/v1",
@@ -65,7 +65,7 @@
     "must_include": [
       {
         "claim_id": "next_action",
-        "text": "Exact next action for XY-868: run `cargo make real-world-memory-live-adapters`, then run `cargo make fmt`, `cargo make lint-fix`, and `cargo make checks` before pushing branch y/elf-xy-868."
+        "text": "Exact next action for XY-868: run `cargo make real-world-memory-live-adapters`, then run `cargo make fmt`, `cargo make lint-fix`, and `cargo make check` before pushing branch y/elf-xy-868."
       }
     ],
     "must_not_include": [
diff --git a/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/stale_core_detection.json b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/stale_core_detection.json
index 084c26cb..0dde7817 100644
--- a/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/stale_core_detection.json
+++ b/apps/elf-eval/fixtures/real_world_memory/core_archival_memory/stale_core_detection.json
@@ -24,7 +24,7 @@
       {
         "evidence_id": "archival-current-validation-gate",
         "kind": "decision",
-        "text": "Archival decision update: before pushing a refreshed PR head, run cargo make fmt, cargo make lint-fix, and cargo make checks.",
+        "text": "Archival decision update: before pushing a refreshed PR head, run cargo make fmt, cargo make lint-fix, and cargo make check.",
         "source_ref": {
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
@@ -33,7 +33,7 @@
             "evidence_id": "archival-current-validation-gate"
           },
           "locator": {
-            "quote": "cargo make fmt, cargo make lint-fix, and cargo make checks"
+            "quote": "cargo make fmt, cargo make lint-fix, and cargo make check"
           }
         },
         "created_at": "2026-06-11T04:30:00Z"
@@ -73,7 +73,7 @@
     "adapter_response": {
       "adapter_id": "fixture_core_archival_memory",
       "answer": {
-        "content": "Treat the attached validation-gate core block as stale. The current archival decision says to run cargo make fmt, cargo make lint-fix, and cargo make checks before pushing a refreshed PR head, and the archival rationale says that evidence supersedes the core block until it is updated from source-of-truth state.",
+        "content": "Treat the attached validation-gate core block as stale. The current archival decision says to run cargo make fmt, cargo make lint-fix, and cargo make check before pushing a refreshed PR head, and the archival rationale says that evidence supersedes the core block until it is updated from source-of-truth state.",
         "claims": [
           {
             "claim_id": "stale_core_detected",
@@ -83,7 +83,7 @@
           },
           {
             "claim_id": "archival_current_gate",
-            "text": "The current archival validation gate is cargo make fmt, cargo make lint-fix, and cargo make checks.",
+            "text": "The current archival validation gate is cargo make fmt, cargo make lint-fix, and cargo make check.",
             "evidence_ids": ["archival-current-validation-gate"],
             "confidence": "high"
           }
@@ -131,7 +131,7 @@
       },
       {
         "claim_id": "archival_current_gate",
-        "text": "The current archival validation gate is cargo make fmt, cargo make lint-fix, and cargo make checks."
+        "text": "The current archival validation gate is cargo make fmt, cargo make lint-fix, and cargo make check."
       }
     ],
     "must_not_include": [
@@ -151,7 +151,7 @@
       "evidence_id": "archival-current-validation-gate",
       "claim_id": "archival_current_gate",
       "requirement": "cite",
-      "quote": "cargo make fmt, cargo make lint-fix, and cargo make checks"
+      "quote": "cargo make fmt, cargo make lint-fix, and cargo make check"
     },
     {
       "evidence_id": "archival-supersedes-core-rationale",
diff --git a/apps/elf-eval/fixtures/real_world_memory/memory_evolution/delete_ttl_staleness.json b/apps/elf-eval/fixtures/real_world_memory/memory_evolution/delete_ttl_staleness.json
index d6dc98c7..e903e548 100644
--- a/apps/elf-eval/fixtures/real_world_memory/memory_evolution/delete_ttl_staleness.json
+++ b/apps/elf-eval/fixtures/real_world_memory/memory_evolution/delete_ttl_staleness.json
@@ -41,7 +41,7 @@
       {
         "evidence_id": "current-benchmark-plan",
         "kind": "decision",
-        "text": "Current plan: add real_world_memory trust and personalization cases, then run cargo make real-world-job-smoke and cargo make real-world-memory.",
+        "text": "Current plan: add real_world_memory trust and personalization cases, then run cargo make smoke-real-world-job and cargo make real-world-memory.",
         "source_ref": {
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
@@ -59,7 +59,7 @@
     "adapter_response": {
       "adapter_id": "elf_real_world_memory_fixture",
       "answer": {
-        "content": "Do not use the expired Redis branch plan. The tombstone says deleted-temp-plan expired and search must suppress it after the worker DELETE outbox completes. The current plan is to add real_world_memory trust and personalization cases, then run cargo make real-world-job-smoke and cargo make real-world-memory.",
+        "content": "Do not use the expired Redis branch plan. The tombstone says deleted-temp-plan expired and search must suppress it after the worker DELETE outbox completes. The current plan is to add real_world_memory trust and personalization cases, then run cargo make smoke-real-world-job and cargo make real-world-memory.",
         "claims": [
           {
             "claim_id": "deleted_fact_suppressed",
@@ -69,7 +69,7 @@
           },
           {
             "claim_id": "current_plan",
-            "text": "Add real_world_memory trust and personalization cases, then run cargo make real-world-job-smoke and cargo make real-world-memory.",
+            "text": "Add real_world_memory trust and personalization cases, then run cargo make smoke-real-world-job and cargo make real-world-memory.",
             "evidence_ids": ["current-benchmark-plan"],
             "confidence": "high"
           }
@@ -125,7 +125,7 @@
       },
       {
         "claim_id": "current_plan",
-        "text": "Add real_world_memory trust and personalization cases, then run cargo make real-world-job-smoke and cargo make real-world-memory."
+        "text": "Add real_world_memory trust and personalization cases, then run cargo make smoke-real-world-job and cargo make real-world-memory."
       }
     ],
     "must_not_include": [
diff --git a/apps/elf-eval/fixtures/real_world_memory/project_decisions/current_validation_gate.json b/apps/elf-eval/fixtures/real_world_memory/project_decisions/current_validation_gate.json
index f3e459b1..224f44f8 100644
--- a/apps/elf-eval/fixtures/real_world_memory/project_decisions/current_validation_gate.json
+++ b/apps/elf-eval/fixtures/real_world_memory/project_decisions/current_validation_gate.json
@@ -24,7 +24,7 @@
       {
         "evidence_id": "validation-gate-current-decodex",
         "kind": "decision",
-        "text": "Current validation gate: before pushing a refreshed PR head, run cargo make fmt, cargo make lint-fix, and cargo make checks.",
+        "text": "Current validation gate: before pushing a refreshed PR head, run cargo make fmt, cargo make lint-fix, and cargo make check.",
         "source_ref": {
           "schema": "source_ref/v1",
           "resolver": "real_world_memory_fixture/v1",
@@ -67,11 +67,11 @@
     "adapter_response": {
       "adapter_id": "fixture_project_decisions",
       "answer": {
-        "content": "The current validation gate is cargo make fmt, cargo make lint-fix, and cargo make checks before pushing a refreshed PR head. The older lint-and-test gate is historical; the current gate adds formatting, automatic lint repair, and full checks to prevent avoidable review churn before Decodex review handoff.",
+        "content": "The current validation gate is cargo make fmt, cargo make lint-fix, and cargo make check before pushing a refreshed PR head. The older lint-and-test gate is historical; the current gate adds formatting, automatic lint repair, and full checks to prevent avoidable review churn before Decodex review handoff.",
         "claims": [
           {
             "claim_id": "current_validation_gate",
-            "text": "The current validation gate is cargo make fmt, cargo make lint-fix, and cargo make checks before pushing a refreshed PR head.",
+            "text": "The current validation gate is cargo make fmt, cargo make lint-fix, and cargo make check before pushing a refreshed PR head.",
             "evidence_ids": [
               "validation-gate-current-decodex",
               "validation-gate-old-lint-test",
@@ -136,7 +136,7 @@
     "must_include": [
       {
         "claim_id": "current_validation_gate",
-        "text": "The current validation gate is cargo make fmt, cargo make lint-fix, and cargo make checks before pushing a refreshed PR head."
+        "text": "The current validation gate is cargo make fmt, cargo make lint-fix, and cargo make check before pushing a refreshed PR head."
       },
       {
         "claim_id": "validation_gate_rationale",
@@ -164,7 +164,7 @@
       "evidence_id": "validation-gate-current-decodex",
       "claim_id": "current_validation_gate",
       "requirement": "cite",
-      "quote": "run cargo make fmt, cargo make lint-fix, and cargo make checks"
+      "quote": "run cargo make fmt, cargo make lint-fix, and cargo make check"
     },
     {
       "evidence_id": "validation-gate-old-lint-test",
diff --git a/apps/elf-eval/fixtures/real_world_memory/retrieval/alternate_phrasing.json b/apps/elf-eval/fixtures/real_world_memory/retrieval/alternate_phrasing.json
index c939fb62..a8b4c6c5 100644
--- a/apps/elf-eval/fixtures/real_world_memory/retrieval/alternate_phrasing.json
+++ b/apps/elf-eval/fixtures/real_world_memory/retrieval/alternate_phrasing.json
@@ -10,7 +10,7 @@
       {
         "evidence_id": "xy840-current-handoff",
         "kind": "issue",
-        "text": "XY-840 trace schema lane uses branch y/elf-xy-840. Before review handoff, run `cargo make checks` after the trace schema update is complete.",
+        "text": "XY-840 trace schema lane uses branch y/elf-xy-840. Before review handoff, run `cargo make check` after the trace schema update is complete.",
         "source_ref": {
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
@@ -39,7 +39,7 @@
     "adapter_response": {
       "adapter_id": "fixture_retrieval",
       "answer": {
-        "content": "Use branch y/elf-xy-840 for XY-840 and run `cargo make checks` before review handoff.",
+        "content": "Use branch y/elf-xy-840 for XY-840 and run `cargo make check` before review handoff.",
         "claims": [
           {
             "claim_id": "branch",
@@ -49,7 +49,7 @@
           },
           {
             "claim_id": "gate",
-            "text": "Run `cargo make checks` before review handoff.",
+            "text": "Run `cargo make check` before review handoff.",
             "evidence_ids": ["xy840-current-handoff"],
             "confidence": "high"
           }
@@ -97,7 +97,7 @@
       },
       {
         "claim_id": "gate",
-        "text": "Run `cargo make checks` before review handoff."
+        "text": "Run `cargo make check` before review handoff."
       }
     ],
     "must_not_include": [
@@ -124,7 +124,7 @@
       "evidence_id": "xy840-current-handoff",
       "claim_id": "gate",
       "requirement": "use",
-      "quote": "run `cargo make checks`"
+      "quote": "run `cargo make check`"
     }
   ],
   "negative_traps": [
diff --git a/apps/elf-eval/fixtures/real_world_memory/work_resume/work_resume_failed_command_recovery.json b/apps/elf-eval/fixtures/real_world_memory/work_resume/work_resume_failed_command_recovery.json
index b6c3da36..5ddbda61 100644
--- a/apps/elf-eval/fixtures/real_world_memory/work_resume/work_resume_failed_command_recovery.json
+++ b/apps/elf-eval/fixtures/real_world_memory/work_resume/work_resume_failed_command_recovery.json
@@ -10,7 +10,7 @@
       {
         "evidence_id": "xy844-current-command-failure",
         "kind": "trace",
-        "text": "Command log: cargo make real-world-job-smoke failed because apps/elf-eval/fixtures/real_world_memory/work_resume/ did not contain the required suite fixtures. Earlier lint warnings had already been repaired. The exact recovery action is to create the missing fixtures, then rerun cargo make real-world-job-smoke.",
+        "text": "Command log: cargo make smoke-real-world-job failed because apps/elf-eval/fixtures/real_world_memory/work_resume/ did not contain the required suite fixtures. Earlier lint warnings had already been repaired. The exact recovery action is to create the missing fixtures, then rerun cargo make smoke-real-world-job.",
         "source_ref": {
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
@@ -39,7 +39,7 @@
     "adapter_response": {
       "adapter_id": "fixture_work_resume",
       "answer": {
-        "content": "The current stop is the missing work_resume fixture suite, not the old lint warning. Create the missing fixtures under apps/elf-eval/fixtures/real_world_memory/work_resume/, then rerun `cargo make real-world-job-smoke`.",
+        "content": "The current stop is the missing work_resume fixture suite, not the old lint warning. Create the missing fixtures under apps/elf-eval/fixtures/real_world_memory/work_resume/, then rerun `cargo make smoke-real-world-job`.",
         "claims": [
           {
             "claim_id": "current_stop",
@@ -49,7 +49,7 @@
           },
           {
             "claim_id": "recovery_action",
-            "text": "Create the missing fixtures, then rerun `cargo make real-world-job-smoke`.",
+            "text": "Create the missing fixtures, then rerun `cargo make smoke-real-world-job`.",
             "evidence_ids": ["xy844-current-command-failure"],
             "confidence": "high"
           },
@@ -107,7 +107,7 @@
       },
       {
         "claim_id": "recovery_action",
-        "text": "Create the missing fixtures, then rerun `cargo make real-world-job-smoke`."
+        "text": "Create the missing fixtures, then rerun `cargo make smoke-real-world-job`."
       },
       {
         "claim_id": "stale_blocker",
@@ -139,7 +139,7 @@
       "evidence_id": "xy844-current-command-failure",
       "claim_id": "recovery_action",
       "requirement": "use",
-      "quote": "The exact recovery action is to create the missing fixtures, then rerun cargo make real-world-job-smoke."
+      "quote": "The exact recovery action is to create the missing fixtures, then rerun cargo make smoke-real-world-job."
     }
   ],
   "negative_traps": [
diff --git a/apps/elf-eval/fixtures/real_world_memory/work_resume/work_resume_next_action_extraction.json b/apps/elf-eval/fixtures/real_world_memory/work_resume/work_resume_next_action_extraction.json
index 4cbe80c3..1f6ae49e 100644
--- a/apps/elf-eval/fixtures/real_world_memory/work_resume/work_resume_next_action_extraction.json
+++ b/apps/elf-eval/fixtures/real_world_memory/work_resume/work_resume_next_action_extraction.json
@@ -10,7 +10,7 @@
       {
         "evidence_id": "xy844-next-action-handoff",
         "kind": "runbook",
-        "text": "XY-844 handoff note: after adding the six work-resume continuity and capture/integration fixtures, run `cargo make real-world-job-smoke`, then run `cargo make fmt`, `cargo make lint-fix`, and `cargo make checks` before pushing branch y/elf-xy-844.",
+        "text": "XY-844 handoff note: after adding the six work-resume continuity and capture/integration fixtures, run `cargo make smoke-real-world-job`, then run `cargo make fmt`, `cargo make lint-fix`, and `cargo make check` before pushing branch y/elf-xy-844.",
         "source_ref": {
           "schema": "source_ref/v1",
           "resolver": "real_world_job_fixture/v1",
@@ -39,7 +39,7 @@
     "adapter_response": {
       "adapter_id": "fixture_work_resume",
       "answer": {
-        "content": "The exact next action is to add the six work-resume continuity and capture/integration fixtures, run `cargo make real-world-job-smoke`, then run the pre-push gate: `cargo make fmt`, `cargo make lint-fix`, and `cargo make checks` before pushing y/elf-xy-844.",
+        "content": "The exact next action is to add the six work-resume continuity and capture/integration fixtures, run `cargo make smoke-real-world-job`, then run the pre-push gate: `cargo make fmt`, `cargo make lint-fix`, and `cargo make check` before pushing y/elf-xy-844.",
         "claims": [
           {
             "claim_id": "exact_next_action",
@@ -49,7 +49,7 @@
           },
           {
             "claim_id": "validation_sequence",
-            "text": "Run `cargo make real-world-job-smoke`, then `cargo make fmt`, `cargo make lint-fix`, and `cargo make checks` before pushing.",
+            "text": "Run `cargo make smoke-real-world-job`, then `cargo make fmt`, `cargo make lint-fix`, and `cargo make check` before pushing.",
             "evidence_ids": ["xy844-next-action-handoff"],
             "confidence": "high"
           }
@@ -101,7 +101,7 @@
       },
       {
         "claim_id": "validation_sequence",
-        "text": "Run `cargo make real-world-job-smoke`, then `cargo make fmt`, `cargo make lint-fix`, and `cargo make checks` before pushing."
+        "text": "Run `cargo make smoke-real-world-job`, then `cargo make fmt`, `cargo make lint-fix`, and `cargo make check` before pushing."
       }
     ],
     "must_not_include": [
@@ -127,7 +127,7 @@
       "evidence_id": "xy844-next-action-handoff",
       "claim_id": "validation_sequence",
       "requirement": "use",
-      "quote": "run `cargo make real-world-job-smoke`, then run `cargo make fmt`, `cargo make lint-fix`, and `cargo make checks`"
+      "quote": "run `cargo make smoke-real-world-job`, then run `cargo make fmt`, `cargo make lint-fix`, and `cargo make check`"
     }
   ],
   "negative_traps": [
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index ff9d3c6f..a9a6a8f7 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -944,7 +944,7 @@ fn assert_graph_rag_research_gate_records(ragflow: &Value, lightrag: &Value, gra
 	);
 	assert_eq!(
 		ragflow.pointer("/setup/command").and_then(Value::as_str),
-		Some("cargo make ragflow-docker-smoke")
+		Some("cargo make smoke-ragflow-docker")
 	);
 	assert_eq!(
 		ragflow.pointer("/result/artifact").and_then(Value::as_str),
@@ -958,11 +958,11 @@ fn assert_graph_rag_research_gate_records(ragflow: &Value, lightrag: &Value, gra
 	assert_eq!(lightrag.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
 	assert_eq!(
 		lightrag.pointer("/setup/command").and_then(Value::as_str),
-		Some("cargo make lightrag-docker-context-smoke")
+		Some("cargo make smoke-lightrag-docker-context")
 	);
 	assert_eq!(
 		lightrag.pointer("/run/command").and_then(Value::as_str),
-		Some("ELF_LIGHTRAG_CONTEXT_START=1 cargo make lightrag-docker-context-smoke")
+		Some("ELF_LIGHTRAG_CONTEXT_START=1 cargo make smoke-lightrag-docker-context")
 	);
 	assert_eq!(
 		lightrag.pointer("/capabilities/3/status").and_then(Value::as_str),
@@ -971,7 +971,7 @@ fn assert_graph_rag_research_gate_records(ragflow: &Value, lightrag: &Value, gra
 	assert_eq!(graphrag.pointer("/evidence_class").and_then(Value::as_str), Some("research_gate"));
 	assert_eq!(
 		graphrag.pointer("/setup/command").and_then(Value::as_str),
-		Some("cargo make graphrag-docker-smoke")
+		Some("cargo make smoke-graphrag-docker")
 	);
 	assert_eq!(graphrag.pointer("/suites/1/status").and_then(Value::as_str), Some("not_encoded"));
 }
@@ -1389,12 +1389,12 @@ fn assert_graphiti_zep_adapter(adapter: &Value) {
 	assert_eq!(adapter.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
 	assert_eq!(
 		adapter.pointer("/setup/command").and_then(Value::as_str),
-		Some("cargo make graphiti-zep-docker-temporal-smoke")
+		Some("cargo make smoke-graphiti-zep-docker-temporal")
 	);
 	assert_eq!(
 		adapter.pointer("/run/command").and_then(Value::as_str),
 		Some(
-			"ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke"
+			"ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal"
 		)
 	);
 	assert_eq!(
@@ -1418,7 +1418,7 @@ fn assert_graphify_adapter(adapter: &Value) -> Result<()> {
 	assert_eq!(adapter.pointer("/result/status").and_then(Value::as_str), Some("wrong_result"));
 	assert_eq!(
 		adapter.pointer("/setup/command").and_then(Value::as_str),
-		Some("cargo make graphify-docker-graph-report-smoke")
+		Some("cargo make smoke-graphify-docker-graph-report")
 	);
 	assert_eq!(
 		adapter.pointer("/suites/0/suite_id").and_then(Value::as_str),
@@ -1526,13 +1526,13 @@ fn graphify_generated_manifest_keeps_retrieval_unscored() -> Result<()> {
 			"setup": {
 				"status": "pass",
 				"evidence": "setup evidence",
-				"command": "cargo make graphify-docker-graph-report-smoke",
+				"command": "cargo make smoke-graphify-docker-graph-report",
 				"artifact": "tmp/real-world-memory/graphify-smoke/graphify-smoke.json"
 			},
 			"run": {
 				"status": "pass",
 				"evidence": "run evidence",
-				"command": "cargo make graphify-docker-graph-report-smoke",
+				"command": "cargo make smoke-graphify-docker-graph-report",
 				"artifact": "tmp/real-world-memory/graphify-smoke/summary.json"
 			},
 			"result": {
@@ -1559,7 +1559,7 @@ fn graphify_generated_manifest_keeps_retrieval_unscored() -> Result<()> {
 			],
 			"evidence": [],
 			"execution_metadata": {
-				"setup_path": "cargo make graphify-docker-graph-report-smoke",
+				"setup_path": "cargo make smoke-graphify-docker-graph-report",
 				"runtime_boundary": "Docker-only generated graph/report smoke.",
 				"resource_expectation": "Tiny generated corpus only.",
 				"retry_guidance": [],
@@ -1673,9 +1673,16 @@ fn graph_rag_representative_fixtures_report_typed_non_pass_states() -> Result<()
 
 #[test]
 fn live_adapter_aggregate_forwards_graph_rag_smoke_controls() -> Result<()> {
-	let makefile = fs::read_to_string(
-		Path::new(env!("CARGO_MANIFEST_DIR")).join("..").join("..").join("Makefile.toml"),
-	)?;
+	let workspace = workspace_root()?;
+	let makefile = fs::read_to_string(workspace.join("Makefile.toml"))?;
+	let docker_script = fs::read_to_string(workspace.join("scripts/real-world-docker.sh"))?;
+
+	assert!(
+		makefile.contains("[tasks.real-world-memory-live-adapters]")
+			&& makefile.contains("scripts/real-world-docker.sh")
+			&& makefile.contains("memory-live-adapters"),
+		"Makefile should expose the live-adapter command and delegate Docker details to a script",
+	);
 
 	for env_name in [
 		"ELF_REAL_WORLD_LIVE_ENABLE_RAGFLOW",
@@ -1693,17 +1700,17 @@ fn live_adapter_aggregate_forwards_graph_rag_smoke_controls() -> Result<()> {
 		"ELF_GRAPHIFY_SMOKE_RUN",
 	] {
 		assert!(
-			makefile.contains(&format!("-e {env_name}")),
+			docker_script.contains(&format!("-e {env_name}")),
 			"real-world-memory-live-adapters must forward {env_name}",
 		);
 	}
 
 	assert!(
-		makefile.contains("--profile lightrag up -d lightrag"),
+		docker_script.contains("--profile lightrag up -d lightrag"),
 		"aggregate task should start LightRAG profile when ELF_LIGHTRAG_CONTEXT_START=1",
 	);
 	assert!(
-		makefile.contains("--profile graphiti-zep up -d graphiti-falkordb"),
+		docker_script.contains("--profile graphiti-zep up -d graphiti-falkordb"),
 		"aggregate task should start Graphiti/Zep profile when ELF_GRAPHITI_ZEP_SMOKE_START=1",
 	);
 
@@ -1714,6 +1721,7 @@ fn live_adapter_aggregate_forwards_graph_rag_smoke_controls() -> Result<()> {
 fn openmemory_ui_export_probe_has_dedicated_docker_task() -> Result<()> {
 	let workspace_root = workspace_root()?;
 	let makefile = fs::read_to_string(workspace_root.join("Makefile.toml"))?;
+	let docker_script = fs::read_to_string(workspace_root.join("scripts/baseline-docker.sh"))?;
 	let compose = fs::read_to_string(workspace_root.join("docker-compose.baseline.yml"))?;
 	let script = fs::read_to_string(workspace_root.join("scripts/live-baseline-benchmark.sh"))?;
 	let report = serde_json::from_str::<Value>(&fs::read_to_string(
@@ -1721,7 +1729,9 @@ fn openmemory_ui_export_probe_has_dedicated_docker_task() -> Result<()> {
 	)?)?;
 
 	assert!(makefile.contains("[tasks.openmemory-ui-export-readback]"));
-	assert!(makefile.contains("export ELF_BASELINE_PROJECTS=mem0"));
+	assert!(makefile.contains("scripts/baseline-docker.sh"));
+	assert!(makefile.contains("openmemory-ui-export-readback"));
+	assert!(docker_script.contains("export ELF_BASELINE_PROJECTS=mem0"));
 	assert!(compose.contains("ELF_MEM0_OPENMEMORY_EXPORT_USER_ID"));
 	assert!(compose.contains("ELF_MEM0_OPENMEMORY_EXPORT_CONTAINER"));
 	assert!(script.contains("probe_mem0_openmemory_ui_export"));
@@ -1756,6 +1766,7 @@ fn openmemory_ui_export_probe_has_dedicated_docker_task() -> Result<()> {
 fn operator_debug_live_adapter_task_is_docker_scoped() -> Result<()> {
 	let workspace = workspace_root()?;
 	let makefile = fs::read_to_string(workspace.join("Makefile.toml"))?;
+	let docker_script = fs::read_to_string(workspace.join("scripts/real-world-docker.sh"))?;
 	let script = fs::read_to_string(
 		workspace.join("scripts").join("real-world-operator-debug-live-adapters.sh"),
 	)?;
@@ -1765,8 +1776,12 @@ fn operator_debug_live_adapter_task_is_docker_scoped() -> Result<()> {
 		fs::read_to_string(workspace.join("apps/elf-eval/src/bin/real_world_job_benchmark.rs"))?;
 
 	assert!(makefile.contains("[tasks.real-world-job-operator-ux-live-adapters]"));
-	assert!(makefile.contains("docker compose -f docker-compose.baseline.yml run --build --rm"));
-	assert!(makefile.contains("scripts/real-world-operator-debug-live-adapters.sh"));
+	assert!(makefile.contains("scripts/real-world-docker.sh"));
+	assert!(makefile.contains("job-operator-ux-live-adapters"));
+	assert!(
+		docker_script.contains("docker compose -f docker-compose.baseline.yml run --build --rm")
+	);
+	assert!(docker_script.contains("scripts/real-world-operator-debug-live-adapters.sh"));
 	assert!(script.contains("apps/elf-eval/fixtures/real_world_job/operator_debugging_ux"));
 	assert!(script.contains("elf_operator_debug_live"));
 	assert!(script.contains("qmd_operator_debug_live"));
@@ -2169,7 +2184,11 @@ fn live_consolidation_report_preserves_reviewable_output_boundaries() -> Result<
 	assert!(benchmark_guide.contains("Current live consolidation increment"));
 	assert!(benchmark_guide.contains("tmp/real-world-memory/live-consolidation/summary.json"));
 	assert!(makefile.contains("[tasks.real-world-memory-live-consolidation]"));
-	assert!(makefile.contains("scripts/real-world-consolidation-live-adapter.sh"));
+	assert!(makefile.contains("scripts/real-world-docker.sh"));
+
+	let docker_script = fs::read_to_string(workspace.join("scripts/real-world-docker.sh"))?;
+
+	assert!(docker_script.contains("scripts/real-world-consolidation-live-adapter.sh"));
 	assert!(live_script.contains("elf.real_world_consolidation_live_adapter_sweep/v1"));
 	assert!(live_script.contains("real_world_live_adapter -- elf"));
 	assert!(!live_script.contains("real_world_live_adapter -- qmd"));
diff --git a/docs/guide/agent-setup.md b/docs/guide/agent-setup.md
index e4e81473..57257017 100644
--- a/docs/guide/agent-setup.md
+++ b/docs/guide/agent-setup.md
@@ -155,7 +155,7 @@ Example:
 ELF_PG_DSN="postgres://elf_dev:elf_dev_password@127.0.0.1:51888/postgres" \
 ELF_QDRANT_GRPC_URL="http://127.0.0.1:51890" \
 ELF_QDRANT_HTTP_URL="http://127.0.0.1:51889" \
-cargo make e2e
+cargo make test-e2e
 ```
 
 ## Troubleshooting
diff --git a/docs/guide/benchmarking/2026-06-09-live-baseline-report.md b/docs/guide/benchmarking/2026-06-09-live-baseline-report.md
index 78df93bb..9551adeb 100644
--- a/docs/guide/benchmarking/2026-06-09-live-baseline-report.md
+++ b/docs/guide/benchmarking/2026-06-09-live-baseline-report.md
@@ -230,7 +230,7 @@ cargo make baseline-live-report
 Clean Docker-owned state:
 
 ```sh
-cargo make baseline-live-docker-clean
+cargo make clean-baseline-live-docker
 ```
 
 The only host report directory is `tmp/live-baseline/`. Raw generated JSON stays there
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
index 4f960804..12aeeb01 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
@@ -99,8 +99,8 @@ results, or lifecycle failures into one aggregate leaderboard.
 | `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker` | `2026-06-11-first-generation-oss-adapter-promotion-report.md` | mem0/OpenMemory and memsearch pass basic local baseline smokes; agentmemory remains lifecycle_fail and claude-mem remains wrong_result. |
 | `cargo make real-world-first-generation-oss` | `2026-06-11-first-generation-oss-continuity-source-store-report.md` | First-generation OSS fixture slice reports 6 jobs: 4 pass, 2 blocked, full evidence/source-ref/quote coverage, and manifest scenario outcomes across win, tie, loss, not_tested, blocked, and non_goal without promoting smoke evidence into live suite passes. |
 | `cargo make openmemory-ui-export-readback` | `2026-06-11-mem0-openmemory-history-ui-export-report.md` | mem0 local OSS passes preference correction history, entity-scoped personalization, local `get_all` export-style readback, and deletion audit history; OpenMemory export-helper setup emits a separate blocked artifact with `DOCKER_UNAVAILABLE_IN_BASELINE_RUNNER`, and hosted Platform export remains non-goal. |
-| `ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke` | `2026-06-11-temporal-history-competitor-gap-report.md` | Graphiti/Zep temporal smoke remains blocked by `provider_api_key_missing`. |
-| `cargo make graphify-docker-graph-report-smoke` | `2026-06-11-graph-rag-scored-smoke-adapter-report.md` | graphify reaches tiny Docker graph/report scoring but remains wrong_result. |
+| `ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal` | `2026-06-11-temporal-history-competitor-gap-report.md` | Graphiti/Zep temporal smoke remains blocked by `provider_api_key_missing`. |
+| `cargo make smoke-graphify-docker-graph-report` | `2026-06-11-graph-rag-scored-smoke-adapter-report.md` | graphify reaches tiny Docker graph/report scoring but remains wrong_result. |
 | `cargo make real-world-memory-graph-rag` | `tmp/real-world-memory/graph-rag/report.json` | Representative graph/RAG fixtures produce typed non-pass reports: RAGFlow, GraphRAG, and Graphiti/Zep blocked; LightRAG incomplete with comparison blocked; graphify wrong_result; llm-wiki not_tested; gbrain blocked; private/hosted profiles non_goal. |
 | `cargo make baseline-production-synthetic`, `cargo make baseline-backfill-docker`, backup/restore, Qdrant rebuild proof | `2026-06-10-production-adoption-refresh.md` | ELF has provider synthetic, stress, backfill, restore, and rebuild evidence; private-corpus proof is blocked by missing operator-owned manifest. |
 | `ELF_BASELINE_PROJECTS=ELF,qmd ELF_BASELINE_PROFILE=stress cargo make baseline-live-docker` plus ELF trace-bundle and qmd CLI replay commands | `2026-06-11-elf-qmd-trace-replay-diagnostics-report.md` | Retrieval correctness remains tied, but qmd wins current immediate top-10/replay artifact ergonomics; ELF trace/admin surfaces are useful but not yet hydrated into the default stress artifact. |
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
index c48bdcf2..6402b188 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
@@ -90,16 +90,16 @@ lifecycle-fail -> `lifecycle_fail`, and not-encoded -> `not_encoded`.
 | memsearch | Markdown-first canonical store with rebuildable local index and practical hybrid retrieval. | `live_baseline_only`; XY-925 `fixture_backed`. | `pass`: fresh scoped run `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`, with memsearch `4/4` local checks passing. XY-925 adds fixture-backed source-store and retrieval-debug prompts through `cargo make real-world-first-generation-oss`, `tmp/real-world-memory/first-generation-oss/report.json`. | `not_encoded`: no live memsearch runtime adapter executes real-world prompt scoring; memory-evolution prompt adapters remain not encoded; TTL/expiry is unsupported by the current CLI path. | Promote the fixture-backed source-store and retrieval-debug prompts into a live memsearch real-world adapter before any suite-level win/loss claim; keep TTL/expiry as unsupported unless a comparable path exists. | Canonical markdown store, local reindex clarity, and user-inspectable source files. |
 | OpenViking | Filesystem-like context trajectory, hierarchical retrieval, and staged context loading. | `live_baseline_only`; supporting `fixture_backed` and `research_gate`. | `wrong_result`: `ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`; `blocked`: checked-in `context_trajectory` fixtures cover staged retrieval, hierarchy selection, and recursive/context expansion gates. | `blocked`: hierarchical context trajectory is encoded but blocked until same-corpus evidence ids match and staged artifacts are materialized. | Make evidence-bearing same-corpus output pass, then score staged trajectory and hierarchy expansion. | `viking://`-style context model, trajectory readback, and staged retrieval planning. |
 | claude-mem | Progressive disclosure, automatic capture loop, repository-local lifecycle, and local viewer workflow. | `live_baseline_only`; XY-925 `fixture_backed`. | `wrong_result`: `ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`. XY-925 adds fixture-backed progressive-disclosure and retrieval-repair prompts through `cargo make real-world-first-generation-oss`, `tmp/real-world-memory/first-generation-oss/report.json`. | `blocked`: hook capture and viewer/operator workflows still lack a Docker-contained runner; retrieval remains `wrong_result`, and the repair prompt lists rerun/inspection targets `tmp/live-baseline/claude-mem.log` and `tmp/live-baseline/claude-mem-checks.json`. | Promote durable repository-backed work_resume, operator_debugging_ux, capture/write-policy, and progressive-disclosure prompts into a live claude-mem adapter before any broader UX claim. | Progressive disclosure, automatic capture review loops, and local viewer/operator comfort. |
-| RAGFlow | Full RAG application workflow with document, chunk, and reference evidence handles. | `research_gate`. | `blocked`: `ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make ragflow-docker-smoke`, `tmp/real-world-memory/ragflow-smoke/ragflow-smoke.json`. | `blocked`: Docker resource envelope and adapter output mapping still need proof. | XY-885 tiny Docker evidence-smoke adapter mapping `reference.chunks` to scored evidence. | Document/chunk references, resource-envelope reporting, and RAG app evidence handles. |
-| LightRAG | Lightweight graph/RAG context export with source file-path citation shape. | `research_gate`. | `blocked`: `ELF_LIGHTRAG_CONTEXT_START=1 cargo make lightrag-docker-context-smoke`, `tmp/real-world-memory/lightrag-context/summary.json`. | `blocked`: Docker service setup and context export are not proven. | XY-886 Docker context-export adapter with explicit provider config and source citation mapping. | Context-only query modes, graph-aware retrieval layout, and file-path citation readback. |
-| GraphRAG | GraphRAG indexing, graph summaries, and document/text-unit evidence tables. | `research_gate`. | `blocked`: `ELF_GRAPHRAG_SMOKE_RUN=1 cargo make graphrag-docker-smoke`, `tmp/real-world-memory/graphrag-smoke/summary.json`. | `blocked`: indexing resource envelope and source citation mapping are not proven. | XY-887 cost-bounded Docker adapter over a tiny corpus and scored output tables. | Graph summary artifacts, local/global search separation, and source table evidence mapping. |
-| Graphiti/Zep | Temporal graph memory with current, historical, and future fact validity windows. | `research_gate`. | `blocked`: `ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke`, `tmp/real-world-memory/graphiti-zep-smoke/summary.json`. | `blocked`: Docker graph-store and temporal adapter are not proven. | XY-888 Docker-local temporal graph adapter scoring current/historical fact validity. | Temporal fact windows, invalidation/supersession semantics, and graph fact provenance. |
+| RAGFlow | Full RAG application workflow with document, chunk, and reference evidence handles. | `research_gate`. | `blocked`: `ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker`, `tmp/real-world-memory/ragflow-smoke/ragflow-smoke.json`. | `blocked`: Docker resource envelope and adapter output mapping still need proof. | XY-885 tiny Docker evidence-smoke adapter mapping `reference.chunks` to scored evidence. | Document/chunk references, resource-envelope reporting, and RAG app evidence handles. |
+| LightRAG | Lightweight graph/RAG context export with source file-path citation shape. | `research_gate`. | `blocked`: `ELF_LIGHTRAG_CONTEXT_START=1 cargo make smoke-lightrag-docker-context`, `tmp/real-world-memory/lightrag-context/summary.json`. | `blocked`: Docker service setup and context export are not proven. | XY-886 Docker context-export adapter with explicit provider config and source citation mapping. | Context-only query modes, graph-aware retrieval layout, and file-path citation readback. |
+| GraphRAG | GraphRAG indexing, graph summaries, and document/text-unit evidence tables. | `research_gate`. | `blocked`: `ELF_GRAPHRAG_SMOKE_RUN=1 cargo make smoke-graphrag-docker`, `tmp/real-world-memory/graphrag-smoke/summary.json`. | `blocked`: indexing resource envelope and source citation mapping are not proven. | XY-887 cost-bounded Docker adapter over a tiny corpus and scored output tables. | Graph summary artifacts, local/global search separation, and source table evidence mapping. |
+| Graphiti/Zep | Temporal graph memory with current, historical, and future fact validity windows. | `research_gate`. | `blocked`: `ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal`, `tmp/real-world-memory/graphiti-zep-smoke/summary.json`. | `blocked`: Docker graph-store and temporal adapter are not proven. | XY-888 Docker-local temporal graph adapter scoring current/historical fact validity. | Temporal fact windows, invalidation/supersession semantics, and graph fact provenance. |
 | Letta | Core memory blocks versus archival memory with explicit operating-context surfaces. | `research_gate`. | `blocked`: the selected comparison contract is a Docker-only benchmark-created agent export that returns core block JSON, archival search/readback JSON, and source ids; no materialized export exists yet. | `blocked`: no Letta materializer currently creates the benchmark agent, imports the ELF `core_archival_memory` fixture corpus, or exports comparable core and archival evidence. | Implement and run the contained export/readback adapter before any Letta win, tie, or loss claim; keep personalization and project-decision scenarios blocked or not tested until that evidence exists. | Core memory block ergonomics, archival separation, and shared operating context readback. |
 | LangGraph | Checkpoint/replay regression workflow and durable state replay for agent runs. | `research_gate`. | `not_encoded`: `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json`. | `unsupported`: not a standalone memory backend adapter. | Non-goal for direct win/loss until a standalone memory output contract exists; use replay jobs as benchmark infrastructure reference. | Checkpoint replay, deterministic regression, and state-diff evaluation patterns. |
 | nanograph | Typed graph schema and query ergonomics for graph-lite developer experience. | `research_gate`. | `not_encoded`: `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json`. | `unsupported`: not a memory backend comparison target. | Non-goal for direct win/loss unless a contained memory-backed target emerges; measure ELF graph-lite DX instead. | Typed relation schema, query ergonomics, and small graph developer experience. |
 | llm-wiki | LLM-maintained wiki or knowledge-page workflow with query-save and lint loops. | `research_gate`. | `not_encoded`: `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json`. | `unsupported`: no live service runtime for adapter proof. | Select contained plugin or instruction harness, then score knowledge pages for citations, unsupported claims, rebuild, and stale-source lint. | Maintained wiki workflows, page lint, query-save loops, and topic-scoped navigation. |
 | gbrain | Operational knowledge brain with compiled_truth pages, timelines, enrichment, and maintenance loops. | `research_gate`. | `not_encoded`: `docs/research/2026-06-10-xy-882-rag-graph-adapter-feasibility.json`. | `blocked`: Docker-local brain repo and database path are missing. | Prove Docker-local repository/database setup, then encode compiled_truth/timeline and operator-continuity jobs. | Compiled truth pages, timeline maintenance, and human-operable knowledge-brain navigation. |
-| graphify | Graph-compressed navigation with `graph.json` and `GRAPH_REPORT` evidence outputs. | Scored tiny `live_real_world` smoke; not broad graph-quality proof. | `wrong_result`: `cargo make graphify-docker-graph-report-smoke`, `tmp/real-world-memory/graphify-smoke/graphify-report.json`. | `not_encoded`: broad graph navigation, multimodal, private-corpus, and large-corpus quality remain outside the tiny smoke. | Expand beyond the generated smoke only after graph/report output maps to scored evidence on representative graph/RAG jobs. | Graph compression, source-location graph reports, and navigation hints for large code or document spaces. |
+| graphify | Graph-compressed navigation with `graph.json` and `GRAPH_REPORT` evidence outputs. | Scored tiny `live_real_world` smoke; not broad graph-quality proof. | `wrong_result`: `cargo make smoke-graphify-docker-graph-report`, `tmp/real-world-memory/graphify-smoke/graphify-report.json`. | `not_encoded`: broad graph navigation, multimodal, private-corpus, and large-corpus quality remain outside the tiny smoke. | Expand beyond the generated smoke only after graph/report output maps to scored evidence on representative graph/RAG jobs. | Graph compression, source-location graph reports, and navigation hints for large code or document spaces. |
 
 ## Scenario Matrix
 
diff --git a/docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md b/docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md
index 542e0839..290092d3 100644
--- a/docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md
+++ b/docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md
@@ -39,11 +39,11 @@ contract, not the quality claim.
 
 | Project | Scored scenario | Command | Current scored status | Claim boundary |
 | --- | --- | --- | --- | --- |
-| RAGFlow | `retrieval`: reference chunks mapped to generated evidence ids | `cargo make ragflow-docker-smoke` | `blocked` or `incomplete` by execution boundary | Smoke-only. No RAGFlow quality claim until returned reference chunks map to `ragflow-smoke-anchor`. |
-| LightRAG | `retrieval`: context/source export mapped to fixture evidence ids | `cargo make lightrag-docker-context-smoke` | `incomplete` when the API service is not started | Smoke-only. No graph-RAG quality claim until context or references map to generated evidence ids. |
-| GraphRAG | `knowledge_compilation`: output tables mapped to generated evidence ids | `cargo make graphrag-docker-smoke` | `blocked` | Smoke-only. No graph-navigation or synthesis claim until output tables map to generated evidence ids. |
-| Graphiti/Zep | `memory_evolution`: current and historical validity facts | `cargo make graphiti-zep-docker-temporal-smoke` | `blocked` before live opt-in; `provider_api_key_missing` when live path is enabled without explicit credentials | Provider-bound. No ELF-over-Graphiti/Zep claim until temporal output maps to scored evidence ids. |
-| graphify | `knowledge_compilation`: `graph.json`, `GRAPH_REPORT.md`, and query output mapping | `cargo make graphify-docker-graph-report-smoke` | `wrong_result` after setup/run pass | Scored tiny smoke. The graph/report output maps to evidence ids, but the job remains non-pass; no broad graph-navigation quality claim follows. |
+| RAGFlow | `retrieval`: reference chunks mapped to generated evidence ids | `cargo make smoke-ragflow-docker` | `blocked` or `incomplete` by execution boundary | Smoke-only. No RAGFlow quality claim until returned reference chunks map to `ragflow-smoke-anchor`. |
+| LightRAG | `retrieval`: context/source export mapped to fixture evidence ids | `cargo make smoke-lightrag-docker-context` | `incomplete` when the API service is not started | Smoke-only. No graph-RAG quality claim until context or references map to generated evidence ids. |
+| GraphRAG | `knowledge_compilation`: output tables mapped to generated evidence ids | `cargo make smoke-graphrag-docker` | `blocked` | Smoke-only. No graph-navigation or synthesis claim until output tables map to generated evidence ids. |
+| Graphiti/Zep | `memory_evolution`: current and historical validity facts | `cargo make smoke-graphiti-zep-docker-temporal` | `blocked` before live opt-in; `provider_api_key_missing` when live path is enabled without explicit credentials | Provider-bound. No ELF-over-Graphiti/Zep claim until temporal output maps to scored evidence ids. |
+| graphify | `knowledge_compilation`: `graph.json`, `GRAPH_REPORT.md`, and query output mapping | `cargo make smoke-graphify-docker-graph-report` | `wrong_result` after setup/run pass | Scored tiny smoke. The graph/report output maps to evidence ids, but the job remains non-pass; no broad graph-navigation quality claim follows. |
 
 ## Artifact Contract
 
diff --git a/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md b/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md
index a9bee44c..40fca7fa 100644
--- a/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md
+++ b/docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md
@@ -53,7 +53,7 @@ clear answer and trace.
 
 | Command | Result | Runtime | Main artifact |
 | --- | --- | ---: | --- |
-| `ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke` | typed blocked | 3.5 seconds | `tmp/real-world-memory/graphiti-zep-smoke/summary.json` |
+| `ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal` | typed blocked | 3.5 seconds | `tmp/real-world-memory/graphiti-zep-smoke/summary.json` |
 | `ELF_BASELINE_PROJECTS=ELF,mem0 cargo make baseline-live-docker` | pass | 50.14 seconds | `tmp/live-baseline/live-baseline-report.json` |
 | `cargo make real-world-memory-evolution` | pass | 59.65 seconds | `tmp/real-world-memory/evolution-report.json` |
 | `cargo make real-world-memory-live-adapters` | pass | 166.61 seconds | `tmp/real-world-memory/live-adapters/` |
diff --git a/docs/guide/benchmarking/2026-06-16-scheduled-memory-task-scoring-report.md b/docs/guide/benchmarking/2026-06-16-scheduled-memory-task-scoring-report.md
index 7907c225..f0d5dedd 100644
--- a/docs/guide/benchmarking/2026-06-16-scheduled-memory-task-scoring-report.md
+++ b/docs/guide/benchmarking/2026-06-16-scheduled-memory-task-scoring-report.md
@@ -79,16 +79,16 @@ This section is manifest-backed. It records external adapter coverage and blocke
 | claude-mem | `claude_mem_live_baseline` | `live_baseline_only` | `wrong_result` | `pass` | `wrong_result` | `wrong_result` | `true` | `work_resume`: `not_encoded`<br>`operator_debugging_ux`: `blocked`<br>`capture_integration`: `blocked` | setup: `ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker`<br>result: `tmp/live-baseline/live-baseline-report.json` |
 | qmd | `qmd_deep_profile_gate` | `research_gate` | `not_encoded` | `pass` | `not_encoded` | `not_encoded` | `true` | `retrieval`: `not_encoded`<br>`operator_debugging_ux`: `not_encoded` | setup: `ELF_BASELINE_PROJECTS=qmd ELF_BASELINE_PROFILE=stress cargo make baseline-live-docker`<br>result: `docs/research/2026-06-11-qmd-openviking-strength-profile-report.json` |
 | OpenViking | `openviking_deep_profile_gate` | `research_gate` | `blocked` | `pass` | `blocked` | `blocked` | `true` | `retrieval`: `wrong_result`<br>`context_trajectory`: `blocked`<br>`operator_debugging_ux`: `not_encoded` | setup: `ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker`<br>result: `docs/research/2026-06-11-qmd-openviking-strength-profile-report.json` |
-| RAGFlow | `ragflow_research_gate` | `research_gate` | `blocked` | `blocked` | `blocked` | `blocked` | `true` | `retrieval`: `blocked`<br>`knowledge_compilation`: `not_encoded`<br>`production_ops`: `blocked` | setup: `cargo make ragflow-docker-smoke`<br>result: `tmp/real-world-memory/ragflow-smoke/ragflow-report.json` |
-| LightRAG | `lightrag_research_gate` | `research_gate` | `blocked` | `blocked` | `blocked` | `blocked` | `true` | `retrieval`: `blocked`<br>`memory_evolution`: `not_encoded`<br>`operator_debugging_ux`: `not_encoded` | setup: `cargo make lightrag-docker-context-smoke`<br>result: `tmp/real-world-memory/lightrag-context/lightrag-report.json` |
-| GraphRAG | `graphrag_research_gate` | `research_gate` | `blocked` | `blocked` | `blocked` | `blocked` | `true` | `knowledge_compilation`: `blocked`<br>`retrieval`: `not_encoded`<br>`production_ops`: `not_encoded`<br>`memory_evolution`: `not_encoded` | setup: `cargo make graphrag-docker-smoke`<br>result: `tmp/real-world-memory/graphrag-smoke/graphrag-report.json` |
-| Graphiti/Zep | `graphiti_zep_research_gate` | `research_gate` | `blocked` | `blocked` | `blocked` | `blocked` | `true` | `memory_evolution`: `blocked`<br>`retrieval`: `not_encoded`<br>`production_ops`: `not_encoded` | setup: `cargo make graphiti-zep-docker-temporal-smoke`<br>result: `tmp/real-world-memory/graphiti-zep-smoke/graphiti-zep-report.json` |
+| RAGFlow | `ragflow_research_gate` | `research_gate` | `blocked` | `blocked` | `blocked` | `blocked` | `true` | `retrieval`: `blocked`<br>`knowledge_compilation`: `not_encoded`<br>`production_ops`: `blocked` | setup: `cargo make smoke-ragflow-docker`<br>result: `tmp/real-world-memory/ragflow-smoke/ragflow-report.json` |
+| LightRAG | `lightrag_research_gate` | `research_gate` | `blocked` | `blocked` | `blocked` | `blocked` | `true` | `retrieval`: `blocked`<br>`memory_evolution`: `not_encoded`<br>`operator_debugging_ux`: `not_encoded` | setup: `cargo make smoke-lightrag-docker-context`<br>result: `tmp/real-world-memory/lightrag-context/lightrag-report.json` |
+| GraphRAG | `graphrag_research_gate` | `research_gate` | `blocked` | `blocked` | `blocked` | `blocked` | `true` | `knowledge_compilation`: `blocked`<br>`retrieval`: `not_encoded`<br>`production_ops`: `not_encoded`<br>`memory_evolution`: `not_encoded` | setup: `cargo make smoke-graphrag-docker`<br>result: `tmp/real-world-memory/graphrag-smoke/graphrag-report.json` |
+| Graphiti/Zep | `graphiti_zep_research_gate` | `research_gate` | `blocked` | `blocked` | `blocked` | `blocked` | `true` | `memory_evolution`: `blocked`<br>`retrieval`: `not_encoded`<br>`production_ops`: `not_encoded` | setup: `cargo make smoke-graphiti-zep-docker-temporal`<br>result: `tmp/real-world-memory/graphiti-zep-smoke/graphiti-zep-report.json` |
 | Letta | `letta_research_gate` | `research_gate` | `blocked` | `blocked` | `not_encoded` | `not_encoded` | `true` | `personalization`: `not_encoded`<br>`project_decisions`: `not_encoded`<br>`work_resume`: `not_encoded`<br>`core_archival_memory`: `blocked` | setup: `Letta is D1 reviewed as a core/archival memory reference. The contained comparison contract is a Docker-only benchmark-created agent export that must return core block JSON, archival search readback, and source ids before any scenario claim is scored.`<br>result: `No Letta core block, archival fallback, stale-core, scope, provenance, or project-decision result is claimed.` |
 | LangGraph | `langgraph_research_gate` | `research_gate` | `not_encoded` | `not_encoded` | `not_encoded` | `not_encoded` | `true` | `production_ops`: `not_encoded`<br>`work_resume`: `not_encoded` | setup: `LangGraph is D1 reviewed as a replay/checkpoint reference, not a direct memory backend adapter.`<br>result: `No production-ops or resume suite result is claimed.` |
 | nanograph | `nanograph_research_gate` | `research_gate` | `not_encoded` | `not_encoded` | `not_encoded` | `not_encoded` | `true` | `memory_evolution`: `not_encoded`<br>`retrieval`: `not_encoded` | setup: `nanograph is D1 reviewed as typed graph DX, but no Docker adapter is implemented.`<br>result: `No graph temporal or retrieval-debug result is claimed.` |
 | llm-wiki | `llm_wiki_research_gate` | `research_gate` | `not_encoded` | `not_encoded` | `not_encoded` | `not_encoded` | `true` | `knowledge_compilation`: `not_encoded`<br>`work_resume`: `not_encoded` | setup: `llm-wiki is D1 reviewed as a knowledge-compilation reference, but no plugin or generated-page adapter is implemented.`<br>result: `No knowledge page citation or lint result is claimed.` |
 | gbrain | `gbrain_research_gate` | `research_gate` | `not_encoded` | `not_encoded` | `not_encoded` | `not_encoded` | `true` | `knowledge_compilation`: `not_encoded`<br>`operator_debugging_ux`: `not_encoded` | setup: `gbrain is D1 reviewed as a compiled-truth and timeline reference, but no Docker adapter is implemented.`<br>result: `No knowledge-synthesis or operator-continuity result is claimed.` |
-| graphify | `graphify_docker_smoke` | `live_real_world` | `wrong_result` | `pass` | `pass` | `wrong_result` | `true` | `knowledge_compilation`: `wrong_result`<br>`retrieval`: `blocked`<br>`work_resume`: `not_encoded` | setup: `cargo make graphify-docker-graph-report-smoke`<br>result: `tmp/real-world-memory/graphify-smoke/graphify-report.json` |
+| graphify | `graphify_docker_smoke` | `live_real_world` | `wrong_result` | `pass` | `pass` | `wrong_result` | `true` | `knowledge_compilation`: `wrong_result`<br>`retrieval`: `blocked`<br>`work_resume`: `not_encoded` | setup: `cargo make smoke-graphify-docker-graph-report`<br>result: `tmp/real-world-memory/graphify-smoke/graphify-report.json` |
 
 ### Adapter Capability Details
 
@@ -267,16 +267,16 @@ This section is manifest-backed. It records external adapter coverage and blocke
 | `openviking_live_baseline` | [OpenViking repository](https://github.com/volcengine/OpenViking/): Official source for OpenViking local context database, resource, and retrieval APIs.<br>[llama-cpp-python CPU wheel index](https://abetlen.github.io/llama-cpp-python/whl/cpu): Official prebuilt CPU wheel index used by the Docker-local embedding pin. | Run ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker. The runner installs llama-cpp-python==0.3.28 with --only-binary llama-cpp-python from the CPU wheel index before OpenViking add_resource/find. | docker-compose.baseline.yml baseline-runner container; no host-global OpenViking, llama-cpp-python, or model service install is required. | Local embedding setup may download a CPU wheel and model assets; record OpenViking.log, elapsed time, and cache size before claiming adapter quality. | Use the default pinned CPU wheel path first.; Override ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_VERSION or ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_INDEX only when the default wheel is unavailable for the Docker platform.; Treat install/import failure as incomplete, not wrong_result; treat add_resource/find evidence misses as wrong_result. | not recorded |
 | `qmd_deep_profile_gate` | [qmd repository](https://github.com/tobi/qmd): Official qmd source for local hybrid search, CLI setup, and query behavior. | Use the existing Docker baseline qmd install, collection add, update, embed, and query flow with scale or stress profiles. | docker-compose.baseline.yml baseline-runner container with project files and caches inside Docker volumes. | CPU local embedding and rerank cost scale with corpus size; record elapsed time and qmd log artifacts before claims. | Run qmd stress profile in Docker and publish the artifact path.; Map qmd JSON output to retrieval-debug real_world_job scoring before suite claims. | D2 reviewed; deep profile not encoded |
 | `openviking_deep_profile_gate` | [OpenViking repository](https://github.com/volcengine/OpenViking/): Official source for OpenViking local context database, resource, and retrieval APIs. | Use the pinned Docker local embedding path from scripts/live-baseline-benchmark.sh, then run OpenViking add_resource/find before any deep profile scoring. | docker-compose.baseline.yml baseline-runner container; no host model or compiler setup outside Docker. | Local embedding setup can download CPU wheels and model assets; record build/import logs, model cache size, and elapsed time. | Run the default pinned llama-cpp-python==0.3.28 CPU wheel path first.; Override the OpenViking llama-cpp-python version or index only when the default wheel is unavailable for the Docker platform.; Fix evidence-bearing same-corpus output and materialize selected hierarchy/expansion artifacts before converting blocked context_trajectory fixtures into scored jobs. | D2 reviewed; local embedding setup pinned; blocked fixtures encoded |
-| `ragflow_research_gate` | [RAGFlow repository](https://github.com/infiniflow/ragflow): Official source for RAGFlow service code and Docker Compose setup.<br>[RAGFlow docs](https://ragflow.io/docs/): Official deployment and setup documentation.<br>[RAGFlow HTTP API reference](https://raw.githubusercontent.com/infiniflow/ragflow/main/docs/references/http_api_reference.md): Official reference for OpenAI-compatible responses with reference chunks and document metadata. | Implement a tiny Docker evidence-smoke runner using the official Docker deployment, dataset ingest API, and OpenAI-compatible query API. | Run scripts/ragflow-docker-evidence-smoke.sh through cargo make; the live path uses the official RAGFlow Docker Compose service boundary without host-global RAGFlow installs. | Large multi-service RAG stack; generated artifacts record CPU/GPU mode, memory, disk, image size, expanded disk notes, startup time, vm.max_map_count handling, and provider boundaries before scoring. | Run cargo make ragflow-docker-smoke first to produce a typed preflight artifact.; Start the live path only with ELF_RAGFLOW_SMOKE_START=1 and ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1.; Keep private corpora and operator-owned provider credentials out of this smoke; map only generated public corpus reference chunks to evidence ids. | D2 feasibility verdict plus XY-885 evidence-smoke implementation and XY-900 scored smoke promotion; checked-in record remains research_gate unless a generated artifact reaches query output |
-| `lightrag_research_gate` | [LightRAG repository](https://github.com/HKUDS/LightRAG): Official source for LightRAG server, Docker, and retrieval modes.<br>[LightRAG Docker docs](https://github.com/HKUDS/LightRAG/blob/main/docs/DockerDeployment.md): Official Docker deployment reference.<br>[LightRAG API server docs](https://github.com/HKUDS/LightRAG/blob/main/docs/LightRAG-API-Server.md): Official query-mode and context-output reference.<br>[LightRAG core programming docs](https://github.com/HKUDS/LightRAG/blob/main/docs/ProgramingWithCore.md): Official source-id and file-path citation reference. | Run cargo make lightrag-docker-context-smoke for a typed preflight artifact; set ELF_LIGHTRAG_CONTEXT_START=1 to start the opt-in LightRAG Docker profile and attempt live context export. | docker-compose.baseline.yml baseline-runner plus opt-in lightrag and lightrag-mock-provider services; generated source files and LightRAG data stay in Docker-mounted artifact paths and Docker volumes. | The default profile uses the official LightRAG image, a local OpenAI-compatible mock provider, 64-dimensional embeddings, rerank disabled for context queries, cargo/pip/Hugging Face caches, and Docker volumes for rag_storage, inputs, and prompts. | Run cargo make lightrag-docker-context-smoke first; a missing API must remain a typed incomplete artifact, not a pass claim.; Set ELF_LIGHTRAG_CONTEXT_START=1 only when Docker may pull/start the LightRAG service profile.; Score retrieval only when returned context, references.file_path, or references.content map to required evidence ids. | D2 feasibility plus XY-886 context-export implementation and XY-900 scored smoke aggregation; checked-in record remains research_gate unless a generated artifact reaches query output |
-| `graphrag_research_gate` | [GraphRAG repository](https://github.com/microsoft/graphrag): Official Microsoft GraphRAG source and setup reference.<br>[GraphRAG docs](https://microsoft.github.io/graphrag/): Official documentation for indexing and querying.<br>[GraphRAG input docs](https://microsoft.github.io/graphrag/index/inputs/): Official input format and document metadata reference.<br>[GraphRAG output tables](https://microsoft.github.io/graphrag/index/outputs/): Official output schema with document, text unit, community, and relationship identifiers.<br>[GraphRAG local search docs](https://microsoft.github.io/graphrag/query/local_search/): Official local-search context and graph traversal reference. | Run cargo make graphrag-docker-smoke for a typed preflight artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live GraphRAG index/query attempt. | docker-compose.baseline.yml baseline-runner, container-local Python venv, generated public corpus, and report artifacts under tmp/real-world-memory/graphrag-smoke. | The default profile uses a generated public corpus capped by ELF_GRAPHRAG_MAX_DOCS and ELF_GRAPHRAG_MAX_INPUT_CHARS, pins GraphRAG through ELF_GRAPHRAG_PACKAGE, and records elapsed time, cache size, output size, and observed cache entries. | Run cargo make graphrag-docker-smoke first; missing provider configuration must remain a typed blocked artifact, not a pass claim.; Enable ELF_GRAPHRAG_SMOKE_RUN=1 only for generated public corpus indexing with explicit provider configuration.; Fail typed if source document or text_unit identifiers cannot be mapped to expected evidence IDs. | D2 feasibility plus XY-887 Docker smoke implementation and XY-900 scored smoke promotion; checked-in record remains research_gate unless a generated artifact reaches GraphRAG output |
-| `graphiti_zep_research_gate` | [Graphiti repository](https://github.com/getzep/graphiti): Official open-source temporal context graph engine.<br>[Zep Graphiti overview](https://www.getzep.com/platform/graphiti/): Official product documentation for temporal context graph behavior.<br>[Graphiti quick start](https://help.getzep.com/graphiti/getting-started/quick-start): Official setup, episode ingest, and search output reference.<br>[Graphiti FalkorDB configuration](https://help.getzep.com/graphiti/configuration/falkor-db-configuration): Official Docker-local FalkorDB setup reference.<br>[Graphiti fact triples](https://help.getzep.com/graphiti/working-with-data/adding-fact-triples): Official manual fact-triple ingest contract. | Run cargo make graphiti-zep-docker-temporal-smoke for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt. | docker-compose.baseline.yml baseline-runner plus graphiti-zep FalkorDB profile, container-local Python venv, generated public temporal facts, and report artifacts under tmp/real-world-memory/graphiti-zep-smoke. | Requires Docker-local FalkorDB plus LLM/embedding configuration; generated artifacts record service startup, storage size, provider boundaries, fact count, and timeout before scoring. | Run cargo make graphiti-zep-docker-temporal-smoke first to produce a typed blocked artifact.; Start the live path only with ELF_GRAPHITI_ZEP_SMOKE_START=1, ELF_GRAPHITI_ZEP_SMOKE_RUN=1, and explicit provider configuration.; Treat missing validity windows or unmapped current/historical facts as wrong_result, not pass. | D2 feasibility plus XY-888 Docker temporal smoke implementation and XY-900 scored smoke promotion; checked-in record remains research_gate unless a generated artifact reaches Graphiti search output |
+| `ragflow_research_gate` | [RAGFlow repository](https://github.com/infiniflow/ragflow): Official source for RAGFlow service code and Docker Compose setup.<br>[RAGFlow docs](https://ragflow.io/docs/): Official deployment and setup documentation.<br>[RAGFlow HTTP API reference](https://raw.githubusercontent.com/infiniflow/ragflow/main/docs/references/http_api_reference.md): Official reference for OpenAI-compatible responses with reference chunks and document metadata. | Implement a tiny Docker evidence-smoke runner using the official Docker deployment, dataset ingest API, and OpenAI-compatible query API. | Run scripts/ragflow-docker-evidence-smoke.sh through cargo make; the live path uses the official RAGFlow Docker Compose service boundary without host-global RAGFlow installs. | Large multi-service RAG stack; generated artifacts record CPU/GPU mode, memory, disk, image size, expanded disk notes, startup time, vm.max_map_count handling, and provider boundaries before scoring. | Run cargo make smoke-ragflow-docker first to produce a typed preflight artifact.; Start the live path only with ELF_RAGFLOW_SMOKE_START=1 and ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1.; Keep private corpora and operator-owned provider credentials out of this smoke; map only generated public corpus reference chunks to evidence ids. | D2 feasibility verdict plus XY-885 evidence-smoke implementation and XY-900 scored smoke promotion; checked-in record remains research_gate unless a generated artifact reaches query output |
+| `lightrag_research_gate` | [LightRAG repository](https://github.com/HKUDS/LightRAG): Official source for LightRAG server, Docker, and retrieval modes.<br>[LightRAG Docker docs](https://github.com/HKUDS/LightRAG/blob/main/docs/DockerDeployment.md): Official Docker deployment reference.<br>[LightRAG API server docs](https://github.com/HKUDS/LightRAG/blob/main/docs/LightRAG-API-Server.md): Official query-mode and context-output reference.<br>[LightRAG core programming docs](https://github.com/HKUDS/LightRAG/blob/main/docs/ProgramingWithCore.md): Official source-id and file-path citation reference. | Run cargo make smoke-lightrag-docker-context for a typed preflight artifact; set ELF_LIGHTRAG_CONTEXT_START=1 to start the opt-in LightRAG Docker profile and attempt live context export. | docker-compose.baseline.yml baseline-runner plus opt-in lightrag and lightrag-mock-provider services; generated source files and LightRAG data stay in Docker-mounted artifact paths and Docker volumes. | The default profile uses the official LightRAG image, a local OpenAI-compatible mock provider, 64-dimensional embeddings, rerank disabled for context queries, cargo/pip/Hugging Face caches, and Docker volumes for rag_storage, inputs, and prompts. | Run cargo make smoke-lightrag-docker-context first; a missing API must remain a typed incomplete artifact, not a pass claim.; Set ELF_LIGHTRAG_CONTEXT_START=1 only when Docker may pull/start the LightRAG service profile.; Score retrieval only when returned context, references.file_path, or references.content map to required evidence ids. | D2 feasibility plus XY-886 context-export implementation and XY-900 scored smoke aggregation; checked-in record remains research_gate unless a generated artifact reaches query output |
+| `graphrag_research_gate` | [GraphRAG repository](https://github.com/microsoft/graphrag): Official Microsoft GraphRAG source and setup reference.<br>[GraphRAG docs](https://microsoft.github.io/graphrag/): Official documentation for indexing and querying.<br>[GraphRAG input docs](https://microsoft.github.io/graphrag/index/inputs/): Official input format and document metadata reference.<br>[GraphRAG output tables](https://microsoft.github.io/graphrag/index/outputs/): Official output schema with document, text unit, community, and relationship identifiers.<br>[GraphRAG local search docs](https://microsoft.github.io/graphrag/query/local_search/): Official local-search context and graph traversal reference. | Run cargo make smoke-graphrag-docker for a typed preflight artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live GraphRAG index/query attempt. | docker-compose.baseline.yml baseline-runner, container-local Python venv, generated public corpus, and report artifacts under tmp/real-world-memory/graphrag-smoke. | The default profile uses a generated public corpus capped by ELF_GRAPHRAG_MAX_DOCS and ELF_GRAPHRAG_MAX_INPUT_CHARS, pins GraphRAG through ELF_GRAPHRAG_PACKAGE, and records elapsed time, cache size, output size, and observed cache entries. | Run cargo make smoke-graphrag-docker first; missing provider configuration must remain a typed blocked artifact, not a pass claim.; Enable ELF_GRAPHRAG_SMOKE_RUN=1 only for generated public corpus indexing with explicit provider configuration.; Fail typed if source document or text_unit identifiers cannot be mapped to expected evidence IDs. | D2 feasibility plus XY-887 Docker smoke implementation and XY-900 scored smoke promotion; checked-in record remains research_gate unless a generated artifact reaches GraphRAG output |
+| `graphiti_zep_research_gate` | [Graphiti repository](https://github.com/getzep/graphiti): Official open-source temporal context graph engine.<br>[Zep Graphiti overview](https://www.getzep.com/platform/graphiti/): Official product documentation for temporal context graph behavior.<br>[Graphiti quick start](https://help.getzep.com/graphiti/getting-started/quick-start): Official setup, episode ingest, and search output reference.<br>[Graphiti FalkorDB configuration](https://help.getzep.com/graphiti/configuration/falkor-db-configuration): Official Docker-local FalkorDB setup reference.<br>[Graphiti fact triples](https://help.getzep.com/graphiti/working-with-data/adding-fact-triples): Official manual fact-triple ingest contract. | Run cargo make smoke-graphiti-zep-docker-temporal for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt. | docker-compose.baseline.yml baseline-runner plus graphiti-zep FalkorDB profile, container-local Python venv, generated public temporal facts, and report artifacts under tmp/real-world-memory/graphiti-zep-smoke. | Requires Docker-local FalkorDB plus LLM/embedding configuration; generated artifacts record service startup, storage size, provider boundaries, fact count, and timeout before scoring. | Run cargo make smoke-graphiti-zep-docker-temporal first to produce a typed blocked artifact.; Start the live path only with ELF_GRAPHITI_ZEP_SMOKE_START=1, ELF_GRAPHITI_ZEP_SMOKE_RUN=1, and explicit provider configuration.; Treat missing validity windows or unmapped current/historical facts as wrong_result, not pass. | D2 feasibility plus XY-888 Docker temporal smoke implementation and XY-900 scored smoke promotion; checked-in record remains research_gate unless a generated artifact reaches Graphiti search output |
 | `letta_research_gate` | [Letta repository](https://github.com/letta-ai/letta): Official source for Letta stateful agents and memory.<br>[Letta Docker docs](https://docs.letta.com/guides/docker/): Official Docker deployment guide and embedding configuration boundary. | Use a Docker-only Letta server or CLI flow that creates a benchmark-owned agent, loads the checked-in core_archival_memory fixture corpus, writes core memory and archival memory with fixture source ids, then exports core block JSON plus archival search/readback JSON. | Docker-only Letta server or CLI flow with benchmark-created agents, benchmark-owned storage, no host-global state, and no unstated hosted service dependency. | Embedding model, agent server state, exported core memory, archival search output, and provider boundaries must be explicit in the artifact. | Create a tiny Docker agent with core memory and archival memory loaded from the ELF core_archival_memory fixtures.; Export core block readback, archival search results, source ids, and any audit-equivalent metadata as JSON before scoring.; Score core-versus-archival scenarios only after source evidence can be exported and mapped to the fixture evidence ids. | D1 feasibility verdict: research_only (XY-882); XY-927 selects the contained export/readback contract, but the Letta adapter remains blocked until that artifact exists |
 | `langgraph_research_gate` | [LangGraph persistence docs](https://docs.langchain.com/oss/python/langgraph/persistence): Official documentation for checkpoints, replay, fork, and persistence behavior. | Build a tiny LangGraph agent with a checkpointer and explicit memory read/write steps before scoring. | Docker-only Python harness with checkpoint store under the artifact directory. | Small runtime expected, but LLM calls and side effects must be stubbed or deterministic before replay claims. | Encode one replay/fork failure recovery job.; Keep LangGraph classified as replay reference unless memory retrieval is actually exercised. | D1 feasibility verdict: research_only (XY-882); replay/checkpoint reference, adapter not encoded |
 | `nanograph_research_gate` | [nanograph repository](https://github.com/nanograph/nanograph): Official source for on-device typed property graph behavior. | Build or install nanograph inside Docker and load a typed graph fixture from generated corpus facts. | Docker-only CLI run with graph folder under benchmark artifacts. | Light local graph runtime expected; record binary build/install time and graph artifact size. | Define a minimal schema for memory_evolution facts.; Score typed query output only if it cites fixture evidence IDs. | D1 feasibility verdict: research_only (XY-882); typed graph DX reference, adapter not encoded |
 | `llm_wiki_research_gate` | [llm-wiki repository](https://github.com/nvk/llm-wiki): Official source for the LLM Wiki plugin and knowledge-base workflow. | Research plugin bootstrap inside a Docker-contained Codex or file-based harness, then materialize page artifacts. | Docker-only plugin or fixture materializer; no user-global Codex plugin install. | LLM generation cost depends on page build; record provider boundary and generated artifact size. | Prototype a fixture-only page build with explicit citations.; Do not score until generated sections can be mapped to evidence IDs. | D1 feasibility verdict: research_only (XY-882); derived wiki workflow reference, adapter not encoded |
 | `gbrain_research_gate` | [gbrain repository](https://github.com/garrytan/gbrain): Official source for brain repo and retrieval workflow.<br>[compiled truth guide](https://github.com/garrytan/gbrain/blob/master/docs/guides/compiled-truth.md): Official guide for compiled truth plus timeline behavior. | Create a Docker-local brain repo fixture, run import/sync, and export compiled truth plus timeline evidence. | Docker-only repository and database state with no operator-owned brain repo. | Postgres-backed sync and embedding choices must be explicit; record DB size and import time. | Prototype a tiny brain repo with one current-truth page and timeline.; Score only if compiled truth cites the source timeline evidence. | D1 feasibility verdict: blocked (XY-882); Docker-local brain repo and database path not proven |
-| `graphify_docker_smoke` | [graphify repository](https://github.com/safishamsi/graphify): Official source for graphify graph extraction and query workflow.<br>[graphify README](https://github.com/safishamsi/graphify/blob/v3/README.md): Official CLI, output artifact, query, and source-location contract. | Run cargo make graphify-docker-graph-report-smoke to install graphify in Docker, build graph/report artifacts from a generated public corpus, and export query evidence without installing host-global assistant hooks. | docker-compose.baseline.yml baseline-runner, container-local Python venv, isolated HOME/config paths, generated public corpus, and artifacts under tmp/real-world-memory/graphify-smoke. | Graph build cost scales with corpus and model choices; generated artifacts record package reference, provider/model boundary, build time, graph size, report size, cache size, timeout, and retry behavior. | Run cargo make graphify-docker-graph-report-smoke first; setup/runtime failures must remain typed artifacts, not pass claims.; Do not use graphify host assistant hook installs or operator-owned assistant configuration as proof.; Score graph-guided answers only when graph.json, GRAPH_REPORT.md, and graphify query output map to generated evidence ids. | D1 feasibility verdict plus XY-889 Docker graph/report smoke implementation and XY-900 scored smoke promotion; current Docker validation reaches graphify output and scores the tiny knowledge_compilation job as wrong_result |
+| `graphify_docker_smoke` | [graphify repository](https://github.com/safishamsi/graphify): Official source for graphify graph extraction and query workflow.<br>[graphify README](https://github.com/safishamsi/graphify/blob/v3/README.md): Official CLI, output artifact, query, and source-location contract. | Run cargo make smoke-graphify-docker-graph-report to install graphify in Docker, build graph/report artifacts from a generated public corpus, and export query evidence without installing host-global assistant hooks. | docker-compose.baseline.yml baseline-runner, container-local Python venv, isolated HOME/config paths, generated public corpus, and artifacts under tmp/real-world-memory/graphify-smoke. | Graph build cost scales with corpus and model choices; generated artifacts record package reference, provider/model boundary, build time, graph size, report size, cache size, timeout, and retry behavior. | Run cargo make smoke-graphify-docker-graph-report first; setup/runtime failures must remain typed artifacts, not pass claims.; Do not use graphify host assistant hook installs or operator-owned assistant configuration as proof.; Score graph-guided answers only when graph.json, GRAPH_REPORT.md, and graphify query output map to generated evidence ids. | D1 feasibility verdict plus XY-889 Docker graph/report smoke implementation and XY-900 scored smoke promotion; current Docker validation reaches graphify output and scores the tiny knowledge_compilation job as wrong_result |
 
 ## Capture And Integration Coverage
 
diff --git a/docs/guide/benchmarking/live_baseline_benchmark.md b/docs/guide/benchmarking/live_baseline_benchmark.md
index ad839597..9d93a2d6 100644
--- a/docs/guide/benchmarking/live_baseline_benchmark.md
+++ b/docs/guide/benchmarking/live_baseline_benchmark.md
@@ -405,7 +405,7 @@ tmp/real-world-memory/live-adapters/summary.json
 To run the checked-in real-world job smoke fixture and render its Markdown report:
 
 ```sh
-cargo make real-world-job-smoke
+cargo make smoke-real-world-job
 ```
 
 To run the checked-in work-resume, source-of-truth, lifecycle, redaction,
@@ -508,7 +508,7 @@ benchmark artifacts, not source-truth replacements.
 ## Clean Up
 
 ```sh
-cargo make baseline-live-docker-clean
+cargo make clean-baseline-live-docker
 ```
 
 This removes Docker-managed Postgres, Qdrant, npm, pip, cargo, and target volumes used
diff --git a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
index 969dc125..c4e5c141 100644
--- a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
+++ b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
@@ -117,7 +117,7 @@ Recommended first increments:
 Current checked-in smoke increment:
 
 ```sh
-cargo make real-world-job-smoke
+cargo make smoke-real-world-job
 ```
 
 This parses `apps/elf-eval/fixtures/real_world_memory/work_resume/`, writes
diff --git a/docs/guide/competitive_parity_testing.md b/docs/guide/competitive_parity_testing.md
index 0497ae74..328bdd91 100644
--- a/docs/guide/competitive_parity_testing.md
+++ b/docs/guide/competitive_parity_testing.md
@@ -29,7 +29,7 @@ tmp/parity/competitive-parity-report.json
 Remove parity containers and Docker-managed volumes:
 
 ```sh
-cargo make parity-docker-clean
+cargo make clean-parity-docker
 ```
 
 The cleanup command removes Postgres, Qdrant, Cargo cache, and Rust target volumes
diff --git a/docs/guide/evaluation.md b/docs/guide/evaluation.md
index 994ab0af..39441ab9 100644
--- a/docs/guide/evaluation.md
+++ b/docs/guide/evaluation.md
@@ -172,7 +172,7 @@ To measure cross-scope misranking before and after enabling context boosting, us
 script:
 
 ```bash
-cargo make e2e
+cargo make test-e2e
 ```
 
 Or run the script directly:
@@ -339,12 +339,6 @@ What it does:
 
 To validate the reflection/consolidation loop with stable query assertions, use the harness:
 
-```bash
-cargo make e2e-consolidation-harness
-```
-
-Or run directly:
-
 ```bash
 scripts/consolidation-harness.sh
 ```
diff --git a/docs/guide/getting_started.md b/docs/guide/getting_started.md
index b630c218..f5ede104 100644
--- a/docs/guide/getting_started.md
+++ b/docs/guide/getting_started.md
@@ -141,7 +141,7 @@ ELF_PG_DSN="postgres://elf_dev:elf_dev_password@127.0.0.1:51888/postgres" \
 ELF_QDRANT_GRPC_URL="http://127.0.0.1:51890" \
 ELF_QDRANT_HTTP_URL="http://127.0.0.1:51889" \
 ELF_HARNESS_VECTOR_DIM=256 \
-cargo make e2e
+cargo make test-e2e
 ```
 
 ## 8. Development workflow
@@ -150,17 +150,17 @@ Use `cargo make` tasks from repository root.
 
 ```sh
 cargo make fmt
-cargo make lint
-cargo make test
-cargo make test-integration
-cargo make e2e
+cargo make check
+cargo make test-rust
+cargo make test-rust-integration
+cargo make test-e2e
 ```
 
 Notes:
 
-- `cargo make test-integration` runs ignored tests that require external Postgres and Qdrant.
+- `cargo make test-rust-integration` runs ignored tests that require external Postgres and Qdrant.
   Set `ELF_PG_DSN` and `ELF_QDRANT_GRPC_URL`.
-- `cargo make e2e` runs the context misranking harness.
+- `cargo make test-e2e` runs the context misranking harness.
   Set `ELF_PG_DSN`, `ELF_QDRANT_GRPC_URL`, and `ELF_QDRANT_HTTP_URL`.
 - Stop local dependencies with `docker compose -f docker-compose.yml down`.
   Add `-v` only when you intentionally want to delete the local development volumes.
diff --git a/docs/guide/integration-testing.md b/docs/guide/integration-testing.md
index c6219b46..336715f9 100644
--- a/docs/guide/integration-testing.md
+++ b/docs/guide/integration-testing.md
@@ -20,7 +20,7 @@ Run the ignored integration suite (requires external Postgres and Qdrant):
 ```bash
 ELF_PG_DSN="postgres://postgres:postgres@127.0.0.1:51888/postgres" \
 ELF_QDRANT_GRPC_URL="http://127.0.0.1:51890" \
-cargo make test-integration
+cargo make test-rust-integration
 ```
 
 Run the context misranking harness (creates and drops a dedicated database and collection):
@@ -29,7 +29,7 @@ Run the context misranking harness (creates and drops a dedicated database and c
 ELF_PG_DSN="postgres://postgres:postgres@127.0.0.1:51888/postgres" \
 ELF_QDRANT_GRPC_URL="http://127.0.0.1:51890" \
 ELF_QDRANT_HTTP_URL="http://127.0.0.1:51889" \
-cargo make e2e
+cargo make test-e2e
 ```
 
 CI also runs this harness as a required check for code changes (see `.github/workflows/e2e.yml`).
diff --git a/docs/guide/research/comparison_external_projects.md b/docs/guide/research/comparison_external_projects.md
index 7173ecb1..42a861f8 100644
--- a/docs/guide/research/comparison_external_projects.md
+++ b/docs/guide/research/comparison_external_projects.md
@@ -110,7 +110,7 @@ Project-to-suite map:
 | llm-wiki | `rw.knowledge-synthesis`, `rw.resume-evidence` | Query/save/lint flows and topic-scoped wiki pages are a useful reference for turning retrieved memory into maintained project knowledge. | Run a corpus-to-wiki job, ask resume/decision questions, require page citations back to source memory, then mutate a stale source and prove lint/repair catches it. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for derived-knowledge fit. | ELF is not yet stronger on derived knowledge pages; llm-wiki should inform rebuildable, evidence-cited dossiers rather than core storage. |
 | gbrain | `rw.knowledge-synthesis`, `rw.operator-continuity` | `compiled_truth`, timeline sections, backlinks, primary-home routing, and enrichment workflows model a living operational brain for project work. | Build or update pages from the real-world corpus, require current-truth plus timeline answers, and prove enrichment/backlink maintenance does not hide unsupported claims. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for operator knowledge UX. | ELF should keep source notes authoritative; gbrain is a reference for presentation, enrichment, and maintenance loops. |
 | Always-On Memory Agent | `rw.consolidation-review`, `rw.operator-continuity` | The file/API/dashboard ingest loop and timer-based consolidation show how background memory formation becomes a user-visible product surface. | Run scheduled consolidation on a fixed corpus, record source rows and output insights, then score whether consolidation is reviewable, repeatable, and bounded against unsupported claims. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for consolidation workflow reference. | ELF should borrow scheduling and operator controls while keeping deterministic writes and reviewable derived outputs. |
-| graphify | `rw.graph-navigation`, `rw.knowledge-synthesis`, `rw.resume-evidence` | Deterministic code extraction, LLM-assisted graph building, honesty tags, graph reports, and assistant hooks are strong references for graph-compressed navigation over large corpora. | Generate graph/report artifacts from the benchmark corpus, require answers to use graph structure plus source evidence, and prove rebuild behavior after corpus edits. | Scored tiny `live_real_world` smoke: `cargo make graphify-docker-graph-report-smoke` records a Docker-only generated-corpus graph/report artifact and currently scores `wrong_result`; the checked-in manifest does not claim broad graph quality, rebuild strength, or production-quality graph navigation. Confidence: medium for adapter feasibility, low for production-quality graph navigation. | ELF is stronger as a memory service; graphify is now a runnable reference for derived graph reports and pre-search guidance, but not yet a stronger end-to-end memory system. |
+| graphify | `rw.graph-navigation`, `rw.knowledge-synthesis`, `rw.resume-evidence` | Deterministic code extraction, LLM-assisted graph building, honesty tags, graph reports, and assistant hooks are strong references for graph-compressed navigation over large corpora. | Generate graph/report artifacts from the benchmark corpus, require answers to use graph structure plus source evidence, and prove rebuild behavior after corpus edits. | Scored tiny `live_real_world` smoke: `cargo make smoke-graphify-docker-graph-report` records a Docker-only generated-corpus graph/report artifact and currently scores `wrong_result`; the checked-in manifest does not claim broad graph quality, rebuild strength, or production-quality graph navigation. Confidence: medium for adapter feasibility, low for production-quality graph navigation. | ELF is stronger as a memory service; graphify is now a runnable reference for derived graph reports and pre-search guidance, but not yet a stronger end-to-end memory system. |
 | Letta | `rw.core-archival`, `rw.operator-continuity` | Core memory blocks, archival memory, and shared/read-only memory blocks map directly to always-loaded operating context versus retrievable memory. | Build a multi-agent job where core blocks must be attached/detached/shared read-only, while archival memory is retrieved separately and audited. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for memory-semantics reference. | ELF has scoped notes but not first-class core/archival block ergonomics; Letta is the reference dimension. |
 | LangGraph | `rw.replay-regression`, `rw.resume-evidence` | Thread checkpoints, durable execution, replay, fork, and time travel define a strong model for debugging agent-state and memory-regression behavior. | Run an agent job with memory reads across checkpoints, replay/fork the thread after a stale-memory failure, and verify side-effect boundaries. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium for replay workflow reference. | ELF traces are useful but do not replace full agent checkpoint replay; LangGraph is the reference for replay-regression jobs. |
 | Graphiti / Zep | `rw.graph-temporal`, `rw.resume-evidence` | Temporal entities, relations, fact triples, validity windows, and graph search directly target stale/contradictory factual memory. | Add fact triples with validity changes, query current and historical answers, and score invalidation/append behavior under contradiction traps. | Docs-grounded D1; no benchmark adapter evidence. Confidence: medium-high for temporal-graph dimension. | ELF graph-lite covers evidence-linked validity windows and current/historical relation context; Graphiti/Zep remains the reference for broader temporal graph workflows. |
@@ -124,7 +124,7 @@ XY-882 feasibility verdicts for RAG and graph-memory gates:
 | LightRAG | `adapter_candidate` | Docker Compose server with explicit LLM, embedding, rerank, storage, workspace, and data-volume configuration. | Context-only query modes can return the context prepared for the LLM; core APIs can insert documents with ids and source file paths. | [XY-886](https://linear.app/hack-ink/issue/XY-886/elf-benchmark-adapter-implement-lightrag-docker-context-export-adapter); no live pass claim. |
 | GraphRAG | `adapter_candidate` | Cost-bounded Docker Python CLI/API run over a generated tiny corpus with container-local parquet artifacts. | Output tables contain generated UUIDs, human-readable ids, source documents, text units, community reports, and text-unit links for graph summaries and relationships. | [XY-887](https://linear.app/hack-ink/issue/XY-887/elf-benchmark-adapter-implement-graphrag-cost-bounded-docker-adapter); no live pass claim. |
 | Graphiti / Zep | `adapter_candidate` | Docker-local FalkorDB or Neo4j plus Python SDK runner with provider config captured under benchmark artifacts. | Search results and fact triples expose UUIDs, fact text, and validity windows (`valid_at` / `invalid_at`) that map to memory-evolution scoring. | [XY-888](https://linear.app/hack-ink/issue/XY-888/elf-benchmark-adapter-implement-graphitizep-temporal-graph-adapter); no live pass claim. |
-| graphify | `adapter_candidate` | Docker-only CLI/materializer using `pip install graphifyy` over a mounted corpus; host-global assistant hooks are out of scope. | `graph.json`, `GRAPH_REPORT.md`, and graph query output include edge types, confidence tags, source files, and source locations. | [XY-889](https://linear.app/hack-ink/issue/XY-889/elf-benchmark-adapter-implement-graphify-docker-graph-report-adapter) adds `cargo make graphify-docker-graph-report-smoke`; XY-900 promotes the tiny generated smoke to scored `live_real_world` `wrong_result` evidence while still avoiding broad quality claims. |
+| graphify | `adapter_candidate` | Docker-only CLI/materializer using `pip install graphifyy` over a mounted corpus; host-global assistant hooks are out of scope. | `graph.json`, `GRAPH_REPORT.md`, and graph query output include edge types, confidence tags, source files, and source locations. | [XY-889](https://linear.app/hack-ink/issue/XY-889/elf-benchmark-adapter-implement-graphify-docker-graph-report-adapter) adds `cargo make smoke-graphify-docker-graph-report`; XY-900 promotes the tiny generated smoke to scored `live_real_world` `wrong_result` evidence while still avoiding broad quality claims. |
 | Letta | `research_only` | Docker server exists, but current docs require explicit embedding configuration and steer Letta Code evaluation toward non-Docker local/frontier-model exploration. | Core/archival memory and shared blocks remain useful semantics, but no contained evidence export is selected for this adapter batch. | No implementation issue. |
 | LangGraph | `research_only` | A Docker harness is possible, but the project is an agent-state/checkpoint framework rather than a standalone memory adapter. | Store search and checkpoints are references for replay-regression jobs, not a direct external memory output contract here. | No implementation issue. |
 | nanograph | `research_only` | Official positioning is one CLI / one folder / no server / no Docker. | Typed schema, query, CDC, and search ergonomics remain graph-lite DX inspiration. | No implementation issue. |
diff --git a/docs/guide/testing.md b/docs/guide/testing.md
index dbd539e0..480a8c61 100644
--- a/docs/guide/testing.md
+++ b/docs/guide/testing.md
@@ -10,9 +10,9 @@ Outputs: A consistent test-category name and the matching command or workflow.
 
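-- `unit` — Tests inside `#[cfg(test)]` modules in `src/`. Run with `cargo make test`.
-- `integration` — Rust integration tests under `tests/*.rs`. Run with `cargo make test`.
+- `unit` — Tests inside `#[cfg(test)]` modules in `src/`. Run with `cargo make test-rust`.
+- `integration` — Rust integration tests under `tests/*.rs`. Run with `cargo make test-rust`.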
-- `integration (ignored)` — Integration tests that require external services and are marked `#[ignore]`.
+- `integration (ignored)` — Integration tests that require external services and are marked `#[ignore]`. Run with `cargo make test-rust-integration`.
 - `acceptance` — The integration suite in `packages/elf-service/tests/acceptance.rs` and `packages/elf-service/tests/acceptance/*.rs`. These are usually `#[ignore]` and require external services.
-- `E2E harness` — Deterministic harness scripts for memory retrieval/ranking. Run locally with `cargo make e2e` and in CI via `.github/workflows/e2e.yml`.
+- `E2E harness` — Deterministic harness scripts for memory retrieval/ranking. Run locally with `cargo make test-e2e` and in CI via `.github/workflows/e2e.yml`.
 
 Note: Some integration tests require external services such as Postgres or Qdrant and are marked `#[ignore]`. When requesting those, say "integration (ignored)" so the ignored set is included.
 
diff --git a/docs/plans/2026-02-02-project-cleanup-design.md b/docs/plans/2026-02-02-project-cleanup-design.md
index 2199e4ba..4f6d6cf4 100644
--- a/docs/plans/2026-02-02-project-cleanup-design.md
+++ b/docs/plans/2026-02-02-project-cleanup-design.md
@@ -1,6 +1,6 @@
 # Project Cleanup Architecture Design
 
-**Goal:** Restructure each app into a library-plus-binary layout, remove `#[path]` test imports, and make `cargo make lint` pass without suppressing lints.
+**Goal:** Restructure each app into a library-plus-binary layout, remove `#[path]` test imports, and make `cargo make lint-rust` pass without suppressing lints.
 
 **Scope (Option 2):**
 - Apply the `lib + bin` layout to `elf-api`, `elf-mcp`, and `elf-worker`.
@@ -19,5 +19,5 @@
 - Any remaining clippy errors will be fixed by small structural adjustments rather than `#[allow]` attributes.
 
 **Testing and Verification:**
-- Run `cargo make lint` to confirm workspace linting passes.
+- Run `cargo make lint-rust` to confirm workspace linting passes.
 - Do not change test behavior; only update import paths and shared wiring required by the new layout.
diff --git a/docs/plans/2026-02-02-project-cleanup.md b/docs/plans/2026-02-02-project-cleanup.md
index 536991c7..a0ef40d4 100644
--- a/docs/plans/2026-02-02-project-cleanup.md
+++ b/docs/plans/2026-02-02-project-cleanup.md
@@ -2,7 +2,7 @@
 
 > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
 
-**Goal:** Refactor each app into a lib+bin layout, remove `#[path]` test imports, and keep CLI/logging behavior unchanged while ensuring `cargo make lint` passes.
+**Goal:** Refactor each app into a lib+bin layout, remove `#[path]` test imports, and keep CLI/logging behavior unchanged while ensuring `cargo make lint-rust` passes.
 
 **Architecture:** Each app exposes a small `lib.rs` with its CLI `Args` and `run` entrypoint plus existing modules. `main.rs` becomes a thin wrapper that parses CLI args and calls the library. Tests import the library modules instead of using `#[path]`.
 
@@ -250,7 +250,7 @@ git commit -m "refactor: move elf-mcp entrypoint into lib"
 - Modify: None
 
 **Step 1: Run lint**
-Run: `cargo make lint`
+Run: `cargo make lint-rust`
 Expected: PASS.
 
 **Step 2: Run targeted app tests**
diff --git a/docs/plans/2026-02-25-ci-services-checks-design.md b/docs/plans/2026-02-25-ci-services-checks-design.md
index 359c7017..92b8765d 100644
--- a/docs/plans/2026-02-25-ci-services-checks-design.md
+++ b/docs/plans/2026-02-25-ci-services-checks-design.md
@@ -43,7 +43,7 @@ Update `.github/workflows/integration.yml` to run on PR and merge queue (in addi
 
 In this workflow, run the full workspace test suite including ignored tests:
 
-- `cargo nextest run --workspace --all-targets --all-features --run-ignored all`
+- `cargo make test-rust-all`
 
 Rationale:
 
@@ -54,7 +54,7 @@ Rationale:
 
 Add a new workflow to run the lightweight, deterministic E2E harness:
 
-- `cargo make e2e` (which runs `scripts/context-misranking-harness.sh`)
+- `cargo make test-e2e` (which runs `scripts/context-misranking-harness.sh`)
 
 Key properties:
 
@@ -73,7 +73,6 @@ Do not change `.github/workflows/nightly-harness-signals.yml` scope: it remains
 - `Integration Tests` runs with `--run-ignored all` and succeeds on `main`.
 - A new E2E workflow runs on:
   - `pull_request`, `merge_group`, `workflow_dispatch`
-- E2E job starts Postgres + Qdrant via GitHub Actions services and successfully runs `cargo make e2e` without external secrets.
+- E2E job starts Postgres + Qdrant via GitHub Actions services and successfully runs `cargo make test-e2e` without external secrets.
 - Both workflows use `paths-ignore` for docs-only changes (`docs/**`, `**/*.md`, `.gitignore`).
 - Local docs reflect the updated meaning of “E2E harness” vs “nightly harness signals”.
-
diff --git a/docs/research/2026-06-11-competitor-strength-adoption-report.json b/docs/research/2026-06-11-competitor-strength-adoption-report.json
index cfe2f5ca..6404bc35 100644
--- a/docs/research/2026-06-11-competitor-strength-adoption-report.json
+++ b/docs/research/2026-06-11-competitor-strength-adoption-report.json
@@ -93,12 +93,12 @@
       "claim": "mem0 local OSS passes preference correction history, entity-scoped personalization, local get_all export-style readback, and deletion audit history; OpenMemory export-helper setup emits a separate blocked artifact with DOCKER_UNAVAILABLE_IN_BASELINE_RUNNER, and hosted Platform export remains non-goal."
     },
     {
-      "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke",
+      "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal",
       "artifact": "docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md",
       "claim": "Graphiti/Zep temporal smoke remains blocked by provider_api_key_missing when live provider execution is explicitly enabled without credentials."
     },
     {
-      "command": "cargo make graphify-docker-graph-report-smoke",
+      "command": "cargo make smoke-graphify-docker-graph-report",
       "artifact": "docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md",
       "claim": "graphify reaches tiny Docker graph/report scoring but remains wrong_result; broad graph/RAG quality is not tested."
     },
diff --git a/docs/research/2026-06-11-temporal-history-competitor-gap-report.json b/docs/research/2026-06-11-temporal-history-competitor-gap-report.json
index cb6cd9be..8bfcffd6 100644
--- a/docs/research/2026-06-11-temporal-history-competitor-gap-report.json
+++ b/docs/research/2026-06-11-temporal-history-competitor-gap-report.json
@@ -7,7 +7,7 @@
   "role_boundary": "No ELF optimization implementation is included; this report records evidence, claim boundaries, and future optimization directions.",
   "commands": [
     {
-      "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke",
+      "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal",
       "status": "blocked",
       "typed_status": "provider_api_key_missing",
       "runtime_seconds": 3.5,
diff --git a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
index 3de690bd..f74e0d45 100644
--- a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
+++ b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
@@ -237,7 +237,7 @@
       ],
       "measured_status": "blocked",
       "proof": {
-        "command": "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make ragflow-docker-smoke",
+        "command": "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker",
         "artifact": "tmp/real-world-memory/ragflow-smoke/ragflow-smoke.json"
       },
       "unsupported_or_blocked_status": {
@@ -257,7 +257,7 @@
       ],
       "measured_status": "blocked",
       "proof": {
-        "command": "ELF_LIGHTRAG_CONTEXT_START=1 cargo make lightrag-docker-context-smoke",
+        "command": "ELF_LIGHTRAG_CONTEXT_START=1 cargo make smoke-lightrag-docker-context",
         "artifact": "tmp/real-world-memory/lightrag-context/summary.json"
       },
       "unsupported_or_blocked_status": {
@@ -277,7 +277,7 @@
       ],
       "measured_status": "blocked",
       "proof": {
-        "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make graphrag-docker-smoke",
+        "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make smoke-graphrag-docker",
         "artifact": "tmp/real-world-memory/graphrag-smoke/summary.json"
       },
       "unsupported_or_blocked_status": {
@@ -297,7 +297,7 @@
       ],
       "measured_status": "blocked",
       "proof": {
-        "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke",
+        "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal",
         "artifact": "tmp/real-world-memory/graphiti-zep-smoke/summary.json"
       },
       "unsupported_or_blocked_status": {
@@ -417,7 +417,7 @@
       ],
       "measured_status": "wrong_result",
       "proof": {
-        "command": "cargo make graphify-docker-graph-report-smoke",
+        "command": "cargo make smoke-graphify-docker-graph-report",
         "artifact": "tmp/real-world-memory/graphify-smoke/graphify-report.json"
       },
       "unsupported_or_blocked_status": {
diff --git a/docs/research/2026-06-16-scheduled-memory-task-scoring-report.json b/docs/research/2026-06-16-scheduled-memory-task-scoring-report.json
index 612802ff..9bdae08b 100644
--- a/docs/research/2026-06-16-scheduled-memory-task-scoring-report.json
+++ b/docs/research/2026-06-16-scheduled-memory-task-scoring-report.json
@@ -1847,13 +1847,13 @@
         "setup": {
           "status": "blocked",
           "evidence": "XY-900 promotes the Docker-safe tiny-corpus evidence smoke into a generated real_world_job report while the checked-in row remains smoke-only research_gate evidence.",
-          "command": "cargo make ragflow-docker-smoke",
+          "command": "cargo make smoke-ragflow-docker",
           "artifact": "tmp/real-world-memory/ragflow-smoke/ragflow-smoke.json"
         },
         "run": {
           "status": "blocked",
           "evidence": "The live path requires explicit resource-envelope opt-in and a local self-hosted RAGFlow API key; setup failures stay typed in the generated smoke artifact.",
-          "command": "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make ragflow-docker-smoke",
+          "command": "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker",
           "artifact": "tmp/real-world-memory/ragflow-smoke/memory_projects_manifest.ragflow-smoke.json"
         },
         "result": {
@@ -1965,7 +1965,7 @@
           "runtime_boundary": "Run scripts/ragflow-docker-evidence-smoke.sh through cargo make; the live path uses the official RAGFlow Docker Compose service boundary without host-global RAGFlow installs.",
           "resource_expectation": "Large multi-service RAG stack; generated artifacts record CPU/GPU mode, memory, disk, image size, expanded disk notes, startup time, vm.max_map_count handling, and provider boundaries before scoring.",
           "retry_guidance": [
-            "Run cargo make ragflow-docker-smoke first to produce a typed preflight artifact.",
+            "Run cargo make smoke-ragflow-docker first to produce a typed preflight artifact.",
             "Start the live path only with ELF_RAGFLOW_SMOKE_START=1 and ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1.",
             "Keep private corpora and operator-owned provider credentials out of this smoke; map only generated public corpus reference chunks to evidence ids."
           ],
@@ -1991,13 +1991,13 @@
         "setup": {
           "status": "blocked",
           "evidence": "XY-886 adds a Docker-profile context-export smoke command, and XY-900 keeps its generated retrieval fixtures scored through real_world_job_benchmark. The checked-in row remains smoke-only research_gate evidence.",
-          "command": "cargo make lightrag-docker-context-smoke",
+          "command": "cargo make smoke-lightrag-docker-context",
           "artifact": "tmp/real-world-memory/lightrag-context/lightrag-materialization.json"
         },
         "run": {
           "status": "blocked",
           "evidence": "The default smoke records a typed setup/runtime failure if the LightRAG API is unavailable; set ELF_LIGHTRAG_CONTEXT_START=1 to start the opt-in Docker service profile.",
-          "command": "ELF_LIGHTRAG_CONTEXT_START=1 cargo make lightrag-docker-context-smoke",
+          "command": "ELF_LIGHTRAG_CONTEXT_START=1 cargo make smoke-lightrag-docker-context",
           "artifact": "tmp/real-world-memory/lightrag-context/summary.json"
         },
         "result": {
@@ -2078,7 +2078,7 @@
           },
           {
             "kind": "command",
-            "ref": "cargo make lightrag-docker-context-smoke",
+            "ref": "cargo make smoke-lightrag-docker-context",
             "status": "blocked"
           },
           {
@@ -2115,11 +2115,11 @@
               "evidence": "Official source-id and file-path citation reference."
             }
           ],
-          "setup_path": "Run cargo make lightrag-docker-context-smoke for a typed preflight artifact; set ELF_LIGHTRAG_CONTEXT_START=1 to start the opt-in LightRAG Docker profile and attempt live context export.",
+          "setup_path": "Run cargo make smoke-lightrag-docker-context for a typed preflight artifact; set ELF_LIGHTRAG_CONTEXT_START=1 to start the opt-in LightRAG Docker profile and attempt live context export.",
           "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus opt-in lightrag and lightrag-mock-provider services; generated source files and LightRAG data stay in Docker-mounted artifact paths and Docker volumes.",
           "resource_expectation": "The default profile uses the official LightRAG image, a local OpenAI-compatible mock provider, 64-dimensional embeddings, rerank disabled for context queries, cargo/pip/Hugging Face caches, and Docker volumes for rag_storage, inputs, and prompts.",
           "retry_guidance": [
-            "Run cargo make lightrag-docker-context-smoke first; a missing API must remain a typed incomplete artifact, not a pass claim.",
+            "Run cargo make smoke-lightrag-docker-context first; a missing API must remain a typed incomplete artifact, not a pass claim.",
             "Set ELF_LIGHTRAG_CONTEXT_START=1 only when Docker may pull/start the LightRAG service profile.",
             "Score retrieval only when returned context, references.file_path, or references.content map to required evidence ids."
           ],
@@ -2145,13 +2145,13 @@
         "setup": {
           "status": "blocked",
           "evidence": "XY-900 promotes the Docker-safe generated-corpus GraphRAG smoke into a scored knowledge_compilation report while the checked-in row remains smoke-only research_gate evidence.",
-          "command": "cargo make graphrag-docker-smoke",
+          "command": "cargo make smoke-graphrag-docker",
           "artifact": "tmp/real-world-memory/graphrag-smoke/graphrag-smoke.json"
         },
         "run": {
           "status": "blocked",
           "evidence": "The default smoke records a typed blocked artifact without model calls; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration to attempt live GraphRAG index/query.",
-          "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make graphrag-docker-smoke",
+          "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make smoke-graphrag-docker",
           "artifact": "tmp/real-world-memory/graphrag-smoke/summary.json"
         },
         "result": {
@@ -2237,7 +2237,7 @@
           },
           {
             "kind": "command",
-            "ref": "cargo make graphrag-docker-smoke",
+            "ref": "cargo make smoke-graphrag-docker",
             "status": "blocked"
           },
           {
@@ -2279,11 +2279,11 @@
               "evidence": "Official local-search context and graph traversal reference."
             }
           ],
-          "setup_path": "Run cargo make graphrag-docker-smoke for a typed preflight artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live GraphRAG index/query attempt.",
+          "setup_path": "Run cargo make smoke-graphrag-docker for a typed preflight artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live GraphRAG index/query attempt.",
           "runtime_boundary": "docker-compose.baseline.yml baseline-runner, container-local Python venv, generated public corpus, and report artifacts under tmp/real-world-memory/graphrag-smoke.",
           "resource_expectation": "The default profile uses a generated public corpus capped by ELF_GRAPHRAG_MAX_DOCS and ELF_GRAPHRAG_MAX_INPUT_CHARS, pins GraphRAG through ELF_GRAPHRAG_PACKAGE, and records elapsed time, cache size, output size, and observed cache entries.",
           "retry_guidance": [
-            "Run cargo make graphrag-docker-smoke first; missing provider configuration must remain a typed blocked artifact, not a pass claim.",
+            "Run cargo make smoke-graphrag-docker first; missing provider configuration must remain a typed blocked artifact, not a pass claim.",
             "Enable ELF_GRAPHRAG_SMOKE_RUN=1 only for generated public corpus indexing with explicit provider configuration.",
             "Fail typed if source document or text_unit identifiers cannot be mapped to expected evidence IDs."
           ],
@@ -2309,13 +2309,13 @@
         "setup": {
           "status": "blocked",
           "evidence": "XY-900 promotes the Docker-contained Graphiti/Zep temporal smoke into a scored memory_evolution report while the checked-in row remains smoke-only research_gate evidence.",
-          "command": "cargo make graphiti-zep-docker-temporal-smoke",
+          "command": "cargo make smoke-graphiti-zep-docker-temporal",
           "artifact": "tmp/real-world-memory/graphiti-zep-smoke/graphiti-zep-smoke.json"
         },
         "run": {
           "status": "blocked",
           "evidence": "The default smoke records a typed setup/runtime failure if live execution is not explicitly enabled. Set ELF_GRAPHITI_ZEP_SMOKE_START=1 and ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration to start Docker-local FalkorDB and run Graphiti.",
-          "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke",
+          "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal",
           "artifact": "tmp/real-world-memory/graphiti-zep-smoke/summary.json"
         },
         "result": {
@@ -2396,7 +2396,7 @@
           },
           {
             "kind": "command",
-            "ref": "cargo make graphiti-zep-docker-temporal-smoke",
+            "ref": "cargo make smoke-graphiti-zep-docker-temporal",
             "status": "blocked"
           },
           {
@@ -2438,11 +2438,11 @@
               "evidence": "Official manual fact-triple ingest contract."
             }
           ],
-          "setup_path": "Run cargo make graphiti-zep-docker-temporal-smoke for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt.",
+          "setup_path": "Run cargo make smoke-graphiti-zep-docker-temporal for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt.",
           "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus graphiti-zep FalkorDB profile, container-local Python venv, generated public temporal facts, and report artifacts under tmp/real-world-memory/graphiti-zep-smoke.",
           "resource_expectation": "Requires Docker-local FalkorDB plus LLM/embedding configuration; generated artifacts record service startup, storage size, provider boundaries, fact count, and timeout before scoring.",
           "retry_guidance": [
-            "Run cargo make graphiti-zep-docker-temporal-smoke first to produce a typed blocked artifact.",
+            "Run cargo make smoke-graphiti-zep-docker-temporal first to produce a typed blocked artifact.",
             "Start the live path only with ELF_GRAPHITI_ZEP_SMOKE_START=1, ELF_GRAPHITI_ZEP_SMOKE_RUN=1, and explicit provider configuration.",
             "Treat missing validity windows or unmapped current/historical facts as wrong_result, not pass."
           ],
@@ -2954,13 +2954,13 @@
         "setup": {
           "status": "pass",
           "evidence": "XY-900 validation reached the Docker-only graph/report smoke setup inside the baseline runner without host-global assistant hooks.",
-          "command": "cargo make graphify-docker-graph-report-smoke",
+          "command": "cargo make smoke-graphify-docker-graph-report",
           "artifact": "tmp/real-world-memory/graphify-smoke/graphify-smoke.json"
         },
         "run": {
           "status": "pass",
           "evidence": "The smoke installed graphify in a container-local venv, ran over a generated public corpus, and produced graph/report/query output for scoring.",
-          "command": "cargo make graphify-docker-graph-report-smoke",
+          "command": "cargo make smoke-graphify-docker-graph-report",
           "artifact": "tmp/real-world-memory/graphify-smoke/summary.json"
         },
         "result": {
@@ -3041,7 +3041,7 @@
           },
           {
             "kind": "command",
-            "ref": "cargo make graphify-docker-graph-report-smoke",
+            "ref": "cargo make smoke-graphify-docker-graph-report",
             "status": "wrong_result"
           },
           {
@@ -3068,11 +3068,11 @@
               "evidence": "Official CLI, output artifact, query, and source-location contract."
             }
           ],
-          "setup_path": "Run cargo make graphify-docker-graph-report-smoke to install graphify in Docker, build graph/report artifacts from a generated public corpus, and export query evidence without installing host-global assistant hooks.",
+          "setup_path": "Run cargo make smoke-graphify-docker-graph-report to install graphify in Docker, build graph/report artifacts from a generated public corpus, and export query evidence without installing host-global assistant hooks.",
           "runtime_boundary": "docker-compose.baseline.yml baseline-runner, container-local Python venv, isolated HOME/config paths, generated public corpus, and artifacts under tmp/real-world-memory/graphify-smoke.",
           "resource_expectation": "Graph build cost scales with corpus and model choices; generated artifacts record package reference, provider/model boundary, build time, graph size, report size, cache size, timeout, and retry behavior.",
           "retry_guidance": [
-            "Run cargo make graphify-docker-graph-report-smoke first; setup/runtime failures must remain typed artifacts, not pass claims.",
+            "Run cargo make smoke-graphify-docker-graph-report first; setup/runtime failures must remain typed artifacts, not pass claims.",
             "Do not use graphify host assistant hook installs or operator-owned assistant configuration as proof.",
             "Score graph-guided answers only when graph.json, GRAPH_REPORT.md, and graphify query output map to generated evidence ids."
           ],
diff --git a/docs/spec/production_corpus_manifest_v1.md b/docs/spec/production_corpus_manifest_v1.md
index 05bc417e..36347823 100644
--- a/docs/spec/production_corpus_manifest_v1.md
+++ b/docs/spec/production_corpus_manifest_v1.md
@@ -82,7 +82,7 @@ evidence ID. It must not silently fall back to the checked-in synthetic corpus.
       "evidence_id": "issue-xy123-resume",
       "category": "issue",
       "title": "XY-123 Resume State",
-      "text": "XY-123 resumes on branch y/example with command `cargo make checks`."
+      "text": "XY-123 resumes on branch y/example with command `cargo make check`."
     }
   ],
   "queries": [
@@ -92,7 +92,7 @@ evidence ID. It must not silently fall back to the checked-in synthetic corpus.
       "query": "How do I resume XY-123?",
       "expected_evidence_ids": ["issue-xy123-resume"],
       "allowed_alternate_evidence_ids": [],
-      "expected_terms": ["XY-123", "cargo make checks"]
+      "expected_terms": ["XY-123", "cargo make check"]
     }
   ]
 }
diff --git a/scripts/baseline-docker.sh b/scripts/baseline-docker.sh
new file mode 100755
index 00000000..a6e38d82
--- /dev/null
+++ b/scripts/baseline-docker.sh
@@ -0,0 +1,173 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+profile="${1:-}"  # first positional argument selects the baseline profile; empty when omitted
+if [ -z "$profile" ]; then
+	echo "usage: scripts/baseline-docker.sh <profile>" >&2
+	exit 2  # usage error
+fi
+
+head="$(git rev-parse HEAD)"  # commit id of the current checkout
+if [ -n "$(git status --porcelain)" ]; then
+	head="$head+dirty"  # git reported pending changes, so the id is suffixed with +dirty
+fi
+
+run_baseline() {  # one-off run of the baseline-runner service from docker-compose.baseline.yml
+	docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner  # --build rebuilds the image first, --rm removes the container afterwards
+}
+
+selected_projects_or_default() {  # prints ELF_BASELINE_PROJECTS, or "ELF" when it is unset or empty
+	local selected_projects
+	selected_projects="$(printenv ELF_BASELINE_PROJECTS || true)"  # || true: printenv fails for an unset variable and set -e would abort
+	if [ -z "$selected_projects" ]; then
+		selected_projects="ELF"
+	fi
+	printf '%s' "$selected_projects"  # printed without a trailing newline
+}
+
+case "$profile" in  # every known arm exports ELF_BASELINE_* settings and then calls run_baseline
+live)  # exports only the checkout id
+	export ELF_BASELINE_ELF_HEAD="$head"
+	run_baseline
+	;;
+backfill)  # caller may override projects, profile, document count, and both time limits
+	selected_projects="$(selected_projects_or_default)"
+	selected_profile="$(printenv ELF_BASELINE_PROFILE || true)"
+	if [ -z "$selected_profile" ]; then
+		selected_profile="backfill"
+	fi
+	backfill_docs="$(printenv ELF_BASELINE_BACKFILL_DOCS || true)"
+	if [ -z "$backfill_docs" ]; then
+		backfill_docs="2000"
+	fi
+	elf_timeout="$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)"
+	if [ -z "$elf_timeout" ]; then
+		elf_timeout="3600"
+	fi
+	max_elf_seconds="$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)"
+	if [ -z "$max_elf_seconds" ]; then
+		max_elf_seconds="3600"  # fixed default here; the 10k/100k/soak arms default this to elf_timeout instead
+	fi
+	export ELF_BASELINE_ELF_HEAD="$head"
+	export ELF_BASELINE_PROJECTS="$selected_projects"
+	export ELF_BASELINE_PROFILE="$selected_profile"
+	export ELF_BASELINE_BACKFILL_DOCS="$backfill_docs"
+	export ELF_BASELINE_ELF_TIMEOUT_SECONDS="$elf_timeout"
+	export ELF_BASELINE_MAX_ELF_SECONDS="$max_elf_seconds"
+	run_baseline
+	;;
+openmemory-ui-export-readback)  # project list is pinned to mem0; any caller-provided ELF_BASELINE_PROJECTS is overwritten
+	export ELF_BASELINE_ELF_HEAD="$head"
+	export ELF_BASELINE_PROJECTS=mem0
+	run_baseline
+	;;
+production-synthetic)
+	selected_projects="$(selected_projects_or_default)"
+	export ELF_BASELINE_ELF_HEAD="$head"
+	export ELF_BASELINE_PROJECTS="$selected_projects"
+	export ELF_BASELINE_PROFILE=production-synthetic
+	run_baseline
+	;;
+production-private)
+	manifest="$(printenv ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST || true)"  # read only to check presence; not exported again by this arm
+	if [ -z "$manifest" ]; then
+		echo "ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST is required for baseline-production-private" >&2
+		exit 1
+	fi
+	selected_projects="$(selected_projects_or_default)"
+	export ELF_BASELINE_ELF_HEAD="$head"
+	export ELF_BASELINE_PROJECTS="$selected_projects"
+	export ELF_BASELINE_PROFILE=production-private
+	run_baseline
+	;;
+production-private-addendum)  # same run as production-private, followed by a Markdown report step
+	manifest="$(printenv ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST || true)"
+	if [ -z "$manifest" ]; then
+		echo "ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST is required for baseline-production-private-addendum" >&2
+		exit 1
+	fi
+	selected_projects="$(selected_projects_or_default)"
+	addendum="$(printenv ELF_BASELINE_PRIVATE_ADDENDUM || true)"
+	if [ -z "$addendum" ]; then
+		addendum="tmp/live-baseline/private-production-addendum.md"
+	fi
+	export ELF_BASELINE_ELF_HEAD="$head"
+	export ELF_BASELINE_PROJECTS="$selected_projects"
+	export ELF_BASELINE_PROFILE=production-private
+	run_baseline
+	ELF_BASELINE_MARKDOWN_REPORT="$addendum" bash scripts/live-baseline-report-to-md.sh  # reached only when run_baseline succeeded (set -e)
+	echo "Private production addendum: $addendum"
+	;;
+backfill-10k)  # project and profile are pinned to ELF/backfill; only sizes and limits can be overridden
+	backfill_docs="$(printenv ELF_BASELINE_BACKFILL_DOCS || true)"
+	if [ -z "$backfill_docs" ]; then
+		backfill_docs="10000"
+	fi
+	elf_timeout="$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)"
+	if [ -z "$elf_timeout" ]; then
+		elf_timeout="14400"
+	fi
+	max_elf_seconds="$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)"
+	if [ -z "$max_elf_seconds" ]; then
+		max_elf_seconds="$elf_timeout"  # follows the timeout unless set explicitly
+	fi
+	export ELF_BASELINE_ELF_HEAD="$head"
+	export ELF_BASELINE_PROJECTS=ELF
+	export ELF_BASELINE_PROFILE=backfill
+	export ELF_BASELINE_BACKFILL_DOCS="$backfill_docs"
+	export ELF_BASELINE_ELF_TIMEOUT_SECONDS="$elf_timeout"
+	export ELF_BASELINE_MAX_ELF_SECONDS="$max_elf_seconds"
+	run_baseline
+	;;
+backfill-100k)
+	enabled="$(printenv ELF_BASELINE_ENABLE_EXPENSIVE || true)"
+	if [ "$enabled" != "1" ]; then  # explicit opt-in: anything other than exactly 1 refuses to run
+		echo "ELF_BASELINE_ENABLE_EXPENSIVE=1 is required for baseline-backfill-100k-docker" >&2  # NOTE(review): confirm this name still matches the cargo-make task after the renames
+		exit 1
+	fi
+	backfill_docs="$(printenv ELF_BASELINE_BACKFILL_DOCS || true)"
+	if [ -z "$backfill_docs" ]; then
+		backfill_docs="100000"
+	fi
+	elf_timeout="$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)"
+	if [ -z "$elf_timeout" ]; then
+		elf_timeout="86400"
+	fi
+	max_elf_seconds="$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)"
+	if [ -z "$max_elf_seconds" ]; then
+		max_elf_seconds="$elf_timeout"
+	fi
+	export ELF_BASELINE_ELF_HEAD="$head"
+	export ELF_BASELINE_PROJECTS=ELF
+	export ELF_BASELINE_PROFILE=backfill
+	export ELF_BASELINE_BACKFILL_DOCS="$backfill_docs"
+	export ELF_BASELINE_ELF_TIMEOUT_SECONDS="$elf_timeout"
+	export ELF_BASELINE_MAX_ELF_SECONDS="$max_elf_seconds"
+	run_baseline
+	;;
+soak)  # runs with ELF_BASELINE_PROFILE=stress
+	soak_seconds="$(printenv ELF_BASELINE_SOAK_SECONDS || true)"
+	if [ -z "$soak_seconds" ]; then
+		soak_seconds="3600"
+	fi
+	elf_timeout="$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)"
+	if [ -z "$elf_timeout" ]; then
+		elf_timeout="$((soak_seconds + 1800))"  # soak duration plus 1800 seconds; soak_seconds must be an integer here
+	fi
+	max_elf_seconds="$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)"
+	if [ -z "$max_elf_seconds" ]; then
+		max_elf_seconds="$elf_timeout"
+	fi
+	export ELF_BASELINE_ELF_HEAD="$head"
+	export ELF_BASELINE_PROJECTS=ELF
+	export ELF_BASELINE_PROFILE=stress
+	export ELF_BASELINE_SOAK_SECONDS="$soak_seconds"
+	export ELF_BASELINE_ELF_TIMEOUT_SECONDS="$elf_timeout"
+	export ELF_BASELINE_MAX_ELF_SECONDS="$max_elf_seconds"
+	run_baseline
+	;;
+*)  # any other profile name is a usage error
+	echo "unknown baseline profile: $profile" >&2
+	exit 2
+	;;
+esac
diff --git a/scripts/check-docs.py b/scripts/check-docs.py
new file mode 100755
index 00000000..9f64d34e
--- /dev/null
+++ b/scripts/check-docs.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import re
+import sys
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]  # parent of the directory that contains this script
+TASK_RE = re.compile(r"^\[tasks\.([^\]]+)\]", re.MULTILINE)  # captures NAME from a line that starts with [tasks.NAME]
+CARGO_MAKE_RE = re.compile(r"\bcargo\s+make\s+([A-Za-z0-9][A-Za-z0-9_:-]*)")  # captures the word that follows "cargo make"
+MARKDOWN_LINK_RE = re.compile(r"!?\[[^\]\n]*\]\(([^)\n]+)\)")  # captures the (...) part of [text](...) and ![alt](...)
+
+
+def read_text(path: Path) -> str:  # whole file content, decoded as UTF-8
+	return path.read_text(encoding="utf-8")
+
+
+def cargo_make_tasks() -> set[str]:  # names of all [tasks.NAME] tables found in ROOT/Makefile.toml
+	return set(TASK_RE.findall(read_text(ROOT / "Makefile.toml")))
+
+
+def iter_reference_files() -> list[Path]:  # sorted list of the files the checks draw from
+	roots = [
+		ROOT / "README.md",
+		ROOT / "AGENTS.md",
+		ROOT / "docs",
+		ROOT / ".github" / "workflows",
+	]
+	files: list[Path] = []
+	for root in roots:
+		if root.is_file():
+			files.append(root)  # a root that is itself a file is taken without the suffix filter
+			continue
+		if root.is_dir():  # a root that is neither a file nor a directory contributes nothing
+			files.extend(
+				path
+				for path in root.rglob("*")  # recursive walk of the directory
+				if path.suffix in {".md", ".yml", ".yaml"}
+			)
+	return sorted(files)
+
+
+def iter_markdown_files() -> list[Path]:
+	"""Return the ".md" subset of iter_reference_files(), in the same order."""
+	markdown_files: list[Path] = []
+	for candidate in iter_reference_files():
+		markdown_files += [candidate] if candidate.suffix == ".md" else []
+	return markdown_files
+
+
+def normalize_link_target(raw_target: str) -> str:  # reduce the raw (...) payload of a link to its bare target
+	target = raw_target.strip()
+	if target.startswith("<") and ">" in target:
+		target = target[1:target.index(">")]  # <target> form: keep the text between the angle brackets
+	elif " " in target:
+		target = target.split(maxsplit=1)[0]  # keep the first whitespace-separated token, dropping e.g. a quoted title
+	return target
+
+
+def is_external_or_anchor(target: str) -> bool:  # True when target is not a local file path that should be verified
+	return (
+		not target  # empty target: nothing to resolve
+		or target.startswith("#")  # same-document anchor
+		or target.startswith("//")  # protocol-relative URL; a single leading "/" is a repo-root path and must be checked
+		or bool(re.match(r"^[A-Za-z][A-Za-z0-9+.-]*:", target))  # scheme-qualified target such as https: or mailto:
+	)
+
+
+def check_cargo_make_references(tasks: set[str]) -> list[str]:  # one message per "cargo make NAME" whose NAME is not in tasks
+	errors: list[str] = []
+	for path in iter_reference_files():
+		for line_number, line in enumerate(read_text(path).splitlines(), start=1):  # 1-based line numbers for the messages
+			for match in CARGO_MAKE_RE.finditer(line):  # a line may mention several tasks
+				task = match.group(1)
+				if task not in tasks:
+					rel_path = path.relative_to(ROOT)  # report paths relative to ROOT
+					errors.append(f"{rel_path}:{line_number}: unknown cargo make task `{task}`")
+	return errors
+
+
+def check_markdown_links() -> list[str]:  # one message per Markdown link whose local target does not exist
+	errors: list[str] = []
+	for path in iter_markdown_files():
+		for line_number, line in enumerate(read_text(path).splitlines(), start=1):  # links are matched one line at a time
+			for match in MARKDOWN_LINK_RE.finditer(line):
+				target = normalize_link_target(match.group(1))
+				if is_external_or_anchor(target):
+					continue
+				path_part = target.split("#", maxsplit=1)[0]  # drop a trailing #fragment before touching the filesystem
+				if not path_part:
+					continue
+				candidate = (
+					ROOT / path_part.removeprefix("/")  # a leading "/" is resolved against ROOT
+					if path_part.startswith("/")
+					else path.parent / path_part  # anything else is relative to the directory of the linking file
+				)
+				if not candidate.exists():
+					rel_path = path.relative_to(ROOT)
+					errors.append(f"{rel_path}:{line_number}: broken local link `{target}`")
+	return errors
+
+
+def main() -> int:
+	"""Run both checks; print every finding to stderr and return 1, or report success and return 0."""
+	findings = [*check_cargo_make_references(cargo_make_tasks()), *check_markdown_links()]
+	for finding in findings:
+		print(finding, file=sys.stderr)
+	if findings:
+		return 1
+	print("check-docs passed")
+	return 0
+
+
+if __name__ == "__main__":
+	raise SystemExit(main())
diff --git a/scripts/graphify-docker-graph-report-smoke.py b/scripts/graphify-docker-graph-report-smoke.py
index 0035a1b9..c5ac0cfc 100755
--- a/scripts/graphify-docker-graph-report-smoke.py
+++ b/scripts/graphify-docker-graph-report-smoke.py
@@ -1209,13 +1209,13 @@ def write_manifest(status: StatusState) -> dict[str, Any]:
                 "setup": {
                     "status": status.setup,
                     "evidence": "The smoke installs graphify in a container-local Python venv and runs with isolated assistant config paths.",
-                    "command": "cargo make graphify-docker-graph-report-smoke",
+                    "command": "cargo make smoke-graphify-docker-graph-report",
                     "artifact": rel(OUT),
                 },
                 "run": {
                     "status": status.run,
                     "evidence": "The live path builds graphify graph/report artifacts from a generated public corpus and runs graphify query over graph.json.",
-                    "command": "cargo make graphify-docker-graph-report-smoke",
+                    "command": "cargo make smoke-graphify-docker-graph-report",
                     "artifact": rel(OUT),
                 },
                 "result": {
@@ -1298,11 +1298,11 @@ def write_manifest(status: StatusState) -> dict[str, Any]:
                             "evidence": "Official package referenced by the graphify README.",
                         },
                     ],
-                    "setup_path": "Run cargo make graphify-docker-graph-report-smoke to install graphify in a container-local venv and build graph/report artifacts over generated public files.",
+                    "setup_path": "Run cargo make smoke-graphify-docker-graph-report to install graphify in a container-local venv and build graph/report artifacts over generated public files.",
                     "runtime_boundary": "docker-compose.baseline.yml baseline-runner, isolated HOME/config paths, generated corpus, and artifacts under tmp/real-world-memory/graphify-smoke.",
                     "resource_expectation": f"graphify package {GRAPHIFY_REF}, generated_files=4, timeout_seconds={TIMEOUT_SECONDS}, query_budget={QUERY_BUDGET}.",
                     "retry_guidance": [
-                        "Rerun cargo make graphify-docker-graph-report-smoke after dependency or runtime fixes.",
+                        "Rerun cargo make smoke-graphify-docker-graph-report after dependency or runtime fixes.",
                         "Do not use graphify install hooks, host-global Codex/Claude/Gemini config, or private corpora as proof.",
                         "Score only when graph.json, GRAPH_REPORT.md, and graphify query output map to generated evidence ids.",
                     ],
@@ -1404,7 +1404,7 @@ def main() -> int:
         status.result = "incomplete"
         status.overall = "incomplete"
         status.failure_class = "not_running_in_docker"
-        status.failure_reason = "graphify smoke must run inside Docker; use cargo make graphify-docker-graph-report-smoke."
+        status.failure_reason = "graphify smoke must run inside Docker; use cargo make smoke-graphify-docker-graph-report."
     elif not command_available("python3"):
         status.setup = "incomplete"
         status.result = "incomplete"
diff --git a/scripts/graphiti-zep-docker-temporal-smoke.py b/scripts/graphiti-zep-docker-temporal-smoke.py
index 5ba1cc34..065bb78c 100644
--- a/scripts/graphiti-zep-docker-temporal-smoke.py
+++ b/scripts/graphiti-zep-docker-temporal-smoke.py
@@ -1003,13 +1003,13 @@ def write_manifest(status: StatusState) -> dict[str, Any]:
                 "setup": {
                     "status": status.setup,
                     "evidence": "The smoke runs inside the baseline Docker runner and uses Docker-local FalkorDB plus a container-local Python venv.",
-                    "command": "cargo make graphiti-zep-docker-temporal-smoke",
+                    "command": "cargo make smoke-graphiti-zep-docker-temporal",
                     "artifact": rel(OUT),
                 },
                 "run": {
                     "status": status.run,
                     "evidence": "The live path adds generated temporal fact triples and searches Graphiti/Zep for UUID, fact, valid_at, invalid_at, and source node evidence.",
-                    "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke",
+                    "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal",
                     "artifact": rel(OUT),
                 },
                 "result": {
@@ -1101,7 +1101,7 @@ def write_manifest(status: StatusState) -> dict[str, Any]:
                             "evidence": "Official manual fact-triple ingest contract.",
                         },
                     ],
-                    "setup_path": "Run cargo make graphiti-zep-docker-temporal-smoke for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt.",
+                    "setup_path": "Run cargo make smoke-graphiti-zep-docker-temporal for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt.",
                     "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus graphiti-zep FalkorDB profile, container-local Python venv, generated public temporal facts, and report artifacts under tmp/real-world-memory/graphiti-zep-smoke.",
                     "resource_expectation": f"Graphiti package {GRAPHITI_REF}, fact_count=3, timeout_seconds={TIMEOUT_SECONDS}, FalkorDB host={FALKORDB_HOST}:{FALKORDB_PORT}.",
                     "retry_guidance": [
@@ -1185,7 +1185,7 @@ def main() -> int:
         status.result = "incomplete"
         status.overall = "incomplete"
         status.failure_class = "not_running_in_docker"
-        status.failure_reason = "Graphiti/Zep smoke must run inside Docker; use cargo make graphiti-zep-docker-temporal-smoke."
+        status.failure_reason = "Graphiti/Zep smoke must run inside Docker; use cargo make smoke-graphiti-zep-docker-temporal."
         mapping["status"] = status.result
         mapping["reason"] = status.failure_reason
     elif not command_available("python3"):
diff --git a/scripts/graphrag-docker-smoke.py b/scripts/graphrag-docker-smoke.py
index 02be1560..c6b01d45 100755
--- a/scripts/graphrag-docker-smoke.py
+++ b/scripts/graphrag-docker-smoke.py
@@ -1186,13 +1186,13 @@ def write_manifest(status: StatusState) -> dict[str, Any]:
                 "setup": {
                     "status": status.setup,
                     "evidence": "The smoke runs inside the baseline Docker runner and installs or invokes GraphRAG only in the container-local work directory.",
-                    "command": "cargo make graphrag-docker-smoke",
+                    "command": "cargo make smoke-graphrag-docker",
                     "artifact": rel(OUT),
                 },
                 "run": {
                     "status": status.run,
                     "evidence": "The live path generates a tiny public corpus, initializes GraphRAG, indexes with bounded inputs, and runs local search when provider config is supplied.",
-                    "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make graphrag-docker-smoke",
+                    "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make smoke-graphrag-docker",
                     "artifact": rel(OUT),
                 },
                 "result": {
@@ -1286,7 +1286,7 @@ def write_manifest(status: StatusState) -> dict[str, Any]:
                             "evidence": "Official local-search context and graph traversal reference.",
                         },
                     ],
-                    "setup_path": "Run cargo make graphrag-docker-smoke for a typed artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live index/query attempt.",
+                    "setup_path": "Run cargo make smoke-graphrag-docker for a typed artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live index/query attempt.",
                     "runtime_boundary": "docker-compose.baseline.yml baseline-runner, container-local Python venv, generated public corpus, and report artifacts under tmp/real-world-memory/graphrag-smoke.",
                     "resource_expectation": f"GraphRAG package {GRAPH_RAG_REF}, max_docs={MAX_DOCS}, max_input_chars={MAX_INPUT_CHARS}, timeout_seconds={TIMEOUT_SECONDS}, index_method={INDEX_METHOD}.",
                     "retry_guidance": [
@@ -1378,7 +1378,7 @@ def main() -> int:
         status.result = "incomplete"
         status.overall = "incomplete"
         status.failure_class = "not_running_in_docker"
-        status.failure_reason = "GraphRAG smoke must run inside Docker; use cargo make graphrag-docker-smoke."
+        status.failure_reason = "GraphRAG smoke must run inside Docker; use cargo make smoke-graphrag-docker."
     elif not command_available("python3"):
         status.setup = "incomplete"
         status.result = "incomplete"
diff --git a/scripts/lightrag-docker-context-smoke.sh b/scripts/lightrag-docker-context-smoke.sh
index 6e4d302e..a643d286 100644
--- a/scripts/lightrag-docker-context-smoke.sh
+++ b/scripts/lightrag-docker-context-smoke.sh
@@ -14,7 +14,7 @@ INDEX_ATTEMPTS="${ELF_LIGHTRAG_INDEX_ATTEMPTS:-60}"
 INDEX_INTERVAL_SECONDS="${ELF_LIGHTRAG_INDEX_INTERVAL_SECONDS:-2}"
 
 if [[ ! -f "/.dockerenv" && "${ELF_LIGHTRAG_CONTEXT_ALLOW_HOST:-0}" != "1" ]]; then
-  echo "Refusing to run LightRAG context smoke outside Docker. Use cargo make lightrag-docker-context-smoke." >&2
+  echo "Refusing to run LightRAG context smoke outside Docker. Use cargo make smoke-lightrag-docker-context." >&2
   exit 1
 fi
 
diff --git a/scripts/parity-docker-gate.sh b/scripts/parity-docker-gate.sh
index 99cd5aaf..62fa0ec1 100755
--- a/scripts/parity-docker-gate.sh
+++ b/scripts/parity-docker-gate.sh
@@ -151,7 +151,7 @@ write_report() {
         },
         cleanup: {
           status: "documented",
-          command: "cargo make parity-docker-clean"
+          command: "cargo make clean-parity-docker"
         }
       },
       thresholds: {
diff --git a/scripts/ragflow-docker-evidence-smoke.sh b/scripts/ragflow-docker-evidence-smoke.sh
index 95cd50f5..17dd572f 100755
--- a/scripts/ragflow-docker-evidence-smoke.sh
+++ b/scripts/ragflow-docker-evidence-smoke.sh
@@ -687,8 +687,8 @@ write_artifact() {
 			},
 			setup: {
 				status: $setup_status,
-				command: "cargo make ragflow-docker-smoke",
-				live_command: "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make ragflow-docker-smoke",
+				command: "cargo make smoke-ragflow-docker",
+				live_command: "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker",
 				started: ($started == "true"),
 				startup_time_ms: (if $startup_time_ms == "" then null else ($startup_time_ms | tonumber) end),
 				vm_max_map_count: {
@@ -847,13 +847,13 @@ write_manifest() {
 					setup: {
 						status: $setup_status,
 						evidence: "Official RAGFlow Docker Compose boundary and resource envelope were evaluated for the tiny evidence smoke.",
-						command: "cargo make ragflow-docker-smoke",
+						command: "cargo make smoke-ragflow-docker",
 						artifact: $out_rel
 					},
 					run: {
 						status: $run_status,
 						evidence: "The smoke attempts dataset creation, empty-document corpus ingest, chunk insert, retrieval query, and reference chunk extraction.",
-						command: "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make ragflow-docker-smoke",
+						command: "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker",
 						artifact: $out_rel
 					},
 					result: {
diff --git a/scripts/real-world-docker.sh b/scripts/real-world-docker.sh
new file mode 100755
index 00000000..a6413839
--- /dev/null
+++ b/scripts/real-world-docker.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+profile="${1:-}"  # first positional argument selects the real-world profile; empty when omitted
+if [ -z "$profile" ]; then
+	echo "usage: scripts/real-world-docker.sh <profile>" >&2
+	exit 2  # usage error
+fi
+
+case "$profile" in  # each known arm runs one script inside the baseline-runner service
+job-operator-ux-live-adapters)  # the value-less -e NAME flags below name variables without assigning them in this script
+	docker compose -f docker-compose.baseline.yml run --build --rm \
+		-e ELF_OPERATOR_DEBUG_LIVE_REPORT_DIR \
+		-e ELF_OPERATOR_DEBUG_LIVE_FIXTURES \
+		-e ELF_OPERATOR_DEBUG_LIVE_WORK_DIR \
+		-e ELF_OPERATOR_DEBUG_QMD_DIR \
+		baseline-runner bash scripts/real-world-operator-debug-live-adapters.sh
+	;;
+memory-live-consolidation)
+	docker compose -f docker-compose.baseline.yml run --build --rm \
+		-e ELF_CONSOLIDATION_LIVE_REPORT_DIR \
+		-e ELF_CONSOLIDATION_LIVE_FIXTURES \
+		baseline-runner bash scripts/real-world-consolidation-live-adapter.sh
+	;;
+memory-live-adapters)  # optionally starts side services, runs the adapters, then stops what it started
+	lightrag_start="$(printenv ELF_LIGHTRAG_CONTEXT_START || true)"  # || true: an unset variable must not abort under set -e
+	graphiti_start="$(printenv ELF_GRAPHITI_ZEP_SMOKE_START || true)"
+	status=0  # holds the runner's exit code so the cleanup below still executes
+	if [ "$lightrag_start" = "1" ]; then
+		docker compose -f docker-compose.baseline.yml --profile lightrag up -d lightrag
+	fi
+	if [ "$graphiti_start" = "1" ]; then
+		docker compose -f docker-compose.baseline.yml --profile graphiti-zep up -d graphiti-falkordb
+	fi
+	docker compose -f docker-compose.baseline.yml run --build --rm \
+		-e ELF_REAL_WORLD_LIVE_ENABLE_RAGFLOW \
+		-e ELF_REAL_WORLD_LIVE_ENABLE_LIGHTRAG \
+		-e ELF_REAL_WORLD_LIVE_ENABLE_GRAPHRAG \
+		-e ELF_REAL_WORLD_LIVE_ENABLE_GRAPHITI_ZEP \
+		-e ELF_REAL_WORLD_LIVE_ENABLE_GRAPHIFY \
+		-e ELF_RAGFLOW_SMOKE_START \
+		-e ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE \
+		-e ELF_RAGFLOW_SMOKE_ALLOW_ARM \
+		-e ELF_RAGFLOW_SMOKE_PULL_IMAGE \
+		-e ELF_RAGFLOW_SMOKE_CLEANUP \
+		-e ELF_RAGFLOW_SMOKE_DEVICE \
+		-e ELF_RAGFLOW_API_PORT \
+		-e ELF_RAGFLOW_API_BASE \
+		-e ELF_RAGFLOW_API_KEY \
+		-e RAGFLOW_API_KEY \
+		-e ELF_RAGFLOW_SMOKE_STARTUP_ATTEMPTS \
+		-e ELF_RAGFLOW_SMOKE_STARTUP_INTERVAL_SECONDS \
+		-e ELF_RAGFLOW_SMOKE_COMPOSE_TIMEOUT_SECONDS \
+		-e ELF_RAGFLOW_REPO_URL \
+		-e ELF_RAGFLOW_REF \
+		-e ELF_RAGFLOW_IMAGE \
+		-e ELF_RAGFLOW_COMPOSE_PROJECT \
+		-e ELF_LIGHTRAG_CONTEXT_START \
+		-e ELF_LIGHTRAG_API_BASE \
+		-e ELF_LIGHTRAG_ADAPTER_ID \
+		-e ELF_LIGHTRAG_ADAPTER_NAME \
+		-e ELF_LIGHTRAG_STARTUP_ATTEMPTS \
+		-e ELF_LIGHTRAG_STARTUP_INTERVAL_SECONDS \
+		-e ELF_LIGHTRAG_INDEX_ATTEMPTS \
+		-e ELF_LIGHTRAG_INDEX_INTERVAL_SECONDS \
+		-e ELF_GRAPHRAG_SMOKE_RUN \
+		-e ELF_GRAPHRAG_SMOKE_WORK_DIR \
+		-e ELF_GRAPHRAG_SMOKE_INSTALL \
+		-e ELF_GRAPHRAG_VERSION \
+		-e ELF_GRAPHRAG_PACKAGE \
+		-e ELF_GRAPHRAG_REF \
+		-e ELF_GRAPHRAG_CHAT_MODEL \
+		-e ELF_GRAPHRAG_EMBEDDING_MODEL \
+		-e ELF_GRAPHRAG_API_BASE \
+		-e ELF_GRAPHRAG_API_KEY \
+		-e ELF_GRAPHRAG_INDEX_METHOD \
+		-e ELF_GRAPHRAG_QUERY_METHOD \
+		-e ELF_GRAPHRAG_TIMEOUT_SECONDS \
+		-e ELF_GRAPHRAG_MAX_DOCS \
+		-e ELF_GRAPHRAG_MAX_INPUT_CHARS \
+		-e ELF_GRAPHITI_ZEP_SMOKE_START \
+		-e ELF_GRAPHITI_ZEP_SMOKE_RUN \
+		-e ELF_GRAPHITI_ZEP_SMOKE_WORK_DIR \
+		-e ELF_GRAPHITI_ZEP_SMOKE_INSTALL \
+		-e ELF_GRAPHITI_ZEP_VERSION \
+		-e ELF_GRAPHITI_ZEP_PACKAGE \
+		-e ELF_GRAPHITI_ZEP_REF \
+		-e ELF_GRAPHITI_ZEP_API_BASE \
+		-e ELF_GRAPHITI_ZEP_API_KEY \
+		-e ELF_GRAPHITI_ZEP_LLM_MODEL \
+		-e ELF_GRAPHITI_ZEP_EMBEDDING_MODEL \
+		-e ELF_GRAPHITI_ZEP_FALKORDB_HOST \
+		-e ELF_GRAPHITI_ZEP_FALKORDB_PORT \
+		-e ELF_GRAPHITI_ZEP_FALKORDB_DATABASE \
+		-e ELF_GRAPHITI_ZEP_TIMEOUT_SECONDS \
+		-e ELF_GRAPHITI_ZEP_STARTUP_ATTEMPTS \
+		-e ELF_GRAPHITI_ZEP_STARTUP_INTERVAL_SECONDS \
+		-e ELF_GRAPHIFY_SMOKE_RUN \
+		-e ELF_GRAPHIFY_SMOKE_WORK_DIR \
+		-e ELF_GRAPHIFY_SMOKE_INSTALL \
+		-e ELF_GRAPHIFY_PACKAGE \
+		-e ELF_GRAPHIFY_REF \
+		-e ELF_GRAPHIFY_TIMEOUT_SECONDS \
+		-e ELF_GRAPHIFY_QUERY_BUDGET \
+		baseline-runner bash scripts/real-world-live-adapters.sh || status=$?  # record a failure instead of exiting under set -e
+	if [ "$lightrag_start" = "1" ]; then
+		docker compose -f docker-compose.baseline.yml --profile lightrag stop lightrag lightrag-mock-provider >/dev/null 2>&1 || true  # best-effort; NOTE(review): lightrag-mock-provider is never started explicitly above - presumably a compose dependency, confirm
+	fi
+	if [ "$graphiti_start" = "1" ]; then
+		docker compose -f docker-compose.baseline.yml --profile graphiti-zep stop graphiti-falkordb >/dev/null 2>&1 || true  # best-effort: output and errors are discarded
+	fi
+	exit "$status"  # propagate the runner's exit code after cleanup
+	;;
+*)  # any other profile name is a usage error
+	echo "unknown real-world Docker profile: $profile" >&2
+	exit 2
+	;;
+esac
diff --git a/scripts/smoke-docker.sh b/scripts/smoke-docker.sh
new file mode 100755
index 00000000..6aa816a8
--- /dev/null
+++ b/scripts/smoke-docker.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on command failure, unset variables, and failed pipeline stages
+
+smoke="${1:-}"  # name of the smoke to run; defaults to empty so that set -u does not trip when it is omitted
+if [ -z "$smoke" ]; then
+	echo "usage: scripts/smoke-docker.sh <smoke>" >&2
+	exit 2  # status 2 when the smoke name is missing
+fi
+
+case "$smoke" in  # dispatch on the requested smoke name; each arm runs one script in the baseline-runner compose service
+graphify-docker-graph-report)  # the -e NAME flags carry no value; presumably docker forwards the caller value of each variable (confirm against the docker compose run docs)
+	docker compose -f docker-compose.baseline.yml run --build --rm \
+		-e ELF_GRAPHIFY_SMOKE_RUN \
+		-e ELF_GRAPHIFY_SMOKE_REPORT_DIR \
+		-e ELF_GRAPHIFY_SMOKE_WORK_DIR \
+		-e ELF_GRAPHIFY_SMOKE_INSTALL \
+		-e ELF_GRAPHIFY_PACKAGE \
+		-e ELF_GRAPHIFY_REF \
+		-e ELF_GRAPHIFY_TIMEOUT_SECONDS \
+		-e ELF_GRAPHIFY_QUERY_BUDGET \
+		baseline-runner python3 scripts/graphify-docker-graph-report-smoke.py
+	;;
+graphiti-zep-docker-temporal)  # optionally starts graphiti-falkordb, runs the smoke, then stops the service again
+	start="$(printenv ELF_GRAPHITI_ZEP_SMOKE_START || true)"  # empty when the variable is unset; the trailing true keeps set -e from aborting
+	status=0  # exit status of the smoke run, reported after cleanup
+	if [ "$start" = "1" ]; then  # the service is managed only when the variable is exactly 1
+		docker compose -f docker-compose.baseline.yml --profile graphiti-zep up -d graphiti-falkordb
+	fi
+	docker compose -f docker-compose.baseline.yml run --build --rm \
+		-e ELF_GRAPHITI_ZEP_SMOKE_RUN \
+		-e ELF_GRAPHITI_ZEP_SMOKE_REPORT_DIR \
+		-e ELF_GRAPHITI_ZEP_SMOKE_WORK_DIR \
+		-e ELF_GRAPHITI_ZEP_SMOKE_INSTALL \
+		-e ELF_GRAPHITI_ZEP_VERSION \
+		-e ELF_GRAPHITI_ZEP_PACKAGE \
+		-e ELF_GRAPHITI_ZEP_REF \
+		-e ELF_GRAPHITI_ZEP_API_BASE \
+		-e ELF_GRAPHITI_ZEP_API_KEY \
+		-e ELF_GRAPHITI_ZEP_LLM_MODEL \
+		-e ELF_GRAPHITI_ZEP_EMBEDDING_MODEL \
+		-e ELF_GRAPHITI_ZEP_FALKORDB_HOST \
+		-e ELF_GRAPHITI_ZEP_FALKORDB_PORT \
+		-e ELF_GRAPHITI_ZEP_FALKORDB_DATABASE \
+		-e ELF_GRAPHITI_ZEP_TIMEOUT_SECONDS \
+		-e ELF_GRAPHITI_ZEP_STARTUP_ATTEMPTS \
+		-e ELF_GRAPHITI_ZEP_STARTUP_INTERVAL_SECONDS \
+		baseline-runner python3 scripts/graphiti-zep-docker-temporal-smoke.py || status=$?  # record the failure instead of exiting so the cleanup below still runs
+	if [ "$start" = "1" ]; then  # stop only what this script started
+		docker compose -f docker-compose.baseline.yml --profile graphiti-zep stop graphiti-falkordb >/dev/null 2>&1 || true  # best-effort stop; output and errors are discarded
+	fi
+	exit "$status"  # propagate the result of the smoke run
+	;;
+graphrag-docker)  # no auxiliary service is started or stopped for this smoke
+	docker compose -f docker-compose.baseline.yml run --build --rm \
+		-e ELF_GRAPHRAG_SMOKE_RUN \
+		-e ELF_GRAPHRAG_SMOKE_REPORT_DIR \
+		-e ELF_GRAPHRAG_SMOKE_WORK_DIR \
+		-e ELF_GRAPHRAG_SMOKE_INSTALL \
+		-e ELF_GRAPHRAG_VERSION \
+		-e ELF_GRAPHRAG_PACKAGE \
+		-e ELF_GRAPHRAG_REF \
+		-e ELF_GRAPHRAG_CHAT_MODEL \
+		-e ELF_GRAPHRAG_EMBEDDING_MODEL \
+		-e ELF_GRAPHRAG_API_BASE \
+		-e ELF_GRAPHRAG_API_KEY \
+		-e ELF_GRAPHRAG_INDEX_METHOD \
+		-e ELF_GRAPHRAG_QUERY_METHOD \
+		-e ELF_GRAPHRAG_TIMEOUT_SECONDS \
+		-e ELF_GRAPHRAG_MAX_DOCS \
+		-e ELF_GRAPHRAG_MAX_INPUT_CHARS \
+		baseline-runner python3 scripts/graphrag-docker-smoke.py
+	;;
+lightrag-docker-context)  # optionally starts lightrag, runs the smoke, then stops the lightrag services
+	start="$(printenv ELF_LIGHTRAG_CONTEXT_START || true)"  # empty when the variable is unset; the trailing true keeps set -e from aborting
+	status=0  # exit status of the smoke run, reported after cleanup
+	if [ "$start" = "1" ]; then  # the service is managed only when the variable is exactly 1
+		docker compose -f docker-compose.baseline.yml --profile lightrag up -d lightrag  # NOTE(review): only lightrag is started here, yet lightrag-mock-provider is also stopped below; presumably a compose dependency - confirm
+	fi
+	docker compose -f docker-compose.baseline.yml run --build --rm \
+		baseline-runner bash scripts/lightrag-docker-context-smoke.sh || status=$?  # NOTE(review): unlike the other arms, no -e variables are forwarded here - confirm this is intended
+	if [ "$start" = "1" ]; then  # stop only when this script did the start
+		docker compose -f docker-compose.baseline.yml --profile lightrag stop lightrag lightrag-mock-provider >/dev/null 2>&1 || true  # best-effort stop; output and errors are discarded
+	fi
+	exit "$status"  # propagate the result of the smoke run
+	;;
+*)  # any other smoke name is rejected
+	echo "unknown smoke: $smoke" >&2
+	exit 2  # same status as the missing-argument case
+	;;
+esac
diff --git a/scripts/trace-gate.sh b/scripts/trace-gate.sh
new file mode 100755
index 00000000..5cbdd52e
--- /dev/null
+++ b/scripts/trace-gate.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on command failure, unset variables, and failed pipeline stages
+
+DSN="${TRACE_GATE_PG_DSN:-${PG_DSN:-postgres://postgres:postgres@127.0.0.1:5432/elf}}"  # TRACE_GATE_PG_DSN wins, then PG_DSN, then this local default
+VECTOR_DIM="${TRACE_GATE_VECTOR_DIM:-4}"  # replaces the <VECTOR_DIM> placeholder in the schema; defaults to 4
+SCHEMA_PATH="tmp/trace_gate.schema.sql"  # where the expanded schema is written
+REPORT_PATH="${TRACE_GATE_REPORT_PATH:-tmp/trace_gate.report.json}"  # handed to the gate binary as --out
+
+mkdir -p tmp  # directory for SCHEMA_PATH and for the default REPORT_PATH; a custom REPORT_PATH directory is not created here
+
+TRACE_GATE_VECTOR_DIM="${VECTOR_DIM}" python3 - <<'PY' > "${SCHEMA_PATH}"  # inline Python expands sql/init.sql; its stdout becomes the schema file
+import os
+from pathlib import Path
+
+vector_dim = int(os.environ["TRACE_GATE_VECTOR_DIM"])  # raises if the value is not an integer
+root = Path(".")  # paths are relative to the current working directory
+sql_dir = root / "sql"
+
+out = []  # output chunks, joined with newlines below
+for raw_line in (sql_dir / "init.sql").read_text(encoding="utf-8").splitlines():
+	line = raw_line.strip()  # stripped copy used only for directive detection
+	if line.startswith(r"\ir "):  # psql include-relative directive
+		rel = line[len(r"\ir ") :].strip()  # path that follows the directive
+		out.append((sql_dir / rel).read_text(encoding="utf-8"))  # inline the file, resolved against sql/; directives inside it are not expanded again
+	else:
+		out.append(raw_line)  # every other line is kept verbatim, indentation included
+
+expanded = "\n".join(out) + "\n"
+print(expanded.replace("<VECTOR_DIM>", str(vector_dim)), end="")  # substitute the placeholder everywhere; end is empty because the text already ends with a newline
+PY
+
+psql "${DSN}" -v ON_ERROR_STOP=1 -f "${SCHEMA_PATH}"  # load the expanded schema; ON_ERROR_STOP makes psql stop at the first SQL error
+psql "${DSN}" -v ON_ERROR_STOP=1 -f .github/fixtures/trace_gate/fixture.sql  # load the fixture SQL with the same stop-on-error setting
+cargo run -p elf-eval --bin trace_regression_gate -- \
+	--config .github/fixtures/trace_gate/config.toml \
+	--gate .github/fixtures/trace_gate/gate.json \
+	--out "${REPORT_PATH}"  # last command, so its exit status is the exit status of the script