From c912ce4f45527a44d6f3f1f22aff8193e86776c5 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Wed, 10 Jun 2026 19:12:29 +0800
Subject: [PATCH 1/2] {"schema":"decodex/commit/1","summary":"Implement
 GraphRAG cost-bounded Docker adapter","authority":"XY-887"}

---
 Makefile.toml                                 |    9 +
 .../memory_projects_manifest.json             |   70 +-
 .../tests/real_world_job_benchmark.rs         |    9 +-
 scripts/graphrag-docker-smoke.py              | 1339 +++++++++++++++++
 scripts/real-world-live-adapters.sh           |   29 +
 5 files changed, 1438 insertions(+), 18 deletions(-)
 create mode 100755 scripts/graphrag-docker-smoke.py

diff --git a/Makefile.toml b/Makefile.toml
index 27f5c6c5..be3c2e41 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -823,6 +823,7 @@ args = [
 # | real-world-memory-knowledge-report | command   | |
 # | ragflow-docker-smoke               | command   | |
 # | lightrag-docker-context-smoke      | command   | |
+# | graphrag-docker-smoke              | command   | |
 
 [tasks.ragflow-docker-smoke]
 workspace = false
@@ -839,6 +840,14 @@ args = [
 	"set -euo pipefail; start=\"$(printenv ELF_LIGHTRAG_CONTEXT_START || true)\"; status=0; if [ \"$start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile lightrag up -d lightrag; fi; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner bash scripts/lightrag-docker-context-smoke.sh || status=$?; if [ \"$start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile lightrag stop lightrag lightrag-mock-provider >/dev/null 2>&1 || true; fi; exit \"$status\"",
 ]
 
+[tasks.graphrag-docker-smoke]
+workspace = false
+command = "bash"
+args = [
+	"-lc",
+	"set -euo pipefail; docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_GRAPHRAG_SMOKE_RUN -e ELF_GRAPHRAG_SMOKE_REPORT_DIR -e ELF_GRAPHRAG_SMOKE_WORK_DIR -e ELF_GRAPHRAG_SMOKE_INSTALL -e ELF_GRAPHRAG_VERSION -e ELF_GRAPHRAG_PACKAGE -e ELF_GRAPHRAG_REF -e ELF_GRAPHRAG_CHAT_MODEL -e ELF_GRAPHRAG_EMBEDDING_MODEL -e ELF_GRAPHRAG_API_BASE -e ELF_GRAPHRAG_API_KEY -e ELF_GRAPHRAG_INDEX_METHOD -e ELF_GRAPHRAG_QUERY_METHOD -e ELF_GRAPHRAG_TIMEOUT_SECONDS -e ELF_GRAPHRAG_MAX_DOCS -e ELF_GRAPHRAG_MAX_INPUT_CHARS baseline-runner python3 scripts/graphrag-docker-smoke.py",
+]
+
 [tasks.real-world-memory-knowledge]
 workspace = false
 dependencies = [
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index 07c16306..66627424 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -1288,48 +1288,63 @@
       "overall_status": "blocked",
       "setup": {
         "status": "blocked",
-        "evidence": "XY-882 marks GraphRAG as an adapter_candidate, but indexing cost and source mapping still need a cost-bounded Docker implementation before live scoring."
+        "evidence": "XY-887 adds a Docker-safe generated-corpus GraphRAG smoke command. The checked-in manifest remains a research gate until a generated artifact reaches GraphRAG parquet output.",
+        "command": "cargo make graphrag-docker-smoke",
+        "artifact": "tmp/real-world-memory/graphrag-smoke/graphrag-smoke.json"
       },
       "run": {
-        "status": "not_encoded",
-        "evidence": "No GraphRAG real_world_job adapter is encoded."
+        "status": "blocked",
+        "evidence": "The default smoke records a typed blocked artifact without model calls; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration to attempt live GraphRAG index/query.",
+        "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make graphrag-docker-smoke",
+        "artifact": "tmp/real-world-memory/graphrag-smoke/summary.json"
       },
       "result": {
         "status": "blocked",
-        "evidence": "No graph-navigation or knowledge-synthesis result is claimed from docs-only research."
+        "evidence": "No graph-navigation or knowledge-synthesis result is claimed from the checked-in research gate. Generated smoke artifacts may become live_real_world only after GraphRAG output tables map to generated evidence ids.",
+        "artifact": "tmp/real-world-memory/graphrag-smoke/memory_projects_manifest.graphrag-smoke.json"
       },
       "capabilities": [
         {
           "capability": "indexing_resource_envelope",
           "status": "blocked",
-          "evidence": "XY-882 requires the first adapter to start with a tiny corpus and record indexing cost before any scale or quality claim."
+          "evidence": "The smoke bounds the generated public corpus, timeout, GraphRAG package, model configuration, cache size, output size, elapsed time, and observed cache entries."
         },
         {
           "capability": "source_citation_mapping",
           "status": "blocked",
-          "evidence": "The adapter must map graph summaries and query output back to benchmark evidence IDs."
+          "evidence": "The generated artifact maps GraphRAG documents, text_units, communities, community_reports, entities, and relationships parquet rows back to real_world_job evidence ids when available."
         },
         {
           "capability": "real_world_job_adapter",
+          "status": "blocked",
+          "evidence": "The smoke writes a generated real_world_job fixture for the tiny corpus, but the checked-in record stays blocked until live GraphRAG output maps to expected evidence ids."
+        },
+        {
+          "capability": "quality_or_scale_claim",
           "status": "not_encoded",
-          "evidence": "No GraphRAG materializer or scorer mapping exists."
+          "evidence": "The smoke does not claim broad graph-navigation quality, knowledge-synthesis quality, private corpora, or large-corpus indexing."
         }
       ],
       "suites": [
         {
           "suite_id": "knowledge_compilation",
           "status": "blocked",
-          "evidence": "Community summaries and graph reports need source coverage checks before scoring."
+          "evidence": "The generated smoke can exercise parquet table source coverage for one tiny knowledge-compilation fixture, but the checked-in record stays blocked until live output exists."
         },
         {
           "suite_id": "retrieval",
-          "status": "blocked",
-          "evidence": "Query output and expected-evidence mapping are not researched."
+          "status": "not_encoded",
+          "evidence": "The smoke may run local search for reachability, but retrieval quality scoring is not encoded."
         },
         {
           "suite_id": "production_ops",
-          "status": "blocked",
-          "evidence": "Indexing resource envelope is not established."
+          "status": "not_encoded",
+          "evidence": "Resource bounds are recorded, but no production-ops suite scoring is encoded."
+        },
+        {
+          "suite_id": "memory_evolution",
+          "status": "not_encoded",
+          "evidence": "GraphRAG update/delete/current-versus-historical behavior is not encoded by the smoke."
         }
       ],
       "evidence": [
@@ -1342,6 +1357,16 @@
           "kind": "source",
           "ref": "https://microsoft.github.io/graphrag/",
           "status": "real"
+        },
+        {
+          "kind": "command",
+          "ref": "cargo make graphrag-docker-smoke",
+          "status": "blocked"
+        },
+        {
+          "kind": "artifact",
+          "ref": "tmp/real-world-memory/graphrag-smoke/graphrag-smoke.json",
+          "status": "blocked"
         }
       ],
       "execution_metadata": {
@@ -1356,20 +1381,31 @@
             "url": "https://microsoft.github.io/graphrag/",
             "evidence": "Official documentation for indexing and querying."
           },
+          {
+            "label": "GraphRAG input docs",
+            "url": "https://microsoft.github.io/graphrag/index/inputs/",
+            "evidence": "Official input format and document metadata reference."
+          },
           {
             "label": "GraphRAG output tables",
             "url": "https://microsoft.github.io/graphrag/index/outputs/",
             "evidence": "Official output schema with document, text unit, community, and relationship identifiers."
+          },
+          {
+            "label": "GraphRAG local search docs",
+            "url": "https://microsoft.github.io/graphrag/query/local_search/",
+            "evidence": "Official local-search context and graph traversal reference."
           }
         ],
-        "setup_path": "Implement a tiny CLI/API index/query path with explicit model configuration and source mapping from parquet output tables.",
-        "runtime_boundary": "Docker-only Python CLI run with generated corpus and container-local artifacts.",
-        "resource_expectation": "Indexing may be expensive; record model calls, cache size, elapsed time, and maximum corpus size used.",
+        "setup_path": "Run cargo make graphrag-docker-smoke for a typed preflight artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live GraphRAG index/query attempt.",
+        "runtime_boundary": "docker-compose.baseline.yml baseline-runner, container-local Python venv, generated public corpus, and report artifacts under tmp/real-world-memory/graphrag-smoke.",
+        "resource_expectation": "The default profile uses a generated public corpus capped by ELF_GRAPHRAG_MAX_DOCS and ELF_GRAPHRAG_MAX_INPUT_CHARS, pins GraphRAG through ELF_GRAPHRAG_PACKAGE, and records elapsed time, cache size, output size, and observed cache entries.",
         "retry_guidance": [
-          "Add a cost-bounded smoke before any scale or quality claim.",
+          "Run cargo make graphrag-docker-smoke first; missing provider configuration must remain a typed blocked artifact, not a pass claim.",
+          "Enable ELF_GRAPHRAG_SMOKE_RUN=1 only for generated public corpus indexing with explicit provider configuration.",
           "Fail typed if source document or text_unit identifiers cannot be mapped to expected evidence IDs."
         ],
-        "research_depth": "D2 feasibility verdict: adapter_candidate (XY-882); research_gate only, adapter not encoded"
+        "research_depth": "D2 feasibility plus XY-887 Docker smoke implementation; checked-in record remains research_gate unless a generated artifact reaches GraphRAG output"
       },
       "follow_up": {
         "title": "[ELF benchmark adapter] Implement GraphRAG cost-bounded Docker adapter",
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 1ac9bfd2..d3a62b17 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -281,7 +281,7 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/suite_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(10)
+		Some(8)
 	);
 }
 
@@ -295,6 +295,7 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
 	let openviking = find_by_field(adapters, "/adapter_id", "openviking_live_baseline")?;
 	let ragflow = find_by_field(adapters, "/adapter_id", "ragflow_research_gate")?;
 	let lightrag = find_by_field(adapters, "/adapter_id", "lightrag_research_gate")?;
+	let graphrag = find_by_field(adapters, "/adapter_id", "graphrag_research_gate")?;
 	let qmd_deep = find_by_field(adapters, "/adapter_id", "qmd_deep_profile_gate")?;
 
 	assert_eq!(elf.pointer("/evidence_class").and_then(Value::as_str), Some("fixture_backed"));
@@ -356,6 +357,12 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
 		lightrag.pointer("/capabilities/3/status").and_then(Value::as_str),
 		Some("not_encoded")
 	);
+	assert_eq!(graphrag.pointer("/evidence_class").and_then(Value::as_str), Some("research_gate"));
+	assert_eq!(
+		graphrag.pointer("/setup/command").and_then(Value::as_str),
+		Some("cargo make graphrag-docker-smoke")
+	);
+	assert_eq!(graphrag.pointer("/suites/1/status").and_then(Value::as_str), Some("not_encoded"));
 	assert_eq!(
 		qmd_deep.pointer("/capabilities/2/status").and_then(Value::as_str),
 		Some("unsupported")
diff --git a/scripts/graphrag-docker-smoke.py b/scripts/graphrag-docker-smoke.py
new file mode 100755
index 00000000..96757f16
--- /dev/null
+++ b/scripts/graphrag-docker-smoke.py
@@ -0,0 +1,1339 @@
+#!/usr/bin/env python3
+"""Cost-bounded GraphRAG Docker smoke for real-world external adapters."""
+
+from __future__ import annotations
+
+import csv
+import json
+import os
+import shutil
+import subprocess
+import sys
+import time
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+
+SCRIPT_DIR = Path(__file__).resolve().parent
+ROOT_DIR = SCRIPT_DIR.parent
+REPORT_DIR = Path(
+    os.environ.get(
+        "ELF_GRAPHRAG_SMOKE_REPORT_DIR",
+        ROOT_DIR / "tmp" / "real-world-memory" / "graphrag-smoke",
+    )
+)
+WORK_DIR = Path(os.environ.get("ELF_GRAPHRAG_SMOKE_WORK_DIR", REPORT_DIR / "work"))
+OUT = Path(os.environ.get("ELF_GRAPHRAG_SMOKE_OUT", REPORT_DIR / "graphrag-smoke.json"))
+MANIFEST_OUT = Path(
+    os.environ.get(
+        "ELF_GRAPHRAG_SMOKE_MANIFEST_OUT",
+        REPORT_DIR / "memory_projects_manifest.graphrag-smoke.json",
+    )
+)
+SUMMARY_OUT = Path(os.environ.get("ELF_GRAPHRAG_SMOKE_SUMMARY_OUT", REPORT_DIR / "summary.json"))
+FIXTURE_DIR = REPORT_DIR / "graphrag-fixtures"
+OUTPUT_CAPTURE_DIR = REPORT_DIR / "graphrag-output"
+LOG_DIR = REPORT_DIR / "logs"
+
+RUN_ID = os.environ.get(
+    "ELF_GRAPHRAG_SMOKE_RUN_ID",
+    f"graphrag-docker-smoke-{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}",
+)
+RUN_LIVE = os.environ.get("ELF_GRAPHRAG_SMOKE_RUN", "0") == "1"
+ALLOW_HOST = os.environ.get("ELF_GRAPHRAG_SMOKE_ALLOW_HOST", "0") == "1"
+INSTALL_GRAPHRAG = os.environ.get("ELF_GRAPHRAG_SMOKE_INSTALL", "1") == "1"
+GRAPH_RAG_VERSION = os.environ.get("ELF_GRAPHRAG_VERSION", "3.1.0")
+GRAPH_RAG_PACKAGE = os.environ.get("ELF_GRAPHRAG_PACKAGE", f"graphrag=={GRAPH_RAG_VERSION}")
+GRAPH_RAG_REF = os.environ.get("ELF_GRAPHRAG_REF", f"pypi:{GRAPH_RAG_PACKAGE}")
+CHAT_MODEL = os.environ.get("ELF_GRAPHRAG_CHAT_MODEL", "gpt-4o-mini")
+EMBEDDING_MODEL = os.environ.get("ELF_GRAPHRAG_EMBEDDING_MODEL", "text-embedding-3-small")
+API_BASE = os.environ.get("ELF_GRAPHRAG_API_BASE", "")
+API_KEY = os.environ.get("ELF_GRAPHRAG_API_KEY", os.environ.get("GRAPHRAG_API_KEY", ""))
+INDEX_METHOD = os.environ.get("ELF_GRAPHRAG_INDEX_METHOD", "fast")
+QUERY_METHOD = os.environ.get("ELF_GRAPHRAG_QUERY_METHOD", "local")
+TIMEOUT_SECONDS = int(os.environ.get("ELF_GRAPHRAG_TIMEOUT_SECONDS", "900"))
+MAX_DOCS = max(1, min(int(os.environ.get("ELF_GRAPHRAG_MAX_DOCS", "2")), 3))
+MAX_INPUT_CHARS = max(400, min(int(os.environ.get("ELF_GRAPHRAG_MAX_INPUT_CHARS", "2400")), 6000))
+
+TABLES = (
+    "documents",
+    "text_units",
+    "communities",
+    "community_reports",
+    "entities",
+    "relationships",
+)
+
+
+@dataclass
+class StatusState:
+    """Typed status for generated GraphRAG smoke artifacts."""
+
+    setup: str = "blocked"
+    run: str = "not_encoded"
+    result: str = "blocked"
+    overall: str = "blocked"
+    evidence_class: str = "research_gate"
+    failure_class: str = "graphrag_live_run_disabled"
+    failure_reason: str = (
+        "GraphRAG indexing is model-call intensive; set ELF_GRAPHRAG_SMOKE_RUN=1 "
+        "and provide explicit provider configuration to attempt the live Docker smoke."
+    )
+
+
+@dataclass
+class CommandRecord:
+    """Captured command result without secret-bearing environment values."""
+
+    label: str
+    command: list[str]
+    status: str
+    elapsed_ms: float
+    stdout_artifact: str | None
+    stderr_artifact: str | None
+    returncode: int | None
+    reason: str
+
+
+def utc_now() -> str:
+    """Return an RFC3339 UTC timestamp."""
+
+    return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+
+
+def rel(path: Path) -> str:
+    """Return a repository-relative path when possible."""
+
+    try:
+        return str(path.resolve().relative_to(ROOT_DIR))
+    except ValueError:
+        return str(path)
+
+
+def mkdirs() -> None:
+    """Create output directories."""
+
+    for path in (REPORT_DIR, WORK_DIR, FIXTURE_DIR, OUTPUT_CAPTURE_DIR, LOG_DIR):
+        path.mkdir(parents=True, exist_ok=True)
+
+
+def write_json(path: Path, payload: Any) -> None:
+    """Write stable, pretty JSON."""
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
+
+def dir_size(path: Path) -> int:
+    """Return total file size for a directory or file."""
+
+    if not path.exists():
+        return 0
+    if path.is_file():
+        return path.stat().st_size
+
+    return sum(item.stat().st_size for item in path.rglob("*") if item.is_file())
+
+
+def file_count(path: Path) -> int:
+    """Return file count for a directory."""
+
+    if not path.exists():
+        return 0
+
+    return sum(1 for item in path.rglob("*") if item.is_file())
+
+
+def command_available(command: str) -> bool:
+    """Return whether a command is on PATH."""
+
+    return shutil.which(command) is not None
+
+
+def run_command(
+    label: str,
+    command: list[str],
+    cwd: Path,
+    timeout: int = TIMEOUT_SECONDS,
+    extra_env: dict[str, str] | None = None,
+) -> CommandRecord:
+    """Run a subprocess and capture stdout/stderr artifacts."""
+
+    cwd.mkdir(parents=True, exist_ok=True)
+    stdout_path = LOG_DIR / f"{label}.stdout.log"
+    stderr_path = LOG_DIR / f"{label}.stderr.log"
+    env = os.environ.copy()
+
+    if extra_env:
+        env.update(extra_env)
+
+    started = time.monotonic()
+    try:
+        proc = subprocess.run(
+            command,
+            cwd=cwd,
+            env=env,
+            text=True,
+            capture_output=True,
+            timeout=timeout,
+            check=False,
+        )
+        elapsed_ms = (time.monotonic() - started) * 1000
+        stdout_path.write_text(proc.stdout, encoding="utf-8")
+        stderr_path.write_text(proc.stderr, encoding="utf-8")
+        status = "pass" if proc.returncode == 0 else "incomplete"
+        reason = "Command completed." if proc.returncode == 0 else f"Command exited {proc.returncode}."
+
+        return CommandRecord(
+            label=label,
+            command=command,
+            status=status,
+            elapsed_ms=elapsed_ms,
+            stdout_artifact=rel(stdout_path),
+            stderr_artifact=rel(stderr_path),
+            returncode=proc.returncode,
+            reason=reason,
+        )
+    except subprocess.TimeoutExpired as err:
+        elapsed_ms = (time.monotonic() - started) * 1000
+        stdout_path.write_text(err.stdout or "", encoding="utf-8")
+        stderr_path.write_text(err.stderr or "", encoding="utf-8")
+
+        return CommandRecord(
+            label=label,
+            command=command,
+            status="incomplete",
+            elapsed_ms=elapsed_ms,
+            stdout_artifact=rel(stdout_path),
+            stderr_artifact=rel(stderr_path),
+            returncode=None,
+            reason=f"Command timed out after {timeout} seconds.",
+        )
+
+
+def command_to_json(record: CommandRecord) -> dict[str, Any]:
+    """Serialize a command record."""
+
+    return {
+        "label": record.label,
+        "status": record.status,
+        "command": record.command,
+        "elapsed_ms": round(record.elapsed_ms, 3),
+        "stdout_artifact": record.stdout_artifact,
+        "stderr_artifact": record.stderr_artifact,
+        "returncode": record.returncode,
+        "reason": record.reason,
+    }
+
+
+def generated_corpus() -> list[dict[str, str]]:
+    """Return the bounded generated-public corpus."""
+
+    docs = [
+        {
+            "evidence_id": "graphrag-smoke-nova-observatory",
+            "title": "Nova Observatory memo",
+            "text": (
+                "Evidence ID graphrag-smoke-nova-observatory. Nova Observatory "
+                "operates the public Aurora Index review. The Aurora Index links "
+                "skyglow measurements to open weather station readings for civic "
+                "science audits. The GraphRAG smoke must map this source document "
+                "and its text unit back to the Nova Observatory evidence id."
+            ),
+        },
+        {
+            "evidence_id": "graphrag-smoke-aurora-index",
+            "title": "Aurora Index field note",
+            "text": (
+                "Evidence ID graphrag-smoke-aurora-index. The Aurora Index uses "
+                "Nova Observatory calibration notes when explaining why a public "
+                "skyglow reading changed. The GraphRAG smoke must keep the Aurora "
+                "Index source document and text unit evidence id recoverable."
+            ),
+        },
+        {
+            "evidence_id": "graphrag-smoke-stale-trap",
+            "title": "Retired skyglow note",
+            "text": (
+                "Evidence ID graphrag-smoke-stale-trap. Retired note: Nova "
+                "Observatory previously used the obsolete Zenith Ledger. This note "
+                "is a distractor and must not be used as the primary answer."
+            ),
+        },
+    ]
+    trimmed: list[dict[str, str]] = []
+    used_chars = 0
+
+    for doc in docs[:MAX_DOCS]:
+        remaining = MAX_INPUT_CHARS - used_chars
+
+        if remaining <= 0:
+            break
+
+        text = doc["text"][:remaining].strip()
+        used_chars += len(text)
+        trimmed.append({**doc, "text": text})
+
+    return trimmed
+
+
+def write_corpus(project_dir: Path, corpus: list[dict[str, str]]) -> Path:
+    """Write GraphRAG plain text input plus a CSV mapping copy."""
+
+    input_dir = project_dir / "input"
+    input_dir.mkdir(parents=True, exist_ok=True)
+    csv_path = REPORT_DIR / "generated-corpus.csv"
+
+    with csv_path.open("w", newline="", encoding="utf-8") as handle:
+        writer = csv.DictWriter(handle, fieldnames=("evidence_id", "title", "text"))
+        writer.writeheader()
+
+        for item in corpus:
+            writer.writerow(item)
+
+    for item in corpus:
+        file_name = f"{slug(item['evidence_id'])}.txt"
+        (input_dir / file_name).write_text(
+            f"Title: {item['title']}\nEvidence ID: {item['evidence_id']}\n\n{item['text']}\n",
+            encoding="utf-8",
+        )
+
+    return csv_path
+
+
+def write_fixture(corpus: list[dict[str, str]], status: StatusState, mapped_ids: list[str]) -> Path:
+    """Write a generated real_world_job fixture for the smoke."""
+
+    fixture_path = FIXTURE_DIR / "knowledge" / "graphrag_tiny_corpus.json"
+    expected_ids = [item["evidence_id"] for item in corpus if item["evidence_id"] != "graphrag-smoke-stale-trap"]
+    used_ids = [item for item in mapped_ids if item in expected_ids]
+    response = {
+        "adapter_id": "graphrag_docker_smoke",
+        "answer": {
+            "content": (
+                "Nova Observatory and the Aurora Index are connected by calibration "
+                "and public skyglow review evidence."
+                if used_ids
+                else ""
+            ),
+            "claims": [
+                {
+                    "claim_id": "nova_aurora_link",
+                    "text": (
+                        "Nova Observatory and the Aurora Index are connected by "
+                        "calibration and public skyglow review evidence."
+                    ),
+                    "evidence_ids": used_ids,
+                    "confidence": "derived_from_graphrag_table_mapping",
+                }
+            ]
+            if used_ids
+            else [],
+            "evidence_ids": used_ids,
+            "latency_ms": 0.0,
+            "cost": {
+                "currency": "USD",
+                "amount": 0.0,
+                "input_tokens": 0,
+                "output_tokens": 0,
+            },
+        },
+    }
+    fixture: dict[str, Any] = {
+        "schema": "elf.real_world_job/v1",
+        "job_id": "graphrag-tiny-corpus-001",
+        "suite": "knowledge_compilation",
+        "title": "Map GraphRAG output tables to generated evidence",
+        "corpus": {
+            "corpus_id": "graphrag-generated-public-smoke",
+            "profile": "generated_public",
+            "items": [
+                {
+                    "evidence_id": item["evidence_id"],
+                    "kind": "document",
+                    "text": item["text"],
+                    "source_ref": {
+                        "schema": "source_ref/v1",
+                        "resolver": "graphrag_smoke/v1",
+                        "ref": {
+                            "run_id": RUN_ID,
+                            "evidence_id": item["evidence_id"],
+                            "title": item["title"],
+                        },
+                    },
+                    "created_at": "2026-06-10T00:00:00Z",
+                }
+                for item in corpus
+            ],
+            "adapter_response": response,
+        },
+        "timeline": [
+            {
+                "event_id": "graphrag-smoke-corpus-generated",
+                "ts": "2026-06-10T00:00:00Z",
+                "actor": "system",
+                "action": "generated_public_corpus",
+                "evidence_ids": expected_ids,
+                "summary": "The GraphRAG smoke generated a tiny public corpus for source mapping.",
+            }
+        ],
+        "prompt": {
+            "role": "user",
+            "content": "What connects Nova Observatory and the Aurora Index in the generated corpus?",
+            "job_mode": "compile",
+            "constraints": ["cite_evidence", "avoid_stale_facts"],
+        },
+        "expected_answer": {
+            "must_include": [
+                {
+                    "claim_id": "nova_aurora_link",
+                    "text": (
+                        "Nova Observatory and the Aurora Index are connected by "
+                        "calibration and public skyglow review evidence."
+                    ),
+                }
+            ],
+            "must_not_include": ["Zenith Ledger is the current source."],
+            "evidence_links": {"nova_aurora_link": expected_ids},
+            "answer_type": "direct_answer",
+            "accepted_alternates": [],
+            "requires_caveat": False,
+            "requires_refusal": False,
+        },
+        "required_evidence": [
+            {
+                "evidence_id": evidence_id,
+                "claim_id": "nova_aurora_link",
+                "requirement": "cite",
+                "quote": "Aurora Index",
+            }
+            for evidence_id in expected_ids
+        ],
+        "negative_traps": [
+            {
+                "trap_id": "retired-zenith-ledger",
+                "type": "stale_fact",
+                "evidence_ids": ["graphrag-smoke-stale-trap"],
+                "failure_if_used": True,
+            }
+        ],
+        "scoring_rubric": {
+            "dimensions": {
+                "answer_correctness": {
+                    "weight": 0.35,
+                    "max_points": 1.0,
+                    "criteria": "States the Nova Observatory and Aurora Index relationship.",
+                },
+                "evidence_grounding": {
+                    "weight": 0.35,
+                    "max_points": 1.0,
+                    "criteria": "Maps output table identifiers to generated evidence ids.",
+                },
+                "trap_avoidance": {
+                    "weight": 0.2,
+                    "max_points": 1.0,
+                    "criteria": "Does not use the retired Zenith Ledger distractor.",
+                },
+                "uncertainty": {
+                    "weight": 0.1,
+                    "max_points": 1.0,
+                    "criteria": "Does not claim broad GraphRAG quality from the tiny smoke.",
+                },
+            },
+            "pass_threshold": 0.75,
+            "hard_fail_rules": [],
+        },
+        "allowed_uncertainty": {
+            "phrases": ["tiny generated corpus", "smoke only"],
+            "fallback": "Report typed failure when GraphRAG output identifiers cannot be mapped.",
+        },
+        "operator_debug": None,
+        "encoding": {},
+        "memory_evolution": None,
+        "tags": ["external_adapter", "generated_public", "no_live_claim"],
+    }
+
+    if status.result in {"blocked", "incomplete"}:
+        fixture["encoding"] = {
+            "status": status.result,
+            "reason": status.failure_reason,
+        }
+
+    write_json(fixture_path, fixture)
+
+    return fixture_path
+
+
+def slug(value: str) -> str:
+    """Return a small ASCII slug."""
+
+    out: list[str] = []
+    last_dash = False
+
+    for char in value.lower():
+        if char.isascii() and char.isalnum():
+            out.append(char)
+            last_dash = False
+        elif not last_dash and out:
+            out.append("-")
+            last_dash = True
+
+    while out and out[-1] == "-":
+        out.pop()
+
+    return "".join(out) or "item"
+
+
+def init_project(project_dir: Path, command_records: list[CommandRecord]) -> bool:
+    """Create a venv, install GraphRAG, and initialize the project."""
+
+    venv_dir = WORK_DIR / ".venv"
+    python = venv_dir / "bin" / "python"
+    graphrag = venv_dir / "bin" / "graphrag"
+
+    if INSTALL_GRAPHRAG:
+        venv_record = run_command("python-venv", [sys.executable, "-m", "venv", str(venv_dir)], WORK_DIR)
+        command_records.append(venv_record)
+        if venv_record.status != "pass":
+            return False
+
+        install_record = run_command(
+            "graphrag-install",
+            [str(python), "-m", "pip", "install", "--disable-pip-version-check", GRAPH_RAG_PACKAGE],
+            WORK_DIR,
+        )
+        command_records.append(install_record)
+        if install_record.status != "pass":
+            return False
+    elif not graphrag.exists():
+        command_records.append(
+            CommandRecord(
+                label="graphrag-install",
+                command=["graphrag"],
+                status="incomplete",
+                elapsed_ms=0.0,
+                stdout_artifact=None,
+                stderr_artifact=None,
+                returncode=None,
+                reason="GraphRAG install was disabled and no venv graphrag executable exists.",
+            )
+        )
+
+        return False
+
+    init_record = run_command(
+        "graphrag-init",
+        [
+            str(graphrag),
+            "init",
+            "--root",
+            str(project_dir),
+            "--model",
+            CHAT_MODEL,
+            "--embedding",
+            EMBEDDING_MODEL,
+            "--force",
+        ],
+        WORK_DIR,
+        extra_env={"GRAPHRAG_API_KEY": API_KEY, "GRAPHRAG_API_BASE": API_BASE},
+    )
+    command_records.append(init_record)
+
+    if init_record.status != "pass":
+        return False
+
+    patch_settings(project_dir / "settings.yaml")
+
+    return True
+
+
+def patch_settings(settings_path: Path) -> None:
+    """Apply bounded model, chunking, and output configuration to settings.yaml."""
+
+    if not settings_path.exists():
+        return
+
+    lines = settings_path.read_text(encoding="utf-8").splitlines()
+    patched: list[str] = []
+    inserted_api_base = False
+
+    for line in lines:
+        patched.append(line)
+        stripped = line.strip()
+        indent = line[: len(line) - len(line.lstrip())]
+
+        if API_BASE and stripped.startswith("api_key:") and not inserted_api_base:
+            patched.append(f"{indent}api_base: ${{GRAPHRAG_API_BASE}}")
+            inserted_api_base = True
+
+    patched.extend(
+        [
+            "",
+            "# ELF GraphRAG smoke bounds.",
+            "chunks:",
+            "  size: 220",
+            "  overlap: 20",
+            "  prepend_metadata: false",
+            "extract_graph:",
+            "  max_gleanings: 0",
+            "summarize_descriptions:",
+            "  max_length: 160",
+            "  max_input_length: 600",
+            "community_reports:",
+            "  max_length: 220",
+            "  max_input_length: 800",
+            "parallelization:",
+            "  stagger: 0.0",
+            "  num_threads: 1",
+            "async_mode: threaded",
+        ]
+    )
+    settings_path.write_text("\n".join(patched) + "\n", encoding="utf-8")
+
+
+def run_graphrag(project_dir: Path, command_records: list[CommandRecord]) -> Path | None:
+    """Run GraphRAG index and local query."""
+
+    graphrag = WORK_DIR / ".venv" / "bin" / "graphrag"
+    env = {"GRAPHRAG_API_KEY": API_KEY, "GRAPHRAG_API_BASE": API_BASE}
+    index_record = run_command(
+        "graphrag-index",
+        [
+            str(graphrag),
+            "index",
+            "--root",
+            str(project_dir),
+            "--method",
+            INDEX_METHOD,
+            "--cache",
+        ],
+        WORK_DIR,
+        extra_env=env,
+    )
+    command_records.append(index_record)
+    if index_record.status != "pass":
+        return None
+
+    output_dir = find_output_dir(project_dir)
+    if output_dir is None:
+        command_records.append(
+            CommandRecord(
+                label="graphrag-output-discovery",
+                command=["find", str(project_dir / "output"), "-name", "*.parquet"],
+                status="incomplete",
+                elapsed_ms=0.0,
+                stdout_artifact=None,
+                stderr_artifact=None,
+                returncode=None,
+                reason="GraphRAG index completed but no parquet output directory was found.",
+            )
+        )
+
+        return None
+
+    query_record = run_command(
+        "graphrag-query-local",
+        [
+            str(graphrag),
+            "query",
+            "--root",
+            str(project_dir),
+            "--method",
+            QUERY_METHOD,
+            "--data",
+            str(output_dir),
+            "--response-type",
+            "Single Sentence",
+            "What connects Nova Observatory and the Aurora Index in the generated corpus?",
+        ],
+        WORK_DIR,
+        extra_env=env,
+    )
+    command_records.append(query_record)
+
+    if query_record.status != "pass":
+        return None
+
+    return output_dir
+
+
+def find_output_dir(project_dir: Path) -> Path | None:
+    """Find a GraphRAG output directory containing parquet tables."""
+
+    output_root = project_dir / "output"
+    candidates: list[Path] = []
+
+    if output_root.exists():
+        for parquet in output_root.rglob("*.parquet"):
+            candidates.append(parquet.parent)
+
+    if not candidates:
+        return None
+
+    candidates.sort(key=lambda path: path.stat().st_mtime if path.exists() else 0.0)
+
+    return candidates[-1]
+
+
+def map_tables(output_dir: Path, corpus: list[dict[str, str]]) -> tuple[list[dict[str, Any]], list[str]]:
+    """Map GraphRAG parquet table identifiers to real_world_job evidence ids."""
+
+    try:
+        import pandas as pd  # type: ignore[import-not-found]
+    except ImportError as err:
+        return (
+            [
+                {
+                    "table": table,
+                    "mapping_status": "reader_missing",
+                    "error": f"pandas/pyarrow unavailable: {err}",
+                    "row_count": 0,
+                    "mapped_row_count": 0,
+                    "rows": [],
+                }
+                for table in TABLES
+            ],
+            [],
+        )
+
+    table_paths = capture_table_artifacts(output_dir)
+    mapped_by_table: dict[str, dict[str, list[str]]] = {}
+    mappings: list[dict[str, Any]] = []
+
+    for table in TABLES:
+        path = table_paths.get(table)
+
+        if path is None:
+            mappings.append(
+                {
+                    "table": table,
+                    "mapping_status": "missing_table",
+                    "artifact": None,
+                    "row_count": 0,
+                    "mapped_row_count": 0,
+                    "rows": [],
+                }
+            )
+            mapped_by_table[table] = {}
+            continue
+
+        try:
+            frame = pd.read_parquet(path)
+        except Exception as err:  # noqa: BLE001
+            mappings.append(
+                {
+                    "table": table,
+                    "mapping_status": "read_failed",
+                    "artifact": rel(path),
+                    "error": str(err),
+                    "row_count": 0,
+                    "mapped_row_count": 0,
+                    "rows": [],
+                }
+            )
+            mapped_by_table[table] = {}
+            continue
+
+        rows, by_id = map_frame(table, frame, corpus, mapped_by_table)
+        mapped_count = sum(1 for row in rows if row["evidence_ids"])
+        status = "pass"
+
+        if table in {"documents", "text_units"} and mapped_count < len(rows):
+            status = "unmapped_required_rows"
+        elif mapped_count == 0 and len(rows) > 0:
+            status = "unmapped_rows"
+
+        mappings.append(
+            {
+                "table": table,
+                "mapping_status": status,
+                "artifact": rel(path),
+                "row_count": len(rows),
+                "mapped_row_count": mapped_count,
+                "rows": rows,
+            }
+        )
+        mapped_by_table[table] = by_id
+
+    evidence_ids: list[str] = []
+
+    for mapping in mappings:
+        for row in mapping["rows"]:
+            for evidence_id in row["evidence_ids"]:
+                if evidence_id not in evidence_ids:
+                    evidence_ids.append(evidence_id)
+
+    return mappings, evidence_ids
+
+
+def empty_table_mappings(mapping_status: str) -> list[dict[str, Any]]:
+    """Return explicit table mapping placeholders for non-live typed outcomes."""
+
+    return [
+        {
+            "table": table,
+            "mapping_status": mapping_status,
+            "artifact": None,
+            "row_count": 0,
+            "mapped_row_count": 0,
+            "rows": [],
+        }
+        for table in TABLES
+    ]
+
+
+def capture_table_artifacts(output_dir: Path) -> dict[str, Path]:
+    """Copy known GraphRAG parquet tables into the report artifact directory."""
+
+    table_paths: dict[str, Path] = {}
+
+    if OUTPUT_CAPTURE_DIR.exists():
+        shutil.rmtree(OUTPUT_CAPTURE_DIR)
+    OUTPUT_CAPTURE_DIR.mkdir(parents=True, exist_ok=True)
+
+    for table in TABLES:
+        source = find_table_path(output_dir, table)
+
+        if source is None:
+            continue
+
+        destination = OUTPUT_CAPTURE_DIR / f"{table}.parquet"
+        shutil.copy2(source, destination)
+        table_paths[table] = destination
+
+    return table_paths
+
+
+def find_table_path(output_dir: Path, table: str) -> Path | None:
+    """Find a parquet file for a GraphRAG logical table name."""
+
+    candidates = list(output_dir.rglob("*.parquet"))
+    exact_names = {
+        f"{table}.parquet",
+        f"create_final_{table}.parquet",
+        f"final_{table}.parquet",
+    }
+
+    for path in candidates:
+        if path.name in exact_names:
+            return path
+
+    for path in candidates:
+        stem = path.stem.lower()
+
+        if stem.endswith(table) or stem == table or f"_{table}" in stem:
+            return path
+
+    return None
+
+
+def map_frame(
+    table: str,
+    frame: Any,
+    corpus: list[dict[str, str]],
+    mapped_by_table: dict[str, dict[str, list[str]]],
+) -> tuple[list[dict[str, Any]], dict[str, list[str]]]:
+    """Map rows for a GraphRAG output table."""
+
+    rows: list[dict[str, Any]] = []
+    by_id: dict[str, list[str]] = {}
+
+    for _, row in frame.iterrows():
+        row_dict = {key: normalize_cell(value) for key, value in row.to_dict().items()}
+        row_id = str(row_dict.get("id") or row_dict.get("human_readable_id") or row_dict.get("community") or "")
+        evidence_ids = evidence_from_row(table, row_dict, corpus, mapped_by_table)
+        rows.append(
+            {
+                "row_id": row_id,
+                "human_readable_id": row_dict.get("human_readable_id"),
+                "document_id": row_dict.get("document_id"),
+                "community": row_dict.get("community"),
+                "text_unit_ids": row_dict.get("text_unit_ids") or row_dict.get("text_units") or [],
+                "evidence_ids": evidence_ids,
+            }
+        )
+
+        if row_id:
+            by_id[row_id] = evidence_ids
+
+    return rows, by_id
+
+
+def normalize_cell(value: Any) -> Any:
+    """Normalize dataframe cell values into JSON-safe values."""
+
+    if value is None:
+        return None
+    if hasattr(value, "tolist"):
+        return normalize_cell(value.tolist())
+    if isinstance(value, float) and value != value:
+        return None
+    if isinstance(value, (list, tuple, set)):
+        return [normalize_cell(item) for item in value]
+    if isinstance(value, dict):
+        return {str(key): normalize_cell(item) for key, item in value.items()}
+
+    return value
+
+
+def evidence_from_row(
+    table: str,
+    row: dict[str, Any],
+    corpus: list[dict[str, str]],
+    mapped_by_table: dict[str, dict[str, list[str]]],
+) -> list[str]:
+    """Return mapped evidence ids for one output row."""
+
+    evidence_ids: list[str] = []
+    haystack = json.dumps(row, sort_keys=True, default=str)
+
+    for item in corpus:
+        evidence_id = item["evidence_id"]
+        title = item["title"]
+        signature = item["text"].split(".")[0]
+
+        if (
+            evidence_id in haystack
+            or slug(evidence_id) in haystack
+            or title in haystack
+            or signature in haystack
+        ):
+            append_unique(evidence_ids, evidence_id)
+
+    document_id = row.get("document_id")
+    if document_id is not None:
+        for evidence_id in mapped_by_table.get("documents", {}).get(str(document_id), []):
+            append_unique(evidence_ids, evidence_id)
+
+    for text_unit_id in row.get("text_unit_ids") or []:
+        for evidence_id in mapped_by_table.get("text_units", {}).get(str(text_unit_id), []):
+            append_unique(evidence_ids, evidence_id)
+
+    if table == "community_reports":
+        community = row.get("community")
+
+        if community is not None:
+            for candidate_id, candidate_evidence in mapped_by_table.get("communities", {}).items():
+                if str(candidate_id) == str(community):
+                    for evidence_id in candidate_evidence:
+                        append_unique(evidence_ids, evidence_id)
+
+    return evidence_ids
+
+
+def append_unique(values: list[str], value: str) -> None:
+    """Append a value if absent."""
+
+    if value not in values:
+        values.append(value)
+
+
+def mapping_is_valid(mappings: list[dict[str, Any]], expected_ids: list[str]) -> tuple[bool, str]:
+    """Validate source document/text-unit evidence mapping."""
+
+    mapping_by_table = {mapping["table"]: mapping for mapping in mappings}
+
+    for table in TABLES:
+        mapping = mapping_by_table.get(table)
+
+        if mapping is None or mapping["mapping_status"] in {"missing_table", "read_failed", "reader_missing"}:
+            return False, f"GraphRAG output table {table} was not available for evidence mapping."
+
+    for table in ("documents", "text_units"):
+        mapping = mapping_by_table[table]
+
+        if mapping["mapping_status"] != "pass":
+            return False, f"GraphRAG {table} rows include identifiers that did not map to evidence ids."
+
+    seen: list[str] = []
+    for mapping in mappings:
+        for row in mapping["rows"]:
+            for evidence_id in row["evidence_ids"]:
+                append_unique(seen, evidence_id)
+
+    missing = [evidence_id for evidence_id in expected_ids if evidence_id not in seen]
+
+    if missing:
+        return False, f"GraphRAG output mappings missed expected evidence ids: {', '.join(missing)}."
+
+    return True, "GraphRAG output tables mapped to expected generated evidence ids."
+
+
+def write_materialization(
+    status: StatusState,
+    corpus: list[dict[str, str]],
+    fixture_path: Path,
+    corpus_csv: Path,
+    command_records: list[CommandRecord],
+    mappings: list[dict[str, Any]],
+    mapped_ids: list[str],
+    started_at: float,
+) -> dict[str, Any]:
+    """Write the primary smoke artifact."""
+
+    cache_dir = WORK_DIR / "project" / "cache"
+    output_dir = WORK_DIR / "project" / "output"
+    elapsed_ms = (time.monotonic() - started_at) * 1000
+    expected_ids = [item["evidence_id"] for item in corpus if item["evidence_id"] != "graphrag-smoke-stale-trap"]
+    payload = {
+        "schema": "elf.graphrag_docker_smoke/v1",
+        "generated_at": utc_now(),
+        "run_id": RUN_ID,
+        "adapter_id": "graphrag_docker_smoke",
+        "evidence_class": status.evidence_class,
+        "status": {
+            "setup": status.setup,
+            "run": status.run,
+            "result": status.result,
+            "overall": status.overall,
+            "failure_class": status.failure_class,
+            "failure_reason": status.failure_reason,
+        },
+        "artifacts": {
+            "generated_corpus_csv": rel(corpus_csv),
+            "generated_fixture": rel(fixture_path),
+            "graph_output_dir": rel(OUTPUT_CAPTURE_DIR),
+            "manifest": rel(MANIFEST_OUT),
+            "summary": rel(SUMMARY_OUT),
+        },
+        "docker_boundary": {
+            "compose_file": "docker-compose.baseline.yml",
+            "runner_service": "baseline-runner",
+            "runner": "scripts/graphrag-docker-smoke.py",
+            "host_global_installs_required": False,
+            "docker_only": True,
+        },
+        "provider_configuration": {
+            "package": GRAPH_RAG_REF,
+            "package_spec": GRAPH_RAG_PACKAGE,
+            "chat_model": CHAT_MODEL,
+            "embedding_model": EMBEDDING_MODEL,
+            "api_base_configured": bool(API_BASE),
+            "api_key_provided": bool(API_KEY),
+            "operator_owned_provider_credentials_used": False,
+            "index_method": INDEX_METHOD,
+            "query_method": QUERY_METHOD,
+            "live_run_enabled": RUN_LIVE,
+        },
+        "resource_bounds": {
+            "max_docs": MAX_DOCS,
+            "max_input_chars": MAX_INPUT_CHARS,
+            "actual_doc_count": len(corpus),
+            "actual_input_chars": sum(len(item["text"]) for item in corpus),
+            "timeout_seconds": TIMEOUT_SECONDS,
+            "elapsed_ms": round(elapsed_ms, 3),
+            "cache_size_bytes": dir_size(cache_dir),
+            "cache_file_count": file_count(cache_dir),
+            "output_size_bytes": dir_size(output_dir),
+            "captured_output_size_bytes": dir_size(OUTPUT_CAPTURE_DIR),
+            "model_call_observation": {
+                "source": "GraphRAG cache artifact count when available",
+                "observed_cache_entries": file_count(cache_dir),
+                "raw_provider_usage_tokens_recorded": False,
+            },
+        },
+        "commands": [command_to_json(record) for record in command_records],
+        "evidence_mapping": {
+            "expected_evidence_ids": expected_ids,
+            "mapped_evidence_ids": mapped_ids,
+            "tables": mappings,
+        },
+    }
+    write_json(OUT, payload)
+
+    return payload
+
+
+def write_manifest(status: StatusState) -> dict[str, Any]:
+    """Write a generated external adapter manifest for this smoke."""
+
+    manifest = {
+        "schema": "elf.real_world_external_adapter_manifest/v1",
+        "manifest_id": f"graphrag-docker-smoke-{RUN_ID}",
+        "docker_isolation": {
+            "default": True,
+            "compose_file": "docker-compose.baseline.yml",
+            "runner": "scripts/graphrag-docker-smoke.py",
+            "artifact_dir": "tmp/real-world-memory/graphrag-smoke",
+            "host_global_installs_required": False,
+            "notes": [
+                f"Generated by the GraphRAG Docker smoke at {utc_now()}.",
+                "The smoke uses a generated public corpus and records typed setup/runtime failures.",
+            ],
+        },
+        "adapters": [
+            {
+                "adapter_id": "graphrag_docker_smoke",
+                "project": "GraphRAG",
+                "adapter_kind": "docker_python_cli_api_smoke",
+                "evidence_class": status.evidence_class,
+                "docker_default": True,
+                "host_global_installs_required": False,
+                "overall_status": status.overall,
+                "setup": {
+                    "status": status.setup,
+                    "evidence": "The smoke runs inside the baseline Docker runner and installs or invokes GraphRAG only in the container-local work directory.",
+                    "command": "cargo make graphrag-docker-smoke",
+                    "artifact": rel(OUT),
+                },
+                "run": {
+                    "status": status.run,
+                    "evidence": "The live path generates a tiny public corpus, initializes GraphRAG, indexes with bounded inputs, and runs local search when provider config is supplied.",
+                    "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make graphrag-docker-smoke",
+                    "artifact": rel(OUT),
+                },
+                "result": {
+                    "status": status.result,
+                    "evidence": status.failure_reason
+                    if status.failure_reason
+                    else "GraphRAG parquet output tables mapped to generated real_world_job evidence ids.",
+                    "artifact": rel(OUT),
+                },
+                "capabilities": [
+                    {
+                        "capability": "docker_python_cli_boundary",
+                        "status": status.setup,
+                        "evidence": "The runner is Python-only inside docker-compose.baseline.yml baseline-runner and does not require host-global GraphRAG installs.",
+                    },
+                    {
+                        "capability": "graphrag_index_query",
+                        "status": status.run,
+                        "evidence": "The opt-in live path runs GraphRAG index and local query over the generated public corpus.",
+                    },
+                    {
+                        "capability": "parquet_table_evidence_mapping",
+                        "status": status.result,
+                        "evidence": "documents, text_units, communities, community_reports, entities, and relationships parquet table identifiers are mapped to evidence ids when available.",
+                    },
+                    {
+                        "capability": "quality_or_scale_claim",
+                        "status": "not_encoded",
+                        "evidence": "The smoke does not claim graph-navigation quality, synthesis quality, private-corpus behavior, or large-corpus indexing.",
+                    },
+                ],
+                "suites": [
+                    {
+                        "suite_id": "knowledge_compilation",
+                        "status": status.result,
+                        "evidence": "Only the generated tiny-corpus table-mapping job is represented.",
+                    },
+                    {
+                        "suite_id": "retrieval",
+                        "status": status.run if status.run != "pass" else "not_encoded",
+                        "evidence": "The smoke may run local search for reachability, but retrieval quality scoring is not encoded.",
+                    },
+                    {
+                        "suite_id": "production_ops",
+                        "status": "not_encoded",
+                        "evidence": "The smoke records resource bounds but does not encode backup, restore, provider credential, or private corpus production-ops checks.",
+                    },
+                    {
+                        "suite_id": "memory_evolution",
+                        "status": "not_encoded",
+                        "evidence": "GraphRAG update/delete/current-versus-historical behavior is not encoded by this smoke.",
+                    },
+                ],
+                "evidence": [
+                    {"kind": "artifact", "ref": rel(OUT), "status": status.result},
+                    {"kind": "artifact", "ref": rel(OUTPUT_CAPTURE_DIR), "status": status.result},
+                    {"kind": "manifest", "ref": rel(MANIFEST_OUT), "status": status.overall},
+                    {"kind": "source", "ref": "https://github.com/microsoft/graphrag", "status": "real"},
+                    {"kind": "source", "ref": "https://microsoft.github.io/graphrag/", "status": "real"},
+                    {
+                        "kind": "source",
+                        "ref": "https://microsoft.github.io/graphrag/index/outputs/",
+                        "status": "real",
+                    },
+                ],
+                "execution_metadata": {
+                    "sources": [
+                        {
+                            "label": "GraphRAG repository",
+                            "url": "https://github.com/microsoft/graphrag",
+                            "evidence": "Official source and package for GraphRAG.",
+                        },
+                        {
+                            "label": "GraphRAG CLI docs",
+                            "url": "https://microsoft.github.io/graphrag/cli/",
+                            "evidence": "Official index and query command contract.",
+                        },
+                        {
+                            "label": "GraphRAG input docs",
+                            "url": "https://microsoft.github.io/graphrag/index/inputs/",
+                            "evidence": "Official input formats and document schema.",
+                        },
+                        {
+                            "label": "GraphRAG output tables",
+                            "url": "https://microsoft.github.io/graphrag/index/outputs/",
+                            "evidence": "Official parquet output table schema for evidence mapping.",
+                        },
+                        {
+                            "label": "GraphRAG local search docs",
+                            "url": "https://microsoft.github.io/graphrag/query/local_search/",
+                            "evidence": "Official local-search context and graph traversal reference.",
+                        },
+                    ],
+                    "setup_path": "Run cargo make graphrag-docker-smoke for a typed artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live index/query attempt.",
+                    "runtime_boundary": "docker-compose.baseline.yml baseline-runner, container-local Python venv, generated public corpus, and report artifacts under tmp/real-world-memory/graphrag-smoke.",
+                    "resource_expectation": f"GraphRAG package {GRAPH_RAG_REF}, max_docs={MAX_DOCS}, max_input_chars={MAX_INPUT_CHARS}, timeout_seconds={TIMEOUT_SECONDS}, index_method={INDEX_METHOD}.",
+                    "retry_guidance": [
+                        "Default command records a typed blocked artifact without model calls.",
+                        "Enable the live path only with explicit provider configuration and generated public corpus.",
+                        "Treat missing or unmapped documents/text_units as wrong_result, not as pass.",
+                    ],
+                    "research_depth": "D2 feasibility plus XY-887 cost-bounded Docker smoke implementation; generated artifact decides live evidence class.",
+                },
+                "notes": [
+                    "The checked-in manifest record remains research_gate; generated smoke artifacts carry live status.",
+                    "Failure before GraphRAG output remains typed as blocked or incomplete.",
+                    "The smoke does not use private corpora or unrecorded provider credentials.",
+                ],
+            }
+        ],
+    }
+    write_json(MANIFEST_OUT, manifest)
+
+    return manifest
+
+
+def write_summary(materialization: dict[str, Any], manifest: dict[str, Any]) -> None:
+    """Write a small summary artifact."""
+
+    write_json(
+        SUMMARY_OUT,
+        {
+            "schema": "elf.graphrag_docker_smoke_summary/v1",
+            "generated_at": utc_now(),
+            "adapter_id": "graphrag_docker_smoke",
+            "evidence_class": materialization["evidence_class"],
+            "materialization": materialization,
+            "manifest": {
+                "json": rel(MANIFEST_OUT),
+                "summary": manifest["adapters"][0]["overall_status"],
+                "suites": manifest["adapters"][0]["suites"],
+            },
+        },
+    )
+
+
+def scrub_report_secrets(project_dir: Path) -> None:
+    """Remove provider secrets from text artifacts before reporting."""
+
+    if not API_KEY:
+        return
+
+    for root in (project_dir, LOG_DIR):
+        if not root.exists():
+            continue
+
+        for path in root.rglob("*"):
+            if not path.is_file() or path.suffix not in {".env", ".json", ".log", ".txt", ".yaml", ".yml"}:
+                continue
+
+            try:
+                content = path.read_text(encoding="utf-8")
+            except UnicodeDecodeError:
+                continue
+
+            if API_KEY in content:
+                path.write_text(content.replace(API_KEY, "<redacted>"), encoding="utf-8")
+
+
+def main() -> int:
+    """Run the smoke and always emit typed artifacts when possible."""
+
+    started_at = time.monotonic()
+    mkdirs()
+    status = StatusState()
+    command_records: list[CommandRecord] = []
+    mappings: list[dict[str, Any]] = empty_table_mappings("not_encoded")
+    mapped_ids: list[str] = []
+    corpus = generated_corpus()
+    project_dir = WORK_DIR / "project"
+    corpus_csv = write_corpus(project_dir, corpus)
+
+    if not Path("/.dockerenv").exists() and not ALLOW_HOST:
+        status.setup = "incomplete"
+        status.result = "incomplete"
+        status.overall = "incomplete"
+        status.failure_class = "not_running_in_docker"
+        status.failure_reason = "GraphRAG smoke must run inside Docker; use cargo make graphrag-docker-smoke."
+    elif not command_available("python3"):
+        status.setup = "incomplete"
+        status.result = "incomplete"
+        status.overall = "incomplete"
+        status.failure_class = "python_missing"
+        status.failure_reason = "python3 is required for the GraphRAG smoke runner."
+    elif not RUN_LIVE:
+        pass
+    elif not API_KEY:
+        status.setup = "blocked"
+        status.run = "not_encoded"
+        status.result = "blocked"
+        status.overall = "blocked"
+        status.failure_class = "provider_api_key_missing"
+        status.failure_reason = "GraphRAG live indexing requires an explicit provider API key; no private or unrecorded provider credentials were used."
+    elif not init_project(project_dir, command_records):
+        status.setup = "incomplete"
+        status.run = "not_encoded"
+        status.result = "incomplete"
+        status.overall = "incomplete"
+        status.failure_class = "graphrag_setup_failed"
+        status.failure_reason = "GraphRAG installation or initialization failed inside the Docker runner."
+    else:
+        status.setup = "pass"
+        output_dir = run_graphrag(project_dir, command_records)
+
+        if output_dir is None:
+            status.run = "incomplete"
+            status.result = "incomplete"
+            status.overall = "incomplete"
+            status.failure_class = "graphrag_index_or_query_failed"
+            status.failure_reason = "GraphRAG did not complete both index and local query for the generated corpus."
+        else:
+            status.run = "pass"
+            status.evidence_class = "live_real_world"
+            mappings, mapped_ids = map_tables(output_dir, corpus)
+            expected_ids = [
+                item["evidence_id"]
+                for item in corpus
+                if item["evidence_id"] != "graphrag-smoke-stale-trap"
+            ]
+            valid, reason = mapping_is_valid(mappings, expected_ids)
+
+            if valid:
+                status.result = "pass"
+                status.overall = "pass"
+                status.failure_class = ""
+                status.failure_reason = ""
+            else:
+                status.result = "wrong_result"
+                status.overall = "wrong_result"
+                status.failure_class = "graphrag_evidence_mapping_failed"
+                status.failure_reason = reason
+
+    scrub_report_secrets(project_dir)
+    fixture_path = write_fixture(corpus, status, mapped_ids)
+    materialization = write_materialization(
+        status,
+        corpus,
+        fixture_path,
+        corpus_csv,
+        command_records,
+        mappings,
+        mapped_ids,
+        started_at,
+    )
+    manifest = write_manifest(status)
+    write_summary(materialization, manifest)
+    print(f"GraphRAG smoke artifact: {OUT}")
+    print(f"GraphRAG smoke manifest: {MANIFEST_OUT}")
+    print(f"GraphRAG smoke summary: {SUMMARY_OUT}")
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/real-world-live-adapters.sh b/scripts/real-world-live-adapters.sh
index 094db251..b01d7591 100755
--- a/scripts/real-world-live-adapters.sh
+++ b/scripts/real-world-live-adapters.sh
@@ -29,6 +29,7 @@ rm -rf "${REPORT_DIR:?}/elf-fixtures" \
   "${REPORT_DIR:?}/qmd-report.json" \
   "${REPORT_DIR:?}/qmd-report.md" \
   "${REPORT_DIR:?}/lightrag" \
+  "${REPORT_DIR:?}/graphrag" \
   "${REPORT_DIR:?}/summary.json"
 
 cd "${ROOT_DIR}"
@@ -82,6 +83,11 @@ if [[ "${ELF_REAL_WORLD_LIVE_ENABLE_LIGHTRAG:-0}" == "1" ]]; then
     bash scripts/lightrag-docker-context-smoke.sh
 fi
 
+if [[ "${ELF_REAL_WORLD_LIVE_ENABLE_GRAPHRAG:-0}" == "1" ]]; then
+  ELF_GRAPHRAG_SMOKE_REPORT_DIR="${REPORT_DIR}/graphrag" \
+    python3 scripts/graphrag-docker-smoke.py
+fi
+
 jq -n \
   --slurpfile elf_materialization "${REPORT_DIR}/elf-materialization.json" \
   --slurpfile qmd_materialization "${REPORT_DIR}/qmd-materialization.json" \
@@ -132,6 +138,25 @@ if [[ -f "${REPORT_DIR}/lightrag/summary.json" ]]; then
   mv "${REPORT_DIR}/summary.json.tmp" "${REPORT_DIR}/summary.json"
 fi
 
+if [[ -f "${REPORT_DIR}/graphrag/summary.json" ]]; then
+  jq \
+    --slurpfile graphrag_summary "${REPORT_DIR}/graphrag/summary.json" \
+    '.adapters += [
+      {
+        adapter_id: $graphrag_summary[0].adapter_id,
+        evidence_class: $graphrag_summary[0].evidence_class,
+        materialization: $graphrag_summary[0].materialization,
+        report: {
+          json: "tmp/real-world-memory/live-adapters/graphrag/graphrag-smoke.json",
+          markdown: null,
+          summary: $graphrag_summary[0].materialization.status,
+          suites: $graphrag_summary[0].manifest.suites
+        }
+      }
+    ]' "${REPORT_DIR}/summary.json" >"${REPORT_DIR}/summary.json.tmp"
+  mv "${REPORT_DIR}/summary.json.tmp" "${REPORT_DIR}/summary.json"
+fi
+
 echo "Live real-world adapter reports:"
 echo "  ${REPORT_DIR}/elf-report.json"
 echo "  ${REPORT_DIR}/elf-report.md"
@@ -141,4 +166,8 @@ if [[ -f "${REPORT_DIR}/lightrag/summary.json" ]]; then
   echo "  ${REPORT_DIR}/lightrag/lightrag-report.json"
   echo "  ${REPORT_DIR}/lightrag/lightrag-report.md"
 fi
+if [[ -f "${REPORT_DIR}/graphrag/summary.json" ]]; then
+  echo "  ${REPORT_DIR}/graphrag/graphrag-smoke.json"
+  echo "  ${REPORT_DIR}/graphrag/summary.json"
+fi
 echo "  ${REPORT_DIR}/summary.json"

From 6d20ed64ca2c7f2ef9a900cbbc11564db6db2183 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Wed, 10 Jun 2026 19:21:05 +0800
Subject: [PATCH 2/2] {"schema":"decodex/commit/1","summary":"Use explicit NaN
 check in GraphRAG smoke","authority":"XY-887"}

---
 scripts/graphrag-docker-smoke.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/graphrag-docker-smoke.py b/scripts/graphrag-docker-smoke.py
index 96757f16..69942e45 100755
--- a/scripts/graphrag-docker-smoke.py
+++ b/scripts/graphrag-docker-smoke.py
@@ -5,6 +5,7 @@
 
 import csv
 import json
+import math
 import os
 import shutil
 import subprocess
@@ -868,7 +869,7 @@ def normalize_cell(value: Any) -> Any:
         return None
     if hasattr(value, "tolist"):
         return normalize_cell(value.tolist())
-    if isinstance(value, float) and value != value:
+    if isinstance(value, float) and math.isnan(value):
         return None
     if isinstance(value, (list, tuple, set)):
         return [normalize_cell(item) for item in value]