diff --git a/Cargo.lock b/Cargo.lock
index ccd3b168..f9ffbcfc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -964,12 +964,16 @@ dependencies = [
 name = "elf-eval"
 version = "0.2.0"
 dependencies = [
+ "blake3",
  "clap",
  "color-eyre",
+ "elf-chunking",
  "elf-cli",
  "elf-config",
  "elf-service",
  "elf-storage",
+ "elf-testkit",
+ "elf-worker",
  "serde",
  "serde_json",
  "sqlx",
diff --git a/Makefile.toml b/Makefile.toml
index 832f0c7e..3cf5f17c 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -293,6 +293,41 @@ args = [
 ]
 
 
+# Live external baseline benchmark
+# | task                       | type    | cwd |
+# | -------------------------- | ------- | --- |
+# | baseline-live-docker       | command |     |
+# | baseline-live-report       | command |     |
+# | baseline-live-docker-clean | command |     |
+
+# Runs the Docker live baseline. The inline script records the current git HEAD
+# in ELF_BASELINE_ELF_HEAD (with a "+dirty" suffix when `git status --porcelain`
+# reports changes) and then builds and runs the `baseline-runner` compose service.
+[tasks.baseline-live-docker]
+workspace = false
+command = "bash"
+args = [
+	"-lc",
+	"set -euo pipefail; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
+]
+
+# Runs scripts/live-baseline-report-to-md.sh; presumably it renders the runner's
+# JSON report as Markdown (confirm against the script).
+[tasks.baseline-live-report]
+workspace = false
+command = "bash"
+args = [
+	"scripts/live-baseline-report-to-md.sh",
+]
+
+# Tears down the baseline compose project. `-v` also removes its volumes, so any
+# data kept in them is lost; `--remove-orphans` removes leftover containers for
+# services that are not defined in the compose file.
+[tasks.baseline-live-docker-clean]
+workspace = false
+command = "docker"
+args = [
+	"compose",
+	"-f",
+	"docker-compose.baseline.yml",
+	"down",
+	"-v",
+	"--remove-orphans",
+]
+
+
 # Meta
 # | task   | type      | cwd |
 # | ------ | --------- | --- |
diff --git a/README.md b/README.md
index cd17b656..173714aa 100644
--- a/README.md
+++ b/README.md
@@ -113,6 +113,29 @@ flowchart TB
 
 ## Comparison
 
+### Checked-In Live Benchmark Snapshot
+
+The June 9, 2026 Docker-only live baseline uses the same generated corpus and query
+manifest across ELF and the external memory projects below. ELF was run with the
+production embedding provider path, `Qwen3-Embedding-8B`, and 4096-dimensional
+embeddings.
+
+- ELF production-provider stress run: 480 documents, 16 queries, `8/8` encoded checks,
+  `retrieval_pass`, and `pass` in 1163 seconds.
+- All-project smoke run: ELF and qmd passed every encoded check. agentmemory passed
+  same-corpus retrieval but failed, or could not complete, its lifecycle checks. mem0,
+  memsearch, and claude-mem returned wrong same-corpus retrieval results in the encoded
+  smoke run. OpenViking was marked `incomplete` because its local embedding dependency
+  could not complete in the Docker runner.
+- The benchmark runner and report publisher are checked in and Docker-isolated:
+  `cargo make baseline-live-docker`, `cargo make baseline-live-report`, and
+  `cargo make baseline-live-docker-clean`.
+
+Detailed evidence and interpretation:
+
+- [Live Baseline Benchmark Report - June 9, 2026](docs/guide/benchmarking/2026-06-09-live-baseline-report.md)
+- [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
+
 Quick comparison snapshot (objective/high-level).
 This table compares capability coverage, not overall project quality.
 
@@ -153,6 +176,8 @@ Project signature strengths (what each does especially well):
 
 Detailed comparison, mechanism-level analysis, and source map:
 
+- [Live Baseline Benchmark Report - June 9, 2026](docs/guide/benchmarking/2026-06-09-live-baseline-report.md)
+- [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
 - [Detailed External Comparison](docs/guide/research/comparison_external_projects.md)
 - [Research Projects Inventory](docs/guide/research/research_projects_inventory.md)
 - [Agent Memory Selection Research Run](docs/research/2026-06-08-agent-memory-selection.json)
@@ -163,6 +188,7 @@ Latest external research refresh: June 8, 2026.
 
 - Start here: `docs/index.md`
 - Operational guide index: `docs/guide/index.md`
+- Benchmarking guides and reports: `docs/guide/benchmarking/index.md`
 - Research index: `docs/guide/research/index.md`
 - Specifications: `docs/spec/index.md`
 - System contract: `docs/spec/system_elf_memory_service_v2.md`
diff --git a/apps/elf-eval/Cargo.toml b/apps/elf-eval/Cargo.toml
index ec438112..149e81f5 100644
--- a/apps/elf-eval/Cargo.toml
+++ b/apps/elf-eval/Cargo.toml
@@ -6,6 +6,7 @@ name        = "elf-eval"
 version     = "0.2.0"
 
 [dependencies]
+blake3             = { workspace = true }
 clap               = { workspace = true }
 color-eyre         = { workspace = true }
 serde              = { workspace = true }
@@ -17,10 +18,13 @@ tracing            = { workspace = true }
 tracing-subscriber = { workspace = true }
 uuid               = { workspace = true }
 
-elf-cli     = { workspace = true }
-elf-config  = { workspace = true }
-elf-service = { workspace = true }
-elf-storage = { workspace = true }
+elf-chunking = { workspace = true }
+elf-cli      = { workspace = true }
+elf-config   = { workspace = true }
+elf-service  = { workspace = true }
+elf-storage  = { workspace = true }
+elf-testkit  = { workspace = true }
+elf-worker   = { workspace = true }
 
 [build-dependencies]
 vergen-gitcl = { workspace = true }
diff --git a/apps/elf-eval/src/bin/live_baseline_elf.rs b/apps/elf-eval/src/bin/live_baseline_elf.rs
new file mode 100644
index 00000000..75c9b83e
--- /dev/null
+++ b/apps/elf-eval/src/bin/live_baseline_elf.rs
@@ -0,0 +1,1661 @@
+#![allow(clippy::single_component_path_imports, unused_crate_dependencies)]
+
+//! Docker live-baseline runner for ELF's own same-corpus retrieval path.
+
+use std::{
+	collections::{BTreeMap, HashSet},
+	env, fs,
+	path::{Path, PathBuf},
+	process::Command,
+	sync::Arc,
+	time::{Duration, Instant},
+};
+
+use clap::Parser;
+use color_eyre::{Report, eyre};
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use tokio::{task::JoinSet, time};
+use uuid::Uuid;
+
+use elf_chunking::ChunkingConfig;
+use elf_config::{Config, EmbeddingProviderConfig, LlmProviderConfig, ProviderConfig};
+use elf_service::{
+	AddNoteInput, AddNoteRequest, BoxFuture, DeleteRequest, ElfService, EmbeddingProvider,
+	ExtractorProvider, PayloadLevel, Providers, RerankProvider, SearchRequest, UpdateRequest,
+};
+use elf_storage::{db::Db, qdrant::QdrantStore};
+use elf_testkit::TestDatabase;
+use elf_worker::worker::{self, WorkerState};
+
+// Fixed identity shared by the add-note request builders in this file, so the
+// benchmark's synthetic notes are written under one tenant/project/agent/scope.
+const TENANT_ID: &str = "elf-live-baseline";
+const PROJECT_ID: &str = "shared-corpus";
+const AGENT_ID: &str = "elf-bench-agent";
+const SCOPE: &str = "agent_private";
+
+// Command-line arguments of the live-baseline runner; all four paths are required.
+// This is a plain `//` comment on purpose: clap would publish a `///` doc comment
+// on the struct as the command's help text.
+#[derive(Debug, Parser)]
+#[command(version = elf_cli::VERSION, rename_all = "kebab", styles = elf_cli::styles())]
+struct Args {
+	/// Base ELF config to load before Docker runtime overrides are applied.
+	#[arg(long, short = 'c', value_name = "FILE")]
+	config: PathBuf,
+
+	/// Directory containing the generated benchmark corpus markdown files.
+	#[arg(long, value_name = "DIR")]
+	corpus: PathBuf,
+
+	/// Query manifest generated by the live-baseline harness.
+	#[arg(long, value_name = "FILE")]
+	queries: PathBuf,
+
+	/// Write ELF result JSON to this file.
+	#[arg(long, value_name = "FILE")]
+	out: PathBuf,
+}
+
+/// Top-level shape of the query manifest JSON file parsed by `load_queries`.
+#[derive(Debug, Deserialize)]
+struct QueryManifest {
+	/// Query cases contained in the manifest.
+	queries: Vec<QueryCase>,
+}
+
+/// One retrieval query together with the evidence a correct answer should contain.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct QueryCase {
+	/// Identifier of the case (the generated cases use e.g. `concurrent-000`).
+	id: String,
+	/// Query text for the search.
+	query: String,
+	/// Document name expected to answer the query (e.g. `concurrent-000.md`).
+	expected_doc: String,
+	/// Terms expected in the returned evidence; the generated cases use a unique marker.
+	expected_terms: Vec<String>,
+}
+
+/// One corpus markdown file as loaded by `load_corpus_notes`.
+#[derive(Debug)]
+struct CorpusNote {
+	/// Key derived from the file name by `key_for_doc`.
+	key: String,
+	/// First `# ` heading of the file, or the file name when there is none.
+	title: String,
+	/// Non-heading lines of the file with whitespace collapsed to single spaces.
+	text: String,
+	/// File name of the markdown document.
+	source_doc: String,
+}
+
+/// Per-run connection settings that `runtime_config` overlays on the base config.
+#[derive(Debug)]
+struct BaselineRuntime {
+	/// Path of the base ELF config file to load.
+	config_path: PathBuf,
+	/// Postgres DSN written to `storage.postgres.dsn`.
+	dsn: String,
+	/// Qdrant URL written to `storage.qdrant.url`.
+	qdrant_url: String,
+	/// Qdrant collection name written to `storage.qdrant.collection`.
+	collection: String,
+	/// Qdrant collection name written to `storage.qdrant.docs_collection`.
+	docs_collection: String,
+}
+
+/// Evidence about one worker run; serialized as the `evidence` of the
+/// `async_worker_indexing_e2e` check by `worker_indexing_check`.
+#[derive(Debug, Serialize)]
+struct WorkerRunEvidence {
+	/// Label of the worker run (presumably the label given by the caller, e.g. `corpus_upsert`).
+	label: String,
+	/// Number of notes the run was expected to index.
+	expected_note_count: usize,
+	/// Number of worker iterations (presumably how many were executed — confirm at the producer).
+	iterations: usize,
+	/// Outbox job counts keyed by status, presumably sampled before the run.
+	before: BTreeMap<String, i64>,
+	/// Outbox job counts keyed by status (`DONE`, `PENDING`, `FAILED`, `CLAIMED`); checked by `outbox_done`.
+	after: BTreeMap<String, i64>,
+	/// Chunk row count; must be at least `expected_note_count` for the check to pass.
+	chunk_rows: i64,
+	/// Chunk embedding row count; must be at least `chunk_rows` for the check to pass.
+	chunk_embedding_rows: i64,
+	/// Failed outbox jobs reported with the run.
+	failed_jobs: Vec<FailedOutboxJob>,
+}
+
+/// Description of one failed outbox job, reported inside `WorkerRunEvidence`.
+#[derive(Debug, Serialize)]
+struct FailedOutboxJob {
+	/// Id of the note the job belongs to.
+	note_id: Uuid,
+	/// Key of that note, when one is available.
+	note_key: Option<String>,
+	/// Operation name of the job.
+	op: String,
+	/// Attempt counter of the job.
+	attempts: i32,
+	/// Last recorded error, if any.
+	last_error: Option<String>,
+}
+
+/// Measurements and limits serialized as the `evidence` of the `resource_envelope` check.
+#[derive(Debug, Serialize)]
+struct ResourceEnvelopeEvidence {
+	/// Elapsed runtime passed to `resource_envelope_check`, in seconds.
+	elapsed_seconds: f64,
+	/// Runtime limit (`ELF_BASELINE_MAX_ELF_SECONDS`, default 600).
+	max_elapsed_seconds: f64,
+	/// Value of the `VmHWM` line in `/proc/self/status`, or `None` when it cannot be read.
+	rss_kb: Option<u64>,
+	/// Memory limit (`ELF_BASELINE_MAX_ELF_RSS_KB`, default 1_500_000).
+	max_rss_kb: u64,
+}
+
+/// Embedding settings recorded in the report; filled by `embedding_runtime_report`
+/// from `cfg.providers.embedding`. The API key is not part of this struct.
+#[derive(Debug, Serialize)]
+struct EmbeddingRuntimeReport {
+	/// Embedding mode selected through `ELF_BASELINE_ELF_EMBEDDING_MODE`.
+	mode: EmbeddingMode,
+	/// Configured embedding provider id.
+	provider_id: String,
+	/// Configured embedding model name.
+	model: String,
+	/// Configured embedding vector dimensions.
+	dimensions: u32,
+	/// Configured embedding request timeout in milliseconds.
+	timeout_ms: u64,
+	/// Configured embedding API base URL.
+	api_base: String,
+	/// Configured embedding request path.
+	path: String,
+}
+
+/// Soak-check parameters resolved by `soak_config` from the profile and environment.
+#[derive(Debug, Serialize)]
+struct SoakConfig {
+	/// `ELF_BASELINE_SOAK_SECONDS`, or the profile default (0 outside stress/scale/full).
+	target_seconds: u64,
+	/// `ELF_BASELINE_SOAK_ROUNDS`, or the profile default (0 outside stress/scale/full).
+	write_rounds: usize,
+	/// `ELF_BASELINE_SOAK_PROBE_INTERVAL_MS` (default 1000), never below 100.
+	probe_interval_millis: u64,
+}
+
+/// Result document of the run; `main` writes it to the `--out` file as pretty JSON.
+#[derive(Debug, Serialize)]
+struct ElfBaselineReport {
+	/// Schema identifier of the report format.
+	schema: &'static str,
+	/// Overall status: `pass` only when no check failed or stayed incomplete, else `fail`.
+	status: &'static str,
+	/// `retrieval_pass` when every query matched, otherwise `retrieval_wrong_result`.
+	retrieval_status: &'static str,
+	/// Human-readable explanation of `status`.
+	reason: String,
+	/// Revision identifier of the ELF checkout (presumably from `git_head` — confirm in `run`).
+	head: String,
+	/// Embedding settings the run used.
+	embedding: EmbeddingRuntimeReport,
+	/// Indexing counters of the run.
+	indexing: IndexingReport,
+	/// Pass/fail totals over the query cases.
+	summary: QuerySummary,
+	/// Pass/fail/incomplete totals over the checks.
+	check_summary: CheckSummary,
+	/// Individual check results.
+	checks: Vec<CheckResult>,
+	/// Individual query results.
+	queries: Vec<QueryResult>,
+}
+
+/// Indexing counters included in the report.
+// NOTE(review): the `rebuild_*` fields presumably mirror the result of
+// `service.rebuild_qdrant()` — confirm where this struct is populated.
+#[derive(Debug, Serialize)]
+struct IndexingReport {
+	/// Number of notes in the run.
+	note_count: usize,
+	/// Rebuilt-item counter of the Qdrant rebuild.
+	rebuild_rebuilt_count: u64,
+	/// Missing-vector counter of the Qdrant rebuild.
+	rebuild_missing_vector_count: u64,
+	/// Error counter of the Qdrant rebuild.
+	rebuild_error_count: u64,
+}
+
+/// Totals over the query cases of the run.
+#[derive(Debug, Serialize)]
+struct QuerySummary {
+	/// Number of query cases.
+	total: usize,
+	/// Number of passing query cases.
+	pass: usize,
+	/// Number of failing query cases.
+	fail: usize,
+}
+
+/// Totals over the check results, computed by `summarize_checks`.
+#[derive(Debug, Serialize)]
+struct CheckSummary {
+	/// Number of checks, whatever their status.
+	total: usize,
+	/// Checks whose status is `pass`.
+	pass: usize,
+	/// Checks whose status is `fail`.
+	fail: usize,
+	/// Checks whose status is `incomplete`.
+	incomplete: usize,
+}
+
+/// Outcome of one named benchmark check.
+#[derive(Debug, Serialize)]
+struct CheckResult {
+	/// Check name, e.g. `same_corpus_retrieval` or `resource_envelope`.
+	name: &'static str,
+	/// One of `pass`, `fail`, or `incomplete`.
+	status: &'static str,
+	/// Human-readable explanation of the status.
+	reason: String,
+	/// Check-specific JSON evidence; an empty object for incomplete checks.
+	evidence: Value,
+}
+
+/// Outcome of one query case; the first four fields repeat the `QueryCase` inputs.
+#[derive(Debug, Serialize)]
+struct QueryResult {
+	/// Identifier of the query case.
+	id: String,
+	/// Query text that was searched.
+	query: String,
+	/// Document name the case expected.
+	expected_doc: String,
+	/// Terms the case expected.
+	expected_terms: Vec<String>,
+	/// Whether the case counts as passed; `retrieval_check` counts this flag.
+	matched: bool,
+	/// Expected terms that were found (presumably in the returned evidence — confirm at the producer).
+	matched_terms: Vec<String>,
+	/// Note key of the top hit, if any hit was returned.
+	top_note_key: Option<String>,
+	/// Snippet of the top hit, if any hit was returned.
+	top_snippet: Option<String>,
+	/// Number of hits returned for the query.
+	returned_count: usize,
+}
+
+/// Embedding provider that hashes text locally with `embed_text` instead of calling a model.
+#[derive(Debug)]
+struct DeterministicEmbedding {
+	/// Length of every vector this provider returns.
+	vector_dim: u32,
+}
+impl EmbeddingProvider for DeterministicEmbedding {
+	/// Embeds every input text with `embed_text`; the provider config is ignored.
+	fn embed<'a>(
+		&'a self,
+		_cfg: &'a EmbeddingProviderConfig,
+		texts: &'a [String],
+	) -> BoxFuture<'a, elf_service::Result<Vec<Vec<f32>>>> {
+		// The vectors are computed eagerly; the future only hands them back.
+		let embedded: Vec<Vec<f32>> =
+			texts.iter().map(|text| embed_text(text, self.vector_dim)).collect();
+
+		Box::pin(async move { Ok(embedded) })
+	}
+}
+
+/// Rerank provider that scores documents by the share of query terms they contain.
+#[derive(Debug)]
+struct TokenOverlapRerank;
+impl RerankProvider for TokenOverlapRerank {
+	/// Returns one score per document: shared terms divided by the number of
+	/// distinct query terms (the divisor is at least 1, so an empty query scores 0).
+	fn rerank<'a>(
+		&'a self,
+		_cfg: &'a ProviderConfig,
+		query: &'a str,
+		docs: &'a [String],
+	) -> BoxFuture<'a, elf_service::Result<Vec<f32>>> {
+		let wanted = terms(query);
+		let denominator = wanted.len().max(1) as f32;
+		let mut scores = Vec::with_capacity(docs.len());
+
+		for doc in docs {
+			let overlap = wanted.intersection(&terms(doc)).count() as f32;
+
+			scores.push(overlap / denominator);
+		}
+
+		Box::pin(async move { Ok(scores) })
+	}
+}
+
+/// Extractor provider that never extracts anything.
+#[derive(Debug)]
+struct NoopExtractor;
+impl ExtractorProvider for NoopExtractor {
+	/// Ignores the config and messages and always returns `{ "notes": [] }`.
+	fn extract<'a>(
+		&'a self,
+		_cfg: &'a LlmProviderConfig,
+		_messages: &'a [Value],
+	) -> BoxFuture<'a, elf_service::Result<Value>> {
+		let empty = serde_json::json!({ "notes": [] });
+
+		Box::pin(async move { Ok(empty) })
+	}
+}
+
+/// Embedding source for the run; serialized in snake case as `local` or `provider`.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize)]
+#[serde(rename_all = "snake_case")]
+enum EmbeddingMode {
+	/// Selected by `local` or `deterministic`; also the default when the variable is unset.
+	Local,
+	/// Selected by `provider` or `production`; uses the configured embedding provider.
+	Provider,
+}
+
+/// Loads the base config and overlays the settings of this benchmark run.
+///
+/// Storage endpoints come from `runtime`. In provider mode the embedding settings
+/// are overridden from the environment and the Qdrant vector dimension follows the
+/// provider; in local mode the embedding is relabelled `local`/`local-hash` and
+/// follows the configured Qdrant dimension. Rerank is always labelled local, the
+/// LLM extractor is disabled, and `context` is cleared.
+///
+/// Returns an error when the embedding mode is unsupported, the config cannot be
+/// loaded, or the provider overrides fail validation.
+fn runtime_config(runtime: &BaselineRuntime) -> color_eyre::Result<Config> {
+	// Resolved before loading so the order of possible failures stays unchanged.
+	let mode = embedding_mode()?;
+	let mut cfg = elf_config::load(&runtime.config_path)?;
+
+	let postgres = &mut cfg.storage.postgres;
+
+	postgres.dsn = runtime.dsn.clone();
+	postgres.pool_max_conns = 12;
+
+	let qdrant = &mut cfg.storage.qdrant;
+
+	qdrant.url = runtime.qdrant_url.clone();
+	qdrant.collection = runtime.collection.clone();
+	qdrant.docs_collection = runtime.docs_collection.clone();
+
+	match mode {
+		EmbeddingMode::Provider => {
+			apply_provider_embedding_overrides(&mut cfg)?;
+
+			// The collection must be as wide as the provider's vectors.
+			cfg.storage.qdrant.vector_dim = cfg.providers.embedding.dimensions;
+		}
+		EmbeddingMode::Local => {
+			let embedding = &mut cfg.providers.embedding;
+
+			embedding.provider_id = "local".to_string();
+			embedding.model = "local-hash".to_string();
+			embedding.dimensions = cfg.storage.qdrant.vector_dim;
+		}
+	}
+
+	let rerank = &mut cfg.providers.rerank;
+
+	rerank.provider_id = "local".to_string();
+	rerank.model = "local-token-overlap".to_string();
+
+	let extractor = &mut cfg.providers.llm_extractor;
+
+	extractor.provider_id = "disabled".to_string();
+	extractor.model = "disabled".to_string();
+
+	cfg.context = None;
+
+	Ok(cfg)
+}
+
+/// Builds the provider set made of the local stand-ins defined in this file:
+/// hashed embeddings of width `vector_dim`, token-overlap rerank, and a no-op extractor.
+fn deterministic_providers(vector_dim: u32) -> Providers {
+	let embedding = Arc::new(DeterministicEmbedding { vector_dim });
+	let rerank = Arc::new(TokenOverlapRerank);
+	let extractor = Arc::new(NoopExtractor);
+
+	Providers::new(embedding, rerank, extractor)
+}
+
+/// Resolves the embedding mode from `ELF_BASELINE_ELF_EMBEDDING_MODE`.
+///
+/// The value is read through `env_string`, like the other overrides in this file:
+/// surrounding whitespace is ignored and an empty value counts as unset, so a
+/// variable passed through empty selects the default instead of failing.
+/// Matching is ASCII case-insensitive.
+///
+/// Returns `Local` for `local`, `deterministic`, or an unset variable, `Provider`
+/// for `provider` or `production`, and an error for any other value.
+fn embedding_mode() -> color_eyre::Result<EmbeddingMode> {
+	let raw = env_string(&["ELF_BASELINE_ELF_EMBEDDING_MODE"])
+		.map_or_else(|| "local".to_string(), |value| value.to_ascii_lowercase());
+
+	match raw.as_str() {
+		"local" | "deterministic" => Ok(EmbeddingMode::Local),
+		"provider" | "production" => Ok(EmbeddingMode::Provider),
+		_ => Err(eyre::eyre!(
+			"Unsupported ELF_BASELINE_ELF_EMBEDDING_MODE={raw:?}; use local or provider."
+		)),
+	}
+}
+
+/// Overrides the embedding provider settings from the environment and validates them.
+///
+/// Each setting is taken from the first variable in its list that holds a non-empty
+/// value; settings without such a variable keep the value loaded from the config
+/// file. Returns an error when the result still looks like the local placeholder
+/// setup: a `local` provider id, a missing or `http://127.0.0.1` API base, a missing
+/// or `local-dev-placeholder` API key, a missing or `local-hash` model, or zero
+/// dimensions.
+fn apply_provider_embedding_overrides(cfg: &mut Config) -> color_eyre::Result<()> {
+	apply_env_string(
+		&mut cfg.providers.embedding.provider_id,
+		&[
+			"ELF_BASELINE_ELF_EMBEDDING_PROVIDER_ID",
+			"QWEN_EMBEDDING_PROVIDER_ID",
+			"EMBEDDING_PROVIDER_ID",
+		],
+	);
+	apply_env_string(
+		&mut cfg.providers.embedding.api_base,
+		&[
+			"ELF_BASELINE_ELF_EMBEDDING_API_BASE",
+			"QWEN_EMBEDDING_API_BASE",
+			"DASHSCOPE_API_BASE",
+			"EMBEDDING_API_BASE",
+		],
+	);
+	apply_env_string(
+		&mut cfg.providers.embedding.api_key,
+		&[
+			"ELF_BASELINE_ELF_EMBEDDING_API_KEY",
+			"QWEN_API_KEY",
+			"DASHSCOPE_API_KEY",
+			"EMBEDDING_API_KEY",
+		],
+	);
+	apply_env_string(
+		&mut cfg.providers.embedding.path,
+		&["ELF_BASELINE_ELF_EMBEDDING_PATH", "QWEN_EMBEDDING_PATH", "EMBEDDING_PATH"],
+	);
+	apply_env_string(
+		&mut cfg.providers.embedding.model,
+		&["ELF_BASELINE_ELF_EMBEDDING_MODEL", "QWEN_EMBEDDING_MODEL", "EMBEDDING_MODEL"],
+	);
+
+	// A value that does not parse as a number is ignored and the configured one is kept.
+	if let Some(dimensions) = env_u32(&[
+		"ELF_BASELINE_ELF_EMBEDDING_DIMENSIONS",
+		"QWEN_EMBEDDING_DIMENSIONS",
+		"DASHSCOPE_EMBEDDING_DIMENSIONS",
+		"EMBEDDING_DIMENSIONS",
+	]) {
+		cfg.providers.embedding.dimensions = dimensions;
+	}
+	// Without an explicit timeout, raise the configured one to at least 30 seconds.
+	if let Some(timeout_ms) = env_u64(&[
+		"ELF_BASELINE_ELF_EMBEDDING_TIMEOUT_MS",
+		"QWEN_EMBEDDING_TIMEOUT_MS",
+		"EMBEDDING_TIMEOUT_MS",
+	]) {
+		cfg.providers.embedding.timeout_ms = timeout_ms;
+	} else {
+		cfg.providers.embedding.timeout_ms = cfg.providers.embedding.timeout_ms.max(30_000);
+	}
+
+	// The provider id is still `local`: infer one from whichever API key variable is set.
+	if cfg.providers.embedding.provider_id == "local" {
+		if env_string(&["ELF_BASELINE_ELF_EMBEDDING_API_KEY", "QWEN_API_KEY"]).is_some() {
+			cfg.providers.embedding.provider_id = "qwen".to_string();
+		} else if env_string(&["DASHSCOPE_API_KEY"]).is_some() {
+			cfg.providers.embedding.provider_id = "dashscope".to_string();
+		} else if env_string(&["EMBEDDING_API_KEY"]).is_some() {
+			cfg.providers.embedding.provider_id = "provider".to_string();
+		}
+	}
+	// From here on only validation: reject anything that is still a local placeholder.
+	if cfg.providers.embedding.provider_id == "local" {
+		return Err(eyre::eyre!(
+			"Provider embedding mode requires a non-local provider id or QWEN_API_KEY/DASHSCOPE_API_KEY/EMBEDDING_API_KEY."
+		));
+	}
+	if cfg.providers.embedding.api_base.trim().is_empty()
+		|| cfg.providers.embedding.api_base == "http://127.0.0.1"
+	{
+		return Err(eyre::eyre!(
+			"Provider embedding mode requires ELF_BASELINE_ELF_EMBEDDING_API_BASE, QWEN_EMBEDDING_API_BASE, DASHSCOPE_API_BASE, or EMBEDDING_API_BASE."
+		));
+	}
+	if cfg.providers.embedding.api_key.trim().is_empty()
+		|| cfg.providers.embedding.api_key == "local-dev-placeholder"
+	{
+		return Err(eyre::eyre!(
+			"Provider embedding mode requires ELF_BASELINE_ELF_EMBEDDING_API_KEY, QWEN_API_KEY, DASHSCOPE_API_KEY, or EMBEDDING_API_KEY."
+		));
+	}
+	if cfg.providers.embedding.model == "local-hash"
+		|| cfg.providers.embedding.model.trim().is_empty()
+	{
+		return Err(eyre::eyre!(
+			"Provider embedding mode requires ELF_BASELINE_ELF_EMBEDDING_MODEL, QWEN_EMBEDDING_MODEL, or EMBEDDING_MODEL."
+		));
+	}
+	if cfg.providers.embedding.dimensions == 0 {
+		return Err(eyre::eyre!(
+			"Provider embedding dimensions must be greater than zero; set ELF_BASELINE_ELF_EMBEDDING_DIMENSIONS, QWEN_EMBEDDING_DIMENSIONS, DASHSCOPE_EMBEDDING_DIMENSIONS, or EMBEDDING_DIMENSIONS."
+		));
+	}
+
+	Ok(())
+}
+
+/// Copies the embedding settings of `cfg` into the report structure.
+///
+/// The mode is re-read from the environment; if it cannot be resolved the report
+/// records `Local`. The API key is deliberately not copied.
+fn embedding_runtime_report(cfg: &Config) -> EmbeddingRuntimeReport {
+	let mode = match embedding_mode() {
+		Ok(mode) => mode,
+		Err(_) => EmbeddingMode::Local,
+	};
+	let embedding = &cfg.providers.embedding;
+
+	EmbeddingRuntimeReport {
+		mode,
+		provider_id: embedding.provider_id.clone(),
+		model: embedding.model.clone(),
+		dimensions: embedding.dimensions,
+		timeout_ms: embedding.timeout_ms,
+		api_base: embedding.api_base.clone(),
+		path: embedding.path.clone(),
+	}
+}
+
+/// Overwrites `target` with the first non-empty value among the environment
+/// variables `names`; leaves `target` untouched when none of them has one.
+fn apply_env_string(target: &mut String, names: &[&str]) {
+	match env_string(names) {
+		Some(value) => *target = value,
+		None => {}
+	}
+}
+
+/// Returns the trimmed value of the first variable in `names` that is set to
+/// something other than whitespace, or `None` when there is no such variable.
+///
+/// Variables that are unset, not valid Unicode, or blank are skipped.
+fn env_string(names: &[&str]) -> Option<String> {
+	for name in names {
+		let Ok(raw) = env::var(name) else {
+			continue;
+		};
+		let trimmed = raw.trim();
+
+		if !trimmed.is_empty() {
+			return Some(trimmed.to_string());
+		}
+	}
+
+	None
+}
+
+/// Parses the first non-empty value among `names` as a `u32`.
+///
+/// Returns `None` when no variable has a value or when that value is not a valid
+/// `u32`; later names are not tried after a parse failure.
+fn env_u32(names: &[&str]) -> Option<u32> {
+	let value = env_string(names)?;
+
+	value.parse::<u32>().ok()
+}
+
+/// Parses the first non-empty value among `names` as a `u64`.
+///
+/// Returns `None` when no variable has a value or when that value is not a valid
+/// `u64`; later names are not tried after a parse failure.
+fn env_u64(names: &[&str]) -> Option<u64> {
+	let value = env_string(names)?;
+
+	value.parse::<u64>().ok()
+}
+
+/// Loads every `*.md` file (extension matched case-insensitively) directly inside
+/// `corpus_dir` as a `CorpusNote`, ordered by path.
+///
+/// For each file the title comes from `title_from_markdown`, the key from
+/// `key_for_doc`, and the text is every line that does not start with `#`,
+/// with all whitespace collapsed to single spaces.
+///
+/// Returns an error when the directory or a file cannot be read, a file name is
+/// not valid UTF-8, or no markdown file is found.
+fn load_corpus_notes(corpus_dir: &Path) -> color_eyre::Result<Vec<CorpusNote>> {
+	let mut markdown_paths = Vec::new();
+
+	for entry in fs::read_dir(corpus_dir)? {
+		let path = entry?.path();
+		let is_markdown = path
+			.extension()
+			.and_then(|ext| ext.to_str())
+			.is_some_and(|ext| ext.eq_ignore_ascii_case("md"));
+
+		if is_markdown {
+			markdown_paths.push(path);
+		}
+	}
+
+	// Directory iteration order is unspecified; sorting makes the note order stable.
+	markdown_paths.sort();
+
+	let mut notes = Vec::with_capacity(markdown_paths.len());
+
+	for path in &markdown_paths {
+		let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
+			return Err(eyre::eyre!(
+				"Corpus path has no valid UTF-8 file name: {}",
+				path.display()
+			));
+		};
+		let source_doc = file_name.to_string();
+		let raw = fs::read_to_string(path)?;
+		let title = title_from_markdown(&raw, &source_doc);
+		// Heading lines are dropped; the remaining words are re-joined with single spaces.
+		let text = raw
+			.lines()
+			.filter(|line| !line.trim_start().starts_with('#'))
+			.flat_map(str::split_whitespace)
+			.collect::<Vec<_>>()
+			.join(" ");
+
+		notes.push(CorpusNote { key: key_for_doc(&source_doc), title, text, source_doc });
+	}
+
+	if notes.is_empty() {
+		return Err(eyre::eyre!("No markdown corpus files found in {}.", corpus_dir.display()));
+	}
+
+	Ok(notes)
+}
+
+/// Reads and parses the query manifest JSON at `path`.
+///
+/// Takes `&Path` rather than `&PathBuf`; existing `&PathBuf` call sites still
+/// compile through deref coercion.
+///
+/// Returns an error naming `path` when the file cannot be read or is not a valid
+/// manifest; the underlying I/O or JSON error is kept as the cause.
+fn load_queries(path: &Path) -> color_eyre::Result<QueryManifest> {
+	let raw = fs::read_to_string(path).map_err(|err| {
+		Report::new(err).wrap_err(format!("Failed to read query manifest {}", path.display()))
+	})?;
+
+	serde_json::from_str(&raw).map_err(|err| {
+		Report::new(err).wrap_err(format!("Failed to parse query manifest {}", path.display()))
+	})
+}
+
+/// Upper bound on worker iterations for a run over `note_count` notes.
+///
+/// `ELF_BASELINE_WORKER_MAX_ITERATIONS` wins when it parses as a `usize`;
+/// otherwise the bound is `note_count * 3 + 32`, saturating instead of overflowing.
+fn worker_max_iterations(note_count: usize) -> usize {
+	let configured = env::var("ELF_BASELINE_WORKER_MAX_ITERATIONS")
+		.ok()
+		.and_then(|value| value.parse::<usize>().ok());
+
+	match configured {
+		Some(limit) => limit,
+		None => note_count.saturating_mul(3).saturating_add(32),
+	}
+}
+
+/// Reports whether the outbox has finished: at least `expected_note_count` jobs
+/// are `DONE` and none are `PENDING`, `FAILED`, or `CLAIMED`.
+///
+/// `counts` maps a status name to its job count; a missing status counts as zero.
+fn outbox_done(counts: &BTreeMap<String, i64>, expected_note_count: usize) -> bool {
+	let count_of = |status: &str| counts.get(status).copied().unwrap_or_default();
+	// A count too large for `i64` can never be reached, so it saturates.
+	let expected = i64::try_from(expected_note_count).unwrap_or(i64::MAX);
+
+	if count_of("PENDING") != 0 || count_of("FAILED") != 0 || count_of("CLAIMED") != 0 {
+		return false;
+	}
+
+	count_of("DONE") >= expected
+}
+
+/// Builds the `same_corpus_retrieval` check from the query results.
+///
+/// The check passes only when every result is `matched` (which includes an empty
+/// result list); the evidence records the total, pass, and fail counts.
+fn retrieval_check(query_results: &[QueryResult]) -> CheckResult {
+	let total = query_results.len();
+	let pass_count = query_results.iter().filter(|result| result.matched).count();
+	let fail_count = total.saturating_sub(pass_count);
+	let (status, reason) = if fail_count == 0 {
+		("pass", "All same-corpus retrieval queries returned expected evidence.".to_string())
+	} else {
+		(
+			"fail",
+			format!("{fail_count} same-corpus retrieval query case(s) missed expected evidence."),
+		)
+	};
+
+	CheckResult {
+		name: "same_corpus_retrieval",
+		status,
+		reason,
+		evidence: serde_json::json!({
+			"total": total,
+			"pass": pass_count,
+			"fail": fail_count,
+		}),
+	}
+}
+
+/// Builds the `async_worker_indexing_e2e` check from one worker run.
+///
+/// Passes when the outbox is done (see `outbox_done`), there are at least as many
+/// chunk rows as expected notes, and at least as many embedding rows as chunk rows.
+/// The whole `evidence` value is serialized into the check.
+fn worker_indexing_check(evidence: WorkerRunEvidence) -> CheckResult {
+	let expected_rows = i64::try_from(evidence.expected_note_count).unwrap_or(i64::MAX);
+	let outbox_drained = outbox_done(&evidence.after, evidence.expected_note_count);
+	let chunks_persisted = evidence.chunk_rows >= expected_rows;
+	let chunks_embedded = evidence.chunk_embedding_rows >= evidence.chunk_rows;
+	let (status, reason) = if outbox_drained && chunks_persisted && chunks_embedded {
+		("pass", "ELF worker processed corpus outbox jobs into persisted chunks and embeddings.")
+	} else {
+		("fail", "ELF worker did not fully process corpus outbox jobs into searchable chunks.")
+	};
+
+	CheckResult {
+		name: "async_worker_indexing_e2e",
+		status,
+		reason: reason.to_string(),
+		evidence: serde_json::json!(evidence),
+	}
+}
+
+/// Number of notes written by the concurrent write check.
+///
+/// `ELF_BASELINE_CONCURRENT_NOTES` wins when it parses as a `usize` (raised to at
+/// least 1). Otherwise `ELF_BASELINE_PROFILE` decides: 32 for `stress`, 16 for
+/// `scale` or `full`, and 4 for anything else.
+fn concurrent_note_count() -> usize {
+	let explicit = env::var("ELF_BASELINE_CONCURRENT_NOTES")
+		.ok()
+		.and_then(|value| value.parse::<usize>().ok());
+
+	if let Some(count) = explicit {
+		return count.max(1);
+	}
+
+	let profile = env::var("ELF_BASELINE_PROFILE");
+
+	match profile.as_deref() {
+		Ok("stress") => 32,
+		Ok("scale" | "full") => 16,
+		_ => 4,
+	}
+}
+
+/// Builds the add-note request for concurrent-write note number `index`.
+///
+/// The note has key `concurrent_NNN`, embeds the marker from `concurrent_marker`
+/// in its text, and names `concurrent-NNN.md` as its source document.
+fn concurrent_add_request(index: usize) -> AddNoteRequest {
+	let marker = concurrent_marker(index);
+	let note = AddNoteInput {
+		r#type: "fact".to_string(),
+		key: Some(format!("concurrent_{index:03}")),
+		text: format!(
+			"Concurrent benchmark note {index:03} records marker `{marker}` for write race validation."
+		),
+		structured: None,
+		importance: 0.91,
+		confidence: 0.96,
+		ttl_days: None,
+		source_ref: serde_json::json!({
+			"source": "ELF live baseline concurrent write check",
+			"document": format!("concurrent-{index:03}.md"),
+		}),
+		write_policy: None,
+	};
+
+	AddNoteRequest {
+		tenant_id: TENANT_ID.to_string(),
+		project_id: PROJECT_ID.to_string(),
+		agent_id: AGENT_ID.to_string(),
+		scope: SCOPE.to_string(),
+		notes: vec![note],
+	}
+}
+
+/// Builds the query case that should find concurrent-write note number `index`:
+/// it searches for the note's marker and expects `concurrent-NNN.md`.
+fn concurrent_query_case(index: usize) -> QueryCase {
+	let marker = concurrent_marker(index);
+	let id = format!("concurrent-{index:03}");
+	let query = format!("Find the concurrent benchmark note containing marker {marker}.");
+	let expected_doc = format!("concurrent-{index:03}.md");
+
+	QueryCase { id, query, expected_doc, expected_terms: vec![marker] }
+}
+
+/// Marker string of concurrent-write note `index`: `concurrency-<word>-NNN`,
+/// where `<word>` comes from `marker_word`.
+fn concurrent_marker(index: usize) -> String {
+	let word = marker_word(index);
+
+	format!("concurrency-{}-{index:03}", word)
+}
+
+/// Resolves the soak-check parameters.
+///
+/// `ELF_BASELINE_PROFILE` sets the defaults (`stress`: 60 s / 6 rounds, `scale` or
+/// `full`: 15 s / 3 rounds, otherwise 0 / 0). `ELF_BASELINE_SOAK_SECONDS`,
+/// `ELF_BASELINE_SOAK_ROUNDS`, and `ELF_BASELINE_SOAK_PROBE_INTERVAL_MS` override
+/// them; the probe interval defaults to 1000 ms and is never below 100 ms.
+fn soak_config() -> SoakConfig {
+	let profile = env::var("ELF_BASELINE_PROFILE").ok();
+	let defaults: (u64, usize) = match profile.as_deref() {
+		Some("stress") => (60, 6),
+		Some("scale" | "full") => (15, 3),
+		_ => (0, 0),
+	};
+	let target_seconds = parse_env_u64("ELF_BASELINE_SOAK_SECONDS").unwrap_or(defaults.0);
+	let write_rounds = parse_env_usize("ELF_BASELINE_SOAK_ROUNDS").unwrap_or(defaults.1);
+	let requested_interval =
+		parse_env_u64("ELF_BASELINE_SOAK_PROBE_INTERVAL_MS").unwrap_or(1_000);
+
+	SoakConfig {
+		target_seconds,
+		write_rounds,
+		probe_interval_millis: requested_interval.max(100),
+	}
+}
+
+/// Reads environment variable `name` as a `u64`.
+///
+/// Surrounding whitespace is ignored, matching `env_u64`, so a padded value such
+/// as `"60 "` is accepted instead of silently falling back to the caller's default.
+/// Returns `None` when the variable is unset, not valid Unicode, or not a `u64`.
+fn parse_env_u64(name: &str) -> Option<u64> {
+	env::var(name).ok()?.trim().parse::<u64>().ok()
+}
+
+/// Reads environment variable `name` as a `usize`.
+///
+/// Surrounding whitespace is ignored, matching the `env_string`-based helpers, so
+/// a padded value is accepted instead of silently falling back to the caller's default.
+/// Returns `None` when the variable is unset, not valid Unicode, or not a `usize`.
+fn parse_env_usize(name: &str) -> Option<usize> {
+	env::var(name).ok()?.trim().parse::<usize>().ok()
+}
+
+/// Builds the add-note request for soak note number `index`.
+///
+/// The note has key `soak_NNN`, combines the topic and detail from `soak_topic`
+/// with the marker from `soak_marker`, and names `soak-NNN.md` as its source document.
+fn soak_add_request(index: usize) -> AddNoteRequest {
+	let (topic, detail) = soak_topic(index);
+	let marker = soak_marker(index);
+	let note = AddNoteInput {
+		r#type: "fact".to_string(),
+		key: Some(format!("soak_{index:03}")),
+		text: format!(
+			"Soak benchmark note {index:03} covers {topic}. {detail} It records stability marker `{marker}` for repeated worker and search probes."
+		),
+		structured: None,
+		importance: 0.92,
+		confidence: 0.97,
+		ttl_days: None,
+		source_ref: serde_json::json!({
+			"source": "ELF live baseline soak stability check",
+			"document": format!("soak-{index:03}.md"),
+		}),
+		write_policy: None,
+	};
+
+	AddNoteRequest {
+		tenant_id: TENANT_ID.to_string(),
+		project_id: PROJECT_ID.to_string(),
+		agent_id: AGENT_ID.to_string(),
+		scope: SCOPE.to_string(),
+		notes: vec![note],
+	}
+}
+
+/// Builds the query case that should find soak note number `index`: it searches
+/// for the note's topic and marker and expects `soak-NNN.md`.
+fn soak_query_case(index: usize) -> QueryCase {
+	let (topic, _detail) = soak_topic(index);
+	let marker = soak_marker(index);
+	let id = format!("soak-{index:03}");
+	let query = format!("Find the soak benchmark note about {topic} containing marker {marker}.");
+	let expected_doc = format!("soak-{index:03}.md");
+
+	QueryCase { id, query, expected_doc, expected_terms: vec![marker] }
+}
+
+/// Marker string of soak note `index`: `soak-stability-<word>-NNN`, where
+/// `<word>` comes from `marker_word`.
+fn soak_marker(index: usize) -> String {
+	let word = marker_word(index);
+
+	format!("soak-stability-{}-{index:03}", word)
+}
+
+/// Picks a word for `index` from a fixed list of 32 words, wrapping around so
+/// every index maps to a word.
+fn marker_word(index: usize) -> &'static str {
+	const MARKER_WORDS: [&str; 32] = [
+		"aurora", "banyan", "cobalt", "delta", "ember", "fennel", "granite", "harbor", "indigo",
+		"jasper", "keystone", "lantern", "meridian", "nebula", "onyx", "prairie", "quartz",
+		"raven", "solstice", "topaz", "umbra", "verdant", "willow", "xenon", "yarrow", "zephyr",
+		"atlas", "beacon", "citadel", "drift", "equinox", "forge",
+	];
+
+	let slot = index % MARKER_WORDS.len();
+
+	MARKER_WORDS[slot]
+}
+
+/// Picks a `(topic, detail sentence)` pair for `index` from a fixed list of
+/// eight pairs, wrapping around so every index maps to a pair.
+fn soak_topic(index: usize) -> (&'static str, &'static str) {
+	const SOAK_TOPICS: [(&str, &str); 8] = [
+		(
+			"release rollback fencing",
+			"The rollback controller waits for a signed deploy fence before the next canary.",
+		),
+		(
+			"invoice export batching",
+			"The exporter groups invoice CSV rows by merchant ledger before upload.",
+		),
+		("search shard warming", "The search router warms tenant shard caches before rank probes."),
+		(
+			"incident pager routing",
+			"The incident desk routes page ownership through the release captain.",
+		),
+		(
+			"backup restore rehearsal",
+			"The restore rehearsal checks WAL freshness before dry-run recovery.",
+		),
+		(
+			"feature flag expiry",
+			"The flag sweeper archives expired toggles before deleting rollout rules.",
+		),
+		(
+			"support queue triage",
+			"The support classifier separates billing tickets from access tickets.",
+		),
+		(
+			"analytics job watermark",
+			"The analytics worker stores a warehouse watermark after each import.",
+		),
+	];
+
+	let slot = index % SOAK_TOPICS.len();
+
+	SOAK_TOPICS[slot]
+}
+
+/// Returns the note indexes to probe after the concurrent writes: the first,
+/// the middle, and the last index, in ascending order without duplicates.
+fn concurrency_probe_indexes(note_count: usize) -> Vec<usize> {
+	// 0 <= n / 2 <= n - 1 for every n >= 1 (and all three are 0 for n = 0), so the
+	// candidates are already ascending and equal values can only be neighbours.
+	let candidates = [0, note_count / 2, note_count.saturating_sub(1)];
+	let mut probes: Vec<usize> = Vec::with_capacity(candidates.len());
+
+	for candidate in candidates {
+		if probes.last() != Some(&candidate) {
+			probes.push(candidate);
+		}
+	}
+
+	probes
+}
+
+/// Builds the `resource_envelope` check for a run that took `elapsed_seconds`.
+///
+/// The limits come from `ELF_BASELINE_MAX_ELF_SECONDS` (default 600) and
+/// `ELF_BASELINE_MAX_ELF_RSS_KB` (default 1_500_000); unparseable values fall back
+/// to the defaults. The memory figure comes from `current_rss_kb`; when it is
+/// unavailable only the elapsed time is checked.
+fn resource_envelope_check(elapsed_seconds: f64) -> CheckResult {
+	let max_elapsed_seconds = match env::var("ELF_BASELINE_MAX_ELF_SECONDS") {
+		Ok(raw) => raw.parse::<f64>().unwrap_or(600.0),
+		Err(_) => 600.0,
+	};
+	let max_rss_kb = match env::var("ELF_BASELINE_MAX_ELF_RSS_KB") {
+		Ok(raw) => raw.parse::<u64>().unwrap_or(1_500_000),
+		Err(_) => 1_500_000,
+	};
+	let rss_kb = current_rss_kb();
+	let within_time = elapsed_seconds <= max_elapsed_seconds;
+	let within_memory = match rss_kb {
+		Some(rss) => rss <= max_rss_kb,
+		None => true,
+	};
+	let (status, reason) = if within_time && within_memory {
+		("pass", "ELF live-baseline runtime stayed within the configured local resource envelope.")
+	} else {
+		("fail", "ELF live-baseline runtime exceeded the configured local resource envelope.")
+	};
+	let evidence =
+		ResourceEnvelopeEvidence { elapsed_seconds, max_elapsed_seconds, rss_kb, max_rss_kb };
+
+	CheckResult {
+		name: "resource_envelope",
+		status,
+		reason: reason.to_string(),
+		evidence: serde_json::json!(evidence),
+	}
+}
+
+/// Reads the process's peak resident set size from `/proc/self/status`.
+///
+/// Despite the name, this parses the `VmHWM` line (the high-water mark), not the
+/// current `VmRSS`. Returns the first numeric field of that line (the kernel
+/// reports it in kB), or `None` when the file cannot be read or no such line parses.
+fn current_rss_kb() -> Option<u64> {
+	let status = fs::read_to_string("/proc/self/status").ok()?;
+
+	for line in status.lines() {
+		let Some(rest) = line.strip_prefix("VmHWM:") else {
+			continue;
+		};
+		let Some(field) = rest.trim().split_whitespace().next() else {
+			continue;
+		};
+
+		if let Ok(kb) = field.parse::<u64>() {
+			return Some(kb);
+		}
+	}
+
+	None
+}
+
+/// Builds a check result with status `incomplete`, the given `reason`, and an
+/// empty JSON object as evidence.
+fn incomplete_check(name: &'static str, reason: &str) -> CheckResult {
+	let evidence = serde_json::json!({});
+	let reason = reason.to_string();
+
+	CheckResult { name, status: "incomplete", reason, evidence }
+}
+
+/// Counts the checks by status.
+///
+/// `total` covers every check; a status other than `pass`, `fail`, or
+/// `incomplete` is counted in `total` only.
+fn summarize_checks(checks: &[CheckResult]) -> CheckSummary {
+	let mut summary = CheckSummary { total: checks.len(), pass: 0, fail: 0, incomplete: 0 };
+
+	for check in checks {
+		match check.status {
+			"pass" => summary.pass += 1,
+			"fail" => summary.fail += 1,
+			"incomplete" => summary.incomplete += 1,
+			_ => {}
+		}
+	}
+
+	summary
+}
+
+/// Returns the document title: the trimmed text of the first line that starts
+/// with `# ` (leading whitespace allowed).
+///
+/// Falls back to `source_doc` when there is no such line or when that first
+/// heading is blank; later headings are not considered in that case.
+fn title_from_markdown(raw: &str, source_doc: &str) -> String {
+	let first_heading = raw.lines().find_map(|line| line.trim_start().strip_prefix("# "));
+
+	match first_heading.map(str::trim) {
+		Some(title) if !title.is_empty() => title.to_string(),
+		_ => source_doc.to_string(),
+	}
+}
+
+/// Derives a note key from a document file name.
+///
+/// Takes the file stem, lowercases its ASCII alphanumeric runs, and joins them
+/// with single underscores; every other character acts as a separator, and
+/// leading or trailing separators are dropped. Returns `doc` when the stem has no
+/// ASCII alphanumeric character.
+fn key_for_doc(doc: &str) -> String {
+	let stem = Path::new(doc).file_stem().and_then(|stem| stem.to_str()).unwrap_or(doc);
+	let key = stem
+		.split(|ch: char| !ch.is_ascii_alphanumeric())
+		.filter(|run| !run.is_empty())
+		.map(str::to_ascii_lowercase)
+		.collect::<Vec<_>>()
+		.join("_");
+
+	if key.is_empty() {
+		return "doc".to_string();
+	}
+
+	key
+}
+
+/// Builds a deterministic embedding of `text` with `vector_dim` components.
+///
+/// Each normalized term of at least two bytes is hashed with BLAKE3; the hash
+/// picks one component and a sign, and the component is moved by that sign. When
+/// every component ends up zero, a single component chosen from the hash of the
+/// whole text is set to 1. The result is scaled to unit length. A zero
+/// `vector_dim` yields an empty vector.
+fn embed_text(text: &str, vector_dim: u32) -> Vec<f32> {
+	let dim = vector_dim as usize;
+
+	if dim == 0 {
+		return Vec::new();
+	}
+
+	// The first four hash bytes, read little-endian, select the component.
+	let slot_of = |bytes: &[u8; 32]| -> usize {
+		(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as usize) % dim
+	};
+	let mut vector = vec![0.0_f32; dim];
+	let normalized = normalize_ascii_alnum_lowercase(text);
+
+	for term in normalized.split_whitespace().filter(|term| term.len() >= 2) {
+		let digest = blake3::hash(term.as_bytes());
+		let bytes = digest.as_bytes();
+		let slot = slot_of(bytes);
+
+		// The lowest bit of the fifth hash byte selects the sign.
+		if bytes[4] & 1 == 0 {
+			vector[slot] += 1.0;
+		} else {
+			vector[slot] -= 1.0;
+		}
+	}
+
+	// No usable terms, or contributions that cancelled out exactly.
+	if !vector.iter().any(|value| *value != 0.0) {
+		let digest = blake3::hash(text.as_bytes());
+
+		vector[slot_of(digest.as_bytes())] = 1.0;
+	}
+
+	let norm = vector.iter().map(|value| value * value).sum::<f32>().sqrt();
+
+	if norm > 0.0 {
+		vector.iter_mut().for_each(|value| *value /= norm);
+	}
+
+	vector
+}
+
+/// Lowercases ASCII letters and digits and replaces every other character
+/// (including non-ASCII ones) with a single space, one output character per input character.
+fn normalize_ascii_alnum_lowercase(text: &str) -> String {
+	text.chars()
+		.map(|ch| if ch.is_ascii_alphanumeric() { ch.to_ascii_lowercase() } else { ' ' })
+		.collect()
+}
+
+/// Splits `text` on every character that is not an ASCII letter or digit and
+/// returns the set of distinct non-empty pieces, lowercased.
+fn terms(text: &str) -> HashSet<String> {
+	let mut found = HashSet::new();
+
+	for piece in text.split(|ch: char| !ch.is_ascii_alphanumeric()) {
+		let piece = piece.trim();
+
+		if !piece.is_empty() {
+			found.insert(piece.to_ascii_lowercase());
+		}
+	}
+
+	found
+}
+
+/// Returns up to `limit` distinctive terms of `text`, in order of first appearance.
+///
+/// A term is a run of ASCII letters and digits that is at least five bytes long
+/// and is not a stop word. Each term keeps the casing of its first occurrence.
+///
+/// Two edge cases are handled explicitly:
+/// - `limit == 0` returns an empty list, because the limit is checked before a
+///   term is added rather than after.
+/// - Repeated terms are de-duplicated without regard to ASCII case, consistent
+///   with the case-insensitive stop-word check, so `Release` and `release` do
+///   not both use up a slot.
+fn distinctive_terms(text: &str, limit: usize) -> Vec<String> {
+	// Entries shorter than five bytes can never match because of the length
+	// filter below; they are kept so the list stays correct if that changes.
+	const STOP_WORDS: &[&str] = &[
+		"the", "and", "for", "with", "that", "this", "from", "into", "must", "uses", "after",
+		"before", "query", "memory", "note",
+	];
+
+	let mut out: Vec<String> = Vec::new();
+
+	// Splitting on non-alphanumerics leaves only alphanumeric runs, so no trimming is needed.
+	for term in text.split(|ch: char| !ch.is_ascii_alphanumeric()) {
+		if out.len() >= limit {
+			break;
+		}
+		if term.len() < 5 {
+			continue;
+		}
+
+		let is_stop_word = STOP_WORDS.iter().any(|word| word.eq_ignore_ascii_case(term));
+		let already_listed = out.iter().any(|existing| existing.eq_ignore_ascii_case(term));
+
+		if is_stop_word || already_listed {
+			continue;
+		}
+
+		out.push(term.to_string());
+	}
+
+	out
+}
+
+/// Reports whether `haystack` contains `needle`, ignoring ASCII case only.
+/// An empty `needle` is always contained.
+fn contains_case_insensitive(haystack: &str, needle: &str) -> bool {
+	let haystack_lower = haystack.to_ascii_lowercase();
+	let needle_lower = needle.to_ascii_lowercase();
+
+	haystack_lower.contains(needle_lower.as_str())
+}
+
+/// Returns the revision identifier to record in the report.
+///
+/// A non-blank `ELF_BASELINE_ELF_HEAD` is used as is (trimmed); otherwise the
+/// trimmed output of `git rev-parse HEAD` is returned.
+///
+/// Returns an error when `git` cannot be started, exits unsuccessfully, or prints
+/// output that is not valid UTF-8.
+fn git_head() -> color_eyre::Result<String> {
+	let from_env = env::var("ELF_BASELINE_ELF_HEAD")
+		.ok()
+		.map(|head| head.trim().to_string())
+		.filter(|head| !head.is_empty());
+
+	if let Some(head) = from_env {
+		return Ok(head);
+	}
+
+	let output = Command::new("git").args(["rev-parse", "HEAD"]).output()?;
+
+	if output.status.success() {
+		let stdout = String::from_utf8(output.stdout)?;
+
+		Ok(stdout.trim().to_string())
+	} else {
+		Err(eyre::eyre!("git rev-parse HEAD failed."))
+	}
+}
+
+/// Entry point: parses the command line, runs the baseline, and writes the
+/// report to the `--out` file as pretty-printed JSON.
+#[tokio::main]
+async fn main() -> color_eyre::Result<()> {
+	color_eyre::install()?;
+
+	let args = Args::parse();
+	// `run` takes ownership of `args`, so the output path is copied out first.
+	let report_path = args.out.clone();
+	let report = run(args).await?;
+
+	fs::write(report_path, serde_json::to_string_pretty(&report)?)?;
+
+	Ok(())
+}
+
+/// Runs the live ELF baseline end to end and returns the report.
+///
+/// Requires `ELF_PG_DSN` and either `ELF_QDRANT_GRPC_URL` or `ELF_QDRANT_URL`. A
+/// `TestDatabase` is created for the run and `cleanup` is called on it whether or not
+/// the baseline itself succeeded, so a failing step does not skip cleanup. When both the
+/// baseline and the cleanup fail, the baseline error is the one returned.
+async fn run(args: Args) -> color_eyre::Result<ElfBaselineReport> {
+	let started_at = Instant::now();
+	let base_dsn = env::var("ELF_PG_DSN")
+		.map_err(|_| eyre::eyre!("ELF_PG_DSN must be set for live ELF baseline."))?;
+	let qdrant_url = env::var("ELF_QDRANT_GRPC_URL")
+		.or_else(|_| env::var("ELF_QDRANT_URL"))
+		.map_err(|_| eyre::eyre!("ELF_QDRANT_GRPC_URL or ELF_QDRANT_URL must be set."))?;
+	let test_db = TestDatabase::new(&base_dsn).await?;
+	let collection = test_db.collection_name("elf_live_baseline_notes");
+	let docs_collection = test_db.collection_name("elf_live_baseline_docs");
+	let runtime = BaselineRuntime {
+		config_path: args.config.clone(),
+		dsn: test_db.dsn().to_string(),
+		qdrant_url,
+		collection,
+		docs_collection,
+	};
+	// The service handle lives only inside `run_on_test_database`, so it has been
+	// dropped by the time cleanup starts.
+	let outcome = run_on_test_database(&args, &runtime, started_at).await;
+	let cleanup = test_db.cleanup().await;
+	let report = outcome?;
+
+	cleanup?;
+
+	Ok(report)
+}
+
+/// Executes every baseline step against an already-provisioned runtime and assembles
+/// the report. `started_at` is the instant the whole run began; it feeds the resource
+/// envelope check.
+async fn run_on_test_database(
+	args: &Args,
+	runtime: &BaselineRuntime,
+	started_at: Instant,
+) -> color_eyre::Result<ElfBaselineReport> {
+	let service = Arc::new(build_service(runtime).await?);
+	let notes = load_corpus_notes(&args.corpus)?;
+	let note_ids = add_notes(&service, &notes).await?;
+	let initial_worker =
+		run_worker_until_indexed(runtime, &service, &note_ids, "corpus_upsert").await?;
+	let rebuild = service.rebuild_qdrant().await?;
+	let query_manifest = load_queries(&args.queries)?;
+	let query_results = run_queries(&service, query_manifest.queries).await?;
+	let pass_count = query_results.iter().filter(|result| result.matched).count();
+	let fail_count = query_results.len().saturating_sub(pass_count);
+	let retrieval_status =
+		if fail_count == 0 { "retrieval_pass" } else { "retrieval_wrong_result" };
+	let mut checks = vec![retrieval_check(&query_results), worker_indexing_check(initial_worker)];
+
+	checks.extend(run_lifecycle_checks(runtime, &service, &notes, &note_ids).await?);
+	checks.push(run_concurrent_write_check(runtime, Arc::clone(&service)).await?);
+
+	// The soak check is optional; it reports `None` when soak is not configured.
+	if let Some(soak_check) = run_soak_stability_check(runtime, Arc::clone(&service)).await? {
+		checks.push(soak_check);
+	}
+
+	checks.push(resource_envelope_check(started_at.elapsed().as_secs_f64()));
+
+	let check_summary = summarize_checks(&checks);
+	// The overall verdict follows the check summary; `retrieval_status` is reported
+	// separately and reflects only the manifest queries.
+	let status =
+		if check_summary.fail == 0 && check_summary.incomplete == 0 { "pass" } else { "fail" };
+	let reason = if status == "pass" {
+		"ELF added the corpus, rebuilt Qdrant, and returned expected evidence for every query"
+			.to_string()
+	} else {
+		format!(
+			"ELF failed {} live-baseline check(s) and left {} incomplete check(s)",
+			check_summary.fail, check_summary.incomplete
+		)
+	};
+
+	Ok(ElfBaselineReport {
+		schema: "elf.live_baseline.elf_result/v1",
+		status,
+		retrieval_status,
+		reason,
+		// A missing git revision is not fatal for the report.
+		head: git_head().unwrap_or_else(|_| "unknown".to_string()),
+		embedding: embedding_runtime_report(&service.cfg),
+		indexing: IndexingReport {
+			note_count: notes.len(),
+			rebuild_rebuilt_count: rebuild.rebuilt_count,
+			rebuild_missing_vector_count: rebuild.missing_vector_count,
+			rebuild_error_count: rebuild.error_count,
+		},
+		summary: QuerySummary { total: query_results.len(), pass: pass_count, fail: fail_count },
+		check_summary,
+		checks,
+		queries: query_results,
+	})
+}
+
+/// Builds an `ElfService` for the baseline runtime.
+///
+/// Connects to Postgres, ensures the schema for the configured vector dimension, and
+/// ensures the Qdrant collection. In `EmbeddingMode::Provider` the service is created
+/// with `ElfService::new`; in any other mode it is created with the providers returned
+/// by `deterministic_providers`.
+async fn build_service(runtime: &BaselineRuntime) -> color_eyre::Result<ElfService> {
+	let cfg = runtime_config(runtime)?;
+	let uses_provider = embedding_mode()? == EmbeddingMode::Provider;
+	let vector_dim = cfg.storage.qdrant.vector_dim;
+	let db = Db::connect(&cfg.storage.postgres).await?;
+
+	db.ensure_schema(vector_dim).await?;
+
+	let qdrant = QdrantStore::new(&cfg.storage.qdrant)?;
+
+	qdrant.ensure_collection().await?;
+
+	let service = if uses_provider {
+		ElfService::new(cfg, db, qdrant)
+	} else {
+		ElfService::with_providers(cfg, db, qdrant, deterministic_providers(vector_dim))
+	};
+
+	Ok(service)
+}
+
+/// Builds the `WorkerState` used to drive the worker in-process.
+///
+/// Connects to Postgres and ensures the schema, ensures both the notes collection and
+/// the docs collection in Qdrant, and loads the tokenizer named by
+/// `cfg.chunking.tokenizer_repo`.
+async fn build_worker_state(runtime: &BaselineRuntime) -> color_eyre::Result<WorkerState> {
+	let cfg = runtime_config(runtime)?;
+	let db = Db::connect(&cfg.storage.postgres).await?;
+
+	db.ensure_schema(cfg.storage.qdrant.vector_dim).await?;
+
+	let notes_store = QdrantStore::new(&cfg.storage.qdrant)?;
+
+	notes_store.ensure_collection().await?;
+
+	let docs_store =
+		QdrantStore::new_with_collection(&cfg.storage.qdrant, &cfg.storage.qdrant.docs_collection)?;
+
+	docs_store.ensure_collection().await?;
+
+	let tokenizer = elf_chunking::load_tokenizer(&cfg.chunking.tokenizer_repo)
+		.map_err(|err| eyre::eyre!("Failed to load tokenizer for live baseline worker: {err}"))?;
+	let max_tokens = cfg.chunking.max_tokens;
+	let overlap_tokens = cfg.chunking.overlap_tokens;
+
+	Ok(WorkerState {
+		db,
+		qdrant: notes_store,
+		docs_qdrant: docs_store,
+		embedding: cfg.providers.embedding,
+		chunking: ChunkingConfig { max_tokens, overlap_tokens },
+		tokenizer,
+	})
+}
+
+/// Adds every corpus note in a single `add_note` request and returns the note ids from
+/// the response, in response order.
+///
+/// Each note is submitted as a `fact` with its corpus key, fixed importance and
+/// confidence, and a `source_ref` naming the corpus title and document. Fails if any
+/// result in the response lacks a `note_id`.
+async fn add_notes(service: &ElfService, notes: &[CorpusNote]) -> color_eyre::Result<Vec<Uuid>> {
+	let request = AddNoteRequest {
+		tenant_id: TENANT_ID.to_string(),
+		project_id: PROJECT_ID.to_string(),
+		agent_id: AGENT_ID.to_string(),
+		scope: SCOPE.to_string(),
+		notes: notes
+			.iter()
+			.map(|note| AddNoteInput {
+				r#type: "fact".to_string(),
+				key: Some(note.key.clone()),
+				text: note.text.clone(),
+				structured: None,
+				importance: 0.9,
+				confidence: 0.95,
+				ttl_days: None,
+				source_ref: serde_json::json!({
+					"source": "ELF live baseline corpus",
+					"title": note.title,
+					"document": note.source_doc,
+				}),
+				write_policy: None,
+			})
+			.collect(),
+	};
+	let response = service.add_note(request).await?;
+
+	// Collecting into a `Result` stops at the first result without an id.
+	response
+		.results
+		.into_iter()
+		.map(|result| {
+			result.note_id.ok_or_else(|| eyre::eyre!("ELF add_note did not return a note_id."))
+		})
+		.collect()
+}
+
+/// Drives the worker in-process until the outbox work for `note_ids` is done or the
+/// iteration budget from `worker_max_iterations` is used up.
+///
+/// The returned evidence is produced in both cases; callers decide success by checking
+/// `after` with `outbox_done`. `before` is the outbox state prior to the first worker
+/// pass and `iterations` is the number of `worker::process_once` calls made.
+async fn run_worker_until_indexed(
+	runtime: &BaselineRuntime,
+	service: &ElfService,
+	note_ids: &[Uuid],
+	label: &str,
+) -> color_eyre::Result<WorkerRunEvidence> {
+	let state = build_worker_state(runtime).await?;
+	let expected_note_count = note_ids.len();
+	let before = outbox_status_counts(service, note_ids).await?;
+	let max_iterations = worker_max_iterations(expected_note_count);
+	let mut iterations = 0_usize;
+	// Poll the outbox before every worker pass, and once more after the last one.
+	let after = loop {
+		let counts = outbox_status_counts(service, note_ids).await?;
+
+		if iterations >= max_iterations || outbox_done(&counts, expected_note_count) {
+			break counts;
+		}
+
+		worker::process_once(&state).await?;
+
+		iterations += 1;
+	};
+	let (chunk_rows, chunk_embedding_rows) = chunk_counts(service, note_ids).await?;
+	let failed_jobs = failed_outbox_jobs(service, note_ids).await?;
+
+	Ok(WorkerRunEvidence {
+		label: label.to_string(),
+		expected_note_count,
+		iterations,
+		before,
+		after,
+		chunk_rows,
+		chunk_embedding_rows,
+		failed_jobs,
+	})
+}
+
+/// Counts `indexing_outbox` rows for `note_ids`, grouped by status.
+///
+/// Returns an empty map without querying when `note_ids` is empty.
+async fn outbox_status_counts(
+	service: &ElfService,
+	note_ids: &[Uuid],
+) -> color_eyre::Result<BTreeMap<String, i64>> {
+	const STATUS_COUNT_SQL: &str = "\
+SELECT status, COUNT(*)::bigint
+FROM indexing_outbox
+WHERE note_id = ANY($1)
+GROUP BY status
+ORDER BY status";
+
+	let mut counts = BTreeMap::new();
+
+	if !note_ids.is_empty() {
+		let rows = sqlx::query_as::<_, (String, i64)>(STATUS_COUNT_SQL)
+			.bind(note_ids)
+			.fetch_all(&service.db.pool)
+			.await?;
+
+		counts.extend(rows);
+	}
+
+	Ok(counts)
+}
+
+/// Returns `(chunk rows, chunk rows joined to an embedding row)` for `note_ids`.
+///
+/// Returns `(0, 0)` without querying when `note_ids` is empty.
+async fn chunk_counts(service: &ElfService, note_ids: &[Uuid]) -> color_eyre::Result<(i64, i64)> {
+	const CHUNK_SQL: &str = "\
+SELECT COUNT(*)::bigint
+FROM memory_note_chunks
+WHERE note_id = ANY($1)";
+	const EMBEDDED_CHUNK_SQL: &str = "\
+SELECT COUNT(*)::bigint
+FROM memory_note_chunks c
+JOIN note_chunk_embeddings e ON e.chunk_id = c.chunk_id
+WHERE c.note_id = ANY($1)";
+
+	if note_ids.is_empty() {
+		return Ok((0, 0));
+	}
+
+	let pool = &service.db.pool;
+	let chunk_rows =
+		sqlx::query_scalar::<_, i64>(CHUNK_SQL).bind(note_ids).fetch_one(pool).await?;
+	let chunk_embedding_rows =
+		sqlx::query_scalar::<_, i64>(EMBEDDED_CHUNK_SQL).bind(note_ids).fetch_one(pool).await?;
+
+	Ok((chunk_rows, chunk_embedding_rows))
+}
+
+/// Lists the `FAILED` `indexing_outbox` rows for `note_ids`, ordered by note key (rows
+/// without a key last) and then by note id.
+///
+/// Returns an empty list without querying when `note_ids` is empty.
+async fn failed_outbox_jobs(
+	service: &ElfService,
+	note_ids: &[Uuid],
+) -> color_eyre::Result<Vec<FailedOutboxJob>> {
+	const FAILED_JOBS_SQL: &str = "\
+SELECT o.note_id, n.key, o.op, o.attempts, o.last_error
+FROM indexing_outbox o
+LEFT JOIN memory_notes n ON n.note_id = o.note_id
+WHERE o.note_id = ANY($1)
+	AND o.status = 'FAILED'
+ORDER BY n.key NULLS LAST, o.note_id";
+
+	if note_ids.is_empty() {
+		return Ok(Vec::new());
+	}
+
+	let rows = sqlx::query_as::<_, (Uuid, Option<String>, String, i32, Option<String>)>(
+		FAILED_JOBS_SQL,
+	)
+	.bind(note_ids)
+	.fetch_all(&service.db.pool)
+	.await?;
+	let mut jobs = Vec::with_capacity(rows.len());
+
+	for (note_id, note_key, op, attempts, last_error) in rows {
+		jobs.push(FailedOutboxJob { note_id, note_key, op, attempts, last_error });
+	}
+
+	Ok(jobs)
+}
+
+/// Runs the query cases one after another, in manifest order, and returns one result per
+/// case. The first search error aborts the remaining cases.
+async fn run_queries(
+	service: &ElfService,
+	queries: Vec<QueryCase>,
+) -> color_eyre::Result<Vec<QueryResult>> {
+	let mut results = Vec::with_capacity(queries.len());
+
+	for case in queries {
+		let result = run_single_query(service, case).await?;
+
+		results.push(result);
+	}
+
+	Ok(results)
+}
+
+/// Runs one search for `case` and scores it against the case's expectations.
+///
+/// Only the top-ranked item is inspected. The case counts as matched when either
+/// - there is a top item, `expected_terms` is non-empty, and every expected term occurs
+///   in the top snippet (ASCII case-insensitive), or
+/// - the top item's key equals `key_for_doc(expected_doc)`.
+///
+/// An empty `expected_terms` list or an empty result set never satisfies the term rule;
+/// without that guard "all zero terms matched" would count as a match even when the
+/// search returned nothing.
+///
+/// `ELF_BASELINE_TOP_K` overrides the default `top_k` of 10 when it parses as a `u32`;
+/// `candidate_k` is four times the larger of `top_k` and 20.
+async fn run_single_query(
+	service: &ElfService,
+	case: QueryCase,
+) -> color_eyre::Result<QueryResult> {
+	let top_k = env::var("ELF_BASELINE_TOP_K")
+		.ok()
+		.and_then(|value| value.parse::<u32>().ok())
+		.unwrap_or(10);
+	let response = service
+		.search_raw(SearchRequest {
+			tenant_id: TENANT_ID.to_string(),
+			project_id: PROJECT_ID.to_string(),
+			agent_id: AGENT_ID.to_string(),
+			token_id: None,
+			payload_level: PayloadLevel::default(),
+			read_profile: "private_only".to_string(),
+			query: case.query.clone(),
+			top_k: Some(top_k),
+			candidate_k: Some(top_k.max(20).saturating_mul(4)),
+			filter: None,
+			record_hits: Some(false),
+			ranking: None,
+		})
+		.await?;
+	let top = response.items.first();
+	let top_key = top.and_then(|item| item.key.clone());
+	let top_snippet = top.map(|item| item.snippet.clone());
+	// An absent top item is scored against the empty string.
+	let top_text = top_snippet.as_deref().unwrap_or("");
+	let matched_terms = case
+		.expected_terms
+		.iter()
+		.filter(|term| contains_case_insensitive(top_text, term))
+		.cloned()
+		.collect::<Vec<_>>();
+	let expected_key = key_for_doc(&case.expected_doc);
+	let all_terms_matched = top.is_some()
+		&& !case.expected_terms.is_empty()
+		&& matched_terms.len() == case.expected_terms.len();
+	let matched =
+		all_terms_matched || top_key.as_deref().is_some_and(|key| key == expected_key);
+	let returned_count = response.items.len();
+
+	Ok(QueryResult {
+		id: case.id,
+		query: case.query,
+		expected_doc: case.expected_doc,
+		expected_terms: case.expected_terms,
+		matched,
+		matched_terms,
+		top_note_key: top_key,
+		top_snippet,
+		returned_count,
+	})
+}
+
+/// Runs the update, delete, and cold-start lifecycle checks, in that order.
+///
+/// The first three corpus notes are used: index 0 is updated, index 1 is deleted, and
+/// index 2 is searched from a freshly built service. If any required note or note id is
+/// missing, a single incomplete check naming the first missing prerequisite is returned
+/// and no lifecycle check is run.
+async fn run_lifecycle_checks(
+	runtime: &BaselineRuntime,
+	service: &ElfService,
+	notes: &[CorpusNote],
+	note_ids: &[Uuid],
+) -> color_eyre::Result<Vec<CheckResult>> {
+	let (update_note, update_note_id) = match (notes.first(), note_ids.first().copied()) {
+		(Some(note), Some(note_id)) => (note, note_id),
+		(None, _) => {
+			return Ok(vec![incomplete_check(
+				"update_replaces_note_text",
+				"Corpus has no note to update.",
+			)]);
+		},
+		(Some(_), None) => {
+			return Ok(vec![incomplete_check(
+				"update_replaces_note_text",
+				"ELF add_note returned no note_id for lifecycle update.",
+			)]);
+		},
+	};
+	let (delete_note, delete_note_id) = match (notes.get(1), note_ids.get(1).copied()) {
+		(Some(note), Some(note_id)) => (note, note_id),
+		(None, _) => {
+			return Ok(vec![incomplete_check(
+				"delete_suppresses_retrieval",
+				"Corpus has no note to delete.",
+			)]);
+		},
+		(Some(_), None) => {
+			return Ok(vec![incomplete_check(
+				"delete_suppresses_retrieval",
+				"ELF add_note returned no note_id for lifecycle delete.",
+			)]);
+		},
+	};
+	let recovery_note = match notes.get(2) {
+		Some(note) => note,
+		None => {
+			return Ok(vec![incomplete_check(
+				"cold_start_recovery_search",
+				"Corpus has no stable note for recovery search.",
+			)]);
+		},
+	};
+	let update_check =
+		run_update_replacement_check(runtime, service, update_note, update_note_id).await?;
+	let delete_check =
+		run_delete_suppression_check(runtime, service, delete_note, delete_note_id).await?;
+	let recovery_check = run_cold_start_recovery_check(runtime, service, recovery_note).await?;
+
+	Ok(vec![update_check, delete_check, recovery_check])
+}
+
+/// Lifecycle check: updating a note's text must change what search returns for it.
+///
+/// Replaces the note text with one carrying the `kid-v4` / `RotatedJwtKeyPlan` markers,
+/// runs the worker for that note, then searches. The check passes only when the query
+/// matched, a top snippet exists and does not contain `kid-v3`, and the note's outbox
+/// work is done.
+async fn run_update_replacement_check(
+	runtime: &BaselineRuntime,
+	service: &ElfService,
+	update_note: &CorpusNote,
+	update_note_id: Uuid,
+) -> color_eyre::Result<CheckResult> {
+	let replacement_text = "\
+	Rotated auth middleware validates JWT tokens with key id `kid-v4` under \
+	`RotatedJwtKeyPlan`. It still requires tenant scope `project_shared` for deployment \
+	operations after the emergency key rotation."
+		.to_string();
+	let request = UpdateRequest {
+		tenant_id: TENANT_ID.to_string(),
+		project_id: PROJECT_ID.to_string(),
+		agent_id: AGENT_ID.to_string(),
+		note_id: update_note_id,
+		text: Some(replacement_text),
+		importance: None,
+		confidence: None,
+		ttl_days: None,
+	};
+	let update_response = service.update(request).await?;
+	let worker_run =
+		run_worker_until_indexed(runtime, service, &[update_note_id], "lifecycle_update").await?;
+	let probe = QueryCase {
+		id: "lifecycle-update-new-marker".to_string(),
+		query: "Which rotated JWT key id does the auth middleware require?".to_string(),
+		expected_doc: update_note.source_doc.clone(),
+		expected_terms: vec!["kid-v4".to_string(), "RotatedJwtKeyPlan".to_string()],
+	};
+	let query_result = run_single_query(service, probe).await?;
+	// A missing top snippet counts as "old marker not proven absent".
+	let old_marker_absent = match query_result.top_snippet.as_deref() {
+		Some(snippet) => !contains_case_insensitive(snippet, "kid-v3"),
+		None => false,
+	};
+	let passed = query_result.matched
+		&& old_marker_absent
+		&& outbox_done(&worker_run.after, worker_run.expected_note_count);
+	let (status, reason) = if passed {
+		(
+			"pass",
+			"Service update plus worker indexing returned the new marker and removed the old marker from the top snippet.",
+		)
+	} else {
+		(
+			"fail",
+			"Service update plus worker indexing did not produce a clean search result for the replacement marker.",
+		)
+	};
+
+	Ok(CheckResult {
+		name: "update_replaces_note_text",
+		status,
+		reason: reason.to_string(),
+		evidence: serde_json::json!({
+			"note_id": update_note_id,
+			"op": update_response.op,
+			"worker": worker_run,
+			"query": query_result,
+			"old_marker_absent": old_marker_absent,
+		}),
+	})
+}
+
+/// Lifecycle check: a deleted note must no longer be what search returns.
+///
+/// Deletes the note, runs the worker for it, then searches using the note's own text.
+/// The check passes when that query does NOT match and the note's outbox work is done.
+async fn run_delete_suppression_check(
+	runtime: &BaselineRuntime,
+	service: &ElfService,
+	delete_note: &CorpusNote,
+	delete_note_id: Uuid,
+) -> color_eyre::Result<CheckResult> {
+	let request = DeleteRequest {
+		tenant_id: TENANT_ID.to_string(),
+		project_id: PROJECT_ID.to_string(),
+		agent_id: AGENT_ID.to_string(),
+		note_id: delete_note_id,
+	};
+	let delete_response = service.delete(request).await?;
+	let worker_run =
+		run_worker_until_indexed(runtime, service, &[delete_note_id], "lifecycle_delete").await?;
+	let probe = QueryCase {
+		id: "lifecycle-delete-suppresses-note".to_string(),
+		query: delete_note.text.clone(),
+		expected_doc: delete_note.source_doc.clone(),
+		expected_terms: distinctive_terms(&delete_note.text, 2),
+	};
+	let query_result = run_single_query(service, probe).await?;
+	let passed = !query_result.matched
+		&& outbox_done(&worker_run.after, worker_run.expected_note_count);
+	let (status, reason) = if passed {
+		("pass", "Service delete suppressed the deleted note from subsequent search results.")
+	} else {
+		("fail", "Deleted note was still retrievable after service delete and worker indexing.")
+	};
+
+	Ok(CheckResult {
+		name: "delete_suppresses_retrieval",
+		status,
+		reason: reason.to_string(),
+		evidence: serde_json::json!({
+			"note_id": delete_note_id,
+			"op": delete_response.op,
+			"worker": worker_run,
+			"query": query_result,
+		}),
+	})
+}
+
+/// Lifecycle check: a service built from scratch over the same runtime must still find
+/// a note written earlier.
+///
+/// Builds a second service with `build_service`, searches it using the note's own text,
+/// and passes when that query matches. Pending outbox counts, read through the original
+/// `service`, are attached as evidence only and do not affect the verdict.
+async fn run_cold_start_recovery_check(
+	runtime: &BaselineRuntime,
+	service: &ElfService,
+	recovery_note: &CorpusNote,
+) -> color_eyre::Result<CheckResult> {
+	let fresh_service = build_service(runtime).await?;
+	let probe = QueryCase {
+		id: "lifecycle-cold-start-recovery".to_string(),
+		query: recovery_note.text.clone(),
+		expected_doc: recovery_note.source_doc.clone(),
+		expected_terms: distinctive_terms(&recovery_note.text, 2),
+	};
+	let query_result = run_single_query(&fresh_service, probe).await?;
+	let pending_by_op = pending_outbox_counts(service).await?;
+	let (status, reason) = if query_result.matched {
+		(
+			"pass",
+			"A newly constructed service over the same Postgres and Qdrant stores retrieved persisted evidence.",
+		)
+	} else {
+		(
+			"fail",
+			"A newly constructed service over the same stores could not retrieve persisted evidence.",
+		)
+	};
+
+	Ok(CheckResult {
+		name: "cold_start_recovery_search",
+		status,
+		reason: reason.to_string(),
+		evidence: serde_json::json!({
+			"query": query_result,
+			"pending_outbox_by_op": pending_by_op,
+			"note": recovery_note.source_doc,
+		}),
+	})
+}
+
+/// Counts all `PENDING` rows in `indexing_outbox`, grouped by operation.
+async fn pending_outbox_counts(service: &ElfService) -> color_eyre::Result<BTreeMap<String, i64>> {
+	const PENDING_BY_OP_SQL: &str = "\
+SELECT op, COUNT(*)::bigint
+FROM indexing_outbox
+WHERE status = 'PENDING'
+GROUP BY op
+ORDER BY op";
+
+	let rows = sqlx::query_as::<_, (String, i64)>(PENDING_BY_OP_SQL)
+		.fetch_all(&service.db.pool)
+		.await?;
+	let mut counts = BTreeMap::new();
+
+	counts.extend(rows);
+
+	Ok(counts)
+}
+
+/// Concurrency check: notes added by concurrently running tasks must all be indexed and
+/// searchable.
+///
+/// Spawns one `add_note` task per note on a `JoinSet`, collects the note ids as the
+/// tasks finish, runs the worker over all of them, and then runs the probe queries
+/// chosen by `concurrency_probe_indexes`. Passes when the outbox work is done and every
+/// probe query matched.
+async fn run_concurrent_write_check(
+	runtime: &BaselineRuntime,
+	service: Arc<ElfService>,
+) -> color_eyre::Result<CheckResult> {
+	let note_count = concurrent_note_count();
+	let mut writers = JoinSet::new();
+
+	for index in 0..note_count {
+		let request = concurrent_add_request(index);
+		let writer_service = Arc::clone(&service);
+
+		writers.spawn(async move {
+			let response = writer_service.add_note(request).await?;
+			let first_id = response.results.first().and_then(|result| result.note_id);
+			let note_id = first_id
+				.ok_or_else(|| eyre::eyre!("Concurrent add_note did not return a note_id."))?;
+
+			Ok::<Uuid, Report>(note_id)
+		});
+	}
+
+	// Ids arrive in task completion order, which need not be spawn order.
+	let mut note_ids = Vec::with_capacity(note_count);
+
+	while let Some(joined) = writers.join_next().await {
+		// The outer `?` surfaces a join failure, the inner one an `add_note` failure.
+		let note_id = joined??;
+
+		note_ids.push(note_id);
+	}
+
+	let worker_run =
+		run_worker_until_indexed(runtime, &service, &note_ids, "concurrent_upsert").await?;
+	let mut probes = Vec::new();
+
+	for index in concurrency_probe_indexes(note_count) {
+		let probe = run_single_query(&service, concurrent_query_case(index)).await?;
+
+		probes.push(probe);
+	}
+
+	let matched_count = probes.iter().filter(|result| result.matched).count();
+	let passed = outbox_done(&worker_run.after, worker_run.expected_note_count)
+		&& matched_count == probes.len();
+	let (status, reason) = if passed {
+		("pass", "Concurrent add_note calls were indexed by the worker and remained searchable.")
+	} else {
+		("fail", "Concurrent add_note calls did not all become searchable after worker indexing.")
+	};
+
+	Ok(CheckResult {
+		name: "concurrent_write_search_e2e",
+		status,
+		reason: reason.to_string(),
+		evidence: serde_json::json!({
+			"note_count": note_count,
+			"worker": worker_run,
+			"query_summary": {
+				"total": probes.len(),
+				"pass": matched_count,
+				"fail": probes.len().saturating_sub(matched_count),
+			},
+			"queries": probes,
+		}),
+	})
+}
+
+/// Optional soak check: repeated write, index, and search rounds, followed by search
+/// probes until the configured window has elapsed.
+///
+/// Returns `Ok(None)` when the soak configuration has both `target_seconds` and
+/// `write_rounds` set to zero. Otherwise the check passes only when every worker run
+/// finished its outbox work, the target duration was reached, and no query failed.
+async fn run_soak_stability_check(
+	runtime: &BaselineRuntime,
+	service: Arc<ElfService>,
+) -> color_eyre::Result<Option<CheckResult>> {
+	let config = soak_config();
+
+	if config.target_seconds == 0 && config.write_rounds == 0 {
+		return Ok(None);
+	}
+
+	let target_duration = Duration::from_secs(config.target_seconds);
+	let started_at = Instant::now();
+	// A timed soak always performs at least one write round, so the probe loop below
+	// has a note to query and its modulus is never zero.
+	let write_rounds = config.write_rounds.max(if config.target_seconds > 0 { 1 } else { 0 });
+	let mut note_ids = Vec::with_capacity(write_rounds);
+	let mut worker_runs = Vec::with_capacity(write_rounds);
+	let mut query_results = Vec::new();
+
+	for index in 0..write_rounds {
+		let response = service.add_note(soak_add_request(index)).await?;
+		let note_id = response
+			.results
+			.first()
+			.and_then(|result| result.note_id)
+			.ok_or_else(|| eyre::eyre!("Soak add_note did not return a note_id."))?;
+
+		note_ids.push(note_id);
+		worker_runs
+			.push(run_worker_until_indexed(runtime, &service, &[note_id], "soak_upsert").await?);
+		query_results.push(run_single_query(&service, soak_query_case(index)).await?);
+
+		// Pace the rounds across the window: after round `index`, wait until
+		// `(index + 1) / write_rounds` of the target duration has elapsed.
+		if config.target_seconds > 0 && write_rounds > 1 {
+			let target_elapsed = target_duration.mul_f64((index + 1) as f64 / write_rounds as f64);
+
+			if started_at.elapsed() < target_elapsed {
+				time::sleep(target_elapsed.saturating_sub(started_at.elapsed())).await;
+			}
+		}
+	}
+
+	let mut probe_index = 0;
+
+	// Fill the rest of the window with search probes that cycle through the notes
+	// written above.
+	while started_at.elapsed() < target_duration {
+		let index = probe_index % write_rounds;
+
+		query_results.push(run_single_query(&service, soak_query_case(index)).await?);
+
+		probe_index += 1;
+
+		// Sleep for the probe interval, but never past the end of the window.
+		let sleep_for = Duration::from_millis(config.probe_interval_millis)
+			.min(target_duration.saturating_sub(started_at.elapsed()));
+
+		if !sleep_for.is_zero() {
+			time::sleep(sleep_for).await;
+		}
+	}
+
+	let elapsed_seconds = started_at.elapsed().as_secs_f64();
+	let pass_count = query_results.iter().filter(|result| result.matched).count();
+	let query_fail_count = query_results.len().saturating_sub(pass_count);
+	let worker_pass =
+		worker_runs.iter().all(|run| outbox_done(&run.after, run.expected_note_count));
+	let duration_pass = target_duration.is_zero() || started_at.elapsed() >= target_duration;
+	let pass = worker_pass && duration_pass && query_fail_count == 0;
+	// Only the failed queries are attached as evidence; passing ones are summarized by
+	// the counts.
+	let failed_queries = query_results.iter().filter(|result| !result.matched).collect::<Vec<_>>();
+
+	Ok(Some(CheckResult {
+		name: "soak_stability_e2e",
+		status: if pass { "pass" } else { "fail" },
+		reason: if pass {
+			"ELF sustained repeated write, worker indexing, and search probes for the configured soak window.".to_string()
+		} else {
+			"ELF did not sustain the configured soak write/search window without a failed worker or retrieval probe.".to_string()
+		},
+		evidence: serde_json::json!({
+			"config": config,
+			"elapsed_seconds": elapsed_seconds,
+			"duration_met": duration_pass,
+			"worker_pass": worker_pass,
+			"write_note_ids": note_ids,
+			"worker_runs": worker_runs,
+			"query_summary": {
+				"total": query_results.len(),
+				"pass": pass_count,
+				"fail": query_fail_count,
+			},
+			"failed_queries": failed_queries,
+		}),
+	}))
+}
diff --git a/apps/elf-worker/src/worker.rs b/apps/elf-worker/src/worker.rs
index 27f3a1ab..823094a5 100644
--- a/apps/elf-worker/src/worker.rs
+++ b/apps/elf-worker/src/worker.rs
@@ -253,6 +253,15 @@ pub async fn run_worker(state: WorkerState) -> Result<()> {
 	}
 }
 
+/// Processes at most one due job from each worker-owned queue.
+///
+/// The queues are visited in a fixed order: the indexing outbox, the doc indexing
+/// outbox, then the trace outbox. The first error is returned immediately, so the
+/// remaining queues are not visited on that call.
+pub async fn process_once(state: &WorkerState) -> Result<()> {
+	process_indexing_outbox_once(state).await?;
+	process_doc_indexing_outbox_once(state).await?;
+	process_trace_outbox_once(state).await?;
+
+	Ok(())
+}
+
 fn is_not_found_error(err: &QdrantError) -> bool {
 	let message = err.to_string().to_lowercase();
 	let point_not_found =
diff --git a/docker-compose.baseline.yml b/docker-compose.baseline.yml
new file mode 100644
index 00000000..ac7e9762
--- /dev/null
+++ b/docker-compose.baseline.yml
@@ -0,0 +1,97 @@
+# Compose stack for the live baseline benchmark: Postgres (pgvector image), Qdrant, and
+# the runner container that executes the benchmark script.
+name: elf-live-baseline
+
+services:
+  postgres:
+    image: pgvector/pgvector:pg18
+    environment:
+      POSTGRES_DB: postgres
+      POSTGRES_PASSWORD: elf_dev_password
+      POSTGRES_USER: elf_dev
+    # baseline-runner waits for this healthcheck to pass (see its depends_on).
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - pg_isready -U elf_dev -d postgres
+      interval: 2s
+      timeout: 5s
+      retries: 30
+    volumes:
+      - elf-live-baseline-postgres-data:/var/lib/postgresql
+
+  # No healthcheck is defined here; the runner only waits for this container to start.
+  qdrant:
+    image: qdrant/qdrant:v1.16.3
+    volumes:
+      - elf-live-baseline-qdrant-data:/qdrant/storage
+
+  baseline-runner:
+    build:
+      context: .
+      dockerfile: docker/baseline/Dockerfile
+    depends_on:
+      postgres:
+        condition: service_healthy
+      qdrant:
+        condition: service_started
+    # `${NAME:-default}` entries are passed through from the host environment and fall
+    # back to the shown default (or to an empty value) when unset there.
+    environment:
+      CARGO_HOME: /usr/local/cargo
+      ELF_BASELINE_ELF_HEAD: ${ELF_BASELINE_ELF_HEAD:-unknown}
+      DASHSCOPE_API_BASE: ${DASHSCOPE_API_BASE:-}
+      DASHSCOPE_API_KEY: ${DASHSCOPE_API_KEY:-}
+      DASHSCOPE_EMBEDDING_DIMENSIONS: ${DASHSCOPE_EMBEDDING_DIMENSIONS:-}
+      EMBEDDING_API_BASE: ${EMBEDDING_API_BASE:-}
+      EMBEDDING_API_KEY: ${EMBEDDING_API_KEY:-}
+      EMBEDDING_DIMENSIONS: ${EMBEDDING_DIMENSIONS:-}
+      EMBEDDING_MODEL: ${EMBEDDING_MODEL:-}
+      EMBEDDING_PATH: ${EMBEDDING_PATH:-}
+      EMBEDDING_PROVIDER_ID: ${EMBEDDING_PROVIDER_ID:-}
+      EMBEDDING_TIMEOUT_MS: ${EMBEDDING_TIMEOUT_MS:-}
+      ELF_BASELINE_CONCURRENT_NOTES: ${ELF_BASELINE_CONCURRENT_NOTES:-}
+      ELF_BASELINE_ELF_EMBEDDING_API_BASE: ${ELF_BASELINE_ELF_EMBEDDING_API_BASE:-}
+      ELF_BASELINE_ELF_EMBEDDING_API_KEY: ${ELF_BASELINE_ELF_EMBEDDING_API_KEY:-}
+      ELF_BASELINE_ELF_EMBEDDING_DIMENSIONS: ${ELF_BASELINE_ELF_EMBEDDING_DIMENSIONS:-}
+      ELF_BASELINE_ELF_EMBEDDING_MODE: ${ELF_BASELINE_ELF_EMBEDDING_MODE:-local}
+      ELF_BASELINE_ELF_EMBEDDING_MODEL: ${ELF_BASELINE_ELF_EMBEDDING_MODEL:-}
+      ELF_BASELINE_ELF_EMBEDDING_PATH: ${ELF_BASELINE_ELF_EMBEDDING_PATH:-}
+      ELF_BASELINE_ELF_EMBEDDING_PROVIDER_ID: ${ELF_BASELINE_ELF_EMBEDDING_PROVIDER_ID:-}
+      ELF_BASELINE_ELF_EMBEDDING_TIMEOUT_MS: ${ELF_BASELINE_ELF_EMBEDDING_TIMEOUT_MS:-}
+      ELF_BASELINE_MAX_ELF_RSS_KB: ${ELF_BASELINE_MAX_ELF_RSS_KB:-1500000}
+      ELF_BASELINE_MAX_ELF_SECONDS: ${ELF_BASELINE_MAX_ELF_SECONDS:-600}
+      ELF_BASELINE_PROFILE: ${ELF_BASELINE_PROFILE:-smoke}
+      ELF_BASELINE_PROJECTS: ${ELF_BASELINE_PROJECTS:-all}
+      ELF_BASELINE_REPORT_DIR: /workspace/tmp/live-baseline
+      ELF_BASELINE_SCALE_DOCS: ${ELF_BASELINE_SCALE_DOCS:-120}
+      ELF_BASELINE_SOAK_PROBE_INTERVAL_MS: ${ELF_BASELINE_SOAK_PROBE_INTERVAL_MS:-}
+      ELF_BASELINE_SOAK_ROUNDS: ${ELF_BASELINE_SOAK_ROUNDS:-}
+      ELF_BASELINE_SOAK_SECONDS: ${ELF_BASELINE_SOAK_SECONDS:-}
+      ELF_BASELINE_STRESS_DOCS: ${ELF_BASELINE_STRESS_DOCS:-480}
+      ELF_BASELINE_TOP_K: ${ELF_BASELINE_TOP_K:-10}
+      QWEN_API_KEY: ${QWEN_API_KEY:-}
+      QWEN_EMBEDDING_API_BASE: ${QWEN_EMBEDDING_API_BASE:-}
+      QWEN_EMBEDDING_DIMENSIONS: ${QWEN_EMBEDDING_DIMENSIONS:-}
+      QWEN_EMBEDDING_MODEL: ${QWEN_EMBEDDING_MODEL:-}
+      QWEN_EMBEDDING_PATH: ${QWEN_EMBEDDING_PATH:-}
+      QWEN_EMBEDDING_PROVIDER_ID: ${QWEN_EMBEDDING_PROVIDER_ID:-}
+      QWEN_EMBEDDING_TIMEOUT_MS: ${QWEN_EMBEDDING_TIMEOUT_MS:-}
+      # Credentials and host names match the postgres and qdrant services above.
+      ELF_PG_DSN: postgres://elf_dev:elf_dev_password@postgres:5432/postgres
+      ELF_QDRANT_GRPC_URL: http://qdrant:6334
+      ELF_QDRANT_HTTP_URL: http://qdrant:6333
+      RUSTUP_HOME: /usr/local/rustup
+    # Named volumes hold tool caches and the Cargo target directory; the bind mount on
+    # the last line makes the report directory visible on the host.
+    volumes:
+      - elf-live-baseline-npm-cache:/root/.npm
+      - elf-live-baseline-pip-cache:/root/.cache/pip
+      - elf-live-baseline-huggingface-cache:/root/.cache/huggingface
+      - elf-live-baseline-qmd-cache:/root/.cache/qmd
+      - elf-live-baseline-cargo-git:/usr/local/cargo/git
+      - elf-live-baseline-cargo-registry:/usr/local/cargo/registry
+      - elf-live-baseline-target:/workspace/target
+      - ./tmp/live-baseline:/workspace/tmp/live-baseline
+
+volumes:
+  elf-live-baseline-cargo-git:
+  elf-live-baseline-cargo-registry:
+  elf-live-baseline-huggingface-cache:
+  elf-live-baseline-npm-cache:
+  elf-live-baseline-pip-cache:
+  elf-live-baseline-postgres-data:
+  elf-live-baseline-qmd-cache:
+  elf-live-baseline-qdrant-data:
+  elf-live-baseline-target:
diff --git a/docker/baseline/Dockerfile b/docker/baseline/Dockerfile
new file mode 100644
index 00000000..1384eb15
--- /dev/null
+++ b/docker/baseline/Dockerfile
@@ -0,0 +1,37 @@
+# Runner image for the live baseline benchmark: a Node 22 base with native build
+# tooling, Python 3, a stable Rust toolchain, and the bun/pnpm/tsx CLIs.
+FROM node:22-bookworm
+
+# The apt package lists are removed in the same layer to keep it small.
+RUN apt-get update \
+  && apt-get install -y --no-install-recommends \
+    bash \
+    build-essential \
+    ca-certificates \
+    clang \
+    cmake \
+    curl \
+    git \
+    jq \
+    libssl-dev \
+    pkg-config \
+    python3 \
+    python3-dev \
+    python3-pip \
+    python3-venv \
+    ripgrep \
+    sqlite3 \
+  && rm -rf /var/lib/apt/lists/*
+
+ENV CARGO_HOME=/usr/local/cargo
+ENV RUSTUP_HOME=/usr/local/rustup
+ENV PATH=/usr/local/cargo/bin:$PATH
+
+# Install the stable Rust toolchain (minimal profile) through rustup, then make the
+# Cargo and rustup directories writable by every user.
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
+  | sh -s -- -y --profile minimal --default-toolchain stable \
+  && chmod -R a+w "${CARGO_HOME}" "${RUSTUP_HOME}"
+
+RUN npm install -g bun pnpm tsx
+
+WORKDIR /workspace
+
+# The whole build context is copied into the image, so the benchmark runs against the
+# sources as they were at build time.
+COPY . /workspace
+
+CMD ["bash", "scripts/live-baseline-benchmark.sh"]
diff --git a/docs/guide/benchmarking/2026-06-09-live-baseline-report.md b/docs/guide/benchmarking/2026-06-09-live-baseline-report.md
new file mode 100644
index 00000000..bbfb55ae
--- /dev/null
+++ b/docs/guide/benchmarking/2026-06-09-live-baseline-report.md
@@ -0,0 +1,203 @@
+# Live Baseline Benchmark Report - 2026-06-09
+
+Goal: Preserve the checked-in evidence snapshot behind the README benchmark claims.
+Read this when: You need the June 9, 2026 live baseline result, pass/fail reasons, or
+the next benchmark iteration backlog.
+Inputs: Docker-only benchmark reports generated by `cargo make baseline-live-docker`.
+Depends on: `docs/guide/benchmarking/live_baseline_benchmark.md`,
+`docker-compose.baseline.yml`, `scripts/live-baseline-benchmark.sh`, and
+`scripts/live-baseline-report-to-md.sh`.
+Verification: Re-run the commands in this report and compare
+`tmp/live-baseline/live-baseline-report.json`.
+
+## Executive Summary
+
+- ELF passed the production-provider stress run with `Qwen3-Embedding-8B`,
+  4096-dimensional embeddings, 480 documents, 16 queries, and `8/8` encoded checks.
+- In the all-project smoke comparison, ELF and qmd passed every encoded check.
+  agentmemory passed same-corpus retrieval but failed or could not complete lifecycle
+  checks. mem0, memsearch, and claude-mem returned wrong same-corpus retrieval results
+  in the encoded smoke. OpenViking was incomplete because its local embedding dependency
+  could not complete in the Docker runner.
+- Under the encoded service-style benchmark checks, ELF passed all ELF checks that were
+  run. Under the encoded local CLI smoke checks, qmd passed all qmd checks that were
+  run.
+- This report records results for the checked-in Docker benchmark contract. It does not
+  evaluate dimensions that are not encoded in the runner.
+
+## ELF Production-Provider Stress Run
+
+| Field | Value |
+| --- | --- |
+| Run ID | `live-baseline-20260609010854` |
+| Generated at | `2026-06-09T01:28:17Z` |
+| Project filter | `ELF` |
+| Corpus profile | `stress` |
+| Documents | `480` |
+| Queries | `16` |
+| Verdict | `pass` |
+| Same-corpus summary | `1/1 pass` |
+| Full check summary | `8/8 pass` |
+| Elapsed | `1163` seconds |
+| Embedding mode | `provider` |
+| Embedding model | `Qwen3-Embedding-8B` |
+| Embedding dimensions | `4096` |
+| Embedding API path | `https://ai.gitee.com/v1/embeddings` |
+| Timeout | `30000` ms |
+
+Encoded checks covered:
+
+- same-corpus retrieval for all 16 stress queries;
+- worker indexing for the 480-document corpus;
+- update replacement;
+- delete suppression;
+- cold-start recovery over the same stores;
+- concurrent write/search behavior;
+- stress-profile soak behavior;
+- resource envelope under the configured stress threshold.
+
+Re-run command:
+
+```sh
+set -a
+source .env
+set +a
+
+EMBEDDING_MODEL=Qwen3-Embedding-8B \
+EMBEDDING_DIMENSIONS=4096 \
+ELF_BASELINE_PROJECTS=ELF \
+ELF_BASELINE_PROFILE=stress \
+ELF_BASELINE_MAX_ELF_SECONDS=1800 \
+ELF_BASELINE_ELF_EMBEDDING_MODE=provider \
+cargo make baseline-live-docker
+```
+
+## All-Project Smoke Comparison
+
+| Field | Value |
+| --- | --- |
+| Run ID | `live-baseline-20260609022837` |
+| Generated at | `2026-06-09T02:42:37Z` |
+| Project filter | `all` |
+| Corpus profile | `smoke` |
+| Documents | `3` |
+| Queries | `3` |
+| Aggregate verdict | `fail` |
+| Project summary | `2 pass`, `4 fail`, `1 incomplete` |
+| Same-corpus summary | `3 pass`, `3 fail`, `1 incomplete` |
+| Full check summary | `17 pass`, `4 fail`, `4 incomplete` |
+
+The aggregate verdict is `fail` because the top-level report only passes when every
+selected project passes every encoded project check.
+
+| Project | Status | Retrieval | Checks | Elapsed | Interpretation |
+| --- | --- | --- | --- | --- | --- |
+| ELF | `pass` | `retrieval_pass` | `7/7` | `57s` | Service-backed provider run passed retrieval, worker indexing, lifecycle, recovery, concurrency, and resource-envelope checks. |
+| qmd | `pass` | `retrieval_pass` | `4/4` | `53s` | Local CLI hybrid retrieval baseline passed retrieval, update, delete, and cold-start checks. |
+| agentmemory | `fail` | `retrieval_pass` | `2/4` | `38s` | Retrieval passed, but update replacement failed because the old marker remained searchable; cold-start is incomplete in the current in-memory adapter. |
+| memsearch | `fail` | `retrieval_wrong_result` | `2/4` | `169s` | Local search ran, update and cold-start passed, but same-corpus retrieval missed expected evidence. |
+| mem0 | `fail` | `retrieval_wrong_result` | `2/4` | `41s` | Local add/search ran, update and cold-start passed, but same-corpus retrieval missed expected evidence. |
+| OpenViking | `incomplete` | `local_embed_install_failed` | `0/1` | `385s` | The local embed install path hit a `llama-cpp-python` build/import failure in Docker, so retrieval was not evaluated. |
+| claude-mem | `fail` | `retrieval_wrong_result` | `0/1` | `97s` | Same-corpus repository search ran but did not return expected evidence. |
+
+Re-run command:
+
+```sh
+set -a
+source .env
+set +a
+
+EMBEDDING_MODEL=Qwen3-Embedding-8B \
+EMBEDDING_DIMENSIONS=4096 \
+ELF_BASELINE_PROFILE=smoke \
+ELF_BASELINE_ELF_EMBEDDING_MODE=provider \
+cargo make baseline-live-docker
+```
+
+## Pass, Fail, And Incomplete Rules
+
+- `pass`: the project installed and every encoded retrieval, lifecycle, recovery, and
+  resource check for the selected corpus profile passed.
+- `fail`: clone, install, import, build, retrieval, update, delete, recovery,
+  concurrency, soak, resource-envelope, or another declared project check failed.
+- `incomplete`: the project partially ran, but the encoded check could not be completed
+  without extra provider keys, host integration, native dependency support, durable
+  runtime wiring, or a project-specific command mapping not yet encoded in the runner.
+
+`incomplete` is not a pass. It means the benchmark needs more wiring before making a
+quality claim for that project.
+
+## Interpretation
+
+The benchmark is intentionally stricter than a feature checklist. It exercises whether a
+project can ingest the same corpus, return expected evidence for the same queries, and
+preserve basic lifecycle behavior under the runner's encoded contract.
+
+ELF checks covered in this run:
+
+- production-provider embeddings through the same service path used by ELF;
+- Postgres source-of-truth with Qdrant as a rebuildable derived index;
+- worker-produced chunks and embeddings, not direct in-memory fixture shortcuts;
+- explicit update, delete, cold-start, concurrency, soak, and resource checks;
+- report metadata that records corpus profile, document count, query count, project
+  status, check summaries, elapsed seconds, and embedding configuration.
+
+qmd was the only external project that passed every encoded smoke check. agentmemory passed
+same-corpus retrieval, failed update replacement, and has incomplete cold-start coverage
+because the current adapter uses an in-memory SDK/KV mock. mem0, memsearch, and
+claude-mem failed the encoded smoke retrieval. OpenViking was not retrieval-evaluated
+because the Docker local embedding install path did not complete.
+
+## Speed And Production Stance
+
+The 480-document ELF stress run took 1163 seconds, roughly 19.4 minutes, or about 2.4
+seconds per document end-to-end. That includes the service path, provider embedding
+calls, worker indexing, Qdrant rebuild/search, lifecycle checks, soak, and container
+overhead. Whether that is acceptable depends on the production workflow: it is a
+cold/backfill measurement, not an interactive-ingest target.
+
+Throughput work should focus on:
+
+- micro-batching provider embedding requests;
+- multiple outbox worker lanes with leases or `FOR UPDATE SKIP LOCKED`;
+- batch Qdrant upserts;
+- a bulk import mode that defers or relaxes semantic deduplication;
+- vector handoff so an ingest-time embedding can be reused by the worker.
+
+## Next Benchmark Iterations
+
+- Add a sanitized private corpus that reflects real coding-agent memory cases.
+- Add scale/stress matrix runs for qmd and the other external projects once their smoke
+  adapters are stable.
+- Split elapsed time into install, ingest, embedding, indexing, query, and lifecycle
+  phases.
+- Add recall@k, MRR, and false-positive measurements instead of only pass/fail expected
+  evidence checks.
+- Add a batch-loading benchmark for ELF after provider micro-batching and parallel
+  worker lanes land.
+- Deepen external lifecycle checks for OpenViking and claude-mem after their local
+  runtime paths can complete in Docker.
+
+## Publish Workflow
+
+Generate a fresh aggregate JSON:
+
+```sh
+cargo make baseline-live-docker
+```
+
+Convert the latest JSON report into Markdown:
+
+```sh
+ELF_BASELINE_MARKDOWN_REPORT=docs/guide/benchmarking/YYYY-MM-DD-live-baseline-report.md \
+cargo make baseline-live-report
+```
+
+Clean Docker-owned state:
+
+```sh
+cargo make baseline-live-docker-clean
+```
+
+The only host report directory is `tmp/live-baseline/`. Raw generated JSON stays there
+and is not committed by default.
diff --git a/docs/guide/benchmarking/index.md b/docs/guide/benchmarking/index.md
new file mode 100644
index 00000000..4493e306
--- /dev/null
+++ b/docs/guide/benchmarking/index.md
@@ -0,0 +1,34 @@
+# Benchmarking Guide Index
+
+Goal: Route agents to live benchmark runbooks, report publication steps, and checked-in
+benchmark evidence.
+Read this when: You need to run, publish, interpret, or extend ELF benchmark evidence
+against external memory systems.
+Inputs: The benchmark question, selected corpus profile, and whether you need a runbook
+or a saved evidence snapshot.
+Depends on: `docs/index.md`, `docs/guide/index.md`, and `docs/governance.md`.
+Outputs: The smallest benchmarking guide or report needed to continue.
+
+## Use This Index When
+
+- You need to run the live Docker-only benchmark matrix.
+- You need to publish a Markdown report from a generated benchmark JSON report.
+- You need the checked-in benchmark evidence behind README claims.
+- You need to extend the benchmark matrix with new projects, profiles, or lifecycle
+  checks.
+
+## Guides And Reports
+
+- `live_baseline_benchmark.md`: run, clean up, publish, and interpret the live
+  Docker-only benchmark matrix.
+- `2026-06-09-live-baseline-report.md`: checked-in evidence snapshot for the June 9,
+  2026 ELF production-provider stress run and all-project smoke comparison.
+
+## Update Rules
+
+- Add a dated report when a new run changes README-level claims.
+- Keep generated raw JSON under `tmp/live-baseline/`; commit only reviewed Markdown
+  summaries and durable scripts.
+- Link the newest decision-relevant report from README and this index.
+- When benchmark semantics change, update `live_baseline_benchmark.md` and the
+  relevant spec before publishing a new result.
diff --git a/docs/guide/benchmarking/live_baseline_benchmark.md b/docs/guide/benchmarking/live_baseline_benchmark.md
new file mode 100644
index 00000000..b61b1e2b
--- /dev/null
+++ b/docs/guide/benchmarking/live_baseline_benchmark.md
@@ -0,0 +1,217 @@
+# Live Baseline Benchmark
+
+Goal: Run Docker-isolated, current-HEAD baseline checks against ELF and the external memory projects compared with ELF.
+Read this when: You need evidence about which external projects actually run against a shared benchmark corpus.
+Preconditions: Docker and Docker Compose are available on the host.
+Depends on: `docker-compose.baseline.yml`, `scripts/live-baseline-benchmark.sh`, and `docs/spec/system_competitive_parity_gate_v1.md`.
+Verification: `cargo make baseline-live-docker` writes `tmp/live-baseline/live-baseline-report.json`; `cargo make baseline-live-report` can render that JSON into a checked-in Markdown report.
+
+## Scope
+
+The runner covers ELF plus the six external projects in the README comparison table:
+
+- ELF
+- agentmemory
+- OpenViking
+- mem0
+- qmd
+- claude-mem
+- memsearch
+
+For ELF, the runner uses Docker-owned Postgres and Qdrant, writes the shared corpus
+through `add_note`, drains the worker indexing outbox into persisted chunks and
+embeddings, rebuilds Qdrant from the worker-produced chunk tables, and verifies
+`search_raw` against the shared query manifest. It also runs ELF service lifecycle
+checks for note update, note delete, cold-start recovery, concurrent writes,
+configurable soak stability, and a local resource envelope over the same Docker-owned
+stores. By default these checks use the deterministic local embedding provider. Set
+`ELF_BASELINE_ELF_EMBEDDING_MODE=provider` to run ELF through the configured
+production embedding provider instead.
+
+For external projects, the runner clones current upstream `main` inside Docker, records
+the exact commit SHA, reads the same generated corpus and query manifest, and runs a
+same-corpus retrieval adapter when the project exposes a local API or CLI that can run
+without provider keys.
+
+Corpus profiles:
+
+- `smoke`: default, 3 documents and 3 query cases.
+- `scale` (alias `full`): 120 documents by default, 8 query cases, and generated distractor notes
+  that make the check closer to a production retrieval benchmark.
+- `stress`: 480 documents by default, 16 query cases, and alternate phrasings for
+  every needle query.
+
+Use `ELF_BASELINE_SCALE_DOCS` and `ELF_BASELINE_STRESS_DOCS` to raise or lower the
+generated corpus sizes.
+Use `ELF_BASELINE_CONCURRENT_NOTES`, `ELF_BASELINE_MAX_ELF_SECONDS`, and
+`ELF_BASELINE_MAX_ELF_RSS_KB` to tune ELF's concurrent-write and resource-envelope
+checks.
+Use `ELF_BASELINE_SOAK_SECONDS`, `ELF_BASELINE_SOAK_ROUNDS`, and
+`ELF_BASELINE_SOAK_PROBE_INTERVAL_MS` to tune ELF's repeated write/search soak
+window. The smoke profile does not run soak by default; the scale/full profiles run a
+short 15-second soak by default, and the stress profile runs a 60-second soak by
+default.
+Use `ELF_BASELINE_ELF_EMBEDDING_MODE=provider` plus
+`ELF_BASELINE_ELF_EMBEDDING_API_BASE`, `ELF_BASELINE_ELF_EMBEDDING_API_KEY`,
+`ELF_BASELINE_ELF_EMBEDDING_MODEL`, and
+`ELF_BASELINE_ELF_EMBEDDING_DIMENSIONS` to run ELF with a production embedding API.
+The runner also accepts `QWEN_API_KEY`, `QWEN_EMBEDDING_API_BASE`,
+`QWEN_EMBEDDING_MODEL`, `QWEN_EMBEDDING_DIMENSIONS`, and `QWEN_EMBEDDING_PATH` for
+Qwen-compatible embedding configuration. Generic aliases `EMBEDDING_API_BASE`,
+`EMBEDDING_API_KEY`, `EMBEDDING_MODEL`, `EMBEDDING_DIMENSIONS`,
+`EMBEDDING_PROVIDER_ID`, `EMBEDDING_PATH`, and `EMBEDDING_TIMEOUT_MS` are also
+supported. Provider-mode runs default to a 30-second embedding timeout unless an
+explicit timeout env var is set. For Qwen3 production embedding runs, use
+`Qwen3-Embedding-8B` with `EMBEDDING_DIMENSIONS=4096`. The aggregate report records
+ELF's embedding mode, provider id, model, dimensions, timeout, API base, and path; it
+never records the API key.
+
+Current external same-corpus adapters:
+
+- agentmemory: writes every corpus document through `mem::remember`, queries through
+  `mem::search`, exercises `mem::forget` delete suppression, and probes
+  superseding by writing a revised memory through `mem::remember`. The current
+  adapter uses an in-memory SDK/KV mock, so cold-start recovery is recorded as
+  `incomplete` until a durable agentmemory runtime is wired into the harness.
+- qmd: adds the corpus as a collection, embeds it locally, and runs structured hybrid
+  `query --json` for every query case. It also rewrites and deletes corpus files,
+  then reruns `qmd update`, `qmd embed -f`, and fresh `qmd query` processes.
+- memsearch: indexes the corpus with the local ONNX embedder and runs CLI search.
+  It also rewrites and deletes corpus files, then reruns `memsearch index` and
+  fresh `memsearch search` processes.
+- mem0: writes the corpus with `infer=false` and searches local FastEmbed + Qdrant
+  path storage. It also runs public `Memory.update`, `Memory.delete`, and a new
+  `Memory.from_config` over the same local paths. No LLM inference is required.
+- claude-mem: writes every corpus document into the SQLite memory repository and runs
+  repository search for every query case.
+
+Current deeper checks:
+
+- ELF: same-corpus retrieval through worker-produced chunks, async worker indexing
+  completion, service update replacement through the worker, service delete
+  suppression through the worker, cold-start search recovery after constructing a
+  fresh service over the same Postgres and Qdrant stores, concurrent write/search E2E,
+  configurable repeated write/search soak stability, and a configurable local resource
+  envelope.
+- qmd, memsearch, and mem0: same-corpus retrieval, update replacement, delete
+  suppression, and cold-start search recovery through their local public API or CLI
+  surfaces.
+- agentmemory: same-corpus retrieval and delete suppression are exercised; update
+  replacement is probed through superseding `mem::remember`; cold-start recovery is
+  `incomplete` because the current adapter runs against an in-memory SDK/KV mock.
+- claude-mem and OpenViking: same-corpus retrieval only when their local runtime path
+  can complete. Update, delete, and recovery checks are not yet encoded for these two
+  adapters.
+- Concurrent write, soak stability, and resource-envelope checks are currently encoded
+  for ELF. They are not yet encoded for the external adapters. Multi-hour production
+  soak is still operator-controlled through `ELF_BASELINE_SOAK_SECONDS`; the checked-in
+  stress default is a bounded 60-second signal.
+
+OpenViking attempts the official `.[local-embed]` path plus `OpenViking.add_resource`
+and `OpenViking.find`. If the Docker platform cannot build or import
+`llama-cpp-python`, the project is recorded as `incomplete` with
+`retrieval_status = "local_embed_install_failed"` rather than as a retrieval failure.
+
+## Checked-In Reports
+
+- `docs/guide/benchmarking/2026-06-09-live-baseline-report.md`: June 9, 2026
+  production-provider ELF stress run and all-project smoke comparison.
+
+## Run
+
+```sh
+cargo make baseline-live-docker
+```
+
+To run the scale or stress profiles:
+
+```sh
+ELF_BASELINE_PROFILE=scale cargo make baseline-live-docker
+ELF_BASELINE_PROFILE=scale ELF_BASELINE_SCALE_DOCS=240 cargo make baseline-live-docker
+ELF_BASELINE_PROFILE=stress cargo make baseline-live-docker
+```
+
+To iterate on one or more project adapters without rerunning the full matrix:
+
+```sh
+ELF_BASELINE_PROJECTS=qmd cargo make baseline-live-docker
+ELF_BASELINE_PROJECTS=ELF,memsearch cargo make baseline-live-docker
+```
+
+The only host artifact is:
+
+```text
+tmp/live-baseline/
+```
+
+That directory contains the aggregate report, per-project logs, and the shared query
+fixture used by the run. The aggregate report records `corpus.profile`,
+`corpus.document_count`, and `corpus.query_count` so smoke, scale, and stress runs are
+not confused. Each project record includes `elapsed_seconds` for rough local runtime
+comparison. ELF project records also include an `embedding` summary so deterministic
+local and production-provider runs are not confused. Each project record also includes
+`checks` and `check_summary`; the aggregate `full_check_summary` is the
+adoption-relevant multi-check count.
+
+## Publish A Markdown Report
+
+After a run writes `tmp/live-baseline/live-baseline-report.json`, render a durable
+Markdown summary:
+
+```sh
+cargo make baseline-live-report
+```
+
+By default the task prints Markdown to stdout. To write a checked-in report:
+
+```sh
+ELF_BASELINE_MARKDOWN_REPORT=docs/guide/benchmarking/YYYY-MM-DD-live-baseline-report.md \
+cargo make baseline-live-report
+```
+
+The publisher summarizes one generated aggregate JSON report. For a combined report
+that compares multiple runs, use the generated Markdown as input evidence and then add
+the interpretation manually under `docs/guide/benchmarking/`.
+
+## Clean Up
+
+```sh
+cargo make baseline-live-docker-clean
+```
+
+This removes every Docker-managed volume used by the live baseline runner (Postgres, Qdrant,
+npm, pip, Hugging Face, qmd, cargo, and target). It does not remove the host report directory.
+
+## Result Semantics
+
+- `pass`: the project installed and every encoded check for that project passed in the
+  selected corpus profile.
+- `fail`: clone, install, import, build, retrieval, or another declared check failed.
+- `incomplete`: the project installed or partially ran, but a declared check could not
+  be completed without extra provider keys, agent-host integration, native dependency
+  support, durable runtime wiring, or a project-specific command mapping not yet
+  encoded in the runner.
+
+The top-level `verdict` is intentionally stricter than the per-project `status`: it
+only returns `pass` when every selected project has `status = "pass"` and
+`retrieval_status = "retrieval_pass"`. The `same_corpus_summary` field is the
+retrieval count and does not treat lifecycle failures as retrieval failures. For
+multi-check comparisons, read `full_check_summary` and each project's `checks`.
+
+`incomplete` is not a pass. Treat it as evidence that more benchmark wiring is needed.
+
+## Failure Conditions
+
+A project status should be `fail` when any declared project check completes and proves
+the project did not meet the selected benchmark contract. Examples:
+
+- clone, install, import, or build returns a non-zero result;
+- same-corpus retrieval runs but does not return the expected evidence;
+- update replacement leaves superseded evidence searchable;
+- delete suppression leaves deleted evidence searchable;
+- cold-start recovery cannot find data that should persist;
+- concurrent, soak, or resource-envelope checks exceed their declared threshold.
+
+Use `incomplete` instead of `fail` only when the runner cannot execute the declared
+check fairly because adapter wiring, provider credentials, native dependency support,
+or durable runtime integration is missing.
diff --git a/docs/guide/index.md b/docs/guide/index.md
index c221adcc..9fc8ace2 100644
--- a/docs/guide/index.md
+++ b/docs/guide/index.md
@@ -62,6 +62,8 @@ Then structure the body for execution:
 
 ## Guide subfolders
 
+- `docs/guide/benchmarking/` for live benchmark runbooks, report publication steps,
+  and checked-in benchmark evidence.
 - `docs/guide/competitive_parity_testing.md` for running the Docker-only adoption
   gate against external memory-system baselines.
 - `docs/guide/development/` for repository-development workflows.
diff --git a/packages/elf-providers/src/lib.rs b/packages/elf-providers/src/lib.rs
index b3ea4ac3..a8adbf90 100644
--- a/packages/elf-providers/src/lib.rs
+++ b/packages/elf-providers/src/lib.rs
@@ -8,7 +8,7 @@ mod error;
 
 pub use error::{Error, Result};
 
-use reqwest::header::{AUTHORIZATION, HeaderMap, HeaderName};
+use reqwest::header::{ACCEPT_ENCODING, AUTHORIZATION, HeaderMap, HeaderName, HeaderValue};
 use serde_json::{Map, Value};
 
 /// Builds authenticated request headers for provider API calls.
@@ -16,6 +16,7 @@ pub fn auth_headers(api_key: &str, default_headers: &Map<String, Value>) -> Resu
 	let mut headers = HeaderMap::new();
 
 	headers.insert(AUTHORIZATION, format!("Bearer {api_key}").parse()?);
+	headers.insert(ACCEPT_ENCODING, HeaderValue::from_static("identity"));
 
 	for (key, value) in default_headers {
 		let Some(raw) = value.as_str() else {
diff --git a/scripts/live-baseline-benchmark.sh b/scripts/live-baseline-benchmark.sh
new file mode 100755
index 00000000..fbb56b05
--- /dev/null
+++ b/scripts/live-baseline-benchmark.sh
@@ -0,0 +1,2144 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" # parent of this script's directory
+REPORT_DIR="${ELF_BASELINE_REPORT_DIR:-${ROOT_DIR}/tmp/live-baseline}" # report output; its top-level files are deleted at startup
+WORK_DIR="${ELF_BASELINE_WORK_DIR:-/bench}" # scratch root; removed with rm -rf at startup
+REPOS_DIR="${WORK_DIR}/repos"
+CORPUS_DIR="${WORK_DIR}/corpus" # generated *.md corpus documents
+HOME_DIR="${WORK_DIR}/home"
+RECORDS="${REPORT_DIR}/project-records.jsonl" # json_record appends one compact JSON object per call
+REPORT="${REPORT_DIR}/live-baseline-report.json"
+RUN_ID="${ELF_BASELINE_RUN_ID:-live-baseline-$(date +%Y%m%d%H%M%S)}" # defaults to a timestamped id
+PROJECT_FILTER="${ELF_BASELINE_PROJECTS:-all}"
+CORPUS_PROFILE="${ELF_BASELINE_PROFILE:-smoke}" # generate_corpus accepts smoke, scale, full, stress
+SCALE_DOC_COUNT="${ELF_BASELINE_SCALE_DOCS:-120}"
+STRESS_DOC_COUNT="${ELF_BASELINE_STRESS_DOCS:-480}"
+QUERY_TOP_K="${ELF_BASELINE_TOP_K:-10}"
+CURRENT_PROJECT_STARTED_AT="" # epoch seconds; json_record reports elapsed_seconds 0 while this is empty
+
+if [[ ! -f "/.dockerenv" && "${ELF_BASELINE_ALLOW_HOST:-0}" != "1" ]]; then # refuse unless /.dockerenv exists or the host opt-in is set
+  echo "Refusing to run live baseline benchmark outside Docker. Use cargo make baseline-live-docker." >&2
+  exit 1
+fi
+
+for cmd in bash cargo git jq node npm python3 rg timeout; do # tools that must be on PATH before anything runs
+  if ! command -v "${cmd}" >/dev/null 2>&1; then
+    echo "Missing ${cmd} in baseline runner." >&2
+    exit 1
+  fi
+done
+
+generate_corpus() { # Replace the *.md corpus in CORPUS_DIR and write REPORT_DIR/queries.json for CORPUS_PROFILE; fails on an unsupported profile.
+  python3 - "${CORPUS_PROFILE}" "${SCALE_DOC_COUNT}" "${STRESS_DOC_COUNT}" "${CORPUS_DIR}" "${REPORT_DIR}/queries.json" <<'PY'
+import json
+import sys
+from pathlib import Path
+
+profile, scale_doc_count_raw, stress_doc_count_raw, corpus_dir_raw, queries_path_raw = sys.argv[1:]
+corpus_dir = Path(corpus_dir_raw)
+queries_path = Path(queries_path_raw)
+scale_doc_count = int(scale_doc_count_raw)
+stress_doc_count = int(stress_doc_count_raw)
+
+anchors = [
+    {
+        "name": "auth-memory.md",
+        "title": "Auth Memory",
+        "body": "The API auth middleware validates JWT tokens with key id `kid-v3`. The middleware rejects tokens older than 15 minutes and requires tenant scope `project_shared` for deployment operations.",
+        "query": "Which JWT key id does the auth middleware require?",
+        "alternate_query": "Find the auth note that mentions key id kid-v3 and tenant scope.",
+        "terms": ["kid-v3", "auth middleware"],
+    },
+    {
+        "name": "database-memory.md",
+        "title": "Database Memory",
+        "body": "The invoice list N+1 query was fixed by eager loading invoice lines through `InvoiceLineBatcher`. Do not reintroduce per-row SQL calls in invoice rendering.",
+        "query": "How was the invoice list N+1 query fixed?",
+        "alternate_query": "Find the invoice rendering memory about InvoiceLineBatcher and N+1 prevention.",
+        "terms": ["InvoiceLineBatcher", "N+1"],
+    },
+    {
+        "name": "deploy-memory.md",
+        "title": "Deploy Memory",
+        "body": "Production deploys must run Docker-isolated parity checks first. The cleanup command must remove Postgres, Qdrant, npm, pip, cargo, and target volumes before adoption.",
+        "query": "What must be cleaned up after Docker parity checks?",
+        "alternate_query": "Find the deploy checklist that mentions Postgres, Qdrant, and cleanup volumes.",
+        "terms": ["Postgres", "Qdrant", "volumes"],
+    },
+    {
+        "name": "retention-memory.md",
+        "title": "Retention Memory",
+        "body": "The retention worker uses `RetentionSweepPlan` before deletion and writes a tombstone ledger entry named `ledger-retain-77` for every expired note.",
+        "query": "Which plan does the retention worker use before deletion?",
+        "alternate_query": "Find the retention note with ledger-retain-77 tombstone handling.",
+        "terms": ["RetentionSweepPlan", "ledger-retain-77"],
+    },
+    {
+        "name": "incident-memory.md",
+        "title": "Incident Memory",
+        "body": "During canary incidents, `CanaryTraceGate` must stay enabled until the rollback window closes and the release captain records marker `incident-green-42`.",
+        "query": "Which gate stays enabled during canary incidents?",
+        "alternate_query": "Find the canary incident memory with incident-green-42.",
+        "terms": ["CanaryTraceGate", "incident-green-42"],
+    },
+    {
+        "name": "billing-memory.md",
+        "title": "Billing Memory",
+        "body": "Billing replay uses `UsageAccumulator` with idempotency key `bill-run-42` so duplicate metering events do not create extra invoices.",
+        "query": "Which accumulator and idempotency key protect billing replay?",
+        "alternate_query": "Find the billing replay note with bill-run-42.",
+        "terms": ["UsageAccumulator", "bill-run-42"],
+    },
+    {
+        "name": "search-memory.md",
+        "title": "Search Memory",
+        "body": "Search fanout routes tenant scoped reads through `SemanticShardRouter`; every shard label must include the prefix `tenant_scope` before merge ranking.",
+        "query": "Which router handles tenant scoped search fanout?",
+        "alternate_query": "Find the tenant_scope shard routing memory.",
+        "terms": ["SemanticShardRouter", "tenant_scope"],
+    },
+    {
+        "name": "recovery-memory.md",
+        "title": "Recovery Memory",
+        "body": "Disaster recovery requires `SnapshotRestoreFence` and a WAL checkpoint named `wal-green-17` before accepting new writes after restore.",
+        "query": "Which fence is required before accepting writes after restore?",
+        "alternate_query": "Find the disaster recovery note with wal-green-17.",
+        "terms": ["SnapshotRestoreFence", "wal-green-17"],
+    },
+]
+
+if profile == "smoke":
+    docs = anchors[:3]
+elif profile in {"scale", "full"}:
+    docs = list(anchors)
+    target_count = max(scale_doc_count, len(anchors))
+elif profile == "stress":
+    docs = list(anchors)
+    target_count = max(stress_doc_count, len(anchors))
+else:
+    raise SystemExit(f"unsupported ELF_BASELINE_PROFILE={profile!r}")
+
+if profile in {"scale", "full", "stress"}:
+    topics = [
+        "scheduler dry run budget window",
+        "operator dashboard cache refresh",
+        "import packet normalization lane",
+        "workspace role synchronization",
+        "trace export sampling policy",
+        "background compaction checkpoint",
+        "local fixture replay validation",
+        "notification queue dampening",
+    ]
+    for idx in range(1, target_count - len(anchors) + 1):
+        topic = topics[idx % len(topics)]
+        docs.append(
+            {
+                "name": f"distractor-{idx:03d}.md",
+                "title": f"Distractor Memory {idx:03d}",
+                "body": (
+                    f"This operational note covers {topic}. "
+                    f"It intentionally uses ordinary maintenance vocabulary for lane {idx:03d}, "
+                    f"checkpoint batch {1000 + idx}, and reviewer group {idx % 9}. "
+                    "It should not answer the benchmark needle queries."
+                ),
+            }
+        )
+
+for existing in corpus_dir.glob("*.md"):
+    existing.unlink()
+
+for doc in docs:
+    (corpus_dir / doc["name"]).write_text(
+        f"# {doc['title']}\n\n{doc['body']}\n", encoding="utf-8"
+    )
+
+query_docs = anchors[: (3 if profile == "smoke" else len(anchors))]
+queries = []
+for doc in query_docs:
+    base_id = doc["name"].replace("-memory.md", "").replace(".md", "")
+    queries.append(
+        {
+            "id": f"q-{base_id}",
+            "query": doc["query"],
+            "expected_doc": doc["name"],
+            "expected_terms": doc["terms"],
+        }
+    )
+    if profile == "stress":
+        queries.append(
+            {
+                "id": f"q-{base_id}-alt",
+                "query": doc["alternate_query"],
+                "expected_doc": doc["name"],
+                "expected_terms": doc["terms"],
+            }
+        )
+
+queries_path.write_text(
+    json.dumps(
+        {
+            "schema": "elf.live_baseline.queries/v1",
+            "profile": profile,
+            "document_count": len(docs),
+            "queries": queries,
+        },
+        indent=2,
+    )
+    + "\n",
+    encoding="utf-8",
+)
+PY
+}
+
+rm -rf "${WORK_DIR}" # start every run from an empty scratch tree
+mkdir -p "${REPORT_DIR}"
+find "${REPORT_DIR}" -maxdepth 1 -type f -delete # drop stale top-level report files; subdirectories are left alone
+mkdir -p "${REPOS_DIR}" "${CORPUS_DIR}" "${HOME_DIR}"
+: >"${RECORDS}" # create or truncate the per-project records file
+
+generate_corpus
+DOCUMENT_COUNT="$(find "${CORPUS_DIR}" -maxdepth 1 -type f -name '*.md' | wc -l | tr -d ' ')" # tr removes any space padding in the wc output
+QUERY_COUNT="$(jq '.queries | length' "${REPORT_DIR}/queries.json")"
+
+json_record() {
+  local project="$1"
+  local repo="$2"
+  local head="$3"
+  local status="$4"
+  local retrieval_status="$5"
+  local reason="$6"
+  local log_path="$7"
+  local command_summary="$8"
+  local finished_at
+  local elapsed_seconds
+  local checks_path
+  finished_at="$(date +%s)"
+  elapsed_seconds=0
+  if [[ -n "${CURRENT_PROJECT_STARTED_AT}" ]]; then
+    elapsed_seconds=$((finished_at - CURRENT_PROJECT_STARTED_AT))
+  fi
+  checks_path="${REPORT_DIR}/${project}-checks.json"
+
+  if [[ -s "${checks_path}" ]] && jq -e '.checks and .check_summary' "${checks_path}" >/dev/null 2>&1; then
+    jq -nc \
+      --arg project "${project}" \
+      --arg repo "${repo}" \
+      --arg head "${head}" \
+      --arg status "${status}" \
+      --arg retrieval_status "${retrieval_status}" \
+      --arg reason "${reason}" \
+      --arg log_path "${log_path}" \
+      --arg command_summary "${command_summary}" \
+      --argjson elapsed_seconds "${elapsed_seconds}" \
+      --slurpfile checks "${checks_path}" \
+      '{
+        project: $project,
+        repo: $repo,
+        head: $head,
+        status: $status,
+        retrieval_status: $retrieval_status,
+        reason: $reason,
+        log_path: $log_path,
+        command_summary: $command_summary,
+        elapsed_seconds: $elapsed_seconds,
+        embedding: ($checks[0].embedding // null),
+        check_summary: $checks[0].check_summary,
+        checks: $checks[0].checks
+      }' >>"${RECORDS}"
+  else
+    jq -nc \
+      --arg project "${project}" \
+      --arg repo "${repo}" \
+      --arg head "${head}" \
+      --arg status "${status}" \
+      --arg retrieval_status "${retrieval_status}" \
+      --arg reason "${reason}" \
+      --arg log_path "${log_path}" \
+      --arg command_summary "${command_summary}" \
+      --argjson elapsed_seconds "${elapsed_seconds}" \
+      '{
+        project: $project,
+        repo: $repo,
+        head: $head,
+        status: $status,
+        retrieval_status: $retrieval_status,
+        reason: $reason,
+        log_path: $log_path,
+        command_summary: $command_summary,
+        elapsed_seconds: $elapsed_seconds,
+        check_summary: {
+          total: 1,
+          pass: (if $retrieval_status == "retrieval_pass" then 1 else 0 end),
+          fail: (if $status == "fail" then 1 else 0 end),
+          incomplete: (if $retrieval_status != "retrieval_pass" and $status != "fail" then 1 else 0 end)
+        },
+        checks: [
+          {
+            name: "same_corpus_retrieval",
+            status: (if $retrieval_status == "retrieval_pass" then "pass" elif $status == "fail" then "fail" else "incomplete" end),
+            reason: $reason,
+            evidence: {
+              retrieval_status: $retrieval_status,
+              log_path: $log_path,
+              command_summary: $command_summary
+            }
+          }
+        ]
+      }' >>"${RECORDS}"
+  fi
+}
+
+run_cmd() {
+  local label="$1"
+  local timeout_seconds="$2"
+  local log_path="$3"
+  shift 3
+
+  {
+    echo "## ${label}"
+    echo "## started_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+    echo "## command=$*"
+  } >>"${log_path}"
+
+  if timeout "${timeout_seconds}" bash -lc "$*" >>"${log_path}" 2>&1; then
+    echo "## exit=0" >>"${log_path}"
+    return 0
+  fi
+
+  local code
+  code=$?
+  echo "## exit=${code}" >>"${log_path}"
+  return "${code}"
+}
+
+# Shallow-clone a project into ${REPOS_DIR}/<project>, logging through run_cmd.
+#
+# Arguments: project name, repository URL, log file path.
+# Prints the checked-out commit hash and returns 0 when the clone succeeds;
+# prints "clone_failed" and returns 1 when it does not.
+clone_project() {
+  local project="$1" repo="$2" log_path="$3"
+  local checkout_dir="${REPOS_DIR}/${project}"
+
+  if ! run_cmd "${project}: clone" 180 "${log_path}" "git clone --depth 1 '${repo}' '${checkout_dir}'"; then
+    echo "clone_failed"
+    return 1
+  fi
+
+  git -C "${checkout_dir}" rev-parse HEAD
+  return 0
+}
+
+# Fold every JSON line in ${RECORDS} into the final report at ${REPORT}.
+#
+# `jq -s` slurps the records into one array, so `length` and `.[]` below range
+# over per-project records. The overall verdict is:
+#   - "incomplete" when there are no records,
+#   - "fail" when any record has status "fail",
+#   - "pass" only when every record has status "pass" and retrieval_status
+#     "retrieval_pass",
+#   - "incomplete" otherwise.
+# Three summaries are emitted: one by record status, one by same-corpus
+# retrieval outcome, and one summing each record's check_summary counters
+# (missing counters count as 0).
+finish_report() {
+  jq -s \
+    --arg schema "elf.live_baseline.report/v1" \
+    --arg run_id "${RUN_ID}" \
+    --arg project_filter "${PROJECT_FILTER}" \
+    --arg corpus_profile "${CORPUS_PROFILE}" \
+    --argjson document_count "${DOCUMENT_COUNT}" \
+    --argjson query_count "${QUERY_COUNT}" \
+    --arg generated_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
+    '{
+      schema: $schema,
+      run_id: $run_id,
+      generated_at: $generated_at,
+      docker_only: true,
+      project_filter: $project_filter,
+      corpus: {
+        profile: $corpus_profile,
+        document_count: $document_count,
+        query_count: $query_count,
+        path: "generated in Docker under /bench/corpus",
+        query_file: "tmp/live-baseline/queries.json"
+      },
+      verdict: (
+        if length == 0 then "incomplete"
+        elif any(.[]; .status == "fail") then "fail"
+        elif all(.[]; .status == "pass" and .retrieval_status == "retrieval_pass") then "pass"
+        else "incomplete"
+        end
+      ),
+      summary: {
+        total: length,
+        pass: ([.[] | select(.status == "pass")] | length),
+        fail: ([.[] | select(.status == "fail")] | length),
+        incomplete: ([.[] | select(.status == "incomplete")] | length)
+      },
+      same_corpus_summary: {
+        total: length,
+        pass: ([.[] | select(.retrieval_status == "retrieval_pass")] | length),
+        fail: ([.[] | select(.retrieval_status != "retrieval_pass" and .status == "fail")] | length),
+        incomplete: ([.[] | select(.retrieval_status != "retrieval_pass" and .status != "fail")] | length)
+      },
+      full_check_summary: {
+        total: ([.[] | .check_summary.total // 0] | add // 0),
+        pass: ([.[] | .check_summary.pass // 0] | add // 0),
+        fail: ([.[] | .check_summary.fail // 0] | add // 0),
+        incomplete: ([.[] | .check_summary.incomplete // 0] | add // 0)
+      },
+      projects: .
+    }' "${RECORDS}" >"${REPORT}"
+}
+
+project_enabled() {
+  local project="$1"
+
+  if [[ -z "${PROJECT_FILTER}" || "${PROJECT_FILTER}" == "all" ]]; then
+    return 0
+  fi
+
+  for selected in ${PROJECT_FILTER//,/ }; do
+    if [[ "${selected}" == "${project}" ]]; then
+      return 0
+    fi
+  done
+
+  return 1
+}
+
+run_project() {
+  local project="$1"
+  local fn="$2"
+
+  if project_enabled "${project}"; then
+    CURRENT_PROJECT_STARTED_AT="$(date +%s)"
+    "${fn}"
+    CURRENT_PROJECT_STARTED_AT=""
+  fi
+}
+
+# Run ELF itself against the generated corpus through the live_baseline_elf
+# binary and record the outcome with json_record.
+#
+# The recorded head comes from ELF_BASELINE_ELF_HEAD when set, otherwise from
+# `git rev-parse HEAD` in ${ROOT_DIR}, falling back to "unknown".
+# Outcomes, in order of precedence:
+#   - command failed                      -> fail / runtime_failed
+#   - no non-empty result file            -> fail / runtime_failed
+#   - result meets every pass criterion   -> pass / retrieval_pass
+#   - result has the expected schema only -> status, retrieval_status and
+#                                            reason taken from the result
+#   - anything else                       -> fail / runtime_failed
+project_elf() {
+  local project="ELF"
+  local repo="local:/workspace"
+  local log_path="${REPORT_DIR}/${project}.log"
+  local result_path="${REPORT_DIR}/${project}-result.json"
+  local log_name="${project}.log"
+  local steps="add_note; worker outbox indexing; rebuild_qdrant; search_raw; concurrent writes; soak stability"
+  local invalid_result_reason="ELF command completed but did not write a valid live-baseline result; inspect ELF.log for the runtime error"
+  local head="${ELF_BASELINE_ELF_HEAD:-}"
+
+  if [[ -z "${head}" ]]; then
+    head="$(git -C "${ROOT_DIR}" rev-parse HEAD 2>>"${log_path}" || echo "unknown")"
+  fi
+
+  if ! run_cmd "${project}: same-corpus retrieval" 1200 "${log_path}" \
+    "cd '${ROOT_DIR}' && cargo run -p elf-eval --bin live_baseline_elf -- --config config/local/elf.docker.toml --corpus '${CORPUS_DIR}' --queries '${REPORT_DIR}/queries.json' --out '${result_path}'"; then
+    json_record "${project}" "${repo}" "${head}" "fail" "runtime_failed" \
+      "ELF same-corpus retrieval command failed in Docker" \
+      "${log_name}" "${steps}"
+    return
+  fi
+
+  # Every remaining classification needs a non-empty result file.
+  if [[ ! -s "${result_path}" ]]; then
+    json_record "${project}" "${repo}" "${head}" "fail" "runtime_failed" \
+      "${invalid_result_reason}" \
+      "${log_name}" "${steps}"
+    return
+  fi
+
+  # Publish the detailed checks so json_record can embed them in the record.
+  if jq -e '.checks and .check_summary' "${result_path}" >/dev/null 2>&1; then
+    jq '{embedding, check_summary, checks}' "${result_path}" >"${REPORT_DIR}/${project}-checks.json"
+  fi
+
+  if jq -e --argjson document_count "${DOCUMENT_COUNT}" --argjson query_count "${QUERY_COUNT}" '
+    .schema == "elf.live_baseline.elf_result/v1" and
+    .status == "pass" and
+    .summary.total == $query_count and
+    .summary.fail == 0 and
+    .check_summary.fail == 0 and
+    .check_summary.incomplete == 0 and
+    .indexing.note_count == $document_count and
+    .indexing.rebuild_rebuilt_count >= $document_count and
+    .indexing.rebuild_error_count == 0
+  ' "${result_path}" >/dev/null; then
+    json_record "${project}" "${repo}" "${head}" "pass" "retrieval_pass" \
+      "$(jq -r '.reason' "${result_path}")" \
+      "${log_name}" "${steps}"
+    return
+  fi
+
+  # Well-formed result that missed the pass criteria: report what it says.
+  if jq -e '.schema == "elf.live_baseline.elf_result/v1"' "${result_path}" >/dev/null 2>&1; then
+    json_record "${project}" "${repo}" "${head}" "$(jq -r '.status // "fail"' "${result_path}")" \
+      "$(jq -r '.retrieval_status // "retrieval_failed"' "${result_path}")" \
+      "$(jq -r '.reason // "ELF result did not satisfy live baseline pass criteria"' "${result_path}")" \
+      "${log_name}" "${steps}"
+    return
+  fi
+
+  json_record "${project}" "${repo}" "${head}" "fail" "runtime_failed" \
+    "${invalid_result_reason}" \
+    "${log_name}" "${steps}"
+}
+
+# Benchmark the agentmemory project against the generated corpus.
+#
+# Steps: shallow-clone the repository, install/build it with npm, write a
+# TypeScript driver into the checkout, run the driver with `npx tsx`, then
+# classify the driver's JSON result with jq and record it via json_record.
+#
+# The driver registers agentmemory's remember/search functions against the
+# in-memory mockSdk/mockKV objects it defines, stores every corpus document via
+# mem::remember, runs each query via mem::search, and then exercises update,
+# delete and cold-start checks. It always reports cold_start_recovery_search as
+# "incomplete", so check_summary.incomplete is at least 1 for any result this
+# driver writes and the strict "pass" branch below cannot be satisfied by it.
+project_agentmemory() {
+  local project="agentmemory"
+  local repo="https://github.com/rohitg00/agentmemory.git"
+  local log_path="${REPORT_DIR}/${project}.log"
+  local result_path="${REPORT_DIR}/${project}-search.json"
+  local driver_path="${REPOS_DIR}/${project}/elf-live-baseline-agentmemory.ts"
+  local head
+  # clone_project prints the commit hash, or "clone_failed" on failure, so
+  # ${head} carries that marker into the failure record.
+  head="$(clone_project "${project}" "${repo}" "${log_path}")" || {
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "clone failed" "${project}.log" "git clone"
+    return
+  }
+
+  if run_cmd "${project}: install/build" 300 "${log_path}" \
+    "cd '${REPOS_DIR}/${project}' && (npm ci || npm install --no-audit --no-fund) && npm run build --if-present"; then
+    cat >"${driver_path}" <<'TS'
+import { readFileSync, readdirSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { registerRememberFunction } from "./src/functions/remember.js";
+import {
+  getSearchIndex,
+  registerSearchFunction,
+  setEmbeddingProvider,
+  setVectorIndex,
+} from "./src/functions/search.js";
+
+function mockKV() {
+  const store = new Map<string, Map<string, unknown>>();
+  return {
+    get: async <T>(scope: string, key: string): Promise<T | null> =>
+      (store.get(scope)?.get(key) as T) ?? null,
+    set: async <T>(scope: string, key: string, data: T): Promise<T> => {
+      if (!store.has(scope)) store.set(scope, new Map());
+      store.get(scope)!.set(key, data);
+      return data;
+    },
+    delete: async (scope: string, key: string): Promise<void> => {
+      store.get(scope)?.delete(key);
+    },
+    list: async <T>(scope: string): Promise<T[]> => {
+      const entries = store.get(scope);
+      return entries ? (Array.from(entries.values()) as T[]) : [];
+    },
+  };
+}
+
+function mockSdk() {
+  const functions = new Map<string, Function>();
+  return {
+    registerFunction: (idOrOpts: string | { id: string }, handler: Function) => {
+      const id = typeof idOrOpts === "string" ? idOrOpts : idOrOpts.id;
+      functions.set(id, handler);
+    },
+    registerTrigger: () => {},
+    trigger: async (
+      idOrInput: string | { function_id: string; payload: unknown },
+      data?: unknown,
+    ) => {
+      const id = typeof idOrInput === "string" ? idOrInput : idOrInput.function_id;
+      const payload = typeof idOrInput === "string" ? data : idOrInput.payload;
+      const fn = functions.get(id);
+      if (!fn) {
+        if (id === "mem::cascade-update") return { success: true };
+        throw new Error(`No function: ${id}`);
+      }
+      return fn(payload);
+    },
+  };
+}
+
+type QueryCase = {
+  id: string;
+  query: string;
+  expected_doc: string;
+  expected_terms: string[];
+};
+
+const outPath = process.argv[2];
+const corpusPath = process.argv[3];
+const queriesPath = process.argv[4];
+if (!outPath || !corpusPath || !queriesPath) {
+  throw new Error("output path, corpus path, and query path are required");
+}
+
+const sdk = mockSdk();
+const kv = mockKV();
+getSearchIndex().clear();
+setVectorIndex(null);
+setEmbeddingProvider(null);
+registerRememberFunction(sdk as never, kv as never);
+registerSearchFunction(sdk as never, kv as never);
+
+function plainText(markdown: string): string {
+  return markdown
+    .split(/\r?\n/)
+    .filter((line) => !line.trimStart().startsWith("#"))
+    .join(" ")
+    .replace(/\s+/g, " ")
+    .trim();
+}
+
+function conceptsFor(file: string): string[] {
+  return file
+    .replace(/\.md$/i, "")
+    .split(/[^A-Za-z0-9]+/)
+    .map((part) => part.toLowerCase())
+    .filter(Boolean);
+}
+
+function queryMatches(result: unknown, query: QueryCase): boolean {
+  const results = (result as { results?: unknown[] }).results ?? [];
+  return results.some((entry) => {
+    const entryJson = JSON.stringify(entry);
+    const entryText = entryJson.toLowerCase();
+    const files =
+      (entry as { observation?: { files?: string[] } }).observation?.files ?? [];
+    return (
+      files.includes(query.expected_doc) &&
+      query.expected_terms.every((term) =>
+        entryText.includes(term.toLowerCase()),
+      )
+    );
+  });
+}
+
+function resultEntries(result: unknown): unknown[] {
+  return (result as { results?: unknown[] }).results ?? [];
+}
+
+function makeCheck(
+  name: string,
+  status: "pass" | "fail" | "incomplete",
+  reason: string,
+  evidence: unknown,
+) {
+  return { name, status, reason, evidence };
+}
+
+function summarizeChecks(checks: Array<{ status: string }>) {
+  return {
+    total: checks.length,
+    pass: checks.filter((check) => check.status === "pass").length,
+    fail: checks.filter((check) => check.status === "fail").length,
+    incomplete: checks.filter((check) => check.status === "incomplete").length,
+  };
+}
+
+async function runSearch(query: QueryCase) {
+  return sdk.trigger("mem::search", {
+    query: query.query,
+    limit: topK,
+    format: "full",
+    project: "elfbench",
+  });
+}
+
+const docs = readdirSync(corpusPath)
+  .filter((file) => file.endsWith(".md"))
+  .sort()
+  .map((file) => ({
+    content: plainText(readFileSync(join(corpusPath, file), "utf8")),
+    concepts: conceptsFor(file),
+    files: [file],
+  }));
+const queries = JSON.parse(readFileSync(queriesPath, "utf8")).queries as QueryCase[];
+
+const writes = [];
+const memoryIdsBySource = new Map<string, string>();
+for (const doc of docs) {
+  const write = await sdk.trigger("mem::remember", {
+    content: doc.content,
+    type: "fact",
+    concepts: doc.concepts,
+    files: doc.files,
+    project: "elfbench",
+    agentId: "elf-baseline",
+  });
+  writes.push({ source: doc.files[0], result: write });
+  const memoryId = (write as { memory?: { id?: string } }).memory?.id;
+  if (memoryId) memoryIdsBySource.set(doc.files[0], memoryId);
+}
+
+const queryResults = [];
+const topK = Number(process.env.ELF_BASELINE_TOP_K ?? "10");
+for (const query of queries) {
+  const result = await runSearch(query);
+  queryResults.push({
+    id: query.id,
+    query: query.query,
+    expected_doc: query.expected_doc,
+    expected_terms: query.expected_terms,
+    matched: queryMatches(result, query),
+    result,
+  });
+}
+
+const pass = queryResults.filter((result) => result.matched).length;
+const checks = [
+  makeCheck(
+    "same_corpus_retrieval",
+    pass === queryResults.length ? "pass" : "fail",
+    pass === queryResults.length
+      ? "agentmemory mem::remember/mem::search returned expected evidence for every query."
+      : "agentmemory mem::remember/mem::search missed one or more expected results.",
+    {
+      total: queryResults.length,
+      pass,
+      fail: queryResults.length - pass,
+    },
+  ),
+];
+
+const authId = memoryIdsBySource.get("auth-memory.md");
+if (!authId) {
+  checks.push(
+    makeCheck(
+      "update_replaces_note_text",
+      "incomplete",
+      "The auth memory id was not returned by mem::remember, so supersede/update could not be exercised.",
+      { source: "auth-memory.md" },
+    ),
+  );
+} else {
+  const updateRemember = await sdk.trigger("mem::remember", {
+    content:
+      "The API auth middleware validates JWT tokens with key id `kid-v4` under `RotatedJwtKeyPlan`. The middleware rejects tokens older than 15 minutes and requires tenant scope `project_shared` for deployment operations.",
+    type: "fact",
+    concepts: conceptsFor("auth-memory.md"),
+    files: ["auth-memory.md"],
+    project: "elfbench",
+    agentId: "elf-baseline",
+  });
+  const updateQuery: QueryCase = {
+    id: "lifecycle-update-new-marker",
+    query: "Which rotated JWT key id does the auth middleware require?",
+    expected_doc: "auth-memory.md",
+    expected_terms: ["kid-v4", "RotatedJwtKeyPlan"],
+  };
+  const updateResult = await runSearch(updateQuery);
+  const updateMatched = queryMatches(updateResult, updateQuery);
+  const oldMarkerAbsent = resultEntries(updateResult)
+    .filter((entry) => {
+      const files =
+        (entry as { observation?: { files?: string[] } }).observation?.files ?? [];
+      return files.includes("auth-memory.md");
+    })
+    .every((entry) => !JSON.stringify(entry).toLowerCase().includes("kid-v3"));
+  checks.push(
+    makeCheck(
+      "update_replaces_note_text",
+      updateMatched && oldMarkerAbsent ? "pass" : "fail",
+      updateMatched && oldMarkerAbsent
+        ? "agentmemory mem::remember supersede returned the new marker and did not return the old marker for the updated file."
+        : "agentmemory mem::remember supersede did not cleanly replace the searchable auth memory text.",
+      {
+        memory_id: authId,
+        update_result: updateRemember,
+        matched_new_marker: updateMatched,
+        old_marker_absent: oldMarkerAbsent,
+        result: updateResult,
+      },
+    ),
+  );
+}
+
+const deleteQuery = queries.find(
+  (query) =>
+    query.expected_doc !== "auth-memory.md" &&
+    query.expected_doc !== "database-memory.md" &&
+    memoryIdsBySource.has(query.expected_doc),
+);
+if (!deleteQuery) {
+  checks.push(
+    makeCheck(
+      "delete_suppresses_retrieval",
+      "incomplete",
+      "No non-update, non-recovery memory id was available, so mem::forget could not be exercised.",
+      { available_sources: Array.from(memoryIdsBySource.keys()).sort() },
+    ),
+  );
+} else {
+  const deleteId = memoryIdsBySource.get(deleteQuery.expected_doc)!;
+  const deleteResult = await sdk.trigger("mem::forget", { memoryId: deleteId });
+  const searchAfterDelete = await runSearch(deleteQuery);
+  const deletedStillMatched = queryMatches(searchAfterDelete, deleteQuery);
+  checks.push(
+    makeCheck(
+      "delete_suppresses_retrieval",
+      deletedStillMatched ? "fail" : "pass",
+      deletedStillMatched
+        ? "agentmemory mem::forget returned success but the deleted memory was still searchable."
+        : "agentmemory mem::forget suppressed the deleted memory from subsequent search.",
+      {
+        memory_id: deleteId,
+        source: deleteQuery.expected_doc,
+        query: deleteQuery,
+        delete_result: deleteResult,
+        deleted_still_matched: deletedStillMatched,
+        result: searchAfterDelete,
+      },
+    ),
+  );
+}
+
+checks.push(
+  makeCheck(
+    "cold_start_recovery_search",
+    "incomplete",
+    "This adapter runs agentmemory against an in-memory SDK/KV mock; no durable store is available in the harness to prove cold-start recovery.",
+    {
+      adapter_storage: "mock StateKV Map",
+      required_next_step: "wire an agentmemory persistent KV/index path or hosted runtime for restart testing",
+    },
+  ),
+);
+
+const checkSummary = summarizeChecks(checks);
+
+writeFileSync(
+  outPath,
+  JSON.stringify(
+    {
+      schema: "elf.live_baseline.agentmemory_result/v1",
+      corpus: {
+        document_count: docs.length,
+        query_count: queries.length,
+      },
+      writes,
+      summary: {
+        total: queryResults.length,
+        pass,
+        fail: queryResults.length - pass,
+      },
+      check_summary: checkSummary,
+      checks,
+      queries: queryResults,
+    },
+    null,
+    2,
+  ),
+);
+TS
+    # Run the driver with: output path, corpus directory, query manifest.
+    if run_cmd "${project}: same-corpus remember/search" 240 "${log_path}" \
+      "cd '${REPOS_DIR}/${project}' && npx tsx '${driver_path}' '${result_path}' '${CORPUS_DIR}' '${REPORT_DIR}/queries.json'"; then
+      # Publish per-check detail where json_record looks for it.
+      if jq -e '.checks and .check_summary' "${result_path}" >/dev/null 2>&1; then
+        jq '{check_summary, checks}' "${result_path}" >"${REPORT_DIR}/${project}-checks.json"
+      fi
+      # The three tests below go from strictest to loosest; the first match wins.
+      # 1) Every query matched and no check failed or was incomplete.
+      if jq -e --argjson query_count "${QUERY_COUNT}" --argjson document_count "${DOCUMENT_COUNT}" '
+        .schema == "elf.live_baseline.agentmemory_result/v1" and
+        .corpus.document_count == $document_count and
+        .summary.total == $query_count and
+        .summary.fail == 0 and
+        .check_summary.fail == 0 and
+        .check_summary.incomplete == 0
+      ' "${result_path}" >/dev/null; then
+        json_record "${project}" "${repo}" "${head}" "pass" "retrieval_pass" "agentmemory mem::remember/mem::search found expected evidence and lifecycle checks passed" "${project}.log" "npm install/build; mem::remember/mem::forget/mem::search"
+        return
+      fi
+      # 2) Every query matched and no check failed, but some were incomplete.
+      if jq -e --argjson query_count "${QUERY_COUNT}" --argjson document_count "${DOCUMENT_COUNT}" '
+        .schema == "elf.live_baseline.agentmemory_result/v1" and
+        .corpus.document_count == $document_count and
+        .summary.total == $query_count and
+        .summary.fail == 0 and
+        .check_summary.fail == 0
+      ' "${result_path}" >/dev/null; then
+        json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_pass" "agentmemory same-corpus retrieval passed, but one or more lifecycle checks could not be completed in the in-memory harness" "${project}.log" "npm install/build; mem::remember/mem::forget/mem::search"
+        return
+      fi
+      # 3) Every query matched, but at least one lifecycle check failed.
+      if jq -e --argjson query_count "${QUERY_COUNT}" --argjson document_count "${DOCUMENT_COUNT}" '
+        .schema == "elf.live_baseline.agentmemory_result/v1" and
+        .corpus.document_count == $document_count and
+        .summary.total == $query_count and
+        .summary.fail == 0
+      ' "${result_path}" >/dev/null; then
+        json_record "${project}" "${repo}" "${head}" "fail" "retrieval_pass" "agentmemory same-corpus retrieval passed, but one or more lifecycle checks failed" "${project}.log" "npm install/build; mem::remember/mem::forget/mem::search"
+        return
+      fi
+      # The driver ran, but the result did not meet even the retrieval criteria.
+      json_record "${project}" "${repo}" "${head}" "fail" "retrieval_wrong_result" "agentmemory same-corpus search ran but did not return expected evidence" "${project}.log" "npm install/build; mem::remember; mem::search"
+      return
+    fi
+    # The driver command itself failed; this is recorded as incomplete, not fail.
+    json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "agentmemory install/build passed but same-corpus remember/search failed" "${project}.log" "npm install/build; mem::remember; mem::search"
+    return
+  fi
+
+  json_record "${project}" "${repo}" "${head}" "fail" "not_run" "install/build failed" "${project}.log" "npm install/build"
+}
+
+project_qmd() {
+  local project="qmd"
+  local repo="https://github.com/tobi/qmd.git"
+  local log_path="${REPORT_DIR}/${project}.log"
+  local query_result_path="${REPORT_DIR}/${project}-query.json"
+  local status_path="${REPORT_DIR}/${project}-status.txt"
+  local driver_path="${REPOS_DIR}/${project}/elf-live-baseline-qmd.mjs"
+  local home="${HOME_DIR}/${project}"
+  local head
+  mkdir -p "${home}"
+  head="$(clone_project "${project}" "${repo}" "${log_path}")" || {
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "clone failed" "${project}.log" "git clone"
+    return
+  }
+
+  if ! run_cmd "${project}: install/build" 300 "${log_path}" \
+    "cd '${REPOS_DIR}/${project}' && (npm ci || npm install --no-audit --no-fund) && npm run build --if-present"; then
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "install/build failed" "${project}.log" "npm install/build"
+    return
+  fi
+
+  cat >"${driver_path}" <<'JS'
+import { execFileSync } from "node:child_process";
+import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+
+const outPath = process.argv[2];
+const queriesPath = process.argv[3];
+const corpusPath = process.argv[4];
+if (!outPath || !queriesPath || !corpusPath) {
+  throw new Error("output path, query path, and corpus path are required");
+}
+
+const queries = JSON.parse(readFileSync(queriesPath, "utf8")).queries;
+const topK = process.env.ELF_BASELINE_TOP_K ?? "10";
+
+function resultMatches(results, query) {
+  if (!Array.isArray(results)) return false;
+  return results.some((entry) => {
+    const entryText = JSON.stringify(entry).toLowerCase();
+    const file = String(entry.file ?? "");
+    return (
+      file.includes(query.expected_doc) &&
+      query.expected_terms.every((term) =>
+        entryText.includes(String(term).toLowerCase()),
+      )
+    );
+  });
+}
+
+function qmdQuery(queryText) {
+  const structuredQuery = `lex: ${queryText}\nvec: ${queryText}`;
+  const stdout = execFileSync(
+    "npx",
+    [
+      "tsx",
+      "src/cli/qmd.ts",
+      "query",
+      structuredQuery,
+      "-c",
+      "elfbench",
+      "--json",
+      "--no-rerank",
+      "--min-score",
+      "0",
+      "-n",
+      topK,
+    ],
+    { encoding: "utf8", env: process.env },
+  );
+  return JSON.parse(stdout);
+}
+
+function runQueryCase(query) {
+  const results = qmdQuery(query.query);
+  return {
+    id: query.id,
+    query: query.query,
+    expected_doc: query.expected_doc,
+    expected_terms: query.expected_terms,
+    matched: resultMatches(results, query),
+    results,
+  };
+}
+
+function makeCheck(name, status, reason, evidence) {
+  return { name, status, reason, evidence };
+}
+
+function summarizeChecks(checks) {
+  return {
+    total: checks.length,
+    pass: checks.filter((check) => check.status === "pass").length,
+    fail: checks.filter((check) => check.status === "fail").length,
+    incomplete: checks.filter((check) => check.status === "incomplete").length,
+  };
+}
+
+function runQmd(args) {
+  return execFileSync("npx", ["tsx", "src/cli/qmd.ts", ...args], {
+    encoding: "utf8",
+    env: process.env,
+  });
+}
+
+function syncCollection({ embed = false } = {}) {
+  runQmd(["update"]);
+  if (embed) {
+    runQmd(["embed", "-f", "-c", "elfbench"]);
+  }
+}
+
+const queryResults = queries.map((query) => runQueryCase(query));
+const pass = queryResults.filter((result) => result.matched).length;
+const checks = [
+  makeCheck(
+    "same_corpus_retrieval",
+    pass === queryResults.length ? "pass" : "fail",
+    pass === queryResults.length
+      ? "qmd structured hybrid query returned expected evidence for every query."
+      : "qmd structured hybrid query missed one or more expected results.",
+    {
+      total: queryResults.length,
+      pass,
+      fail: queryResults.length - pass,
+    },
+  ),
+];
+
+const authPath = join(corpusPath, "auth-memory.md");
+if (!existsSync(authPath)) {
+  checks.push(
+    makeCheck(
+      "update_replaces_note_text",
+      "incomplete",
+      "The auth corpus file was missing, so qmd update could not be exercised.",
+      { source: "auth-memory.md" },
+    ),
+  );
+} else {
+  writeFileSync(
+    authPath,
+    "# Auth Memory\n\nRotated auth middleware validates JWT tokens with key id `kid-v4` under `RotatedJwtKeyPlan`. It still requires tenant scope `project_shared` for deployment operations after the emergency key rotation.\n",
+  );
+  syncCollection({ embed: true });
+  const updateQuery = {
+    id: "lifecycle-update-new-marker",
+    query: "Which rotated JWT key id does the auth middleware require?",
+    expected_doc: "auth-memory.md",
+    expected_terms: ["kid-v4", "RotatedJwtKeyPlan"],
+  };
+  const updateResults = qmdQuery(updateQuery.query);
+  const updateMatched = resultMatches(updateResults, updateQuery);
+  const oldMarkerAbsent = updateResults
+    .filter((entry) => String(entry.file ?? "").includes("auth-memory.md"))
+    .every((entry) => !JSON.stringify(entry).toLowerCase().includes("kid-v3"));
+  checks.push(
+    makeCheck(
+      "update_replaces_note_text",
+      updateMatched && oldMarkerAbsent ? "pass" : "fail",
+      updateMatched && oldMarkerAbsent
+        ? "qmd update/embed returned the new marker and did not return the old marker for the updated file."
+        : "qmd update/embed did not cleanly replace the searchable auth file text.",
+      {
+        source: "auth-memory.md",
+        matched_new_marker: updateMatched,
+        old_marker_absent: oldMarkerAbsent,
+        results: updateResults,
+      },
+    ),
+  );
+}
+
+const deleteQuery = queries.find(
+  (query) =>
+    query.expected_doc !== "auth-memory.md" &&
+    query.expected_doc !== "database-memory.md" &&
+    existsSync(join(corpusPath, query.expected_doc)),
+);
+if (!deleteQuery) {
+  checks.push(
+    makeCheck(
+      "delete_suppresses_retrieval",
+      "incomplete",
+      "No non-update, non-recovery corpus file was available, so qmd delete could not be exercised.",
+      { available_docs: queries.map((query) => query.expected_doc) },
+    ),
+  );
+} else {
+  unlinkSync(join(corpusPath, deleteQuery.expected_doc));
+  syncCollection();
+  const deleteResults = qmdQuery(deleteQuery.query);
+  const deletedStillMatched = resultMatches(deleteResults, deleteQuery);
+  checks.push(
+    makeCheck(
+      "delete_suppresses_retrieval",
+      deletedStillMatched ? "fail" : "pass",
+      deletedStillMatched
+        ? "qmd update marked the deleted file removed, but it was still searchable."
+        : "qmd update suppressed the deleted file from subsequent search.",
+      {
+        source: deleteQuery.expected_doc,
+        query: deleteQuery,
+        deleted_still_matched: deletedStillMatched,
+        results: deleteResults,
+      },
+    ),
+  );
+}
+
+const recoveryQuery = {
+  id: "lifecycle-cold-start-recovery",
+  query:
+    "The invoice list N+1 query was fixed by eager loading invoice lines through `InvoiceLineBatcher`. Do not reintroduce per-row SQL calls in invoice rendering.",
+  expected_doc: "database-memory.md",
+  expected_terms: ["InvoiceLineBatcher", "N+1"],
+};
+const recoveryResults = qmdQuery(recoveryQuery.query);
+const recoveryMatched = resultMatches(recoveryResults, recoveryQuery);
+checks.push(
+  makeCheck(
+    "cold_start_recovery_search",
+    recoveryMatched ? "pass" : "fail",
+    recoveryMatched
+      ? "A fresh qmd query process reopened the persisted index and retrieved expected evidence."
+      : "A fresh qmd query process did not retrieve expected persisted evidence.",
+    {
+      expected_doc: recoveryQuery.expected_doc,
+      matched: recoveryMatched,
+      results: recoveryResults,
+    },
+  ),
+);
+
+const checkSummary = summarizeChecks(checks);
+writeFileSync(
+  outPath,
+  JSON.stringify(
+    {
+      schema: "elf.live_baseline.qmd_result/v1",
+      summary: {
+        total: queryResults.length,
+        pass,
+        fail: queryResults.length - pass,
+      },
+      check_summary: checkSummary,
+      checks,
+      queries: queryResults,
+    },
+    null,
+    2,
+  ),
+);
+JS
+
+  if run_cmd "${project}: embedded retrieval" 900 "${log_path}" \
+    "export HOME='${home}'; export XDG_CACHE_HOME='/root/.cache'; export QMD_FORCE_CPU=1; cd '${REPOS_DIR}/${project}' && npx tsx src/cli/qmd.ts collection add '${CORPUS_DIR}' --name elfbench && npx tsx src/cli/qmd.ts update && npx tsx src/cli/qmd.ts embed -f -c elfbench && npx tsx src/cli/qmd.ts status > '${status_path}' && node '${driver_path}' '${query_result_path}' '${REPORT_DIR}/queries.json' '${CORPUS_DIR}'"; then
+    if jq -e '.checks and .check_summary' "${query_result_path}" >/dev/null 2>&1; then
+      jq '{check_summary, checks}' "${query_result_path}" >"${REPORT_DIR}/${project}-checks.json"
+    fi
+    if jq -e --argjson query_count "${QUERY_COUNT}" '
+      .schema == "elf.live_baseline.qmd_result/v1" and
+      .summary.total == $query_count and
+      .summary.fail == 0 and
+      .check_summary.fail == 0 and
+      .check_summary.incomplete == 0
+    ' "${query_result_path}" >/dev/null; then
+      json_record "${project}" "${repo}" "${head}" "pass" "retrieval_pass" "qmd embedded structured hybrid query found expected evidence and lifecycle checks passed" "${project}.log" "collection add; update; embed -f; query --json"
+    elif jq -e --argjson query_count "${QUERY_COUNT}" '
+      .schema == "elf.live_baseline.qmd_result/v1" and
+      .summary.total == $query_count and
+      .summary.fail == 0
+    ' "${query_result_path}" >/dev/null; then
+      json_record "${project}" "${repo}" "${head}" "fail" "retrieval_pass" "qmd same-corpus retrieval passed, but one or more update/delete/recovery checks failed or were incomplete" "${project}.log" "collection add; update; embed -f; query --json"
+    elif ! rg -q "Embedded [1-9][0-9]* chunks" "${log_path}"; then
+      json_record "${project}" "${repo}" "${head}" "incomplete" "embedding_required" "qmd indexed the corpus, but no successful embedding completion was observed" "${project}.log" "collection add; update; embed -f; query --json"
+    elif ! jq -e '.schema == "elf.live_baseline.qmd_result/v1"' "${query_result_path}" >/dev/null 2>&1; then
+      json_record "${project}" "${repo}" "${head}" "fail" "invalid_json_result" "qmd query command completed, but did not produce parseable JSON results" "${project}.log" "collection add; update; embed -f; search/query --json"
+    else
+      json_record "${project}" "${repo}" "${head}" "fail" "retrieval_wrong_result" "qmd embedded retrieval ran but did not return expected evidence" "${project}.log" "collection add; update; embed -f; search/query --json"
+    fi
+    return
+  fi
+
+  json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "qmd install passed but embedded retrieval command failed" "${project}.log" "collection add; update; embed -f; search/query --json"
+}
+
+# Benchmark memsearch: clone the repository, install it into a virtualenv,
+# generate a Python driver, run it, and emit one json_record row per outcome.
+project_memsearch() {
+  local project="memsearch"
+  local repo="https://github.com/zilliztech/memsearch.git"
+  local log_path="${REPORT_DIR}/${project}.log"
+  local home="${HOME_DIR}/${project}"
+  local result_path="${REPORT_DIR}/${project}-search.json"
+  local driver_path="${REPOS_DIR}/${project}/elf-live-baseline-memsearch.py"
+  local head
+  mkdir -p "${home}"
+  # clone_project's stdout becomes the head value passed to every json_record call.
+  head="$(clone_project "${project}" "${repo}" "${log_path}")" || {
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "clone failed" "${project}.log" "git clone"
+    return
+  }
+
+  # Install with the local and onnx extras; 420 is presumably run_cmd's timeout
+  # in seconds (confirm against run_cmd).
+  if ! run_cmd "${project}: install" 420 "${log_path}" \
+    "cd '${REPOS_DIR}/${project}' && python3 -m venv .venv && .venv/bin/pip install --upgrade pip && .venv/bin/pip install -e '.[local,onnx]'"; then
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "pip install failed" "${project}.log" "pip install -e .[local,onnx]"
+    return
+  fi
+
+  cat >"${driver_path}" <<'PY'
+import json
+import os
+import subprocess
+from pathlib import Path
+
+# Driver inputs come from the environment exported by the shell harness.
+out_path = Path(os.environ["ELF_MEMSEARCH_RESULT_PATH"])
+queries_path = Path(os.environ["ELF_BASELINE_QUERIES_PATH"])
+corpus_path = Path(os.environ["ELF_BASELINE_CORPUS_PATH"])
+# Kept as a string because it is passed straight through as a CLI argument.
+top_k = os.environ.get("ELF_BASELINE_TOP_K", "10")
+# The manifest is a JSON object whose "queries" key holds the query list.
+queries = json.loads(queries_path.read_text())["queries"]
+
+
+def run_memsearch(args):
+    """Run the ``memsearch`` CLI with ``args`` and return everything it printed.
+
+    stderr is folded into stdout, so callers match against the combined text.
+
+    Raises:
+        subprocess.CalledProcessError: if the CLI exits non-zero. The captured
+            output is printed before re-raising because the exception message
+            itself only carries the command and exit status.
+    """
+    try:
+        completed = subprocess.run(
+            ["memsearch", *args],
+            check=True,
+            text=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+        )
+    except subprocess.CalledProcessError as error:
+        # Surface the CLI's own diagnostics; they would otherwise be lost.
+        print(
+            f"memsearch {' '.join(map(str, args))} "
+            f"failed with exit status {error.returncode}:"
+        )
+        print(error.stdout or "", flush=True)
+        raise
+    return completed.stdout
+
+
+def index_corpus():
+    """Run ``memsearch index`` over the corpus directory and return the CLI output."""
+    corpus_dir = str(corpus_path)
+    return run_memsearch(["index", corpus_dir])
+
+
+def search_output(query_text):
+    """Return the raw ``memsearch search`` output for ``query_text`` using the configured ``--top-k``."""
+    cli_args = ["search", query_text, "--top-k", top_k]
+    return run_memsearch(cli_args)
+
+
+def output_matches(output, query):
+    """Report whether ``output`` contains every expected term of ``query``.
+
+    Matching is case-insensitive and term-only.
+
+    NOTE(review): ``query["expected_doc"]`` is intentionally not consulted. A
+    document-name requirement here was always overridden by a term-only
+    fallback, so it never influenced the verdict; this form states the real
+    contract. Tighten it only once the CLI output is confirmed to include
+    source file names.
+
+    Args:
+        output: Text printed by ``memsearch search``.
+        query: Mapping with an ``expected_terms`` list of strings.
+
+    Returns:
+        True when every expected term occurs in ``output`` (vacuously True for
+        an empty term list), otherwise False.
+    """
+    haystack = output.lower()
+    return all(term.lower() in haystack for term in query["expected_terms"])
+
+
+def make_check(name, status, reason, evidence):
+    """Bundle one check outcome into the record stored in the ``checks`` list."""
+    return dict(name=name, status=status, reason=reason, evidence=evidence)
+
+
+def summarize_checks(checks):
+    """Count checks by status; any other status value only contributes to ``total``."""
+    statuses = [check["status"] for check in checks]
+    return {
+        "total": len(statuses),
+        "pass": statuses.count("pass"),
+        "fail": statuses.count("fail"),
+        "incomplete": statuses.count("incomplete"),
+    }
+
+
+# Same-corpus retrieval: run each manifest query through the CLI and keep the
+# raw output next to the verdict so a miss can be inspected in the result file.
+query_results = []
+for query in queries:
+    raw_output = search_output(query["query"])
+    record = {
+        key: query[key]
+        for key in ("id", "query", "expected_doc", "expected_terms")
+    }
+    record["matched"] = output_matches(raw_output, query)
+    record["output"] = raw_output
+    query_results.append(record)
+
+pass_count = len([record for record in query_results if record["matched"]])
+every_query_matched = pass_count == len(query_results)
+if every_query_matched:
+    retrieval_reason = "memsearch search returned expected evidence for every query."
+else:
+    retrieval_reason = "memsearch search missed one or more expected results."
+checks = [
+    make_check(
+        "same_corpus_retrieval",
+        "pass" if every_query_matched else "fail",
+        retrieval_reason,
+        {
+            "total": len(query_results),
+            "pass": pass_count,
+            "fail": len(query_results) - pass_count,
+        },
+    )
+]
+
+# Lifecycle check (update): overwrite auth-memory.md with text carrying new
+# markers, re-index, then verify search returns the new markers and no longer
+# returns the old `kid-v3` marker.
+# NOTE(review): this rewrites the file inside the corpus directory itself;
+# confirm nothing later in the benchmark run expects the original content.
+auth_path = corpus_path / "auth-memory.md"
+if not auth_path.exists():
+    checks.append(
+        make_check(
+            "update_replaces_note_text",
+            "incomplete",
+            "The auth corpus file was missing, so memsearch update could not be exercised.",
+            {"source": "auth-memory.md"},
+        )
+    )
+else:
+    auth_path.write_text(
+        "# Auth Memory\n\nRotated auth middleware validates JWT tokens with key id `kid-v4` under `RotatedJwtKeyPlan`. It still requires tenant scope `project_shared` for deployment operations after the emergency key rotation.\n"
+    )
+    # Re-running the index command is how the changed file is picked up here.
+    update_index_output = index_corpus()
+    update_query = {
+        "id": "lifecycle-update-new-marker",
+        "query": "Which rotated JWT key id does the auth middleware require?",
+        "expected_doc": "auth-memory.md",
+        "expected_terms": ["kid-v4", "RotatedJwtKeyPlan"],
+    }
+    update_output = search_output(update_query["query"])
+    update_matched = output_matches(update_output, update_query)
+    # Absence is checked over the whole search output, not only the auth hit.
+    old_marker_absent = "kid-v3" not in update_output.lower()
+    checks.append(
+        make_check(
+            "update_replaces_note_text",
+            "pass" if update_matched and old_marker_absent else "fail",
+            "memsearch re-index returned the new marker and did not return the old marker for the updated file."
+            if update_matched and old_marker_absent
+            else "memsearch re-index did not cleanly replace the searchable auth file text.",
+            {
+                "source": "auth-memory.md",
+                "matched_new_marker": update_matched,
+                "old_marker_absent": old_marker_absent,
+                "index_output": update_index_output,
+                "output": update_output,
+            },
+        )
+    )
+
+# Lifecycle check (delete): pick the first manifest query whose expected
+# document still exists on disk and is not reserved for the update
+# (auth-memory.md) or recovery (database-memory.md) checks, delete that file,
+# re-index, and verify the query no longer matches.
+# NOTE(review): the file is unlinked from the corpus directory itself; confirm
+# nothing later in the benchmark run expects the full corpus to still be there.
+delete_query = next(
+    (
+        query
+        for query in queries
+        if query["expected_doc"] not in {"auth-memory.md", "database-memory.md"}
+        and (corpus_path / query["expected_doc"]).exists()
+    ),
+    None,
+)
+if delete_query is None:
+    checks.append(
+        make_check(
+            "delete_suppresses_retrieval",
+            "incomplete",
+            "No non-update, non-recovery corpus file was available, so memsearch delete could not be exercised.",
+            {"available_docs": [query["expected_doc"] for query in queries]},
+        )
+    )
+else:
+    (corpus_path / delete_query["expected_doc"]).unlink()
+    delete_index_output = index_corpus()
+    delete_output = search_output(delete_query["query"])
+    # The check passes only when the deleted document's terms are gone from the output.
+    deleted_still_matched = output_matches(delete_output, delete_query)
+    checks.append(
+        make_check(
+            "delete_suppresses_retrieval",
+            "fail" if deleted_still_matched else "pass",
+            "memsearch index removed the deleted file from subsequent search."
+            if not deleted_still_matched
+            else "memsearch index returned success but the deleted file was still searchable.",
+            {
+                "source": delete_query["expected_doc"],
+                "query": delete_query,
+                "deleted_still_matched": deleted_still_matched,
+                "index_output": delete_index_output,
+                "output": delete_output,
+            },
+        )
+    )
+
+# Lifecycle check (cold start): every search is a separate CLI process, so a
+# hit on the untouched database document shows the index was read back from disk.
+recovery_query = dict(
+    id="lifecycle-cold-start-recovery",
+    query="The invoice list N+1 query was fixed by eager loading invoice lines through `InvoiceLineBatcher`. Do not reintroduce per-row SQL calls in invoice rendering.",
+    expected_doc="database-memory.md",
+    expected_terms=["InvoiceLineBatcher", "N+1"],
+)
+recovery_output = search_output(recovery_query["query"])
+recovery_matched = output_matches(recovery_output, recovery_query)
+if recovery_matched:
+    recovery_status = "pass"
+    recovery_reason = "A fresh memsearch CLI process reopened the local Milvus index and retrieved persisted evidence."
+else:
+    recovery_status = "fail"
+    recovery_reason = "A fresh memsearch CLI process did not retrieve expected persisted evidence."
+recovery_evidence = {
+    "expected_doc": recovery_query["expected_doc"],
+    "matched": recovery_matched,
+    "output": recovery_output,
+}
+checks.append(
+    make_check(
+        "cold_start_recovery_search",
+        recovery_status,
+        recovery_reason,
+        recovery_evidence,
+    )
+)
+
+# Persist per-query results and lifecycle checks as one JSON document.
+check_summary = summarize_checks(checks)
+failed_count = len(query_results) - pass_count
+report = {
+    "schema": "elf.live_baseline.memsearch_result/v1",
+    "summary": {
+        "total": len(query_results),
+        "pass": pass_count,
+        "fail": failed_count,
+    },
+    "check_summary": check_summary,
+    "checks": checks,
+    "queries": query_results,
+}
+out_path.write_text(json.dumps(report, indent=2))
+PY
+
+  # Configure the onnx embedder, index the corpus, then run the generated driver.
+  # The classification below is reached only when the whole chain exits zero.
+  if run_cmd "${project}: cli retrieval attempt" 240 "${log_path}" \
+    "export HOME='${home}'; export ELF_MEMSEARCH_RESULT_PATH='${result_path}'; export ELF_BASELINE_QUERIES_PATH='${REPORT_DIR}/queries.json'; export ELF_BASELINE_CORPUS_PATH='${CORPUS_DIR}'; cd '${REPOS_DIR}/${project}' && source .venv/bin/activate && memsearch --help && memsearch config set embedding.provider onnx && memsearch index '${CORPUS_DIR}' && python '${driver_path}'"; then
+    # Publish the lifecycle checks as their own file when the driver produced them.
+    if jq -e '.checks and .check_summary' "${result_path}" >/dev/null 2>&1; then
+      jq '{check_summary, checks}' "${result_path}" >"${REPORT_DIR}/${project}-checks.json"
+    fi
+    # pass: every query matched and no lifecycle check failed or was incomplete.
+    if jq -e --argjson query_count "${QUERY_COUNT}" '
+      .schema == "elf.live_baseline.memsearch_result/v1" and
+      .summary.total == $query_count and
+      .summary.fail == 0 and
+      .check_summary.fail == 0 and
+      .check_summary.incomplete == 0
+    ' "${result_path}" >/dev/null; then
+      json_record "${project}" "${repo}" "${head}" "pass" "retrieval_pass" "memsearch indexed the corpus and returned expected evidence and lifecycle checks passed" "${project}.log" "config; index; search"
+    # fail: every query matched, but at least one lifecycle check did not pass.
+    elif jq -e --argjson query_count "${QUERY_COUNT}" '
+      .schema == "elf.live_baseline.memsearch_result/v1" and
+      .summary.total == $query_count and
+      .summary.fail == 0
+    ' "${result_path}" >/dev/null; then
+      json_record "${project}" "${repo}" "${head}" "fail" "retrieval_pass" "memsearch same-corpus retrieval passed, but one or more update/delete/recovery checks failed or were incomplete" "${project}.log" "config; index; search"
+    # fail: everything else, including a missing or unparseable result file
+    # (jq -e exits non-zero for those as well).
+    else
+      json_record "${project}" "${repo}" "${head}" "fail" "retrieval_wrong_result" "memsearch search ran but did not return expected evidence" "${project}.log" "config; index; search"
+    fi
+    return
+  fi
+
+  # run_cmd itself reported failure, so no result classification was attempted.
+  json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "memsearch installed, but the current CLI retrieval command failed" "${project}.log" "memsearch --help; config; index; search"
+}
+
+# Benchmark mem0: clone the repository, install it with fastembed and ollama
+# into a virtualenv, verify the import, then run a generated Python driver and
+# emit one json_record row per outcome.
+project_mem0() {
+  local project="mem0"
+  local repo="https://github.com/mem0ai/mem0.git"
+  local log_path="${REPORT_DIR}/${project}.log"
+  local result_path="${REPORT_DIR}/${project}-search.json"
+  local driver_path="${REPOS_DIR}/${project}/elf-live-baseline-mem0.py"
+  local home="${HOME_DIR}/${project}"
+  local head
+  mkdir -p "${home}"
+  # clone_project's stdout becomes the head value passed to every json_record call.
+  head="$(clone_project "${project}" "${repo}" "${log_path}")" || {
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "clone failed" "${project}.log" "git clone"
+    return
+  }
+
+  # The install step ends with an inline Python import so that a package which
+  # installs but cannot be imported is reported as not_run.
+  if ! run_cmd "${project}: install/import" 420 "${log_path}" \
+    "cd '${REPOS_DIR}/${project}' && python3 -m venv .venv && .venv/bin/pip install --upgrade pip && .venv/bin/pip install -e . fastembed ollama && .venv/bin/python - <<'PY'
+from mem0 import Memory
+print('mem0 Memory import ok:', Memory)
+PY"; then
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "pip install or import failed" "${project}.log" "pip install -e . fastembed ollama; import Memory"
+    return
+  fi
+
+  cat >"${driver_path}" <<'PY'
+import gc
+import json
+import os
+from pathlib import Path
+
+# Telemetry opt-out is set before the mem0 import below (presumably mem0 reads
+# it at import time; confirm).
+os.environ.setdefault("MEM0_TELEMETRY", "false")
+
+from mem0 import Memory
+
+# Driver inputs come from the environment exported by the shell harness.
+out_path = Path(os.environ["ELF_MEM0_RESULT_PATH"])
+base = Path(os.environ["ELF_MEM0_HOME"])
+corpus_path = Path(os.environ["ELF_BASELINE_CORPUS_PATH"])
+queries_path = Path(os.environ["ELF_BASELINE_QUERIES_PATH"])
+top_k = int(os.environ.get("ELF_BASELINE_TOP_K", "10"))
+
+# Local-only stack: a path-based Qdrant collection and a FastEmbed embedder,
+# both declared as 384-dimensional, with all state stored under `base`.
+# NOTE(review): an Ollama LLM is configured, but this driver only calls add()
+# with infer=False; presumably the LLM is never contacted. Confirm.
+config = {
+    "vector_store": {
+        "provider": "qdrant",
+        "config": {
+            "collection_name": "elfbench",
+            "path": str(base / "qdrant"),
+            "embedding_model_dims": 384,
+        },
+    },
+    "embedder": {
+        "provider": "fastembed",
+        "config": {
+            "model": "BAAI/bge-small-en-v1.5",
+            "embedding_dims": 384,
+        },
+    },
+    "llm": {
+        "provider": "ollama",
+        "config": {
+            "model": "llama3.1:8b",
+            "ollama_base_url": "http://127.0.0.1:11434",
+        },
+    },
+    "history_db_path": str(base / "history.db"),
+    "version": "v1.1",
+}
+
+# The same config dict is reused later to reopen the store for the recovery check.
+memory = Memory.from_config(config)
+
+def plain_text(markdown: str) -> str:
+    """Flatten Markdown into one line, dropping lines that start with ``#``.
+
+    Remaining lines are stripped and joined with single spaces; blank lines
+    are kept as empty segments, so they show up as doubled spaces.
+    """
+    kept_lines = []
+    for raw_line in markdown.splitlines():
+        if raw_line.lstrip().startswith("#"):
+            continue
+        kept_lines.append(raw_line.strip())
+    return " ".join(kept_lines).strip()
+
+
+# Load every Markdown file in the corpus (sorted by path) as flattened text
+# paired with its file name.
+docs = []
+for path in sorted(corpus_path.glob("*.md")):
+    docs.append((plain_text(path.read_text()), path.name))
+queries = json.loads(queries_path.read_text())["queries"]
+
+# Store each document verbatim (infer=False) and remember the id mem0 returns
+# for it, so the update and delete checks can target it later.
+adds = []
+memory_ids_by_source = {}
+for text, source in docs:
+    added = memory.add(
+        text,
+        user_id="elf-bench",
+        metadata={"source": source},
+        infer=False,
+    )
+    adds.append({"source": source, "result": added})
+    if not isinstance(added, dict):
+        continue
+    results = added.get("results", [])
+    if not results:
+        continue
+    first_result = results[0]
+    if isinstance(first_result, dict) and first_result.get("id"):
+        memory_ids_by_source[source] = first_result["id"]
+
+
+def result_entries(search):
+    """Return the ``results`` value of a search response, or ``[]`` when it is absent or the response is not a dict."""
+    if not isinstance(search, dict):
+        return []
+    return search.get("results", [])
+
+
+def search_memory(memory_instance, query_text):
+    """Search ``memory_instance`` for ``query_text`` within the ``elf-bench`` user filter."""
+    search_options = {
+        "filters": {"user_id": "elf-bench"},
+        "top_k": top_k,
+        "threshold": 0.0,
+    }
+    return memory_instance.search(query_text, **search_options)
+
+
+def matches_expected(search, expected_doc, expected_terms):
+    """Report whether one search hit comes from ``expected_doc`` and carries every term.
+
+    A hit qualifies when its ``metadata.source`` equals ``expected_doc`` and each
+    expected term occurs, case-insensitively, in the hit's JSON serialization.
+    Responses that are not dicts are treated as having no hits.
+    """
+    entries = search.get("results", []) if isinstance(search, dict) else []
+    for entry in entries:
+        serialized = json.dumps(entry, default=str).lower()
+        metadata = entry.get("metadata") or {}
+        if (metadata.get("source") or "") != expected_doc:
+            continue
+        if all(term.lower() in serialized for term in expected_terms):
+            return True
+    return False
+
+
+def query_result(query, search):
+    """Build the per-query record: manifest fields, the match verdict, and the raw search response."""
+    record = {
+        key: query[key]
+        for key in ("id", "query", "expected_doc", "expected_terms")
+    }
+    record["matched"] = matches_expected(
+        search,
+        query["expected_doc"],
+        query["expected_terms"],
+    )
+    record["search"] = search
+    return record
+
+
+def make_check(name, status, reason, evidence):
+    """Bundle one check outcome into the record stored in the ``checks`` list."""
+    fields = ("name", "status", "reason", "evidence")
+    return dict(zip(fields, (name, status, reason, evidence)))
+
+
+def summarize_checks(checks):
+    """Count checks by status; any other status value only contributes to ``total``."""
+    statuses = [check["status"] for check in checks]
+    summary = {"total": len(statuses)}
+    for status_name in ("pass", "fail", "incomplete"):
+        summary[status_name] = statuses.count(status_name)
+    return summary
+
+# Same-corpus retrieval: run every manifest query against the loaded memory.
+query_results = [
+    query_result(query, search_memory(memory, query["query"]))
+    for query in queries
+]
+
+pass_count = len([result for result in query_results if result["matched"]])
+every_query_matched = pass_count == len(query_results)
+if every_query_matched:
+    retrieval_reason = "mem0 local FastEmbed/Qdrant search returned expected evidence for every query."
+else:
+    retrieval_reason = "mem0 local FastEmbed/Qdrant search missed one or more expected results."
+checks = [
+    make_check(
+        "same_corpus_retrieval",
+        "pass" if every_query_matched else "fail",
+        retrieval_reason,
+        {
+            "total": len(query_results),
+            "pass": pass_count,
+            "fail": len(query_results) - pass_count,
+        },
+    )
+]
+
+# Lifecycle check (update): replace the stored auth memory text via update(),
+# then verify search returns the new markers and that no hit belonging to the
+# auth memory still carries the old `kid-v3` marker.
+auth_id = memory_ids_by_source.get("auth-memory.md")
+if not auth_id:
+    checks.append(
+        make_check(
+            "update_replaces_note_text",
+            "incomplete",
+            "The auth memory id was not returned by mem0 add(), so update could not be exercised.",
+            {"source": "auth-memory.md"},
+        )
+    )
+else:
+    update_text = (
+        "Rotated auth middleware validates JWT tokens with key id `kid-v4` "
+        "under `RotatedJwtKeyPlan`. It still requires tenant scope "
+        "`project_shared` for deployment operations after the emergency key rotation."
+    )
+    # The source is passed again in the metadata so matches_expected can still
+    # attribute the hit to auth-memory.md.
+    # NOTE(review): assumes the installed mem0 Memory.update accepts a
+    # `metadata` keyword; confirm against the cloned revision.
+    update_result = memory.update(
+        auth_id,
+        update_text,
+        metadata={"source": "auth-memory.md", "lifecycle": "updated"},
+    )
+    update_search = search_memory(
+        memory,
+        "Which rotated JWT key id does the auth middleware require?",
+    )
+    update_matched = matches_expected(
+        update_search,
+        "auth-memory.md",
+        ["kid-v4", "RotatedJwtKeyPlan"],
+    )
+    # Only hits identified as the auth memory (by id or by source) are
+    # inspected; all() over an empty selection is True, so update_matched
+    # guards the no-hit case.
+    old_marker_absent = all(
+        "kid-v3" not in json.dumps(entry, default=str).lower()
+        for entry in result_entries(update_search)
+        if entry.get("id") == auth_id
+        or ((entry.get("metadata") or {}).get("source") == "auth-memory.md")
+    )
+    checks.append(
+        make_check(
+            "update_replaces_note_text",
+            "pass" if update_matched and old_marker_absent else "fail",
+            "mem0 update() returned the new marker and did not return the old marker for the updated memory."
+            if update_matched and old_marker_absent
+            else "mem0 update() did not cleanly replace the searchable auth memory text.",
+            {
+                "memory_id": auth_id,
+                "update_result": update_result,
+                "matched_new_marker": update_matched,
+                "old_marker_absent": old_marker_absent,
+                "search": update_search,
+            },
+        )
+    )
+
+# Lifecycle check (delete): pick the first manifest query whose expected
+# document has a known memory id and is not reserved for the update
+# (auth-memory.md) or recovery (database-memory.md) checks, delete that memory,
+# and verify the same query no longer returns it.
+delete_query = next(
+    (
+        query
+        for query in queries
+        if query["expected_doc"] in memory_ids_by_source
+        and query["expected_doc"] not in {"auth-memory.md", "database-memory.md"}
+    ),
+    None,
+)
+if delete_query is None:
+    checks.append(
+        make_check(
+            "delete_suppresses_retrieval",
+            "incomplete",
+            "No non-update, non-recovery memory id was available, so delete could not be exercised.",
+            {"available_sources": sorted(memory_ids_by_source)},
+        )
+    )
+else:
+    delete_source = delete_query["expected_doc"]
+    delete_id = memory_ids_by_source[delete_source]
+    delete_result = memory.delete(delete_id)
+    delete_search = search_memory(
+        memory,
+        delete_query["query"],
+    )
+    # A match here requires a hit whose metadata source is the deleted
+    # document, so other documents sharing the terms cannot cause a false failure.
+    deleted_still_matched = matches_expected(
+        delete_search,
+        delete_source,
+        delete_query["expected_terms"],
+    )
+    checks.append(
+        make_check(
+            "delete_suppresses_retrieval",
+            "pass" if not deleted_still_matched else "fail",
+            "mem0 delete() suppressed the deleted memory from subsequent search."
+            if not deleted_still_matched
+            else "mem0 delete() returned success but the deleted memory was still searchable.",
+            {
+                "memory_id": delete_id,
+                "source": delete_source,
+                "query": delete_query,
+                "delete_result": delete_result,
+                "deleted_still_matched": deleted_still_matched,
+                "search": delete_search,
+            },
+        )
+    )
+
+# Lifecycle check (cold start): drop the first Memory instance and force a
+# garbage collection before building a second one from the same config
+# (presumably so the on-disk Qdrant storage is released first; confirm).
+# This is a new instance in the same Python process, not a new process.
+del memory
+gc.collect()
+reopened_memory = Memory.from_config(config)
+recovery_search = search_memory(
+    reopened_memory,
+    "The invoice list N+1 query was fixed by eager loading invoice lines through `InvoiceLineBatcher`. Do not reintroduce per-row SQL calls in invoice rendering.",
+)
+# database-memory.md is excluded from the update and delete checks above, so
+# its stored text is whatever the initial add() wrote.
+recovery_matched = matches_expected(
+    recovery_search,
+    "database-memory.md",
+    ["InvoiceLineBatcher", "N+1"],
+)
+checks.append(
+    make_check(
+        "cold_start_recovery_search",
+        "pass" if recovery_matched else "fail",
+        "A newly constructed mem0 Memory over the same local Qdrant/history paths retrieved persisted evidence."
+        if recovery_matched
+        else "A newly constructed mem0 Memory over the same local Qdrant/history paths did not retrieve persisted evidence.",
+        {
+            "expected_doc": "database-memory.md",
+            "matched": recovery_matched,
+            "search": recovery_search,
+        },
+    )
+)
+
+# Persist configuration, add() responses, per-query results and lifecycle
+# checks as one JSON document; default=str stringifies values json cannot encode.
+check_summary = summarize_checks(checks)
+
+failed_count = len(query_results) - pass_count
+report = {
+    "schema": "elf.live_baseline.mem0_result/v1",
+    "config": {
+        "embedder": "fastembed:BAAI/bge-small-en-v1.5",
+        "vector_store": "qdrant:path",
+        "infer": False,
+    },
+    "corpus": {
+        "document_count": len(docs),
+        "query_count": len(queries),
+    },
+    "adds": adds,
+    "summary": {
+        "total": len(query_results),
+        "pass": pass_count,
+        "fail": failed_count,
+    },
+    "check_summary": check_summary,
+    "checks": checks,
+    "queries": query_results,
+}
+out_path.write_text(json.dumps(report, indent=2, default=str))
+PY
+
+  # Run the generated driver inside the virtualenv. The classification below is
+  # reached only when the command exits zero.
+  if run_cmd "${project}: local fastembed add/search" 900 "${log_path}" \
+    "export HOME='${home}'; export ELF_MEM0_HOME='${home}'; export ELF_MEM0_RESULT_PATH='${result_path}'; export ELF_BASELINE_CORPUS_PATH='${CORPUS_DIR}'; export ELF_BASELINE_QUERIES_PATH='${REPORT_DIR}/queries.json'; export MEM0_TELEMETRY=false; cd '${REPOS_DIR}/${project}' && source .venv/bin/activate && python '${driver_path}'"; then
+    # Publish the lifecycle checks as their own file when the driver produced them.
+    if jq -e '.checks and .check_summary' "${result_path}" >/dev/null 2>&1; then
+      jq '{check_summary, checks}' "${result_path}" >"${REPORT_DIR}/${project}-checks.json"
+    fi
+    # pass: the driver saw the expected number of documents, every query
+    # matched, and no lifecycle check failed or was incomplete.
+    if jq -e --argjson query_count "${QUERY_COUNT}" --argjson document_count "${DOCUMENT_COUNT}" '
+      .schema == "elf.live_baseline.mem0_result/v1" and
+      .corpus.document_count == $document_count and
+      .summary.total == $query_count and
+      .summary.fail == 0 and
+      .check_summary.fail == 0 and
+      .check_summary.incomplete == 0
+    ' "${result_path}" >/dev/null; then
+      json_record "${project}" "${repo}" "${head}" "pass" "retrieval_pass" "mem0 infer=false local fastembed/Qdrant search found expected evidence and lifecycle checks passed" "${project}.log" "pip install -e . fastembed ollama; Memory.from_config; add/update/delete/search"
+      return
+    fi
+    # fail: retrieval itself passed, but at least one lifecycle check did not.
+    if jq -e --argjson query_count "${QUERY_COUNT}" --argjson document_count "${DOCUMENT_COUNT}" '
+      .schema == "elf.live_baseline.mem0_result/v1" and
+      .corpus.document_count == $document_count and
+      .summary.total == $query_count and
+      .summary.fail == 0
+    ' "${result_path}" >/dev/null; then
+      json_record "${project}" "${repo}" "${head}" "fail" "retrieval_pass" "mem0 same-corpus retrieval passed, but one or more update/delete/recovery checks failed or were incomplete" "${project}.log" "pip install -e . fastembed ollama; Memory.from_config; add/update/delete/search"
+      return
+    fi
+    # fail: everything else, including a document-count mismatch or a missing
+    # or unparseable result file (jq -e exits non-zero for those as well).
+    json_record "${project}" "${repo}" "${head}" "fail" "retrieval_wrong_result" "mem0 local add/search ran but did not return expected evidence" "${project}.log" "pip install -e . fastembed ollama; Memory.from_config; add infer=false; search"
+    return
+  fi
+
+  # run_cmd itself reported failure, so no result classification was attempted.
+  json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "mem0 installed and imported, but local fastembed/Qdrant add/search failed" "${project}.log" "pip install -e . fastembed ollama; Memory.from_config; add infer=false; search"
+}
+
+project_openviking() {
+  local project="OpenViking"
+  local repo="https://github.com/volcengine/OpenViking.git"
+  local log_path="${REPORT_DIR}/${project}.log"
+  local home="${HOME_DIR}/${project}"
+  local config_path="${REPORT_DIR}/${project}-ov.conf"
+  local result_path="${REPORT_DIR}/${project}-search.json"
+  local driver_path="${REPOS_DIR}/${project}/elf-live-baseline-openviking.py"
+  local local_embed_failure_pattern="llama-cpp-python|target specific option mismatch|failed-wheel-build-for-install|Failed building wheel|Failed to build llama-cpp-python|No module named 'llama_cpp'|Local embedding is enabled but 'llama-cpp-python' is not installed"
+  local head
+  mkdir -p "${home}"
+  head="$(clone_project "${project}" "${repo}" "${log_path}")" || {
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "clone failed" "${project}.log" "git clone"
+    return
+  }
+
+  if ! run_cmd "${project}: install/help" 600 "${log_path}" \
+    "export HOME='${home}'; cd '${REPOS_DIR}/${project}' && python3 -m venv .venv && .venv/bin/pip install --upgrade pip && .venv/bin/pip install maturin && .venv/bin/pip install -e . && (.venv/bin/openviking language en || .venv/bin/ov language en) && (.venv/bin/openviking --help || .venv/bin/ov --help)"; then
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "pip install or CLI help failed" "${project}.log" "pip install -e .; openviking/ov --help"
+    return
+  fi
+
+  if rg -q "ERROR: Failed building editable|Failed to build openviking|error: failed-wheel-build-for-install|CMake Error" "${log_path}"; then
+    json_record "${project}" "${repo}" "${head}" "fail" "partial_install" "OpenViking install/help returned success but the build log contains native build errors" "${project}.log" "pip install -e .; openviking/ov --help"
+    return
+  fi
+
+  cat >"${config_path}" <<EOF
+{
+  "default_account": "elfbench",
+  "default_user": "elfbench",
+  "storage": {
+    "workspace": "${home}/data",
+    "skip_process_lock": true,
+    "vectordb": {
+      "backend": "local",
+      "name": "elfbench_context",
+      "dimension": 512
+    }
+  },
+  "embedding": {
+    "dense": {
+      "provider": "local",
+      "model": "bge-small-zh-v1.5-f16",
+      "cache_dir": "${home}/models"
+    },
+    "text_source": "content_only",
+    "max_concurrent": 2
+  },
+  "auto_generate_l0": false,
+  "auto_generate_l1": false,
+  "default_search_mode": "fast",
+  "vlm": {},
+  "query_planner": {},
+  "rerank": {}
+}
+EOF
+
+  cat >"${driver_path}" <<'PY'
+import json
+import os
+from pathlib import Path
+
+from openviking import OpenViking
+
+
+def to_jsonable(value):
+    """Convert ``value`` towards plain JSON data.
+
+    Objects exposing ``to_dict`` (checked first) or ``model_dump`` are replaced
+    by that method's return value as-is; lists and dicts are converted
+    element by element; anything else is returned unchanged.
+    """
+    for converter_name in ("to_dict", "model_dump"):
+        if hasattr(value, converter_name):
+            return getattr(value, converter_name)()
+    if isinstance(value, list):
+        return list(map(to_jsonable, value))
+    if isinstance(value, dict):
+        return {key: to_jsonable(item) for key, item in value.items()}
+    return value
+
+
+out_path = Path(os.environ["ELF_OPENVIKING_RESULT_PATH"])
+data_path = os.environ["ELF_OPENVIKING_DATA_PATH"]
+corpus_path = os.environ["ELF_OPENVIKING_CORPUS_PATH"]
+queries_path = Path(os.environ["ELF_BASELINE_QUERIES_PATH"])
+top_k = int(os.environ.get("ELF_BASELINE_TOP_K", "10"))
+
+
+def result_matches(found, query):
+    """Report whether the serialized ``found`` value names the expected document and every expected term.
+
+    The comparison is case-insensitive and runs over the whole JSON
+    serialization of the response, not over individual hits.
+    """
+    serialized = json.dumps(
+        to_jsonable(found), ensure_ascii=False, default=str
+    ).lower()
+    if query["expected_doc"].lower() not in serialized:
+        return False
+    return all(term.lower() in serialized for term in query["expected_terms"])
+
+
+client = OpenViking(path=data_path)
+client.initialize()
+try:
+    queries = json.loads(queries_path.read_text())["queries"]
+    added = client.add_resource(
+        corpus_path,
+        to="viking://resources/elfbench",
+        wait=True,
+        timeout=240,
+        build_index=True,
+        summarize=False,
+    )
+    query_results = []
+    for query in queries:
+        found = client.find(
+            query["query"],
+            target_uri="viking://resources/elfbench",
+            limit=top_k,
+            score_threshold=0.0,
+            level=[2],
+        )
+        query_results.append(
+            {
+                "id": query["id"],
+                "query": query["query"],
+                "expected_doc": query["expected_doc"],
+                "expected_terms": query["expected_terms"],
+                "matched": result_matches(found, query),
+                "find": to_jsonable(found),
+            }
+        )
+    pass_count = sum(1 for result in query_results if result["matched"])
+    out_path.write_text(
+        json.dumps(
+            {
+                "schema": "elf.live_baseline.openviking_result/v1",
+                "config": {
+                    "embedder": "local:bge-small-zh-v1.5-f16",
+                    "vector_store": "local",
+                    "mode": "OpenViking.add_resource/find",
+                },
+                "add": to_jsonable(added),
+                "summary": {
+                    "total": len(query_results),
+                    "pass": pass_count,
+                    "fail": len(query_results) - pass_count,
+                },
+                "queries": query_results,
+            },
+            ensure_ascii=False,
+            indent=2,
+            default=str,
+        )
+    )
+finally:
+    client.close()
+PY
+
+  if ! run_cmd "${project}: install local embedding extras" 900 "${log_path}" \
+    "export HOME='${home}'; cd '${REPOS_DIR}/${project}' && .venv/bin/pip install -e '.[local-embed]'"; then
+    if rg -q "${local_embed_failure_pattern}" "${log_path}"; then
+      json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local-embed install failed in Docker while building llama-cpp-python for aarch64, so same-corpus local retrieval could not be run" "${project}.log" "pip install -e .; openviking/ov --help; pip install -e .[local-embed]"
+      return
+    fi
+    json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local-embed install failed in Docker, so same-corpus local retrieval could not be run" "${project}.log" "pip install -e .; openviking/ov --help; pip install -e .[local-embed]"
+    return
+  fi
+
+  if rg -q "${local_embed_failure_pattern}" "${log_path}"; then
+    json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local-embed install returned success but the log contains llama-cpp-python build/import failure, so same-corpus local retrieval could not be run" "${project}.log" "pip install -e .; openviking/ov --help; pip install -e .[local-embed]"
+    return
+  fi
+
+  if run_cmd "${project}: local add/find" 900 "${log_path}" \
+    "export HOME='${home}'; export OPENVIKING_CONFIG_FILE='${config_path}'; export ELF_OPENVIKING_DATA_PATH='${home}/data'; export ELF_OPENVIKING_CORPUS_PATH='${CORPUS_DIR}'; export ELF_OPENVIKING_RESULT_PATH='${result_path}'; export ELF_BASELINE_QUERIES_PATH='${REPORT_DIR}/queries.json'; cd '${REPOS_DIR}/${project}' && source .venv/bin/activate && python '${driver_path}'"; then
+    if rg -q "${local_embed_failure_pattern}" "${log_path}"; then
+      json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local add_resource/find hit llama-cpp-python build/import failure, so same-corpus local retrieval could not be run" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+      return
+    fi
+    if [[ ! -s "${result_path}" ]] || ! jq -e . "${result_path}" >/dev/null 2>&1; then
+      json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "OpenViking local add_resource/find returned success but did not write a valid result JSON" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+      return
+    fi
+    if jq -e --argjson query_count "${QUERY_COUNT}" '
+      .schema == "elf.live_baseline.openviking_result/v1" and
+      .summary.total == $query_count and
+      .summary.fail == 0
+    ' "${result_path}" >/dev/null; then
+      json_record "${project}" "${repo}" "${head}" "pass" "retrieval_pass" "OpenViking local add_resource/find found expected evidence for every query" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+      return
+    fi
+    json_record "${project}" "${repo}" "${head}" "fail" "retrieval_wrong_result" "OpenViking local add_resource/find ran but did not return expected evidence" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+    return
+  fi
+
+  if rg -q "${local_embed_failure_pattern}" "${log_path}"; then
+    json_record "${project}" "${repo}" "${head}" "incomplete" "local_embed_install_failed" "OpenViking local add_resource/find failed because llama-cpp-python was unavailable in Docker" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+    return
+  fi
+
+  json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "OpenViking local-embed installed, but same-corpus add_resource/find failed in Docker" "${project}.log" "pip install -e .[local-embed]; OpenViking.add_resource/find"
+}
+
+project_claude_mem() {  # claude-mem adapter: clone, build, index the shared corpus, search it, record exactly one json_record.
+  local project="claude-mem"
+  local repo="https://github.com/thedotmack/claude-mem.git"
+  local log_path="${REPORT_DIR}/${project}.log"
+  local result_path="${REPORT_DIR}/${project}-search.json"
+  local driver_path="${REPOS_DIR}/${project}/elf-live-baseline-claude-mem.ts"  # lives inside the clone so the driver's ./src imports resolve
+  local head  # declared on its own line so the `||` below tests clone_project's status, not `local`'s
+  head="$(clone_project "${project}" "${repo}" "${log_path}")" || {
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "clone failed" "${project}.log" "git clone"
+    return
+  }
+  # Install (npm ci, falling back to npm install) and build; 420 is handed to run_cmd, presumably a timeout in seconds.
+  if ! run_cmd "${project}: install/build" 420 "${log_path}" \
+    "cd '${REPOS_DIR}/${project}' && (npm ci || npm install --no-audit --no-fund) && npm run build --if-present"; then
+    json_record "${project}" "${repo}" "${head}" "fail" "not_run" "npm install/build failed" "${project}.log" "npm install/build"
+    return
+  fi
+  # The heredoc delimiter is quoted, so the shell expands nothing inside the driver source.
+  cat >"${driver_path}" <<'TS'
+import { readFileSync, readdirSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { Database } from "bun:sqlite";
+import { MemoryItemsRepository } from "./src/storage/sqlite/memory-items.ts";
+import { ProjectsRepository } from "./src/storage/sqlite/projects.ts";
+// CLI arguments, in order: result JSON path, corpus directory, query manifest path.
+const outPath = Bun.argv[2];
+const corpusPath = Bun.argv[3];
+const queriesPath = Bun.argv[4];
+if (!outPath || !corpusPath || !queriesPath) {
+  throw new Error("output path, corpus path, and query path are required");
+}
+// Shape that each entry of the query manifest is cast to.
+type QueryCase = {
+  id: string;
+  query: string;
+  expected_doc: string;
+  expected_terms: string[];
+};
+// Flattens Markdown to a single line: drops every line starting with "#" and collapses whitespace.
+function plainText(markdown: string): string {
+  return markdown
+    .split(/\r?\n/)
+    .filter((line) => !line.trimStart().startsWith("#"))
+    .join(" ")
+    .replace(/\s+/g, " ")
+    .trim();
+}
+// Title is the first "# " heading (leading whitespace tolerated), otherwise the file name.
+function titleFrom(markdown: string, file: string): string {
+  const heading = markdown
+    .split(/\r?\n/)
+    .find((line) => line.trimStart().startsWith("# "));
+  return heading ? heading.replace(/^\s*#\s+/, "").trim() : file; // \s* mirrors the trimStart() used to find the heading
+}
+// Lower-cased alphanumeric tokens of the file name, without its .md extension.
+function conceptsFor(file: string): string[] {
+  return file
+    .replace(/\.md$/i, "")
+    .split(/[^A-Za-z0-9]+/)
+    .map((part) => part.toLowerCase())
+    .filter(Boolean);
+}
+// A query matches when one hit lists expected_doc in filesRead and its JSON contains every expected term (case-insensitive).
+function resultMatches(results: unknown[], query: QueryCase): boolean {
+  return results.some((entry) => {
+    const files = (entry as { filesRead?: string[] }).filesRead ?? [];
+    const entryText = JSON.stringify(entry).toLowerCase();
+    return (
+      files.includes(query.expected_doc) &&
+      query.expected_terms.every((term) =>
+        entryText.includes(term.toLowerCase()),
+      )
+    );
+  });
+}
+// In-memory SQLite database; it is closed in the finally block below.
+const db = new Database(":memory:");
+db.run("PRAGMA foreign_keys = ON");
+
+try {
+  const projects = new ProjectsRepository(db);
+  const memories = new MemoryItemsRepository(db);
+  const project = projects.create({
+    name: "elfbench",
+    slug: "elfbench",
+    rootPath: "/bench/corpus",
+    metadata: { source: "elf-live-baseline" },
+  });
+  // Load every .md file of the corpus in sorted file-name order.
+  const docs = readdirSync(corpusPath)
+    .filter((file) => file.endsWith(".md"))
+    .sort()
+    .map((file) => {
+      const raw = readFileSync(join(corpusPath, file), "utf8");
+      return {
+        title: titleFrom(raw, file),
+        text: plainText(raw),
+        concepts: conceptsFor(file),
+        file,
+      };
+    });
+  const queries = JSON.parse(readFileSync(queriesPath, "utf8")).queries as QueryCase[];
+  const topK = Number(process.env.ELF_BASELINE_TOP_K || "10"); // `||` so an empty variable also falls back to 10 instead of 0
+  // Store each document as one memory item tagged with its source file.
+  const created = docs.map((doc) =>
+    memories.create({
+      projectId: project.id,
+      kind: "manual",
+      type: "fact",
+      title: doc.title,
+      text: doc.text,
+      narrative: doc.text,
+      facts: [doc.text],
+      concepts: doc.concepts,
+      filesRead: [doc.file],
+      metadata: { source: doc.file },
+    }),
+  );
+  // Search once per query and keep the raw hits next to the verdict.
+  const queryResults = queries.map((query) => {
+    const results = memories.search(project.id, query.query, topK);
+    return {
+      id: query.id,
+      query: query.query,
+      expected_doc: query.expected_doc,
+      expected_terms: query.expected_terms,
+      matched: resultMatches(results, query),
+      results,
+    };
+  });
+  const pass = queryResults.filter((result) => result.matched).length;
+  // Result document; schema, corpus.document_count and summary are checked with jq by the calling shell function.
+  writeFileSync(
+    outPath,
+    JSON.stringify(
+      {
+        schema: "elf.live_baseline.claude_mem_result/v1",
+        corpus: {
+          document_count: docs.length,
+          query_count: queries.length,
+        },
+        created,
+        summary: {
+          total: queryResults.length,
+          pass,
+          fail: queryResults.length - pass,
+        },
+        queries: queryResults,
+      },
+      null,
+      2,
+    ),
+  );
+} finally {
+  db.close();
+}
+TS
+  # If the driver command succeeds, the jq gate decides between "pass" and "fail"/retrieval_wrong_result.
+  if run_cmd "${project}: same-corpus sqlite search" 300 "${log_path}" \
+    "cd '${REPOS_DIR}/${project}' && bun '${driver_path}' '${result_path}' '${CORPUS_DIR}' '${REPORT_DIR}/queries.json'"; then
+    if jq -e --argjson query_count "${QUERY_COUNT}" --argjson document_count "${DOCUMENT_COUNT}" '
+      .schema == "elf.live_baseline.claude_mem_result/v1" and
+      .corpus.document_count == $document_count and
+      .summary.total == $query_count and
+      .summary.fail == 0
+    ' "${result_path}" >/dev/null; then
+      json_record "${project}" "${repo}" "${head}" "pass" "retrieval_pass" "claude-mem SQLite memory repository search found expected evidence for every query" "${project}.log" "npm install/build; MemoryItemsRepository.create/search"
+      return
+    fi
+    json_record "${project}" "${repo}" "${head}" "fail" "retrieval_wrong_result" "claude-mem same-corpus search ran but did not return expected evidence" "${project}.log" "npm install/build; MemoryItemsRepository.create/search"
+    return
+  fi
+  # run_cmd reported failure for the driver command itself.
+  json_record "${project}" "${repo}" "${head}" "incomplete" "retrieval_command_failed" "claude-mem built, but same-corpus SQLite search did not pass in Docker" "${project}.log" "npm install/build; MemoryItemsRepository.create/search"
+}
+
+run_project "ELF" project_elf  # each adapter function is handed to run_project, one after another in this fixed order
+run_project "agentmemory" project_agentmemory
+run_project "qmd" project_qmd
+run_project "memsearch" project_memsearch
+run_project "mem0" project_mem0
+run_project "OpenViking" project_openviking
+run_project "claude-mem" project_claude_mem
+finish_report
+# Pretty-print the aggregate report to stdout, then print where it was written.
+jq . "${REPORT}"
+echo "Live baseline report: ${REPORT}"
+# Strict mode (ELF_BASELINE_STRICT=1): jq -e returns non-zero unless .verdict is "pass", and that becomes the script's exit status.
+if [[ "${ELF_BASELINE_STRICT:-0}" == "1" ]]; then
+  jq -e '.verdict == "pass"' "${REPORT}" >/dev/null
+fi
diff --git a/scripts/live-baseline-report-to-md.sh b/scripts/live-baseline-report-to-md.sh
new file mode 100755
index 00000000..651f29b4
--- /dev/null
+++ b/scripts/live-baseline-report-to-md.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+REPORT="${1:-${ELF_BASELINE_REPORT:-${ROOT_DIR}/tmp/live-baseline/live-baseline-report.json}}"
+OUT="${2:-${ELF_BASELINE_MARKDOWN_REPORT:-}}"
+
+if ! command -v jq >/dev/null 2>&1; then
+  echo "Missing jq; cannot render live baseline Markdown report." >&2
+  exit 1
+fi
+
+if [[ ! -f "${REPORT}" ]]; then
+  echo "Missing report: ${REPORT}" >&2
+  exit 1
+fi
+
+render_report() {  # Renders the JSON report at ${REPORT} as a Markdown summary on stdout.
+  jq -r --arg report_path "${REPORT}" '
+    # dash: null becomes "-", anything else its string form. md: dash plus escaping of "|" and newlines for table cells.
+    def dash: if . == null then "-" else tostring end;
+    def md: dash | gsub("\\|"; "\\|") | gsub("\n"; " ");
+    # unit($u): like dash, but appends the unit only when a value is present, so a missing value is "-" and not "nulls".
+    def unit($u): if . == null then "-" else tostring + $u end;
+    def checks: ((.check_summary.pass // 0 | tostring) + "/" + (.check_summary.total // 0 | tostring));
+
+    "# Live Baseline Benchmark Report",
+    "",
+    "Goal: Publish a Markdown summary for one generated live baseline aggregate report.",
+    "Read this when: You need a durable, reviewable summary of a live baseline JSON report.",
+    ("Inputs: `" + $report_path + "`."),
+    "Depends on: `scripts/live-baseline-benchmark.sh` and `docs/guide/benchmarking/live_baseline_benchmark.md`.",
+    "Verification: Compare this Markdown summary with the source JSON before committing.",
+    "",
+    "## Summary",
+    "",
+    ("- Run ID: `" + (.run_id | md) + "`"),
+    ("- Generated at: `" + (.generated_at | md) + "`"),
+    ("- Verdict: `" + (.verdict | md) + "`"),
+    ("- Project filter: `" + (.project_filter | md) + "`"),
+    ("- Corpus profile: `" + (.corpus.profile | md) + "`"),
+    ("- Documents: `" + (.corpus.document_count | md) + "`"),
+    ("- Queries: `" + (.corpus.query_count | md) + "`"),
+    ("- Project summary: `" + (.summary.pass | md) + " pass`, `" + (.summary.fail | md) + " fail`, `" + (.summary.incomplete | md) + " incomplete`"),
+    ("- Same-corpus summary: `" + (.same_corpus_summary.pass | md) + " pass`, `" + (.same_corpus_summary.fail | md) + " fail`, `" + (.same_corpus_summary.incomplete | md) + " incomplete`"),
+    ("- Full check summary: `" + (.full_check_summary.pass | md) + "/" + (.full_check_summary.total | md) + " pass`"),
+    "",
+    "## Projects",
+    "",
+    "| Project | Status | Retrieval | Checks | Elapsed | Reason |",
+    "| --- | --- | --- | --- | --- | --- |",
+    (
+      .projects[]
+      | "| " + (.project | md)
+        + " | `" + (.status | md) + "`"
+        + " | `" + (.retrieval_status | md) + "`"
+        + " | `" + checks + "`"
+        + " | `" + (.elapsed_seconds | unit("s")) + "`"
+        + " | " + (.reason | md) + " |"
+    ),
+    "",
+    (
+      [.projects[] | select(.embedding != null)] as $embedded
+      | if ($embedded | length) > 0 then
+          "## Embedding",
+          "",
+          "| Project | Mode | Provider | Model | Dimensions | Timeout | API Base | Path |",
+          "| --- | --- | --- | --- | --- | --- | --- | --- |",
+          (
+            $embedded[]
+            | "| " + (.project | md)
+              + " | `" + (.embedding.mode | md) + "`"
+              + " | `" + (.embedding.provider_id | md) + "`"
+              + " | `" + (.embedding.model | md) + "`"
+              + " | `" + (.embedding.dimensions | md) + "`"
+              + " | `" + (.embedding.timeout_ms | unit("ms")) + "`"
+              + " | `" + (.embedding.api_base | md) + "`"
+              + " | `" + (.embedding.path | md) + "` |"
+          ),
+          ""
+        else empty end
+    ),
+    "## Result Semantics",
+    "",
+    "- `pass`: every encoded check for the selected project and profile passed.",
+    "- `fail`: clone, install, import, build, retrieval, lifecycle, recovery, concurrency, soak, resource-envelope, or another declared check failed.",
+    "- `incomplete`: the encoded check could not complete without extra provider keys, host integration, native dependency support, durable runtime wiring, or more adapter work.",
+    "",
+    "`incomplete` is not a pass; treat it as benchmark wiring debt."
+  ' "${REPORT}"
+}
+
+if [[ -z "${OUT}" ]]; then
+  render_report  # no output path given: stream the Markdown to stdout
+else
+  mkdir -p "$(dirname "${OUT}")"  # create the destination directory before redirecting into it
+  render_report >"${OUT}"
+  echo "Wrote ${OUT}"
+fi