From 048e249585a085fb8a84b617b2fd6c72475316de Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Mon, 8 Jun 2026 21:21:07 +0800 Subject: [PATCH] Add Docker competitive parity gate --- .dockerignore | 3 + Makefile.toml | 32 +++ README.md | 2 +- docker-compose.parity.yml | 53 ++++ docker/parity/Dockerfile | 23 ++ docs/guide/competitive_parity_testing.md | 80 ++++++ docs/guide/index.md | 2 + docs/spec/index.md | 2 + .../spec/system_competitive_parity_gate_v1.md | 147 ++++++++++ scripts/consolidation-harness.sh | 18 +- scripts/parity-docker-gate.sh | 256 ++++++++++++++++++ 11 files changed, 615 insertions(+), 3 deletions(-) create mode 100644 docker-compose.parity.yml create mode 100644 docker/parity/Dockerfile create mode 100644 docs/guide/competitive_parity_testing.md create mode 100644 docs/spec/system_competitive_parity_gate_v1.md create mode 100755 scripts/parity-docker-gate.sh diff --git a/.dockerignore b/.dockerignore index f0559b26..8bccea2d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,4 +2,7 @@ **/.next **/node_modules **/npm-debug.log +.worktrees .git +target +tmp diff --git a/Makefile.toml b/Makefile.toml index 637bf120..832f0c7e 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -261,6 +261,38 @@ args = [ ] +# Competitive parity +# | task | type | cwd | +# | ------------------- | ------- | --- | +# | parity-docker | command | | +# | parity-docker-clean | command | | + +[tasks.parity-docker] +workspace = false +command = "docker" +args = [ + "compose", + "-f", + "docker-compose.parity.yml", + "run", + "--build", + "--rm", + "parity-runner", +] + +[tasks.parity-docker-clean] +workspace = false +command = "docker" +args = [ + "compose", + "-f", + "docker-compose.parity.yml", + "down", + "-v", + "--remove-orphans", +] + + # Meta # | task | type | cwd | # | ------ | --------- | --- | diff --git a/README.md b/README.md index e9421036..cd17b656 100644 --- a/README.md +++ b/README.md @@ -131,7 +131,7 @@ This table compares capability coverage, not overall 
project quality. | Source-of-truth + rebuildable derived index | ✅ | ⚠️ | ✅ | ⚠️ | ⚠️ | ⚠️ | ✅ | | Hierarchical/recursive retrieval strategy | ⚠️ (in progress) | ⚠️ | ✅ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | | Progressive context loading (L0/L1/L2 style) | ⚠️ (in progress) | ⚠️ | ✅ | ⚠️ | — | ⚠️ | — | -| Built-in web memory inspector/viewer | — | ✅ | — | ✅ (OpenMemory) | — | ✅ | — | +| Built-in web memory inspector/viewer | ✅ | ✅ | — | ✅ (OpenMemory) | — | ✅ | — | | Hosted managed option | — | — | — | ✅ | — | — | — | | Multi-tenant scope semantics | ✅ | ⚠️ | ⚠️ | ✅ | — | — | — | | TTL/lifecycle policy controls | ✅ | ⚠️ | ⚠️ | ✅ | — | ⚠️ | — | diff --git a/docker-compose.parity.yml b/docker-compose.parity.yml new file mode 100644 index 00000000..98530def --- /dev/null +++ b/docker-compose.parity.yml @@ -0,0 +1,53 @@ +name: elf-parity-gate + +services: + postgres: + image: pgvector/pgvector:pg18 + environment: + POSTGRES_DB: postgres + POSTGRES_PASSWORD: elf_dev_password + POSTGRES_USER: elf_dev + healthcheck: + test: + - CMD-SHELL + - pg_isready -U elf_dev -d postgres + interval: 2s + timeout: 5s + retries: 30 + volumes: + - elf-parity-postgres-data:/var/lib/postgresql + + qdrant: + image: qdrant/qdrant:v1.16.3 + volumes: + - elf-parity-qdrant-data:/qdrant/storage + + parity-runner: + build: + context: . 
+ dockerfile: docker/parity/Dockerfile + depends_on: + postgres: + condition: service_healthy + qdrant: + condition: service_started + environment: + CARGO_HOME: /usr/local/cargo + ELF_HARNESS_COLLECTION: elf_parity_consolidation + ELF_HARNESS_DB_NAME: elf_parity_consolidation + ELF_HARNESS_RUN_ID: parity-docker + ELF_PG_DSN: postgres://elf_dev:elf_dev_password@postgres:5432/postgres + ELF_QDRANT_GRPC_URL: http://qdrant:6334 + ELF_QDRANT_HTTP_URL: http://qdrant:6333 + volumes: + - elf-parity-cargo-registry:/usr/local/cargo/registry + - elf-parity-cargo-git:/usr/local/cargo/git + - elf-parity-target:/workspace/target + - ./tmp/parity:/workspace/tmp/parity + +volumes: + elf-parity-cargo-git: + elf-parity-cargo-registry: + elf-parity-postgres-data: + elf-parity-qdrant-data: + elf-parity-target: diff --git a/docker/parity/Dockerfile b/docker/parity/Dockerfile new file mode 100644 index 00000000..8f8a740d --- /dev/null +++ b/docker/parity/Dockerfile @@ -0,0 +1,23 @@ +FROM rust:1-bookworm + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bash \ + ca-certificates \ + clang \ + cmake \ + curl \ + git \ + jq \ + libssl-dev \ + perl \ + pkg-config \ + postgresql-client \ + protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace + +COPY . /workspace + +CMD ["bash", "scripts/parity-docker-gate.sh"] diff --git a/docs/guide/competitive_parity_testing.md b/docs/guide/competitive_parity_testing.md new file mode 100644 index 00000000..0497ae74 --- /dev/null +++ b/docs/guide/competitive_parity_testing.md @@ -0,0 +1,80 @@ +# Competitive Parity Testing + +Goal: Run the Docker-only parity gate that decides whether ELF has enough evidence to be considered against external memory systems. +Read this when: You need to prove ELF meets the minimum adoption bar instead of relying on architecture claims. +Preconditions: Docker and Docker Compose are available on the host. 
+Depends on: `docs/spec/system_competitive_parity_gate_v1.md`, `docs/guide/research/agentmemory_adapter.md`, and `Makefile.toml`. +Verification: `cargo make parity-docker` exits successfully and writes `tmp/parity/competitive-parity-report.json` with `verdict = "pass"`. + +## Run + +Start the gate from the repository root: + +```sh +cargo make parity-docker +``` + +This command invokes Docker Compose on the host. The actual adapter check, +service-backed ELF run, Postgres database, Qdrant vector store, Cargo registry cache, +and Rust build target all run inside Docker-managed containers or volumes. + +The report is written to: + +```text +tmp/parity/competitive-parity-report.json +``` + +## Clean Up + +Remove parity containers and Docker-managed volumes: + +```sh +cargo make parity-docker-clean +``` + +The cleanup command removes Postgres, Qdrant, Cargo cache, and Rust target volumes +for the parity environment. It does not remove the host report directory under +`tmp/parity/`. + +## Current Gate Coverage + +The checked-in gate currently proves this minimum set: + +- the agentmemory fixture adapter maps the sanitized sample into 2 note candidates, + 2 doc candidates, 1 baseline query, and 1 explicit ignored item; +- note candidate source references keep the agentmemory fixture resolver and origin + identifiers; +- unsupported agentmemory memory kinds are rejected with the preserved reason + `unsupported_memory_kind`; +- ELF can run a Postgres/Qdrant-backed retrieval and consolidation harness in Docker; +- consolidation preserves or improves recall while keeping retrieved context size no + larger than the baseline run; +- the local admin viewer route returns 200 during the Docker service run. + +This is not enough for personal production adoption by itself. It is the required +floor that prevents subjective comparisons from being mistaken for evidence. 
+ +## Production Adoption Expansion + +Before using ELF as personal production memory infrastructure, extend the same gate +with private data and live baselines: + +1. Build a sanitized private fixture pack from real personal coding-agent memory + cases. Keep the source fixture out of the repository unless it has been reviewed + for secrets and sensitive content. +2. Run the adapter/import/retrieval path against that private fixture pack inside + Docker. +3. Add at least one live containerized external baseline, starting with agentmemory, + against the same retrieval cases. +4. Keep the acceptance decision strict: ELF is not adopted if it loses on retrieval + quality, migration fidelity, operator inspectability, or failure recovery without + a documented compensating advantage. + +## Failure Handling + +When `cargo make parity-docker` fails: + +- keep `tmp/parity/competitive-parity-report.json` if it was written; +- inspect `tmp/parity/consolidation-harness.log` for service-backed failures; +- fix the failing gate dimension before expanding to broader baselines; +- do not lower thresholds to make a comparison pass. diff --git a/docs/guide/index.md b/docs/guide/index.md index 172c075d..c221adcc 100644 --- a/docs/guide/index.md +++ b/docs/guide/index.md @@ -62,6 +62,8 @@ Then structure the body for execution: ## Guide subfolders +- `docs/guide/competitive_parity_testing.md` for running the Docker-only adoption + gate against external memory-system baselines. - `docs/guide/development/` for repository-development workflows. - `docs/guide/research/` for external comparisons and decision-support materials that are non-normative. diff --git a/docs/spec/index.md b/docs/spec/index.md index ba425c19..e7c8f30c 100644 --- a/docs/spec/index.md +++ b/docs/spec/index.md @@ -35,6 +35,8 @@ Question this index answers: "what must remain true?" and storage invariants. 
- `system_consolidation_proposals_v1.md`: Reviewable derived consolidation run and proposal contract over immutable source evidence. +- `system_competitive_parity_gate_v1.md`: Docker-only adoption gate that decides + whether ELF meets or exceeds selected external memory-system baselines. ## Spec document contract diff --git a/docs/spec/system_competitive_parity_gate_v1.md b/docs/spec/system_competitive_parity_gate_v1.md new file mode 100644 index 00000000..7c130f7f --- /dev/null +++ b/docs/spec/system_competitive_parity_gate_v1.md @@ -0,0 +1,147 @@ +# Competitive Parity Gate v1 Specification + +Purpose: Define the adoption gate ELF must pass before it can be treated as production-eligible memory infrastructure. +Status: normative +Read this when: You are deciding whether ELF is at least as usable as the external memory systems it is being compared against. +Not this document: A market survey, implementation plan, or claim that architecture alone makes ELF better. +Defines: `elf.competitive_parity_gate/v1` dimensions, Docker isolation rules, baseline families, hard thresholds, and report schema. + +Related inputs: + +- `docs/research/2026-06-08-agent-memory-selection.json` +- `docs/guide/research/comparison_external_projects.md` +- `docs/guide/research/agentmemory_adapter.md` +- `docs/spec/system_elf_memory_service_v2.md` +- `docs/spec/system_consolidation_proposals_v1.md` + +## Core Rule + +ELF is adoption-eligible only when current test evidence shows that it meets or +exceeds the selected baseline projects in user-visible value. A design advantage, +unchecked capability table, or speculative architecture claim is not sufficient. + +The gate must fail closed. If ELF cannot run the comparison, preserve evidence, +retrieve expected memory, expose inspection surfaces, or cleanly isolate state, the +gate result is `fail`. 
+ +## Contract Schema + +Canonical schema identifier: + +```text +elf.competitive_parity_gate/v1 +``` + +Every parity report must carry: + +```json +{ + "schema": "elf.competitive_parity_gate.report/v1", + "gate_schema": "elf.competitive_parity_gate/v1" +} +``` + +## Docker Isolation + +Competitive parity runs must use Docker Compose as the execution boundary. + +Required properties: + +- The host may invoke `docker compose`, but benchmark code, service processes, + Postgres, Qdrant, Cargo builds, and test commands must run inside containers. +- The parity compose file must not publish service ports to the host by default. +- Postgres, Qdrant, Cargo registry, Cargo git cache, and Rust target output must use + Docker-managed volumes. +- The only allowed host artifact is the parity report directory, normally + `tmp/parity/`. +- A parity runner must refuse to run on the host unless an explicit + `ELF_PARITY_ALLOW_HOST=1` override is supplied for debugging. +- Cleanup must be possible with `docker compose -f docker-compose.parity.yml down -v + --remove-orphans`. + +## Baseline Families + +The gate tracks baseline families separately so evidence can grow without changing +the core contract: + +- `agentmemory_fixture`: sanitized offline agentmemory-style session exports mapped + through the ELF-owned fixture adapter. +- `agentmemory_live_container`: future containerized agentmemory service comparisons + against the same private evaluation cases. +- `claude_mem_fixture`: future fixture import and retrieval comparison for + progressive-disclosure Claude memory workflows. +- `mem0_openmemory_fixture`: future local OpenMemory-style workflow comparison. +- `qmd_memsearch_fixture`: future local retrieval-quality comparison against + CLI/MCP-first hybrid retrieval systems. + +External projects are baselines and product references. They must not become hidden +runtime dependencies of ELF core memory semantics unless a separate design spec +explicitly adopts that dependency. 
+ +## Gate Dimensions + +Each completed gate report must evaluate these dimensions: + +| Dimension | Meaning | First hard threshold | +| --------- | ------- | -------------------- | +| `docker_isolation` | The full run used container services and container-local build state. | `pass` | +| `adapter_coverage` | Baseline fixture records are mapped into candidate ELF notes, docs, queries, and ignored reasons. | agentmemory sample emits 2 note candidates, 2 doc candidates, 1 baseline query, and 1 ignored item | +| `provenance_integrity` | Candidate writes keep source-system, session, and item references. | agentmemory note candidate provenance completeness is `1.0` | +| `unsafe_rejection` | Unsupported or unsafe external memory items are rejected explicitly. | at least one ignored item with reason `unsupported_memory_kind` | +| `retrieval_quality` | ELF returns the expected memory for parity queries after normal ingestion/indexing. | consolidation harness after-run recall is not below baseline recall | +| `context_efficiency` | Retrieval/consolidation does not require more context to preserve recall. | consolidation harness after-run context chars are not above baseline | +| `source_safety` | Consolidation output remains derived and reviewable; authoritative source records are not destructively rewritten. | consolidation proposal/source immutability contract remains satisfied | +| `operator_inspectability` | A local operator can inspect memory state without write authority. | admin `GET /viewer` returns 200 during the Docker service run | +| `cleanup` | Test state can be removed without host database or vector-store residue. | documented compose cleanup command exists and succeeds when run | + +These are minimum thresholds. Passing them only proves that the checked-in gate is +alive. Personal production use requires the same gate shape to pass against a larger +private fixture pack and at least one live containerized baseline. 
+ +## First Gate Scope + +The first checked-in executable gate covers: + +- Docker-only execution through `docker-compose.parity.yml`. +- Offline `agentmemory_fixture` adapter validation using the sanitized sample fixture. +- Service-backed ELF consolidation/retrieval validation using Postgres and Qdrant + containers. +- Admin viewer availability during the service-backed run. +- A machine-readable report under `tmp/parity/competitive-parity-report.json`. + +The first gate does not claim broad market superiority. It establishes a hard, +repeatable lower bound that must stay green before broader baselines are meaningful. + +## Report Schema + +Parity reports must be JSON objects with at least: + +- `schema`: `elf.competitive_parity_gate.report/v1` +- `gate_schema`: `elf.competitive_parity_gate/v1` +- `gate_id`: stable or timestamped run identifier +- `verdict`: `pass` or `fail` +- `docker_only`: boolean +- `baselines`: object keyed by baseline family +- `dimensions`: object keyed by gate dimension +- `thresholds`: object describing the hard thresholds used by the run +- `artifacts`: object with relative paths to preserved run evidence + +Reports may include extra metrics, but extra fields must not weaken the hard +thresholds in this spec. + +## Adoption Decision + +Treat ELF as `not_adoptable_for_production` while any of these are true: + +- The Docker parity gate fails. +- The gate only passes the checked-in toy fixture and has not passed a private + personal fixture pack. +- At least one selected external baseline outperforms ELF on retrieval quality, + migration fidelity, operator inspectability, or failure recovery without a + documented compensating ELF advantage. +- Evidence cannot be reproduced from the report artifacts. 
+ +Treat ELF as `personal_production_candidate` only after the Docker gate passes on +both the checked-in fixture and a private personal fixture pack, and after at least +one live external baseline comparison shows ELF is no worse than that baseline on +the selected acceptance metrics. diff --git a/scripts/consolidation-harness.sh b/scripts/consolidation-harness.sh index e3ceddfa..8816fa82 100755 --- a/scripts/consolidation-harness.sh +++ b/scripts/consolidation-harness.sh @@ -28,7 +28,7 @@ else exit 1 fi -for cmd in curl psql taplo; do +for cmd in curl psql; do if ! command -v "${cmd}" >/dev/null 2>&1; then echo "Missing ${cmd}." >&2 exit 1 @@ -332,7 +332,11 @@ redact_secrets_on_write = true reject_non_english = true TOML -taplo fmt "${CFG_BASE}" >/dev/null 2>&1 +if command -v taplo >/dev/null 2>&1; then + taplo fmt "${CFG_BASE}" >/dev/null 2>&1 +else + echo "taplo not found; continuing with unformatted generated harness config." +fi echo "Building harness binaries." (cd "${ROOT_DIR}" && cargo build -p elf-worker -p elf-api -p elf-eval >/dev/null) @@ -358,6 +362,16 @@ if [[ "${status}" != "200" ]]; then exit 1 fi +if [[ "${ELF_HARNESS_CHECK_VIEWER:-0}" == "1" ]]; then + VIEWER_BASE="http://${ADMIN_BIND}" + viewer_status="$(curl -s -o /dev/null -w '%{http_code}' "${VIEWER_BASE}/viewer" 2>/dev/null || true)" + if [[ "${viewer_status}" != "200" ]]; then + echo "Admin viewer did not return 200 at ${VIEWER_BASE}/viewer. Check logs: ${API_LOG}." >&2 + exit 1 + fi + echo "Admin viewer check passed at ${VIEWER_BASE}/viewer." +fi + TENANT_ID="consolidation-tenant-${RUN_ID}" PROJECT_ID="consolidation-project-${RUN_ID}" AGENT_ID="consolidation-agent-${RUN_ID}" diff --git a/scripts/parity-docker-gate.sh b/scripts/parity-docker-gate.sh new file mode 100755 index 00000000..99cd5aaf --- /dev/null +++ b/scripts/parity-docker-gate.sh @@ -0,0 +1,256 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)"
+REPORT_DIR="${ELF_PARITY_REPORT_DIR:-${ROOT_DIR}/tmp/parity}"
+RUN_ID="${ELF_PARITY_RUN_ID:-parity-$(date +%Y%m%d%H%M%S)}"
+
+# Fail closed: the gate only counts as evidence when it runs inside a container.
+# ELF_PARITY_ALLOW_HOST=1 is a debugging escape hatch, not a passing configuration.
+if [[ ! -f "/.dockerenv" && "${ELF_PARITY_ALLOW_HOST:-0}" != "1" ]]; then
+  echo "Refusing to run parity gate outside Docker. Use cargo make parity-docker." >&2
+  exit 1
+fi
+
+# Record where the run really executes so the report never claims Docker
+# isolation for a host-override run. The value is a JSON boolean literal.
+if [[ -f "/.dockerenv" ]]; then
+  DOCKER_ONLY="true"
+else
+  DOCKER_ONLY="false"
+fi
+
+for cmd in cargo curl jq psql; do
+  if ! command -v "${cmd}" >/dev/null 2>&1; then
+    echo "Missing ${cmd} in parity runner." >&2
+    exit 1
+  fi
+done
+
+mkdir -p "${REPORT_DIR}" "${ROOT_DIR}/tmp"
+
+ADAPTER_OUT="${REPORT_DIR}/agentmemory-adapter.json"
+CONSOLIDATION_LOG="${REPORT_DIR}/consolidation-harness.log"
+CONSOLIDATION_BEFORE="${REPORT_DIR}/consolidation-before.json"
+CONSOLIDATION_AFTER="${REPORT_DIR}/consolidation-after.json"
+REPORT_OUT="${REPORT_DIR}/competitive-parity-report.json"
+
+# The report directory is a host bind mount that survives between runs. Drop
+# artifacts from earlier runs so write_report can only see evidence produced by
+# this run, and so a crash cannot leave a stale passing report behind.
+rm -f \
+  "${ADAPTER_OUT}" \
+  "${CONSOLIDATION_LOG}" \
+  "${CONSOLIDATION_BEFORE}" \
+  "${CONSOLIDATION_AFTER}" \
+  "${REPORT_OUT}"
+
+# json_metric FILE FILTER FALLBACK
+# Print the compact JSON result of the jq FILTER applied to FILE, or FALLBACK
+# when FILE is missing, is not valid JSON, or the filter errors or yields
+# nothing. It always succeeds, so a broken artifact cannot abort report writing
+# under `set -e`. Compact output keeps the value valid for `jq --argjson`.
+json_metric() {
+  local file="$1"
+  local filter="$2"
+  local fallback="$3"
+  local value=""
+
+  if [[ -f "${file}" ]] \
+    && value="$(jq -c "${filter}" "${file}" 2>/dev/null)" \
+    && [[ -n "${value}" ]]; then
+    printf '%s\n' "${value}"
+  else
+    printf '%s\n' "${fallback}"
+  fi
+}
+
+# write_report VERDICT [FAILURE_REASON] [ADAPTER_STATUS] [CONSOLIDATION_STATUS]
+# Write the parity report to REPORT_OUT. Metrics are read from whichever run
+# artifacts exist; missing or unreadable artifacts fall back to zero/false so
+# the matching dimensions evaluate to "fail".
+#   VERDICT               "pass" or "fail".
+#   FAILURE_REASON        free text; empty is stored as null.
+#   ADAPTER_STATUS        defaults to "not_run".
+#   CONSOLIDATION_STATUS  defaults to "not_run".
+write_report() {
+  local verdict="$1"
+  local failure_reason="${2:-}"
+  local adapter_status="${3:-not_run}"
+  local consolidation_status="${4:-not_run}"
+
+  # Share of note candidates that keep the fixture resolver plus string
+  # fixture/session/memory identifiers; 0 when there are no note candidates.
+  local provenance_filter='
+    if (.summary.note_candidate_count // 0) == 0 then
+      0
+    else
+      (
+        [
+          .note_candidates[]
+          | select(
+              .notes_ingest_item.source_ref.resolver == "agentmemory_fixture/v1"
+              and (.notes_ingest_item.source_ref.ref.fixture_id | type == "string")
+              and (.notes_ingest_item.source_ref.ref.session_id | type == "string")
+              and (.notes_ingest_item.source_ref.ref.memory_id | type == "string")
+            )
+        ] | length
+      ) / .summary.note_candidate_count
+    end
+  '
+  local unsupported_filter='[.ignored_items[]? | select(.reason == "unsupported_memory_kind")] | length > 0'
+
+  local note_candidates doc_candidates baseline_queries ignored_items
+  local provenance_completeness unsupported_kind_rejected
+  local base_recall after_recall base_context after_context
+
+  note_candidates="$(json_metric "${ADAPTER_OUT}" '.summary.note_candidate_count // 0' 0)"
+  doc_candidates="$(json_metric "${ADAPTER_OUT}" '.summary.doc_candidate_count // 0' 0)"
+  baseline_queries="$(json_metric "${ADAPTER_OUT}" '.summary.baseline_query_count // 0' 0)"
+  ignored_items="$(json_metric "${ADAPTER_OUT}" '.summary.ignored_count // 0' 0)"
+  provenance_completeness="$(json_metric "${ADAPTER_OUT}" "${provenance_filter}" 0)"
+  unsupported_kind_rejected="$(json_metric "${ADAPTER_OUT}" "${unsupported_filter}" false)"
+
+  base_recall="$(json_metric "${CONSOLIDATION_BEFORE}" '.summary.avg_recall_at_k // 0' 0)"
+  base_context="$(json_metric "${CONSOLIDATION_BEFORE}" '.summary.avg_retrieved_summary_chars // 0' 0)"
+  after_recall="$(json_metric "${CONSOLIDATION_AFTER}" '.summary.avg_recall_at_k // 0' 0)"
+  after_context="$(json_metric "${CONSOLIDATION_AFTER}" '.summary.avg_retrieved_summary_chars // 0' 0)"
+
+  jq -n \
+    --arg schema "elf.competitive_parity_gate.report/v1" \
+    --arg gate_schema "elf.competitive_parity_gate/v1" \
+    --arg gate_id "${RUN_ID}" \
+    --arg verdict "${verdict}" \
+    --arg failure_reason "${failure_reason}" \
+    --arg adapter_status "${adapter_status}" \
+    --arg consolidation_status "${consolidation_status}" \
+    --argjson docker_only "${DOCKER_ONLY}" \
+    --argjson note_candidates "${note_candidates}" \
+    --argjson doc_candidates "${doc_candidates}" \
+    --argjson baseline_queries "${baseline_queries}" \
+    --argjson ignored_items "${ignored_items}" \
+    --argjson provenance_completeness "${provenance_completeness}" \
+    --argjson unsupported_kind_rejected "${unsupported_kind_rejected}" \
+    --argjson base_recall "${base_recall}" \
+    --argjson after_recall "${after_recall}" \
+    --argjson base_context "${base_context}" \
+    --argjson after_context "${after_context}" \
+    '{
+      schema: $schema,
+      gate_schema: $gate_schema,
+      gate_id: $gate_id,
+      verdict: $verdict,
+      failure_reason: (if $failure_reason == "" then null else $failure_reason end),
+      docker_only: $docker_only,
+      baselines: {
+        agentmemory_fixture: {
+          status: $adapter_status,
+          note_candidate_count: $note_candidates,
+          doc_candidate_count: $doc_candidates,
+          baseline_query_count: $baseline_queries,
+          ignored_count: $ignored_items,
+          provenance_completeness: $provenance_completeness,
+          unsupported_kind_rejected: $unsupported_kind_rejected
+        },
+        elf_consolidation_harness: {
+          status: $consolidation_status,
+          baseline_avg_recall_at_k: $base_recall,
+          after_avg_recall_at_k: $after_recall,
+          baseline_avg_retrieved_summary_chars: $base_context,
+          after_avg_retrieved_summary_chars: $after_context
+        }
+      },
+      dimensions: {
+        docker_isolation: {
+          status: (if $docker_only then "pass" else "fail" end)
+        },
+        adapter_coverage: {
+          status: (if $note_candidates == 2 and $doc_candidates == 2 and $baseline_queries == 1 and $ignored_items == 1 then "pass" else "fail" end)
+        },
+        provenance_integrity: {
+          status: (if $provenance_completeness == 1 then "pass" else "fail" end)
+        },
+        unsafe_rejection: {
+          status: (if $unsupported_kind_rejected then "pass" else "fail" end)
+        },
+        retrieval_quality: {
+          status: (if $consolidation_status == "pass" and $after_recall >= $base_recall then "pass" else "fail" end)
+        },
+        context_efficiency: {
+          status: (if $consolidation_status == "pass" and $after_context <= $base_context then "pass" else "fail" end)
+        },
+        source_safety: {
+          status: (if $consolidation_status == "pass" then "pass" else "fail" end)
+        },
+        operator_inspectability: {
+          status: (if $consolidation_status == "pass" then "pass" else "fail" end),
+          checked_route: "GET /viewer"
+        },
+        cleanup: {
+          status: "documented",
+          command: "cargo make parity-docker-clean"
+        }
+      },
+      thresholds: {
+        agentmemory_fixture: {
+          note_candidate_count: 2,
+          doc_candidate_count: 2,
+          baseline_query_count: 1,
+          ignored_count: 1,
+          provenance_completeness: 1,
+          requires_unsupported_memory_kind_rejection: true
+        },
+        consolidation: {
+          after_recall_must_be_at_least_baseline: true,
+          after_context_chars_must_not_exceed_baseline: true,
+          viewer_must_return_200: true
+        }
+      },
+      artifacts: {
+        adapter_output: "tmp/parity/agentmemory-adapter.json",
+        consolidation_log: "tmp/parity/consolidation-harness.log",
+        consolidation_before: "tmp/parity/consolidation-before.json",
+        consolidation_after: "tmp/parity/consolidation-after.json"
+      }
+    }' >"${REPORT_OUT}"
+}
+
+# fail_gate REASON [ADAPTER_STATUS] [CONSOLIDATION_STATUS]
+# Write a failing report, print the reason and report path to stderr, and exit 1.
+# Both statuses default to "fail".
+fail_gate() {
+  local reason="$1"
+  local adapter_status="${2:-fail}"
+  local consolidation_status="${3:-fail}"
+  write_report "fail" "${reason}" "${adapter_status}" "${consolidation_status}"
+  echo "Parity gate failed: ${reason}" >&2
+  echo "Report: ${REPORT_OUT}" >&2
+  exit 1
+}
+
+# Succeed only when the written report has verdict "pass" and every dimension
+# except "cleanup" (whose status is "documented") has status "pass".
+assert_passing_report() {
+  jq -e '
+    .verdict == "pass"
+    and ([.dimensions | to_entries[] | select(.key != "cleanup" and .value.status != "pass")] | length == 0)
+  ' "${REPORT_OUT}" >/dev/null
+}
+
+echo "Waiting for Docker service dependencies."
+# Poll up to 120 times, 0.5s apart, until both Postgres and Qdrant answer.
+for _ in $(seq 1 120); do
+  if psql "${ELF_PG_DSN}" -tAc "SELECT 1" >/dev/null 2>&1 \
+    && curl -fsS "${ELF_QDRANT_HTTP_URL}/collections" >/dev/null 2>&1; then
+    break
+  fi
+  sleep 0.5
+done
+
+# Re-check each dependency separately so the report names the one that is down.
+if ! psql "${ELF_PG_DSN}" -tAc "SELECT 1" >/dev/null 2>&1; then
+  fail_gate "postgres dependency did not become reachable" "not_run" "not_run"
+fi
+
+if ! curl -fsS "${ELF_QDRANT_HTTP_URL}/collections" >/dev/null 2>&1; then
+  fail_gate "qdrant dependency did not become reachable" "not_run" "not_run"
+fi
+
+echo "Running agentmemory fixture adapter gate."
+# Adapter gate: map the sanitized sample fixture, then check the hard thresholds.
+(cd "${ROOT_DIR}" && cargo run -q -p elf-eval --bin agentmemory_fixture_adapter -- \
+  --fixture apps/elf-eval/fixtures/agentmemory/sample_session.json \
+  --out "${ADAPTER_OUT}") || fail_gate "agentmemory fixture adapter command failed" "fail" "not_run"
+
+# The select() collects note candidates with broken provenance, so the list
+# must be empty; at least one item must be rejected as unsupported_memory_kind.
+jq -e '
+  .schema == "elf.agentmemory_adapter/v1"
+  and .summary.note_candidate_count == 2
+  and .summary.doc_candidate_count == 2
+  and .summary.baseline_query_count == 1
+  and .summary.ignored_count == 1
+  and (
+    [
+      .note_candidates[]
+      | select(
+          .notes_ingest_item.source_ref.resolver != "agentmemory_fixture/v1"
+          or (.notes_ingest_item.source_ref.ref.fixture_id | type != "string")
+          or (.notes_ingest_item.source_ref.ref.session_id | type != "string")
+          or (.notes_ingest_item.source_ref.ref.memory_id | type != "string")
+        )
+    ] | length == 0
+  )
+  and ([.ignored_items[]? | select(.reason == "unsupported_memory_kind")] | length >= 1)
+' "${ADAPTER_OUT}" >/dev/null \
+  || fail_gate "agentmemory fixture adapter thresholds failed" "fail" "not_run"
+
+echo "Running service-backed consolidation parity gate."
+# `set -o pipefail` makes a harness failure surface through the tee pipeline.
+# The cd is chained with && because errexit is suppressed inside a `||` list.
+(
+  cd "${ROOT_DIR}" \
+    && ELF_HARNESS_CHECK_VIEWER=1 bash scripts/consolidation-harness.sh
+) 2>&1 | tee "${CONSOLIDATION_LOG}" \
+  || fail_gate "consolidation harness thresholds failed" "pass" "fail"
+
+# A harness that exits 0 without writing its outputs must still produce a
+# failing report instead of aborting silently under `set -e`.
+cp "${ROOT_DIR}/tmp/elf.consolidation.out.base.json" "${CONSOLIDATION_BEFORE}" \
+  || fail_gate "consolidation harness did not produce baseline output" "pass" "fail"
+cp "${ROOT_DIR}/tmp/elf.consolidation.out.after.json" "${CONSOLIDATION_AFTER}" \
+  || fail_gate "consolidation harness did not produce after-run output" "pass" "fail"
+
+# Write the optimistic report, then let the computed dimensions veto it.
+write_report "pass" "" "pass" "pass"
+assert_passing_report || fail_gate "one or more parity report dimensions failed" "pass" "pass"
+
+echo "Parity gate passed."
+echo "Report: ${REPORT_OUT}"