diff --git a/README.md b/README.md index ac168376..e9421036 100644 --- a/README.md +++ b/README.md @@ -116,26 +116,26 @@ flowchart TB Quick comparison snapshot (objective/high-level). This table compares capability coverage, not overall project quality. -| Capability | ELF | OpenViking | mem0 | qmd | claude-mem | memsearch | -| ---------- | --- | ---------- | ---- | --- | ---------- | --------- | -| Local-first self-hosted workflow | ✅ | ✅ | ✅ (OpenMemory) | ✅ | ✅ | ✅ | -| MCP integration | ✅ | — | ✅ (OpenMemory) | ✅ | ✅ | ⚠️ | -| CLI-first developer workflow | — | ✅ | — | ✅ | ⚠️ | ✅ | -| HTTP API service surface | ✅ | ✅ | ✅ | ⚠️ (MCP Streamable HTTP) | ✅ | — | -| Query expansion or query rewriting | ✅ | ✅ | ⚠️ | ✅ | — | — | -| LLM reranking stage | ✅ | ⚠️ | ⚠️ | ✅ | — | — | -| Hybrid dense + sparse retrieval | ✅ | ✅ | ⚠️ | ✅ | ✅ | ✅ | -| Progressive disclosure style retrieval | ✅ | ✅ | — | — | ✅ | ⚠️ | -| Evidence-bound memory writes | ✅ | — | — | — | — | — | -| Deterministic and LLM-ingestion boundary | ✅ | ⚠️ | ⚠️ | — | — | — | -| Source-of-truth + rebuildable derived index | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ✅ | -| Hierarchical/recursive retrieval strategy | ⚠️ (in progress) | ✅ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | -| Progressive context loading (L0/L1/L2 style) | ⚠️ (in progress) | ✅ | ⚠️ | — | ⚠️ | — | -| Built-in web memory inspector/viewer | — | — | ✅ (OpenMemory) | — | ✅ | — | -| Hosted managed option | — | — | ✅ | — | — | — | -| Multi-tenant scope semantics | ✅ | ⚠️ | ✅ | — | — | — | -| TTL/lifecycle policy controls | ✅ | ⚠️ | ✅ | — | ⚠️ | — | -| Graph memory mode | ⚠️ (graph-lite: structured relations persisted; optional search `relation_context`) | ⚠️ (URI-link relations) | ✅ (optional) | — | — | — | +| Capability | ELF | agentmemory | OpenViking | mem0 | qmd | claude-mem | memsearch | +| ---------- | --- | ----------- | ---------- | ---- | --- | ---------- | --------- | +| Local-first self-hosted workflow | ✅ | ✅ | ✅ | ✅ (OpenMemory) | ✅ | ✅ | ✅ | +| MCP integration | ✅ | ✅ | — 
| ✅ (OpenMemory) | ✅ | ✅ | ⚠️ | +| CLI-first developer workflow | — | ✅ | ✅ | — | ✅ | ⚠️ | ✅ | +| HTTP API service surface | ✅ | ✅ | ✅ | ✅ | ⚠️ (MCP Streamable HTTP) | ✅ | — | +| Query expansion or query rewriting | ✅ | ⚠️ | ✅ | ⚠️ | ✅ | — | — | +| LLM reranking stage | ✅ | ⚠️ | ⚠️ | ⚠️ | ✅ | — | — | +| Hybrid dense + sparse retrieval | ✅ | ✅ | ✅ | ⚠️ | ✅ | ✅ | ✅ | +| Progressive disclosure style retrieval | ✅ | ⚠️ | ✅ | — | — | ✅ | ⚠️ | +| Evidence-bound memory writes | ✅ | — | — | — | — | — | — | +| Deterministic and LLM-ingestion boundary | ✅ | ⚠️ | ⚠️ | ⚠️ | — | — | — | +| Source-of-truth + rebuildable derived index | ✅ | ⚠️ | ✅ | ⚠️ | ⚠️ | ⚠️ | ✅ | +| Hierarchical/recursive retrieval strategy | ⚠️ (in progress) | ⚠️ | ✅ | ⚠️ | ⚠️ | ⚠️ | ⚠️ | +| Progressive context loading (L0/L1/L2 style) | ⚠️ (in progress) | ⚠️ | ✅ | ⚠️ | — | ⚠️ | — | +| Built-in web memory inspector/viewer | — | ✅ | — | ✅ (OpenMemory) | — | ✅ | — | +| Hosted managed option | — | — | — | ✅ | — | — | — | +| Multi-tenant scope semantics | ✅ | ⚠️ | ⚠️ | ✅ | — | — | — | +| TTL/lifecycle policy controls | ✅ | ⚠️ | ⚠️ | ✅ | — | ⚠️ | — | +| Graph memory mode | ⚠️ (graph-lite: structured relations persisted; optional search `relation_context`) | ⚠️ | ⚠️ (URI-link relations) | ✅ (optional) | — | — | — | Legend: `✅` built-in and documented; `⚠️` partial, optional, or in-progress; `—` not a first-class documented capability. 
@@ -144,6 +144,7 @@ Project signature strengths (what each does especially well): | Project | Signature strengths | Potential ELF adoption value | | ------- | ------------------- | ---------------------------- | | ELF | Evidence-bound writes, deterministic ingestion boundary, SoT + rebuildable index, eval tooling | Keep as core differentiators while extending retrieval and UX | +| agentmemory | Cross-agent hooks, MCP/REST packaging, local viewer, iii console observability, coding-agent continuity benchmarks | Use as adapter/baseline and UX reference, not a replacement for ELF provenance semantics | | OpenViking | Filesystem-like context model (`viking://`), hierarchical retrieval, staged retrieval trajectory | Improve query planning, recursive retrieval, and explainable stage outputs | | mem0 | Broad ecosystem (SDK + hosted + OpenMemory), multi-entity scope, lifecycle + optional graph memory | Strengthen event/history APIs and additive graph context channel | | qmd | High-quality local retrieval pipeline (query expansion + weighted fusion + rerank), strong CLI/MCP workflow | Borrow transparent routing/fusion knobs and local debugging ergonomics | @@ -154,8 +155,9 @@ Detailed comparison, mechanism-level analysis, and source map: - [Detailed External Comparison](docs/guide/research/comparison_external_projects.md) - [Research Projects Inventory](docs/guide/research/research_projects_inventory.md) +- [Agent Memory Selection Research Run](docs/research/2026-06-08-agent-memory-selection.json) -Snapshot date in that document: February 17, 2026. +Latest external research refresh: June 8, 2026. ## Documentation diff --git a/docs/governance.md b/docs/governance.md index 856fc882..e2b3fe1e 100644 --- a/docs/governance.md +++ b/docs/governance.md @@ -24,6 +24,7 @@ The split between `spec` and `guide` is by task shape, not by reader type. | --- | --- | --- | --- | --- | | Spec | `docs/spec/` | What must be true? 
| Contracts, schemas, invariants, required behavior | Any behavior or schema change | | Guide | `docs/guide/` | What should I do? | Runbooks, migrations, validation, troubleshooting | Any procedure or operational change | +| Research runs | `docs/research/` | Which evidence-backed research run reached what state? | Machine-readable hypotheses, evidence, trade-offs, challenge records, and terminal decision state | When a research workflow needs durable, replayable state | | Plan artifacts | `docs/plans/` | Which saved plan artifact should a planning tool or execution workflow use? | Tool-managed planning outputs | As emitted or updated by the relevant tool | ## Placement rules @@ -32,6 +33,8 @@ The split between `spec` and `guide` is by task shape, not by reader type. - If a document defines actions, it belongs in `docs/guide/`. - If a document is non-normative decision support, comparison, or research input, treat it as guide-class material and store it under `docs/guide/`. +- If a research workflow requires a machine-readable run file with replayable events, + store that run file under `docs/research/` and link to it from the relevant guide. - Do not treat `docs/plans/` as a general-purpose docs bucket. - Use `docs/plans/` only for artifacts produced or consumed by planning tools or workflows that explicitly depend on saved plan files. @@ -85,7 +88,9 @@ When answering a repository question: - "What must be true?" -> `docs/spec/index.md` - "What should I do?" -> `docs/guide/index.md` 3. Read `Makefile.toml` when the task depends on repository automation or named tasks. -4. Use `docs/plans/` only when the task explicitly concerns a saved plan artifact used by +4. Use `docs/research/` only when the task explicitly concerns a machine-readable + research run file used by a research workflow. +5. Use `docs/plans/` only when the task explicitly concerns a saved plan artifact used by a planning tool or execution workflow. 
## Update workflow diff --git a/docs/guide/research/comparison_external_projects.md b/docs/guide/research/comparison_external_projects.md index 177cf800..4594b8b2 100644 --- a/docs/guide/research/comparison_external_projects.md +++ b/docs/guide/research/comparison_external_projects.md @@ -8,6 +8,8 @@ Outputs: A comparison matrix and trade-off summary suitable for follow-up design Scope note: This document is intentionally detailed and source-heavy. Keep `README.md` concise and link here for full analysis. For a full list of reviewed and pending projects, see `docs/guide/research/research_projects_inventory.md`. +For the June 2026 agentmemory and dreaming decision run, see +`docs/research/2026-06-08-agent-memory-selection.json`. Comparison focuses on shared capabilities, ELF distinctives, and objective trade-offs. These projects solve adjacent problems, but their primary storage units and default workflows differ. @@ -30,6 +32,91 @@ Legend: Note: In this section, mem0 refers to the Mem0 ecosystem, including OpenMemory (an MCP memory server with a built-in UI). OpenViking is included as a newly reviewed project with mechanism-level analysis. +## June 2026 agentmemory And Dreaming Refresh + +Snapshot date for this section: June 8, 2026. + +This refresh re-evaluates ELF after the June 2026 hardening work and after the +appearance of [agentmemory](https://github.com/rohitg00/agentmemory) as a high-velocity +coding-agent memory project. It also records the current vendor direction around +dreaming-style background memory consolidation. + +### Current ELF Position + +ELF remains strongest as a high-trust memory service rather than a turnkey coding-agent +continuity plugin. 
The current main branch has: + +- evidence-linked fact writes and quote-bound provenance; +- deterministic `add_note` separated from LLM-driven `add_event`; +- Postgres as source of truth and Qdrant as a rebuildable derived index; +- scoped HTTP/MCP service semantics, TTL/lifecycle policy, graph-lite relation context, + and retrieval evaluation tooling; +- recently restored local gates, stricter config presence, generated OpenAPI/Scalar docs, + and Docker Compose service dependencies. + +### agentmemory + +agentmemory is now important enough to track as a first-class comparison target. Its +public README advertises cross-agent support for Claude Code, Codex CLI, Cursor, Gemini +CLI, OpenCode, and generic MCP clients; MCP/REST access; hook-based capture; hybrid +BM25/vector/graph retrieval; consolidation/lifecycle behavior; a local viewer on `:3113`; +and iii console observability for traces, KV state, triggers, queues, and streams. Its +roadmap still lists benchmark CI, session replay UI, governance baseline, enterprise trust +features, and a v1.0 stability freeze as future work. + +ELF implication: do not replace ELF with agentmemory. Treat it as: + +- an optional capture/import adapter for coding-agent session observations; +- a benchmark and UX baseline for local continuity workflows; +- a source of product ideas around hooks, viewer, replay, audit, and tool breadth. + +### Dreaming And Background Consolidation + +OpenAI frames dreaming as background curation that synthesizes memory state, applies +preferences, and keeps memory current over time. Anthropic Claude Dreams is the strongest +safety reference: a dream reads an input memory store plus 1-100 sessions, produces a +separate output memory store, never modifies the input store, and leaves the output +reviewable, attachable, discardable, archivable, or deletable. 
Google examples add two +operator patterns: Always-On Memory Agent runs scheduled consolidation, while Gemini CLI +Auto Memory mines idle transcripts but writes reviewable patches and skill drafts to an +inbox before anything is applied. + +ELF implication: dreaming should be a reviewed derived layer over authoritative evidence, +not a destructive rewrite path. The target shape is: + +- immutable observations, notes, events, traces, and source pointers as input; +- asynchronous consolidation jobs that produce candidate derived memories, pages, graph + views, or skills; +- explicit lineage, diff, confidence, contradiction/staleness markers, and review/apply + controls; +- rebuildable outputs that can be discarded without corrupting source-of-truth memory. + +### Current Recommendation + +Continue building ELF. Do not directly adopt agentmemory or managed dreaming as the core +backend. The next work should prioritize: + +1. a reviewable derived consolidation pipeline; +2. read-only viewer plus retrieval/consolidation observability; +3. optional agentmemory import/baseline adapter; +4. graph-lite typed query and derived knowledge pages with provenance/lint. + +This ordering reuses the existing vNext planning surface instead of starting a parallel +roadmap: [XY-286](https://linear.app/hack-ink/issue/XY-286/knowledge-memory-derived-entityconceptproject-pages-with-provenance), +[XY-19](https://linear.app/hack-ink/issue/XY-19/add-a-read-only-web-viewer-for-sessions-and-traces), +[XY-27](https://linear.app/hack-ink/issue/XY-27/viewer-add-retrieval-observability-panels-on-top-of-the-read-only), +and [XY-70](https://linear.app/hack-ink/issue/XY-70/graph-lite-dx-typed-schema-typed-query-nanograph-inspired) +remain the right backbone. 
+ +Primary sources for this refresh: + +- https://github.com/rohitg00/agentmemory +- https://raw.githubusercontent.com/rohitg00/agentmemory/main/ROADMAP.md +- https://openai.com/index/chatgpt-memory-dreaming/ +- https://platform.claude.com/docs/en/managed-agents/dreams +- https://github.com/GoogleCloudPlatform/generative-ai/tree/main/gemini/agents/always-on-memory-agent +- https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/auto-memory.md + ## Scope And Intended Use | Aspect | ELF | [memsearch](https://github.com/zilliztech/memsearch) | [qmd](https://github.com/tobi/qmd) | [claude-mem](https://github.com/thedotmack/claude-mem) | [mem0](https://github.com/mem0ai/mem0) | diff --git a/docs/guide/research/index.md b/docs/guide/research/index.md new file mode 100644 index 00000000..2c3c562d --- /dev/null +++ b/docs/guide/research/index.md @@ -0,0 +1,18 @@ +# Research Guide Index + +Goal: Route agents to external comparison and decision-support research for ELF memory architecture. +Read this when: You need to compare ELF with adjacent memory, context, RAG, or consolidation systems. +Inputs: Current ELF docs/code, public external project docs, tracker state, and checked-in research run files. +Depends on: `docs/index.md`, `docs/governance.md`, and `docs/research/` for machine-readable research runs. +Outputs: The smallest comparison or inventory document needed for implementation decisions. + +## Documents + +- `research_projects_inventory.md`: audited and pending external projects, research depth, and current planning surface. +- `comparison_external_projects.md`: detailed capability comparison, project trade-offs, source map, and research-backed ELF directions. + +## Machine-Readable Runs + +Authoritative machine-readable research run JSON files live under `docs/research/`. +Use those files when a research conclusion needs replayable hypotheses, evidence, +trade-offs, challenge records, and terminal decision state. 
diff --git a/docs/guide/research/research_projects_inventory.md b/docs/guide/research/research_projects_inventory.md index 28c5b0d8..6cf50e62 100644 --- a/docs/guide/research/research_projects_inventory.md +++ b/docs/guide/research/research_projects_inventory.md @@ -6,7 +6,7 @@ Inputs: Existing research notes, open architecture questions, and tracked adopti Depends on: `docs/guide/research/comparison_external_projects.md`. Outputs: A current inventory of reviewed and pending external projects. -Last updated: April 17, 2026. +Last updated: June 8, 2026. ## Legend @@ -18,6 +18,10 @@ Last updated: April 17, 2026. | Project | Research depth | Current status | Why it matters to ELF | Primary reference | | ------- | -------------- | -------------- | --------------------- | ----------------- | +| [agentmemory](https://github.com/rohitg00/agentmemory) | D1 | Reviewed | Cross-agent coding-memory hooks, MCP/REST surface, viewer, consolidation lifecycle, and external benchmark target | `docs/guide/research/comparison_external_projects.md`; `docs/research/2026-06-08-agent-memory-selection.json` | +| [OpenAI ChatGPT Memory Dreaming](https://openai.com/index/chatgpt-memory-dreaming/) | D1 | Reviewed | Background memory synthesis and staleness repair as a product direction | `docs/research/2026-06-08-agent-memory-selection.json` | +| [Claude Managed Agents Dreams](https://platform.claude.com/docs/en/managed-agents/dreams) | D1 | Reviewed | Reviewable derived memory-store output over past sessions; strong safety shape for ELF consolidation | `docs/research/2026-06-08-agent-memory-selection.json` | +| [Gemini CLI Auto Memory](https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/auto-memory.md) | D1 | Reviewed | Background session mining with project-local review inbox for memory patches and skills | `docs/research/2026-06-08-agent-memory-selection.json` | | [mem0](https://github.com/mem0ai/mem0) | D2 | Reviewed | Graph memory as additive context, memory history and async 
mode trade-offs | `docs/guide/research/comparison_external_projects.md` | | [memsearch](https://github.com/zilliztech/memsearch) | D2 | Reviewed | Markdown-first SoT + rebuildable index pattern | `docs/guide/research/comparison_external_projects.md` | | [qmd](https://github.com/tobi/qmd) | D2 | Reviewed | Retrieval routing, weighted fusion, and local-first explainability | `docs/guide/research/comparison_external_projects.md` | @@ -35,6 +39,26 @@ Last updated: April 17, 2026. | [LightRAG](https://github.com/HKUDS/LightRAG) | D0 | Pending deep dive | Graph-augmented RAG strategy relevance; not yet audited to adoption level | Discussion history only | | [GraphRAG](https://www.microsoft.com/en-us/research/project/graphrag/) | D0 | Pending deep dive | Graph-based retrieval concepts; not yet audited to implementation decision level | Discussion history only | +## June 2026 Activity Snapshot + +GitHub API snapshot time: 2026-06-08T06:01:57Z. + +The monitored project set is still moving quickly. Recent push activity was observed for +agentmemory, mem0, qmd, claude-mem, OpenViking, gbrain, graphify, LangGraph, Graphiti, +RAGFlow, LightRAG, and GraphRAG. Notable current scale signals: + +- agentmemory: 21,783 stars, latest release `v0.9.27`, pushed 2026-06-07. +- mem0: 58,005 stars, latest release `cli-node-v0.2.8`, pushed 2026-06-06. +- claude-mem: 81,157 stars, latest release `v13.4.1`, pushed 2026-06-08. +- graphify: 62,294 stars, latest release `v0.8.35`, pushed 2026-06-07. +- RAGFlow: 82,150 stars, latest release `v0.25.6`, pushed 2026-06-08. +- LightRAG: 36,270 stars, latest release `v1.5.0`, pushed 2026-06-08. +- GraphRAG: 33,545 stars, latest release `v3.1.0`, pushed 2026-06-05. + +Interpretation: this is not a settled market. ELF should keep watching external +implementation velocity, but the current activity signal alone does not justify +replacing ELF's evidence-bound service contract. 
+ ## Current Planning Surface - Linear project: [ELF vNext: Evidence-to-Knowledge Memory](https://linear.app/hack-ink/project/elf-vnext-evidence-to-knowledge-memory-d7a9dd3f3e86) @@ -46,6 +70,8 @@ Last updated: April 17, 2026. - [XY-40](https://linear.app/hack-ink/issue/XY-40/vision-track-elf-as-a-high-trust-memory-system-for-singlemulti-agent) - [XY-51](https://linear.app/hack-ink/issue/XY-51/agent-memory-ux-mcp-surface-skills-doc-pointers-epic) - [XY-63](https://linear.app/hack-ink/issue/XY-63/research-openviking-as-optional-doc-backend-integration-sketch) +- Current June 2026 research run: + - `docs/research/2026-06-08-agent-memory-selection.json` ## Notes diff --git a/docs/index.md b/docs/index.md index 3a5ce3ae..1c4c6cd1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,8 +2,8 @@ Purpose: Route agents to the smallest correct document set for the current task. Read this when: You are starting from repository docs and need to choose the right lane. -Not this document: Detailed subsystem contracts, step-by-step runbooks, or saved plan artifacts. -Routes to: `docs/governance.md`, `docs/spec/`, `docs/guide/`, `docs/plans/`, and `Makefile.toml`. +Not this document: Detailed subsystem contracts, step-by-step runbooks, research run state, or saved plan artifacts. +Routes to: `docs/governance.md`, `docs/spec/`, `docs/guide/`, `docs/research/`, `docs/plans/`, and `Makefile.toml`. Audience: All documentation in this repository is written for AI agents and LLM workflows. The split below is by question type, not by human-versus-agent audience. @@ -15,6 +15,8 @@ The split below is by question type, not by human-versus-agent audience. - Then choose one primary lane: - `docs/spec/index.md` when the question is "what must be true?" - `docs/guide/index.md` when the question is "what should I do?" +- Use `docs/research/` only when a research workflow explicitly points to a + machine-readable research run file there. 
- Use `docs/plans/` only when a planning tool or execution workflow explicitly points to a saved plan artifact there. @@ -25,6 +27,8 @@ The split below is by question type, not by human-versus-agent audience. - Need runbooks, migrations, validation steps, troubleshooting, or operational sequences -> `docs/guide/` - Need external comparisons or architecture research inputs -> `docs/guide/research/` +- Need machine-readable research run state, evidence, trade-offs, and decision status -> + `docs/research/` - Need repo task names or automation entrypoints -> `Makefile.toml` - Need documentation placement or authoring rules -> `docs/governance.md` - Need a planning-tool artifact or saved execution plan -> `docs/plans/` diff --git a/docs/research/2026-06-08-agent-memory-selection.json b/docs/research/2026-06-08-agent-memory-selection.json new file mode 100644 index 00000000..0e4c6899 --- /dev/null +++ b/docs/research/2026-06-08-agent-memory-selection.json @@ -0,0 +1,221 @@ +{ + "schema": "research-run/2", + "run_id": "2026-06-08-agent-memory-selection", + "question": "Given agentmemory, current monitored memory projects, and OpenAI/Anthropic/Google dreaming-style memory consolidation, should ELF continue building its own memory system or adopt an external system?", + "success_criteria": [ + "Use current ELF main-branch evidence, current Decodex/Linear state, and current external sources.", + "Compare continue-build, adopt-agentmemory, and adopt-managed-dreaming options.", + "Return guidance that can shape the next ELF Linear issues without relaxing evidence/provenance requirements." + ], + "constraints": [ + "Do not treat external benchmark or README claims as independently verified unless ELF has reproduced them.", + "Do not recommend destructive memory rewriting without reviewable derived output and provenance.", + "Keep ELF source-of-truth semantics separate from optional adapters and derived views." 
+ ], + "stop_rule": "Stop once the recommendation is decision-ready for issue shaping or the remaining uncertainty would require implementation benchmarks beyond this research pass.", + "primary_hypothesis": "ELF should continue as the evidence-bound core memory service and borrow or integrate external systems only at the capture, evaluation, viewer, and derived-consolidation layers.", + "rival_hypotheses": [ + "Replace ELF with agentmemory because it already packages cross-agent hooks, MCP tools, benchmarks, viewer, and consolidation.", + "Replace ELF's roadmap with managed dreaming APIs because large vendors are converging on background memory curation.", + "Pause ELF core development until the agent-memory market stabilizes." + ], + "falsifiers": [ + "If agentmemory or another external project exposes ELF-equivalent evidence-bound deterministic write contracts, multi-tenant service semantics, and rebuildable source-of-truth storage with lower integration risk, replacement becomes viable.", + "If managed dreaming APIs provide portable, self-hostable, reviewable, evidence-linked memory stores that can satisfy ELF governance boundaries, adopting them as core becomes viable.", + "If ELF's own hardening and validation surface is not operational after the June 2026 work, continuing core development should be deferred until reliability is restored." + ], + "coverage": { + "mode": "broad_external", + "min_source_families": 4 + }, + "continuation": { + "mode": "auto_if_not_decision_ready", + "attempt": 1, + "max_attempts": 2, + "session_id": "2026-06-08-agent-memory-selection" + }, + "events": [ + { + "seq": 1, + "type": "probe_completed", + "remaining_option_count": 3, + "independent_option_questions": [ + "Should ELF continue as the core memory service or be replaced by agentmemory?", + "Should dreaming-style consolidation become authoritative or derived/reviewed?", + "Which current ELF backlog items become higher priority after the refresh?" 
+ ], + "external_slices": [] + }, + { + "seq": 2, + "type": "evidence_recorded", + "evidence": [ + { + "id": "E1", + "kind": "observation", + "summary": "Current ELF main presents itself as evidence-linked fact memory with deterministic add_note and LLM-driven add_event separation, Postgres source-of-truth, rebuildable Qdrant index, multi-tenant scoped APIs, HTTP/MCP surfaces, graph-lite relation context, and evaluation tooling.", + "source_family": "repo_docs", + "source_locator": "README.md; config/local/elf.docker.toml; docker-compose.yml; Makefile.toml" + }, + { + "id": "E2", + "kind": "observation", + "summary": "The June 2026 ELF hardening sequence landed local service gates, MCP default-set PUT forwarding, getting-started docs, utoipa/Scalar API docs, strict config field presence, Docker Compose dependencies, and a checked-in decision record.", + "source_family": "repo_docs", + "source_locator": "docs/plans/2026-06-08-elf-hardening-evaluation-decisions.md" + }, + { + "id": "E3", + "kind": "observation", + "summary": "GitHub and Linear current-state checks show PRs #109-#113 merged and XY-789, XY-790, XY-791, XY-792, and XY-798 completed; Decodex top-level live status has zero active, running, queued, waiting, and attention lanes, although old attempt history still includes a stale XY-790 needs_attention ledger.", + "source_family": "tracker_runtime", + "source_locator": "gh pr view 109-113; Linear issue(id) query; decodex status --live --json --config /Users/x/.codex/decodex/projects/elf" + }, + { + "id": "E4", + "kind": "observation", + "summary": "agentmemory is a fast-moving Apache-2.0 coding-agent memory project with cross-agent MCP/REST/hook integration, advertised hybrid BM25/vector/graph retrieval, lifecycle/consolidation claims, a local viewer, iii console observability, v0.9.27 release, and recent push activity. 
Its own roadmap still lists governance, benchmark CI, session replay UI, enterprise trust, and v1.0 stability as future work.", + "source_family": "external_project", + "source_locator": "https://github.com/rohitg00/agentmemory; https://raw.githubusercontent.com/rohitg00/agentmemory/main/ROADMAP.md; GitHub API snapshot 2026-06-08T06:01:57Z" + }, + { + "id": "E5", + "kind": "observation", + "summary": "OpenAI describes dreaming as a background memory curation process that synthesizes memory state from conversations, improves preference use, and keeps memory current over time rather than treating old memories as static facts.", + "source_family": "vendor_docs", + "source_locator": "https://openai.com/index/chatgpt-memory-dreaming/" + }, + { + "id": "E6", + "kind": "observation", + "summary": "Anthropic Claude Dreams treats dreaming as an asynchronous research-preview job over a memory store plus 1-100 past sessions. It produces a separate output memory store, never modifies the input store, exposes progress/session events, and expects review, attach, discard, archive, or delete decisions after completion.", + "source_family": "vendor_docs", + "source_locator": "https://platform.claude.com/docs/en/managed-agents/dreams" + }, + { + "id": "E7", + "kind": "observation", + "summary": "Google examples split into two useful patterns: Always-On Memory Agent productizes file/API/dashboard ingest plus timer-based consolidation, while Gemini CLI Auto Memory keeps background extraction review-gated by writing patches and skill drafts to a project-local inbox before any approval.", + "source_family": "vendor_docs", + "source_locator": "https://github.com/GoogleCloudPlatform/generative-ai/tree/main/gemini/agents/always-on-memory-agent; https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/auto-memory.md" + }, + { + "id": "E8", + "kind": "observation", + "summary": "The monitored project set remains active as of 2026-06-08. 
GitHub API snapshots showed recent pushes for agentmemory, mem0, qmd, claude-mem, OpenViking, gbrain, graphify, LangGraph, Graphiti, RAGFlow, LightRAG, and GraphRAG, with agentmemory at 21,783 stars and v0.9.27, mem0 at 58,005 stars, claude-mem at 81,157 stars, graphify at 62,294 stars, and RAGFlow at 82,150 stars.", + "source_family": "external_project", + "source_locator": "GitHub API repository metadata snapshot 2026-06-08T06:01:57Z" + }, + { + "id": "E9", + "kind": "observation", + "summary": "The existing ELF vNext backlog already has directly relevant Backlog issues for knowledge memory pages with provenance and lint (XY-286), read-only viewer (XY-19), retrieval observability panels (XY-27), and graph-lite typed query/DX (XY-70).", + "source_family": "tracker_runtime", + "source_locator": "Linear issue(id) query for XY-286, XY-19, XY-27, XY-70" + } + ] + }, + { + "seq": 3, + "type": "tradeoffs_recorded", + "tradeoffs": [ + { + "id": "T1", + "summary": "Continuing ELF preserves the evidence-bound, deterministic, scoped service contract that external coding-agent products do not clearly replace; the trade-off is slower product UX unless viewer and capture adapters are prioritized.", + "supporting_evidence_ids": [ + "E1", + "E4", + "E8" + ], + "disconfirming_evidence_ids": [] + }, + { + "id": "T2", + "summary": "Dreaming-style consolidation is now validated by major vendors as a product direction, but the safest shared pattern is separate or review-gated output rather than destructive authoritative rewriting.", + "supporting_evidence_ids": [ + "E5", + "E6", + "E7" + ], + "disconfirming_evidence_ids": [] + }, + { + "id": "T3", + "summary": "agentmemory should be treated as an integration and benchmark target for coding-agent session capture, not as a core replacement, because its strongest value is hooks, viewer, tool breadth, and packaged local UX while ELF's strongest value is provenance and service governance.", + "supporting_evidence_ids": [ + "E1", + "E4" + 
], + "disconfirming_evidence_ids": [] + }, + { + "id": "T4", + "summary": "The refreshed evidence reorders ELF priorities toward viewer/observability and derived consolidation before more automatic memory authority, because operators need to inspect what was remembered, why, and how consolidation proposals were formed.", + "supporting_evidence_ids": [ + "E4", + "E6", + "E7", + "E9" + ], + "disconfirming_evidence_ids": [] + } + ] + }, + { + "seq": 4, + "type": "judgment_candidate_created", + "judgment_payload": { + "decision_claim": "Continue ELF as the evidence-bound memory core. Do not replace it with agentmemory or managed dreaming. Use agentmemory and managed dreaming systems as comparison baselines and optional adapters while prioritizing reviewable derived consolidation, operator viewer/observability, and graph-lite/knowledge-memory work in ELF.", + "implementation_order": [ + "Persist the research refresh and use it as the source for issue shaping.", + "Build a reviewed, derived consolidation pipeline over immutable evidence-bound notes and traces.", + "Ship the read-only viewer and retrieval observability panels before expanding automatic consolidation authority.", + "Add an optional agentmemory import/baseline adapter for coding-agent session observations.", + "Advance graph-lite typed query and derived knowledge pages with provenance and lint." 
+ ], + "judgment_type": "recommend", + "key_evidence_ids": [ + "E1", + "E2", + "E3", + "E4", + "E5", + "E6", + "E7", + "E8" + ], + "key_tradeoff_ids": [ + "T1", + "T2", + "T3", + "T4" + ], + "preferred_option": "continue-elf-core-with-dreaming-inspired-derived-consolidation-and-agentmemory-baseline-integration", + "rejected_options": [ + "replace-elf-with-agentmemory", + "replace-elf-with-managed-dreaming", + "pause-elf-core-development-until-the-market-settles" + ] + }, + "judgment_hash": "sha256:854918f581d32764fad76ac0481e58a72701bc348a827afa2a2b76978cc341f9" + }, + { + "seq": 5, + "type": "worker_completed", + "worker": "skeptic", + "target_judgment_hash": "sha256:854918f581d32764fad76ac0481e58a72701bc348a827afa2a2b76978cc341f9", + "summary": "The strongest objection is that agentmemory's product surface is already ahead of ELF for coding-agent continuity. That does not defeat the judgment because it supports an adapter/baseline and viewer priority, not replacement of ELF's stricter source-of-truth and evidence contract.", + "objections": [] + }, + { + "seq": 6, + "type": "finalized_decision_ready", + "judgment_hash": "sha256:854918f581d32764fad76ac0481e58a72701bc348a827afa2a2b76978cc341f9", + "confidence": "medium", + "missing_evidence": [ + "ELF has not independently reproduced agentmemory's benchmark claims.", + "The next implementation pass still needs issue-local design for the consolidation data model and adapter boundaries." + ] + } + ] +}