Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions Makefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,9 @@ args = [
# | real-world-memory-consolidation | composite | |
# | real-world-memory-consolidation-json | command | |
# | real-world-memory-consolidation-report | command | |
# | real-world-memory-summary | composite | |
# | real-world-memory-summary-json | command | |
# | real-world-memory-summary-report | command | |
# | real-world-memory-live-consolidation | command | |
# | real-world-job-operator-ux | composite | |
# | real-world-job-operator-ux-json | command | |
Expand Down Expand Up @@ -831,6 +834,55 @@ args = [
"tmp/real-world-memory/consolidation/report.md",
]

# Composite entry point for the memory-summary fixture run. It defines no
# command of its own: depending on the report task is what triggers the work,
# and that task in turn depends on real-world-memory-summary-json.
[tasks.real-world-memory-summary]
workspace = false
dependencies = [
"real-world-memory-summary-report",
]

# Invokes the `real_world_job_benchmark` binary of the `elf-eval` package with
# its `run` subcommand, pointed at the memory_summary fixture directory, with
# `--out` set to tmp/real-world-memory/memory-summary/report.json.
[tasks.real-world-memory-summary-json]
workspace = false
command = "cargo"
args = [
"run",
"-p",
"elf-eval",
"--bin",
"real_world_job_benchmark",
# Everything after `--` goes to the benchmark binary rather than to cargo.
"--",
"run",
"--fixtures",
"apps/elf-eval/fixtures/real_world_memory/memory_summary",
# The same path is passed as `--report` by real-world-memory-summary-report;
# change both together.
"--out",
"tmp/real-world-memory/memory-summary/report.json",
"--run-id",
"real-world-memory-summary",
"--adapter-id",
"fixture_memory_summary",
"--adapter-name",
"ELF memory summary fixture",
]

# Invokes the same `real_world_job_benchmark` binary with its `publish`
# subcommand, passing the JSON report path as `--report` and a Markdown path
# (tmp/real-world-memory/memory-summary/report.md) as `--out`.
[tasks.real-world-memory-summary-report]
workspace = false
# The JSON task runs first so the `--report` input below exists before publish.
dependencies = [
"real-world-memory-summary-json",
]
command = "cargo"
args = [
"run",
"-p",
"elf-eval",
"--bin",
"real_world_job_benchmark",
# Everything after `--` goes to the benchmark binary rather than to cargo.
"--",
"publish",
# Must match the `--out` path of real-world-memory-summary-json.
"--report",
"tmp/real-world-memory/memory-summary/report.json",
"--out",
"tmp/real-world-memory/memory-summary/report.md",
]

[tasks.real-world-memory-live-consolidation]
workspace = false
command = "bash"
Expand Down
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,15 +152,17 @@ provider-backed ELF evidence was required.
its pinned Docker local embedding path and is reported as `wrong_result` when
same-corpus evidence terms are missed; claude-mem and OpenViking non-retrieval
coverage remain typed non-pass states.
- Real-world agent memory aggregate after XY-927 and XY-928: 49 fixture-backed
jobs across 13 suites, 44 pass, 0 incomplete, 5 blocked, 0 wrong-result,
- Real-world agent memory aggregate after XY-952: 50 fixture-backed
jobs across 14 suites, 45 pass, 0 incomplete, 5 blocked, 0 wrong-result,
0 not-encoded, and 0 unsupported-claim results. The remaining non-pass jobs are
production-ops operator boundaries plus blocked OpenViking staged trajectory,
hierarchy selection, and recursive/context expansion measurement gates, not
hidden benchmark wins. The new `core_archival_memory` suite passes 6 fixture
jobs for core block attachment, scope, provenance, stale-core detection,
archival fallback, and project-decision recovery; it does not create an
ELF-over-Letta claim.
hidden benchmark wins. The `core_archival_memory` suite passes 6 fixture jobs for
core block attachment, scope, provenance, stale-core detection, archival fallback,
and project-decision recovery; it does not create an ELF-over-Letta claim. The new
`memory_summary` fixture passes 1 source-trace job for reviewable top-of-mind,
background, stale, superseded, tombstoned, and derived project-profile entries; it
does not create a managed-memory parity claim.
- Full-suite live real-world adapter sweep after XY-926: ELF and qmd emit
Docker-isolated `live_real_world` records for all 55 checked-in jobs across 13 suites
through `cargo make real-world-memory-live-adapters`. Both keep the original
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
},
"run": {
"status": "blocked",
"evidence": "The current fixture set reports 49 jobs across 13 suites: 44 pass, 0 incomplete, 5 blocked, 0 wrong_result, 0 not_encoded, and 0 unsupported_claim. The six core_archival_memory jobs pass as ELF fixture evidence, not as live Letta comparison evidence; context_trajectory remains blocked behind OpenViking staged-artifact materialization.",
"evidence": "The current fixture set reports 50 jobs across 14 suites: 45 pass, 0 incomplete, 5 blocked, 0 wrong_result, 0 not_encoded, and 0 unsupported_claim. The six core_archival_memory jobs pass as ELF fixture evidence, not as live Letta comparison evidence; the one memory_summary job passes as fixture-backed source-trace evidence, not as managed-memory parity evidence; context_trajectory remains blocked behind OpenViking staged-artifact materialization.",
"command": "cargo make real-world-memory",
"artifact": "tmp/real-world-memory/real-world-memory-report.json"
},
Expand Down
Loading