diff --git a/apps/elf-eval/fixtures/agentmemory/sample_session.json b/apps/elf-eval/fixtures/agentmemory/sample_session.json new file mode 100644 index 00000000..c02c4162 --- /dev/null +++ b/apps/elf-eval/fixtures/agentmemory/sample_session.json @@ -0,0 +1,106 @@ +{ + "schema": "agentmemory.fixture/v1", + "fixture_id": "agentmemory-sample-2026-06-08", + "source": { + "system": "agentmemory", + "version": "v0.9.27", + "export_id": "agentmemory-export-sample", + "exported_at": "2026-06-08T06:30:00Z" + }, + "sessions": [ + { + "session_id": "am-session-2026-06-08", + "agent": "codex", + "project": "ELF", + "started_at": "2026-06-08T05:45:00Z", + "ended_at": "2026-06-08T06:10:00Z", + "observations": [ + { + "observation_id": "obs-architecture", + "ts": "2026-06-08T05:50:00Z", + "role": "assistant", + "kind": "implementation_note", + "text": "ELF keeps Postgres as the source of truth and treats Qdrant as a rebuildable derived index.", + "metadata": { + "agentmemory_workspace": "elf-local", + "capture_method": "fixture" + } + }, + { + "observation_id": "obs-policy", + "ts": "2026-06-08T05:55:00Z", + "role": "assistant", + "kind": "implementation_note", + "text": "Imported agentmemory facts must still pass ELF note write policy before they become authoritative notes.", + "metadata": { + "agentmemory_workspace": "elf-local", + "capture_method": "fixture" + } + } + ], + "memories": [ + { + "memory_id": "mem-architecture-sot", + "kind": "fact", + "key": "architecture_sot", + "text": "ELF keeps Postgres as the source of truth and Qdrant as a rebuildable derived index.", + "importance": 0.8, + "confidence": 0.9, + "created_at": "2026-06-08T05:50:00Z", + "updated_at": "2026-06-08T05:50:00Z", + "source_observation_ids": ["obs-architecture"], + "metadata": { + "agentmemory_memory_type": "fact", + "capture_method": "fixture" + } + }, + { + "memory_id": "mem-import-policy", + "kind": "constraint", + "key": "agentmemory_import_policy", + "text": "Agentmemory imports must use ELF 
ingestion policy instead of writing directly to storage.", + "importance": 0.7, + "confidence": 0.9, + "created_at": "2026-06-08T05:55:00Z", + "updated_at": "2026-06-08T05:55:00Z", + "source_observation_ids": ["obs-policy"], + "metadata": { + "agentmemory_memory_type": "constraint", + "capture_method": "fixture" + } + }, + { + "memory_id": "mem-raw-summary", + "kind": "summary", + "text": "This raw summary is intentionally ignored because the adapter does not infer ELF note types from unsupported agentmemory kinds.", + "importance": 0.4, + "confidence": 0.5, + "created_at": "2026-06-08T06:00:00Z", + "updated_at": "2026-06-08T06:00:00Z", + "source_observation_ids": ["obs-architecture"], + "metadata": { + "agentmemory_memory_type": "summary", + "capture_method": "fixture" + } + } + ], + "retrieval_cases": [ + { + "query_id": "q-architecture-sot", + "query": "where does ELF keep the authoritative memory store", + "expected_memory_ids": ["mem-architecture-sot"], + "agentmemory_results": [ + { + "memory_id": "mem-architecture-sot", + "rank": 1, + "score": 0.98 + } + ], + "metadata": { + "claim_source": "fixture_only" + } + } + ] + } + ] +} diff --git a/apps/elf-eval/src/bin/agentmemory_fixture_adapter.rs b/apps/elf-eval/src/bin/agentmemory_fixture_adapter.rs new file mode 100644 index 00000000..91479958 --- /dev/null +++ b/apps/elf-eval/src/bin/agentmemory_fixture_adapter.rs @@ -0,0 +1,639 @@ +#![allow(clippy::single_component_path_imports, unused_crate_dependencies)] + +//! Offline adapter for agentmemory-style fixture exports. 
+ +use std::{collections::HashMap, fs, path::PathBuf}; + +use clap::Parser; +use color_eyre; +use serde::{Deserialize, Serialize}; +use serde_json::{self, Value}; +use time::{OffsetDateTime, format_description::well_known::Rfc3339}; +use uuid::Uuid; + +const OUTPUT_SCHEMA: &str = "elf.agentmemory_adapter/v1"; +const FIXTURE_RESOLVER: &str = "agentmemory_fixture/v1"; +const DEFAULT_IMPORTANCE: f32 = 0.5; +const DEFAULT_CONFIDENCE: f32 = 0.5; + +#[derive(Debug, Parser)] +#[command( + version = elf_cli::VERSION, + rename_all = "kebab", + styles = elf_cli::styles(), +)] +struct Args { + /// Path to a sanitized agentmemory-style JSON fixture. + #[arg(long, short = 'f', value_name = "FILE")] + fixture: PathBuf, + /// Write adapter JSON to this file (defaults to stdout). + #[arg(long, value_name = "FILE")] + out: Option, + /// ELF write scope to attach to emitted note and doc candidates. + #[arg(long, default_value = "agent_private")] + scope: String, + /// Maximum note text length accepted for note candidates. 
+ #[arg(long, default_value_t = 240)] + max_note_chars: usize, +} + +#[derive(Debug, Deserialize)] +struct AgentmemoryFixture { + schema: Option, + + fixture_id: Option, + #[serde(default)] + source: FixtureSource, + #[serde(default)] + sessions: Vec, +} + +#[derive(Debug, Default, Deserialize)] +struct FixtureSource { + system: Option, + + version: Option, + + export_id: Option, + + exported_at: Option, +} + +#[derive(Debug, Deserialize)] +struct AgentmemorySession { + session_id: String, + + agent: Option, + + project: Option, + + started_at: Option, + + ended_at: Option, + #[serde(default)] + observations: Vec, + #[serde(default)] + memories: Vec, + #[serde(default)] + retrieval_cases: Vec, +} + +#[derive(Debug, Deserialize)] +struct AgentmemoryObservation { + observation_id: String, + + ts: Option, + + role: Option, + + kind: Option, + text: String, + #[serde(default)] + metadata: Value, +} + +#[derive(Debug, Deserialize)] +struct AgentmemoryMemory { + memory_id: String, + + kind: Option, + + key: Option, + text: String, + + importance: Option, + + confidence: Option, + + ttl_days: Option, + + created_at: Option, + + updated_at: Option, + #[serde(default)] + source_observation_ids: Vec, + #[serde(default)] + metadata: Value, +} + +#[derive(Debug, Deserialize)] +struct AgentmemoryRetrievalCase { + query_id: String, + query: String, + #[serde(default)] + expected_memory_ids: Vec, + #[serde(default)] + agentmemory_results: Vec, + #[serde(default)] + metadata: Value, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +struct AgentmemorySearchResult { + memory_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + rank: Option, + #[serde(skip_serializing_if = "Option::is_none")] + score: Option, +} + +#[derive(Debug, Serialize)] +struct AdapterOutput { + schema: &'static str, + fixture_id: String, + source: AdapterSource, + summary: AdapterSummary, + note_candidates: Vec, + doc_candidates: Vec, + baseline_queries: Vec, + ignored_items: Vec, +} + 
+#[derive(Debug, Serialize)] +struct AdapterSource { + system: String, + #[serde(skip_serializing_if = "Option::is_none")] + version: Option, + #[serde(skip_serializing_if = "Option::is_none")] + export_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + exported_at: Option, + #[serde(skip_serializing_if = "Option::is_none")] + fixture_schema: Option, +} + +#[derive(Debug, Serialize)] +struct AdapterSummary { + session_count: usize, + observation_count: usize, + memory_count: usize, + note_candidate_count: usize, + doc_candidate_count: usize, + baseline_query_count: usize, + ignored_count: usize, +} + +#[derive(Clone, Debug, Serialize)] +struct NoteCandidate { + candidate_id: Uuid, + scope: String, + session_id: String, + source_memory_id: String, + source_observation_ids: Vec, + notes_ingest_item: ElfNoteCandidate, + #[serde(skip_serializing_if = "Value::is_null")] + source_metadata: Value, +} + +#[derive(Clone, Debug, Serialize)] +struct ElfNoteCandidate { + #[serde(rename = "type")] + note_type: String, + #[serde(skip_serializing_if = "Option::is_none")] + key: Option, + text: String, + importance: f32, + confidence: f32, + #[serde(skip_serializing_if = "Option::is_none")] + ttl_days: Option, + source_ref: Value, +} + +#[derive(Debug, Serialize)] +struct DocCandidate { + candidate_id: Uuid, + scope: String, + session_id: String, + source_observation_id: String, + docs_put: DocsPutCandidate, + #[serde(skip_serializing_if = "Value::is_null")] + source_metadata: Value, +} + +#[derive(Debug, Serialize)] +struct DocsPutCandidate { + scope: String, + doc_type: &'static str, + title: String, + source_ref: Value, + content: String, +} + +#[derive(Debug, Serialize)] +struct BaselineQuery { + query_id: String, + session_id: String, + query: String, + expected_source_memory_ids: Vec, + expected_candidate_ids: Vec, + expected_keys: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + agentmemory_results: Vec, + #[serde(skip_serializing_if = 
"Value::is_null")] + source_metadata: Value, +} + +#[derive(Debug, Serialize)] +struct IgnoredItem { + item_kind: &'static str, + session_id: String, + source_id: String, + reason: &'static str, + #[serde(skip_serializing_if = "Option::is_none")] + detail: Option, +} + +#[derive(Clone)] +struct FixtureContext { + fixture_id: String, + source_system: String, + source_version: Option, + exported_at: Option, + scope: String, + max_note_chars: usize, +} + +fn main() -> color_eyre::Result<()> { + color_eyre::install()?; + + let args = Args::parse(); + let raw = fs::read_to_string(&args.fixture)?; + let fixture: AgentmemoryFixture = serde_json::from_str(&raw)?; + let output = adapt_fixture(&fixture, args.scope.as_str(), args.max_note_chars); + let json = serde_json::to_string_pretty(&output)?; + + if let Some(path) = args.out { + write_output(path, json.as_str())?; + } else { + println!("{json}"); + } + + Ok(()) +} + +fn write_output(path: PathBuf, json: &str) -> color_eyre::Result<()> { + if let Some(parent) = path.parent() + && !parent.as_os_str().is_empty() + { + fs::create_dir_all(parent)?; + } + + fs::write(path, json)?; + + Ok(()) +} + +fn adapt_fixture( + fixture: &AgentmemoryFixture, + scope: &str, + max_note_chars: usize, +) -> AdapterOutput { + let source = adapter_source(fixture); + let fixture_id = fixture_id(fixture, source.system.as_str()); + let ctx = FixtureContext { + fixture_id: fixture_id.clone(), + source_system: source.system.clone(), + source_version: source.version.clone(), + exported_at: source.exported_at.clone(), + scope: scope.to_string(), + max_note_chars, + }; + let mut notes = Vec::new(); + let mut docs = Vec::new(); + let mut baselines = Vec::new(); + let mut ignored = Vec::new(); + let mut memory_map = HashMap::new(); + + for session in &fixture.sessions { + map_observations(session, &ctx, &mut docs, &mut ignored); + map_memories(session, &ctx, &mut notes, &mut memory_map, &mut ignored); + map_baselines(session, &memory_map, &mut 
baselines, &mut ignored); + } + + AdapterOutput { + schema: OUTPUT_SCHEMA, + fixture_id, + source, + summary: AdapterSummary { + session_count: fixture.sessions.len(), + observation_count: fixture + .sessions + .iter() + .map(|session| session.observations.len()) + .sum(), + memory_count: fixture.sessions.iter().map(|session| session.memories.len()).sum(), + note_candidate_count: notes.len(), + doc_candidate_count: docs.len(), + baseline_query_count: baselines.len(), + ignored_count: ignored.len(), + }, + note_candidates: notes, + doc_candidates: docs, + baseline_queries: baselines, + ignored_items: ignored, + } +} + +fn adapter_source(fixture: &AgentmemoryFixture) -> AdapterSource { + AdapterSource { + system: clean_string(fixture.source.system.as_deref()) + .unwrap_or_else(|| "agentmemory".to_string()), + version: clean_string(fixture.source.version.as_deref()), + export_id: clean_string(fixture.source.export_id.as_deref()), + exported_at: clean_string(fixture.source.exported_at.as_deref()), + fixture_schema: clean_string(fixture.schema.as_deref()), + } +} + +fn fixture_id(fixture: &AgentmemoryFixture, source_system: &str) -> String { + clean_string(fixture.fixture_id.as_deref()) + .or_else(|| clean_string(fixture.source.export_id.as_deref())) + .unwrap_or_else(|| stable_uuid("fixture", &[source_system]).to_string()) +} + +fn map_observations( + session: &AgentmemorySession, + ctx: &FixtureContext, + docs: &mut Vec, + ignored: &mut Vec, +) { + for observation in &session.observations { + match doc_candidate(session, observation, ctx) { + Ok(candidate) => docs.push(candidate), + Err(reason) => ignored.push(IgnoredItem { + item_kind: "observation", + session_id: session.session_id.clone(), + source_id: observation.observation_id.clone(), + reason, + detail: None, + }), + } + } +} + +fn map_memories( + session: &AgentmemorySession, + ctx: &FixtureContext, + notes: &mut Vec, + memory_map: &mut HashMap, + ignored: &mut Vec, +) { + for memory in &session.memories { + 
match note_candidate(session, memory, ctx) { + Ok(candidate) => { + memory_map.insert(memory.memory_id.clone(), candidate.clone()); + notes.push(candidate); + }, + Err(reason) => ignored.push(IgnoredItem { + item_kind: "memory", + session_id: session.session_id.clone(), + source_id: memory.memory_id.clone(), + reason, + detail: None, + }), + } + } +} + +fn map_baselines( + session: &AgentmemorySession, + memory_map: &HashMap, + baselines: &mut Vec, + ignored: &mut Vec, +) { + for case in &session.retrieval_cases { + match baseline_query(session, case, memory_map) { + Some(baseline) => baselines.push(baseline), + None => ignored.push(IgnoredItem { + item_kind: "retrieval_case", + session_id: session.session_id.clone(), + source_id: case.query_id.clone(), + reason: "no_mapped_expected_memories", + detail: None, + }), + } + } +} + +fn doc_candidate( + session: &AgentmemorySession, + observation: &AgentmemoryObservation, + ctx: &FixtureContext, +) -> std::result::Result { + let text = observation.text.trim(); + + if text.is_empty() { + return Err("empty_text"); + } + + let Some(ts) = observation_timestamp(session, observation, ctx) else { + return Err("missing_or_invalid_timestamp"); + }; + let candidate_id = stable_uuid( + "observation", + &[ + ctx.fixture_id.as_str(), + session.session_id.as_str(), + observation.observation_id.as_str(), + ], + ); + let role = clean_string(observation.role.as_deref()) + .or_else(|| clean_string(observation.kind.as_deref())) + .unwrap_or_else(|| "observation".to_string()); + let title = format!("agentmemory observation {}", observation.observation_id); + let source_ref = serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "chat", + "ts": ts, + "thread_id": session.session_id, + "role": role, + "message_id": observation.observation_id, + "agentmemory_fixture_id": ctx.fixture_id, + "agentmemory_source_system": ctx.source_system, + "agentmemory_observation_kind": clean_string(observation.kind.as_deref()), + "agent": 
clean_string(session.agent.as_deref()), + "project": clean_string(session.project.as_deref()), + }); + + Ok(DocCandidate { + candidate_id, + scope: ctx.scope.clone(), + session_id: session.session_id.clone(), + source_observation_id: observation.observation_id.clone(), + docs_put: DocsPutCandidate { + scope: ctx.scope.clone(), + doc_type: "chat", + title, + source_ref, + content: observation.text.clone(), + }, + source_metadata: observation.metadata.clone(), + }) +} + +fn note_candidate( + session: &AgentmemorySession, + memory: &AgentmemoryMemory, + ctx: &FixtureContext, +) -> std::result::Result { + let text = memory.text.trim(); + + if text.is_empty() { + return Err("empty_text"); + } + if text.chars().count() > ctx.max_note_chars { + return Err("note_text_too_long"); + } + + let Some(note_type) = memory.kind.as_deref().and_then(map_note_type) else { + return Err("unsupported_memory_kind"); + }; + let Some(importance) = score_or_default(memory.importance, DEFAULT_IMPORTANCE) else { + return Err("invalid_importance"); + }; + let Some(confidence) = score_or_default(memory.confidence, DEFAULT_CONFIDENCE) else { + return Err("invalid_confidence"); + }; + let candidate_id = stable_uuid( + "memory", + &[ctx.fixture_id.as_str(), session.session_id.as_str(), memory.memory_id.as_str()], + ); + let source_ref = note_source_ref(session, memory, ctx); + + Ok(NoteCandidate { + candidate_id, + scope: ctx.scope.clone(), + session_id: session.session_id.clone(), + source_memory_id: memory.memory_id.clone(), + source_observation_ids: memory.source_observation_ids.clone(), + notes_ingest_item: ElfNoteCandidate { + note_type: note_type.to_string(), + key: clean_string(memory.key.as_deref()), + text: memory.text.clone(), + importance, + confidence, + ttl_days: memory.ttl_days.filter(|days| *days > 0), + source_ref, + }, + source_metadata: memory.metadata.clone(), + }) +} + +fn note_source_ref( + session: &AgentmemorySession, + memory: &AgentmemoryMemory, + ctx: &FixtureContext, +) 
-> Value { + serde_json::json!({ + "schema": "source_ref/v1", + "resolver": FIXTURE_RESOLVER, + "ref": { + "fixture_id": ctx.fixture_id, + "session_id": session.session_id, + "memory_id": memory.memory_id, + "observation_ids": memory.source_observation_ids, + }, + "state": { + "source_system": ctx.source_system, + "source_version": ctx.source_version, + "exported_at": ctx.exported_at, + "session_started_at": session.started_at, + "session_ended_at": session.ended_at, + "memory_created_at": memory.created_at, + "memory_updated_at": memory.updated_at, + }, + "locator": { + "memory_id": memory.memory_id, + "observation_ids": memory.source_observation_ids, + }, + "hints": { + "agent": session.agent, + "project": session.project, + "origin_kind": memory.kind, + }, + }) +} + +fn baseline_query( + session: &AgentmemorySession, + case: &AgentmemoryRetrievalCase, + memory_map: &HashMap, +) -> Option { + if case.query.trim().is_empty() || case.expected_memory_ids.is_empty() { + return None; + } + + let expected: Vec<&NoteCandidate> = + case.expected_memory_ids.iter().filter_map(|id| memory_map.get(id)).collect(); + + if expected.is_empty() { + return None; + } + + Some(BaselineQuery { + query_id: case.query_id.clone(), + session_id: session.session_id.clone(), + query: case.query.clone(), + expected_source_memory_ids: expected + .iter() + .map(|candidate| candidate.source_memory_id.clone()) + .collect(), + expected_candidate_ids: expected.iter().map(|candidate| candidate.candidate_id).collect(), + expected_keys: expected + .iter() + .filter_map(|candidate| candidate.notes_ingest_item.key.clone()) + .collect(), + agentmemory_results: case.agentmemory_results.clone(), + source_metadata: case.metadata.clone(), + }) +} + +fn observation_timestamp( + session: &AgentmemorySession, + observation: &AgentmemoryObservation, + ctx: &FixtureContext, +) -> Option { + [observation.ts.as_deref(), session.started_at.as_deref(), ctx.exported_at.as_deref()] + .into_iter() + .flatten() + 
.find_map(normalize_rfc3339) +} + +fn normalize_rfc3339(value: &str) -> Option { + OffsetDateTime::parse(value, &Rfc3339) + .ok() + .and_then(|timestamp| timestamp.format(&Rfc3339).ok()) +} + +fn map_note_type(kind: &str) -> Option<&'static str> { + match kind.trim().to_ascii_lowercase().as_str() { + "preference" => Some("preference"), + "constraint" => Some("constraint"), + "decision" => Some("decision"), + "profile" => Some("profile"), + "fact" => Some("fact"), + "plan" => Some("plan"), + _ => None, + } +} + +fn score_or_default(score: Option, default: f32) -> Option { + let score = score.unwrap_or(default); + + if score.is_finite() && (0.0..=1.0).contains(&score) { Some(score) } else { None } +} + +fn clean_string(value: Option<&str>) -> Option { + value.map(str::trim).filter(|value| !value.is_empty()).map(str::to_string) +} + +fn stable_uuid(kind: &str, parts: &[&str]) -> Uuid { + let mut key = format!("https://hack.ink/elf/{OUTPUT_SCHEMA}/{kind}"); + + for part in parts { + key.push('/'); + key.push_str(part); + } + + Uuid::new_v5(&Uuid::NAMESPACE_URL, key.as_bytes()) +} diff --git a/apps/elf-eval/tests/agentmemory_fixture_adapter.rs b/apps/elf-eval/tests/agentmemory_fixture_adapter.rs new file mode 100644 index 00000000..452158d4 --- /dev/null +++ b/apps/elf-eval/tests/agentmemory_fixture_adapter.rs @@ -0,0 +1,102 @@ +#![allow(unused_crate_dependencies)] + +//! Integration tests for the offline agentmemory fixture adapter. 
+ +use std::{path::Path, process::Command}; + +use color_eyre::{Result, eyre}; +use serde_json::Value; + +fn run_adapter() -> Result { + let fixture = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("fixtures") + .join("agentmemory") + .join("sample_session.json"); + let output = Command::new(env!("CARGO_BIN_EXE_agentmemory_fixture_adapter")) + .arg("--fixture") + .arg(fixture) + .output()?; + + assert!( + output.status.success(), + "agentmemory fixture adapter failed: {}", + String::from_utf8_lossy(&output.stderr), + ); + + Ok(serde_json::from_slice(&output.stdout)?) +} + +fn array_at<'a>(value: &'a Value, pointer: &str) -> Result<&'a Vec> { + value + .pointer(pointer) + .and_then(Value::as_array) + .ok_or_else(|| eyre::eyre!("missing array at {pointer}")) +} + +fn find_by_field<'a>(items: &'a [Value], field: &str, expected: &str) -> Result<&'a Value> { + items + .iter() + .find(|item| item.pointer(field).and_then(Value::as_str) == Some(expected)) + .ok_or_else(|| eyre::eyre!("missing item with {field} = {expected}")) +} + +#[test] +fn fixture_maps_memories_observations_and_baselines() -> Result<()> { + let output = run_adapter()?; + + assert_eq!( + output.pointer("/schema").and_then(Value::as_str), + Some("elf.agentmemory_adapter/v1") + ); + assert_eq!(output.pointer("/summary/session_count").and_then(Value::as_u64), Some(1)); + assert_eq!(output.pointer("/summary/note_candidate_count").and_then(Value::as_u64), Some(2)); + assert_eq!(output.pointer("/summary/doc_candidate_count").and_then(Value::as_u64), Some(2)); + assert_eq!(output.pointer("/summary/baseline_query_count").and_then(Value::as_u64), Some(1)); + assert_eq!(output.pointer("/summary/ignored_count").and_then(Value::as_u64), Some(1)); + + let notes = array_at(&output, "/note_candidates")?; + let note = find_by_field(notes, "/source_memory_id", "mem-architecture-sot")?; + + assert_eq!(note.pointer("/notes_ingest_item/type").and_then(Value::as_str), Some("fact")); + assert_eq!( + 
note.pointer("/notes_ingest_item/key").and_then(Value::as_str), + Some("architecture_sot"), + ); + assert_eq!( + note.pointer("/notes_ingest_item/source_ref/resolver").and_then(Value::as_str), + Some("agentmemory_fixture/v1"), + ); + + let docs = array_at(&output, "/doc_candidates")?; + let doc = find_by_field(docs, "/source_observation_id", "obs-architecture")?; + + assert_eq!(doc.pointer("/docs_put/doc_type").and_then(Value::as_str), Some("chat")); + assert_eq!( + doc.pointer("/docs_put/source_ref/schema").and_then(Value::as_str), + Some("doc_source_ref/v1"), + ); + assert_eq!( + doc.pointer("/docs_put/source_ref/thread_id").and_then(Value::as_str), + Some("am-session-2026-06-08"), + ); + + let baselines = array_at(&output, "/baseline_queries")?; + let baseline = find_by_field(baselines, "/query_id", "q-architecture-sot")?; + let expected_keys = array_at(baseline, "/expected_keys")?; + + assert_eq!(expected_keys.len(), 1); + assert_eq!(expected_keys.first().and_then(Value::as_str), Some("architecture_sot")); + + Ok(()) +} + +#[test] +fn fixture_reports_unsupported_memory_kind_without_rewriting() -> Result<()> { + let output = run_adapter()?; + let ignored_items = array_at(&output, "/ignored_items")?; + let ignored = find_by_field(ignored_items, "/source_id", "mem-raw-summary")?; + + assert_eq!(ignored.pointer("/reason").and_then(Value::as_str), Some("unsupported_memory_kind")); + + Ok(()) +} diff --git a/docs/guide/evaluation.md b/docs/guide/evaluation.md index e84afaa0..e37c0fb6 100644 --- a/docs/guide/evaluation.md +++ b/docs/guide/evaluation.md @@ -101,6 +101,8 @@ The command prints a JSON report containing summary metrics and per-query detail - `--search-mode quick_find` (lower latency) - `--search-mode planned_search` (planning-enabled path; useful when you need query plans and staged trajectory metadata) - When running a config comparison with `--config-b`, you can set `--search-mode-b` to override the mode for the B side. 
+- To compare against sanitized agentmemory session fixtures without running an agentmemory server, use + `docs/guide/research/agentmemory_adapter.md`. - The dataset should avoid secrets and sensitive data. - To persist traces for later replay without running `elf-worker`, set `search.explain.write_mode = "inline"` in the config used by `elf-eval`. diff --git a/docs/guide/research/agentmemory_adapter.md b/docs/guide/research/agentmemory_adapter.md new file mode 100644 index 00000000..65d51662 --- /dev/null +++ b/docs/guide/research/agentmemory_adapter.md @@ -0,0 +1,175 @@ +# Agentmemory Fixture Adapter + +Goal: Convert sanitized agentmemory-style session exports into ELF-owned note/doc +candidates and retrieval baseline records. +Read this when: You need to compare coding-agent memory capture against ELF without +running an agentmemory server or bypassing ELF ingestion. +Inputs: A local JSON fixture with agentmemory-style sessions, observations, memories, +and retrieval cases. +Depends on: `elf-eval`, `docs/research/2026-06-08-agent-memory-selection.json`, +`docs/spec/system_elf_memory_service_v2.md`, `docs/spec/system_doc_source_ref_v1.md`, +and `docs/spec/system_source_ref_doc_pointer_v1.md`. +Outputs: A deterministic `elf.agentmemory_adapter/v1` JSON bundle with note candidates, +doc candidates, baseline queries, and ignored-item reasons. + +## Boundary + +The adapter is an offline comparison/import boundary, not an ingestion path. +It does not call agentmemory, ELF HTTP APIs, providers, Postgres, Qdrant, or any LLM. +It only rewrites a sanitized fixture into records that can later be reviewed, grouped, +and submitted through normal ELF endpoints. + +Use this boundary when the question is: + +- Which agentmemory memories are plausible ELF note candidates? +- Which raw observations should be retained as document evidence? +- Which retrieval cases can become ELF evaluation datasets after candidate notes are + ingested through `/v2/notes/ingest`? 
+ +Do not use it to claim that ELF reproduces agentmemory benchmark numbers. Fixture +retrieval cases preserve agentmemory result ranks and scores as external baseline +metadata only. + +## Command + +Run the adapter through `cargo run`: + +```sh +cargo run -p elf-eval --bin agentmemory_fixture_adapter -- \ + --fixture apps/elf-eval/fixtures/agentmemory/sample_session.json \ + --out tmp/agentmemory-adapter.json +``` + +Optional flags: + +- `--scope`: ELF write scope attached to emitted note and doc candidates. Defaults to + `agent_private`. +- `--max-note-chars`: maximum accepted note length before a memory is reported as + ignored. Defaults to `240`, matching the canonical local config limit. + +## Fixture Shape + +The fixture is intentionally small and producer-owned. It should use this shape: + +```json +{ + "schema": "agentmemory.fixture/v1", + "fixture_id": "agentmemory-sample-2026-06-08", + "source": { + "system": "agentmemory", + "version": "v0.9.27", + "export_id": "agentmemory-export-sample", + "exported_at": "2026-06-08T06:30:00Z" + }, + "sessions": [ + { + "session_id": "am-session-2026-06-08", + "agent": "codex", + "project": "ELF", + "started_at": "2026-06-08T05:45:00Z", + "observations": [], + "memories": [], + "retrieval_cases": [] + } + ] +} +``` + +The checked-in sample fixture is sanitized and exists only to exercise the mapping. +External exports must be reviewed for secrets and sensitive content before being +committed or shared. + +## Mapping + +Agentmemory memories become `note_candidates` only when all of these are true: + +- `kind` maps directly to one ELF note type: `preference`, `constraint`, `decision`, + `profile`, `fact`, or `plan`. +- `text` is non-empty after trimming, and its trimmed length does not exceed `--max-note-chars` characters. +- `importance` and `confidence`, when present, are finite values in `0.0..=1.0`; absent values default to `0.5`. + +The emitted `notes_ingest_item` is shaped like a single `/v2/notes/ingest` note item. 
+It includes a `source_ref/v1` envelope with `resolver = "agentmemory_fixture/v1"` and +stable origin fields: + +- fixture id +- session id +- memory id +- source observation ids +- source system/version +- export, session, and memory timestamps + +The adapter does not infer missing ELF note types, does not truncate text, and does not +rewrite memory text into a canonical note sentence. + +Agentmemory observations become `doc_candidates` when they have non-empty text and an +RFC3339 timestamp from the observation, session, or export. The emitted `docs_put` +payload uses: + +- `doc_type = "chat"` +- `source_ref.schema = "doc_source_ref/v1"` +- `thread_id = session_id` +- `message_id = observation_id` +- `role` from the observation role, observation kind, or `observation` + +This keeps raw session evidence separate from authoritative ELF notes. If operators +later ingest docs and want hydrated note evidence, they should attach normal +`elf_doc_ext/v1` doc pointers after `docs_put` returns concrete `doc_id` values. + +Retrieval cases become `baseline_queries` when the query text is non-empty and at least one expected memory id maps to +a note candidate; every other case is reported as `no_mapped_expected_memories`. The baseline record preserves: + +- query id and query text +- expected agentmemory memory ids that mapped to note candidates +- deterministic note candidate ids +- expected note keys, when available +- agentmemory result ranks/scores, when present + +These records are suitable for building an ELF eval dataset after candidate notes are +ingested through ELF policy. They are not benchmark proof on their own. + +## Ignored Items + +The adapter reports ignored items instead of repairing them. Current reasons include: + +- `empty_text` +- `missing_or_invalid_timestamp` +- `note_text_too_long` +- `unsupported_memory_kind` +- `invalid_importance` +- `invalid_confidence` +- `no_mapped_expected_memories` + +Ignored items can still be reviewed manually. 
Do not force them into ELF notes by +loosening the adapter; either fix the fixture upstream or store long/ambiguous evidence +as docs and use normal ELF extraction/review workflows. + +## Comparing Retrieval Quality + +Use a two-step comparison: + +1. Review the adapter output and ingest selected `notes_ingest_item` records through + `/v2/notes/ingest`, grouped by scope. ELF write policy, English gate, provenance + validation, duplicate/update resolution, and indexing still run normally. +2. Convert selected `baseline_queries` into the `elf-eval` dataset format. Prefer + `expected_keys` when keys were emitted; otherwise resolve ingested note IDs and use + `expected_note_ids`. + +Then run `elf-eval` as usual: + +```sh +cargo run -p elf-eval -- -c ./elf.toml --dataset tmp/agentmemory-eval.json +``` + +For config-to-config comparisons or trace replay, follow `docs/guide/evaluation.md`. + +## Verification + +Run the adapter fixture test without network services: + +```sh +cargo test -p elf-eval --test agentmemory_fixture_adapter +``` + +Before review handoff for changes to this boundary, run the repository gate from +`Makefile.toml`. diff --git a/docs/guide/research/index.md b/docs/guide/research/index.md index 2c3c562d..d9d85967 100644 --- a/docs/guide/research/index.md +++ b/docs/guide/research/index.md @@ -10,6 +10,7 @@ Outputs: The smallest comparison or inventory document needed for implementation - `research_projects_inventory.md`: audited and pending external projects, research depth, and current planning surface. - `comparison_external_projects.md`: detailed capability comparison, project trade-offs, source map, and research-backed ELF directions. +- `agentmemory_adapter.md`: fixture-backed agentmemory import and baseline adapter boundary for `elf-eval`. ## Machine-Readable Runs