diff --git a/apps/elf-api/src/routes.rs b/apps/elf-api/src/routes.rs index 76a8401a..aea1804c 100644 --- a/apps/elf-api/src/routes.rs +++ b/apps/elf-api/src/routes.rs @@ -431,6 +431,10 @@ struct KnowledgePageRebuildBody { page_key: String, title: Option, #[serde(default)] + doc_ids: Vec, + #[serde(default)] + doc_chunk_ids: Vec, + #[serde(default)] note_ids: Vec, #[serde(default)] event_ids: Vec, @@ -3286,6 +3290,8 @@ async fn knowledge_page_rebuild( page_kind: payload.page_kind, page_key: payload.page_key, title: payload.title, + doc_ids: payload.doc_ids, + doc_chunk_ids: payload.doc_chunk_ids, note_ids: payload.note_ids, event_ids: payload.event_ids, relation_ids: payload.relation_ids, @@ -4045,7 +4051,7 @@ mod tests { assert!(html.contains("Providers And Ranking")); assert!(html.contains("Relation Context")); assert!(html.contains("Knowledge Page Snippets")); - assert!(html.contains("Derived page: source notes")); + assert!(html.contains("Derived page: source documents")); assert!(html.contains("directTraceId")); assert!(html.contains("trace_id")); assert!(html.contains("loadInitialTrace")); diff --git a/apps/elf-api/static/viewer.html b/apps/elf-api/static/viewer.html index 83e555bc..f8149a2d 100644 --- a/apps/elf-api/static/viewer.html +++ b/apps/elf-api/static/viewer.html @@ -1157,7 +1157,7 @@

Recent Traces

["status", page.status], ["updated_at", dateText(page.updated_at)], ["rebuilt_at", dateText(page.rebuilt_at)], - ["derived notice", "Derived page: source notes, events, relations, and proposals remain authoritative."] + ["derived notice", "Derived page: source documents, spans, memory notes, events, relations, and proposals remain authoritative."] ]), make("div", { className: "split-stack", style: "margin-top: 12px;" }, [ make("div", { className: "title", text: "Source coverage" }), diff --git a/apps/elf-eval/src/bin/real_world_live_adapter.rs b/apps/elf-eval/src/bin/real_world_live_adapter.rs index 23c36985..0e6f6f3e 100644 --- a/apps/elf-eval/src/bin/real_world_live_adapter.rs +++ b/apps/elf-eval/src/bin/real_world_live_adapter.rs @@ -4645,6 +4645,8 @@ async fn materialize_elf_knowledge( page_kind: KnowledgePageKind::Project, page_key, title: Some(loaded.job.title.clone()), + doc_ids: Vec::new(), + doc_chunk_ids: Vec::new(), note_ids: note_ids.clone(), event_ids: Vec::new(), relation_ids: Vec::new(), diff --git a/docs/spec/agent_memory_knowledge_system_v1.md b/docs/spec/agent_memory_knowledge_system_v1.md index 465e199f..25fce9e3 100644 --- a/docs/spec/agent_memory_knowledge_system_v1.md +++ b/docs/spec/agent_memory_knowledge_system_v1.md @@ -12,16 +12,15 @@ tags: - spec - agent-memory - knowledge -source_refs: +source_refs: [] +code_refs: + - Makefile.toml +related: - docs/evidence/benchmarking/2026-06-20-agent-knowledge-os-closeout-benchmark-report.md - docs/evidence/benchmarking/2026-06-22-p1-memory-authority-closeout-report.md - docs/runbook/benchmarking/real_world_agent_memory_benchmark.md - docs/spec/real_world_agent_memory_benchmark_v1.md - docs/spec/system_elf_memory_service_v2.md -code_refs: - - Makefile.toml -related: - - docs/spec/system_elf_memory_service_v2.md - docs/spec/system_knowledge_pages_v1.md - docs/spec/system_recall_debug_panel_v1.md - docs/spec/system_graph_memory_postgres_v1.md @@ -64,7 +63,7 @@ The product is composed of six typed 
layers: | --- | --- | --- | | Source Library | Captured documents, excerpts, imports, and source refs. | Sources remain evidence. Derived memory and pages must cite sources instead of replacing them. | | Memory Authority | Notes, core blocks, ingest decisions, history, corrections, and rollback evidence. | Memory writes are policy-gated, evidence-linked, auditable, and reversible. | -| Knowledge Workspace | Derived project, entity, concept, issue, and decision pages. | Pages are rebuildable derived artifacts with citations, lint, and stale-source detection. | +| Knowledge Workspace | Derived project, entity, concept, issue, decision, author, and timeline pages. | Pages are rebuildable derived artifacts with citations, lint, and stale-source detection. | | Graph-lite Facts | Postgres-backed relation facts and temporal markers. | Graph facts are source-backed context, not a separate authority store. | | Dreaming Review | Reviewable consolidation, summary, brief, tag, correction, and promotion proposals. | Derived proposals must be reviewable and must not mutate sources without an explicit accepted transition. | | Recall Debug | Search traces, dropped candidates, source/doc/page/graph/proposal rows, and replay aids. | Recall must expose why context was selected, dropped, unavailable, blocked, or not requested. | @@ -151,7 +150,7 @@ implement every item in a phase at once. | --- | --- | --- | --- | | P0 | Product contract and phase gate | Codify this product boundary, roadmap, competitor absorption rules, validation expectations, and closeout checklist. | Docs are reviewed, repo docs validation passes, claim boundaries match the June 20 closeout evidence, and the main thread accepts the next phase. 
| | P1 | Memory Authority MVP loop | Deliver one source-backed memory-authority vertical slice: capture source evidence, create/review one proposal through a proposal inbox, record the authority ledger, apply/correct/rollback, recall through agent-facing tools, and debug stale/correction behavior. | The slice has service tests, provenance/history evidence, recall/debug readback, and at least one real-world stale/correction benchmark job. | -| P2 | Knowledge Workspace | Promote source-linked project/entity/concept/timeline pages with rebuild, lint, watch, search, and version-diff readback. | Pages stay derived, every section is cited or explicitly unsupported, stale-source lint runs, and benchmark reports publish citation/staleness metrics. | +| P2 | Knowledge Workspace | Promote source-linked project/entity/concept/issue/decision/author/timeline pages with rebuild, lint, watch, search, and version-diff readback. | Pages stay derived, every section is cited or explicitly unsupported, stale-source lint runs, and benchmark reports publish citation/staleness metrics. | | P3 | Competitor-strength adapters | Add contained comparison adapters for qmd replay, PageIndex/OpenKB, mem0/OpenMemory, Letta, Graphiti/Zep, OpenViking, graph/RAG references, and other accepted deltas. | Each adapter preserves typed non-pass states and emits same-corpus evidence before any parity, win, tie, or loss claim. | | P4 | Benchmark and quality hardening | Expand adversarial jobs, public comparison grammar, quality metrics, latency/cost/resource reporting, and unsupported-claim detection. | Reports preserve job/suite/project typed states, expected evidence recall, irrelevant context ratio, unsupported claims, and resource metrics. | | P5 | Productization | Improve local setup, agent recipes, operator UI, privacy/delete/export boundaries, and production-quality workflows. 
| Operator workflows have documented setup, privacy/delete/export semantics, and validation evidence without weakening source authority. | diff --git a/docs/spec/system_knowledge_pages_v1.md b/docs/spec/system_knowledge_pages_v1.md index 2068ce8d..022c6dcf 100644 --- a/docs/spec/system_knowledge_pages_v1.md +++ b/docs/spec/system_knowledge_pages_v1.md @@ -6,21 +6,31 @@ resource: docs/spec/system_knowledge_pages_v1.md status: active authority: normative owner: spec -last_verified: 2026-06-18 +last_verified: 2026-06-22 tags: - docs - spec source_refs: [] -code_refs: [] +code_refs: + - packages/elf-domain/src/knowledge.rs + - packages/elf-service/src/knowledge.rs + - packages/elf-storage/src/knowledge.rs + - sql/tables/035_knowledge_pages.sql + - sql/tables/037_knowledge_page_source_refs.sql related: [] drift_watch: - docs/spec/system_knowledge_pages_v1.md + - packages/elf-domain/src/knowledge.rs + - packages/elf-service/src/knowledge.rs + - packages/elf-storage/src/knowledge.rs + - sql/tables/035_knowledge_pages.sql + - sql/tables/037_knowledge_page_source_refs.sql --- # Derived Knowledge Pages v1 Specification -Purpose: Define derived knowledge page storage, rebuild, citation, and lint contracts. +Purpose: Define derived knowledge page storage, rebuild, citation, source-span, and lint contracts. Status: normative -Read this when: You are implementing, validating, or reviewing project/entity/concept/issue/decision page rebuild behavior. +Read this when: You are implementing, validating, or reviewing project/entity/concept/issue/decision/author/timeline page rebuild behavior. Not this document: Viewer integration, search ranking, live LLM page generation, or source-note mutation. Defines: `elf.knowledge_page/v1` pages, sections, source refs, lint findings, and deterministic rebuild metadata. 
@@ -52,9 +62,13 @@ Allowed `knowledge_pages.page_kind` values: - `concept` - `issue` - `decision` +- `author` +- `timeline` Allowed `knowledge_page_source_refs.source_kind` values: +- `doc` +- `doc_chunk` - `note` - `event` - `relation` @@ -76,6 +90,7 @@ The normalized source ref must preserve: - `source_kind` - `source_id` +- Source Library document id and chunk/span locator when `source_kind = "doc_chunk"` - source status when available - source `updated_at` or equivalent freshness timestamp when available - source content hash when available @@ -87,6 +102,8 @@ The v1 rebuild path is deterministic for the same explicit source snapshot. Rebuild input sources may include: +- active Source Library `doc_documents` +- active Source Library `doc_chunks` as cited source spans - active or historical `memory_notes` - durable `add_event` audit rows from `memory_ingest_decisions` - `graph_facts` plus `graph_fact_evidence` @@ -110,6 +127,9 @@ Unreviewed consolidation proposals must not be used as source input for persiste - `schema = "elf.knowledge_page.rebuild/v1"` - `source_snapshot_hash` - `deterministic` +- `generated_by` metadata with actor agent id, runtime path, mode, and per-kind source input counts +- `version_identity` with schema `elf.knowledge_page.version_identity/v1`, page kind, page key, source snapshot hash, content hash, section hashes, and `source_mutation_allowed = false` +- `memory_candidate_policy` with schema `elf.knowledge_page.memory_candidate_policy/v1`, `review_required = true`, `review_surface = "consolidation_proposals"`, allowed memory-promotion apply intents, `direct_memory_ledger_mutation_allowed = false`, and `source_mutation_allowed = false` - `provider_metadata` - `allowed_variance` - `previous_version_diff` @@ -122,9 +142,9 @@ unchanged section key lists and counts, a human-readable summary, and `source_mutation_allowed = false`. Previous-version diff metadata is rebuild readback metadata, not source content. 
Page -content hashes must not include `previous_version_diff`; otherwise repeating the same -source rebuild would appear nondeterministic solely because the previous-version -metadata changed. +content hashes must not include `previous_version_diff`, `generated_by`, +`version_identity`, or `memory_candidate_policy`; otherwise repeating the same source +rebuild would appear nondeterministic solely because readback metadata changed. When future provider-backed or LLM-derived page text is persisted, `rebuild_metadata.deterministic` must be false unless the provider output is fully @@ -160,6 +180,19 @@ advisory and must not mutate source memory. Lint findings are derived diagnostics. They must not mutate authoritative source memory. +## Memory Candidate Boundary + +Generated knowledge page content may feed memory candidates only through reviewable +consolidation proposals. Knowledge page rebuild, list, detail, search, and lint +readback must not insert, update, delete, deprecate, restore, or enqueue indexing for +`memory_notes`. + +When a page section becomes candidate memory, the candidate must be represented as a +`consolidation_proposals` row with `contract_schema = "elf.consolidation/v1"` and +`apply_intent` of `create_derived_note` or `update_derived_note`. Applying that +proposal follows the Memory Promotion Apply Contract in +`system_consolidation_proposals_v1.md`. 
+ ## Search and Viewer Readback Knowledge page search is a derived-artifact readback surface, not the authoritative @@ -177,8 +210,8 @@ Page search results must include: - rebuild metadata, including previous-version diff metadata when present - lint summary and trust state that distinguishes clean, warning, error, and low coverage results -- a derived-result notice that source notes, event audits, relation facts, and applied - proposals remain authoritative +- a derived-result notice that source documents, spans, approved memory notes, event + audits, relation facts, and applied proposals remain authoritative - repair or rebuild guidance when lint or source coverage indicates stale, unsupported, missing, or weakly covered content diff --git a/packages/elf-domain/src/knowledge.rs b/packages/elf-domain/src/knowledge.rs index d076ba02..5ec3cc0e 100644 --- a/packages/elf-domain/src/knowledge.rs +++ b/packages/elf-domain/src/knowledge.rs @@ -25,6 +25,10 @@ pub enum KnowledgePageKind { Issue, /// Decision page. Decision, + /// Author page. + Author, + /// Timeline page. + Timeline, } impl KnowledgePageKind { /// Returns the canonical storage string. @@ -35,6 +39,8 @@ impl KnowledgePageKind { Self::Concept => "concept", Self::Issue => "issue", Self::Decision => "decision", + Self::Author => "author", + Self::Timeline => "timeline", } } @@ -46,6 +52,8 @@ impl KnowledgePageKind { "concept" => Some(Self::Concept), "issue" => Some(Self::Issue), "decision" => Some(Self::Decision), + "author" => Some(Self::Author), + "timeline" => Some(Self::Timeline), _ => None, } } @@ -55,6 +63,10 @@ impl KnowledgePageKind { #[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize)] #[serde(rename_all = "snake_case")] pub enum KnowledgeSourceKind { + /// Source Library document source. + Doc, + /// Source Library document chunk or span source. + DocChunk, /// Memory note source. Note, /// Event source reserved for future durable event rows. 
@@ -68,6 +80,8 @@ impl KnowledgeSourceKind { /// Returns the canonical storage string. pub fn as_str(self) -> &'static str { match self { + Self::Doc => "doc", + Self::DocChunk => "doc_chunk", Self::Note => "note", Self::Event => "event", Self::Relation => "relation", @@ -78,6 +92,8 @@ impl KnowledgeSourceKind { /// Parses a canonical storage string. pub fn parse(raw: &str) -> Option { match raw { + "doc" => Some(Self::Doc), + "doc_chunk" => Some(Self::DocChunk), "note" => Some(Self::Note), "event" => Some(Self::Event), "relation" => Some(Self::Relation), diff --git a/packages/elf-service/src/knowledge.rs b/packages/elf-service/src/knowledge.rs index fce10380..93071e48 100644 --- a/packages/elf-service/src/knowledge.rs +++ b/packages/elf-service/src/knowledge.rs @@ -19,9 +19,10 @@ use elf_domain::{ }; use elf_storage::{ knowledge::{ - self, KnowledgeEventSource, KnowledgeNoteSource, KnowledgePageLintFindingInsert, - KnowledgePageSearchRow, KnowledgePageSectionInsert, KnowledgePageSourceRefInsert, - KnowledgePageUpsert, KnowledgeProposalSource, KnowledgeRelationSource, + self, KnowledgeDocChunkSource, KnowledgeDocSource, KnowledgeEventSource, + KnowledgeNoteSource, KnowledgePageLintFindingInsert, KnowledgePageSearchRow, + KnowledgePageSectionInsert, KnowledgePageSourceRefInsert, KnowledgePageUpsert, + KnowledgeProposalSource, KnowledgeRelationSource, }, models::{ KnowledgePage, KnowledgePageLintFinding, KnowledgePageSection, KnowledgePageSourceRef, @@ -49,6 +50,12 @@ pub struct KnowledgePageRebuildRequest { /// Optional display title; a deterministic title is generated when omitted. pub title: Option, #[serde(default)] + /// Source Library documents to compile into the page. + pub doc_ids: Vec, + #[serde(default)] + /// Source Library document chunks or spans to compile into the page. + pub doc_chunk_ids: Vec, + #[serde(default)] /// Memory note sources to compile into the page. 
pub note_ids: Vec, #[serde(default)] @@ -500,6 +507,8 @@ struct LintDraft { #[derive(Clone, Debug)] struct SourceIds { + doc_ids: Vec, + doc_chunk_ids: Vec, note_ids: Vec, event_ids: Vec, relation_ids: Vec, @@ -508,6 +517,8 @@ struct SourceIds { impl SourceIds { fn from_request(req: &KnowledgePageRebuildRequest) -> Result { let ids = Self { + doc_ids: sorted_unique(&req.doc_ids), + doc_chunk_ids: sorted_unique(&req.doc_chunk_ids), note_ids: sorted_unique(&req.note_ids), event_ids: sorted_unique(&req.event_ids), relation_ids: sorted_unique(&req.relation_ids), @@ -520,6 +531,8 @@ impl SourceIds { } fn from_source_refs(source_refs: &[KnowledgePageSourceRef]) -> Result { + let mut doc_ids = Vec::new(); + let mut doc_chunk_ids = Vec::new(); let mut note_ids = Vec::new(); let mut event_ids = Vec::new(); let mut relation_ids = Vec::new(); @@ -527,6 +540,8 @@ impl SourceIds { for source_ref in source_refs { match KnowledgeSourceKind::parse(source_ref.source_kind.as_str()) { + Some(KnowledgeSourceKind::Doc) => doc_ids.push(source_ref.source_id), + Some(KnowledgeSourceKind::DocChunk) => doc_chunk_ids.push(source_ref.source_id), Some(KnowledgeSourceKind::Note) => note_ids.push(source_ref.source_id), Some(KnowledgeSourceKind::Event) => event_ids.push(source_ref.source_id), Some(KnowledgeSourceKind::Relation) => relation_ids.push(source_ref.source_id), @@ -540,6 +555,8 @@ impl SourceIds { } Ok(Self { + doc_ids: sorted_unique(&doc_ids), + doc_chunk_ids: sorted_unique(&doc_chunk_ids), note_ids: sorted_unique(&note_ids), event_ids: sorted_unique(&event_ids), relation_ids: sorted_unique(&relation_ids), @@ -548,7 +565,9 @@ impl SourceIds { } fn validate_non_empty(&self) -> Result<()> { - if self.note_ids.is_empty() + if self.doc_ids.is_empty() + && self.doc_chunk_ids.is_empty() + && self.note_ids.is_empty() && self.event_ids.is_empty() && self.relation_ids.is_empty() && self.proposal_ids.is_empty() @@ -564,19 +583,23 @@ impl SourceIds { fn require_counts( &self, + docs: usize, +
doc_chunks: usize, notes: usize, events: usize, relations: usize, proposals: usize, ) -> Result<()> { - if notes != self.note_ids.len() + if docs != self.doc_ids.len() + || doc_chunks != self.doc_chunk_ids.len() + || notes != self.note_ids.len() || events != self.event_ids.len() || relations != self.relation_ids.len() || proposals != self.proposal_ids.len() { return Err(Error::InvalidRequest { message: - "all requested knowledge page sources must exist and proposals must be applied" + "all requested knowledge page sources must exist, document sources must be active, and proposals must be applied" .to_string(), }); } @@ -625,7 +648,7 @@ impl ElfService { let source_coverage = source_coverage_value(req.page_kind, &req.page_key, &sections, &sources); - let base_rebuild_metadata = rebuild_metadata(&source_hash, &req.provider_metadata); + let base_rebuild_metadata = rebuild_metadata(&source_hash, &req.provider_metadata, &req); let content_hash = page_content_hash(&title, &sections, &source_coverage, &base_rebuild_metadata)?; let previous_version_diff = previous_version_diff_value( @@ -636,9 +659,17 @@ impl ElfService { content_hash.as_str(), &sections, ); + let version_identity = version_identity_value( + req.page_kind, + req.page_key.as_str(), + source_hash.as_str(), + content_hash.as_str(), + &sections, + ); let rebuild_metadata = rebuild_metadata_with_previous_version_diff( base_rebuild_metadata, previous_version_diff, + version_identity, ); let page_id = Uuid::new_v4(); let mut tx = self.db.pool.begin().await?; @@ -824,13 +855,20 @@ impl ElfService { req: &KnowledgePageRebuildRequest, ids: &SourceIds, ) -> Result> { - let (notes, events, relations, proposals) = self + let (docs, doc_chunks, notes, events, relations, proposals) = self .resolve_existing_source_rows(req.tenant_id.as_str(), req.project_id.as_str(), ids) .await?; - ids.require_counts(notes.len(), events.len(), relations.len(), proposals.len())?; + ids.require_counts( + docs.len(), + doc_chunks.len(), + notes.len(), +
events.len(), + relations.len(), + proposals.len(), + )?; - Ok(source_snapshots(notes, events, relations, proposals)) + Ok(source_snapshots(docs, doc_chunks, notes, events, relations, proposals)) } async fn resolve_existing_source_rows( @@ -839,11 +877,27 @@ impl ElfService { project_id: &str, ids: &SourceIds, ) -> Result<( + Vec, + Vec, Vec, Vec, Vec, Vec, )> { + let docs = knowledge::fetch_knowledge_doc_sources( + &self.db.pool, + tenant_id, + project_id, + &ids.doc_ids, + ) + .await?; + let doc_chunks = knowledge::fetch_knowledge_doc_chunk_sources( + &self.db.pool, + tenant_id, + project_id, + &ids.doc_chunk_ids, + ) + .await?; let notes = knowledge::fetch_knowledge_note_sources( &self.db.pool, tenant_id, @@ -873,7 +927,7 @@ impl ElfService { ) .await?; - Ok((notes, events, relations, proposals)) + Ok((docs, doc_chunks, notes, events, relations, proposals)) } async fn lint_source_refs( @@ -909,16 +963,18 @@ impl ElfService { let _page_kind = KnowledgePageKind::parse(page.page_kind.as_str()).ok_or_else(|| { Error::InvalidRequest { message: "stored knowledge page kind is invalid".to_string() } })?; - let (notes, events, relations, proposals) = self + let (docs, doc_chunks, notes, events, relations, proposals) = self .resolve_existing_source_rows(page.tenant_id.as_str(), page.project_id.as_str(), ids) .await?; - let mut sources = source_snapshots(notes, events, relations, proposals); + let mut sources = source_snapshots(docs, doc_chunks, notes, events, relations, proposals); Ok(sources.drain(..).map(|source| (source_key(&source), source)).collect()) } } fn source_snapshots( + docs: Vec, + doc_chunks: Vec, notes: Vec, events: Vec, relations: Vec, @@ -926,6 +982,8 @@ fn source_snapshots( ) -> Vec { let mut sources = Vec::new(); + sources.extend(docs.into_iter().map(doc_source_snapshot)); + sources.extend(doc_chunks.into_iter().map(doc_chunk_source_snapshot)); sources.extend(notes.into_iter().map(note_source_snapshot)); 
sources.extend(events.into_iter().map(event_source_snapshot)); sources.extend(relations.into_iter().map(relation_source_snapshot)); @@ -1033,8 +1091,8 @@ fn knowledge_page_search_item( lint_summary, trust_state, derived_notice: - "Derived knowledge page snippet. Verify cited source notes, events, relations, or proposals before treating it as authoritative." - .to_string(), + "Derived knowledge page snippet. Verify cited source documents, spans, memory notes, events, relations, or proposals before treating it as authoritative." + .to_string(), repair_guidance, updated_at: row.page_updated_at, rebuilt_at: row.rebuilt_at, @@ -1079,12 +1137,30 @@ fn search_repair_guidance(trust_state: &str) -> Option { } fn build_sections(sources: &[SourceSnapshot]) -> Result> { + let doc_indexes = source_indexes(sources, KnowledgeSourceKind::Doc); + let doc_chunk_indexes = source_indexes(sources, KnowledgeSourceKind::DocChunk); let note_indexes = source_indexes(sources, KnowledgeSourceKind::Note); let event_indexes = source_indexes(sources, KnowledgeSourceKind::Event); let relation_indexes = source_indexes(sources, KnowledgeSourceKind::Relation); let proposal_indexes = source_indexes(sources, KnowledgeSourceKind::Proposal); let mut sections = Vec::new(); + push_section( + &mut sections, + "source-documents", + "Source Documents", + "source_documents", + sources, + doc_indexes, + ); + push_section( + &mut sections, + "source-spans", + "Source Spans", + "source_spans", + sources, + doc_chunk_indexes, + ); push_section( &mut sections, "source-notes", @@ -1312,6 +1388,99 @@ fn citations_value(section: &DraftSection, sources: &[SourceSnapshot]) -> Value ) } +fn doc_source_snapshot(row: KnowledgeDocSource) -> SourceSnapshot { + let title = row.title.clone().unwrap_or_else(|| "Untitled source document".to_string()); + let excerpt = truncate_chars(normalize_whitespace(row.content.as_str()).as_str(), 240); + let line = format!("[doc:{}] {title}: {excerpt}", row.doc_type); + let snapshot = 
serde_json::json!({ + "kind": "doc", + "doc_id": row.doc_id, + "agent_id": row.agent_id.clone(), + "scope": row.scope.clone(), + "doc_type": row.doc_type.clone(), + "status": row.status.clone(), + "title": row.title.clone(), + "content_bytes": row.content_bytes, + "content_hash": row.content_hash.clone(), + "source_ref": row.source_ref.clone(), + "created_at": row.created_at, + "updated_at": row.updated_at, + }); + + SourceSnapshot { + kind: KnowledgeSourceKind::Doc, + id: row.doc_id, + status: Some(row.status), + updated_at: Some(row.updated_at), + content_hash: Some(row.content_hash), + snapshot, + citation_metadata: serde_json::json!({ "section_role": "source_document" }), + line, + } +} + +fn doc_chunk_source_snapshot(row: KnowledgeDocChunkSource) -> SourceSnapshot { + let title = row.title.clone().unwrap_or_else(|| "Untitled source document".to_string()); + let excerpt = truncate_chars(normalize_whitespace(row.chunk_text.as_str()).as_str(), 240); + let span_id = source_span_id( + row.doc_content_hash.as_str(), + row.start_offset.max(0) as usize, + row.end_offset.max(row.start_offset).max(0) as usize, + "captured", + ); + let line = format!( + "[doc_chunk:{}:{}-{}] {title}: {excerpt}", + row.chunk_index, row.start_offset, row.end_offset + ); + let source_span = serde_json::json!({ + "schema": "doc_source_span/v1", + "span_id": span_id, + "chunk_id": row.chunk_id, + "status": "captured", + "reason_code": null, + "start_offset": row.start_offset, + "end_offset": row.end_offset, + "content_hash": row.doc_content_hash.clone(), + "chunk_hash": row.chunk_hash.clone(), + }); + let snapshot = serde_json::json!({ + "kind": "doc_chunk", + "chunk_id": row.chunk_id, + "doc_id": row.doc_id, + "agent_id": row.agent_id.clone(), + "scope": row.scope.clone(), + "doc_type": row.doc_type.clone(), + "status": row.status.clone(), + "title": row.title.clone(), + "source_ref": row.source_ref.clone(), + "doc_content_hash": row.doc_content_hash.clone(), + "doc_updated_at": 
row.doc_updated_at, + "chunk_index": row.chunk_index, + "start_offset": row.start_offset, + "end_offset": row.end_offset, + "chunk_hash": row.chunk_hash.clone(), + "chunk_created_at": row.chunk_created_at, + "source_span": source_span, + }); + + SourceSnapshot { + kind: KnowledgeSourceKind::DocChunk, + id: row.chunk_id, + status: Some(row.status), + updated_at: Some(row.doc_updated_at), + content_hash: Some(row.chunk_hash), + snapshot, + citation_metadata: serde_json::json!({ + "section_role": "source_span", + "doc_id": row.doc_id, + "span_id": span_id, + "start_offset": row.start_offset, + "end_offset": row.end_offset, + }), + line, + } +} + fn note_source_snapshot(row: KnowledgeNoteSource) -> SourceSnapshot { let content_hash = hash_text(row.text.as_str()); let line = format!("{}{}", note_prefix(&row), row.text); @@ -1514,7 +1683,11 @@ fn source_counts(sources: &[SourceSnapshot]) -> Value { serde_json::json!(counts) } -fn rebuild_metadata(source_hash: &str, provider_metadata: &Value) -> Value { +fn rebuild_metadata( + source_hash: &str, + provider_metadata: &Value, + req: &KnowledgePageRebuildRequest, +) -> Value { let llm_derived = provider_metadata.get("llm_derived").and_then(Value::as_bool).unwrap_or(false); @@ -1523,6 +1696,29 @@ fn rebuild_metadata(source_hash: &str, provider_metadata: &Value) -> Value { "source_snapshot_hash": source_hash, "deterministic": !llm_derived, "provider_metadata": provider_metadata, + "generated_by": { + "schema": "elf.knowledge_page.generated_by/v1", + "runtime": "ElfService::knowledge_page_rebuild", + "actor_agent_id": req.agent_id, + "mode": if llm_derived { "provider_metadata_declared_llm" } else { "deterministic_service" }, + "source_input_counts": { + "doc": req.doc_ids.len(), + "doc_chunk": req.doc_chunk_ids.len(), + "note": req.note_ids.len(), + "event": req.event_ids.len(), + "relation": req.relation_ids.len(), + "proposal": req.proposal_ids.len(), + }, + }, + "memory_candidate_policy": { + "schema": 
"elf.knowledge_page.memory_candidate_policy/v1", + "review_required": true, + "review_surface": "consolidation_proposals", + "proposal_contract_schema": "elf.consolidation/v1", + "allowed_apply_intents": ["create_derived_note", "update_derived_note"], + "direct_memory_ledger_mutation_allowed": false, + "source_mutation_allowed": false, + }, "allowed_variance": if llm_derived { serde_json::json!(["LLM-derived page text may vary; provider metadata records the nondeterministic input path."]) } else { @@ -1531,12 +1727,20 @@ fn rebuild_metadata(source_hash: &str, provider_metadata: &Value) -> Value { }) } -fn rebuild_metadata_with_previous_version_diff(mut metadata: Value, diff: Value) -> Value { +fn rebuild_metadata_with_previous_version_diff( + mut metadata: Value, + diff: Value, + version_identity: Value, +) -> Value { let Some(object) = metadata.as_object_mut() else { - return serde_json::json!({ PREVIOUS_VERSION_DIFF_KEY: diff }); + return serde_json::json!({ + PREVIOUS_VERSION_DIFF_KEY: diff, + "version_identity": version_identity, + }); }; object.insert(PREVIOUS_VERSION_DIFF_KEY.to_string(), diff); + object.insert("version_identity".to_string(), version_identity); metadata } @@ -1631,6 +1835,33 @@ fn previous_version_diff_value( }) } +fn version_identity_value( + page_kind: KnowledgePageKind, + page_key: &str, + source_hash: &str, + content_hash: &str, + sections: &[DraftSection], +) -> Value { + serde_json::json!({ + "schema": "elf.knowledge_page.version_identity/v1", + "contract_schema": KNOWLEDGE_PAGE_CONTRACT_SCHEMA_V1, + "page_kind": page_kind.as_str(), + "page_key": page_key, + "source_snapshot_hash": source_hash, + "content_hash": content_hash, + "section_hashes": sections + .iter() + .map(|section| { + serde_json::json!({ + "section_key": section.section_key.clone(), + "content_hash": section.content_hash.clone(), + }) + }) + .collect::>(), + "source_mutation_allowed": false, + }) +} + fn sorted_strings<'a>(items: impl Iterator) -> Vec { let mut out = 
items.map(ToString::to_string).collect::>(); @@ -1669,6 +1900,9 @@ fn content_hash_rebuild_metadata(rebuild_metadata: &Value) -> Value { let mut stable = object.clone(); stable.remove(PREVIOUS_VERSION_DIFF_KEY); + stable.remove("generated_by"); + stable.remove("memory_candidate_policy"); + stable.remove("version_identity"); Value::Object(stable) } @@ -1866,6 +2100,8 @@ fn title_kind(page_kind: KnowledgePageKind) -> &'static str { KnowledgePageKind::Concept => "Concept", KnowledgePageKind::Issue => "Issue", KnowledgePageKind::Decision => "Decision", + KnowledgePageKind::Author => "Author", + KnowledgePageKind::Timeline => "Timeline", } } @@ -1922,6 +2158,13 @@ fn hash_json(value: &Value) -> Result { Ok(blake3::hash(&raw).to_hex().to_string()) } +fn source_span_id(content_hash: &str, start: usize, end: usize, span_kind: &str) -> Uuid { + let name = serde_json::json!(["elf-doc-source-span/v1", content_hash, start, end, span_kind]) + .to_string(); + + Uuid::new_v5(&Uuid::NAMESPACE_OID, name.as_bytes()) +} + async fn replace_page_children( tx: &mut Transaction<'_, Postgres>, page_id: Uuid, @@ -2058,13 +2301,35 @@ mod tests { } } + fn test_rebuild_request( + page_kind: KnowledgePageKind, + ) -> knowledge::KnowledgePageRebuildRequest { + knowledge::KnowledgePageRebuildRequest { + tenant_id: "tenant".to_string(), + project_id: "project".to_string(), + agent_id: "agent".to_string(), + page_kind, + page_key: "elf".to_string(), + title: Some("ELF".to_string()), + doc_ids: Vec::new(), + doc_chunk_ids: Vec::new(), + note_ids: Vec::new(), + event_ids: Vec::new(), + relation_ids: Vec::new(), + proposal_ids: Vec::new(), + provider_metadata: knowledge::empty_object(), + } + } + #[test] fn build_sections_preserves_citations_and_deterministic_hashes() { let sources = vec![ - test_source(KnowledgeSourceKind::Note, 1, "A source note supports the page."), - test_source(KnowledgeSourceKind::Event, 2, "An event audit supports the page."), - test_source(KnowledgeSourceKind::Relation, 3, "A 
relation supports the page."), - test_source(KnowledgeSourceKind::Proposal, 4, "An applied proposal supports the page."), + test_source(KnowledgeSourceKind::Doc, 1, "A source document supports the page."), + test_source(KnowledgeSourceKind::DocChunk, 2, "A source span supports the page."), + test_source(KnowledgeSourceKind::Note, 3, "A source note supports the page."), + test_source(KnowledgeSourceKind::Event, 4, "An event audit supports the page."), + test_source(KnowledgeSourceKind::Relation, 5, "A relation supports the page."), + test_source(KnowledgeSourceKind::Proposal, 6, "An applied proposal supports the page."), ]; let mut first_sections = knowledge::build_sections(&sources).expect("sections should build"); @@ -2075,7 +2340,7 @@ mod tests { .expect("section hash should serialize"); } - assert_eq!(first_sections.len(), 4); + assert_eq!(first_sections.len(), 6); assert!(first_sections.iter().all(|section| { section.citations.as_array().is_some_and(|citations| !citations.is_empty()) })); @@ -2086,7 +2351,9 @@ mod tests { &first_sections, &sources, ); - let metadata = knowledge::rebuild_metadata("source-hash", &knowledge::empty_object()); + let request = test_rebuild_request(KnowledgePageKind::Project); + let metadata = + knowledge::rebuild_metadata("source-hash", &knowledge::empty_object(), &request); let first_hash = knowledge::page_content_hash("ELF", &first_sections, &coverage, &metadata) .expect("page hash should serialize"); let second_hash = @@ -2095,6 +2362,10 @@ mod tests { assert_eq!(coverage["coverage_complete"], true); assert_eq!(metadata["deterministic"], true); + assert_eq!( + metadata["memory_candidate_policy"]["direct_memory_ledger_mutation_allowed"], + false + ); assert_eq!(first_hash, second_hash); } @@ -2107,11 +2378,25 @@ mod tests { "provider_id": "fixture", "model": "fixture-model", }), + &test_rebuild_request(KnowledgePageKind::Timeline), ); assert_eq!(metadata["deterministic"], false); 
assert!(metadata["allowed_variance"].as_array().is_some_and(|items| !items.is_empty())); assert_eq!(metadata["provider_metadata"]["provider_id"], "fixture"); + assert_eq!(metadata["generated_by"]["actor_agent_id"], "agent"); + } + + #[test] + fn generated_titles_cover_author_and_timeline_pages() { + assert_eq!( + knowledge::generated_title(KnowledgePageKind::Author, "ada"), + "Author Knowledge Page: ada" + ); + assert_eq!( + knowledge::generated_title(KnowledgePageKind::Timeline, "release-plan"), + "Timeline Knowledge Page: release-plan" + ); } #[test] @@ -2131,8 +2416,9 @@ mod tests { content_hash: "new-section-hash".to_string(), citations: serde_json::json!([{ "source_kind": "note" }]), }]; + let request = test_rebuild_request(KnowledgePageKind::Project); let base_metadata = - knowledge::rebuild_metadata("new-source-hash", &knowledge::empty_object()); + knowledge::rebuild_metadata("new-source-hash", &knowledge::empty_object(), &request); let coverage = serde_json::json!({ "coverage_complete": true }); let hash_without_diff = knowledge::page_content_hash("ELF", §ions, &coverage, &base_metadata) @@ -2145,8 +2431,18 @@ mod tests { hash_without_diff.as_str(), §ions, ); - let metadata_with_diff = - knowledge::rebuild_metadata_with_previous_version_diff(base_metadata, diff.clone()); + let version_identity = knowledge::version_identity_value( + KnowledgePageKind::Project, + "elf", + "new-source-hash", + hash_without_diff.as_str(), + §ions, + ); + let metadata_with_diff = knowledge::rebuild_metadata_with_previous_version_diff( + base_metadata, + diff.clone(), + version_identity, + ); let hash_with_diff = knowledge::page_content_hash("ELF", §ions, &coverage, &metadata_with_diff) .expect("hash should ignore previous-version diff metadata"); @@ -2161,6 +2457,10 @@ mod tests { .expect("diff should be extractable")["section_changed_count"], 1 ); + assert_eq!( + metadata_with_diff["version_identity"]["schema"], + "elf.knowledge_page.version_identity/v1" + ); } #[test] diff 
--git a/packages/elf-service/tests/acceptance/knowledge_pages.rs b/packages/elf-service/tests/acceptance/knowledge_pages.rs index 69761ede..2aa85c8d 100644 --- a/packages/elf-service/tests/acceptance/knowledge_pages.rs +++ b/packages/elf-service/tests/acceptance/knowledge_pages.rs @@ -7,7 +7,7 @@ use crate::acceptance::{self, SpyExtractor, StubEmbedding, StubRerank}; use elf_domain::knowledge::KnowledgePageKind; use elf_service::{ AddNoteInput, AddNoteRequest, ElfService, KnowledgePageLintRequest, - KnowledgePageRebuildRequest, Providers, + KnowledgePageRebuildRequest, KnowledgePageRebuildResponse, Providers, }; use elf_testkit::TestDatabase; @@ -20,6 +20,67 @@ struct KnowledgeFixture { _test_db: TestDatabase, } +#[derive(Clone, Copy)] +struct KnowledgeSourceIds { + note_id: Uuid, + event_id: Uuid, + doc_id: Uuid, + chunk_id: Uuid, + fact_id: Uuid, + proposal_id: Uuid, +} + +fn knowledge_foundation_request(ids: KnowledgeSourceIds) -> KnowledgePageRebuildRequest { + KnowledgePageRebuildRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + agent_id: AGENT_ID.to_string(), + page_kind: KnowledgePageKind::Project, + page_key: "knowledge-foundation".to_string(), + title: Some("Knowledge Foundation".to_string()), + doc_ids: vec![ids.doc_id], + doc_chunk_ids: vec![ids.chunk_id], + note_ids: vec![ids.note_id], + event_ids: vec![ids.event_id], + relation_ids: vec![ids.fact_id], + proposal_ids: vec![ids.proposal_id], + provider_metadata: serde_json::json!({}), + } +} + +fn assert_first_rebuild(first: &KnowledgePageRebuildResponse) { + assert_eq!(first.page.sections.len(), 6); + assert_eq!(first.page.source_refs.len(), 6); + assert!(first.page.sections.iter().all(|section| { + section.citations.as_array().is_some_and(|citations| !citations.is_empty()) + })); + assert!(first.page.source_refs.iter().any(|source_ref| source_ref.source_kind == "doc")); + assert!(first.page.source_refs.iter().any(|source_ref| source_ref.source_kind == "doc_chunk")); 
+ assert_eq!(first.page.page.source_coverage["coverage_complete"], true); + assert_eq!(first.page.page.rebuild_metadata["deterministic"], true); + assert_eq!( + first.page.page.rebuild_metadata["generated_by"]["runtime"], + "ElfService::knowledge_page_rebuild" + ); + assert_eq!( + first.page.page.rebuild_metadata["memory_candidate_policy"]["direct_memory_ledger_mutation_allowed"], + false + ); + assert_eq!( + first.page.page.rebuild_metadata["version_identity"]["schema"], + "elf.knowledge_page.version_identity/v1" + ); + assert_eq!( + first + .page + .page + .previous_version_diff + .as_ref() + .expect("initial rebuild should expose no-previous diff")["available"], + false + ); +} + async fn setup_service(test_name: &str) -> Option { let Some(test_db) = acceptance::test_db().await else { eprintln!("Skipping {test_name}; set ELF_PG_DSN to run this test."); @@ -120,6 +181,91 @@ VALUES ($1,$2,$3,$4,'agent_private','add_event','fact','knowledge_event',$5,'rem decision_id } +async fn insert_source_document(service: &ElfService) -> (Uuid, Uuid) { + let doc_id = Uuid::new_v4(); + let chunk_id = Uuid::new_v4(); + let content = "The Knowledge Workspace compiles Source Library spans into cited derived pages."; + let content_hash = blake3::hash(content.as_bytes()).to_hex().to_string(); + let chunk_hash = blake3::hash(content.as_bytes()).to_hex().to_string(); + let source_ref = serde_json::json!({ + "schema": "doc_source_ref/v1", + "doc_type": "knowledge", + "uri": "docs://knowledge/workspace/source-span-fixture", + "source_record_id": doc_id, + "content_hash": content_hash, + "source_spans": [ + { + "schema": "doc_source_span/v1", + "span_id": Uuid::new_v4(), + "chunk_id": chunk_id, + "status": "captured", + "start_offset": 0, + "end_offset": content.len(), + "content_hash": content_hash, + "chunk_hash": chunk_hash + } + ] + }); + + sqlx::query( + "\ +INSERT INTO doc_documents ( + doc_id, + tenant_id, + project_id, + agent_id, + scope, + doc_type, + status, + title, + 
source_ref, + content, + content_bytes, + content_hash, + created_at, + updated_at +) +VALUES ($1,$2,$3,$4,'project_shared','knowledge','active','Knowledge Workspace Source',$5,$6,$7,$8,$9,$9)", + ) + .bind(doc_id) + .bind(TENANT_ID) + .bind(PROJECT_ID) + .bind(AGENT_ID) + .bind(source_ref) + .bind(content) + .bind(i32::try_from(content.len()).expect("fixture content length should fit i32")) + .bind(content_hash) + .bind(OffsetDateTime::UNIX_EPOCH) + .execute(&service.db.pool) + .await + .expect("source document should be inserted"); + sqlx::query( + "\ +INSERT INTO doc_chunks ( + chunk_id, + doc_id, + chunk_index, + start_offset, + end_offset, + chunk_text, + chunk_hash, + created_at +) +VALUES ($1,$2,0,0,$3,$4,$5,$6)", + ) + .bind(chunk_id) + .bind(doc_id) + .bind(i32::try_from(content.len()).expect("fixture content length should fit i32")) + .bind(content) + .bind(chunk_hash) + .bind(OffsetDateTime::UNIX_EPOCH) + .execute(&service.db.pool) + .await + .expect("source document chunk should be inserted"); + + (doc_id, chunk_id) +} + async fn insert_relation(service: &ElfService, note_id: Uuid) -> Uuid { let subject_id = Uuid::new_v4(); let fact_id = Uuid::new_v4(); @@ -291,15 +437,7 @@ VALUES ($1,$2,$3,$4,$5,'elf.consolidation/v1','knowledge_page','create_derived_k proposal_id } -#[tokio::test] -#[ignore = "Requires external Postgres and Qdrant. 
Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] -async fn rebuilds_pages_with_citations_and_detects_stale_sources() { - let Some(fixture) = - setup_service("rebuilds_pages_with_citations_and_detects_stale_sources").await - else { - return; - }; - let service = &fixture.service; +async fn insert_rebuild_sources(service: &ElfService) -> KnowledgeSourceIds { let note_id = insert_source_note( service, "knowledge_pages_foundation", @@ -307,56 +445,32 @@ async fn rebuilds_pages_with_citations_and_detects_stale_sources() { ) .await; let event_id = insert_event_audit(service, note_id).await; + let (doc_id, chunk_id) = insert_source_document(service).await; let fact_id = insert_relation(service, note_id).await; let proposal_id = insert_applied_proposal(service, note_id).await; + + KnowledgeSourceIds { note_id, event_id, doc_id, chunk_id, fact_id, proposal_id } +} + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] +async fn rebuilds_pages_with_citations_and_detects_stale_sources() { + let Some(fixture) = + setup_service("rebuilds_pages_with_citations_and_detects_stale_sources").await + else { + return; + }; + let service = &fixture.service; + let source_ids = insert_rebuild_sources(service).await; let first = service - .knowledge_page_rebuild(KnowledgePageRebuildRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - agent_id: AGENT_ID.to_string(), - page_kind: KnowledgePageKind::Project, - page_key: "knowledge-foundation".to_string(), - title: Some("Knowledge Foundation".to_string()), - note_ids: vec![note_id], - event_ids: vec![event_id], - relation_ids: vec![fact_id], - proposal_ids: vec![proposal_id], - provider_metadata: serde_json::json!({}), - }) + .knowledge_page_rebuild(knowledge_foundation_request(source_ids)) .await .expect("knowledge page should rebuild"); - assert_eq!(first.page.sections.len(), 4); - assert_eq!(first.page.source_refs.len(), 4); - 
assert!(first.page.sections.iter().all(|section| { - section.citations.as_array().is_some_and(|citations| !citations.is_empty()) - })); - assert_eq!(first.page.page.source_coverage["coverage_complete"], true); - assert_eq!(first.page.page.rebuild_metadata["deterministic"], true); - assert_eq!( - first - .page - .page - .previous_version_diff - .as_ref() - .expect("initial rebuild should expose no-previous diff")["available"], - false - ); + assert_first_rebuild(&first); let second = service - .knowledge_page_rebuild(KnowledgePageRebuildRequest { - tenant_id: TENANT_ID.to_string(), - project_id: PROJECT_ID.to_string(), - agent_id: AGENT_ID.to_string(), - page_kind: KnowledgePageKind::Project, - page_key: "knowledge-foundation".to_string(), - title: Some("Knowledge Foundation".to_string()), - note_ids: vec![note_id], - event_ids: vec![event_id], - relation_ids: vec![fact_id], - proposal_ids: vec![proposal_id], - provider_metadata: serde_json::json!({}), - }) + .knowledge_page_rebuild(knowledge_foundation_request(source_ids)) .await .expect("knowledge page should rebuild deterministically"); @@ -384,7 +498,7 @@ WHERE note_id = $3", ) .bind("Fact: Derived knowledge pages changed after the page snapshot was rebuilt.") .bind(OffsetDateTime::now_utc()) - .bind(note_id) + .bind(source_ids.note_id) .execute(&service.db.pool) .await .expect("source note should update"); @@ -401,6 +515,6 @@ WHERE note_id = $3", assert!(lint.findings.iter().any(|finding| { finding.finding_type == "stale_source_ref" && finding.source_kind.as_deref() == Some("note") - && finding.source_id == Some(note_id) + && finding.source_id == Some(source_ids.note_id) })); } diff --git a/packages/elf-storage/src/knowledge.rs b/packages/elf-storage/src/knowledge.rs index eef76197..d1f0fd97 100644 --- a/packages/elf-storage/src/knowledge.rs +++ b/packages/elf-storage/src/knowledge.rs @@ -252,6 +252,72 @@ pub struct KnowledgeProposalSource { pub updated_at: OffsetDateTime, } +/// Source Library document row 
used by the knowledge page rebuilder. +#[derive(Debug, FromRow)] +pub struct KnowledgeDocSource { + /// Document identifier. + pub doc_id: Uuid, + /// Agent that captured the document. + pub agent_id: String, + /// Document scope. + pub scope: String, + /// Document type. + pub doc_type: String, + /// Document lifecycle status. + pub status: String, + /// Optional document title. + pub title: Option, + /// Document source reference. + pub source_ref: Value, + /// Persisted document content. + pub content: String, + /// Persisted byte length. + pub content_bytes: i32, + /// Whole-document content hash. + pub content_hash: String, + /// Document creation timestamp. + pub created_at: OffsetDateTime, + /// Document update timestamp. + pub updated_at: OffsetDateTime, +} + +/// Source Library document chunk row used by the knowledge page rebuilder. +#[derive(Debug, FromRow)] +pub struct KnowledgeDocChunkSource { + /// Chunk identifier. + pub chunk_id: Uuid, + /// Parent document identifier. + pub doc_id: Uuid, + /// Agent that captured the document. + pub agent_id: String, + /// Document scope. + pub scope: String, + /// Document type. + pub doc_type: String, + /// Document lifecycle status. + pub status: String, + /// Optional document title. + pub title: Option, + /// Document source reference. + pub source_ref: Value, + /// Whole-document content hash. + pub doc_content_hash: String, + /// Document update timestamp. + pub doc_updated_at: OffsetDateTime, + /// Zero-based chunk index. + pub chunk_index: i32, + /// Inclusive start byte offset. + pub start_offset: i32, + /// Exclusive end byte offset. + pub end_offset: i32, + /// Chunk text. + pub chunk_text: String, + /// Chunk content hash. + pub chunk_hash: String, + /// Chunk creation timestamp. + pub chunk_created_at: OffsetDateTime, +} + /// Searchable knowledge page section row with page and lint metadata. 
#[derive(Debug, FromRow)] pub struct KnowledgePageSearchRow { @@ -1112,3 +1178,98 @@ ORDER BY updated_at ASC, proposal_id ASC", Ok(rows) } + +/// Fetches active Source Library documents by identifier for a knowledge page rebuild. +pub async fn fetch_knowledge_doc_sources<'e, E>( + executor: E, + tenant_id: &str, + project_id: &str, + doc_ids: &[Uuid], +) -> Result> +where + E: PgExecutor<'e>, +{ + if doc_ids.is_empty() { + return Ok(Vec::new()); + } + + let rows = sqlx::query_as::<_, KnowledgeDocSource>( + "\ +SELECT + doc_id, + agent_id, + scope, + doc_type, + status, + title, + COALESCE(source_ref, '{}'::jsonb) AS source_ref, + content, + content_bytes, + content_hash, + created_at, + updated_at +FROM doc_documents +WHERE tenant_id = $1 + AND project_id = $2 + AND doc_id = ANY($3::uuid[]) + AND status = 'active' +ORDER BY updated_at ASC, doc_id ASC", + ) + .bind(tenant_id) + .bind(project_id) + .bind(doc_ids) + .fetch_all(executor) + .await?; + + Ok(rows) +} + +/// Fetches active Source Library document chunks by identifier for a knowledge page rebuild. 
+pub async fn fetch_knowledge_doc_chunk_sources<'e, E>( + executor: E, + tenant_id: &str, + project_id: &str, + chunk_ids: &[Uuid], +) -> Result> +where + E: PgExecutor<'e>, +{ + if chunk_ids.is_empty() { + return Ok(Vec::new()); + } + + let rows = sqlx::query_as::<_, KnowledgeDocChunkSource>( + "\ +SELECT + c.chunk_id, + c.doc_id, + d.agent_id, + d.scope, + d.doc_type, + d.status, + d.title, + COALESCE(d.source_ref, '{}'::jsonb) AS source_ref, + d.content_hash AS doc_content_hash, + d.updated_at AS doc_updated_at, + c.chunk_index, + c.start_offset, + c.end_offset, + c.chunk_text, + c.chunk_hash, + c.created_at AS chunk_created_at +FROM doc_chunks c +JOIN doc_documents d ON d.doc_id = c.doc_id +WHERE d.tenant_id = $1 + AND d.project_id = $2 + AND c.chunk_id = ANY($3::uuid[]) + AND d.status = 'active' +ORDER BY d.updated_at ASC, c.chunk_index ASC, c.chunk_id ASC", + ) + .bind(tenant_id) + .bind(project_id) + .bind(chunk_ids) + .fetch_all(executor) + .await?; + + Ok(rows) +} diff --git a/packages/elf-storage/src/models.rs b/packages/elf-storage/src/models.rs index 2276d977..897ba6fc 100644 --- a/packages/elf-storage/src/models.rs +++ b/packages/elf-storage/src/models.rs @@ -433,7 +433,7 @@ pub struct KnowledgePage { pub tenant_id: String, /// Project that owns the page. pub project_id: String, - /// Page kind, such as project, entity, concept, issue, or decision. + /// Page kind, such as project, entity, concept, issue, decision, author, or timeline. pub page_kind: String, /// Stable page key within the tenant/project/kind namespace. pub page_key: String, @@ -499,7 +499,7 @@ pub struct KnowledgePageSourceRef { pub page_id: Uuid, /// Section that cites the source, if section-scoped. pub section_id: Option, - /// Source kind, such as note, relation, proposal, or event. + /// Source kind, such as doc, doc_chunk, note, relation, proposal, or event. pub source_kind: String, /// Authoritative source identifier. 
pub source_id: Uuid, diff --git a/sql/tables/035_knowledge_pages.sql b/sql/tables/035_knowledge_pages.sql index a13f3cbe..02dc34eb 100644 --- a/sql/tables/035_knowledge_pages.sql +++ b/sql/tables/035_knowledge_pages.sql @@ -21,7 +21,7 @@ ALTER TABLE knowledge_pages DROP CONSTRAINT IF EXISTS ck_knowledge_pages_page_kind; ALTER TABLE knowledge_pages ADD CONSTRAINT ck_knowledge_pages_page_kind - CHECK (page_kind IN ('project', 'entity', 'concept', 'issue', 'decision')); + CHECK (page_kind IN ('project', 'entity', 'concept', 'issue', 'decision', 'author', 'timeline')); ALTER TABLE knowledge_pages DROP CONSTRAINT IF EXISTS ck_knowledge_pages_status; diff --git a/sql/tables/037_knowledge_page_source_refs.sql b/sql/tables/037_knowledge_page_source_refs.sql index d157c563..208398d4 100644 --- a/sql/tables/037_knowledge_page_source_refs.sql +++ b/sql/tables/037_knowledge_page_source_refs.sql @@ -16,7 +16,7 @@ ALTER TABLE knowledge_page_source_refs DROP CONSTRAINT IF EXISTS ck_knowledge_page_source_refs_source_kind; ALTER TABLE knowledge_page_source_refs ADD CONSTRAINT ck_knowledge_page_source_refs_source_kind - CHECK (source_kind IN ('note', 'event', 'relation', 'proposal')); + CHECK (source_kind IN ('doc', 'doc_chunk', 'note', 'event', 'relation', 'proposal')); ALTER TABLE knowledge_page_source_refs DROP CONSTRAINT IF EXISTS ck_knowledge_page_source_refs_source_snapshot; diff --git a/sql/tables/038_knowledge_page_lint_findings.sql b/sql/tables/038_knowledge_page_lint_findings.sql index e76a5aa2..6b13eb61 100644 --- a/sql/tables/038_knowledge_page_lint_findings.sql +++ b/sql/tables/038_knowledge_page_lint_findings.sql @@ -21,7 +21,7 @@ ALTER TABLE knowledge_page_lint_findings DROP CONSTRAINT IF EXISTS ck_knowledge_page_lint_findings_source_kind; ALTER TABLE knowledge_page_lint_findings ADD CONSTRAINT ck_knowledge_page_lint_findings_source_kind - CHECK (source_kind IS NULL OR source_kind IN ('note', 'event', 'relation', 'proposal')); + CHECK (source_kind IS NULL OR 
source_kind IN ('doc', 'doc_chunk', 'note', 'event', 'relation', 'proposal')); ALTER TABLE knowledge_page_lint_findings DROP CONSTRAINT IF EXISTS ck_knowledge_page_lint_findings_details;