diff --git a/Cargo.lock b/Cargo.lock
index d3cd32caf..68a884dd1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2372,6 +2372,7 @@ dependencies = [
 name = "multitude"
 version = "0.3.0"
 dependencies = [
+ "alloc_tracker",
  "allocator-api2 0.4.0",
  "bolero",
  "bumpalo",
diff --git a/crates/multitude/Cargo.toml b/crates/multitude/Cargo.toml
index ec55f3b33..74a2b58e3 100644
--- a/crates/multitude/Cargo.toml
+++ b/crates/multitude/Cargo.toml
@@ -61,6 +61,7 @@ zerocopy = { workspace = true, optional = true }
 loom = { workspace = true }
 
 [dev-dependencies]
+alloc_tracker = { workspace = true }
 allocator-api2 = { workspace = true, features = ["alloc"] }
 bolero = { workspace = true, features = ["std"] }
 # `std` transitively enables `bolero-engine/any`, which is required
@@ -107,3 +108,8 @@ name = "multitude_basic"
 
 [[example]]
 name = "strings"
+
+[[example]]
+name = "object_tree"
+path = "examples/object_tree/main.rs"
+required-features = ["utf16"]
diff --git a/crates/multitude/examples/object_tree/backend.rs b/crates/multitude/examples/object_tree/backend.rs
new file mode 100644
index 000000000..ad3c6b825
--- /dev/null
+++ b/crates/multitude/examples/object_tree/backend.rs
@@ -0,0 +1,94 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Backend: statically-allocated rows + a reading API (`DataAccess`).
+//!
+//! Mocked here with statically-allocated data, but the shape — iterate rows,
+//! read each row's properties — matches a real storage or IPC source.
+
+/// Tree shape: `ROOT_ROWS` roots, each with `DEPTH` levels of descendants and
+/// `FANOUT` children per non-leaf node. Every node has a name and a binary blob.
+const ROOT_ROWS: usize = 12;
+const FANOUT: usize = 2;
+const DEPTH: usize = 2;
+const BLOB_SIZE: usize = 256;
+
+/// A single statically-allocated row of the backing data source.
+struct Row {
+    id: i64,
+    name: &'static str,
+    blob: &'static [u8],
+    children: &'static [Self],
+}
+
+/// Reads the properties of a single [`Row`].
This is the only way the object
+/// layer is allowed to touch backend data.
+pub(crate) struct RowReader<'a> {
+    row: &'a Row,
+}
+
+impl<'a> RowReader<'a> {
+    #[must_use]
+    pub(crate) fn id(&self) -> i64 {
+        self.row.id
+    }
+
+    #[must_use]
+    pub(crate) fn name(&self) -> &'a str {
+        self.row.name
+    }
+
+    #[must_use]
+    pub(crate) fn blob(&self) -> &'a [u8] {
+        self.row.blob
+    }
+
+    /// A child [`DataAccess`] over the rows nested under this one.
+    #[must_use]
+    pub(crate) fn children(&self) -> DataAccess<'a> {
+        DataAccess { rows: self.row.children }
+    }
+}
+
+/// The backend handle over a set of rows.
+#[derive(Clone, Copy)]
+pub(crate) struct DataAccess<'a> {
+    rows: &'a [Row],
+}
+
+impl<'a> DataAccess<'a> {
+    /// Iterates the rows, yielding a [`RowReader`] for each.
+    pub(crate) fn rows(&self) -> impl ExactSizeIterator<Item = RowReader<'a>> {
+        self.rows.iter().map(|row| RowReader { row })
+    }
+}
+
+/// Builds the mock dataset once and leaks it to obtain `'static` rows that
+/// stand in for statically-allocated backend data.
+#[must_use]
+pub(crate) fn make_dataset() -> DataAccess<'static> {
+    let mut next_id = 0;
+    DataAccess {
+        rows: make_rows(DEPTH, ROOT_ROWS, &mut next_id),
+    }
+}
+
+fn make_rows(depth: usize, count: usize, next_id: &mut i64) -> &'static [Row] {
+    let mut rows = Vec::with_capacity(count);
+    for _ in 0..count {
+        let id = *next_id;
+        *next_id += 1;
+        let children = if depth == 0 {
+            &[][..]
+        } else {
+            make_rows(depth - 1, FANOUT, next_id)
+        };
+        rows.push(Row {
+            id,
+            name: "property-name",
+            blob: &[0xABu8; BLOB_SIZE],
+            children,
+        });
+    }
+    Vec::leak(rows)
+}
diff --git a/crates/multitude/examples/object_tree/loader.rs b/crates/multitude/examples/object_tree/loader.rs
new file mode 100644
index 000000000..bd6d3965e
--- /dev/null
+++ b/crates/multitude/examples/object_tree/loader.rs
@@ -0,0 +1,37 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//!
Loader: bridges the backend [`DataAccess`] to the [`Value`] object model by
+//! materializing a row forest into an arena.
+
+use multitude::Arena;
+
+use crate::backend::{DataAccess, RowReader};
+use crate::object::Value;
+use crate::rc::{RcArray, RcBinary, RcStr, RcUtf16Str};
+
+/// Materializes the whole forest into `arena`.
+#[must_use]
+pub(crate) fn load(arena: &Arena, da: DataAccess<'_>) -> RcArray<Value> {
+    RcArray::new(arena, da.rows().map(|r| load_object(arena, &r)))
+}
+
+/// Materializes one object (a row and everything beneath it). Each object is a
+/// field array; the name is materialized both as a UTF-8 [`RcStr`] and a
+/// UTF-16 [`RcUtf16Str`].
+fn load_object(arena: &Arena, reader: &RowReader<'_>) -> Value {
+    let children = reader.children();
+    let child_array = RcArray::new(arena, children.rows().map(|c| load_object(arena, &c)));
+
+    RcArray::new(
+        arena,
+        [
+            reader.id().into(),
+            RcStr::new(arena, reader.name()).into(),
+            RcUtf16Str::new(arena, reader.name()).into(),
+            RcBinary::new(arena, reader.blob()).into(),
+            child_array.into(),
+        ],
+    )
+    .into()
+}
diff --git a/crates/multitude/examples/object_tree/main.rs b/crates/multitude/examples/object_tree/main.rs
new file mode 100644
index 000000000..5ce45b303
--- /dev/null
+++ b/crates/multitude/examples/object_tree/main.rs
@@ -0,0 +1,93 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Materializes an **object tree** from a mocked, statically-allocated data
+//! backend into an arena-backed, typed object model ([`multitude::Arc`]).
+//!
+//! Object models often use a fixed-size `Value` (a tagged union) plus a few
+//! reference-counted leaf types, each independently heap-allocated. This builds
+//! the same model in an arena instead, demonstrating that it:
+//!
+//! - **stays small** — arena `Arc`s are *thin* (8 bytes) even for DSTs, so
+//!   [`object::Value`] is **16 bytes**;
+//! - **outlives the arena** — the `Arc` handles keep their chunks alive after
+//!
the arena is dropped;
+//! - **allocates better** — one tree takes a few large chunk allocations rather
+//!   than one per node, measured below with [`alloc_tracker`].
+//!
+//! Layers: [`mod@backend`] (the data source), [`mod@object`] (the [`Value`]
+//! model), and [`mod@loader`] (materializes the tree from a [`backend::DataAccess`]).
+//!
+//! Run with: `cargo run --release --example object_tree --features utf16`
+#![allow(clippy::unwrap_used, reason = "example code")]
+#![allow(clippy::missing_panics_doc, reason = "example code")]
+#![allow(clippy::std_instead_of_core, reason = "example uses std::time")]
+#![allow(dead_code, reason = "the Value model defines variants this example does not read back")]
+
+mod backend;
+mod loader;
+mod object;
+mod rc;
+
+use std::time::Instant;
+
+use alloc_tracker::{Allocator, Session};
+use multitude::Arena;
+
+use crate::backend::DataAccess;
+use crate::object::Value;
+
+#[global_allocator]
+static ALLOCATOR: Allocator<std::alloc::System> = Allocator::system();
+
+const ITERATIONS: u32 = 10;
+
+/// Creates an arena and warms it with a throwaway load, then resets it for
+/// reuse so its chunks are already allocated.
+fn create_warmed_up_arena(dataset: DataAccess<'_>) -> Arena {
+    let mut arena = Arena::new();
+    let _ = loader::load(&arena, dataset);
+    arena.reset();
+    arena
+}
+
+fn main() {
+    let dataset = backend::make_dataset();
+
+    // 1. Fixed, small per-instance `Value` size.
+    println!("== value size ==");
+    println!("Value (multitude::Arc, thin) = {} bytes", size_of::<Value>());
+    println!();
+
+    // 2. Object count and memory footprint of one materialized tree.
+    let probe_arena = Arena::new();
+    let tree = loader::load(&probe_arena, dataset);
+    let stats = object::measure(&tree);
+    println!("== tree shape ==");
+    println!("objects (Value nodes) : {}", stats.objects);
+    println!("memory used by tree : {} bytes", stats.bytes);
+    println!();
+    drop(tree);
+    drop(probe_arena);
+
+    // 3. Allocation profile + timing: warm the arena once, then reset and reuse
+    // it each iteration. `Duration`'s `Debug` output keeps sub-millisecond precision.
+    let session = Session::new();
+    let mut arena = create_warmed_up_arena(dataset);
+
+    let arena_op = session.operation("load-tree");
+    let start = Instant::now();
+    for _ in 0..ITERATIONS {
+        let _span = arena_op.measure_thread();
+        let _ = loader::load(&arena, dataset);
+        arena.reset();
+    }
+    let elapsed = start.elapsed();
+
+    println!("== timing ({ITERATIONS} iterations) ==");
+    println!("arena : {elapsed:?} ({:?}/tree)", elapsed / ITERATIONS);
+    println!();
+
+    println!("== allocation profile (per tree) ==");
+    session.print_to_stdout();
+}
diff --git a/crates/multitude/examples/object_tree/object.rs b/crates/multitude/examples/object_tree/object.rs
new file mode 100644
index 000000000..dbfd38b8c
--- /dev/null
+++ b/crates/multitude/examples/object_tree/object.rs
@@ -0,0 +1,80 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Object layer: the typed `Value` model and its conversions.
+
+use crate::rc::{RcArray, RcBinary, RcStr, RcUtf16Str};
+
+/// Arena object model. Leaves are reference-counted handles ([`RcStr`],
+/// [`RcUtf16Str`], [`RcBinary`], [`RcArray`]); because every arena handle is
+/// *thin* (8 bytes), even for DSTs, this `Value` is a fixed 16 bytes and can
+/// outlive the arena.
+pub(crate) enum Value {
+    Int(i64),
+    Str(RcStr),
+    Utf16(RcUtf16Str),
+    Binary(RcBinary),
+    Array(RcArray<Value>),
+}
+
+impl From<i64> for Value {
+    fn from(value: i64) -> Self {
+        Self::Int(value)
+    }
+}
+
+impl From<RcStr> for Value {
+    fn from(value: RcStr) -> Self {
+        Self::Str(value)
+    }
+}
+
+impl From<RcBinary> for Value {
+    fn from(value: RcBinary) -> Self {
+        Self::Binary(value)
+    }
+}
+
+impl From<RcArray<Value>> for Value {
+    fn from(value: RcArray<Value>) -> Self {
+        Self::Array(value)
+    }
+}
+
+impl From<RcUtf16Str> for Value {
+    fn from(value: RcUtf16Str) -> Self {
+        Self::Utf16(value)
+    }
+}
+
+/// Aggregate measurements of a materialized [`Value`] tree.
+#[derive(Default)]
+pub(crate) struct TreeStats {
+    /// Number of [`Value`] nodes in the tree (every array element).
+    pub objects: usize,
+    /// Logical bytes the tree owns: every array's backing storage
+    /// (`len * size_of::<Value>()`) plus the string and binary payloads.
+    pub bytes: usize,
+}
+
+/// Walks `forest` and totals its node count and logical byte footprint.
+#[must_use]
+pub(crate) fn measure(forest: &[Value]) -> TreeStats {
+    let mut stats = TreeStats::default();
+    measure_into(forest, &mut stats);
+    stats
+}
+
+fn measure_into(values: &[Value], stats: &mut TreeStats) {
+    stats.objects += values.len();
+    stats.bytes += size_of_val(values);
+    for value in values {
+        match value {
+            Value::Int(_) => {}
+            Value::Str(s) => stats.bytes += s.len(),
+            Value::Binary(b) => stats.bytes += b.len(),
+            Value::Array(children) => measure_into(children, stats),
+            Value::Utf16(s) => stats.bytes += s.len() * size_of::<u16>(),
+        }
+    }
+}
diff --git a/crates/multitude/examples/object_tree/rc.rs b/crates/multitude/examples/object_tree/rc.rs
new file mode 100644
index 000000000..8fc5f93d6
--- /dev/null
+++ b/crates/multitude/examples/object_tree/rc.rs
@@ -0,0 +1,104 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Reference-counted leaf types, allocated in an [`Arena`].
+//!
+//! Each is a thin 8-byte newtype over a `multitude::Arc` (8 bytes even for DST
+//! payloads), can outlive the arena, and offers a `&Arena` constructor.
+
+use core::ops::Deref;
+
+use multitude::{Arc, Arena};
+
+/// An immutable, reference-counted binary blob (`Arc<[u8]>`), 8 bytes.
+pub(crate) struct RcBinary(Arc<[u8]>);
+
+impl RcBinary {
+    /// Copies `bytes` into `arena`.
+    #[must_use]
+    pub(crate) fn new(arena: &Arena, bytes: &[u8]) -> Self {
+        Self(arena.alloc_slice_copy_arc(bytes))
+    }
+}
+
+impl Deref for RcBinary {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        &self.0
+    }
+}
+
+/// An immutable, reference-counted UTF-8 string (`Arc<str>`), 8 bytes.
+pub(crate) struct RcStr(Arc<str>);
+
+impl RcStr {
+    /// Copies `s` into `arena`.
+    #[must_use]
+    pub(crate) fn new(arena: &Arena, s: &str) -> Self {
+        Self(arena.alloc_str_arc(s))
+    }
+}
+
+impl Deref for RcStr {
+    type Target = str;
+
+    fn deref(&self) -> &str {
+        &self.0
+    }
+}
+
+/// An immutable, reference-counted UTF-16 string (`ArcUtf16Str`), 8 bytes.
+///
+/// Like [`RcStr`] but transcoded to UTF-16 (handy at FFI / Windows boundaries);
+/// still a thin 8-byte handle, with the `u16` length held in the chunk prefix.
+pub(crate) struct RcUtf16Str(multitude::strings::ArcUtf16Str);
+
+impl RcUtf16Str {
+    /// Transcodes `s` to UTF-16 and copies it into `arena`.
+    #[must_use]
+    pub(crate) fn new(arena: &Arena, s: &str) -> Self {
+        Self(arena.alloc_utf16_str_arc_from_str(s))
+    }
+
+    /// Length in UTF-16 code units (`u16` elements).
+    #[must_use]
+    pub(crate) fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// True iff the string is empty.
+    #[must_use]
+    pub(crate) fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+}
+
+/// An immutable, reference-counted slim array (`Arc<[T]>`), 8 bytes.
+pub(crate) struct RcArray<T>(Arc<[T]>);
+
+impl<T> RcArray<T> {
+    /// Materializes `items` into `arena` and freezes them. The iterator's exact
+    /// length sizes the allocation precisely, so the freeze never reallocates.
+    #[must_use]
+    pub(crate) fn new<I>(arena: &Arena, items: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+        I::IntoIter: ExactSizeIterator,
+    {
+        let iter = items.into_iter();
+        let mut vec = arena.alloc_vec_with_capacity(iter.len());
+        for item in iter {
+            vec.push(item);
+        }
+        Self(vec.try_into_arc().expect("arena allocation cannot fail in this example"))
+    }
+}
+
+impl<T> Deref for RcArray<T> {
+    type Target = [T];
+
+    fn deref(&self) -> &[T] {
+        &self.0
+    }
+}