diff --git a/.spelling b/.spelling index 96834b58c..27d7c4f13 100644 --- a/.spelling +++ b/.spelling @@ -608,3 +608,5 @@ u32 POV lossy unrounded +unpadded +unyielded diff --git a/Cargo.lock b/Cargo.lock index 19cc31de8..2ab85d258 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2370,7 +2370,7 @@ dependencies = [ [[package]] name = "multitude" -version = "0.3.1" +version = "0.3.2" dependencies = [ "allocator-api2 0.4.0", "bolero", diff --git a/Cargo.toml b/Cargo.toml index c27799b3d..f827117cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ fundle_macros = { path = "crates/fundle_macros", default-features = false, versi fundle_macros_impl = { path = "crates/fundle_macros_impl", default-features = false, version = "0.3.3" } http_extensions = { path = "crates/http_extensions", default-features = false, version = "0.6.2" } layered = { path = "crates/layered", default-features = false, version = "0.3.4" } -multitude = { path = "crates/multitude", default-features = false, version = "0.3.1" } +multitude = { path = "crates/multitude", default-features = false, version = "0.3.2" } ohno = { path = "crates/ohno", default-features = false, version = "0.3.6" } ohno_macros = { path = "crates/ohno_macros", default-features = false, version = "0.3.4" } recoverable = { path = "crates/recoverable", default-features = false, version = "0.1.6" } diff --git a/crates/multitude/Cargo.toml b/crates/multitude/Cargo.toml index cf52a9d36..5ea59ff16 100644 --- a/crates/multitude/Cargo.toml +++ b/crates/multitude/Cargo.toml @@ -3,7 +3,7 @@ [package] name = "multitude" -version = "0.3.1" +version = "0.3.2" description = "Fast and flexible arena allocator." readme = "README.md" keywords = ["arena", "memory", "allocator", "bump"] @@ -91,6 +91,10 @@ harness = false name = "criterion_drop" harness = false +[[bench]] +name = "criterion_arc_array" +harness = false + # Callgrind benches require Linux (Valgrind). 
The bench files are gated to compile # to a no-op on non-Linux targets, but the [[bench]] entry itself cannot be # cfg-gated, so it is unconditional here. @@ -102,6 +106,10 @@ harness = false name = "gungraun_drop" harness = false +[[bench]] +name = "gungraun_arc_array" +harness = false + [[example]] name = "multitude_basic" diff --git a/crates/multitude/README.md b/crates/multitude/README.md index 0e511587e..a728bfa11 100644 --- a/crates/multitude/README.md +++ b/crates/multitude/README.md @@ -397,94 +397,94 @@ existing `_arc` slice methods). This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGmYW0CYXZlMC43LjJhdIQbLiTyV0MU86EbZU15e0PmecoboQ9jo59bnAEbyDXw04U13GlhYvRhcoQbBzV3ofWgqIgbt8brW1MeN_Mb9N6Ac8XJFEIbIYjmnKUrOjRhZIWCaGJ5dGVtdWNrZjEuMjUuMIJlYnl0ZXNmMS4xMS4xgmhieXRlc2J1ZmUwLjUuNYJpbXVsdGl0dWRlZTAuMy4xgmh6ZXJvY29weWYwLjguNTA + [__cargo_doc2readme_dependencies_info]: ggGmYW0CYXZlMC43LjJhdIQbLiTyV0MU86EbZU15e0PmecoboQ9jo59bnAEbyDXw04U13GlhYvRhcoQbBzV3ofWgqIgbt8brW1MeN_Mb9N6Ac8XJFEIbIYjmnKUrOjRhZIWCaGJ5dGVtdWNrZjEuMjUuMIJlYnl0ZXNmMS4xMS4xgmhieXRlc2J1ZmUwLjUuNYJpbXVsdGl0dWRlZTAuMy4ygmh6ZXJvY29weWYwLjguNTA [__link0]: https://crates.io/crates/bumpalo - [__link1]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link10]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec + [__link1]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link10]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec [__link11]: https://crates.io/crates/dst-factory - [__link12]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::format - [__link13]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::ArcUtf16Str - [__link14]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::BoxUtf16Str - [__link15]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::Utf16String - [__link16]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::format_utf16 
- [__link17]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link18]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link19]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena - [__link2]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc + [__link12]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::format + [__link13]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::ArcUtf16Str + [__link14]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::BoxUtf16Str + [__link15]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::Utf16String + [__link16]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::format_utf16 + [__link17]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link18]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link19]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena + [__link2]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc [__link20]: https://doc.rust-lang.org/stable/std/marker/trait.Send.html - [__link21]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link22]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link23]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link24]: https://docs.rs/multitude/0.3.1/multitude/?search=Box + [__link21]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link22]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link23]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link24]: https://docs.rs/multitude/0.3.2/multitude/?search=Box [__link25]: https://doc.rust-lang.org/stable/alloc/?search=boxed::Box - [__link26]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec - [__link27]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String + [__link26]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec + [__link27]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String [__link28]: 
https://crates.io/crates/allocator-api2 - [__link29]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String - [__link3]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link30]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec - [__link31]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String::into_boxed_str - [__link32]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link33]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link34]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec::into_boxed_slice - [__link35]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link36]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link37]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link38]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link39]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec::leak - [__link4]: https://docs.rs/multitude/0.3.1/multitude/?search=Box + [__link29]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String + [__link3]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link30]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec + [__link31]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String::into_boxed_str + [__link32]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link33]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link34]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec::into_boxed_slice + [__link35]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link36]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link37]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link38]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link39]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec::leak + [__link4]: https://docs.rs/multitude/0.3.2/multitude/?search=Box [__link40]: 
https://github.com/microsoft/oxidizer/blob/main/crates/multitude/BUMPALO.md [__link41]: https://crates.io/crates/bumpalo - [__link42]: https://docs.rs/multitude/0.3.1/multitude/strings/index.html - [__link43]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link44]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::ArcUtf16Str - [__link45]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link46]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::BoxUtf16Str - [__link47]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena - [__link48]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String - [__link49]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::Utf16String - [__link5]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link50]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::format - [__link51]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::format_utf16 - [__link52]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String - [__link53]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String::into_boxed_str - [__link54]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link55]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::Utf16String - [__link56]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::Utf16String::into_boxed_utf16_str - [__link57]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::BoxUtf16Str - [__link58]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link59]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena - [__link6]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link60]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::alloc_dst_arc - [__link61]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::alloc_dst_box + [__link42]: https://docs.rs/multitude/0.3.2/multitude/strings/index.html + [__link43]: 
https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link44]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::ArcUtf16Str + [__link45]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link46]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::BoxUtf16Str + [__link47]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena + [__link48]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String + [__link49]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::Utf16String + [__link5]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link50]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::format + [__link51]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::format_utf16 + [__link52]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String + [__link53]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String::into_boxed_str + [__link54]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link55]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::Utf16String + [__link56]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::Utf16String::into_boxed_utf16_str + [__link57]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::BoxUtf16Str + [__link58]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link59]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena + [__link6]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link60]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::alloc_dst_arc + [__link61]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::alloc_dst_box [__link62]: https://doc.rust-lang.org/stable/core/?search=alloc::Layout [__link63]: https://crates.io/crates/dst-factory [__link64]: https://doc.rust-lang.org/stable/std/?search=io::Write - [__link65]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec - [__link66]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link67]: 
https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link68]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String - [__link69]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec - [__link7]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link70]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::alloc_dst_arc - [__link71]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::alloc_dst_box - [__link72]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::ArcUtf16Str - [__link73]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::BoxUtf16Str - [__link74]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::Utf16String - [__link75]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::format_utf16 + [__link65]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec + [__link66]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link67]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link68]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String + [__link69]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec + [__link7]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link70]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::alloc_dst_arc + [__link71]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::alloc_dst_box + [__link72]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::ArcUtf16Str + [__link73]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::BoxUtf16Str + [__link74]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::Utf16String + [__link75]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::format_utf16 [__link76]: https://crates.io/crates/widestring - [__link77]: https://docs.rs/multitude/0.3.1/multitude/?search=zerocopy::ZerocopyView + [__link77]: https://docs.rs/multitude/0.3.2/multitude/?search=zerocopy::ZerocopyView [__link78]: 
https://docs.rs/zerocopy/0.8.50/zerocopy/?search=FromZeros - [__link79]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::zerocopy - [__link8]: https://docs.rs/multitude/0.3.1/multitude/?search=Box - [__link80]: https://docs.rs/multitude/0.3.1/multitude/?search=bytemuck::BytemuckView + [__link79]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::zerocopy + [__link8]: https://docs.rs/multitude/0.3.2/multitude/?search=Box + [__link80]: https://docs.rs/multitude/0.3.2/multitude/?search=bytemuck::BytemuckView [__link81]: https://docs.rs/bytemuck/1.25.0/bytemuck/?search=Zeroable - [__link82]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::bytemuck + [__link82]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::bytemuck [__link83]: https://doc.rust-lang.org/stable/std/convert/trait.From.html - [__link84]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc - [__link85]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc + [__link84]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc + [__link85]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc [__link86]: https://docs.rs/bytes/1.11.1/bytes/?search=Bytes [__link87]: https://docs.rs/bytesbuf/0.5.5/bytesbuf/?search=mem::Memory - [__link88]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena + [__link88]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena [__link89]: https://docs.rs/bytesbuf/0.5.5/bytesbuf/?search=BytesBuf - [__link9]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String + [__link9]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String diff --git a/crates/multitude/benches/criterion_arc_array.rs b/crates/multitude/benches/criterion_arc_array.rs new file mode 100644 index 000000000..a24c4337b --- /dev/null +++ b/crates/multitude/benches/criterion_arc_array.rs @@ -0,0 +1,105 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! 
Builds an `Arc<[Arc<[u8]>]>` of `PROPERTIES` binary blobs two ways and +//! compares them: `std::sync::Arc` (global allocator) vs `multitude::Arc` +#![allow(clippy::unwrap_used, reason = "benchmark code")] +#![allow(clippy::missing_panics_doc, reason = "benchmark code")] +#![allow(unused_results, reason = "benchmark code")] +#![allow(clippy::std_instead_of_core, reason = "benchmark code")] +#![allow(dead_code, reason = "array properties are held only to keep the allocation alive")] + +use std::hint::black_box; +use std::sync::Arc as StdArc; + +use criterion::{Criterion, criterion_group, criterion_main}; +use multitude::{Arc as ArenaArc, Arena}; + +// --------------------------------------------------------------------------- +// Array shape: `PROPERTIES` binary blobs of `PROPERTY_SIZE` bytes each. +// --------------------------------------------------------------------------- + +const PROPERTIES: usize = 8; +const PROPERTY_SIZE: usize = 16; + +// --------------------------------------------------------------------------- +// Global-allocator array +// --------------------------------------------------------------------------- + +fn build_global(payload: &[u8]) -> StdArc<[StdArc<[u8]>]> { + let mut properties = Vec::with_capacity(PROPERTIES); + for _ in 0..PROPERTIES { + properties.push(StdArc::<[u8]>::from(payload)); + } + StdArc::from(properties) +} + +fn build_global_from_slice(properties: &[StdArc<[u8]>]) -> StdArc<[StdArc<[u8]>]> { + StdArc::from(properties) +} + +// --------------------------------------------------------------------------- +// Arena-backed array +// --------------------------------------------------------------------------- + +fn build_arena(arena: &Arena, payload: &[u8]) -> ArenaArc<[ArenaArc<[u8]>]> { + let mut properties = arena.alloc_vec_with_capacity::>(PROPERTIES); + for _ in 0..PROPERTIES { + properties.push(arena.alloc_slice_copy_arc(payload)); + } + properties.try_into_arc().unwrap() +} + +fn build_arena_from_slice(arena: &Arena, 
properties: &[StdArc<[u8]>]) -> ArenaArc<[StdArc<[u8]>]> { + arena.alloc_slice_clone_arc(properties) +} + +fn global_properties(payload: &[u8]) -> Vec> { + (0..PROPERTIES).map(|_| StdArc::<[u8]>::from(payload)).collect() +} + +// --------------------------------------------------------------------------- +// Criterion timing + per-iteration allocation tracking +// --------------------------------------------------------------------------- + +fn bench_arc_array(c: &mut Criterion) { + let payload = vec![0xABu8; PROPERTY_SIZE]; + + let mut group = c.benchmark_group("arc_array"); + + group.bench_function("global", |b| { + b.iter(|| { + black_box(build_global(black_box(&payload))); + }); + }); + + let arena = Arena::new(); + black_box(build_arena(&arena, &payload)); + + group.bench_function("arena", |b| { + b.iter(|| { + black_box(build_arena(&arena, black_box(&payload))); + }); + }); + + let global_props = global_properties(&payload); + group.bench_function("global_from_slice", |b| { + b.iter(|| { + black_box(build_global_from_slice(black_box(&global_props))); + }); + }); + + let work_arena = Arena::new(); + black_box(build_arena_from_slice(&work_arena, &global_props)); + + group.bench_function("arena_from_slice", |b| { + b.iter(|| { + black_box(build_arena_from_slice(&work_arena, black_box(&global_props))); + }); + }); + + group.finish(); +} + +criterion_group!(benches, bench_arc_array); +criterion_main!(benches); diff --git a/crates/multitude/benches/gungraun_arc_array/linux.rs b/crates/multitude/benches/gungraun_arc_array/linux.rs new file mode 100644 index 000000000..39846f4ad --- /dev/null +++ b/crates/multitude/benches/gungraun_arc_array/linux.rs @@ -0,0 +1,172 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Instruction-precise `Arc<[Arc<[u8]>]>` build benchmarks for multitude. +//! +//! Mirrors `benches/criterion_arc_array.rs` 1:1: each gungraun function +//! `` corresponds to a criterion benchmark `arc_array/`. +//! 
Builds an `Arc<[Arc<[u8]>]>` of `PROPERTIES` binary blobs two ways and +//! compares them: `std::sync::Arc` (global allocator) vs `multitude::Arc` +//! (arena). Each is built with two strategies: +//! +//! - `*` — push freshly allocated properties through a growable vec, then +//! freeze it into the `Arc`. +//! - `*_from_slice` — build directly from a pre-created slice of properties, +//! with no intermediate vec. +//! +//! # Allocation hygiene +//! +//! Following the same toggle hygiene as `gungraun_alloc`: setup (the arena +//! warm-up, the payload, the pre-created property slice, and the pre-sized +//! output `Vec`) runs outside the callgrind toggle via `#[bench::run(...)]`. +//! The timed body only builds the structures and pushes the handles into the +//! pre-sized output `Vec`, which is returned by value so its `Drop` runs +//! outside the toggle. The only traffic counted is the build itself. + +#![allow(missing_docs, reason = "Benchmark")] +#![allow(unused_results, reason = "black_box of bench input is intentional")] +#![allow(clippy::unwrap_used, reason = "benchmark code")] +#![allow( + clippy::needless_pass_by_value, + reason = "gungraun bench inputs are passed by value by the framework" +)] +#![allow(clippy::type_complexity, reason = "benchmark state tuples are inherently complex")] +#![allow(clippy::too_many_lines, reason = "benchmark file")] + +use core::hint::black_box; +use std::sync::Arc as StdArc; + +use gungraun::{Callgrind, LibraryBenchmarkConfig, library_benchmark, library_benchmark_group, main}; +use multitude::{Arc as ArenaArc, Arena}; + +// Array shape: `PROPERTIES` binary blobs of `PROPERTY_SIZE` bytes each, built +// `N` times per bench so the per-build instruction count is stable. 
+const PROPERTIES: usize = 8; +const PROPERTY_SIZE: usize = 16; +const N: usize = 1_000; + +type GlobalArray = StdArc<[StdArc<[u8]>]>; +type ArenaArrayOfArena = ArenaArc<[ArenaArc<[u8]>]>; +type ArenaArrayOfGlobal = ArenaArc<[StdArc<[u8]>]>; + +// ===== shared builders (mirror criterion_arc_array.rs) ===== + +fn build_global(payload: &[u8]) -> GlobalArray { + let mut properties = Vec::with_capacity(PROPERTIES); + for _ in 0..PROPERTIES { + properties.push(StdArc::<[u8]>::from(payload)); + } + StdArc::from(properties) +} + +fn build_global_from_slice(properties: &[StdArc<[u8]>]) -> GlobalArray { + StdArc::from(properties) +} + +fn build_arena(arena: &Arena, payload: &[u8]) -> ArenaArrayOfArena { + let mut properties = arena.alloc_vec_with_capacity::>(PROPERTIES); + for _ in 0..PROPERTIES { + properties.push(arena.alloc_slice_copy_arc(payload)); + } + properties.try_into_arc().unwrap() +} + +fn build_arena_from_slice(arena: &Arena, properties: &[StdArc<[u8]>]) -> ArenaArrayOfGlobal { + arena.alloc_slice_clone_arc(properties) +} + +// ===== leaf setup helpers ===== + +fn payload() -> Vec { + vec![0xAB_u8; PROPERTY_SIZE] +} + +fn global_properties() -> Vec> { + let payload = payload(); + (0..PROPERTIES).map(|_| StdArc::<[u8]>::from(payload.as_slice())).collect() +} + +fn warm_arena() -> Arena { + // Warm: preallocate one chunk of the largest size class for each flavor + // AND prime the arena's current_local / current_shared mutators with a + // throwaway allocation, so the timed body never pays a cold `refill_*`. + // Mirrors `gungraun_alloc::warm_arena`. 
+ let arena = Arena::builder() + .with_capacity_local(64 * 1024) + .with_capacity_shared(64 * 1024) + .build(); + let _: &mut u64 = arena.alloc(0_u64); + let _ = arena.alloc_arc(0_u64); + arena +} + +// ===== composite setups (pre-allocate the output Vec to N) ===== + +fn setup_global() -> (Vec, Vec) { + (payload(), Vec::with_capacity(N)) +} + +fn setup_arena() -> (Arena, Vec, Vec) { + (warm_arena(), payload(), Vec::with_capacity(N)) +} + +fn setup_global_from_slice() -> (Vec>, Vec) { + (global_properties(), Vec::with_capacity(N)) +} + +fn setup_arena_from_slice() -> (Arena, Vec>, Vec) { + (warm_arena(), global_properties(), Vec::with_capacity(N)) +} + +// ===== bench bodies — only the build is inside the toggle ===== + +#[library_benchmark] +#[bench::run(setup_global())] +fn global(state: (Vec, Vec)) -> (Vec, Vec) { + let (payload, mut out) = state; + for _ in 0..N { + out.push(black_box(build_global(black_box(&payload)))); + } + (payload, out) +} + +#[library_benchmark] +#[bench::run(setup_arena())] +fn arena(state: (Arena, Vec, Vec)) -> (Arena, Vec, Vec) { + let (arena, payload, mut out) = state; + for _ in 0..N { + out.push(black_box(build_arena(&arena, black_box(&payload)))); + } + (arena, payload, out) +} + +#[library_benchmark] +#[bench::run(setup_global_from_slice())] +fn global_from_slice(state: (Vec>, Vec)) -> (Vec>, Vec) { + let (properties, mut out) = state; + for _ in 0..N { + out.push(black_box(build_global_from_slice(black_box(&properties)))); + } + (properties, out) +} + +#[library_benchmark] +#[bench::run(setup_arena_from_slice())] +fn arena_from_slice(state: (Arena, Vec>, Vec)) -> (Arena, Vec>, Vec) { + let (arena, properties, mut out) = state; + for _ in 0..N { + out.push(black_box(build_arena_from_slice(&arena, black_box(&properties)))); + } + (arena, properties, out) +} + +library_benchmark_group!( + name = arc_array_group; + benchmarks = global, arena, global_from_slice, arena_from_slice +); + +main!( + config = 
LibraryBenchmarkConfig::default() + .tool(Callgrind::with_args(["--branch-sim=yes"])); + library_benchmark_groups = arc_array_group +); diff --git a/crates/multitude/benches/gungraun_arc_array/main.rs b/crates/multitude/benches/gungraun_arc_array/main.rs new file mode 100644 index 000000000..f28534120 --- /dev/null +++ b/crates/multitude/benches/gungraun_arc_array/main.rs @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Instruction-precise `Arc<[Arc<[u8]>]>` build benchmarks for multitude. +//! +//! Mirrors `benches/criterion_arc_array.rs` 1:1: each gungraun function +//! `<name>` corresponds to a criterion benchmark `arc_array/<name>`. +//! +//! Run with `cargo bench --bench gungraun_arc_array` on a Linux host with +//! Valgrind. + +#![allow(missing_docs, reason = "Benchmark")] +#![allow(unused_results, reason = "black_box of bench input is intentional")] +#![allow( + clippy::needless_pass_by_value, + reason = "gungraun bench inputs are passed by value by the framework" +)] +#![allow(clippy::type_complexity, reason = "benchmark state tuples are inherently complex")] +#![allow(clippy::too_many_lines, reason = "benchmark file")] +#![cfg_attr( + target_os = "linux", + expect( + clippy::exit, + clippy::missing_docs_in_private_items, + unused_qualifications, + reason = "Triggered by Gungraun macro expansion. Upstream tracking issues are pending." + ) +)] + +// Gungraun requires Valgrind, which is Linux-only. On other platforms this +// bench target compiles to a no-op so `cargo build --all-targets` still works. 
+#[cfg(not(target_os = "linux"))] +fn main() {} + +#[cfg(target_os = "linux")] +mod linux; + +#[cfg(target_os = "linux")] +use linux::*; + +#[cfg(target_os = "linux")] +gungraun::main!( + config = gungraun::LibraryBenchmarkConfig::default() + .tool(gungraun::Callgrind::with_args(["--branch-sim=yes"])); + library_benchmark_groups = arc_array_group +); diff --git a/crates/multitude/docs/BUMPALO.md b/crates/multitude/docs/BUMPALO.md index 1229dde9e..287413c36 100644 --- a/crates/multitude/docs/BUMPALO.md +++ b/crates/multitude/docs/BUMPALO.md @@ -12,7 +12,7 @@ spirit; here's how multitude differs. | Refcounted smart pointers | ❌ (raw `&'bump T`) | ✅ `Arc` (atomic; thread-safe sharing) | | Smart pointers outlive the arena | ❌ | ✅ (`Arc` / `Box` and their `str` variants — simple references are lifetime-bound) | | Cross-thread sharing of individual values | ❌ | ✅ via `Arc` | -| Automatic per-object `Drop` | Only via `bumpalo::boxed::Box` | ✅ Automatic (refcount smart pointers drop at chunk teardown; `Box` drops at smart pointer drop; simple references drop at arena drop) | +| Automatic per-object `Drop` | Only via `bumpalo::boxed::Box` | ✅ Automatic (`Arc` drops at last-clone drop, `Box` drops at smart-pointer drop, simple references drop at arena reset/drop) | | Owned single smart pointer (`Drop` on drop) | `bumpalo::boxed::Box` | `Box` | | Smart-pointer width | 16 bytes for fat DSTs (`&str`, `Bump-allocated boxed slice` are 2-word) | **8 bytes uniformly** — `Arc` / `Box` are thin even for DST `T` (slice / `str` / `dyn Trait` / custom `Pointee`); DST metadata is stored unaligned in a chunk prefix | | Single-pointer string smart pointers | ❌ (`&str` is 16 bytes) | ✅ `Arc` / `Box` / `ArcUtf16Str` / `BoxUtf16Str` are all 8 bytes (length stored unaligned in a `usize` prefix in the chunk; zero per-string padding) | diff --git a/crates/multitude/docs/DESIGN.md b/crates/multitude/docs/DESIGN.md index bdb688334..f58983365 100644 --- a/crates/multitude/docs/DESIGN.md +++ 
b/crates/multitude/docs/DESIGN.md @@ -72,13 +72,17 @@ the shared chunk's `AtomicUsize` refcount on every allocation would be a hot-path atomic. Instead, at install time the arena pre-credits the chunk's atomic `ref_count` with `LARGE_SHARED_REF_SURPLUS` (2^30) and tracks per-allocation handouts in the non-atomic `local_shared_count` -(`Cell`). At retire (refill / reset / arena drop) the surplus is -reconciled with a single +(`Cell`). At retire (`refill_shared` or `Arena::drop`) the surplus +is reconciled with a single `fetch_sub(LARGE_SHARED_REF_SURPLUS - local_shared_count)`, leaving the chunk's atomic count equal to the number of escaped handles. The 2^30 surplus is large enough that concurrent `Arc::drop` on other threads -cannot underflow it, while the `u32` counter leaves ~2^30 headroom -against `Arc::clone` overflow. +cannot underflow it. `Arc::clone` does not touch this count — +each `Arc` family takes exactly one chunk refcount at allocation and +releases it when its last clone drops (clones bump only the per-`Arc` +strong count; see *Per-`Arc` reference counting*). `Arena::reset` does +not reconcile or detach the installed shared chunk — it resets only +local-chunk state, so shared allocations continue on the same chunk. **Size-class ratchet.** Each successful refill bumps the matching `next_*_class` toward the largest cacheable class (`NUM_CHUNK_CLASSES @@ -155,7 +159,6 @@ pub(crate) struct SharedChunk { capacity: usize, ref_count: AtomicUsize, next: AtomicPtr, // intrusive cache-freelist link - drop_entry_count: AtomicU16, #[cfg(feature = "stats")] wasted_at_retire: AtomicU32, data: [UnsafeCell], @@ -209,7 +212,10 @@ is a **single 8-byte raw pointer** into the chunk's `data` tail. DST metadata (slice length, vtable) lives unaligned in the chunk prefix immediately preceding the value payload, read with `core::ptr::read_unaligned`. For `T: Sized` the metadata is `()` so -there's no prefix overhead. +there's no prefix overhead. 
`Arc` additionally stores its +per-`Arc` strong count (an `AtomicU32`) in the prefix, before the +metadata (see *Per-`Arc` reference counting*); `Box` has no such +prefix. To recover the owning chunk's header from a smart-pointer value, each smart-pointer type **masks the low bits to the 64 KiB boundary** @@ -237,23 +243,64 @@ Two consequences of the masking scheme: refill path if a ZST would otherwise land at the one-past-end boundary. +## Per-`Arc` reference counting + +Each `Arc` carries **its own** strong reference count — an +`AtomicU32` stored in the chunk payload immediately *before* the value +(and before the DST metadata, if any). The layout of an `Arc` value is: + +```text +[strong (AtomicU32, at reservation base)][pad][T::Metadata (unaligned)][T payload] + ^ value pointer +``` + +The reservation is aligned to `max(align_of::<T>(), 4)` so the leading +strong slot is 4-byte aligned; the value pointer is `align_of::<T>()` +aligned and the metadata sits immediately before it (recovered with +`read_unaligned`, exactly as for `Box`). The strong count is recovered +from the value pointer by subtracting a fixed prefix +(`thin_dst::strong_prefix_bytes_for`) and is accessed only as an +`AtomicU32` — never through a reference that spans the (possibly +uninitialized) payload, which keeps the scheme sound under Miri. + +The accounting works as follows: + +- **Allocation** writes `strong = 1` and takes **one** refcount on the + hosting chunk for the whole `Arc` family (via the pre-credited + surplus, as for any shared allocation). +- **`Arc::clone`** bumps only the per-`Arc` `strong` with a single + `Relaxed` increment — it does **not** touch the chunk refcount. 
+- **`Arc::drop`** does a `Release` decrement of `strong`; on the +  `strong → 0` transition it runs an `Acquire` fence, drops the value +  in place (`drop_in_place::<T>`, which natively handles `?Sized`), +  and releases the family's single chunk refcount (adopted *before* +  the value drop, so a panicking destructor still releases the chunk). + +Because the value's destructor runs eagerly on the last `Arc` (rather +than being deferred to chunk teardown), nested arena `Arc`s — e.g. +`Arc<[Arc<T>]>` whose inner and outer handles share a chunk — release +their storage promptly instead of forming a self-pinning cycle. + +`Arc::<MaybeUninit<T>>::assume_init` is a pure reinterpret: `MaybeUninit<T>` +and `T` share size, alignment, and metadata, so the strong-prefix layout +is identical and the strong count is untouched. + ## `DropEntry` -`DropEntry` records the deferred destructor work for values whose -`Drop` cannot be run by the smart pointer itself — i.e. arena -references (`&mut T` / `&mut [T]`, which have no `Drop` of their own) -and `Arc` (whose value must be dropped by whichever handle observes -the last refcount, a moment only the chunk can detect). **No `Box` -variant registers a drop entry**: `Box::drop` runs `drop_in_place` on -the (re-fattened) value pointer eagerly, which natively handles `?Sized` -`T`, so sized `Box<T>`, slice `Box<[T]>`, and DST `Box<dyn Trait>` all -need no entry. - -Each such allocation reserves **both** `size_of::<T>()` at the front -of the free region *and* one `DropEntry` slot at the back. The -effective remaining capacity is `drop_top - bump`; overflow is -detected when those two meet. Allocations of `T: !Drop` skip the -reservation entirely. +`DropEntry` records the deferred destructor work for **local arena +references only** — `Arena::alloc -> &mut T` and `&mut [T]`, which have +no `Drop` of their own and whose backing chunk runs the destructor at +teardown.
**Neither `Box` nor `Arc` registers a drop entry, and shared +chunks never carry one**: `Box::drop` runs `drop_in_place` eagerly on +the (re-fattened) value pointer, and `Arc::drop` does the same on the +last strong reference (see *Per-`Arc` reference counting* above). Drop +entries therefore live exclusively on `LocalChunk`s. + +Each such reference allocation reserves **both** `size_of::<T>()` at the +front of the free region *and* one `DropEntry` slot at the back. The +effective remaining capacity is `drop_top - bump`; overflow is detected +when those two meet. Allocations of `T: !Drop` skip the reservation +entirely. ```rust #[repr(C)] @@ -266,9 +313,11 @@ struct DropEntry { } ``` -`len` is a `u16`; slice/DST allocations whose `needs_drop` count +`len` is a `u16`; local slice references whose `needs_drop` count exceeds `u16::MAX` are rejected up front by their `alloc_*` orchestrator -so the placeholder never overflows. +so the placeholder never overflows. (The `Arc<[T]>` family has **no** +such cap, since it drops via `drop_in_place::<[T]>` rather than a +counted entry.) **Two-phase write.** Allocation paths reserve a *placeholder* (null `drop_fn`, real `value_offset`/`len`) up front. After the value is @@ -279,26 +328,27 @@ initialization closure panicked or whose `Uninit` ticket was dropped without `init`. Storing as `AtomicPtr<()>` (not `AtomicUsize`) preserves function-pointer provenance under Miri's strict provenance. -The commit is idempotent: concurrent `Arc::<MaybeUninit<T>>::assume_init` -on cloned handles all install the same `T`-determined shim. - -**Replay.** When the chunk's last refcount drops, the chunk walks its -drop-entry stack **newest-first** (LIFO, matching Rust drop order) and -invokes `(drop_fn)(data + value_offset, len)` on each committed -entry. A panic in any shim is contained; replay continues so remaining -destructors still run.
+**Replay.** When a `LocalChunk`'s refcount drops to zero (at +`Arena::reset` / `Arena::drop`), the chunk walks its drop-entry stack +**newest-first** (LIFO, matching Rust drop order) and invokes +`(drop_fn)(data + value_offset, len)` on each committed entry. Shared +chunks skip this step entirely. A panic in any shim is contained; +replay continues so remaining destructors still run. **Closure-panic safety.** The smart-pointer construction paths take a protective `ChunkRef` (`+1` guard) before invoking the user closure. On unwinding, the `ChunkRef`'s `Drop` releases the +1; on success the caller calls `ChunkRef::forget` to transfer the +1 into the freshly-constructed smart pointer. Combined with the two-phase -placeholder, a panicking closure leaves no `T::drop` queued on +placeholder (for local references) and eager `drop_in_place` (for +`Box`/`Arc`), a panicking closure leaves no `T::drop` queued on uninitialized memory and no refcount leaked. -**Refcount overflow.** Both `inc_ref` paths check against the -wraparound boundary and abort (`std::process::abort` or a forced -double-panic under `no_std`) if exceeded. The abort helper is -`#[cold] #[inline(never)]` so the hot-path call site stays small. -This mirrors `std::sync::Arc`: a wraparound would race live pointers -with a free, and the only sound response is to terminate. +**Refcount overflow.** Both the chunk `inc_ref` paths and `Arc::clone`'s +per-`Arc` `strong` increment check against the wraparound boundary and +abort (`std::process::abort` or a forced double-panic under `no_std`) if +exceeded. The abort helper is `#[cold] #[inline(never)]` so the hot-path +call site stays small. This mirrors `std::sync::Arc`: a wraparound would +race live pointers with a free, and the only sound response is to +terminate. 
+ diff --git a/crates/multitude/docs/PERF.md b/crates/multitude/docs/PERF.md index 173e127e6..e41eb71d5 100644 --- a/crates/multitude/docs/PERF.md +++ b/crates/multitude/docs/PERF.md @@ -13,98 +13,98 @@ Bench names are aligned between criterion and gungraun via the `GROUPS` table in | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses | |---|---:|---:|---:|---:| -| `multitude_new` | 38 ns | 316 | 9 | 457 | +| `multitude_new` | 37 ns | 316 | 8 | 457 | | `bumpalo_new` | 1 ns | 16 | 1 | 26 | ## `alloc_u64` | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses | |---|---:|---:|---:|---:| -| `alloc` | 6.45 µs | 14,026 | 6 | 21,043 | -| `alloc_with` | 6.53 µs | 14,024 | 11 | 21,040 | -| `alloc_box` | 5.24 µs | 23,043 | 9 | 37,078 | -| `alloc_box_with` | 5.18 µs | 24,043 | 9 | 38,078 | -| `alloc_uninit_box` | 2.33 µs | 20,043 | 9 | 31,078 | -| `alloc_zeroed_box` | 4.85 µs | 21,043 | 9 | 33,078 | -| `alloc_arc` | 5.36 µs | 23,043 | 7 | 37,078 | -| `alloc_arc_with` | 5.19 µs | 24,043 | 9 | 38,078 | -| `alloc_uninit_arc` | 2.33 µs | 20,043 | 9 | 31,078 | -| `alloc_zeroed_arc` | 4.96 µs | 21,043 | 9 | 33,078 | -| `bumpalo_alloc` | 5.97 µs | 19,022 | 4 | 27,037 | -| `bumpalo_alloc_with` | 6.08 µs | 19,020 | 4 | 27,034 | +| `alloc` | 6.58 µs | 14,026 | 6 | 21,043 | +| `alloc_with` | 6.62 µs | 14,024 | 9 | 21,040 | +| `alloc_box` | 5.83 µs | 23,043 | 9 | 37,078 | +| `alloc_box_with` | 5.94 µs | 24,043 | 9 | 38,078 | +| `alloc_uninit_box` | 3.10 µs | 20,043 | 9 | 31,078 | +| `alloc_zeroed_box` | 5.58 µs | 21,043 | 9 | 33,078 | +| `alloc_arc` | 9.48 µs | 25,043 | 9 | 40,078 | +| `alloc_arc_with` | 9.78 µs | 26,043 | 8 | 41,078 | +| `alloc_uninit_arc` | 9.26 µs | 22,043 | 9 | 34,078 | +| `alloc_zeroed_arc` | 9.53 µs | 23,043 | 9 | 36,078 | +| `bumpalo_alloc` | 6.57 µs | 19,022 | 6 | 27,037 | +| `bumpalo_alloc_with` | 6.58 µs | 19,020 | 4 | 27,034 | ## `alloc_str` | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses | 
|---|---:|---:|---:|---:| -| `alloc_str` | 8.24 µs | 51,053 | 10 | 76,098 | -| `alloc_str_box` | 11.83 µs | 59,053 | 11 | 85,098 | -| `alloc_str_arc` | 11.89 µs | 59,053 | 11 | 85,098 | -| `bumpalo_alloc_str` | 9.13 µs | 50,048 | 13 | 75,088 | +| `alloc_str` | 8.36 µs | 51,053 | 10 | 76,098 | +| `alloc_str_box` | 12.64 µs | 59,053 | 11 | 85,098 | +| `alloc_str_arc` | 14.00 µs | 58,054 | 11 | 84,099 | +| `bumpalo_alloc_str` | 9.56 µs | 50,048 | 13 | 75,088 | ## `alloc_slice` | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses | |---|---:|---:|---:|---:| -| `alloc_slice_copy` | 22.82 µs | 41,049 | 4 | 57,090 | -| `alloc_slice_clone` | 22.50 µs | 45,050 | 10 | 58,091 | -| `alloc_slice_fill_with` | 24.07 µs | 38,026 | 11 | 68,043 | -| `alloc_slice_fill_iter` | 24.18 µs | 38,027 | 11 | 68,044 | -| `alloc_slice_copy_box` | 41.99 µs | 55,646 | 33 | 83,916 | -| `alloc_slice_clone_box` | 42.18 µs | 68,646 | 40 | 92,915 | -| `alloc_slice_fill_with_box` | 43.59 µs | 48,585 | 40 | 86,809 | -| `alloc_slice_fill_iter_box` | 43.84 µs | 50,585 | 39 | 90,809 | -| `alloc_uninit_slice_box` | 39.83 µs | 23,585 | 40 | 36,809 | -| `alloc_zeroed_slice_box` | 40.73 µs | 27,585 | 40 | 43,809 | -| `alloc_slice_copy_arc` | 42.49 µs | 53,647 | 34 | 80,917 | -| `alloc_slice_clone_arc` | 42.37 µs | 59,645 | 39 | 80,914 | -| `alloc_slice_fill_with_arc` | 44.47 µs | 46,585 | 41 | 82,809 | -| `alloc_slice_fill_iter_arc` | 43.77 µs | 47,585 | 40 | 84,809 | -| `alloc_uninit_slice_arc` | 40.01 µs | 22,585 | 40 | 34,809 | -| `alloc_zeroed_slice_arc` | 41.27 µs | 25,585 | 40 | 39,809 | -| `bumpalo_alloc_slice_copy` | 23.49 µs | 38,042 | 4 | 55,076 | -| `bumpalo_alloc_slice_clone` | 24.38 µs | 60,046 | 9 | 74,083 | -| `bumpalo_alloc_slice_fill_with` | 25.44 µs | 40,020 | 5 | 70,033 | -| `bumpalo_alloc_slice_fill_iter` | 25.43 µs | 40,020 | 5 | 70,033 | +| `alloc_slice_copy` | 33.81 µs | 41,049 | 3 | 57,090 | +| `alloc_slice_clone` | 33.49 µs | 45,050 | 10 | 58,091 | +| 
`alloc_slice_fill_with` | 35.52 µs | 38,026 | 10 | 68,043 | +| `alloc_slice_fill_iter` | 35.82 µs | 38,027 | 9 | 68,044 | +| `alloc_slice_copy_box` | 50.31 µs | 55,624 | 28 | 83,885 | +| `alloc_slice_clone_box` | 49.03 µs | 68,624 | 36 | 92,884 | +| `alloc_slice_fill_with_box` | 51.45 µs | 48,563 | 31 | 86,778 | +| `alloc_slice_fill_iter_box` | 52.38 µs | 50,563 | 34 | 90,778 | +| `alloc_uninit_slice_box` | 46.89 µs | 23,563 | 34 | 36,778 | +| `alloc_zeroed_slice_box` | 48.11 µs | 27,563 | 34 | 43,778 | +| `alloc_slice_copy_arc` | 54.53 µs | 55,625 | 28 | 83,886 | +| `alloc_slice_clone_arc` | 54.06 µs | 61,623 | 36 | 83,883 | +| `alloc_slice_fill_with_arc` | 56.68 µs | 47,563 | 33 | 84,778 | +| `alloc_slice_fill_iter_arc` | 55.84 µs | 48,563 | 32 | 86,778 | +| `alloc_uninit_slice_arc` | 51.35 µs | 23,563 | 34 | 36,778 | +| `alloc_zeroed_slice_arc` | 51.95 µs | 26,563 | 33 | 41,778 | +| `bumpalo_alloc_slice_copy` | 36.94 µs | 38,042 | 7 | 55,076 | +| `bumpalo_alloc_slice_clone` | 36.81 µs | 60,046 | 10 | 74,083 | +| `bumpalo_alloc_slice_fill_with` | 36.03 µs | 40,020 | 5 | 70,033 | +| `bumpalo_alloc_slice_fill_iter` | 37.52 µs | 40,020 | 5 | 70,033 | ## `string_builder` | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses | |---|---:|---:|---:|---:| -| `alloc_string` | 8.05 µs | 36,836 | 32 | 51,184 | -| `alloc_string_with_capacity` | 7.64 µs | 37,194 | 21 | 52,304 | -| `bumpalo_string_new_in` | 9.20 µs | 35,843 | 76 | 50,867 | -| `bumpalo_string_with_capacity_in` | 10.62 µs | 34,708 | 28 | 49,159 | +| `alloc_string` | 8.23 µs | 36,849 | 28 | 51,203 | +| `alloc_string_with_capacity` | 8.09 µs | 37,210 | 20 | 52,325 | +| `bumpalo_string_new_in` | 12.16 µs | 35,843 | 74 | 50,867 | +| `bumpalo_string_with_capacity_in` | 11.79 µs | 34,708 | 30 | 49,159 | ## `vec_builder` | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses | |---|---:|---:|---:|---:| -| `alloc_vec` | 1.25 µs | 11,765 | 31 | 17,053 | -| 
`alloc_vec_with_capacity` | 1.23 µs | 12,132 | 8 | 18,215 | -| `bumpalo_vec_new_in` | 3.72 µs | 12,281 | 61 | 18,888 | -| `bumpalo_vec_with_capacity_in` | 3.48 µs | 11,069 | 2 | 17,116 | +| `alloc_vec` | 1.29 µs | 11,792 | 30 | 17,087 | +| `alloc_vec_with_capacity` | 1.23 µs | 12,139 | 10 | 18,221 | +| `bumpalo_vec_new_in` | 3.89 µs | 12,281 | 61 | 18,888 | +| `bumpalo_vec_with_capacity_in` | 3.63 µs | 11,069 | 2 | 17,116 | ## `drop` | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses | |---|---:|---:|---:|---:| -| `box_u64` | 8.42 µs | 10,309 | 55 | 13,904 | -| `rc_u64` | 8.18 µs | 10,309 | 55 | 13,904 | -| `arc_u64` | 8.36 µs | 10,309 | 55 | 13,904 | -| `box_droppy` | 22.06 µs | 186,161 | 77 | 272,621 | -| `rc_droppy` | 27.37 µs | 219,386 | 80 | 320,930 | -| `arc_droppy` | 27.25 µs | 219,386 | 80 | 320,930 | -| `str_box` | 7.59 µs | 10,309 | 55 | 13,904 | -| `str_rc` | 7.68 µs | 10,309 | 55 | 13,904 | -| `str_arc` | 7.71 µs | 10,309 | 55 | 13,904 | -| `slice_box_u64` | 13.96 µs | 10,819 | 58 | 14,639 | -| `slice_rc_u64` | 12.30 µs | 10,819 | 58 | 14,639 | -| `slice_arc_u64` | 12.59 µs | 10,819 | 58 | 14,639 | -| `slice_box_droppy` | 115.72 µs | 1,520,210 | 1,848 | 2,214,775 | -| `slice_rc_droppy` | 122.93 µs | 1,546,283 | 1,110 | 2,253,860 | -| `slice_arc_droppy` | 122.05 µs | 1,546,283 | 1,110 | 2,253,860 | -| `alloc` | 686 ns | 337 | 15 | 504 | +| `box_u64` | 8.05 µs | 10,660 | 68 | 14,433 | +| `rc_u64` | 12.85 µs | 13,005 | 64 | 18,929 | +| `arc_u64` | 12.53 µs | 13,005 | 64 | 18,929 | +| `box_droppy` | 15.19 µs | 186,501 | 86 | 273,127 | +| `rc_droppy` | 15.53 µs | 188,852 | 83 | 277,632 | +| `arc_droppy` | 20.20 µs | 188,852 | 83 | 277,632 | +| `str_box` | 7.50 µs | 10,660 | 68 | 14,433 | +| `str_rc` | 12.30 µs | 13,005 | 70 | 18,929 | +| `str_arc` | 12.13 µs | 13,005 | 70 | 18,929 | +| `slice_box_u64` | 14.68 µs | 11,395 | 68 | 15,498 | +| `slice_rc_u64` | 19.22 µs | 13,390 | 63 | 19,490 | +| `slice_arc_u64` | 19.27 µs | 13,390 | 63 | 
19,490 | +| `slice_box_droppy` | 123.93 µs | 1,480,204 | 1,362 | 2,162,703 | +| `slice_rc_droppy` | 122.57 µs | 1,482,204 | 1,107 | 2,166,702 | +| `slice_arc_droppy` | 123.82 µs | 1,482,204 | 1,107 | 2,166,702 | +| `alloc` | 970 ns | 345 | 13 | 514 | ## Multitude vs Bumpalo Head-to-Head @@ -112,14 +112,14 @@ Direct comparisons of multitude versus bumpalo on identical workloads (the multi | Workload | Multitude time | Bumpalo time | Δ time | Multitude instr | Bumpalo instr | Δ instr | |---|---:|---:|---:|---:|---:|---:| -| `alloc` vs `bumpalo_alloc` | 6.45 µs | 5.97 µs | +8.1% | 14,026 | 19,022 | -26.3% | -| `alloc_str` vs `bumpalo_alloc_str` | 8.24 µs | 9.13 µs | -9.7% | 51,053 | 50,048 | +2.0% | -| `alloc_slice_copy` vs `bumpalo_alloc_slice_copy` | 22.82 µs | 23.49 µs | -2.9% | 41,049 | 38,042 | +7.9% | -| `alloc_slice_clone` vs `bumpalo_alloc_slice_clone` | 22.50 µs | 24.38 µs | -7.7% | 45,050 | 60,046 | -25.0% | -| `alloc_slice_fill_with` vs `bumpalo_alloc_slice_fill_with` | 24.07 µs | 25.44 µs | -5.4% | 38,026 | 40,020 | -5.0% | -| `alloc_slice_fill_iter` vs `bumpalo_alloc_slice_fill_iter` | 24.18 µs | 25.43 µs | -4.9% | 38,027 | 40,020 | -5.0% | -| `alloc_string` vs `bumpalo_string_new_in` | 8.05 µs | 9.20 µs | -12.5% | 36,836 | 35,843 | +2.8% | -| `alloc_string_with_capacity` vs `bumpalo_string_with_capacity_in` | 7.64 µs | 10.62 µs | -28.0% | 37,194 | 34,708 | +7.2% | -| `alloc_vec` vs `bumpalo_vec_new_in` | 1.25 µs | 3.72 µs | -66.3% | 11,765 | 12,281 | -4.2% | -| `alloc_vec_with_capacity` vs `bumpalo_vec_with_capacity_in` | 1.23 µs | 3.48 µs | -64.6% | 12,132 | 11,069 | +9.6% | +| `alloc` vs `bumpalo_alloc` | 6.58 µs | 6.57 µs | +0.2% | 14,026 | 19,022 | -26.3% | +| `alloc_str` vs `bumpalo_alloc_str` | 8.36 µs | 9.56 µs | -12.5% | 51,053 | 50,048 | +2.0% | +| `alloc_slice_copy` vs `bumpalo_alloc_slice_copy` | 33.81 µs | 36.94 µs | -8.5% | 41,049 | 38,042 | +7.9% | +| `alloc_slice_clone` vs `bumpalo_alloc_slice_clone` | 33.49 µs | 36.81 µs | -9.0% | 45,050 
| 60,046 | -25.0% | +| `alloc_slice_fill_with` vs `bumpalo_alloc_slice_fill_with` | 35.52 µs | 36.03 µs | -1.4% | 38,026 | 40,020 | -5.0% | +| `alloc_slice_fill_iter` vs `bumpalo_alloc_slice_fill_iter` | 35.82 µs | 37.52 µs | -4.5% | 38,027 | 40,020 | -5.0% | +| `alloc_string` vs `bumpalo_string_new_in` | 8.23 µs | 12.16 µs | -32.3% | 36,849 | 35,843 | +2.8% | +| `alloc_string_with_capacity` vs `bumpalo_string_with_capacity_in` | 8.09 µs | 11.79 µs | -31.3% | 37,210 | 34,708 | +7.2% | +| `alloc_vec` vs `bumpalo_vec_new_in` | 1.29 µs | 3.89 µs | -66.8% | 11,792 | 12,281 | -4.0% | +| `alloc_vec_with_capacity` vs `bumpalo_vec_with_capacity_in` | 1.23 µs | 3.63 µs | -66.1% | 12,139 | 11,069 | +9.7% | diff --git a/crates/multitude/src/allocator_impl.rs b/crates/multitude/src/allocator_impl.rs index 4c7f2c67c..daef0e31e 100644 --- a/crates/multitude/src/allocator_impl.rs +++ b/crates/multitude/src/allocator_impl.rs @@ -56,7 +56,7 @@ unsafe impl Allocator for &Arena { let _ = chunk_ref.forget(); return Ok(NonNull::slice_from_raw_parts(ptr, layout.size())); } - if self.is_oversized_shared(refill_hint) { + if self.is_oversized(refill_hint) { return self.alloc_oversized_shared_with(refill_hint, |mutator, chunk_ptr| { let (slot, _chunk) = mutator .try_alloc_with_chunk(layout.size(), layout.align()) diff --git a/crates/multitude/src/arc.rs b/crates/multitude/src/arc.rs index 37a51e2cb..afed3b0b9 100644 --- a/crates/multitude/src/arc.rs +++ b/crates/multitude/src/arc.rs @@ -10,35 +10,43 @@ use core::marker::PhantomData; use core::mem::{self, MaybeUninit}; use core::pin::Pin; use core::ptr::{self, NonNull}; +use core::sync::atomic::{Ordering, fence}; use allocator_api2::alloc::{Allocator, Global}; use ptr_meta::Pointee; -use crate::internal::chunk::Chunk; use crate::internal::chunk_ref::ChunkRef; -use crate::internal::drop_entry::{self, DropFn}; -use crate::internal::shared_chunk::SharedChunk; +use crate::internal::constants::refcount_overflow_abort; use 
crate::internal::thin_dst; use crate::thin_smart_ptr_common::impl_thin_smart_ptr_common; use crate::vec::Vec; +/// Strong-count saturation threshold. Cloning past this aborts the +/// process, mirroring `std::sync::Arc`'s `MAX_REFCOUNT` guard (using +/// the `u32` strong counter's half-range instead of `isize::MAX`). +const MAX_STRONG_REFCOUNT: u32 = u32::MAX >> 1; + /// A thread-safe reference-counted smart pointer to a `T` stored in an [`Arena`](crate::Arena). /// /// Safe to share across threads when `T: Send + Sync`. /// /// Created via [`Arena::alloc_arc`](crate::Arena::alloc_arc). Cloning is -/// **O(1)** and uses a single Relaxed atomic increment (matching -/// `std::sync::Arc`). Dropping a clone is one Release decrement plus, -/// on the final dec to zero, an Acquire fence before chunk teardown. +/// **O(1)** and uses a single Relaxed atomic increment of the `Arc`'s +/// own strong count (matching `std::sync::Arc`). Dropping a clone is one +/// Release decrement plus, on the final dec to zero, an Acquire fence, +/// the value's destructor (`T::drop`), and the release of the chunk +/// reference. /// -/// `Arc` keeps its containing chunk alive by holding a +1 refcount on -/// it, so the smart pointer can outlive the arena it came from and -/// survives [`Arena::reset`](crate::Arena::reset). For `T: Drop`, a -/// drop entry is registered at allocation time and `T::drop` runs at -/// chunk teardown (when the chunk's last reference is released); for -/// `T: !Drop` (the common case for strings, numbers, slices, etc.), -/// no drop entry is reserved and the only per-allocation cost beyond -/// the value itself is the chunk's atomic refcount. +/// Each `Arc` carries its own strong reference count — an +/// [`AtomicU32`](core::sync::atomic::AtomicU32) stored in the chunk's +/// payload immediately before the value. 
The allocation also holds +/// **one** refcount on its containing chunk for the whole `Arc` family +/// (all clones share it); that chunk reference is released only when the +/// last `Arc` drops. This keeps the value alive across +/// [`Arena::reset`](crate::Arena::reset) and lets the `Arc` outlive the +/// arena, while running `T::drop` eagerly on the last drop — so nested +/// `Arc`s (e.g. `Arc<[Arc]>`) release their storage promptly instead +/// of deferring to chunk teardown. /// /// # Pinning /// @@ -86,13 +94,17 @@ impl Arc { /// /// - `thin` must reference the payload of a fully-initialized `T` /// whose storage was bump-allocated from a [`SharedChunk`] via - /// the thin-DST allocator path. For DST `T` the chunk prefix - /// must carry the matching `T::Metadata`. For `T: Drop`, a drop - /// entry must already be registered so the destructor runs at - /// chunk teardown. + /// the strong-prefixed `Arc` allocator path: a per-`Arc` + /// [`AtomicU32`](core::sync::atomic::AtomicU32) strong count must + /// already be initialized in the chunk prefix (see + /// [`thin_dst::strong_ref`](crate::internal::thin_dst::strong_ref)), + /// and for DST `T` the prefix must also carry the matching + /// `T::Metadata`. /// - The caller must have just acquired a +1 refcount on that chunk - /// in the new `Arc`'s name; the returned `Arc` takes ownership of - /// that +1 and releases it in [`Drop`]. + /// for the new `Arc` family, and the strong count must account for + /// this handle; the returned `Arc` owns that strong reference and + /// releases the chunk +1 (plus runs `T::drop`) when the strong + /// count reaches zero. /// - `thin` must lie within the first `CHUNK_ALIGN` bytes of the /// chunk so the header-from-mask helper recovers the chunk /// address correctly. @@ -134,33 +146,17 @@ impl Arc, A> { /// The `MaybeUninit` must contain a fully-initialized, valid /// `T`. 
The allocation must come from /// [`Arena::alloc_uninit_arc`](crate::Arena::alloc_uninit_arc) or - /// [`Arena::alloc_zeroed_arc`](crate::Arena::alloc_zeroed_arc) so a - /// drop entry was reserved up front; - /// `Arena::alloc_arc(MaybeUninit::new(...))` does not reserve one - /// and panics here for `T: Drop`. - /// - /// # Panics - /// - /// Panics for `T: Drop` when no drop entry is found in the chunk - /// — see the safety contract above. + /// [`Arena::alloc_zeroed_arc`](crate::Arena::alloc_zeroed_arc). #[inline] #[must_use] pub unsafe fn assume_init(self) -> Arc { - if const { mem::needs_drop::() } { - // SAFETY: `self.ptr` references a live value inside a - // `SharedChunk` this `Arc` holds a +1 on; `alloc_uninit_arc` - // reserved a placeholder drop entry for it. Commit the real shim - // so `T::drop` runs at chunk teardown. - unsafe { - commit_uninit_drop_entry::(self.ptr, 1, drop_entry::drop_shim::, false); - } - } let thin = self.ptr; mem::forget(self); - // SAFETY: `thin` carries the +1 the consumed handle held; the value is - // now a valid `T` per the caller's contract. `Arc>` and - // `Arc` for sized `T` share the same chunk layout (no metadata - // prefix), so no prefix rewrite is needed. + // SAFETY: `thin` carries the strong-count prefix and the live + // reference the consumed handle held; the value is now a valid + // `T` per the caller's contract. `MaybeUninit` and `T` share + // size, alignment, and (empty) metadata, so the strong-prefix + // chunk layout is identical and no rewrite is needed. unsafe { Arc::from_raw(thin) } } @@ -198,33 +194,17 @@ impl Arc<[MaybeUninit], A> { /// [`Arena::alloc_uninit_slice_arc`](crate::Arena::alloc_uninit_slice_arc) /// or /// [`Arena::alloc_zeroed_slice_arc`](crate::Arena::alloc_zeroed_slice_arc). - /// - /// # Panics - /// - /// Panics for `T: Drop` when no drop entry is found in the chunk. 
#[inline] #[must_use] pub unsafe fn assume_init(self) -> Arc<[T], A> { - // SAFETY: `Arc<[MaybeUninit]>` and `Arc<[T]>` share an - // identical chunk prefix layout (the slice length, written as - // `usize` by the allocator); read the length from the prefix - // directly rather than relying on the (now-thin) `self.ptr`. - let len: usize = unsafe { thin_dst::read_metadata::<[T]>(self.ptr) }; - if const { mem::needs_drop::() } { - // SAFETY: see the scalar `assume_init`; the placeholder slice - // drop entry reserved by `alloc_uninit_slice_arc` is committed to - // `drop_shim::` so all `len` elements drop at chunk teardown. - unsafe { - commit_uninit_drop_entry::(self.ptr, len, drop_entry::drop_shim::, true); - } - } let thin = self.ptr; mem::forget(self); - // SAFETY: `thin` carries the +1 the consumed handle held; every - // element is now a valid `T` per the caller's contract. - // `Arc<[MaybeUninit]>` and `Arc<[T]>` share the same chunk - // prefix layout, so the length already stored there matches the - // new fat pointer's metadata. + // SAFETY: `thin` carries the strong-count prefix and the live + // reference the consumed handle held; every element is now a + // valid `T`. `[MaybeUninit]` and `[T]` share an identical + // chunk prefix layout (the slice length, stored as `usize`), so + // the metadata already in the prefix matches the new fat + // pointer. unsafe { Arc::from_raw(thin) } } @@ -249,67 +229,29 @@ impl Arc<[MaybeUninit], A> { } } -/// Locates the placeholder [`DropEntry`](crate::internal::drop_entry) that -/// `Arena::alloc_uninit_arc` / `alloc_uninit_slice_arc` reserved for the -/// value at `value` and commits `drop_fn` into it, so the value's destructor -/// runs when the hosting chunk is torn down. -/// -/// `len` is `1` for a scalar value or the element count for a slice. -/// `is_slice` only selects the panic message. 
-/// -/// # Safety -/// -/// - `value` must point at a value reserved via the uninit-`Arc` path, living -/// in the first `CHUNK_ALIGN` bytes of a live `SharedChunk` on which the -/// caller holds a strong reference. -/// - `assume_init` must be called at most once per allocation (the placeholder -/// commit is a non-atomic write; concurrent commits on cloned handles are -/// not supported). -#[inline] -unsafe fn commit_uninit_drop_entry(value: NonNull, len: usize, drop_fn: DropFn, is_slice: bool) { - let header = SharedChunk::::header_from_value_ptr(value); - // SAFETY: `header` has full chunk provenance via `with_addr`; - // reconstruct the fat DST pointer for typed field access. - let chunk = unsafe { NonNull::new_unchecked(SharedChunk::::header_to_fat(header.as_ptr())) }; - // SAFETY: `chunk` is a live `SharedChunk` (caller holds a +1). - let chunk_ref = unsafe { chunk.as_ref() }; - // SAFETY: `chunk` is live; `payload_ptr` returns its payload start. - let payload = unsafe { SharedChunk::::payload_ptr(chunk) }.as_ptr(); - let payload_len = chunk_ref.capacity(); - let value_offset = (value.as_ptr() as usize) - (payload as usize); - // Acquire pairs with the owner thread's Release publish of the count in - // `ChunkMutator::publish_drop_count`, so the placeholder slot's bytes are - // visible to this (possibly different) thread before we read/commit it. - let count = chunk_ref.drop_entry_count_acquire(); - // SAFETY: `payload`, `payload_len`, and `count` describe the live chunk's - // drop region; we hold a +1 and the contract forbids concurrent commits. - let committed = unsafe { drop_entry::commit_placeholder_drop_fn(payload, payload_len, count, value_offset, len, drop_fn) }; - assert!( - committed, - "{}", - if is_slice { - "Arc::<[MaybeUninit]>::assume_init: no drop entry reserved for this allocation. 
\ - Use `Arena::alloc_uninit_slice_arc::()` / `alloc_zeroed_slice_arc`; allocating \ - a `MaybeUninit` slice via the ordinary slice-Arc helpers does not reserve one \ - and would silently leak each `T::drop`." - } else { - "Arc::>::assume_init: no drop entry reserved for this allocation. \ - Use `Arena::alloc_uninit_arc::()` / `alloc_zeroed_arc`; \ - `Arena::alloc_arc(MaybeUninit::new(...))` does not reserve an entry and would \ - silently leak `T::drop`." - } - ); +/// Saturation guard for [`Arc::clone`]: aborts the process when the +/// strong count would overflow, mirroring `std::sync::Arc`. +#[cfg_attr(coverage_nightly, coverage(off))] +#[inline(never)] +#[cold] +fn strong_overflow_abort() -> ! { + refcount_overflow_abort() } impl Clone for Arc { #[inline] fn clone(&self) -> Self { - // SAFETY: `self` owns a live +1 on its chunk so the chunk is - // alive; `clone_from_value_ptr` mints a fresh +1 via an - // atomic bump and returns a `ChunkRef` that owns it. We - // `forget` that `ChunkRef`, handing the +1 to the new `Arc`. - let chunk_ref = unsafe { ChunkRef::::clone_from_value_ptr(self.ptr) }; - let _ = chunk_ref.forget(); + let value_align = mem::align_of_val::(&**self); + // SAFETY: `self` keeps the value (and its strong-count prefix) + // alive, so the strong slot is live, aligned, and within the + // chunk's provenance. + let strong = unsafe { thin_dst::strong_ref::(self.ptr, value_align) }; + // Relaxed suffices (as `std::sync::Arc`): the new handle need not + // synchronize until it is dropped. + let prev = strong.fetch_add(1, Ordering::Relaxed); + if prev > MAX_STRONG_REFCOUNT { + strong_overflow_abort(); + } Self { ptr: self.ptr, _phantom: PhantomData, @@ -320,15 +262,30 @@ impl Clone for Arc { impl Drop for Arc { #[inline] fn drop(&mut self) { - // SAFETY: `ptr` is hosted in a 64K-aligned SharedChunk we - // hold a +1 strong reference on. `ChunkRef::from_value_ptr` - // adopts that +1 and releases it on its own drop. 
We do not - // invoke `T::drop` here — for `T: Drop`, a drop entry was - // registered at allocation time so the chunk's teardown runs - // `T::drop` when the last reference releases the chunk; for - // `T: !Drop` no destructor is needed. + let value_align = mem::align_of_val::(&**self); + // SAFETY: the value (and its strong-count prefix) is still live + // while this handle exists; the strong slot is aligned and + // within chunk provenance. + let strong = unsafe { thin_dst::strong_ref::(self.ptr, value_align) }; + // Release so prior accesses happen-before teardown (as `std::sync::Arc`). + if strong.fetch_sub(1, Ordering::Release) != 1 { + return; + } + // Last strong reference: Acquire-fence so other handles' writes are + // visible before we drop the value and release the chunk. + fence(Ordering::Acquire); + // Adopt the chunk's +1 *before* `T::drop` so a panicking destructor + // still releases the chunk via `ChunkRef`'s `Drop` (the in-chunk slot + // leaks, per the `alloc_arc*` panic semantics). + // + // SAFETY: `ptr` is hosted in a 64K-aligned `SharedChunk` that + // holds exactly one outstanding +1 for this whole allocation; + // `from_value_ptr` adopts it. The value is a valid `T` and is + // dropped exactly once (only on the strong → 0 transition). unsafe { - let _ref: ChunkRef = ChunkRef::from_value_ptr(self.ptr); + let _chunk: ChunkRef = ChunkRef::from_value_ptr(self.ptr); + let fat = self.as_fat_ptr(); + ptr::drop_in_place(fat.as_ptr()); } } } @@ -351,3 +308,72 @@ where v.freeze_into_arc() } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::Arena; + + // Pins the saturation threshold to the `u32` half-range, killing the + // mutant that swaps `>>` for `<<` in the constant (which would yield + // `0xFFFF_FFFE`). Behavioral tests cannot reach this — the boundary + // sits ~2 billion clones away — so assert the value directly. 
+ #[test] + fn max_strong_refcount_is_u32_half_range() { + assert_eq!(MAX_STRONG_REFCOUNT, u32::MAX >> 1); + assert_eq!(MAX_STRONG_REFCOUNT, 0x7FFF_FFFF); + } + + // `Arc::clone` checks `prev > MAX_STRONG_REFCOUNT` on the value + // returned by `fetch_add` (the count *before* the increment), so a + // clone observing `prev == MAX_STRONG_REFCOUNT` must NOT abort. + // Driving the strong count to exactly the threshold and cloning kills + // the `>` -> `==` and `>` -> `>=` mutants on that comparison: both + // would abort the process here. + #[test] + fn clone_at_max_refcount_threshold_does_not_abort() { + let arena = Arena::new(); + let arc = arena.alloc_arc(0xABCD_u32); + // SAFETY: `arc` keeps the value and its strong-count prefix live, + // so the strong slot is aligned and within chunk provenance. + let strong = unsafe { thin_dst::strong_ref::(arc.thin_ptr(), mem::align_of::()) }; + // Force the next clone to observe `prev == MAX_STRONG_REFCOUNT`. + strong.store(MAX_STRONG_REFCOUNT, Ordering::Relaxed); + #[expect( + clippy::redundant_clone, + reason = "exercising Arc::clone's overflow guard at the threshold is the point of the test" + )] + let clone = arc.clone(); + assert_eq!(*clone, 0xABCD); + // Restore the true live-handle count (`arc` + `clone`) so the two + // drops tear the value and chunk down correctly instead of + // leaking the strong count above 1 forever. + strong.store(2, Ordering::Relaxed); + } + + // A clone observing `prev > MAX_STRONG_REFCOUNT` MUST abort. Driving + // the strong count one past the threshold reaches the + // `strong_overflow_abort()` call site in `Arc::clone` (which panics + // instead of aborting under `cfg(test)`), covering that guard and + // killing the `>` -> `==` mutant (which would not fire here). 
+ #[test] + #[should_panic(expected = "refcount overflow")] + fn clone_above_max_refcount_threshold_aborts() { + let arena = Arena::new(); + let arc = arena.alloc_arc(0xABCD_u32); + // SAFETY: `arc` keeps the value and its strong-count prefix live, + // so the strong slot is aligned and within chunk provenance. + let strong = unsafe { thin_dst::strong_ref::(arc.thin_ptr(), mem::align_of::()) }; + strong.store(MAX_STRONG_REFCOUNT + 1, Ordering::Relaxed); + // The clone panics in its overflow guard before returning, so no + // clone is produced (but `fetch_add` already bumped the count). + // Catch it, restore the real live-handle count (just `arc`) so + // teardown releases the chunk instead of leaking (keeps Miri + // happy), then resume so `should_panic` observes the panic. + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let _c = arc.clone(); + })); + strong.store(1, Ordering::Relaxed); + std::panic::resume_unwind(result.expect_err("clone past the threshold must panic")); + } +} diff --git a/crates/multitude/src/arena/alloc_prefixed.rs b/crates/multitude/src/arena/alloc_prefixed.rs index ee29c271f..1c2bee081 100644 --- a/crates/multitude/src/arena/alloc_prefixed.rs +++ b/crates/multitude/src/arena/alloc_prefixed.rs @@ -83,6 +83,15 @@ impl Arena { // recovery invariant used by the smart pointers' `Drop`. let payload_bytes = len.checked_mul(elem_size).ok_or(AllocError)?.max(elem_align); let total = PREFIX_BYTES.checked_add(payload_bytes).ok_or(AllocError)?; + // `total` is an exact reservation size, not a worst-case hint: unlike + // the slice paths (which permit over-aligned `T` and so add `elem_align` + // of front-padding slack to their routing hint), the const-assert above + // bounds `elem_align <= align_of::() <= value_align`. A fresh + // chunk's payload base is `value_align`-aligned, so an `elem_align` + // reservation on a freshly refilled chunk never consumes front padding. 
+ // Routing/refilling with `total` therefore always yields a chunk into + // which the retry's `try_alloc_with_chunk(total, elem_align)` fits — no + // `total` vs `total + elem_align` boundary loop is possible here. loop { // Allocate `total` bytes aligned to `align_of::()` so the // payload (at offset PREFIX_BYTES, a multiple of any align @@ -94,7 +103,7 @@ impl Arena { let _ = chunk_ref.forget(); return Ok(payload); } - if self.is_oversized_shared(total) { + if self.is_oversized(total) { return self.alloc_oversized_shared_with(total, |mutator, chunk_ptr| { let (base, _chunk_unused) = mutator .try_alloc_with_chunk(total, elem_align) @@ -110,6 +119,69 @@ impl Arena { } } +impl Arena { + /// Strong-prefixed [`Arc`](crate::Arc) variant of + /// [`Self::impl_alloc_prefixed_shared`]: reserves a per-`Arc` + /// [`AtomicU32`](core::sync::atomic::AtomicU32) strong count and a + /// `usize` length metadata word immediately before the payload, + /// initializes the strong count to `1`, writes the length and the + /// payload, takes one chunk refcount for the new `Arc` family, and + /// returns a thin `NonNull` to the first payload element. + /// + /// `T` must have `align_of::() <= align_of::()`; see + /// module docs. + #[inline(always)] + pub(crate) fn impl_alloc_prefixed_shared_arc(&self, src: &[T]) -> Result, AllocError> { + const { + assert!( + mem::align_of::() <= mem::align_of::(), + "impl_alloc_prefixed_shared_arc: T's align must not exceed usize's align", + ); + } + let len = src.len(); + // `src` is a live `&[T]`, so `size_of_val(src)` is a valid usize. + let payload_bytes = mem::size_of_val(src); + let bytes_needed = worst_case_arc_slice_payload::(len); + loop { + // SAFETY: `payload_bytes == size_of_val(src) == size_of::() * len`. 
+ let reserved = unsafe { self.try_reserve_arc_slice_with_size::(len, payload_bytes) }; + if let Some((uninit, chunk_ptr)) = reserved { + let chunk_ref: ChunkRef = self.acquire_current_shared_chunk_ref(chunk_ptr); + let slice_ptr = uninit.init_copy_from_slice_ptr(src); + let _ = chunk_ref.forget(); + return Ok(slice_ptr.cast::()); + } + if self.is_oversized(bytes_needed) { + return self.alloc_oversized_shared_with(bytes_needed, |mutator, chunk_ptr| { + let (ticket, _chunk) = mutator + .try_alloc_arc_slice::(len) + .expect("dedicated oversized chunk sized to fit prefixed Arc payload"); + let chunk_ref: ChunkRef = acquire_shared_chunk_ref::(chunk_ptr); + let slice_ptr = ticket.init_copy_from_slice_ptr(src); + let _ = chunk_ref.forget(); + slice_ptr.cast::() + }); + } + self.refill_shared(bytes_needed)?; + } + } +} + +/// Worst-case byte budget for a strong-prefixed `Arc` slice/prefixed +/// payload of `len` elements: per-`Arc` strong count + slice-length +/// prefix + payload + front alignment slack. Shared by the `Arc<[T]>`, +/// `Arc`, and `ArcUtf16Str` allocation paths. +#[cfg_attr(test, mutants::skip)] // underestimating refill hint ⇒ refill spin +#[inline] +pub(crate) fn worst_case_arc_slice_payload(len: usize) -> usize { + use crate::internal::thin_dst; + let align = mem::align_of::(); + let value_bytes = mem::size_of::().saturating_mul(len).max(1); + thin_dst::strong_prefix_bytes_for(align, mem::size_of::()) + .saturating_add(value_bytes) + .saturating_add(thin_dst::arc_block_align(align)) +} + /// Write the length prefix (unaligned `usize`) at `base` and copy /// `src` immediately after, returning a thin pointer to the first /// payload element. 
diff --git a/crates/multitude/src/arena/alloc_slice_arc.rs b/crates/multitude/src/arena/alloc_slice_arc.rs index c0a2b6b0c..9d6fcdcb3 100644 --- a/crates/multitude/src/arena/alloc_slice_arc.rs +++ b/crates/multitude/src/arena/alloc_slice_arc.rs @@ -10,7 +10,7 @@ use core::pin::Pin; use allocator_api2::alloc::{AllocError, Allocator}; -use super::alloc_prefixed::worst_case_thin_slice_payload; +use super::alloc_prefixed::worst_case_arc_slice_payload; use super::alloc_value::{MAX_SMART_PTR_ALIGN, acquire_shared_chunk_ref}; use super::{Arena, ExpectAlloc}; use crate::arc::Arc; @@ -149,34 +149,34 @@ impl Arena { } /// Arc + Copy: no element-drop runs, but we still take an Arc-owned - /// refcount on the chunk. + /// refcount on the chunk and reserve the strong-count prefix. #[inline] fn impl_alloc_slice_arc_copy(&self, src: &[T]) -> Result, AllocError> { - check_slice_arc_layout::(src.len())?; + check_slice_arc_layout::()?; let len = src.len(); - // Copy is never `Drop`, so use the no-drop reservation. - let bytes_needed = worst_case_thin_slice_payload::(len); + let bytes_needed = worst_case_arc_slice_payload::(len); // `src` is a live `&[T]`, so `size_of_val(src)` is a valid // `usize`. Hoisting the precomputed byte size lets the inner // reservation helper skip the `checked_mul` overflow guard. let payload_bytes = mem::size_of_val(src); loop { // SAFETY: `payload_bytes == size_of_val(src) == size_of::() * len`. - let reserved = unsafe { self.try_reserve_shared_slice_with_size::(len, payload_bytes) }; + let reserved = unsafe { self.try_reserve_arc_slice_with_size::(len, payload_bytes) }; if let Some((uninit, chunk_ptr)) = reserved { let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); let slice_ptr = uninit.init_copy_from_slice_ptr(src); let _ = chunk_ref.forget(); // SAFETY: `slice_ptr` points to `len` initialized `T`s in a - // shared chunk with a fresh +1; `Arc::from_raw` adopts that - // +1. 
Chunk-wide provenance preserved via `init_copy_from_slice_ptr`. + // shared chunk with a fresh +1 and an initialized strong + // prefix; `Arc::from_raw` adopts that family. Chunk-wide + // provenance preserved via `init_copy_from_slice_ptr`. return Ok(unsafe { Arc::from_raw(slice_ptr.cast::()) }); } - if self.is_oversized_shared(bytes_needed) { + if self.is_oversized(bytes_needed) { return self.alloc_oversized_shared_with(bytes_needed, |mutator, chunk_ptr| { - let ticket = mutator - .try_alloc_uninit_slice_prefixed::(len) - .expect("dedicated oversized chunk sized to fit slice"); + let (ticket, _chunk) = mutator + .try_alloc_arc_slice::(len) + .expect("dedicated oversized chunk sized to fit slice + strong prefix"); let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); let slice_ptr = ticket.init_copy_from_slice_ptr(src); let _ = chunk_ref.forget(); @@ -188,32 +188,16 @@ impl Arena { } } - /// Arc + closure fill: records a chunk drop entry when `T: Drop`, - /// so the chunk's teardown runs `T::drop` on each element after the - /// last `Arc` releases. + /// Arc + closure fill: `T::drop` (if any) runs eagerly in + /// [`Arc::drop`](crate::Arc) on the last reference via + /// `drop_in_place::<[T]>`, so no chunk drop entry is reserved. #[inline] fn impl_alloc_slice_arc_with T>(&self, len: usize, f: F) -> Result, AllocError> { - check_slice_arc_layout::(len)?; - // Refill hint accounts for the length prefix, payload alignment - // slack, payload bytes, and (for `T: Drop`) a drop-entry slot. - let bytes_needed = worst_case_thin_slice_payload::(len); + check_slice_arc_layout::()?; + let bytes_needed = worst_case_arc_slice_payload::(len); let mut f = Some(f); loop { - // Branch on needs_drop at const time so monomorphizations - // pick the right reservation helper. 
- if const { mem::needs_drop::() } { - if let Some((uninit, chunk_ptr)) = self.try_reserve_shared_slice_with_drop::(len) { - let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); - let f = f.take().expect("with closure taken twice"); - let slice_ptr = uninit.init_with_ptr(f); - let _ = chunk_ref.forget(); - // SAFETY: see `impl_alloc_slice_arc_copy`; the drop entry - // was committed by `init_with_ptr` for the chunk-teardown - // path. `slice_ptr` carries chunk-wide provenance so the - // Arc's later `byte_sub` to the chunk header is sound. - return Ok(unsafe { Arc::from_raw(slice_ptr.cast::()) }); - } - } else if let Some((uninit, chunk_ptr)) = self.try_reserve_shared_slice::(len) { + if let Some((uninit, chunk_ptr)) = self.try_reserve_arc_slice::(len) { let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); let f = f.take().expect("with closure taken twice"); let slice_ptr = uninit.init_with_ptr(f); @@ -222,27 +206,16 @@ impl Arena { // provenance preserved via `init_with_ptr`. return Ok(unsafe { Arc::from_raw(slice_ptr.cast::()) }); } - if self.is_oversized_shared(bytes_needed) { + if self.is_oversized(bytes_needed) { let fclosure = f.take().expect("with closure taken twice"); return self.alloc_oversized_shared_with(bytes_needed, |mutator, chunk_ptr| { - let slice_ptr = if const { mem::needs_drop::() } { - let ticket = mutator - .try_alloc_uninit_slice_with_drop_prefixed::(len) - .expect("dedicated oversized chunk sized to fit slice + drop entry"); - let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); - let p = ticket.init_with_ptr(fclosure); - let _ = chunk_ref.forget(); - p - } else { - let ticket = mutator - .try_alloc_uninit_slice_prefixed::(len) - .expect("dedicated oversized chunk sized to fit slice"); - let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); - let p = ticket.init_with_ptr(fclosure); - let _ = chunk_ref.forget(); - p - }; - // SAFETY: see the non-oversized branches above. 
+ let (ticket, _chunk) = mutator + .try_alloc_arc_slice::(len) + .expect("dedicated oversized chunk sized to fit slice + strong prefix"); + let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); + let slice_ptr = ticket.init_with_ptr(fclosure); + let _ = chunk_ref.forget(); + // SAFETY: see the non-oversized branch above. unsafe { Arc::from_raw(slice_ptr.cast::()) } }); } @@ -289,25 +262,15 @@ impl Arena { } } -/// Common up-front checks for the `Arc<[T]>` slice family. Rejects -/// over-aligned `T` (would break the smart-pointer header recovery) and -/// `T: Drop` slices whose `len > u16::MAX` (the chunk drop entry packs -/// the element count into a `u16`). -// -// Mutation testing is suppressed here: any mutation that bypasses the -// `len > u16::MAX` rejection (e.g. `&&`→`||`, `>`→`==`) sends the -// caller's refill loop into an unbounded chunk-allocation spin (see the -// detailed note in `alloc_slice_ref::reject_drop_slice_too_long`). -// Correctness is exercised by integration tests in `coverage_gaps.rs`, -// `arena.rs`, and `mutants_extras.rs`. -#[cfg_attr(test, mutants::skip)] +/// Up-front check for the `Arc<[T]>` slice family. Rejects over-aligned +/// `T` (would break the smart-pointer header recovery). Unlike the +/// old drop-entry design, there is no `len > u16::MAX` restriction: +/// element destructors run via `drop_in_place::<[T]>` in +/// [`Arc::drop`](crate::Arc), not a `u16`-counted chunk drop entry. 
#[inline] -fn check_slice_arc_layout(len: usize) -> Result<(), AllocError> { +fn check_slice_arc_layout() -> Result<(), AllocError> { if mem::align_of::() >= MAX_SMART_PTR_ALIGN { return Err(AllocError); } - if mem::needs_drop::() && len > u16::MAX as usize { - return Err(AllocError); - } Ok(()) } diff --git a/crates/multitude/src/arena/alloc_slice_box.rs b/crates/multitude/src/arena/alloc_slice_box.rs index dcfa0553f..e76956411 100644 --- a/crates/multitude/src/arena/alloc_slice_box.rs +++ b/crates/multitude/src/arena/alloc_slice_box.rs @@ -163,19 +163,15 @@ impl Arena { fn impl_alloc_slice_box_copy(&self, src: &[T]) -> Result, AllocError> { check_slice_box_layout::(src.len())?; let len = src.len(); - // `src` is a live `&[T]`, so `size_of_val(src)` is a valid - // `usize`. Hoisting it past the refill loop spares the inner - // reservation a `checked_mul` overflow guard. + // Precompute byte size so the reservation helper skips checked_mul. let payload_bytes = mem::size_of_val(src); let ptr = self.reserve_slice_box::(len, payload_bytes, |slot_ptr| { // SAFETY: `slot_ptr` is the reservation start; `len` elements // of `T` fit by construction. unsafe { ptr::copy_nonoverlapping(src.as_ptr(), slot_ptr, len) }; })?; - // `ptr` points to `len` initialized `T`s in a shared chunk that - // has a fresh +1; `Box::from_raw` adopts that +1 and `Box::drop` runs - // `drop_in_place` on the slice when the smart pointer is dropped. - // SAFETY: see above. + // SAFETY: `ptr` points to `len` initialized `T`s in a shared + // chunk with a fresh +1; `Box::from_raw` adopts that +1. Ok(unsafe { Box::from_raw(ptr.cast::()) }) } @@ -184,10 +180,7 @@ impl Arena { #[inline] fn impl_alloc_slice_box_with T>(&self, len: usize, mut f: F) -> Result, AllocError> { check_slice_box_layout::(len)?; - // Caller-provided `len`: must overflow-check the payload size - // up front so the hot loop can skip the `checked_mul`. 
On - // overflow we report `AllocError` immediately rather than spin - // refilling. + // Check overflow before the refill loop. let payload_bytes = mem::size_of::().checked_mul(len).ok_or(AllocError)?; let ptr = self.reserve_slice_box::(len, payload_bytes, |slot_ptr| { // SAFETY: `slot_ptr` is the reservation start; we init `len` slots @@ -243,7 +236,7 @@ impl Arena { let _ = chunk_ref.forget(); return Ok(base); } - if self.is_oversized_shared(bytes_needed) { + if self.is_oversized(bytes_needed) { let init_owned = init.take().expect("reserve_slice_box init taken twice"); return self.alloc_oversized_shared_with(bytes_needed, |mutator, chunk_ptr| { let ticket = mutator @@ -292,29 +285,18 @@ impl Arena { } } -/// Common up-front checks for the `Box<[T]>` slice family. `Box::drop` -/// runs `drop_in_place` on the entire slice eagerly, so no chunk drop -/// entry is recorded; however we still reject `T: Drop` slices with -/// `len > u16::MAX` so a future `Box<[T]> -> Arc<[T]>` conversion has -/// a slot to populate (parity with the `alloc_dst_box` guard). -// -// Mutation testing is suppressed: bypassing the `len > u16::MAX` -// rejection sends the caller's refill loop into an unbounded -// chunk-allocation spin (see `alloc_slice_ref::reject_drop_slice_too_long`). -#[cfg_attr(test, mutants::skip)] +/// Up-front check for `Box<[T]>`: reject alignments that break +/// smart-pointer header recovery. Slice length is full-width in the +/// chunk prefix. #[inline] -fn check_slice_box_layout(len: usize) -> Result<(), AllocError> { +fn check_slice_box_layout(_len: usize) -> Result<(), AllocError> { if mem::align_of::() >= MAX_SMART_PTR_ALIGN { return Err(AllocError); } - if mem::needs_drop::() && len > u16::MAX as usize { - return Err(AllocError); - } Ok(()) } -/// Drop-guard for partial init in `alloc_slice_*_box`. Mirrors the -/// `InitGuard` in `internal::uninit`. +/// Drop guard for partially initialized boxed slices. 
struct InitGuard { dst: *mut T, initialized: usize, diff --git a/crates/multitude/src/arena/alloc_slice_ref.rs b/crates/multitude/src/arena/alloc_slice_ref.rs index ea96a67e3..ee0ee4d4a 100644 --- a/crates/multitude/src/arena/alloc_slice_ref.rs +++ b/crates/multitude/src/arena/alloc_slice_ref.rs @@ -290,7 +290,7 @@ impl Arena { #[cfg_attr(test, mutants::skip)] fn refill_or_alloc_oversized_slice_copy(&self, src: &[T]) -> Result, AllocError> { let refill_hint = worst_case_slice_payload::(src.len()); - if self.is_oversized_local(refill_hint) { + if self.is_oversized(refill_hint) { return Ok(Some(self.alloc_oversized_slice_copy::(refill_hint, src)?)); } self.refill_local(refill_hint)?; @@ -363,7 +363,7 @@ impl Arena { fn refill_or_alloc_oversized_slice_clone(&self, src: &[T]) -> Result, AllocError> { let len = src.len(); let refill_hint = worst_case_slice_payload::(len); - if self.is_oversized_local(refill_hint) { + if self.is_oversized(refill_hint) { let mut ptr = self.alloc_oversized_local_with(refill_hint, |mutator| { if const { mem::needs_drop::() } { let ticket = mutator @@ -413,7 +413,7 @@ impl Arena { let f = f.take().expect("with closure taken twice"); return Ok(u.init_with(f)); } - if self.is_oversized_local(refill_hint) { + if self.is_oversized(refill_hint) { let f = f.take().expect("with closure taken twice"); let mut ptr = self.alloc_oversized_local_with(refill_hint, |mutator| { if const { mem::needs_drop::() } { @@ -468,7 +468,7 @@ impl Arena { let it = iter.take().expect("iterator taken twice"); return Ok(u.init_from_iter(it)); } - if self.is_oversized_local(refill_hint) { + if self.is_oversized(refill_hint) { let mut it = iter.take().expect("iterator taken twice"); let mut ptr = self.alloc_oversized_local_with(refill_hint, |mutator| { if const { mem::needs_drop::() } { diff --git a/crates/multitude/src/arena/alloc_str.rs b/crates/multitude/src/arena/alloc_str.rs index efe2ab9b3..08eb368d7 100644 --- a/crates/multitude/src/arena/alloc_str.rs +++ 
b/crates/multitude/src/arena/alloc_str.rs @@ -120,7 +120,7 @@ impl Arena { where A: Send + Sync, { - self.impl_alloc_prefixed_shared::(s.as_ref().as_bytes()).map(|ptr| + self.impl_alloc_prefixed_shared_arc::(s.as_ref().as_bytes()).map(|ptr| // SAFETY: see `Self::alloc_str_arc`. unsafe { Arc::from_raw(ptr) }) } @@ -150,7 +150,7 @@ impl Arena { if let Some(u) = self.try_reserve_local_bytes(len) { return Ok(u.init_copy_from_str(s)); } - if self.is_oversized_local(len) { + if self.is_oversized(len) { let ptr = self.alloc_oversized_local_with(len, |mutator| { let ticket = mutator.try_alloc_bytes(len).expect("dedicated oversized chunk sized to fit string"); // `init_copy_from_str` returns `&mut str` bound to the diff --git a/crates/multitude/src/arena/alloc_uninit.rs b/crates/multitude/src/arena/alloc_uninit.rs index f05c47eb3..a11e61b5c 100644 --- a/crates/multitude/src/arena/alloc_uninit.rs +++ b/crates/multitude/src/arena/alloc_uninit.rs @@ -7,13 +7,12 @@ //! groups the `alloc_uninit_*` / `alloc_zeroed_*` family together to //! keep the central `mod.rs` smaller. -use core::mem; use core::mem::MaybeUninit; use core::pin::Pin; use allocator_api2::alloc::{AllocError, Allocator}; -use super::{Arena, ExpectAlloc}; +use super::Arena; use crate::arc::Arc; use crate::r#box::Box; @@ -185,9 +184,11 @@ impl Arena { /// Allocate uninitialized space for a `T` and return an /// [`Arc, A>`](crate::Arc). /// - /// For `T: Drop`, this reserves a placeholder drop entry. Dropping - /// `Arc>` without `assume_init` is sound; `assume_init` - /// commits the entry so a later `Arc` drop runs `T::drop`. + /// No drop entry is reserved. Dropping `Arc>` without + /// `assume_init` is sound (`MaybeUninit` has no drop glue); after + /// `assume_init`, dropping the last `Arc` runs `T::drop` eagerly + /// via `drop_in_place::` (see [`Arc`](crate::Arc)'s per-pointer + /// reference counting). 
/// /// # Panics /// @@ -200,11 +201,7 @@ impl Arena { A: Send + Sync, T: Send + Sync, { - if const { mem::needs_drop::() } { - (self.impl_alloc_uninit_arc::(false)).expect_alloc() - } else { - self.alloc_arc_with::, _>(MaybeUninit::uninit) - } + self.alloc_arc_with::, _>(MaybeUninit::uninit) } /// Fallible variant of [`Self::alloc_uninit_arc`]. @@ -219,11 +216,7 @@ impl Arena { A: Send + Sync, T: Send + Sync, { - if const { mem::needs_drop::() } { - self.impl_alloc_uninit_arc::(false) - } else { - self.try_alloc_arc_with::, _>(MaybeUninit::uninit) - } + self.try_alloc_arc_with::, _>(MaybeUninit::uninit) } /// Like [`Self::alloc_uninit_arc`] but the value bytes are zeroed. @@ -239,11 +232,7 @@ impl Arena { A: Send + Sync, T: Send + Sync, { - if const { mem::needs_drop::() } { - (self.impl_alloc_uninit_arc::(true)).expect_alloc() - } else { - self.alloc_arc_with::, _>(MaybeUninit::zeroed) - } + self.alloc_arc_with::, _>(MaybeUninit::zeroed) } /// Fallible variant of [`Self::alloc_zeroed_arc`]. @@ -258,20 +247,17 @@ impl Arena { A: Send + Sync, T: Send + Sync, { - if const { mem::needs_drop::() } { - self.impl_alloc_uninit_arc::(true) - } else { - self.try_alloc_arc_with::, _>(MaybeUninit::zeroed) - } + self.try_alloc_arc_with::, _>(MaybeUninit::zeroed) } /// Allocate `len` uninitialized `T` slots and return an /// [`Arc<[MaybeUninit], A>`](crate::Arc). /// - /// For `T: Drop`, this reserves a placeholder slice drop entry. - /// Dropping `Arc<[MaybeUninit]>` without `assume_init` is sound; - /// `assume_init` commits the entry so dropping `Arc<[T]>` runs element - /// destructors. + /// No drop entry is reserved. Dropping `Arc<[MaybeUninit]>` + /// without `assume_init` is sound (`MaybeUninit` has no drop + /// glue); after `assume_init`, dropping the last `Arc<[T]>` runs the + /// element destructors eagerly via `drop_in_place::<[T]>` (see + /// [`Arc`](crate::Arc)'s per-pointer reference counting). 
/// /// # Panics /// @@ -284,11 +270,7 @@ impl Arena { A: Send + Sync, T: Send + Sync, { - if const { mem::needs_drop::() } { - (self.impl_alloc_uninit_slice_arc::(len, false)).expect_alloc() - } else { - self.alloc_slice_fill_with_arc::, _>(len, |_| MaybeUninit::uninit()) - } + self.alloc_slice_fill_with_arc::, _>(len, |_| MaybeUninit::uninit()) } /// Fallible variant of [`Self::alloc_uninit_slice_arc`]. @@ -303,11 +285,7 @@ impl Arena { A: Send + Sync, T: Send + Sync, { - if const { mem::needs_drop::() } { - self.impl_alloc_uninit_slice_arc::(len, false) - } else { - self.try_alloc_slice_fill_with_arc::, _>(len, |_| MaybeUninit::uninit()) - } + self.try_alloc_slice_fill_with_arc::, _>(len, |_| MaybeUninit::uninit()) } /// Like [`Self::alloc_uninit_slice_arc`] but the slice bytes are zeroed. @@ -323,11 +301,7 @@ impl Arena { A: Send + Sync, T: Send + Sync, { - if const { mem::needs_drop::() } { - (self.impl_alloc_uninit_slice_arc::(len, true)).expect_alloc() - } else { - self.alloc_slice_fill_with_arc::, _>(len, |_| MaybeUninit::zeroed()) - } + self.alloc_slice_fill_with_arc::, _>(len, |_| MaybeUninit::zeroed()) } /// Fallible variant of [`Self::alloc_zeroed_slice_arc`]. @@ -342,11 +316,7 @@ impl Arena { A: Send + Sync, T: Send + Sync, { - if const { mem::needs_drop::() } { - self.impl_alloc_uninit_slice_arc::(len, true) - } else { - self.try_alloc_slice_fill_with_arc::, _>(len, |_| MaybeUninit::zeroed()) - } + self.try_alloc_slice_fill_with_arc::, _>(len, |_| MaybeUninit::zeroed()) } /// Allocate `len` uninitialized `T` slots and return an diff --git a/crates/multitude/src/arena/alloc_unsized.rs b/crates/multitude/src/arena/alloc_unsized.rs index 1d30ce9e0..c11706a71 100644 --- a/crates/multitude/src/arena/alloc_unsized.rs +++ b/crates/multitude/src/arena/alloc_unsized.rs @@ -4,13 +4,15 @@ //! DST (unsized) value allocation API on [`Arena`]. //! //! Implements `alloc_dst_arc`, `alloc_dst_box` and their `try_*` -//! variants under the `dst` Cargo feature. 
The trailing drop entry -//! stores the pointer-metadata as a `u16`, which limits supported DSTs -//! to those whose pointer-metadata is either zero-sized (sized `T`) or -//! `usize`-sized AND fits in `u16` (slices of length up to -//! `u16::MAX`). For drop-aware slices with more than `u16::MAX` -//! elements, the non-DST `alloc_slice_arc` / `_box` family stores the -//! length in a separate prefix word and has no such cap. +//! variants under the `dst` Cargo feature. The pointer-metadata is +//! stored verbatim in the chunk prefix (immediately before the +//! payload), so supported DSTs are those whose metadata is either +//! zero-sized (sized `T`) or `usize`-sized (slice DSTs and trait +//! objects). `Arc` runs `T`'s destructor eagerly on the last clone via +//! `drop_in_place::`; `Box` does so in its own `Drop`. Neither +//! family caps the metadata width for `T: Drop`: both drop via +//! `drop_in_place` on a full-width fat pointer, so a `Drop` trait +//! object or a slice longer than `u16::MAX` is accepted by both. use core::alloc::Layout; use core::mem; @@ -25,7 +27,6 @@ use super::{Arena, ExpectAlloc}; use crate::arc::Arc; use crate::r#box::Box; use crate::internal::constants::max_smart_ptr_align; -use crate::internal::drop_entry::DropFn; /// Maximum `layout.align()` accepted by smart-pointer allocations. /// Mirrors the constant of the same name in [`alloc_value`](super::alloc_value): @@ -38,8 +39,10 @@ impl Arena { /// /// The closure `init` receives a typed fat pointer to the buffer /// (built from `(thin_ptr, metadata)`) and is responsible for - /// writing a valid `T` through it. multitude reconstructs the same - /// metadata at chunk teardown so `T`'s destructor runs correctly. + /// writing a valid `T` through it. The metadata is stored in the + /// chunk prefix and recovered on demand, so `T`'s destructor runs + /// eagerly (via `drop_in_place::`) when the last `Arc` clone is + /// dropped. 
/// /// For sized `T`, prefer [`Self::alloc_arc`] / [`Self::alloc_arc_with`]. /// @@ -57,11 +60,9 @@ impl Arena { /// - `init` must initialize all bytes covered by `layout` to a valid `T`. /// - `metadata` must be valid for the value just written. /// - `T::Metadata` must be either zero-sized (sized `T`) or - /// `usize`-sized AND fit in `u16` after reinterpretation. This - /// means **slices** (`[U]`, where the metadata is the slice - /// length) and **sized** `T` are supported; trait objects (`dyn - /// Trait`) and other DSTs whose metadata cannot be packed into - /// `u16` are **not** supported. + /// `usize`-sized (slice DSTs `[U]` and trait objects `dyn Trait`, + /// whose metadata — slice length or vtable pointer — is stored + /// verbatim in the chunk prefix). #[cfg_attr(docsrs, doc(cfg(feature = "dst")))] pub unsafe fn alloc_dst_arc( &self, @@ -107,8 +108,9 @@ impl Arena { /// Allocate a possibly-unsized `T` and return a [`Box`](crate::Box). /// See [`Self::alloc_dst_arc`] for the contract. /// - /// Unlike the refcount variants, the resulting [`Box`](crate::Box) runs - /// `T`'s destructor immediately when the smart pointer is dropped. + /// The resulting [`Box`](crate::Box) is the sole owner, so it runs + /// `T`'s destructor when it is dropped (the `Arc` variants run it + /// when the last clone is dropped; both are eager). /// /// # Panics /// @@ -149,12 +151,8 @@ impl Arena { /// Shared implementation for `alloc_dst_arc` / `try_alloc_dst_arc`. /// - /// Reserves `layout.size()` bytes aligned to `layout.align()` in - /// the current shared chunk, places a drop-entry placeholder (if - /// `T` requires drop), invokes `init` on the typed fat pointer, - /// commits the drop shim, and wraps the result in an [`Arc`]. - /// - /// `TRY` selects the panic / error arm. + /// Reserves a strong-prefixed shared slot, invokes `init` on the + /// typed fat pointer, and wraps the result in an [`Arc`]. 
/// /// # Safety /// @@ -178,10 +176,10 @@ impl Arena { } /// Shared implementation for `alloc_dst_box` / `try_alloc_dst_box`. - /// Mirrors `impl_alloc_dst_arc` but skips drop-entry reservation: - /// [`Box::drop`] runs `drop_in_place::` on the value pointer - /// (which natively handles `?Sized`), so no chunk-teardown drop - /// entry is needed. + /// Like `impl_alloc_dst_arc` but without the per-`Arc` strong-count + /// prefix: [`Box::drop`] runs `drop_in_place::` on the value + /// pointer (which natively handles `?Sized`). Neither variant + /// reserves a chunk drop entry. /// /// # Safety /// @@ -196,48 +194,30 @@ impl Arena { if layout.align() >= MAX_SMART_PTR_ALIGN { return Err(AllocError); } - // Guard parity with the Arc path: even though `Box::drop` runs - // `T::drop` eagerly (no chunk-teardown drop entry needed), reject - // DST values with `T: Drop` whose metadata cannot pack into the - // chunk drop-list's `u16` slot. This keeps the Box convertible - // to `Arc` later via `into_arc`-style APIs and matches the - // non-DST `alloc_slice_box` family. - if mem::needs_drop::() && !metadata_fits_u16::(metadata) { - return Err(AllocError); - } let meta_bytes = mem::size_of::(); // Payload starts at the lowest layout-aligned offset >= // meta_bytes. For sized T (meta_bytes = 0) payload starts at 0. let payload_offset = if meta_bytes == 0 { 0 } else { meta_bytes.max(layout.align()) }; - // Floor the value byte count to 1 so the returned payload pointer - // (at offset `payload_offset` within the reservation) is strictly - // less than `reservation_end`, never landing at - // `chunk_base + CHUNK_ALIGN` for `layout.size() == 0`. + // Keep the payload pointer inside the reservation for ZSTs. let value_bytes = layout.size().max(1); let total = payload_offset.checked_add(value_bytes).ok_or(AllocError)?; - // Refill hint must include `layout.align() - 1` bytes of slack - // so `try_alloc(total, align)` always succeeds inside a chunk - // sized for this allocation. 
The same hint drives the oversized - // routing check so the dedicated chunk also has the slack. + // Include alignment slack so the retry fits the chosen chunk. let refill_hint = total.saturating_add(layout.align()); let mut init = Some(init); loop { if let Some((reservation, chunk_ptr)) = self.current_shared().try_alloc_with_chunk(total, layout.align().max(1)) { let init = init.take().expect("init taken twice"); let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); - // SAFETY: see `write_dst_prefix_and_init` — `reservation` - // is the freshly reserved exclusive storage; we write - // metadata at `payload - meta_bytes` and hand `init` a - // fat pointer to the payload. + // SAFETY: `reservation` is fresh exclusive storage; metadata + // is written before `init` receives the fat payload pointer. let payload_nn = unsafe { write_dst_prefix_and_init::(reservation.as_non_null(), payload_offset, meta_bytes, metadata, init) }; let _ = chunk_ref.forget(); - // SAFETY: `payload_nn` references a fully-initialized - // `T` whose metadata is in the chunk prefix; the - // hosting chunk now holds +1 in the new `Box`'s name. + // SAFETY: `payload_nn` references initialized `T`; the + // hosting chunk holds the new `Box`'s +1. return Ok(unsafe { Box::from_raw(payload_nn) }); } - if self.is_oversized_shared(refill_hint) { + if self.is_oversized(refill_hint) { let init = init.take().expect("init taken twice"); return self.alloc_oversized_shared_with(refill_hint, |mutator, chunk_ptr| { let (reservation, _chunk) = mutator @@ -256,10 +236,12 @@ impl Arena { } } - /// Reserve raw storage + drop entry in the current shared chunk, - /// run `init` on a typed fat pointer, commit the DST drop shim, - /// and return the fat `NonNull`. Skips the drop entry when `T` - /// is drop-free. 
+ /// Reserve a strong-prefixed `Arc` slot in the current shared + /// chunk (per-`Arc` strong count + `T::Metadata` prefix + payload), + /// run `init` on a typed fat pointer, and return the thin payload + /// pointer. No chunk drop entry is reserved: + /// [`Arc::drop`](crate::Arc) runs `drop_in_place::` (which natively + /// handles `?Sized`) on the last reference. /// /// # Safety /// @@ -274,72 +256,33 @@ impl Arena { if layout.align() >= MAX_SMART_PTR_ALIGN { return Err(AllocError); } - - let needs_drop = mem::needs_drop::(); - - // For DST values that need drop, the drop entry packs `metadata` - // into a `u16`. Reject metadata that doesn't fit before doing - // any allocation. - if needs_drop && !metadata_fits_u16::(metadata) { - return Err(AllocError); - } - let metadata_u16 = if needs_drop { encode_metadata_u16::(metadata) } else { 0 }; let meta_bytes = mem::size_of::(); - // Payload starts at the lowest layout-aligned offset >= - // meta_bytes. For sized T (meta_bytes = 0) payload starts at 0. - let payload_offset = if meta_bytes == 0 { 0 } else { meta_bytes.max(layout.align()) }; - // Floor the value byte count to 1 so the returned payload pointer - // is strictly inside the reservation; see `impl_alloc_dst_box`. - let value_bytes = layout.size().max(1); - let total = payload_offset.checked_add(value_bytes).ok_or(AllocError)?; + let value_align = layout.align().max(1); + // Keep the payload pointer inside the reservation for ZSTs. 
+ let payload_bytes = layout.size().max(1); + let refill_hint = worst_case_arc_dst(payload_bytes, value_align, meta_bytes); let mut init = Some(init); loop { - let reservation = self.current_shared().try_alloc_thin_dst_smart_with_chunk( - total, - layout.align().max(1), - payload_offset, - needs_drop, - metadata_u16, - ); - - if let Some((base_in_chunk, drop_slot_opt, chunk_ptr)) = reservation { + if let Some((value_ptr, chunk_ptr)) = self.current_shared().try_alloc_arc_dst(payload_bytes, value_align, meta_bytes) { let init = init.take().expect("init taken twice"); let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); - // SAFETY: see `write_dst_prefix_and_init`. - let payload_nn = - unsafe { write_dst_prefix_and_init::(base_in_chunk.as_non_null(), payload_offset, meta_bytes, metadata, init) }; - if let Some(slot) = drop_slot_opt { - // SAFETY: `slot.as_ptr()` references a freshly - // placed `DropEntry::placeholder` we own - // exclusively until commit. - unsafe { - (*slot.as_ptr()).commit_drop_fn(dst_drop_shim:: as DropFn); - } - } + // SAFETY: `value_ptr` is fresh payload storage with a + // strong prefix; metadata is written before `init`. 
+ let payload_nn = unsafe { write_dst_meta_and_init::(value_ptr, meta_bytes, metadata, init) }; let _ = chunk_ref.forget(); return Ok(payload_nn); } - let refill_hint = total - .saturating_add(layout.align()) - .saturating_add(mem::size_of::()); - if self.is_oversized_shared(refill_hint) { + if self.is_oversized(refill_hint) { let init = init.take().expect("init taken twice"); return self.alloc_oversized_shared_with(refill_hint, |mutator, chunk_ptr| { - let (base_in_chunk, drop_slot_opt) = mutator - .try_alloc_thin_dst_smart(total, layout.align().max(1), payload_offset, needs_drop, metadata_u16) - .expect("dedicated oversized chunk sized to fit DST value + optional drop entry"); + let (value_ptr, _chunk) = mutator + .try_alloc_arc_dst(payload_bytes, value_align, meta_bytes) + .expect("dedicated oversized chunk sized to fit DST value + strong prefix"); let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); // SAFETY: see the in-arena branch above. - let payload_nn = - unsafe { write_dst_prefix_and_init::(base_in_chunk.as_non_null(), payload_offset, meta_bytes, metadata, init) }; - if let Some(slot) = drop_slot_opt { - // SAFETY: see the in-arena branch above. - unsafe { - (*slot.as_ptr()).commit_drop_fn(dst_drop_shim:: as DropFn); - } - } + let payload_nn = unsafe { write_dst_meta_and_init::(value_ptr, meta_bytes, metadata, init) }; let _ = chunk_ref.forget(); payload_nn }); @@ -455,66 +398,62 @@ impl Arena { } } -/// Reinterpret the pointer-metadata for `T` as a `u16`. -/// -/// Returns the low 16 bits of the metadata value when interpreted as a -/// `usize`. For metadata kinds we don't support packing -/// (vtable-bearing trait objects), the returned value is meaningless; -/// [`metadata_fits_u16`] gates this. -/// -/// For sized `T` (`Metadata = ()`), returns `0`. +/// Worst-case byte budget for a single strong-prefixed `Arc` DST +/// allocation: per-`Arc` strong count + `T::Metadata` prefix + payload + +/// front alignment slack. 
+#[cfg_attr(test, mutants::skip)] // underestimating refill hint ⇒ refill spin #[inline] -#[cfg_attr(test, mutants::skip)] // saturating cast; callers gate via `metadata_fits_u16` -fn encode_metadata_u16(metadata: T::Metadata) -> u16 { - if mem::size_of::() == 0 { - return 0; - } - debug_assert_eq!( - mem::size_of::(), - mem::size_of::(), - "alloc_dst_*: T::Metadata must be either ZST or usize-sized" - ); - // SAFETY: branch above ensures `T::Metadata` is `usize`-sized; we - // read it through a `usize` window, which is layout-compatible for - // the supported subset (`[U]` slices: metadata is the length). - let raw: usize = unsafe { mem::transmute_copy::(&metadata) }; - // Saturating cast: if the value exceeds u16::MAX we set u16::MAX - // and `metadata_fits_u16` will reject it. - #[allow( - clippy::cast_possible_truncation, - reason = "saturating cast: value > u16::MAX is guarded by the branch above" - )] - if raw > u16::MAX as usize { u16::MAX } else { raw as u16 } +fn worst_case_arc_dst(payload_bytes: usize, value_align: usize, meta_bytes: usize) -> usize { + use crate::internal::thin_dst; + thin_dst::strong_prefix_bytes_for(value_align, meta_bytes) + .saturating_add(payload_bytes) + .saturating_add(thin_dst::arc_block_align(value_align)) } -/// Returns whether `metadata` packs losslessly into a `u16`. -#[cfg_attr(test, mutants::skip)] // see `alloc_slice_ref::reject_drop_slice_too_long` -#[inline] -fn metadata_fits_u16(metadata: T::Metadata) -> bool { - if mem::size_of::() == 0 { - return true; - } - if mem::size_of::() != mem::size_of::() { - return false; - } - // SAFETY: branch above ensures `T::Metadata` is `usize`-sized. - let raw: usize = unsafe { mem::transmute_copy::(&metadata) }; - u16::try_from(raw).is_ok() +/// Write metadata, call `init` on the reconstructed fat pointer, and +/// return the thin payload pointer. Used by strong-prefixed `Arc` DSTs. 
+/// +/// # Safety +/// +/// - `value_ptr` must be the payload pointer of a strong-prefixed `Arc` +/// reservation whose prefix has room for `meta_bytes` immediately +/// before it. +/// - `init` must initialize a valid `T` through the fat pointer it +/// receives. +#[inline(always)] +unsafe fn write_dst_meta_and_init( + value_ptr: NonNull, + meta_bytes: usize, + metadata: T::Metadata, + init: impl FnOnce(*mut T), +) -> NonNull { + // SAFETY: per the function contract. The metadata word sits in + // `[value_ptr - meta_bytes, value_ptr)`, inside the reservation + // prefix; `write_unaligned` tolerates any alignment. For sized T + // (meta_bytes == 0) the write is skipped. + let fat = unsafe { + if meta_bytes != 0 { + let prefix_ptr = value_ptr.as_ptr().sub(meta_bytes).cast::(); + ptr::write_unaligned(prefix_ptr, metadata); + } + ptr_meta::from_raw_parts_mut::(value_ptr.as_ptr().cast::<()>(), metadata) + }; + // Caller's contract: `init` writes a valid `T` through `fat`. If it + // panics, callers' `ChunkRef` guard releases the chunk's `+1`. + init(fat); + value_ptr } -/// Write `T::Metadata` (if any) at `base + payload_offset - meta_bytes`, -/// reconstruct the fat `*mut T`, run the caller-provided `init` on -/// it, and return the thin payload pointer adopted by the smart -/// pointer (metadata is recovered on demand from the chunk prefix). +/// `Box` DST variant of [`write_dst_meta_and_init`]. `Box` has no +/// strong-count prefix, so the reservation starts at the metadata region. /// /// # Safety /// /// - `base` must reference `payload_offset + layout.size()` bytes of /// exclusively-owned chunk storage aligned to `layout.align()`. -/// - `payload_offset` must equal the value computed at the call site -/// (i.e. `meta_bytes.max(layout.align())` for DST or `0` for sized). -/// - `init` must initialize a valid `T` through the fat pointer it -/// receives. +/// - `payload_offset` must equal `meta_bytes.max(layout.align())` for +/// DST or `0` for sized `T`. 
+/// - `init` must initialize a valid `T` through the fat pointer. #[inline(always)] unsafe fn write_dst_prefix_and_init( base: NonNull, @@ -525,10 +464,8 @@ unsafe fn write_dst_prefix_and_init( ) -> NonNull { // SAFETY: per the function contract. `byte_add(payload_offset)` // stays within the reservation. The prefix at `payload - meta_bytes` - // lies in `[base, base + payload_offset)` (low-align T fills the - // prefix region; high-align T leaves the prefix in the padding). - // For sized T (meta_bytes == 0) the prefix write is a no-op. - // `from_raw_parts_mut` rebuilds the fat pointer for `init`'s call. + // lies in `[base, base + payload_offset)`. For sized T (meta_bytes + // == 0) the prefix write is a no-op. let (payload_nn, fat) = unsafe { let payload_nn = base.byte_add(payload_offset); if meta_bytes != 0 { @@ -538,50 +475,20 @@ unsafe fn write_dst_prefix_and_init( let fat = ptr_meta::from_raw_parts_mut::(payload_nn.as_ptr().cast::<()>(), metadata); (payload_nn, fat) }; - // Caller's contract: `init` writes a valid `T` through `fat`. If - // it panics, callers' `ChunkRef` guard releases the chunk's `+1`. + // Caller's contract: `init` writes a valid `T` through `fat`. If it + // panics, callers' `ChunkRef` guard releases the chunk's `+1`. init(fat); payload_nn } -/// Drop shim used by the DST path. Reconstructs the fat `*mut T` from -/// `(thin, metadata_u16)` and runs `drop_in_place::` on it. -/// -/// # Safety -/// -/// - `thin` must point at a fully-initialized `T` whose size/alignment -/// match the [`Layout`] used at allocation time. -/// - `T::Metadata` must be either zero-sized or `usize`-sized -/// (enforced at the public API by `encode_metadata_u16` / -/// `metadata_fits_u16`). -/// - `metadata_raw`, when interpreted as `T::Metadata`, must equal the -/// metadata that was paired with the value at allocation time. -unsafe fn dst_drop_shim(thin: *mut u8, metadata_raw: usize) { - // Recover `T::Metadata` from the stored `usize`. 
For sized `T` - // (Metadata = `()`), the read is a zero-byte no-op. - let metadata: T::Metadata = if mem::size_of::() == 0 { - // SAFETY: `T::Metadata` is zero-sized; read produces the - // single uninhabited-by-data unit value. - unsafe { mem::zeroed() } - } else { - // SAFETY: by the function's safety contract. - unsafe { mem::transmute_copy::(&metadata_raw) } - }; - let fat: *mut T = ptr_meta::from_raw_parts_mut(thin.cast::<()>(), metadata); - // SAFETY: by the function's safety contract `fat` references a - // fully-initialized `T`; we hold exclusive access (chunk refcount - // is zero on the teardown path that invokes this shim). - unsafe { ptr::drop_in_place(fat) }; -} - #[cfg(test)] mod tests { use super::*; use crate::Arena as TestArena; - /// Cover `encode_metadata_u16` / `metadata_fits_u16` zero-sized - /// branches (lines 434, 458) and `dst_drop_shim`'s `Metadata = ()` - /// branch (line 486) via an `alloc_dst_arc` of a sized drop-bearing `T`. + /// Exercises `alloc_dst_arc` of a sized drop-bearing `T`: the value's + /// destructor must run eagerly when the last `Arc` clone drops + /// (before the arena is torn down). #[test] fn dst_arc_sized_drop_type_metadata_zero_sized_paths() { use std::sync::Arc as StdArc; @@ -606,27 +513,4 @@ mod tests { drop(arena); assert_eq!(counter.load(Ordering::Relaxed), 1); } - - // A `?Sized` type whose `ptr_meta` pointer metadata (`u8`) is neither - // zero-sized (as for `Sized` `T`) nor `usize`-sized (as for slices, `str`, - // and trait objects). No DST produced by real allocations has such - // metadata, so this exercises the otherwise-unreachable reject branch in - // `metadata_fits_u16`. - #[allow(dead_code, reason = "exists only to provide a non-usize Pointee::Metadata type")] - struct OddMetadataDst(str); - - // SAFETY: `OddMetadataDst` is never constructed, and no pointer to it is - // ever formed or split via `ptr_meta`. 
The impl exists solely to give - // `metadata_fits_u16` a metadata type (`u8`) whose size is neither 0 nor - // `size_of::()`. - unsafe impl Pointee for OddMetadataDst { - type Metadata = u8; - } - - /// Cover `metadata_fits_u16`'s non-`usize`-sized metadata reject branch: - /// `size_of::()` is 1, which is neither 0 nor `size_of::()`. - #[test] - fn metadata_fits_u16_rejects_non_usize_metadata() { - assert!(!metadata_fits_u16::(0u8)); - } } diff --git a/crates/multitude/src/arena/alloc_utf16.rs b/crates/multitude/src/arena/alloc_utf16.rs index 6a626686f..bb42d9e89 100644 --- a/crates/multitude/src/arena/alloc_utf16.rs +++ b/crates/multitude/src/arena/alloc_utf16.rs @@ -42,7 +42,7 @@ impl Arena { where A: Send + Sync, { - self.impl_alloc_prefixed_shared::(s.as_ref().as_slice()).map(|ptr| + self.impl_alloc_prefixed_shared_arc::(s.as_ref().as_slice()).map(|ptr| // SAFETY: see `Self::alloc_utf16_str_arc`. unsafe { ArcUtf16Str::from_raw(ptr) }) } @@ -90,7 +90,7 @@ impl Arena { where A: Send + Sync, { - self.impl_alloc_utf16_prefixed_from_str(s.as_ref()).map(|ptr| + self.impl_alloc_utf16_prefixed_from_str_arc(s.as_ref()).map(|ptr| // SAFETY: see `Self::alloc_utf16_str_arc`. unsafe { ArcUtf16Str::from_raw(ptr) }) } @@ -182,7 +182,7 @@ impl Arena { let _ = chunk_ref.forget(); return Ok(payload); } - if self.is_oversized_shared(total) { + if self.is_oversized(total) { return self.alloc_oversized_shared_with(total, |mutator, chunk_ptr| { let (base, _chunk_unused) = mutator .try_alloc_with_chunk(total, elem_align) @@ -196,6 +196,38 @@ impl Arena { self.refill_shared(total)?; } } + + /// Strong-prefixed [`ArcUtf16Str`](crate::strings::ArcUtf16Str) + /// variant of [`Self::impl_alloc_utf16_prefixed_from_str`]: reserves + /// a per-`Arc` strong count and slice-length prefix, transcodes `s` + /// into the `u16` payload, and returns a thin pointer to the first + /// payload element. 
+ #[inline(always)] + #[cfg_attr(test, mutants::skip)] // size-hint mutation ⇒ refill spin (OOM) + fn impl_alloc_utf16_prefixed_from_str_arc(&self, s: &str) -> Result, AllocError> { + let exact: usize = s.chars().map(char::len_utf16).sum(); + let bytes_needed = super::alloc_prefixed::worst_case_arc_slice_payload::(exact); + loop { + if let Some((uninit, chunk_ptr)) = self.try_reserve_arc_slice::(exact) { + let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); + let payload = uninit.init_from_iter_ptr(s.encode_utf16()); + let _ = chunk_ref.forget(); + return Ok(payload.cast::()); + } + if self.is_oversized(bytes_needed) { + return self.alloc_oversized_shared_with(bytes_needed, |mutator, chunk_ptr| { + let (ticket, _chunk) = mutator + .try_alloc_arc_slice::(exact) + .expect("dedicated oversized chunk sized to fit utf-16 Arc payload"); + let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); + let payload = ticket.init_from_iter_ptr(s.encode_utf16()); + let _ = chunk_ref.forget(); + payload.cast::() + }); + } + self.refill_shared(bytes_needed)?; + } + } } /// Writes the `usize` element-count prefix at `base`, transcodes `s` diff --git a/crates/multitude/src/arena/alloc_value.rs b/crates/multitude/src/arena/alloc_value.rs index 67b870887..93d532874 100644 --- a/crates/multitude/src/arena/alloc_value.rs +++ b/crates/multitude/src/arena/alloc_value.rs @@ -14,12 +14,12 @@ use allocator_api2::alloc::{AllocError, Allocator}; use super::{Arena, ExpectAlloc}; use crate::arc::Arc; use crate::r#box::Box; -use crate::internal::Chunk; use crate::internal::chunk_ref::ChunkRef; use crate::internal::constants::max_smart_ptr_align; use crate::internal::drop_entry::DropEntry; use crate::internal::shared_chunk::SharedChunk; -use crate::internal::uninit::{Uninit, UninitDrop}; +use crate::internal::uninit::Uninit; +use crate::internal::{Chunk, thin_dst}; /// Worst-case bytes consumed by a single value allocation of type `T` in /// a chunk: value bytes + alignment padding, 
plus one [`DropEntry`] slot @@ -35,6 +35,18 @@ const fn worst_case_payload() -> usize { } } +/// Worst-case bytes consumed by a single `Arc` value allocation: the +/// per-`Arc` strong-count prefix + value bytes + front alignment slack. +#[cfg_attr(test, mutants::skip)] // under-sized hint ⇒ refill loop spin (OOM) +#[inline] +const fn worst_case_arc_payload() -> usize { + let align = mem::align_of::(); + let value_bytes = if mem::size_of::() == 0 { 1 } else { mem::size_of::() }; + thin_dst::strong_prefix_bytes_for(align, 0) + .saturating_add(value_bytes) + .saturating_add(thin_dst::arc_block_align(align)) +} + /// Maximum `align_of::()` accepted by smart-pointer allocations. /// /// Boxes recover their chunk header by subtracting the value pointer's @@ -445,7 +457,7 @@ impl Arena { return Ok(u.init(f())); } let wcp = worst_case_payload::(); - if self.is_oversized_local(wcp) { + if self.is_oversized(wcp) { return self.alloc_oversized_value_with::(wcp, f); } self.refill_local(wcp)?; @@ -497,45 +509,53 @@ impl Arena { fn impl_alloc_box_with T>(&self, f: F) -> Result, AllocError> { // SAFETY: `impl_alloc_smart_with` returns a `NonNull` to a // freshly-written `T` whose containing chunk has just been - // bumped by +1 in the new smart pointer's name. `Box` runs - // `T::drop` eagerly in its own `Drop`, so it does *not* register - // a chunk drop entry (`REGISTER_DROP = false`); otherwise the - // value would be dropped twice (once by `Box::drop`, once by the - // chunk teardown replay). `Box::from_raw` adopts that +1. - self.impl_alloc_smart_with::(f) + // bumped by +1 in the new `Box`'s name. `Box` runs `T::drop` + // eagerly in its own `Drop` and adopts that +1 via + // `Box::from_raw`. + self.impl_alloc_smart_with::(f) .map(|ptr| unsafe { Box::from_raw(ptr.cast::()) }) } - /// Shared fast-path body for the `alloc_arc` family. 
Identical - /// shape to [`Self::impl_alloc_box_with`] — the only differences - /// between `Box` and `Arc` live in their `Clone`/`Send`/`Sync` - /// impls, not at allocation time. + /// Shared fast-path body for the `alloc_arc` family. + /// + /// Unlike [`Box`], an [`Arc`] reserves a per-`Arc` strong reference + /// count in the chunk prefix (initialized to `1`), takes one chunk + /// refcount for the whole `Arc` family, and runs `T::drop` eagerly + /// when the strong count reaches zero — never via a chunk + /// drop-entry. #[inline(always)] + #[cfg_attr(test, mutants::skip)] // routing-predicate mutations ⇒ refill spin (OOM) fn impl_alloc_arc_with T>(&self, f: F) -> Result, AllocError> where A: Send + Sync, T: Send + Sync, { - // SAFETY: see `Self::impl_alloc_box_with` — `Arc::from_raw` - // adopts the fresh +1 on the containing chunk. Unlike `Box`, - // `Arc` keeps the value alive until the chunk is torn down, so a - // drop entry IS registered for `T: Drop` (`REGISTER_DROP = true`). - self.impl_alloc_smart_with::(f) - .map(|ptr| unsafe { Arc::from_raw(ptr.cast::()) }) + if const { mem::align_of::() >= MAX_SMART_PTR_ALIGN } { + return Err(AllocError); + } + let mut f = Some(f); + loop { + if let Some((uninit, chunk_ptr)) = self.try_reserve_arc_value::() { + let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); + let f = f.take().expect("closure taken twice"); + let ptr = init_smart_slot::(uninit, chunk_ref, f); + // SAFETY: the strong prefix was written (count = 1) and the + // chunk holds a fresh +1 for this `Arc` family. 
+ return Ok(unsafe { Arc::from_raw(ptr.cast::()) }); + } + let wcp = worst_case_arc_payload::(); + if self.is_oversized(wcp) { + let f = f.take().expect("closure taken twice"); + return self.alloc_oversized_arc_with::(wcp, f); + } + self.refill_shared(wcp)?; + } } - /// Bump-allocates `T` in the arena's current shared chunk, takes a - /// +1 refcount on that chunk for the resulting smart pointer, and - /// writes the value into the reservation. When `REGISTER_DROP` is - /// `true` and `T` needs drop, a drop entry is committed so the - /// chunk's teardown runs `T::drop` when the last reference releases - /// the chunk ([`Arc`] semantics). [`Box`] passes `REGISTER_DROP = - /// false` because it runs `T::drop` eagerly in its own `Drop`; - /// registering an entry as well would drop the value twice. - /// - /// The returned `NonNull` carries no ownership marker; the - /// caller wraps it in the appropriate smart pointer ([`Box`] or - /// [`Arc`]) and that wrapper owns the +1. + /// Bump-allocates `T` in the arena's current shared chunk for a + /// [`Box`], takes a +1 refcount on that chunk, and writes the value + /// into the reservation. [`Box`] runs `T::drop` eagerly in its own + /// `Drop`, so no chunk drop entry is reserved. /// /// Rejects alignments at or above [`MAX_SMART_PTR_ALIGN`]: such /// values cannot live inside the first [`CHUNK_ALIGN`] bytes of a @@ -543,222 +563,70 @@ impl Arena { /// smart pointers' `Drop` impls. 
#[inline(always)] #[cfg_attr(test, mutants::skip)] // routing-predicate mutations ⇒ refill spin (OOM) - fn impl_alloc_smart_with T, const REGISTER_DROP: bool>(&self, f: F) -> Result, AllocError> { + fn impl_alloc_smart_with T>(&self, f: F) -> Result, AllocError> { if const { mem::align_of::() >= MAX_SMART_PTR_ALIGN } { return Err(AllocError); } loop { - // A ZST whose allocation reserves no drop entry does not - // advance the bump cursor (`try_alloc(0, _)` is a no-op on - // the cursor), so back-to-back handouts would never refill - // the chunk. The per-allocation handout count is tracked in - // the non-atomic `local_shared_count` and draws down the - // pre-credited ref surplus; an unbounded run from a single - // chunk could exhaust that surplus, driving the chunk's - // atomic refcount to zero while it is still installed - // (use-after-free) or underflowing the surplus reconciliation - // at retire (double-free). Pre-reserve a 1-byte tag so each - // such handout advances the cursor, bounding per-chunk - // handouts to the chunk capacity (well below the surplus). - // The drop-entry path below already advances `drop_top`, so - // drop-registering reservations need no tag. Mirrors the - // guard in `impl_alloc_uninit_smart`. - if const { mem::size_of::() == 0 && !(REGISTER_DROP && mem::needs_drop::()) } - && self.current_shared().try_alloc(1, 1).is_none() - { + // A non-drop ZST allocation does not advance the bump cursor + // (`try_alloc(0, _)` is a no-op), so back-to-back handouts + // would never refill the chunk. The per-allocation handout + // count draws down the pre-credited ref surplus; an unbounded + // run could exhaust it (use-after-free) or underflow the + // surplus reconciliation at retire (double-free). Pre-reserve + // a 1-byte tag so each such handout advances the cursor, + // bounding per-chunk handouts to the chunk capacity. 
+ if const { mem::size_of::() == 0 } && self.current_shared().try_alloc(1, 1).is_none() { self.refill_shared(worst_case_payload::())?; continue; } - if const { REGISTER_DROP && mem::needs_drop::() } { - if let Some((uninit, chunk_ptr)) = self.try_reserve_shared_with_drop::() { - let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); - return Ok(init_smart_slot_with_drop::(uninit, chunk_ref, f)); - } - } else if let Some((uninit, chunk_ptr)) = self.try_reserve_shared::() { + if let Some((uninit, chunk_ptr)) = self.try_reserve_shared::() { let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); return Ok(init_smart_slot::(uninit, chunk_ref, f)); } - // Worst-case payload includes a drop entry for `T: Drop` - // so refill always sizes the chunk for the with-drop - // reservation above. let wcp = worst_case_payload::(); - if self.is_oversized_shared(wcp) { - return self.alloc_oversized_smart_with::(wcp, f); + if self.is_oversized(wcp) { + return self.alloc_oversized_smart_with::(wcp, f); } self.refill_shared(wcp)?; } } - /// Cold oversized-smart-pointer fallback for - /// [`Self::impl_alloc_smart_with`]. - /// - /// Kept `#[inline(never)]` for the same reason as - /// [`Self::alloc_oversized_value_with`]: the fast-path body must - /// stay small enough for the public smart-pointer entry points to - /// inline; closure-free in `f` to avoid spilling the user closure's - /// environment to memory on the hot path. + /// Cold oversized-`Box` fallback for [`Self::impl_alloc_smart_with`]. 
#[cold] #[inline(never)] - fn alloc_oversized_smart_with T, const REGISTER_DROP: bool>( - &self, - wcp: usize, - f: F, - ) -> Result, AllocError> { + fn alloc_oversized_smart_with T>(&self, wcp: usize, f: F) -> Result, AllocError> { let (mutator, chunk_ptr) = self.acquire_oversized_shared_mutator(wcp)?; - let ptr = if const { REGISTER_DROP && mem::needs_drop::() } { - let ticket = mutator - .try_alloc_uninit_with_drop::() - .expect("dedicated oversized chunk sized to fit one value + drop entry"); - let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); - init_smart_slot_with_drop::(ticket, chunk_ref, f) - } else { - let ticket = mutator - .try_alloc_uninit::() - .expect("dedicated oversized chunk sized to fit one value"); - let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); - init_smart_slot::(ticket, chunk_ref, f) - }; + let ticket = mutator + .try_alloc_uninit::() + .expect("dedicated oversized chunk sized to fit one value"); + let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); + let ptr = init_smart_slot::(ticket, chunk_ref, f); // `mutator` drops here, releasing its `+1`. The smart-pointer // `chunk_ref` taken above owns the surviving `+1`. drop(mutator); Ok(ptr) } - /// Shared body for the uninit/zeroed `Arc>` family, - /// **for `T: Drop` only** (callers route `T: !Drop` to the ordinary - /// no-entry value-Arc path). - /// - /// Reserves a placeholder [`DropEntry`] alongside the value, writes the - /// uninitialized (or zeroed) `MaybeUninit` without committing the - /// entry, and eagerly publishes the chunk's drop-entry count so a later - /// [`Arc::>::assume_init`](crate::Arc) can locate and - /// commit it while the chunk is still the arena's active chunk. 
- #[inline] - #[cfg_attr(test, mutants::skip)] // ZST tag branch && → || ⇒ refill spin - pub(crate) fn impl_alloc_uninit_arc(&self, zeroed: bool) -> Result, A>, AllocError> - where - A: Send + Sync, - T: Send + Sync, - { - if const { mem::align_of::() >= MAX_SMART_PTR_ALIGN } { - return Err(AllocError); - } - loop { - // For ZST `T: Drop`, `size_of::() == 0`, so the bump - // cursor doesn't advance per allocation. Back-to-back - // `alloc_uninit_arc()` calls would otherwise - // produce placeholders that share `(value_offset, len = 1)`, - // and `commit_placeholder_drop_fn`'s lookup (which matches - // on that key) would re-commit the first placeholder on - // every subsequent `assume_init`, silently leaving the - // others uncommitted and skipping their destructors. - // - // Pre-reserve a 1-byte tag so each placeholder lands at a - // distinct `value_offset`. For ZST `T` the returned - // value-pointer points one byte past the previous cursor, - // which is fine because writes/reads/drops of a ZST touch - // zero bytes — the pointer's address only serves as the - // placeholder's lookup key. - if const { mem::size_of::() == 0 } && self.current_shared().try_alloc(1, 1).is_none() { - self.refill_shared(worst_case_payload::())?; - continue; - } - if let Some((uninit, chunk_ptr)) = self.try_reserve_shared_with_drop::() { - let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); - let value = if zeroed { - mem::MaybeUninit::::zeroed() - } else { - mem::MaybeUninit::::uninit() - }; - let ptr = uninit.into_uninit_placeholder(value); - let _ = chunk_ref.forget(); - // Publish the just-written placeholder so `assume_init` sees it. - self.current_shared().publish_drop_count(); - // SAFETY: the chunk was bumped +1 for this `Arc` and a - // placeholder drop entry is reserved and published; - // `assume_init` commits the real shim once the value is set. 
- return Ok(unsafe { Arc::from_raw(ptr.cast::()) }); - } - let wcp = worst_case_payload::(); - if self.is_oversized_shared(wcp) { - return self.alloc_oversized_shared_with(wcp, |mutator, chunk_ptr| { - let ticket = mutator - .try_alloc_uninit_with_drop::() - .expect("dedicated oversized chunk sized to fit one value + drop entry"); - let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); - let value = if zeroed { - mem::MaybeUninit::::zeroed() - } else { - mem::MaybeUninit::::uninit() - }; - let ptr = ticket.into_uninit_placeholder(value); - let _ = chunk_ref.forget(); - // SAFETY: see the non-oversized branch above. The - // temporary mutator's `Drop` publishes the drop-entry - // count before this function returns, so `assume_init` - // can locate the placeholder via the chunk header. - unsafe { Arc::from_raw(ptr.cast::()) } - }); - } - self.refill_shared(wcp)?; - } - } - - /// Slice mirror of [`Self::impl_alloc_uninit_arc`], **for `T: Drop` - /// only**. Reserves a placeholder slice drop entry, fills the buffer - /// (uninitialized or zeroed) without committing, and publishes the - /// drop-entry count for a later - /// [`Arc::<[MaybeUninit]>::assume_init`](crate::Arc). - #[inline] - pub(crate) fn impl_alloc_uninit_slice_arc(&self, len: usize, zeroed: bool) -> Result], A>, AllocError> + /// Cold oversized-`Arc` fallback for [`Self::impl_alloc_arc_with`]. + #[cold] + #[inline(never)] + fn alloc_oversized_arc_with T>(&self, wcp: usize, f: F) -> Result, AllocError> where A: Send + Sync, T: Send + Sync, { - if const { mem::align_of::() >= MAX_SMART_PTR_ALIGN } { - return Err(AllocError); - } - reject_uninit_slice_arc_too_long(len)?; - // Refill hint accounts for prefix + payload alignment slack + - // payload bytes + drop entry. 
- let min_payload = super::alloc_prefixed::worst_case_thin_slice_payload::(len); - loop { - if let Some((uninit, chunk_ptr)) = self.try_reserve_shared_slice_with_drop::(len) { - let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr); - let ptr = uninit.into_uninit_slice_placeholder(zeroed); - let _ = chunk_ref.forget(); - self.current_shared().publish_drop_count(); - // SAFETY: as in `impl_alloc_uninit_arc`; the placeholder slice - // drop entry is reserved and published for `assume_init`. - return Ok(unsafe { Arc::from_raw(ptr.cast::()) }); - } - if self.is_oversized_shared(min_payload) { - return self.alloc_oversized_shared_with(min_payload, |mutator, chunk_ptr| { - let ticket = mutator - .try_alloc_uninit_slice_with_drop_prefixed::(len) - .expect("dedicated oversized chunk sized to fit slice + drop entry"); - let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); - let ptr = ticket.into_uninit_slice_placeholder(zeroed); - let _ = chunk_ref.forget(); - // SAFETY: see the non-oversized branch above. - unsafe { Arc::from_raw(ptr.cast::()) } - }); - } - self.refill_shared(min_payload)?; - } - } -} -/// Reject slice-arc uninit requests whose `len > u16::MAX`: the chunk -/// drop entry packs the element count into a `u16`, so a longer slice -/// can never be encoded and the caller's refill loop would otherwise -/// spin allocating chunks until OOM. 
-#[cfg_attr(test, mutants::skip)] // see `alloc_slice_ref::reject_drop_slice_too_long` -#[inline] -fn reject_uninit_slice_arc_too_long(len: usize) -> Result<(), AllocError> { - if len > u16::MAX as usize { - return Err(AllocError); + let (mutator, chunk_ptr) = self.acquire_oversized_shared_mutator(wcp)?; + let (ticket, _chunk) = mutator + .try_alloc_arc_value::() + .expect("dedicated oversized chunk sized to fit one Arc value + strong prefix"); + let chunk_ref = acquire_shared_chunk_ref::(chunk_ptr); + let ptr = init_smart_slot::(ticket, chunk_ref, f); + drop(mutator); + // SAFETY: the strong prefix was written (count = 1) and the chunk + // holds a fresh +1 for this `Arc` family. + Ok(unsafe { Arc::from_raw(ptr.cast::()) }) } - Ok(()) } /// writes the value produced by `f` into the reservation. Factored out @@ -771,20 +639,6 @@ fn init_smart_slot T>(uninit: Uninit<'_, uninit.init_raw(value) } -/// Parallel to [`init_smart_slot`] but consumes a -/// [`UninitDrop`](crate::internal::uninit::UninitDrop) ticket so the -/// value's `Drop` runs from the chunk's drop-list at teardown. -#[inline(always)] -fn init_smart_slot_with_drop T>( - uninit: UninitDrop<'_, T>, - chunk_ref: ChunkRef, - f: F, -) -> NonNull { - let value = f(); - let _ = chunk_ref.forget(); - uninit.init_raw(value) -} - /// Bumps the strong refcount on `chunk_ptr` and returns a /// [`ChunkRef`](crate::internal::chunk_ref::ChunkRef) that owns the /// fresh +1. Shared by [`Arena::init_box_slot`] and diff --git a/crates/multitude/src/arena/mod.rs b/crates/multitude/src/arena/mod.rs index f348407d0..b9af72dc2 100644 --- a/crates/multitude/src/arena/mod.rs +++ b/crates/multitude/src/arena/mod.rs @@ -319,26 +319,20 @@ impl Arena { self.relocations.set(self.relocations.get() + 1); } - /// Reset the arena to a fresh state, ready for a new allocation phase. 
+ /// Reset the arena's local-chunk state for a new allocation phase: + /// the current local chunk and all retired local chunks are released + /// (running any pending drop entries) and their bytes returned to the + /// chunk cache. /// /// Given that this takes `&mut self`, the borrow checker ensures no - /// outstanding simple references can still be live. Outstanding `Arc`s - /// from shared chunks continue to hold their backing chunks alive - /// independently. - /// - /// The reset is lazy: the current chunk slots are returned to the - /// empty state and a fresh chunk is acquired on the first subsequent - /// allocation, mirroring the lazy semantics of [`Self::new`]. + /// outstanding simple references can still be live. The currently + /// installed shared chunk is **not** detached or rewound — shared + /// allocations continue on it — and outstanding `Arc`s from shared + /// chunks keep their backing chunks alive independently. #[cold] pub fn reset(&mut self) { - // Reconcile the surplus on the current shared chunk before - // the mutator's Drop fires its own dec_ref — keeps the - // chunk's atomic refcount in sync with the number of escaped - // handles. - self.reconcile_shared_surplus(); self.retired_local.clear(); *self.current_local.get_mut() = ChunkMutator::>::empty(); - *self.current_shared.get_mut() = ChunkMutator::>::empty(); } /// Returns a [`ZerocopyView`](crate::zerocopy::ZerocopyView) @@ -381,24 +375,19 @@ impl Arena { self.provider.config().max_normal_alloc() } - /// True iff a shared-chunk allocation request of `min_payload` bytes - /// must be routed to a one-shot oversized chunk instead of the normal - /// size-class pool. Callers that detect this case should use - /// [`Self::alloc_oversized_shared_with`] rather than - /// [`Self::refill_shared`]. + /// True iff an allocation request of `min_payload` bytes must be routed + /// to a one-shot oversized chunk instead of the normal size-class pool. 
+ /// Callers that detect this case should use the matching oversized path + /// ([`Self::alloc_oversized_shared_with`] / + /// [`Self::alloc_oversized_local_with`]) rather than the normal refill. /// - /// `ArenaBuilder` caps `max_normal_alloc` at `max_bump_extent` - /// (`MAX_CHUNK_BYTES - header_size`), so `min_payload <= - /// max_normal_alloc` always implies `header + min_payload <= - /// MAX_CHUNK_BYTES` — a single threshold check is enough. + /// The threshold is the same for local and shared chunks: `ArenaBuilder` + /// caps `max_normal_alloc` at `max_bump_extent` (`MAX_CHUNK_BYTES - + /// header_size`), so `min_payload <= max_normal_alloc` always implies + /// `header + min_payload <= MAX_CHUNK_BYTES` — a single threshold check + /// is enough for both flavors. #[inline] - pub(crate) fn is_oversized_shared(&self, min_payload: usize) -> bool { - min_payload > self.max_normal_alloc() - } - - /// Local mirror of [`Self::is_oversized_shared`]. - #[inline] - pub(crate) fn is_oversized_local(&self, min_payload: usize) -> bool { + pub(crate) fn is_oversized(&self, min_payload: usize) -> bool { min_payload > self.max_normal_alloc() } @@ -444,7 +433,7 @@ impl Arena { /// `min_payload` bytes. The previous mutator is dropped immediately — /// any outstanding `Arc`s independently keep the prior chunk alive. /// - /// The caller must have verified `!self.is_oversized_shared(min_payload)` + /// The caller must have verified `!self.is_oversized(min_payload)` /// before invoking this; oversized requests must go through /// [`Self::alloc_oversized_shared_with`] so they don't replace (and /// thus waste) the current chunk. @@ -461,19 +450,15 @@ impl Arena { // the replacement so a now-unreferenced chunk frees its bytes and // lets the new reservation reuse the budget. self.current_shared.drop_replace(ChunkMutator::>::empty()); - // The previous `drop_replace` may have run user-supplied drop - // shims (chunk teardown). 
Those can re-enter the arena via - // `alloc_arc`/`alloc_box` which call `refill_shared` - // recursively and install a fresh chunk into `current_shared`. - // Honor that installation as-is: returning `Ok` lets the - // caller's retry loop re-attempt the allocation against the - // reentry-installed chunk. If it doesn't fit `min_payload`, - // the caller will simply call us again and we'll reconcile + - // replace that chunk in turn (its own `local_shared_count` - // already tracks any nested handouts). - if self.current_shared.borrow().chunk_ptr().is_some() { - return Ok(()); - } + // Unlike `refill_local`, this `drop_replace` cannot re-enter the + // arena: shared chunks register no drop entries, and a refcount-zero + // shared chunk is cached (never deallocated) here, so its teardown + // runs no user code. `current_shared` is therefore always empty at + // this point. + debug_assert!( + self.current_shared.borrow().chunk_ptr().is_none(), + "shared drop_replace cannot install a chunk: shared teardown runs no user code", + ); let new_chunk = self.provider.acquire_shared(min_payload, self.next_shared_class.get())?; // Pre-credit a large surplus of refs on the new chunk so the // per-allocation hot path can just bump a non-atomic local diff --git a/crates/multitude/src/arena/reserve.rs b/crates/multitude/src/arena/reserve.rs index 4a93d6f2f..43d1687d7 100644 --- a/crates/multitude/src/arena/reserve.rs +++ b/crates/multitude/src/arena/reserve.rs @@ -123,40 +123,14 @@ impl Arena { Some(unsafe { (ticket.rebind(), mutator.chunk_ptr_unchecked()) }) } - /// Try to reserve uninitialized storage for one `T` plus a drop - /// entry slot in the current shared chunk. - #[inline(always)] - #[cfg_attr(test, mutants::skip)] // see `try_reserve_shared` - pub(crate) fn try_reserve_shared_with_drop(&self) -> Option<(UninitDrop<'_, T>, NonNull>)> { - let mutator = self.current_shared(); - let ticket = mutator.try_alloc_uninit_with_drop::()?; - // SAFETY: see `try_reserve_shared`. 
- Some(unsafe { (ticket.rebind(), mutator.chunk_ptr_unchecked()) }) - } - /// Try to reserve uninitialized storage for `len` consecutive `T`s - /// in the current shared chunk. + /// in the current shared chunk, taking the precomputed payload byte + /// size; the slice-copy fast paths hold an existing `&[T]` and + /// compute `size_of_val(src)` once outside the refill loop, sparing + /// the inner reservation a `checked_mul` overflow guard. /// /// Includes a thin-pointer DST length prefix immediately before /// the payload — see [`ChunkMutator::try_alloc_uninit_slice_prefixed`]. - #[inline(always)] - #[cfg_attr(test, mutants::skip)] // see `try_reserve_shared` - #[allow( - clippy::type_complexity, - reason = "ticket + chunk-ptr tuple is the natural shape; type alias would obscure rather than clarify" - )] - pub(crate) fn try_reserve_shared_slice(&self, len: usize) -> Option<(Uninit<'_, [T]>, NonNull>)> { - let mutator = self.current_shared(); - let ticket = mutator.try_alloc_uninit_slice_prefixed::(len)?; - // SAFETY: see `try_reserve_shared`. - Some(unsafe { (ticket.rebind(), mutator.chunk_ptr_unchecked()) }) - } - - /// Like [`Self::try_reserve_shared_slice`] but takes the precomputed - /// payload byte size; the slice-copy fast paths hold an existing - /// `&[T]` and compute `size_of_val(src)` once outside the refill - /// loop, sparing the inner reservation a `checked_mul` overflow - /// guard. /// /// # Safety /// @@ -180,19 +154,51 @@ impl Arena { Some(unsafe { (ticket.rebind(), mutator.chunk_ptr_unchecked()) }) } - /// Try to reserve uninitialized storage for `len` consecutive `T`s - /// plus a drop entry slot in the current shared chunk. Includes a - /// thin-pointer DST length prefix immediately before the payload. + /// Try to reserve storage for one strong-prefixed `Arc` value in + /// the current shared chunk. The returned ticket addresses the + /// payload (the strong count is already initialized to `1`). 
+ #[inline(always)] + #[cfg_attr(test, mutants::skip)] // see `try_reserve_shared` + pub(crate) fn try_reserve_arc_value(&self) -> Option<(Uninit<'_, T>, NonNull>)> { + let (ticket, chunk) = self.current_shared().try_alloc_arc_value::()?; + // SAFETY: see `try_reserve_shared`. + Some(unsafe { (ticket.rebind(), chunk) }) + } + + /// Slice form of [`Self::try_reserve_arc_value`]: reserves a strong + /// prefix, slice-length metadata, and `len` `T`s. #[inline(always)] #[cfg_attr(test, mutants::skip)] // see `try_reserve_shared` #[allow( clippy::type_complexity, reason = "ticket + chunk-ptr tuple is the natural shape; type alias would obscure rather than clarify" )] - pub(crate) fn try_reserve_shared_slice_with_drop(&self, len: usize) -> Option<(UninitDrop<'_, [T]>, NonNull>)> { - let mutator = self.current_shared(); - let ticket = mutator.try_alloc_uninit_slice_with_drop_prefixed::(len)?; + pub(crate) fn try_reserve_arc_slice(&self, len: usize) -> Option<(Uninit<'_, [T]>, NonNull>)> { + let (ticket, chunk) = self.current_shared().try_alloc_arc_slice::(len)?; // SAFETY: see `try_reserve_shared`. - Some(unsafe { (ticket.rebind(), mutator.chunk_ptr_unchecked()) }) + Some(unsafe { (ticket.rebind(), chunk) }) + } + + /// Like [`Self::try_reserve_arc_slice`] but takes the precomputed + /// payload byte size (held by callers with a live `&[T]`). + /// + /// # Safety + /// + /// `payload_bytes` must equal `size_of::() * len` (without overflow). + #[inline(always)] + #[cfg_attr(test, mutants::skip)] // see `try_reserve_shared` + #[allow( + clippy::type_complexity, + reason = "ticket + chunk-ptr tuple is the natural shape; type alias would obscure rather than clarify" + )] + pub(crate) unsafe fn try_reserve_arc_slice_with_size( + &self, + len: usize, + payload_bytes: usize, + ) -> Option<(Uninit<'_, [T]>, NonNull>)> { + // SAFETY: forwarded to the caller. 
+ let (ticket, chunk) = unsafe { self.current_shared().try_alloc_arc_slice_with_size::(len, payload_bytes) }?; + // SAFETY: see `try_reserve_shared`. + Some(unsafe { (ticket.rebind(), chunk) }) } } diff --git a/crates/multitude/src/box.rs b/crates/multitude/src/box.rs index e06c5f857..4956703fd 100644 --- a/crates/multitude/src/box.rs +++ b/crates/multitude/src/box.rs @@ -33,9 +33,10 @@ use crate::thin_smart_ptr_common::impl_thin_smart_ptr_common; /// /// Like [`Arc`](crate::Arc), `Box` keeps its containing chunk alive by /// holding a +1 refcount, so it can outlive the arena it came from and -/// survives [`Arena::reset`](crate::Arena::reset). Unlike `Arc`, the -/// `T` destructor runs eagerly when the `Box` itself is dropped -/// (single owner), not at chunk teardown. +/// survives [`Arena::reset`](crate::Arena::reset), and it runs `T`'s +/// destructor eagerly — never deferred to chunk teardown. As the sole +/// owner, `Box` drops `T` when the `Box` itself is dropped, whereas +/// `Arc` drops `T` when its last clone is dropped. /// /// # `Send` and `Sync` /// diff --git a/crates/multitude/src/internal/arena_buf.rs b/crates/multitude/src/internal/arena_buf.rs index b5bb357d0..24f8a430e 100644 --- a/crates/multitude/src/internal/arena_buf.rs +++ b/crates/multitude/src/internal/arena_buf.rs @@ -3,16 +3,10 @@ //! Growable, arena-backed buffer of `T`. //! -//! `ArenaBuf` is the internal storage primitive that backs the public -//! `Vec<'a, T, A>`, `String<'a, A>`, and `Utf16String<'a, A>` types. It owns -//! an in-chunk pointer plus a length and capacity, and exposes safe slice -//! accessors. Growth (in-place when possible, copy-to-new-allocation -//! otherwise) is mediated by [`ChunkMutator`](super::ChunkMutator) so this -//! type stays free of allocator concerns. -//! -//! All `unsafe` related to the `(ptr, len, cap)` invariant of an -//! arena-backed buffer lives in this file. Higher layers (`vec/*`, -//! `strings/*`) compose `ArenaBuf` via its safe methods. +//! 
Backing storage for `Vec<'a, T, A>`, `String<'a, A>`, and +//! `Utf16String<'a, A>`. Growth is mediated by +//! [`ChunkMutator`](super::chunk_mutator::ChunkMutator); this file owns the unsafe +//! `(ptr, len, cap)` invariant. use core::iter::FusedIterator; use core::marker::PhantomData; @@ -43,10 +37,8 @@ impl ArenaBuf<'_, T> { /// /// # Safety /// - /// The `(ptr, len, cap)` triple must satisfy the type's invariants for - /// some live arena chunk that outlives `'a` — e.g. parts taken from - /// another `ArenaBuf` (possibly reinterpreted, as in - /// [`Vec::into_flattened`](crate::vec::Vec::into_flattened)). + /// `(ptr, len, cap)` must satisfy the type invariants for storage in a + /// live arena chunk that outlives `'a`. #[inline] pub(crate) const unsafe fn from_raw_parts(ptr: NonNull, len: usize, cap: usize) -> Self { Self { @@ -59,9 +51,8 @@ impl ArenaBuf<'_, T> { } impl<'a, T> ArenaBuf<'a, T> { - /// Creates an empty buffer with no backing storage. ZSTs are - /// initialized with `cap = usize::MAX` since no real storage is - /// ever needed for them. + /// Creates an empty buffer. ZST buffers use `cap = usize::MAX` + /// because they need no backing storage. #[inline] pub(crate) const fn new() -> Self { let cap = if mem::size_of::() == 0 { usize::MAX } else { 0 }; @@ -171,10 +162,8 @@ impl<'a, T> ArenaBuf<'a, T> { unsafe { self.replace_buffer_raw(new_ptr, new_cap) }; } - /// Raw-pointer variant of [`Self::replace_buffer`]. Used by the - /// oversized-chunk growth path in [`crate::vec::Vec`], where the - /// fresh reservation comes from a temporary [`ChunkMutator`] whose - /// ticket lifetime can't be rebound to `'a` through the public API. + /// Raw-pointer variant of [`Self::replace_buffer`] for oversized + /// growth through a temporary [`ChunkMutator`](super::chunk_mutator::ChunkMutator). 
/// /// # Safety /// @@ -329,12 +318,8 @@ impl<'a, T> ArenaBuf<'a, T> { /// Splits the buffer at `at`, keeping `[0, at)` in `self` and /// returning a new buffer that owns `[at, len)`. /// - /// The returned buffer shares the same chunk storage as `self`; no - /// elements are copied. After the split, `self`'s capacity is capped - /// at `at` (so a later push reallocates rather than overwriting the - /// tail), and the tail buffer covers the remaining capacity. This is - /// sound because chunk storage is reclaimed only at arena teardown, - /// which outlives both buffers (lifetime `'a`). + /// No elements are copied. `self.cap` is capped at `at`, and the returned + /// tail owns the remaining capacity in the same arena chunk. /// /// Caller must ensure `at <= len`. #[inline] @@ -368,13 +353,9 @@ impl<'a, T> ArenaBuf<'a, T> { /// Attempts to absorb `other`'s storage in O(1) when it directly /// abuts the end of `self`'s storage in the same chunk. /// - /// Succeeds only when `self` is exactly full (`len == cap`, so there - /// is no uninitialized gap before `other`) and `other`'s buffer - /// begins exactly at `self`'s one-past-the-end address. On success, - /// `self` grows to cover `other`'s elements and capacity, and `other` - /// is reset to empty without dropping its elements (ownership moves - /// to `self`). Returns `false` (leaving both buffers untouched) when - /// the buffers are not adjacent. Not used for ZSTs. + /// Succeeds only when `self` is full and `other` starts at `self`'s + /// one-past-end address. On success, `self` owns both ranges and + /// `other` is reset to empty without dropping elements. 
#[inline] pub(crate) fn try_absorb_adjacent(&mut self, other: &mut Self) -> bool { debug_assert!(mem::size_of::() != 0, "try_absorb_adjacent: not for ZSTs"); @@ -382,15 +363,8 @@ impl<'a, T> ArenaBuf<'a, T> { return false; } let self_end = self.ptr.as_ptr().wrapping_add(self.cap); - // The exact pointer-equality test below is also a proof that `other` - // lives in the *same chunk* as `self` (so `self.ptr`'s chunk-wide - // provenance legitimately covers the absorbed region). A distinct - // chunk's payload always begins `header_size > 0` bytes after its - // base, and chunk allocations never overlap, so a buffer in another - // chunk can never start exactly at `self`'s one-past-the-end address: - // that would require the other chunk's base to fall *inside* `self`'s - // chunk. Hence `ptr::eq(self_end, other.ptr)` can only hold when both - // buffers were carved from one chunk's bump region. + // Pointer equality proves same-chunk adjacency: another chunk's + // payload cannot begin exactly at this chunk's one-past-end address. if !ptr::eq(self_end.cast_const(), other.ptr.as_ptr().cast_const()) { return false; } @@ -433,25 +407,14 @@ impl<'a, T> ArenaBuf<'a, T> { } } - /// Returns an owning, double-ended iterator that yields the live - /// elements in order, leaving the buffer empty. The iterator's - /// `Drop` drops any elements that were not yielded. The iterator - /// is bound to the arena lifetime `'a` of the buffer. + /// Returns an owning iterator over the live elements and leaves the + /// buffer empty. Dropping the iterator drops any unyielded elements. /// /// # Caller contract /// - /// The returned [`DrainAll`] is deliberately bound to the arena - /// lifetime `'a` rather than to the `&mut self` borrow, so that an - /// owning [`IntoIter`](crate::vec::IntoIter) can hold it past the - /// `ManuallyDrop` that produced it. 
Because the borrow checker - /// therefore does **not** tie the iterator to this buffer, the - /// caller MUST NOT touch `self` (push, grow, drain again, drop the - /// elements, etc.) until the returned iterator has been fully - /// consumed or dropped: the iterator keeps a *copy* of `self.ptr` - /// and still logically owns `[0, len)`, so any concurrent write or - /// re-read of those slots would alias and double-own the elements. - /// All current callers consume the iterator immediately and never - /// reuse the buffer afterwards. + /// [`DrainAll`] is bound to arena lifetime `'a`, not the `&mut self` + /// borrow. The caller must not touch `self` until the iterator is + /// consumed or dropped, because the iterator owns `[0, len)`. #[inline] pub(crate) fn drain_all(&mut self) -> DrainAll<'a, T> { let len = self.len; @@ -467,9 +430,7 @@ impl<'a, T> ArenaBuf<'a, T> { } } -/// Owning iterator over every element of an [`ArenaBuf`], in order. -/// Bound to the arena lifetime `'a` rather than to the buffer that -/// produced it, so the iterator can outlive the `ArenaBuf`. +/// Owning iterator over an [`ArenaBuf`]'s live elements. pub(crate) struct DrainAll<'a, T> { ptr: NonNull, head: usize, diff --git a/crates/multitude/src/internal/chunk.rs b/crates/multitude/src/internal/chunk.rs index 9ae7a4636..59245c516 100644 --- a/crates/multitude/src/internal/chunk.rs +++ b/crates/multitude/src/internal/chunk.rs @@ -5,18 +5,9 @@ /// A contiguous block of memory that an arena carves bump allocations out of. /// -/// Both [`LocalChunk`](super::LocalChunk) and [`SharedChunk`](super::SharedChunk) -/// implement this trait. They differ in how the chunk and its allocations are -/// owned and shared: -/// -/// - `LocalChunk` is used for allocations whose lifetime is tied to the arena -/// itself and never crosses thread boundaries; no synchronization is needed. 
-/// - `SharedChunk` is used for allocations whose lifetime can outlive the -/// arena (reference-counted handles), and uses atomics for cross-thread -/// refcounting. -/// -/// Implementors are dynamically-sized types: the struct ends with a `[u8]` -/// payload that holds the actual bump-allocation buffer. +/// Implemented by [`LocalChunk`](super::local_chunk::LocalChunk) and +/// [`SharedChunk`](super::shared_chunk::SharedChunk). Both are DSTs with a payload tail; +/// local chunks are arena-thread confined, shared chunks use atomic refcounts. pub(crate) trait Chunk { /// Returns the chunk's payload capacity in bytes (i.e. `data.len()`). fn capacity(&self) -> usize; @@ -33,12 +24,4 @@ pub(crate) trait Chunk { /// responsible for tearing down the chunk (running drop entries and /// routing the backing memory back to the provider or deallocator). fn dec_ref(&self) -> bool; - - /// Returns the number of drop entries currently stored at the tail of the - /// chunk. - fn drop_entry_count(&self) -> usize; - - /// Sets the number of drop entries currently stored at the tail of the - /// chunk. - fn set_drop_entry_count(&self, count: usize); } diff --git a/crates/multitude/src/internal/chunk_alloc.rs b/crates/multitude/src/internal/chunk_alloc.rs index 6830ebe32..3bb196eea 100644 --- a/crates/multitude/src/internal/chunk_alloc.rs +++ b/crates/multitude/src/internal/chunk_alloc.rs @@ -1,36 +1,24 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -//! Shared raw-allocation helpers used by `LocalChunk::allocate` and -//! `SharedChunk::allocate`. Both build a `header + payload_size` byte -//! allocation aligned for the chunk header, then write fields through a -//! freshly-constructed fat DST pointer. +//! Shared raw-allocation helpers for chunk `allocate` / `destroy` paths. +//! They centralize layout size and alignment. 
use core::alloc::Layout; use core::ptr::NonNull; use allocator_api2::alloc::{AllocError, Allocator}; -/// Computes the canonical `Layout` for a chunk allocation, the single -/// source of truth shared by every `allocate`/`destroy` pair so the two -/// can never disagree (a mismatched `deallocate` layout is UB). +/// Computes the canonical `Layout` for a chunk allocation. /// -/// Two *distinct* alignments are at play and must not be conflated: +/// Two alignments are distinct: /// /// * `value_align` — the chunk type's own alignment (`align_of::()`, -/// ignoring the `[UnsafeCell]` tail which is align-1). Rust rounds -/// the size of any value up to a multiple of its alignment, so a -/// reference built from the fat pointer covers `round_up(total, -/// value_align)` bytes. The allocation's **size** is rounded up to this -/// so the reference's footprint matches the allocation exactly (a -/// shortfall is UB, caught by Miri; an excess silently over-allocates). +/// ignoring the align-1 tail). The allocation size is rounded up to this. /// /// * `base_align` — the alignment of the allocation's **base address**, -/// which may be far larger than `value_align` (e.g. `CHUNK_ALIGN` = -/// 64 KiB for shared chunks, so the chunk header is recoverable by -/// masking the low bits of any interior pointer). This governs only the -/// `Layout` alignment; the **size is never rounded up to it**, otherwise -/// every shared chunk would inflate to a full `CHUNK_ALIGN`. +/// which may be much larger for shared chunks. This governs only +/// `Layout::align`; the size is not rounded up to it. /// /// `base_align >= value_align` and both must be powers of two. #[allow( @@ -47,13 +35,9 @@ pub(crate) fn chunk_layout(header_size: usize, payload_size: usize, value_align: Layout::from_size_align(rounded, base_align).map_err(|_| AllocError) } -/// The exact byte footprint of a chunk allocation — the rounded -/// `Layout::size()` that [`chunk_layout`] produces. 
This is the single -/// source of truth for both the allocation `Layout` and the provider's -/// byte-budget accounting, so the two can never disagree (accounting that -/// used the unrounded `header_size + payload_size` would under-report the -/// real allocator footprint when `header_size + payload_size` is not a -/// multiple of `value_align`, e.g. for oversized chunks). +/// Exact byte footprint of a chunk allocation: the rounded `Layout::size()` +/// produced by [`chunk_layout`]. Used for both allocation and byte-budget +/// accounting. #[inline] pub(crate) fn chunk_alloc_size(header_size: usize, payload_size: usize, value_align: usize) -> Result { debug_assert!(value_align.is_power_of_two(), "value_align must be a power of two"); @@ -62,15 +46,10 @@ pub(crate) fn chunk_alloc_size(header_size: usize, payload_size: usize, value_al Ok(total.checked_add(mask).ok_or(AllocError)? & !mask) } -/// Allocate a `header + payload_size` byte allocation whose base address -/// is `base_align`-aligned and whose size is rounded up to `value_align` -/// (see [`chunk_layout`]). +/// Allocates a chunk backing allocation using [`chunk_layout`]. /// -/// Returns `(raw_u8_ptr, layout)` on success. The pointer carries -/// provenance over the full allocation and is suitable as the data field -/// of a slice-DST fat pointer with metadata `payload_size`. The layout is -/// the exact one passed to `allocator.allocate`, suitable for a matching -/// `deallocate` call (reproduced by [`chunk_layout`] at destroy time). +/// Returns `(raw_u8_ptr, layout)`. The pointer covers the full allocation and +/// can be used as the data field of a slice-DST fat pointer. /// /// On size-overflow or end-of-address-space overflow, the allocation is /// freed and `AllocError` is returned. @@ -103,18 +82,12 @@ pub(crate) fn alloc_chunk_raw( mod tests { use super::chunk_layout; - /// `chunk_layout` must round the allocation *size* up to `value_align`. 
- /// Pins the exact round-up so the `value_align - 1` mask can't be - /// mutated to `value_align + 1` or `value_align / 1` — both corrupt the - /// rounding for totals that aren't already `value_align`-aligned (the - /// size-class tests use pre-aligned totals, so they can't catch this). + /// `chunk_layout` must round allocation size up to `value_align`. #[test] fn rounds_size_up_to_value_align() { - // A large power-of-two base (mirrors shared chunks); it governs the - // layout *alignment* only and must not affect the size rounding. + // Large base alignment must not affect size rounding. const BASE: usize = 65_536; - // (header, payload, value_align, expected_size). Totals are chosen - // to be NON-multiples of `value_align` so the mask actually rounds. + // Non-multiple totals force the rounding mask to matter. let cases = [ (10_usize, 7_usize, 8_usize, 24_usize), // total 17 -> 24 (34, 16, 8, 56), // total 50 -> 56 diff --git a/crates/multitude/src/internal/chunk_mutator.rs b/crates/multitude/src/internal/chunk_mutator.rs index 365e42bb7..473a72304 100644 --- a/crates/multitude/src/internal/chunk_mutator.rs +++ b/crates/multitude/src/internal/chunk_mutator.rs @@ -3,14 +3,10 @@ //! Bump allocator over a single chunk. //! -//! [`ChunkMutator`] owns one strong reference to a chunk and exposes safe -//! allocation primitives that hand out [`InChunk`] pointers, [`Uninit`] -//! tickets, and [`UninitDrop`] tickets. All `unsafe` interaction with the -//! chunk's raw memory is concentrated here. -//! -//! When the mutator is dropped it decrements the chunk's refcount; if that -//! drops the count to zero it replays pending drop entries and routes the -//! chunk back through [`ChunkOps::teardown_and_release`]. +//! [`ChunkMutator`] owns one strong chunk reference and hands out +//! [`InChunk`], [`Uninit`], and [`UninitDrop`] tickets. Drop publishes pending +//! drop entries, releases the refcount, and may trigger +//! [`ChunkOps::teardown_and_release`]. 
use core::cell::Cell; use core::ptr::{self, NonNull}; @@ -24,37 +20,21 @@ use super::uninit::{Uninit, UninitDrop}; /// Owns one strong reference to a chunk and tracks the bump cursor and the /// growing-down drop-entry top. /// -/// Hot-path layout is intentionally minimal: only `chunk`, `bump`, and -/// `drop_top` are stored. The payload start/end addresses are re-derived -/// from `chunk` in the cold paths that need them (capacity reporting, -/// drop-publish on `Drop`, drop-entry rollback, value-offset encoding for -/// drop-requiring types). +/// Hot-path layout stores only `chunk`, `bump`, and `drop_top`; cold paths +/// re-derive payload bounds from `chunk`. pub(crate) struct ChunkMutator { chunk: Option>, - /// Bump cursor stored as a pointer so that derivations preserve - /// the chunk's full provenance under Stacked / Tree Borrows. - /// Storing it as a `usize` and recovering the pointer via - /// `addr as *mut u8` would produce a no-provenance pointer that - /// would fail Miri whenever a derived value pointer is later read - /// back (e.g., during drop-entry replay). + /// Bump cursor stored as a pointer to preserve chunk provenance under + /// Stacked / Tree Borrows. bump: Cell>, /// Top of the drop-entry region (entries grow downward). Same /// pointer-preserves-provenance rationale as `bump`. drop_top: Cell>, } -// SAFETY: `ChunkMutator` owns one strong refcount on its chunk and -// accesses its payload via interior-mutable cells; the underlying -// `NonNull` is the only field that prevents auto-derivation of -// `Send`. Both implementors of `ChunkOps` (`LocalChunk`, `SharedChunk`) -// support cross-thread ownership transfer of a single owning reference -// (atomic refcount for shared, single-thread invariant for local that -// follows the owning thread). 
The `?Sized` bound matters: both chunk -// types are DSTs with a `[UnsafeCell]` tail, so a `C: Sized` bound -// would exclude every real instantiation and silently break -// `Arena: Send` (because the `Send` impl wouldn't apply). `Sync` is -// intentionally NOT implemented: the `Cell` fields make the mutator -// unsuitable for concurrent shared access. +// SAFETY: the mutator owns one strong chunk ref and moves that ownership +// across threads only when `C: Send`. `LocalChunk` follows the owning thread; +// `SharedChunk` uses atomics. The `Cell` fields intentionally make this `!Sync`. unsafe impl Send for ChunkMutator {} impl ChunkMutator { @@ -82,17 +62,13 @@ impl ChunkMutator { } } - /// Builds an empty mutator that owns no chunk. Every `try_alloc*` - /// returns `None`, so the arena's hot path falls through to a - /// `refill_*` call that installs a real chunk. Used to defer the - /// first chunk allocation until the first user-visible alloc. + /// Builds an empty mutator. Every `try_alloc*` returns `None`, deferring + /// chunk allocation until the first user-visible allocation. pub(crate) const fn empty() -> Self { Self { chunk: None, - // Sentinels: `bump > drop_top` so every `try_alloc*` falls - // through to the refill path via the bound check without - // any explicit `self.chunk?` test. Both `dangling()` values - // are non-null, fit in `isize`, and are never dereferenced. + // Sentinels: `bump > drop_top`, so bound checks fail without an + // explicit `self.chunk?`. These pointers are never dereferenced. bump: Cell::new(NonNull::::dangling().cast::()), drop_top: Cell::new(NonNull::::dangling()), } @@ -110,12 +86,8 @@ impl ChunkMutator { } /// Free byte count between the bump cursor and the drop-entry top. - /// Used by stats accounting at retire (`ChunkMutator::Drop` and - /// `ChunkMutator::forget_into_chunk`) and by `Arena::stats` to fold - /// the currently-active chunks' unused tails into - /// `ArenaStats::wasted_tail_bytes`. 
The empty-mutator sentinel - /// returns 0 (saturating). The value is reported as `u32` since - /// chunk capacity is bounded well below `u32::MAX`. + /// Stats helper; empty-mutator sentinels saturate to 0. Reported as `u32` + /// because chunk capacity is far below `u32::MAX`. #[cfg(feature = "stats")] #[inline] pub(crate) fn wasted_tail_for_stats(&self) -> u32 { @@ -128,9 +100,8 @@ impl ChunkMutator { /// /// # Panics /// - /// Panics on the empty mutator. Only the dead-code `capacity` / - /// `free_bytes` helpers can hit that path; all hot-path callers - /// invoke this after a successful `try_reserve_*`. + /// Panics on the empty mutator; hot-path callers invoke this only after a + /// successful reservation. #[inline] fn payload_range(&self) -> (usize, usize) { let chunk = self.chunk.expect("payload_range: chunk must be set"); @@ -150,11 +121,17 @@ impl ChunkMutator { // SAFETY: caller asserts `chunk` is live. let (start, cap) = unsafe { (C::payload_ptr(chunk), chunk.as_ref().capacity()) }; let start_addr = start.as_ptr() as usize; - // Align the reported end down to `align_of::()` so the - // drop-entry region (entries grow down from this point) stays - // naturally aligned regardless of payload-start alignment. - let entry_align = mem::align_of::(); - let end_addr = (start_addr + cap) & !(entry_align - 1); + let end_addr = if C::REGISTERS_DROPS { + // Align the reported end down to `align_of::()` so the + // drop-entry region (entries grow down from this point) stays + // naturally aligned regardless of payload-start alignment. + let entry_align = mem::align_of::(); + (start_addr + cap) & !(entry_align - 1) + } else { + // Flavors that never register drop entries let the bump cursor use + // the whole payload — no tail region is reserved. 
+ start_addr + cap + }; (start_addr, end_addr) } @@ -186,11 +163,9 @@ impl ChunkMutator { /// /// # Overflow safety /// - /// `cur_addr` is asserted to fit in `isize` so the alignment math - /// has no overflow guard; chunks live in the lower half of the - /// address space on every realistic 64-bit platform. `size` is *not* - /// constrained, so the `aligned_addr + size` step uses `checked_add` - /// to refuse oversized requests. + /// On 64-bit targets `cur_addr` is asserted to fit in `isize`, allowing + /// overflow-free alignment math. `aligned_addr + size` still uses + /// `checked_add` because `size` is caller-controlled. #[inline] // Mutation testing is suppressed: any mutation that always rejects // sends callers into an infinite refill spin (OOM). @@ -204,12 +179,8 @@ impl ChunkMutator { // SAFETY: see the overflow-safety note above. unsafe { hint::assert_unchecked(cur_addr > 0); - // On 64-bit targets every valid address is far below - // `isize::MAX`, so this also holds and lets the optimizer treat - // the align-up below as overflow-free. It is only asserted where - // guaranteed: on a target where an address may exceed - // `isize::MAX` (e.g. 32-bit upper half) the assertion could be - // false (→ UB), so we drop the hint and use checked arithmetic. + // On 64-bit targets this lets the optimizer treat align-up as + // overflow-free. Narrower targets use checked arithmetic. #[cfg(target_pointer_width = "64")] hint::assert_unchecked(isize::try_from(cur_addr).is_ok()); } @@ -217,16 +188,9 @@ impl ChunkMutator { let aligned_addr = (cur_addr + (align - 1)) & !(align - 1); #[cfg(not(target_pointer_width = "64"))] let aligned_addr = (cur_addr.checked_add(align - 1)?) 
& !(align - 1); - // For zero-size allocations, probe one extra byte: a ZST alloc - // at the chunk tail (`cur_addr == drop_top_addr`) would otherwise - // return a value pointer equal to `chunk_base + CHUNK_ALIGN` for - // the largest chunk class, which masks to the next 64 KiB tile - // and breaks the smart-pointer chunk-header recovery. Non-zero- - // size allocs already satisfy this because the last payload byte - // sits at `end - 1`, strictly inside `[chunk_base, drop_top)`. - // `size.max(1)` is a branchless CMOV; the checked_add cannot - // overflow under the `cur_addr + (align - 1)` debug assertion - // above. + // ZST smart-pointer values must still point strictly inside the chunk; + // a tail one-past pointer would mask to the next 64 KiB tile. + // `size.max(1)` probes that byte without changing the ZST bump. let probe_end = aligned_addr.checked_add(size.max(1))?; if probe_end > drop_top_addr { return None; @@ -262,29 +226,6 @@ impl ChunkMutator { Some((in_chunk, unsafe { self.chunk_ptr_unchecked() })) } - /// [`Self::try_alloc_thin_dst_smart`] paired with the owning chunk - /// pointer. See [`Self::try_alloc_with_chunk`]. - #[inline] - #[cfg_attr(test, mutants::skip)] // see `try_alloc` - #[allow( - clippy::type_complexity, - reason = "matches try_alloc_thin_dst_smart's shape plus the chunk pointer" - )] - #[cfg(feature = "dst")] - pub(crate) fn try_alloc_thin_dst_smart_with_chunk( - &self, - total: usize, - align: usize, - payload_offset: usize, - needs_drop: bool, - metadata_u16: u16, - ) -> Option<(InChunk, Option>, NonNull)> { - let (base, drop_slot) = self.try_alloc_thin_dst_smart(total, align, payload_offset, needs_drop, metadata_u16)?; - // SAFETY: a successful reservation proves the mutator owns a - // chunk. - Some((base, drop_slot, unsafe { self.chunk_ptr_unchecked() })) - } - /// Byte-slice fast path: skips the alignment mask, `checked_mul`, /// and ZST branch. Only valid for `T = u8` (align 1, size 1). 
#[inline] @@ -312,16 +253,11 @@ impl ChunkMutator { Some(Uninit::new(bytes.into_slice::(len))) } - /// Like [`Self::try_alloc_uninit_slice`] but takes the precomputed - /// byte size, skipping the `size_of::().checked_mul(len)` - /// overflow guard. + /// Like [`Self::try_alloc_uninit_slice`] with a precomputed byte size. /// /// # Safety /// - /// `size` must equal `size_of::() * len` (without overflow). - /// Callers holding an existing `&[T]` satisfy this trivially via - /// [`core::mem::size_of_val`] (which is an unchecked intrinsic - /// guaranteed not to overflow for any live slice). + /// `size` must equal `size_of::() * len` without overflow. #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin pub(crate) unsafe fn try_alloc_uninit_slice_with_size(&self, len: usize, size: usize) -> Option> { debug_assert_eq!(size, mem::size_of::().wrapping_mul(len)); @@ -352,15 +288,12 @@ impl ChunkMutator { Some(Uninit::new(payload)) } - /// Like [`Self::try_alloc_uninit_slice_prefixed`] but takes the - /// precomputed payload byte size, skipping the - /// `size_of::().checked_mul(len)` overflow guard. + /// Like [`Self::try_alloc_uninit_slice_prefixed`] with a precomputed + /// payload byte size. /// /// # Safety /// - /// `payload_bytes` must equal `size_of::() * len` (without - /// overflow). Callers holding an existing `&[T]` satisfy this via - /// [`core::mem::size_of_val`]. + /// `payload_bytes` must equal `size_of::() * len` without overflow. #[cfg_attr(test, mutants::skip)] // see `try_alloc` #[allow( clippy::cast_ptr_alignment, @@ -372,9 +305,114 @@ impl ChunkMutator { Some(Uninit::new(payload)) } - /// Layout + alloc + prefix-write for "thin DST slice" reservations. - /// On success returns the payload ticket and the absolute payload - /// address (used by drop-tracked callers to encode `value_offset`). + /// Reserve storage for one `Arc`-style value with a leading + /// per-`Arc` strong reference count. 
+ /// + /// Layout: `[strong][pad][metadata][payload]`. Initializes strong count + /// to 1 and returns the payload pointer. + /// + /// `payload_bytes` is floored to 1 so the value pointer stays inside the + /// chunk and preserves header recovery by mask. + #[inline] + #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin + #[allow( + clippy::cast_ptr_alignment, + reason = "reservation is aligned to >= STRONG_ALIGN, so the leading strong slot is aligned for AtomicU32" + )] + fn try_alloc_arc_prefixed(&self, payload_bytes: usize, value_align: usize, meta_bytes: usize) -> Option> { + use super::thin_dst::{arc_block_align, strong_prefix_bytes_for}; + let prefix = strong_prefix_bytes_for(value_align, meta_bytes); + let total = prefix.checked_add(payload_bytes.max(1))?; + let base = self.try_alloc(total, arc_block_align(value_align))?; + // SAFETY: `base` is aligned to `arc_block_align(value_align)` (>= + // STRONG_ALIGN), so the leading `AtomicU32` write is aligned and + // in chunk provenance; `base + prefix` is `value_align`-aligned + // and stays within the reservation. + unsafe { + base.as_ptr() + .cast::() + .write(core::sync::atomic::AtomicU32::new(1)); + Some(NonNull::new_unchecked(base.as_ptr().add(prefix))) + } + } + + /// [`Self::try_alloc_arc_prefixed`] plus the owning chunk pointer. + #[inline] + #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin + pub(crate) fn try_alloc_arc_value(&self) -> Option<(Uninit<'_, T>, NonNull)> { + let value_ptr = self.try_alloc_arc_prefixed(mem::size_of::(), mem::align_of::(), 0)?; + // SAFETY: a successful reservation proves the mutator owns a chunk. + Some((Uninit::new(InChunk::from_raw(value_ptr).cast::()), unsafe { + self.chunk_ptr_unchecked() + })) + } + + /// Slice form of [`Self::try_alloc_arc_value`], including the strong + /// prefix and slice-length metadata word. 
+ #[inline] + #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin + #[allow( + clippy::type_complexity, + reason = "ticket + chunk-ptr tuple is the natural shape; type alias would obscure rather than clarify" + )] + pub(crate) fn try_alloc_arc_slice(&self, len: usize) -> Option<(Uninit<'_, [T]>, NonNull)> { + let payload_bytes = mem::size_of::().checked_mul(len)?; + // SAFETY: `payload_bytes == size_of::() * len` (just checked). + unsafe { self.try_alloc_arc_slice_with_size::(len, payload_bytes) } + } + + /// Like [`Self::try_alloc_arc_slice`] with a precomputed payload byte size. + /// + /// # Safety + /// + /// `payload_bytes` must equal `size_of::() * len` (without overflow). + #[inline] + #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin + #[allow( + clippy::type_complexity, + reason = "ticket + chunk-ptr tuple is the natural shape; type alias would obscure rather than clarify" + )] + #[allow( + clippy::cast_ptr_alignment, + reason = "slice-length metadata is written/read unaligned immediately before the payload" + )] + pub(crate) unsafe fn try_alloc_arc_slice_with_size( + &self, + len: usize, + payload_bytes: usize, + ) -> Option<(Uninit<'_, [T]>, NonNull)> { + debug_assert_eq!(payload_bytes, mem::size_of::().wrapping_mul(len)); + let value_ptr = self.try_alloc_arc_prefixed(payload_bytes, mem::align_of::(), mem::size_of::())?; + // SAFETY: the reservation placed `size_of::()` metadata + // bytes immediately before the payload; `write_unaligned` + // tolerates any alignment. + unsafe { + ptr::write_unaligned(value_ptr.as_ptr().sub(mem::size_of::()).cast::(), len); + } + // SAFETY: a successful reservation proves the mutator owns a chunk. + Some((Uninit::new(InChunk::from_raw(value_ptr).into_slice::(len)), unsafe { + self.chunk_ptr_unchecked() + })) + } + + /// DST form of [`Self::try_alloc_arc_value`]. The caller writes metadata + /// before the returned value pointer and initializes the payload. 
+ #[inline] + #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin + #[cfg(feature = "dst")] + pub(crate) fn try_alloc_arc_dst( + &self, + payload_bytes: usize, + value_align: usize, + meta_bytes: usize, + ) -> Option<(NonNull, NonNull)> { + let value_ptr = self.try_alloc_arc_prefixed(payload_bytes, value_align, meta_bytes)?; + // SAFETY: a successful reservation proves the mutator owns a chunk. + Some((value_ptr, unsafe { self.chunk_ptr_unchecked() })) + } + + /// Thin-DST slice reservation; returns the payload ticket and absolute + /// payload address for drop-entry `value_offset` encoding. #[inline] #[cfg_attr(test, mutants::skip)] // see `try_alloc` fn try_alloc_prefixed_slice_payload(&self, len: usize) -> Option<(InChunk<[T]>, usize)> { @@ -404,11 +442,8 @@ impl ChunkMutator { // `prefix_size`. Both values are powers of two so `max` gives // the right answer. let payload_offset = prefix_size.max(elem_align); - // Floor the payload byte count to 1 so the returned payload - // pointer is strictly less than the reservation's end. Without - // this, an empty slice (`len == 0` or ZST element) at the chunk - // tail could return a payload pointer at `chunk_base + - // CHUNK_ALIGN`, masking to the wrong tile on smart-pointer Drop. + // Empty slices/ZSTs still need an in-chunk payload address for + // smart-pointer header recovery. let payload_bytes = payload_bytes.max(1); let total = payload_offset.checked_add(payload_bytes)?; let base_in_chunk = self.try_alloc(total, elem_align.max(1))?; @@ -458,11 +493,8 @@ impl ChunkMutator { #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin pub(crate) fn try_alloc_uninit_slice_with_drop(&self, len: usize) -> Option> { let size = mem::size_of::().checked_mul(len)?; - // The drop entry encodes the element count in a `u16`; reject longer - // slices up front, before committing any reservation, so we never - // leave a counted-but-uninitialized drop slot behind. 
Callers also - // guard this earlier (see `alloc_slice_*` layout checks) to convert - // it into a clean `AllocError`/panic rather than a refill spin. + // Drop entries store length as `u16`; reject larger slices before any + // reservation is committed. let len_u16 = u16::try_from(len).ok()?; let drop_slot = self.try_reserve_drop_entry()?; let Some(value_bytes_ptr) = self.try_alloc(size, mem::align_of::()) else { @@ -478,44 +510,11 @@ impl ChunkMutator { Some(UninitDrop::new(value, drop_slot)) } - /// Like [`Self::try_alloc_uninit_slice_with_drop`] but additionally - /// writes a thin-pointer DST length prefix (`size_of::()` - /// bytes, unaligned) immediately before the payload. See - /// [`Self::try_alloc_uninit_slice_prefixed`]. - #[cfg_attr(test, mutants::skip)] // see `try_alloc` - #[allow( - clippy::cast_ptr_alignment, - reason = "prefix slot may be unaligned for T's whose align < align_of::(); paired with write_unaligned/read_unaligned" - )] - pub(crate) fn try_alloc_uninit_slice_with_drop_prefixed(&self, len: usize) -> Option> { - // `len` must fit in the drop entry's `u16` element-count field. - let len_u16 = u16::try_from(len).ok()?; - let drop_slot = self.try_reserve_drop_entry()?; - let Some((value, payload_addr)) = self.try_alloc_prefixed_slice_payload::(len) else { - self.unwind_drop_entry(); - return None; - }; - // The drop entry's `value_offset` encodes the *payload* address - // (post-prefix) so `replay_drops` runs `drop_in_place::<[T]>` - // on the real elements. - let value_offset = self.offset_or_unwind(payload_addr)?; - // SAFETY: `drop_slot` is freshly reserved, aligned, exclusively - // owned slot in the chunk's drop region. - unsafe { - ptr::write(drop_slot.as_ptr(), DropEntry::placeholder(value_offset, len_u16)); - } - Some(UninitDrop::new(value, drop_slot)) - } - /// Attempts to reclaim the unused tail of the most recent bump /// allocation in O(1). 
/// - /// When `end_addr` (the one-past-the-end address of an allocation) - /// equals the current bump cursor — i.e. nothing has been allocated - /// after it — the cursor is rewound by `bytes`, returning that span to - /// the chunk, and `true` is returned. Returns `false` (leaving the - /// cursor untouched) when the allocation is not at the cursor or the - /// mutator owns no chunk. + /// Rewinds the bump cursor by `bytes` when `end_addr` is the current + /// cursor. Returns `false` if the allocation is not at the tail. #[inline] pub(crate) fn try_reclaim_tail(&self, end_addr: usize, bytes: usize) -> bool { if self.chunk.is_none() { @@ -569,49 +568,6 @@ impl ChunkMutator { true } - /// Thin-DST smart-pointer reservation. Reserves `total` bytes - /// aligned to `align`, optionally pre-reserves a drop entry that - /// will point at the *payload* address (i.e. `reservation_start + - /// payload_offset`, not the reservation start), and returns the - /// reservation start plus the drop slot. The caller is responsible - /// for writing the metadata prefix at `[0, payload_offset)` and the - /// payload at `[payload_offset, total)`. - /// - /// Used by the thin generic-DST smart-pointer alloc paths - /// ([`Arc`](crate::Arc) / [`Box`](crate::Box) for `T: ?Sized`). 
- #[cfg_attr(test, mutants::skip)] // see `try_alloc` - #[cfg(feature = "dst")] - pub(crate) fn try_alloc_thin_dst_smart( - &self, - total: usize, - align: usize, - payload_offset: usize, - needs_drop: bool, - metadata_u16: u16, - ) -> Option<(InChunk, Option>)> { - debug_assert!(align.is_power_of_two(), "align must be a power of two"); - debug_assert!(payload_offset <= total, "payload_offset must fit inside the reservation"); - if needs_drop { - let drop_slot = self.try_reserve_drop_entry()?; - let Some(base) = self.try_alloc(total, align) else { - self.unwind_drop_entry(); - return None; - }; - // Drop entry encodes the payload address (post-prefix), so - // `replay_drops` runs `drop_in_place::` on the real - // value bytes. - let payload_addr = base.addr().wrapping_add(payload_offset); - let value_offset = self.offset_or_unwind(payload_addr)?; - // SAFETY: freshly reserved, aligned, exclusively owned slot. - unsafe { - ptr::write(drop_slot.as_ptr(), DropEntry::placeholder(value_offset, metadata_u16)); - } - Some((base, Some(drop_slot))) - } else { - let base = self.try_alloc(total, align)?; - Some((base, None)) - } - } /// Reserves a [`DropEntry`]-sized slot at the top of the drop-entry /// region. Entries are packed end-to-end from the payload's high end /// downward, matching the layout walked by @@ -631,11 +587,8 @@ impl ChunkMutator { Some(InChunk::from_raw(new_top).cast::()) } - /// Reverses the most recent `try_reserve_drop_entry`. Used when a - /// downstream allocation in the same compound operation fails. - /// - /// Cold: this fires only on the compound-reservation failure path, - /// which co-occurs with a refill miss and is by definition rare. + /// Reverses the most recent `try_reserve_drop_entry` after a compound + /// reservation failure. 
#[cold] #[inline(never)] #[cfg_attr(test, mutants::skip)] // only observable via skip'd callers @@ -652,10 +605,7 @@ impl ChunkMutator { self.drop_top.set(clamped); } - /// Cold helper: roll back the most recently reserved drop entry and - /// return `None`. Out-of-line from compound-reservation paths so the - /// genuinely-unreachable `u16::try_from(...) == Err` arm is a single - /// line at the call site. + /// Rolls back the most recently reserved drop entry and returns `None`. #[cold] #[inline(never)] #[cfg_attr(coverage_nightly, coverage(off))] @@ -690,38 +640,25 @@ impl ChunkMutator { } /// Publishes the locally-tracked drop-entry count to the chunk header - /// eagerly, before the mutator is dropped. - /// - /// The count is normally published exactly once in [`Drop`]; teardown - /// reads it only after the refcount reaches zero, so the deferred publish - /// is sufficient for the common path. The uninit-`Arc` reservation path - /// (`Arena::alloc_uninit_arc`) calls this after writing a placeholder so - /// that `Arc::>::assume_init` can locate the entry while - /// the chunk is still the arena's active chunk. It must be invoked only - /// after the placeholder slot it counts has been fully written. + /// eagerly, before the mutator is dropped. A no-op for chunk flavors that + /// never register drop entries ([`ChunkOps::REGISTERS_DROPS`] is `false`). #[inline] pub(crate) fn publish_drop_count(&self) { + if !C::REGISTERS_DROPS { + return; + } let Some(chunk) = self.chunk else { return }; // SAFETY: the mutator owns a +1 on `chunk` for its whole lifetime, // so the header is live for this store. unsafe { - chunk.as_ref().set_drop_entry_count(self.local_drop_entry_count()); + C::publish_drop_entry_count(chunk, self.local_drop_entry_count()); } } - /// Consumes the mutator, publishing the locally-tracked drop-entry - /// count to the chunk header and returning the chunk pointer with - /// the mutator's `+1` retained ownership transferred to the caller. 
- /// The mutator's `Drop` (which would otherwise release the `+1`) is - /// bypassed. + /// Consumes the mutator and returns the owned chunk ref without running + /// this mutator's `Drop`. /// - /// Under the `stats` feature, this is also a "retire" event for - /// wasted-tail accounting: the chunk's free tail is recorded and - /// added to the provider's wasted-tail counter (the matching subtract - /// happens in `release_*` when the chunk is eventually cached or - /// destroyed). This matters for the `retired_local` push path, where - /// the chunk is removed from `current_local` (so its tail is wasted - /// from the user's POV) but the mutator's `Drop` is bypassed. + /// Under `stats`, also records wasted tail before transferring the chunk. /// /// Returns `None` for the empty (sentinel) mutator that has no /// chunk installed. @@ -745,24 +682,20 @@ impl Drop for ChunkMutator { let Some(chunk) = self.chunk else { return; }; - // SAFETY: chunk is live; we hold one of its refcount tickets. - // Publish the locally-tracked drop-entry count to the chunk header - // before releasing our refcount: if dec_ref returns true we own the - // unique remaining reference, and `teardown_and_release` will read - // the count to walk the drop list. + // SAFETY: chunk is live; we hold one refcount ticket. Publish the + // drop-entry count before releasing it so teardown can replay drops. unsafe { #[cfg(feature = "stats")] { - // Record the wasted-tail at retire BEFORE dec_ref so that - // (a) the chunk header carries the value for the eventual - // `release_*` subtract (handles may outlive us), and (b) the - // provider counter goes up before any potential immediate - // release-driven subtract. + // Record wasted tail before `dec_ref`; release may happen + // immediately and subtract the stashed value. 
let wasted = self.wasted_tail_for_stats(); C::record_retire(chunk, wasted); } let chunk_ref = chunk.as_ref(); - chunk_ref.set_drop_entry_count(self.local_drop_entry_count()); + // Publish the locally-tracked drop count; a no-op for flavors that + // never register drop entries (see `ChunkOps::publish_drop_entry_count`). + C::publish_drop_entry_count(chunk, self.local_drop_entry_count()); if chunk_ref.dec_ref() { C::teardown_and_release(chunk); } @@ -962,4 +895,65 @@ mod tests { "overflowing new_len must fail", ); } + + // Covers `try_alloc_uninit_with_drop`'s value-allocation rollback + // (the `unwind_drop_entry` arm): the drop slot is reserved from the + // top, but the value itself doesn't fit, so the reserved slot must be + // unwound and the call must report failure. We size the remaining + // free space into the window `[entry_size, entry_size + value_bytes)` + // so the drop-slot reservation succeeds while the value alloc fails. + #[test] + fn try_alloc_uninit_with_drop_rolls_back_when_value_does_not_fit() { + struct BigDrop([u8; 64]); + impl Drop for BigDrop { + fn drop(&mut self) { + core::hint::black_box(&self.0); + } + } + let arena = crate::Arena::new(); + // Force the first refill so `current_local` carries a live chunk. + let _ = arena.alloc(0_u8); + let m = arena.current_local(); + + let entry_size = mem::size_of::(); + // Leave exactly one byte of headroom past the drop slot so the + // value (64 bytes, align 1) cannot fit after the slot is reserved. + let target = entry_size + 1; + let free = m.free_bytes(); + assert!(free >= target, "post-refill chunk must have room for the setup"); + let _ = m.try_alloc_bytes(free - target).expect("setup fill"); + assert_eq!(m.free_bytes(), target); + + // Drop slot fits (free >= entry_size) but the value does not + // (free - entry_size == 1 < 64), driving the rollback path. 
+ assert!( + m.try_alloc_uninit_with_drop::().is_none(), + "value that doesn't fit must report failure", + ); + // The reserved drop slot must have been unwound, restoring free space. + assert_eq!(m.free_bytes(), target, "unwind_drop_entry must restore the reserved drop slot"); + + // Also exercise `BigDrop`'s destructor end-to-end: allocate one into + // a fresh arena and let teardown replay the drop entry, so the drop + // shim actually runs. + let drop_arena = crate::Arena::new(); + let _ = drop_arena.alloc(BigDrop([0_u8; 64])); + drop(drop_arena); + } + + // Covers `publish_drop_count`'s early return for chunk flavors that never + // register drop entries (`REGISTERS_DROPS == false`): publishing the count + // on a shared mutator is a no-op that must leave the arena fully usable. + #[test] + fn publish_drop_count_is_noop_for_shared_mutator() { + let arena = crate::Arena::new(); + // Force a live shared chunk into `current_shared`. + let a = arena.alloc_arc(1_u32); + assert_eq!(*a, 1); + // Shared chunks register no drop entries, so this returns early. + arena.current_shared().publish_drop_count(); + // The arena keeps working afterward. + let b = arena.alloc_arc(2_u32); + assert_eq!(*b, 2); + } } diff --git a/crates/multitude/src/internal/chunk_ops.rs b/crates/multitude/src/internal/chunk_ops.rs index 61e8c88be..12d615bf6 100644 --- a/crates/multitude/src/internal/chunk_ops.rs +++ b/crates/multitude/src/internal/chunk_ops.rs @@ -3,12 +3,9 @@ //! Per-flavor chunk lifecycle and access operations. //! -//! [`ChunkOps`] is the trait that [`ChunkMutator`](super::ChunkMutator) uses -//! to manipulate either a [`LocalChunk`](super::LocalChunk) or a -//! [`SharedChunk`](super::SharedChunk) without caring which flavor it has. -//! It also drives the "refcount hit zero" teardown path, which depends on -//! the flavor: local chunks return to the provider's single-threaded cache, -//! shared chunks return to the provider's lock-free cache. +//! 
[`ChunkOps`] lets [`ChunkMutator`](super::chunk_mutator::ChunkMutator) handle +//! [`LocalChunk`](super::local_chunk::LocalChunk) and [`SharedChunk`](super::shared_chunk::SharedChunk) +//! through one lifecycle interface. // All trait methods are `unsafe fn` with documented safety contracts at the // function level; the inner unsafe wrappers required by edition 2024 add @@ -18,21 +15,56 @@ use core::ptr::NonNull; -use allocator_api2::alloc::Allocator; +use allocator_api2::alloc::{AllocError, Allocator}; use super::chunk::Chunk; +use super::chunk_alloc::chunk_alloc_size; use super::local_chunk::LocalChunk; use super::shared_chunk::SharedChunk; /// Operations every chunk flavor must support. /// -/// Implemented for [`LocalChunk`] and [`SharedChunk`]. The associated -/// `Allocator` type lets generic callers recover the provider type for -/// release-routing. +/// Implemented for [`LocalChunk`] and [`SharedChunk`]. pub(crate) trait ChunkOps: Chunk { /// Allocator type used to back this chunk flavor's underlying storage. type Allocator: Allocator + Clone; + /// Whether this chunk flavor stores per-allocation drop entries packed at + /// its payload tail. + /// + /// `true` only for [`LocalChunk`]: plain arena references (`&mut T` / + /// `&mut [T]`) have no destructor of their own, so the chunk runs them at + /// teardown. `false` for [`SharedChunk`], whose values are owned by `Box` + /// or `Arc` and dropped eagerly on their last reference. The + /// [`ChunkMutator`](super::chunk_mutator::ChunkMutator) keys all its + /// drop-entry bookkeeping off this const so the shared monomorphization + /// compiles the dead paths away. + const REGISTERS_DROPS: bool; + + /// Header size in bytes for this chunk flavor. + fn header_size() -> usize; + + /// Publishes the mutator's locally-tracked drop-entry count into the chunk + /// header so teardown can replay them. A no-op for flavors that never + /// register drop entries ([`Self::REGISTERS_DROPS`] is `false`). 
+ /// + /// # Safety + /// + /// `chunk` must reference a live chunk the caller holds a reference to. + unsafe fn publish_drop_entry_count(chunk: NonNull, count: usize); + + /// Payload alignment for this chunk flavor. + fn value_align() -> usize; + + /// Rounded backing-allocation size (`Layout::size()`) of a chunk whose + /// payload holds `payload` bytes. The single source of truth for chunk + /// byte accounting: every reserve/release/cache path routes through here + /// so the rounded footprint stays balanced. + #[inline] + fn footprint(payload: usize) -> Result { + chunk_alloc_size(Self::header_size(), payload, Self::value_align()) + } + /// Returns a pointer to the first byte of the chunk's payload. /// /// # Safety @@ -48,12 +80,8 @@ pub(crate) trait ChunkOps: Chunk { /// Caller must hold the unique remaining reference to `chunk`. unsafe fn teardown_and_release(chunk: NonNull); - /// Stashes `wasted` on the chunk header and adds it to the provider's - /// wasted-tail counter. Called from `ChunkMutator::Drop` at retire-time - /// (i.e., as the mutator's `+1` is about to be released). The matching - /// subtract happens in [`ChunkProvider::release_local`] / - /// [`ChunkProvider::release_shared`] when the chunk is later cached - /// or destroyed. + /// Records wasted tail on retire; the provider subtracts it when the + /// chunk is later cached or destroyed. /// /// # Safety /// @@ -70,6 +98,24 @@ pub(crate) trait ChunkOps: Chunk { impl ChunkOps for LocalChunk { type Allocator = A; + const REGISTERS_DROPS: bool = true; + + #[inline] + fn header_size() -> usize { + LocalChunk::::header_size() + } + + #[inline] + unsafe fn publish_drop_entry_count(chunk: NonNull, count: usize) { + // SAFETY: caller holds a live reference to `chunk`. 
+ chunk.as_ref().set_drop_entry_count(count); + } + + #[inline] + fn value_align() -> usize { + LocalChunk::::value_align() + } + #[inline] unsafe fn payload_ptr(chunk: NonNull) -> NonNull { // SAFETY: delegated to the inherent `LocalChunk::payload_ptr`. @@ -91,15 +137,8 @@ impl ChunkOps for LocalChunk { super::drop_entry::replay_drops(payload, capacity, drop_count); chunk_ref.set_drop_entry_count(0); } - // Route the just-released chunk back to the provider. The provider - // is guaranteed to outlive every local-chunk teardown: the arena's - // `provider: Arc` field is declared after the - // chunk-holding fields, so on `Arena::drop` the local mutators - // tear down first (running this code) while the provider Arc is - // still alive; chunks parked in the provider's own cache are torn - // down directly via `LocalChunk::destroy` in `drain_all` and do - // not reach this code path. See the type-level doc on - // `LocalChunk`. + // Local chunks teardown while the arena provider is still alive; cached + // local chunks are destroyed directly from provider drop. let provider = chunk_ref.provider(); debug_assert!(!provider.is_null(), "local-chunk provider back-pointer is null in teardown"); (*provider).release_local(chunk); @@ -122,6 +161,23 @@ impl ChunkOps for LocalChunk { impl ChunkOps for SharedChunk { type Allocator = A; + const REGISTERS_DROPS: bool = false; + + #[inline] + fn header_size() -> usize { + SharedChunk::::header_size() + } + + #[inline] + unsafe fn publish_drop_entry_count(_chunk: NonNull, _count: usize) { + // Shared chunks never register drop entries; nothing to publish. + } + + #[inline] + fn value_align() -> usize { + SharedChunk::::value_align() + } + #[inline] unsafe fn payload_ptr(chunk: NonNull) -> NonNull { // SAFETY: delegated to the inherent `SharedChunk::payload_ptr`. @@ -131,20 +187,11 @@ impl ChunkOps for SharedChunk { #[cold] #[inline(never)] unsafe fn teardown_and_release(chunk: NonNull) { - // SAFETY: see local variant. 
Replay drops + clear count before the - // chunk is recycled to the shared cache (where its payload's first - // bytes are reused as a Treiber-stack next-link). + // SAFETY: caller owns the unique remaining reference. Shared chunks + // register no drop entries; per-`Arc` values drop on their last strong + // reference before the chunk reaches the cache. let chunk_ref = &*chunk.as_ptr(); - let drop_count = chunk_ref.drop_entry_count(); - if drop_count != 0 { - let payload = SharedChunk::payload_ptr(chunk).as_ptr(); - let capacity = chunk_ref.capacity(); - super::drop_entry::replay_drops(payload, capacity, drop_count); - chunk_ref.set_drop_entry_count(0); - } - // Shared chunks CAN outlive their provider (an Arc backed by - // a shared chunk can be held past Arena::drop), so we still need - // the Weak::upgrade dance here. + // Shared chunks can outlive their provider, so release through `Weak`. if let Some(provider) = chunk_ref.provider().upgrade() { provider.release_shared(chunk); } else { @@ -156,18 +203,34 @@ impl ChunkOps for SharedChunk { unsafe fn record_retire(chunk: NonNull, wasted: u32) { let chunk_ref = &*chunk.as_ptr(); chunk_ref.set_wasted_at_retire(wasted); - // If the provider has already been dropped (shared chunks can - // outlive their arena), there is no counter left to update; - // the stashed `wasted_at_retire` will simply never be read. + // If the provider is gone, no stats counter remains to update. if let Some(provider) = chunk_ref.provider().upgrade() { provider.record_wasted_tail(u64::from(wasted)); } } } -// Note: the prior `orphan_local_chunk_is_destroyed_on_mutator_drop` test -// (which exercised the now-removed `destroy_orphan_local` defensive arm) -// is gone — that branch was eliminated when `LocalChunk` switched from a -// `Weak` to a non-owning raw back-pointer. See the -// type-level doc on `LocalChunk` for the soundness argument and -// `teardown_and_release` above for the simplified routing. 
+#[cfg(test)] +mod tests { + use allocator_api2::alloc::Global; + + use super::*; + + // Kills the `value_align -> 1` mutants on both `ChunkOps` impls: the + // trait method must report the real payload alignment + // (`align_of::()`), which every footprint computation depends on. + // The inherent `value_align` tests don't cover the trait impls. + #[test] + fn chunk_ops_value_align_reports_real_payload_alignment() { + assert_eq!( + as ChunkOps>::value_align(), + core::mem::align_of::(), + "LocalChunk trait value_align must match the real payload alignment" + ); + assert_eq!( + as ChunkOps>::value_align(), + core::mem::align_of::(), + "SharedChunk trait value_align must match the real payload alignment" + ); + } +} diff --git a/crates/multitude/src/internal/chunk_provider.rs b/crates/multitude/src/internal/chunk_provider.rs index 5597adb1d..c23247c93 100644 --- a/crates/multitude/src/internal/chunk_provider.rs +++ b/crates/multitude/src/internal/chunk_provider.rs @@ -4,35 +4,20 @@ //! Per-arena chunk cache and allocation source. //! //! [`ChunkProvider`] owns the arena's allocator clone, enforces a byte -//! budget, and maintains a freed-chunk cache of the **current floor -//! class**, so steady-state allocate/release pairs avoid hitting the -//! system allocator. +//! budget, and maintains freed-chunk caches at the current class floor. //! -//! Each cache holds at most one freelist. The associated **class floor** -//! (`local_cache_class` / `shared_cache_class`) ratchets monotonically -//! upward as the arena progresses to larger chunks. Chunks released -//! below the floor are returned to the system; cached chunks below the -//! floor are evicted at the next floor bump. The intent is that the -//! arena settles into the largest class it needs with the minimum -//! number of chunks retained. +//! Each cache holds one freelist. The class floor ratchets upward as the +//! arena needs larger chunks; below-floor chunks are evicted or destroyed. //! //! 
Two cache shapes coexist: //! -//! - **Local cache**: single freelist guarded by an [`OwnerThreadCell`]. -//! The provider's owning thread is the arena's thread; only that -//! thread allocates from or releases into the local cache. -//! - **Shared cache**: lock-free Treiber-style stack of -//! `AtomicPtr>`. Any thread can push a chunk (when its -//! last refcount handle drops); only the owning thread pops. A -//! concurrent push by a thread that has yet to observe the latest -//! floor bump may add a below-floor chunk; that straggler is destroyed -//! when the owner thread pops it (see [`ChunkProvider::pop_shared`]). - -// `release_local`, `release_shared`, `pop_shared`, `push_shared`, and the -// `destroy_or_cache_just_acquired` helpers are `unsafe fn` with their full -// safety contracts documented on the items themselves; the inner unsafe -// wrappers edition 2024 would otherwise require do not add a safety -// boundary, so we drop them. +//! - Local: single freelist in [`OwnerThreadCell`], accessed only by the +//! arena thread. +//! - Shared: lock-free Treiber stack; any thread can push, only the owner +//! pops. Below-floor stragglers are destroyed by [`ChunkProvider::pop_shared`]. + +// These `unsafe fn`s have item-level safety contracts; inner unsafe blocks +// would not add a boundary here. 
#![allow(unsafe_op_in_unsafe_fn, reason = "see module doc: inner unsafe blocks in unsafe fn add noise here")] #![allow(clippy::unnecessary_safety_comment, reason = "safety rationale documented at function level")] @@ -46,7 +31,7 @@ use core::sync::atomic::{AtomicPtr, AtomicU8, AtomicUsize, Ordering}; use allocator_api2::alloc::{AllocError, Allocator}; use super::chunk::Chunk; -use super::chunk_alloc::chunk_alloc_size; +use super::chunk_ops::ChunkOps; use super::constants::{MAX_CHUNK_BYTES, MAX_NORMAL_ALLOC, MIN_CHUNK_BYTES, SizeClass}; use super::drop_entry::DropEntry; use super::local_chunk::LocalChunk; @@ -135,17 +120,10 @@ pub(crate) struct ChunkProvider { /// Bytes currently outstanding (allocated, not yet freed). Updated via /// `AcqRel` speculative-add. bytes_outstanding: AtomicUsize, - /// Single-thread local-chunk cache: thin `*mut u8` header pointer to - /// the freelist head (chunks linked via [`LocalChunk::set_next`] - /// / [`LocalChunk::next`]). Holds at most one freelist for - /// the **current class floor** ([`Self::local_cache_class`]); chunks - /// below the floor are destroyed instead of cached. + /// Local-cache freelist head as a thin header pointer. Holds chunks at or + /// above [`Self::local_cache_class`]. local_cache: OwnerThreadCell<*mut u8>, - /// Current class floor for the local cache. Only chunks at class - /// greater than or equal to `local_cache_class` are cached; the - /// floor ratchets monotonically upward as the arena allocates - /// progressively larger chunks, and stale below-floor chunks are - /// evicted at each bump. + /// Current class floor for the local cache; below-floor chunks are evicted. local_cache_class: AtomicU8, /// Lock-free shared-chunk cache: single Treiber-stack head for the /// current class floor ([`Self::shared_cache_class`]). @@ -167,11 +145,8 @@ pub(crate) struct ChunkProvider { /// Lifetime count of oversized one-shot shared chunks allocated. 
#[cfg(feature = "stats")] oversized_shared_chunks_allocated: AtomicU64, - /// Bytes currently "wasted" in the unused free region of chunks that have - /// been retired from an arena's `current_*` slot but have not yet been - /// returned to the cache or freed back to the underlying allocator. Bumped - /// up when a chunk is retired, bumped back down when the same chunk is - /// later cached or destroyed. + /// Unused tail bytes in retired chunks not yet cached or freed. Retire + /// increments; cache/destroy decrements. #[cfg(feature = "stats")] wasted_tail_bytes: AtomicU64, } @@ -270,17 +245,10 @@ impl ChunkProvider { &self.allocator } - /// Acquires a normal-class local chunk whose payload has at least - /// `min_payload` bytes. The caller MUST have already verified the - /// request is not oversized (i.e. `min_payload <= max_normal_alloc` - /// and total fits in `MAX_CHUNK_BYTES`); use - /// [`Self::acquire_oversized_local`] otherwise. Returns with refcount - /// = 1. + /// Acquires a normal-class local chunk with at least `min_payload` bytes. + /// Caller must route oversized requests to [`Self::acquire_oversized_local`]. /// - /// `ratchet_class` is the caller's size-class floor (the refill - /// ratchet): the chosen chunk is sized to the larger of the class - /// needed for `min_payload` and `ratchet_class`, so the chunk can - /// grow with arena usage. + /// `ratchet_class` is the caller's size-class floor for refill growth. pub(crate) fn acquire_local(&self, min_payload: usize, ratchet_class: SizeClass) -> Result>, AllocError> { let header = LocalChunk::::header_size(); let needed_total = header.checked_add(min_payload).ok_or(AllocError)?; @@ -291,13 +259,9 @@ impl ChunkProvider { self.acquire_normal_local(SizeClass::min_for_bytes(needed_total).max(ratchet_class)) } - /// Acquires a normal (cacheable) local chunk in the given size `class`, - /// reusing a cached chunk when available. 
Never routes to oversized; the - /// caller is responsible for any oversized decision. + /// Acquires a cacheable local chunk in `class`, reusing cache when possible. /// - /// If `class` exceeds the cache's current class floor, the floor is - /// bumped (monotonically) and stale below-floor chunks in the cache - /// are destroyed before the pop attempt. + /// Bumps the cache floor and evicts stale chunks when `class` is higher. // // Mutation testing is suppressed on the `class > floor` branch: // `>` with `<` / `==` only changes when the floor advances; the @@ -334,9 +298,8 @@ impl ChunkProvider { self.allocate_fresh_local(class) } - /// Sets the local cache floor to `new_class` and destroys every cached - /// chunk whose total allocation is smaller than the new floor. - /// Idempotent: caller already verified `new_class > current_floor`. + /// Sets the local cache floor and destroys cached chunks below it. + /// Caller already verified `new_class > current_floor`. /// /// # Safety /// @@ -356,7 +319,8 @@ impl ChunkProvider { let fat = LocalChunk::::header_to_fat(cur); let chunk_nn = NonNull::new_unchecked(fat); let next = LocalChunk::next(chunk_nn); - let total = LocalChunk::::header_size() + (*chunk_nn.as_ptr()).capacity(); + let total = LocalChunk::::footprint((*chunk_nn.as_ptr()).capacity()) + .expect("evicted chunk's layout was valid when it was allocated"); if total >= new_min_total { LocalChunk::set_next(chunk_nn, new_head); new_head = cur; @@ -371,11 +335,7 @@ impl ChunkProvider { } } - /// Allocates a brand-new normal local chunk of the given size `class`, - /// bypassing the cache. Increments the lifetime allocation counter. - /// Used both on a cache miss in [`acquire_normal_local`](Self::acquire_normal_local) - /// and by [`preallocate_local`](Self::preallocate_local) (which must add - /// fresh chunks to the cache rather than recycle existing ones). + /// Allocates a fresh normal local chunk, bypassing the cache. 
fn allocate_fresh_local(&self, class: SizeClass) -> Result>, AllocError> { let header = LocalChunk::::header_size(); let total = class.bytes(); @@ -394,11 +354,8 @@ impl ChunkProvider { } } - /// Acquires a normal-class shared chunk whose payload has at least - /// `min_payload` bytes. The caller MUST have already verified the - /// request is not oversized; use [`Self::acquire_oversized_shared`] - /// otherwise. See [`Self::acquire_local`] for `ratchet_class` - /// semantics. Returns with refcount = 1. + /// Acquires a normal-class shared chunk with at least `min_payload` bytes. + /// Caller must route oversized requests to [`Self::acquire_oversized_shared`]. pub(crate) fn acquire_shared(&self, min_payload: usize, ratchet_class: SizeClass) -> Result>, AllocError> { let header = SharedChunk::::header_size(); let needed_total = header.checked_add(min_payload).ok_or(AllocError)?; @@ -409,10 +366,8 @@ impl ChunkProvider { self.acquire_normal_shared(SizeClass::min_for_bytes(needed_total).max(ratchet_class)) } - /// Acquires a normal (cacheable) shared chunk in the given size `class`. - /// If `class` exceeds the cache's current class floor, the floor is - /// bumped (monotonically) and stale below-floor chunks in the cache - /// are destroyed before the pop attempt. + /// Acquires a cacheable shared chunk in `class`, bumping the floor first + /// when needed. // // Mutation testing is suppressed on the `class > floor` branch for // the same reason as `acquire_normal_local`. @@ -435,12 +390,8 @@ impl ChunkProvider { self.allocate_fresh_shared(class) } - /// Sets the shared cache floor to `new_class` and destroys every - /// cached chunk whose total allocation is smaller than the new floor. - /// Called only by the owning thread; concurrent pushers (releasing - /// threads) may race a below-floor chunk into the cache after the - /// floor is observed-as-lower — those stragglers are caught by the - /// pop-time class check in [`Self::pop_shared`]. 
+ /// Sets the shared cache floor and destroys detached chunks below it. + /// Racing below-floor pushes are handled by [`Self::pop_shared`]. /// /// # Safety /// @@ -453,9 +404,7 @@ impl ChunkProvider { // subsequent Acquire load sees it. self.shared_cache_class.store(new_class.raw(), Ordering::Release); let new_min_total = new_class.bytes(); - // Atomically detach the whole freelist. Concurrent pushers will - // push onto the now-empty head; the post-bump pushers may push - // either above-floor (kept) or below-floor (caught at pop) chunks. + // Detach the freelist; racing pushers target the empty head. let mut cur = self.shared_cache.swap(ptr::null_mut(), Ordering::AcqRel); // SAFETY: each linked chunk is a refcount-zero, uniquely-owned // chunk we just detached; we walk the list, re-push survivors, @@ -466,7 +415,8 @@ impl ChunkProvider { let chunk_nn = NonNull::new_unchecked(fat); let link = SharedChunk::cache_link(chunk_nn); let next = (*link).load(Ordering::Acquire); - let total = SharedChunk::::header_size() + (*chunk_nn.as_ptr()).capacity(); + let total = SharedChunk::::footprint((*chunk_nn.as_ptr()).capacity()) + .expect("evicted chunk's layout was valid when it was allocated"); if total >= new_min_total { self.push_shared(chunk_nn); } else { @@ -478,8 +428,7 @@ impl ChunkProvider { } } - /// Allocates a brand-new normal shared chunk of the given size `class`, - /// bypassing the cache. See [`allocate_fresh_local`](Self::allocate_fresh_local). + /// Allocates a fresh normal shared chunk, bypassing the cache. #[cfg_attr(test, mutants::skip)] // `total - header → total / header` ⇒ runaway allocations fn allocate_fresh_shared(&self, class: SizeClass) -> Result>, AllocError> { let header = SharedChunk::::header_size(); @@ -512,27 +461,16 @@ impl ChunkProvider { // outright or push the chunk onto the (single-threaded) cache by // writing its cache-link slot. 
let capacity = (*chunk.as_ptr()).capacity(); - // Match the rounded `Layout::size()` that `chunk_layout` allocated, so - // the byte budget / `total_bytes_allocated` track the real footprint - // (the round-up matters for oversized chunks whose header+capacity is - // not `value_align`-aligned). - let total = chunk_alloc_size(LocalChunk::::header_size(), capacity, LocalChunk::::value_align()) - .expect("released chunk's layout was valid when it was allocated"); + let total = LocalChunk::::footprint(capacity).expect("released chunk's layout was valid when it was allocated"); #[cfg(feature = "stats")] { - // Decrement the wasted-tail counter by the value stashed on - // the chunk header at retire time (0 for chunks that never - // went through a mutator, e.g. preallocated cache fills). + // Subtract the retire-time wasted-tail value, if any. let wasted = u64::from((*chunk.as_ptr()).wasted_at_retire()); if wasted != 0 { self.release_wasted_tail(wasted); } } - // Bypass the cache for non-class-size totals (oversized one-shots - // whose total isn't a power of two) and for chunks below the - // current cache class floor. The floor ratchets monotonically as - // the arena moves to larger chunks; smaller chunks released - // afterward are returned to the system so the cache stays uniform. + // Bypass the cache for oversized/non-class totals and below-floor chunks. if !is_cacheable_size(total) || total < SizeClass::new(self.local_cache_class.load(Ordering::Relaxed)).bytes() { LocalChunk::destroy(chunk, &self.allocator); self.release_bytes(total); @@ -552,15 +490,10 @@ impl ChunkProvider { pub(crate) unsafe fn release_shared(&self, chunk: NonNull>) { // SAFETY: chunk is live and uniquely owned by caller. let capacity = (*chunk.as_ptr()).capacity(); - // See `release_local`: round to the allocated `Layout::size()` so byte - // accounting matches the real footprint for oversized chunks. 
- let total = chunk_alloc_size(SharedChunk::::header_size(), capacity, SharedChunk::::value_align()) - .expect("released chunk's layout was valid when it was allocated"); + let total = SharedChunk::::footprint(capacity).expect("released chunk's layout was valid when it was allocated"); #[cfg(feature = "stats")] { - // See `release_local` for the symmetric subtract semantics. - // Acquire load on the shared chunk's atomic — the store may - // have happened on a different thread (last `Arc::drop`). + // Acquire load pairs with retire on another thread. let wasted = u64::from((*chunk.as_ptr()).wasted_at_retire()); if wasted != 0 { self.release_wasted_tail(wasted); @@ -577,10 +510,7 @@ impl ChunkProvider { /// Pre-warms the local cache with one chunk in the given size class. /// - /// Always allocates through the normal (cacheable) class path: a - /// preallocated chunk is a size-classed chunk regardless of the - /// configured `max_normal_alloc`, so it must never route to the - /// oversized (one-shot, non-cacheable) path even when its payload + /// Always allocates through the normal class path, even when the payload /// exceeds `max_normal_alloc`. pub(crate) fn preallocate_local(&self, class: SizeClass) -> Result<(), AllocError> { let chunk = self.allocate_fresh_local(class)?; @@ -626,25 +556,16 @@ impl ChunkProvider { self.bytes_outstanding.fetch_sub(n, Ordering::AcqRel); } - /// Allocates a one-shot oversized local chunk whose payload is sized - /// to fit a single allocation of `min_payload` bytes (plus rounding - /// for drop-entry alignment). The chunk bypasses the size-class cache. + /// Allocates a one-shot oversized local chunk sized for `min_payload`. 
/// - /// Used by [`Arena`](crate::Arena) for allocations whose worst-case - /// payload exceeds `max_normal_alloc`: the caller wraps the chunk - /// in a temporary [`ChunkMutator`](super::ChunkMutator), performs the - /// single allocation, and the current chunk is left untouched so - /// subsequent small allocations continue to use it. + /// The caller uses a temporary [`ChunkMutator`](super::chunk_mutator::ChunkMutator), so + /// the current chunk remains available for later small allocations. pub(crate) fn acquire_oversized_local(&self, min_payload: usize) -> Result>, AllocError> { - // Add `oversized_payload_align_slack()` to absorb the worst-case - // alignment skew the bump cursor pays at the start of an unaligned - // payload (chunk headers do not pad the payload to be 8-aligned). - // Callers requesting an `elem_align > align_of::()` must - // pre-size `min_payload` to cover the extra skew themselves. + // Add worst-case payload-start alignment skew. Callers with larger + // element alignment pre-size `min_payload` themselves. let payload = round_up_to_drop_align(min_payload.checked_add(oversized_payload_align_slack()).ok_or(AllocError)?)?; - // Reserve the rounded `Layout::size()` that `LocalChunk::allocate` - // will actually request, so the byte budget is enforced accurately. - let total = chunk_alloc_size(LocalChunk::::header_size(), payload, LocalChunk::::value_align())?; + // Reserve the exact rounded allocation size. + let total = LocalChunk::::footprint(payload)?; self.reserve_bytes(total)?; match LocalChunk::::allocate(&self.allocator, ptr::from_ref(self), payload) { Ok(chunk) => { @@ -664,7 +585,7 @@ impl ChunkProvider { // See `acquire_oversized_local` for the alignment-slack rationale. let payload = round_up_to_drop_align(min_payload.checked_add(oversized_payload_align_slack()).ok_or(AllocError)?)?; // See `acquire_oversized_local`: reserve the rounded allocation size. 
- let total = chunk_alloc_size(SharedChunk::::header_size(), payload, SharedChunk::::value_align())?; + let total = SharedChunk::::footprint(payload)?; self.reserve_bytes(total)?; match SharedChunk::::allocate(self.allocator.clone(), Weak::clone(&self.weak_self), payload) { Ok(chunk) => { @@ -679,10 +600,8 @@ impl ChunkProvider { } } - /// Pops a cached shared chunk at or above the current class floor. - /// Stale below-floor chunks (pushed by a release thread that raced - /// against [`Self::advance_shared_cache_floor`]) are destroyed and - /// the next chunk is tried. + /// Pops a cached shared chunk at or above the current class floor, + /// destroying below-floor stragglers. /// /// # Safety /// @@ -706,7 +625,8 @@ impl ChunkProvider { let Ok(popped) = updated else { return None }; let fat = SharedChunk::::header_to_fat(popped); let chunk_nn = NonNull::new_unchecked(fat); - let total = SharedChunk::::header_size() + (*chunk_nn.as_ptr()).capacity(); + let total = SharedChunk::::footprint((*chunk_nn.as_ptr()).capacity()) + .expect("popped chunk's layout was valid when it was allocated"); if total >= floor_min_total { return Some(chunk_nn); } @@ -726,16 +646,8 @@ impl ChunkProvider { let head = &self.shared_cache; let link = SharedChunk::cache_link(chunk); let new = chunk.cast::().as_ptr(); - // The chunk is exclusively ours until the publishing CAS below - // succeeds, so the link can be initialized via a non-atomic - // pointer write through `AtomicPtr::as_ptr()`. Doing the first - // write atomically triggers a Miri weak-memory ICE - // ("cannot have empty store buffer when previous write was - // atomic") on freshly-allocated chunk payload bytes; the - // non-atomic init sidesteps it. After the CAS, all subsequent - // mutations to the link go through atomic ops, and any popper - // observes the link via `head.load(Acquire)` which - // synchronizes-with the `Release` half of our CAS. 
+ // Exclusive ownership permits non-atomic link initialization before + // the publishing CAS; later link changes use atomics. let mut cur = head.load(Ordering::Acquire); loop { ptr::write((*link).as_ptr(), cur); @@ -752,7 +664,7 @@ impl ChunkProvider { } } - /// Drains every cached chunk and deallocates its backing memory. + /// Drains cached chunks and deallocates their backing memory. fn drain_all(&self) { // SAFETY: drain runs in Drop with no outstanding mutators; the // provider is single-owner at this point, so the OwnerThreadCell @@ -799,10 +711,8 @@ pub(crate) fn is_cacheable_size(total: usize) -> bool { /// `align_of::()`. Returns `None` on overflow. /// /// [`ChunkMutator::from_owned`](super::chunk_mutator::ChunkMutator::from_owned) -/// aligns the chunk's `drop_top` *down* to `align_of::()`, -/// shaving up to `align - 1` bytes off the usable payload. Without this -/// rounding the usable capacity could fall below `min_payload` and -/// `impl_alloc_*`'s reserve/refill loop would spin until OOM. +/// aligns `drop_top` down, so rounding prevents usable capacity from falling +/// below `min_payload`. #[cfg_attr(test, mutants::skip)] // mask mutations underfit payload → OOM spin #[inline] fn round_up_to_drop_align(min_payload: usize) -> Result { @@ -834,9 +744,7 @@ fn exceeds_max_chunk_bytes(needed_total: usize) -> bool { // --- Helpers wired into chunk types via inherent impls ------------------------ impl LocalChunk { - /// Used by `preallocate_local`: route a just-acquired refcount-1 chunk - /// back to its provider's cache (refcount → 0) without going through - /// `ChunkMutator`. + /// Routes a just-acquired refcount-1 chunk to the provider cache. /// /// # Safety /// @@ -889,11 +797,7 @@ mod tests { static PUSH_RETRY_COUNT: Cell = const { Cell::new(0) }; } - /// Test hook invoked by `push_shared` just before its CAS. 
If the - /// thread-local injection slot is armed, splice that chunk onto the - /// stack as if a concurrent pusher had installed it: link it to the - /// value the pusher loaded, then publish it as the new head so the - /// pending CAS (still expecting `cur`) fails exactly once. + /// Test hook that injects a competing shared-cache push before the CAS. /// /// # Safety /// @@ -924,12 +828,8 @@ mod tests { assert_eq!(c.max_normal_alloc(), MAX_NORMAL_ALLOC); } - // Covers `pop_shared`'s below-floor straggler arm: a cached shared - // chunk smaller than the current class floor is destroyed (not - // returned) and the pop continues. Single-threaded code never caches - // a below-floor chunk via `release_shared`, so we model the - // push-races-floor-bump state directly: raise the floor on an empty - // cache, then inject a small (class-0) chunk via `push_shared`. + // Covers `pop_shared`'s below-floor straggler arm by raising the floor, + // then pushing a smaller chunk. #[test] fn pop_shared_destroys_below_floor_straggler() { let provider = ChunkProvider::::new(Global, ChunkProviderConfig::default()); @@ -969,13 +869,8 @@ mod tests { assert!(!is_cacheable_size(0)); } - // Covers `push_shared`'s contended CAS retry arm (the `Err(actual)` - // branch). A real concurrent push is non-deterministic, so we model - // the race directly: arm a thread-local injection so the test hook - // publishes a competing chunk onto the stack head between our load and - // CAS, forcing the pending CAS to fail and the loop to retry exactly - // once before it settles. Thread-local state keeps this isolated from - // other tests running in parallel. + // Covers `push_shared`'s contended CAS retry arm via deterministic + // thread-local race injection. 
#[test] fn push_shared_retries_on_contended_cas() { let provider = ChunkProvider::::new(Global, ChunkProviderConfig::default()); diff --git a/crates/multitude/src/internal/chunk_ref.rs b/crates/multitude/src/internal/chunk_ref.rs index 5efc8a64f..30adea109 100644 --- a/crates/multitude/src/internal/chunk_ref.rs +++ b/crates/multitude/src/internal/chunk_ref.rs @@ -4,10 +4,8 @@ //! [`ChunkRef`] — a RAII handle for a single strong reference on a //! [`SharedChunk`]. //! -//! Centralizes the "+1 on a chunk that must be released exactly once, -//! even on panic" pattern used by smart pointers and in-flight slot -//! initialization. One machine word, `!Send`/`!Sync`, and inhibits -//! implicit `Copy`/`Clone` so the +1 ownership is linear. +//! Centralizes linear "+1" chunk ownership used by smart pointers and +//! in-flight slot initialization. use core::marker::PhantomData; use core::mem; @@ -15,7 +13,6 @@ use core::ptr::NonNull; use allocator_api2::alloc::Allocator; -use super::chunk::Chunk; use super::shared_chunk::SharedChunk; /// Owns a single strong reference on a [`SharedChunk`]; releases the @@ -65,32 +62,7 @@ impl ChunkRef { } } - /// Bumps the strong refcount on the chunk containing `value` and - /// returns a [`ChunkRef`] owning the new +1. - /// - /// # Safety - /// - /// Same as [`Self::from_value_ptr`], plus caller must already hold - /// a live strong reference on the chunk. - #[inline] - pub(crate) unsafe fn clone_from_value_ptr(value: NonNull) -> Self { - // SAFETY: see from_value_ptr; caller's pre-existing +1 prevents - // teardown races. - unsafe { - let header = SharedChunk::::header_from_value_ptr(value.cast::()); - let chunk_fat = SharedChunk::::header_to_fat(header.as_ptr()); - let chunk = NonNull::new_unchecked(chunk_fat); - chunk.as_ref().inc_ref(); - Self { - chunk, - _phantom: PhantomData, - } - } - } - - /// Cancels release-on-drop and returns the raw chunk pointer with - /// the +1 still live. 
Use when ownership of the +1 is being - /// handed to another holder (e.g. a freshly-constructed `Box`). + /// Cancels release-on-drop and returns the chunk pointer with +1 live. #[inline] pub(crate) fn forget(self) -> NonNull> { let chunk = self.chunk; diff --git a/crates/multitude/src/internal/constants.rs b/crates/multitude/src/internal/constants.rs index 3999a7d50..9abdb06dc 100644 --- a/crates/multitude/src/internal/constants.rs +++ b/crates/multitude/src/internal/constants.rs @@ -13,13 +13,8 @@ pub(crate) const MIN_CHUNK_BYTES: usize = 512; pub(crate) const MAX_CHUNK_BYTES: usize = 65_536; /// Required alignment for every [`SharedChunk`](super::shared_chunk::SharedChunk) -/// allocation. Matches [`MAX_CHUNK_BYTES`] so that for any pointer to a -/// non-oversized value in the chunk, the chunk header's address can be -/// recovered by subtracting the low `CHUNK_ALIGN - 1` bits of the pointer. -/// -/// This in turn allows [`Box`](crate::Box) and similar smart pointers -/// to store a single value pointer without separately tracking the -/// chunk header. +/// allocation. Matching [`MAX_CHUNK_BYTES`] lets smart pointers recover the +/// chunk header from any non-oversized in-chunk value pointer. pub(crate) const CHUNK_ALIGN: usize = MAX_CHUNK_BYTES; /// Maximum alignment accepted by smart-pointer / `Allocator::allocate` @@ -42,12 +37,9 @@ pub(crate) const MAX_NORMAL_ALLOC: usize = 16 * 1024; /// Cache size-class index, range `0..NUM_CHUNK_CLASSES`. /// -/// Wraps the raw `u8` to make invalid classes harder to construct -/// accidentally and to centralize the -/// [`bytes`](Self::bytes)/[`saturating_inc`](Self::saturating_inc) -/// helpers. `#[repr(transparent)]` so that `AtomicU8` cache slots in -/// [`ChunkProvider`](super::chunk_provider::ChunkProvider) can keep -/// storing the raw byte without conversion. 
+/// `#[repr(transparent)]` wrapper around the raw `u8` used by +/// [`ChunkProvider`](super::chunk_provider::ChunkProvider)'s atomic cache +/// floor slots. #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] #[repr(transparent)] pub(crate) struct SizeClass(u8); @@ -132,11 +124,8 @@ impl SizeClass { #[cfg_attr(coverage_nightly, coverage(off))] #[cfg_attr(test, mutants::skip)] // unreachable: refcount overflow requires usize::MAX live refs pub(crate) fn refcount_overflow_abort() -> ! { - // Under `cfg(test)` we panic instead of aborting so the overflow-guard - // call sites (otherwise unreachable without `usize::MAX` live references) - // can be exercised by `#[should_panic]` unit tests. Production builds are - // never compiled with `cfg(test)`, so the abort behavior below is the only - // one that ships. + // In tests, panic so overflow guards can be asserted with `#[should_panic]`. + // Non-test builds abort. #[cfg(test)] { panic!("multitude: refcount overflow (test)"); diff --git a/crates/multitude/src/internal/current_chunk.rs b/crates/multitude/src/internal/current_chunk.rs index 55737f10f..0f6d24f9d 100644 --- a/crates/multitude/src/internal/current_chunk.rs +++ b/crates/multitude/src/internal/current_chunk.rs @@ -3,17 +3,15 @@ //! Single-slot interior-mutable holder for a [`ChunkMutator`]. //! -//! [`CurrentChunk`] is a `repr(transparent)` newtype over -//! `UnsafeCell>` that encapsulates the `unsafe` access -//! patterns needed by [`Arena`](crate::Arena)'s hot path. +//! [`CurrentChunk`] wraps `UnsafeCell>` for +//! [`Arena`](crate::Arena)'s hot path. //! //! # Soundness contract //! -//! `CurrentChunk` does **not** track borrows at runtime. The holder -//! (currently [`Arena`](crate::Arena)) must obey two invariants: +//! `CurrentChunk` does not track borrows at runtime. The holder must ensure: //! -//! 1. *Single-threaded access*: the holder is `!Sync`. -//! 2. *No re-entry during borrow*: the shared reference returned by +//! 1. 
Single-threaded access (`!Sync` holder). +//! 2. No re-entry during borrow: the shared reference returned by //! [`borrow`](CurrentChunk::borrow) must not be held across any //! `replace`/`drop_replace` on the same cell. @@ -35,11 +33,7 @@ impl CurrentChunk { Self(UnsafeCell::new(mutator)) } - /// Borrow the contained mutator. Hot-path entry; inlines fully. - /// - /// The returned reference is valid only until the next - /// `replace`/`drop_replace` on this cell. See module docs for the - /// soundness contract. + /// Borrow the mutator until the next `replace` / `drop_replace`. #[expect(clippy::inline_always, reason = "hot-path entry; must inline fully for arena performance")] #[inline(always)] pub(crate) fn borrow(&self) -> &ChunkMutator { diff --git a/crates/multitude/src/internal/drop_entry.rs b/crates/multitude/src/internal/drop_entry.rs index 6b6a80d5f..21929a4fe 100644 --- a/crates/multitude/src/internal/drop_entry.rs +++ b/crates/multitude/src/internal/drop_entry.rs @@ -43,31 +43,18 @@ const PAD_BYTES: usize = pad_bytes(); /// A single entry in a chunk's trailing drop list. /// -/// Drop entries are appended at the high end of the chunk's payload, growing -/// downward, while bump allocations grow upward from the low end. The chunk's -/// `drop_entry_count` counts the number of entries written at the tail. +/// Entries grow downward from the high end of the payload; bump allocations +/// grow upward from the low end. /// /// # Two-phase commit /// -/// Each entry is created in two phases: -/// -/// 1. At allocation time, [`DropEntry::placeholder`] is written into the -/// slot and `drop_entry_count` is incremented. `drop_fn` is `None`, so the -/// replay loop will skip the slot if it is never committed. -/// 2. When the corresponding value is initialized, -/// [`DropEntry::commit_drop_fn`] is invoked to fill in the real shim -/// pointer. 
-/// -/// This two-phase scheme means out-of-order initialization is safe: a slot -/// whose `Uninit` was dropped without `init` simply stays in the placeholder -/// state and is harmless. +/// Allocation writes a [`DropEntry::placeholder`] and increments the count. +/// Initialization later calls [`DropEntry::commit_drop_fn`]. Uncommitted +/// placeholders are skipped during replay. #[repr(C)] pub(crate) struct DropEntry { - /// Type-erased shim. Stored as `AtomicPtr<()>` so the function - /// pointer's provenance survives the atomic store/load round-trip - /// (an `AtomicUsize` with `fn-as-usize` casts would lose provenance - /// under Miri's Stacked Borrows and the recovered function pointer - /// would be unresolvable when called). A null value means + /// Type-erased shim. `AtomicPtr<()>` preserves function-pointer + /// provenance across the store/load round-trip. Null means /// "uncommitted placeholder". /// /// The placeholder → committed transition is race-safe because @@ -103,8 +90,7 @@ impl DropEntry { /// Fills in the real drop shim pointer. Idempotent under races: when /// two threads commit the same slot, both writes are the same value /// (the shim is determined by `T`), so a relaxed-store is sufficient - /// once paired with the `Acquire` load in [`replay_drops`] / - /// [`commit_placeholder_drop_fn`]. + /// once paired with the `Acquire` load in [`replay_drops`]. #[inline] pub(crate) fn commit_drop_fn(&self, drop_fn: DropFn) { // Cast the fn pointer to `*mut ()` for atomic storage; this @@ -148,74 +134,6 @@ impl DropEntry { } } -/// Scans the `drop_entry_count` `DropEntry`s packed against the high end of -/// `payload` for the unique uncommitted placeholder whose `value_offset` and -/// `len` match, and commits `drop_fn` into it. Returns `true` if such an -/// entry was found and committed, `false` otherwise. -/// -/// Used by `Arc::>::assume_init` to retarget the placeholder -/// reserved by `Arena::alloc_uninit_arc` once the value is initialized. 
The -/// entry walk mirrors [`replay_drops`] exactly so the located slot is the -/// same one the teardown replay will later read. -/// -/// # Safety -/// -/// - `payload` / `payload_len` / `drop_entry_count` carry the same contract -/// as [`replay_drops`]: they must describe the live chunk's payload and the -/// number of entries previously written by the allocator at the tail. -/// - The caller must own a strong reference on the chunk (so it stays live) -/// and must not let another thread commit the same placeholder concurrently -/// (see the `assume_init` "called at most once per allocation" contract). -#[allow( - clippy::cast_ptr_alignment, - reason = "caller guarantees entries are naturally aligned within the payload; see DropEntry layout" -)] -pub(crate) unsafe fn commit_placeholder_drop_fn( - payload: *mut u8, - payload_len: usize, - drop_entry_count: usize, - value_offset: usize, - len: usize, - drop_fn: DropFn, -) -> bool { - let entry_size = mem::size_of::(); - let entry_align = mem::align_of::(); - // Align the *absolute* payload-end address down to `entry_align`, - // matching `ChunkMutator::from_owned`'s `aligned_end_addr` formula. - // Doing the alignment on absolute addresses (rather than on - // `payload_len` alone) keeps the entry positions valid even when - // `payload` itself is not `entry_align`-aligned — the chunk - // headers don't pad their payload start anymore. - let payload_addr = payload as usize; - let aligned_end_offset = ((payload_addr.wrapping_add(payload_len)) & !(entry_align - 1)).wrapping_sub(payload_addr); - // Find the placeholder by (value_offset, len) and unconditionally - // store the real shim. Concurrent `assume_init` calls on cloned - // handles for the same allocation race here; both calls compute - // the same `drop_fn` (the monomorphisation of `drop_shim_*` for - // `T`), so racing atomic stores are idempotent and well-defined. 
- // - // A two-phase "check-then-write" alternative would have to compare - // the stored function pointer to a freshly-cast `drop_fn as *mut ()` - // on the loser's path, which is fragile under Miri: the - // fn-pointer-to-data-pointer cast can synthesise distinct data - // addresses across invocations of the same function. The single- - // pass unconditional store sidesteps the comparison entirely. - for i in 0..drop_entry_count { - let entry_off = aligned_end_offset - (i + 1) * entry_size; - // SAFETY: `entry_off + entry_size <= aligned_end_offset <= payload_len`, - // so the entry lies inside the payload; the caller guarantees an - // initialized `DropEntry` was written there. We hold a chunk - // reference, so the slot stays live for this read/write. - let entry = &*(payload.add(entry_off).cast::()); - if entry.value_offset() as usize != value_offset || entry.len() as usize != len { - continue; - } - entry.commit_drop_fn(drop_fn); - return true; - } - false -} - /// A type-erased drop shim for `count` consecutive `T`s. /// /// `ptr` must be aligned for `T` and point at `count` initialized `T`s. This @@ -232,18 +150,11 @@ pub(crate) unsafe fn drop_shim(ptr: *mut u8, count: usize) { ptr::drop_in_place(slice); } -/// Walks the `drop_entry_count` `DropEntry`s packed against the high end of -/// `payload` and invokes each committed shim against the entry's value -/// region (`value_offset` bytes into `payload`, `len` elements). +/// Replays committed drop entries packed against the high end of `payload`. /// -/// Entries are stored growing downward from the payload end. Entry `i` -/// (0-based, oldest first) sits at byte range -/// `[payload.len() - (i + 1) * size_of::(), payload.len() - i * size_of::())`. -/// We iterate in reverse-of-allocation order (LIFO) so child values are -/// dropped before their parents, matching Rust's drop semantics. 
-/// -/// Entries whose `drop_fn` is `None` (placeholder entries whose tickets were -/// dropped without being initialized) are skipped. +/// Entries grow downward from the payload end and are replayed newest-first +/// (LIFO), so child values drop before parents. Placeholder entries with no +/// `drop_fn` are skipped. /// /// # Safety /// @@ -265,28 +176,16 @@ pub(crate) unsafe fn replay_drops(payload: *mut u8, payload_len: usize, drop_ent } let entry_size = mem::size_of::(); let entry_align = mem::align_of::(); - // Align the *absolute* payload-end address down to `entry_align`, - // matching `ChunkMutator::from_owned`'s `aligned_end_addr` formula - // (which the allocator uses when reserving drop entries). Computing - // the alignment on absolute addresses keeps drop-entry positions - // valid even when `payload` itself is not `entry_align`-aligned — - // chunk headers do not pad the payload start. + // Align the absolute payload end so entry positions stay valid even when + // the payload start is not `entry_align`-aligned. let payload_addr = payload as usize; let aligned_end_offset = ((payload_addr.wrapping_add(payload_len)) & !(entry_align - 1)).wrapping_sub(payload_addr); - // Iterate newest-first (LIFO) so child values drop before their - // parents, matching Rust's drop semantics. Entries grow downward - // from the aligned payload end, so the newest (last-written) entry - // sits at the lowest address (`aligned_end - count * entry_size`) - // and the oldest at the highest (`aligned_end - entry_size`). - // Visiting `i` from `count - 1` down to `0` walks newest -> oldest. + // Entries grow downward, so reverse index order visits newest -> oldest. 
for i in (0..drop_entry_count).rev() { let entry_off = aligned_end_offset - (i + 1) * entry_size; // SAFETY: `entry_off + entry_size <= aligned_end_offset <= payload_len`, - // so the entry lies inside the payload allocation; the caller - // guarantees that an initialized `DropEntry` was previously - // written there. If committed, the entry's - // `value_off + count * size_of::()` slice is also inside the - // payload and contains initialized `T`s matching the shim type. + // so the entry lies inside the payload. The caller guarantees the + // entry and any committed value range are initialized and type-matched. let entry = &*(payload.add(entry_off).cast::()); if let Some(shim) = entry.drop_fn() { let value_off = entry.value_offset() as usize; @@ -301,77 +200,11 @@ pub(crate) unsafe fn replay_drops(payload: *mut u8, payload_len: usize, drop_ent mod tests { use super::*; - /// Direct test: when `drop_entry_count == 0`, the single-pass walk - /// of `commit_placeholder_drop_fn` skips its loop and returns - /// `false`. - #[test] - fn commit_placeholder_drop_fn_returns_false_when_count_is_zero() { - let mut buf = [0u8; 64]; - let shim_fn = drop_shim:: as DropFn; - // SAFETY: buffer is exclusively owned and the count is 0 so no entry - // is read from it; we only need a valid pointer/length pair. - let result = unsafe { commit_placeholder_drop_fn(buf.as_mut_ptr(), buf.len(), 0, 0, 1, shim_fn) }; - assert!(!result); - } - - /// Direct test: the single-pass walk skips a non-matching - /// `(value_offset, len)` entry (`continue`) and commits the next - /// matching entry (return `true`). Covers both the skip arm and the - /// success arm of the loop body. + /// [`replay_drops`] must locate entries by absolute payload-end alignment + /// even when `payload_ptr` is not `DropEntry`-aligned. Only committed + /// entries run. 
#[test] - fn commit_placeholder_drop_fn_skips_non_matching_then_commits_match() { - let entry_size = mem::size_of::(); - let entry_align = mem::align_of::(); - let buf_size = entry_size * 4; - let mut buf = std::vec![0u8; buf_size + entry_align]; - let base_addr = buf.as_mut_ptr() as usize; - let aligned_base = (base_addr + entry_align - 1) & !(entry_align - 1); - let payload_offset = aligned_base - base_addr; - // SAFETY: `payload_offset` is within `buf`'s allocation by construction. - let payload_ptr = unsafe { buf.as_mut_ptr().add(payload_offset) }; - let payload_len = buf_size; - let aligned_len = payload_len & !(entry_align - 1); - - let shim_fn = drop_shim:: as DropFn; - let value_offset: u16 = 0; - let len: u16 = 1; - - // Top slot: a *non-matching* placeholder (different value_offset). - let top_off = aligned_len - entry_size; - // Second slot: the matching placeholder. - let next_off = aligned_len - 2 * entry_size; - // SAFETY: see above; placements are within the aligned region and - // both writes target `DropEntry`-aligned addresses. - unsafe { - let top_ptr = payload_ptr.add(top_off).cast::(); - ptr::write(top_ptr, DropEntry::placeholder(99, 1)); - let next_ptr = payload_ptr.add(next_off).cast::(); - ptr::write(next_ptr, DropEntry::placeholder(value_offset, len)); - } - - // SAFETY: the buffer contains 2 placeholder `DropEntry`s, the - // second one matching `(value_offset, len)`. - let result = unsafe { commit_placeholder_drop_fn(payload_ptr, payload_len, 2, value_offset as usize, len as usize, shim_fn) }; - assert!(result); - - // The matching slot now has the real drop fn installed. - // SAFETY: `next_ptr` was initialized above and stays valid for - // the test's lifetime. - let next_ptr = unsafe { payload_ptr.add(next_off).cast::() }; - // SAFETY: the slot is initialized. 
- let installed = unsafe { (*next_ptr).drop_fn() }; - assert!(installed.is_some()); - } - - /// When `payload_ptr` is **not** `align_of::()`-aligned, - /// both `commit_placeholder_drop_fn` and `replay_drops` must still - /// place drop entries at absolutely-aligned addresses near the - /// payload tail. The buffer below intentionally offsets the payload - /// start by `entry_align - 1` bytes from an aligned base, so the - /// payload start address is 1-aligned but the *end* of the - /// reserved payload still lands on an `entry_align` multiple. - #[test] - fn replay_and_commit_tolerate_unaligned_payload_start() { + fn replay_tolerates_unaligned_payload_start() { use std::sync::atomic::{AtomicUsize, Ordering}; static CALLS: AtomicUsize = AtomicUsize::new(0); fn counting_shim(_p: *mut u8, _n: usize) { @@ -400,41 +233,30 @@ mod tests { let aligned_end_addr = (payload_start_addr + payload_len) & !(entry_align - 1); let aligned_end_offset = aligned_end_addr - payload_start_addr; - let value_offset: u16 = 0; - let len: u16 = 1; let shim_fn = counting_shim as DropFn; - // Write two placeholders at the correctly-aligned offsets. + // Write a committed entry and a non-committed placeholder at the + // correctly-aligned offsets. // SAFETY: both offsets are within the payload buffer and produce // entry_align-aligned addresses by construction. unsafe { let top_off = aligned_end_offset - entry_size; let next_off = aligned_end_offset - 2 * entry_size; - // Top: non-matching placeholder. + // Top: placeholder left uncommitted (no shim). ptr::write(payload_ptr.add(top_off).cast::(), DropEntry::placeholder(99, 1)); - // Below: matching placeholder. - ptr::write( - payload_ptr.add(next_off).cast::(), - DropEntry::placeholder(value_offset, len), - ); + // Below: placeholder committed to the counting shim. 
+ let next_ptr = payload_ptr.add(next_off).cast::(); + ptr::write(next_ptr, DropEntry::placeholder(0, 1)); + (*next_ptr).commit_drop_fn(shim_fn); } - // Commit phase must locate the matching entry and install the shim. - // SAFETY: both entries are initialized; payload_len includes them. - let committed = unsafe { commit_placeholder_drop_fn(payload_ptr, payload_len, 2, value_offset as usize, len as usize, shim_fn) }; - assert!(committed); - - // Replay phase must invoke the installed shim exactly once - // (the non-matching placeholder still has no shim). + // Only the committed shim runs. // SAFETY: payload_ptr + payload_len bounds the live buffer. unsafe { replay_drops(payload_ptr, payload_len, 2) }; assert_eq!(CALLS.load(Ordering::Relaxed), 1); } - /// `raw_used` returns the byte sum of the un-padded `DropEntry` - /// fields: a `DropFn` (function pointer, `usize`-sized) + two - /// `u16`s. Pin the exact value so additive/multiplicative mutations - /// flip it. + /// `raw_used` returns the unpadded field-size sum. #[test] fn raw_used_is_sum_of_field_sizes() { let expected = mem::size_of::() + mem::size_of::() + mem::size_of::(); diff --git a/crates/multitude/src/internal/in_chunk.rs b/crates/multitude/src/internal/in_chunk.rs index 621e7547d..3ef767002 100644 --- a/crates/multitude/src/internal/in_chunk.rs +++ b/crates/multitude/src/internal/in_chunk.rs @@ -6,29 +6,18 @@ use core::marker::PhantomData; use core::ptr::NonNull; -/// A non-null, well-aligned pointer that — by construction — addresses -/// storage inside the payload of a live arena chunk (with one narrow -/// exception for ZSTs, see below). -/// -/// `InChunk` is the fundamental "I came from the allocator" pointer -/// abstraction. The rest of the crate carries these around instead of raw -/// `NonNull` so that the difference between "any pointer" and "a pointer -/// the allocator handed out" is visible in the type system. +/// A non-null, well-aligned pointer produced by the chunk allocator. 
/// /// # Invariants /// /// - `self.ptr` is non-null and well-aligned for `T`. -/// - If `core::mem::size_of_val(&*self.ptr) > 0`, the pointed-to region lies -/// entirely within the payload of an arena chunk whose lifetime exceeds the -/// use of this `InChunk`. (Liveness is enforced externally by the holder of -/// the chunk's `Arc`.) +/// - If the pointed-to region has nonzero size, it lies entirely within the +/// payload of a live arena chunk. /// - For zero-sized values (ZSTs and empty slices) the pointer is permitted /// to be a dangling, well-aligned non-null address. There is no payload /// storage to reference in that case. /// -/// `InChunk` is `Copy` because copying a pointer cannot violate any of the -/// above. Mutability and aliasing discipline are enforced by the wrappers -/// (`Uninit`, `UninitDrop`, `ArenaBuf`, etc.) that consume `InChunk`s. +/// Aliasing discipline is enforced by wrappers that consume `InChunk`. pub(crate) struct InChunk { ptr: NonNull, _phantom: PhantomData<*const T>, @@ -47,9 +36,7 @@ impl Copy for InChunk {} impl InChunk { /// Wraps a raw `NonNull` that satisfies the type invariants above. /// - /// This constructor is `pub(super)` so only sibling modules in - /// `internal/` (notably `ChunkMutator`) can mint `InChunk` values; the - /// rest of the crate may only obtain them through allocator outputs. + /// Only sibling internal modules can mint `InChunk` values. #[inline] pub(super) fn from_raw(ptr: NonNull) -> Self { Self { @@ -95,9 +82,8 @@ impl InChunk { /// Builds an `InChunk<[T]>` describing `len` consecutive `T`s starting at /// this byte address. /// - /// The caller (always `ChunkMutator`) is responsible for ensuring that - /// the address is aligned for `T` and that `len * size_of::()` bytes - /// of valid in-chunk storage start here. + /// Caller ensures alignment for `T` and enough in-chunk storage for `len` + /// elements. 
#[inline] pub(crate) fn into_slice(self, len: usize) -> InChunk<[T]> { let slice = NonNull::slice_from_raw_parts(self.ptr.cast::(), len); diff --git a/crates/multitude/src/internal/local_chunk.rs b/crates/multitude/src/internal/local_chunk.rs index 491200874..156fee9fe 100644 --- a/crates/multitude/src/internal/local_chunk.rs +++ b/crates/multitude/src/internal/local_chunk.rs @@ -3,10 +3,8 @@ //! Single-threaded reference-counted arena chunk. -// All methods on chunks that touch raw memory are themselves `unsafe fn`s -// with documented safety contracts at the function level. Wrapping each line -// of their body in an additional `unsafe { ... }` block adds noise without -// adding any safety boundary, so we let edition-2024's lint slide here. +// Raw-memory methods are `unsafe fn` with item-level safety contracts; inner +// unsafe blocks would not add a boundary here. #![allow(unsafe_op_in_unsafe_fn, reason = "see module doc: inner unsafe blocks in unsafe fn add noise here")] #![allow(clippy::unnecessary_safety_comment, reason = "safety rationale documented at function level")] @@ -22,38 +20,20 @@ use super::drop_entry::replay_drops; /// A bump-allocation chunk used by a single arena thread. /// -/// The chunk is laid out as a fixed-size header immediately followed in -/// memory by `capacity` bytes of payload. The header type is `Sized` so it -/// can be referenced via thin `NonNull` pointers; payload addresses are -/// recovered with `payload_ptr`. +/// Fixed header followed by `capacity` payload bytes. /// /// # Provider back-pointer /// -/// `provider` is a non-owning raw pointer rather than a `Weak`. -/// This is sound because a `LocalChunk` is single-owner (its refcount is only -/// ever 0 or 1; [`Chunk::inc_ref`] is `unreachable!()`) and reachable only via -/// the owning [`Arena`](crate::Arena)'s `current_local` / `retired_local` / -/// the provider's own `local_cache`. 
The arena's `provider: Arc` -/// field is declared **after** the chunk-holding fields, so when the arena is -/// dropped the local mutators tear down first while the provider is still -/// live; chunks in the cache are destroyed directly from the provider's own -/// `Drop` (`drain_all`) without going through the back-pointer. The provider -/// therefore strictly outlives every local-chunk teardown that dereferences -/// this pointer, so no Weak refcount or orphan-handling branch is needed. +/// `provider` is a non-owning raw pointer. A `LocalChunk` is single-owner and +/// reachable only through the owning arena or provider cache; arena field +/// order keeps the provider alive for all local-chunk teardown paths. #[repr(C)] pub(crate) struct LocalChunk { - /// Non-owning back-pointer to the chunk's provider. See the type-level - /// doc for the soundness argument. Never dereferenced from - /// [`Self::destroy`] (the caller — provider methods or the provider's - /// own drop — supplies the allocator); only read from - /// [`ChunkOps::teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release) - /// to route the chunk back to the cache. + /// Non-owning provider back-pointer, used only by + /// [`ChunkOps::teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release). provider: *const ChunkProvider, capacity: usize, - /// Intrusive next-link, used in two disjoint phases of the chunk's - /// life. Stored as a thin `*mut u8` header pointer (`null` for end- - /// of-list); the fat DST pointer is recovered via - /// [`Self::header_to_fat`] when consumers walk the list. + /// Intrusive next-link as a thin `*mut u8` header pointer. /// /// * While the chunk is **retired** (refcount = 1, sitting on /// [`RetiredLocalChunks`](crate::arena::retired_local::RetiredLocalChunks)) @@ -61,10 +41,7 @@ pub(crate) struct LocalChunk { /// * While the chunk is **cached** (refcount = 0, sitting on the /// provider's local freelist) it links the next cached chunk. 
/// - /// Those two phases are mutually exclusive in time, so a single - /// field serves both purposes. Placed after `capacity` (both - /// `usize`-aligned) so the smaller `ref_count` / `drop_entry_count` - /// fields can pack into the tail without trailing padding. + /// The phases are mutually exclusive, so one field serves both lists. next: Cell<*mut u8>, ref_count: Cell, drop_entry_count: Cell, @@ -75,14 +52,9 @@ pub(crate) struct LocalChunk { /// never went through a mutator (e.g. preallocated cache fills). #[cfg(feature = "stats")] wasted_at_retire: Cell, - /// Bump-payload tail. `data.len() == capacity`. Declared as - /// `[UnsafeCell]` (same layout as `[u8]`) so that shared - /// borrows of the chunk allow interior-mutable writes into the - /// payload, and so that `NonNull>` is a **fat - /// pointer** carrying provenance over the full chunk allocation - /// (essential for Miri's Stacked / Tree Borrows: a sized-struct - /// header pointer would have provenance for only the header bytes, - /// making any payload-derivation undefined behavior). + /// Bump-payload tail. `[UnsafeCell]` permits payload writes through + /// shared chunk borrows and keeps fat-pointer provenance over the full + /// allocation. /// /// The payload start is **not** required to be `DropEntry`-aligned: /// [`replay_drops`](super::drop_entry::replay_drops) computes drop- @@ -91,13 +63,9 @@ pub(crate) struct LocalChunk { data: [UnsafeCell], } -// SAFETY: `LocalChunk` would auto-derive `Send` when `A: Send` but for the -// raw `*const ChunkProvider` back-pointer, which the compiler conservatively -// treats as `!Send`. The pointer references a `ChunkProvider` that is owned -// by the same `Arena` that owns this chunk (via `Arc>`), so -// moving the arena between threads moves both the chunk and its provider -// together: the address stays valid and the data behind it is `Send` (asserted -// by the `Send` impl on `ChunkProvider` when `A: Send`). 
+// SAFETY: the raw provider back-pointer moves with the arena that owns both +// the chunk and `Arc>`; when `A: Send`, the provider data is +// also Send. unsafe impl Send for LocalChunk {} impl LocalChunk { @@ -126,15 +94,10 @@ impl LocalChunk { } } - /// Alignment to use when allocating/deallocating a chunk's backing memory. - /// `A` is not stored in the chunk header, so only the header fields' - /// alignment matters (max is `usize`, 8 bytes on 64-bit). The chunk - /// pointer therefore doesn't need to be over-aligned for `A`. + /// Alignment for the chunk backing allocation. /// - /// Unlike [`SharedChunk`](super::shared_chunk::SharedChunk), local - /// chunks need no `CHUNK_ALIGN` base alignment (they hand out no - /// header-recovering smart pointers), so the base and value alignments - /// coincide. + /// Local chunks need no `CHUNK_ALIGN` base alignment; base and value + /// alignments coincide. #[inline] pub(crate) const fn struct_align() -> usize { Self::value_align() @@ -152,12 +115,8 @@ impl LocalChunk { /// Allocates a fresh chunk with `payload_size` payload bytes and /// refcount 1. /// - /// `allocator` is borrowed only to perform the actual allocation; it is - /// not stored. `provider` is stashed as a non-owning back-pointer (see - /// the type-level doc for the soundness argument); pass `ptr::null()` - /// for stand-alone chunks that will be destroyed directly via - /// [`Self::destroy`] without going through - /// [`teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release). + /// `allocator` is not stored. `provider` may be null for standalone chunks + /// destroyed directly via [`Self::destroy`]. #[allow( clippy::cast_ptr_alignment, reason = "raw_u8_ptr came from `allocator.allocate(layout)` with `Self`'s alignment; the *mut [u8] -> *mut Self cast preserves the byte address with its full provenance" @@ -194,11 +153,8 @@ impl LocalChunk { } } - /// Non-owning back-pointer to the chunk's provider. 
See the type-level - /// doc for the soundness argument: the provider strictly outlives every - /// teardown that calls this. Only used by - /// [`ChunkOps::teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release) - /// to route the chunk back to the cache. + /// Non-owning provider back-pointer used by + /// [`ChunkOps::teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release). #[inline] pub(crate) fn provider(&self) -> *const ChunkProvider { self.provider @@ -229,11 +185,7 @@ impl LocalChunk { /// `chunk` must reference a live (still allocated) chunk. #[inline] pub(crate) unsafe fn payload_ptr(chunk: NonNull) -> NonNull { - // Project through the DST's slice-tail field directly. This - // avoids the fat-to-thin cast (`chunk.as_ptr().cast::()`) - // whose provenance treatment in Miri is inconsistent — going - // through `&raw mut (*chunk).data` keeps the slice's provenance - // intact (covers payload_size bytes). + // Project through the DST tail so the pointer keeps payload provenance. let data_slice_ptr: *mut [UnsafeCell] = &raw mut (*chunk.as_ptr()).data; // SAFETY: `data_slice_ptr` is non-null and points at the first // payload byte. @@ -268,13 +220,9 @@ impl LocalChunk { /// passed to [`Self::allocate`] when this chunk was created. pub(crate) unsafe fn destroy(chunk: NonNull, allocator: &A) { let header = Self::header_size(); - // SAFETY: caller owns the only reference; we read trivial fields, - // replay drops in the payload, then deallocate using the caller- - // supplied allocator. The layout exactly matches the one returned - // by `allocator.allocate` in `allocate` (both go through - // `chunk_layout`). The header carries no Drop-implementing field - // (the provider back-pointer is a plain raw pointer), so nothing - // else needs to be dropped in place before deallocation. + // SAFETY: caller owns the only reference. 
We replay payload drops, + // deallocate with the matching `chunk_layout`, and the header has no + // Drop fields. let header_ref = &*chunk.as_ptr(); let capacity = header_ref.capacity; let drop_count = header_ref.drop_entry_count.get() as usize; @@ -285,11 +233,8 @@ impl LocalChunk { allocator.deallocate(NonNull::new_unchecked(raw_ptr), layout); } - /// Reads the intrusive next-link without modifying it. The chunk - /// participates in two singly-linked lists at different points in - /// its lifecycle — the arena's retired list and the provider's - /// cache freelist — and this field encodes both. Returns a thin - /// `*mut u8` header pointer (`null` for end-of-list). + /// Reads the intrusive next-link as a thin header pointer. The field is + /// shared by the retired list and provider cache freelist. /// /// # Safety /// @@ -331,6 +276,26 @@ impl LocalChunk { r.drop_entry_count.set(0); } + /// Returns the number of drop entries currently stored at the tail of the + /// chunk. + #[inline] + pub(crate) fn drop_entry_count(&self) -> usize { + self.drop_entry_count.get() as usize + } + + /// Sets the number of drop entries currently stored at the tail of the + /// chunk. + #[inline] + pub(crate) fn set_drop_entry_count(&self, count: usize) { + #[allow( + clippy::cast_possible_truncation, + reason = "a 64KiB chunk holds at most 4096 drop entries (« u16::MAX); round-trip asserted below" + )] + let narrowed = count as u16; + debug_assert_eq!(usize::from(narrowed), count, "drop-entry count exceeds u16 range"); + self.drop_entry_count.set(narrowed); + } + /// Overwrites the refcount. Test-only seam so unit tests can drive /// refcount-dependent paths without poking the field directly. 
#[cfg(test)] @@ -360,12 +325,8 @@ impl Chunk for LocalChunk { #[inline] #[cfg_attr(coverage_nightly, coverage(off))] fn inc_ref(&self) { - // Local chunks host arena-lifetime allocations, which are single-owner: - // the arena holds the sole +1 and plain arena allocations hand back - // borrows without cloning the refcount. Only smart pointers (Arc/Box) - // clone a chunk reference, and those live exclusively in `SharedChunk`. - // So this is never reached in production; the `Chunk` trait only - // requires it to keep the local/shared chunk machinery uniform. + // Local chunks are single-owner; smart pointers use `SharedChunk`. + // This exists only to satisfy the shared `Chunk` trait. unreachable!("LocalChunk refcount is never incremented; smart pointers use SharedChunk") } @@ -375,22 +336,6 @@ impl Chunk for LocalChunk { self.ref_count.set(new); new == 0 } - - #[inline] - fn drop_entry_count(&self) -> usize { - self.drop_entry_count.get() as usize - } - - #[inline] - fn set_drop_entry_count(&self, count: usize) { - #[allow( - clippy::cast_possible_truncation, - reason = "a 64KiB chunk holds at most 4096 drop entries (« u16::MAX); round-trip asserted below" - )] - let narrowed = count as u16; - debug_assert_eq!(usize::from(narrowed), count, "drop-entry count exceeds u16 range"); - self.drop_entry_count.set(narrowed); - } } #[cfg(test)] @@ -483,15 +428,9 @@ mod tests { } /// `header_size` is `offset_of!() + size_of::<>()`. - /// For `LocalChunk`, the header layout is fixed: - /// 8 (provider) + 8 (capacity) + 8 (`next`) + 1 (`ref_count`) + - /// 1 pad + 2 (`drop_entry_count`) = 28 bytes. Under the `stats` - /// feature an additional `wasted_at_retire: Cell` field is - /// appended (after 0 pad bytes since the prior offset is already - /// 4-aligned at 28), for 32 bytes total. Reordering moved `next` - /// ahead of the small fields so the trailing - /// `ref_count` / `drop_entry_count` pair packs into 4 bytes - /// without end-of-struct padding. 
+ /// For `LocalChunk`, the fixed header is 28 bytes; with `stats`, + /// appended `wasted_at_retire: Cell` makes it 32 bytes. `next` + /// precedes the small fields so they pack without trailing padding. #[test] fn header_size_for_global_matches_layout() { #[cfg(not(feature = "stats"))] @@ -500,13 +439,8 @@ mod tests { assert_eq!(LocalChunk::::header_size(), 32); } - /// `Chunk::inc_ref` on a local chunk is unreachable in production — local - /// chunks have at most one owner (the arena). The trait impl exists only - /// to keep the `Chunk` interface uniform between local and shared chunks; - /// invoking it must abort/panic so that any future caller that wrongly - /// routes a local refcount bump through this path fails loudly. A test - /// invoking the trait method and expecting a panic kills a mutant that - /// replaces the body with `()`. + /// `Chunk::inc_ref` on a local chunk must panic; local chunks have at most + /// one owner and refcount bumps belong to `SharedChunk`. #[test] #[should_panic(expected = "LocalChunk refcount is never incremented")] fn local_chunk_inc_ref_is_unreachable() { diff --git a/crates/multitude/src/internal/owner_thread_cell.rs b/crates/multitude/src/internal/owner_thread_cell.rs index bbc844679..c9bf46d76 100644 --- a/crates/multitude/src/internal/owner_thread_cell.rs +++ b/crates/multitude/src/internal/owner_thread_cell.rs @@ -1,16 +1,11 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -//! Owner-thread-confined cell: shared-Sync wrapper around an `UnsafeCell`. +//! Owner-thread-confined `UnsafeCell`. //! -//! The cell is `Sync` so it can live inside a struct that is itself shared -//! across threads, but every access goes through `unsafe fn with`. The -//! `unsafe` caller asserts that the call happens on the cell's logical -//! "owner thread"; concurrent access is undefined behavior. -//! -//! Used by [`ChunkProvider`](super::ChunkProvider) to hold the local-chunk -//! 
cache head and local high-water mark — both touched exclusively by the -//! arena's owning thread, even though the provider itself is `Sync`. +//! The cell is `Sync`, but every access goes through `unsafe fn with`; callers +//! assert owner-thread, exclusive access. Used by +//! [`ChunkProvider`](super::chunk_provider::ChunkProvider)'s local cache state. use core::cell::UnsafeCell; diff --git a/crates/multitude/src/internal/shared_chunk.rs b/crates/multitude/src/internal/shared_chunk.rs index a604a5b1c..14cae630c 100644 --- a/crates/multitude/src/internal/shared_chunk.rs +++ b/crates/multitude/src/internal/shared_chunk.rs @@ -3,9 +3,8 @@ //! Multi-threaded reference-counted arena chunk. -// See note in `local_chunk.rs`: methods touching raw memory are `unsafe fn` -// with module-level safety contracts; we don't repeat the inner unsafe -// wrappers that edition 2024 requires by default. +// Raw-memory methods are `unsafe fn` with item-level safety contracts; inner +// unsafe blocks would not add a boundary here. #![allow(unsafe_op_in_unsafe_fn, reason = "see module doc: inner unsafe blocks in unsafe fn add noise here")] #![allow(clippy::unnecessary_safety_comment, reason = "safety rationale documented at function level")] @@ -15,14 +14,13 @@ use core::mem; use core::ptr::{self, NonNull}; #[cfg(feature = "stats")] use core::sync::atomic::AtomicU32; -use core::sync::atomic::{AtomicPtr, AtomicU16, AtomicUsize, Ordering, fence}; +use core::sync::atomic::{AtomicPtr, AtomicUsize, Ordering, fence}; use allocator_api2::alloc::{AllocError, Allocator}; use super::chunk::Chunk; use super::chunk_provider::ChunkProvider; use super::constants::{CHUNK_ALIGN, refcount_overflow_abort}; -use super::drop_entry::replay_drops; /// A bump-allocation chunk whose allocations can outlive the arena. 
/// @@ -36,66 +34,44 @@ pub(crate) struct SharedChunk { provider: Weak>, capacity: usize, ref_count: AtomicUsize, - /// Intrusive cache-freelist link, used while the chunk sits on - /// the provider's shared cache (refcount = 0). CAS-pushed and - /// CAS-popped from any thread, so the storage is atomic. `null` - /// when not on the list. Placed after `ref_count` (both 8-aligned) - /// so the trailing `drop_entry_count` (`u16`) packs against - /// `data` without end-of-struct padding. + /// Intrusive shared-cache freelist link. Atomic because releases can push + /// from any thread; null when not cached. /// - /// Unlike `LocalChunk::next`, this slot is *only* used for the - /// cache freelist: shared chunks don't have a retired-list phase - /// since handouts outlive the arena and chunks transition - /// directly from refcount = 1 → 0 → cached (or destroyed). + /// Shared chunks use this only for the cache freelist, not a retired list. next: AtomicPtr, - drop_entry_count: AtomicU16, - /// Free bytes between the bump cursor and the drop-entry top at the - /// time this chunk was retired from a `ChunkMutator`. Set in the - /// mutator's `Drop` and read by [`ChunkProvider::release_shared`] - /// to decrement the wasted-tail counter. Stays at 0 for chunks that - /// never went through a mutator (e.g. preallocated cache fills). + /// Wasted tail recorded when a `ChunkMutator` retires this chunk; released + /// by [`ChunkProvider::release_shared`]. /// - /// Read in `release_shared` after the chunk's atomic refcount has - /// dropped to zero (with an acquire fence); the mutator's `Drop` - /// performs the `set` before its own `dec_ref`, so the store is - /// visible. + /// Release/acquire ordering makes the recorded value visible after + /// refcount reaches zero. #[cfg(feature = "stats")] wasted_at_retire: AtomicU32, /// Bump-payload tail. See `LocalChunk` for the - /// [`UnsafeCell]` provenance rationale. 
The payload start is - /// **not** required to be `DropEntry`-aligned: - /// [`replay_drops`](super::drop_entry::replay_drops) aligns drop- - /// entry positions via the absolute payload-end address. + /// [`UnsafeCell]` provenance rationale. Shared chunks register no + /// drop entries (values are owned by `Box`/`Arc` and dropped eagerly), + /// so the whole payload is available to the bump cursor. data: [UnsafeCell], } impl SharedChunk { - /// Borrow the non-owning back-pointer to the chunk's provider. The - /// provider may have been dropped (a shared chunk can outlive its - /// arena), so callers must `upgrade()` to use it. + /// Borrow the weak provider back-pointer; callers must `upgrade()`. #[inline] pub(crate) fn provider(&self) -> &Weak> { &self.provider } - /// Reads the free byte count stashed by the owning `ChunkMutator`'s - /// `Drop` (the gap between bump cursor and drop-entry top at retire). - /// `0` for chunks that never went through a mutator. + /// Reads the wasted-tail count stashed at retire time. #[cfg(feature = "stats")] #[inline] pub(crate) fn wasted_at_retire(&self) -> u32 { - // Acquire pairs with the `Release` store in `set_wasted_at_retire`; - // shared chunks may be inspected on a different thread than the - // one that performed the retire (the last `Arc::drop`). + // Acquire pairs with `set_wasted_at_retire`'s Release store; release + // may run on a different thread than retire. self.wasted_at_retire.load(Ordering::Acquire) } - /// Stashes the chunk's wasted-tail bytes at retire time, to be - /// subtracted from the provider's wasted-tail counter when the chunk - /// is eventually released to the cache or destroyed. + /// Stashes wasted-tail bytes for release-time stats subtraction. /// - /// `Release` so cross-thread `release_shared` callers observe the - /// stored value after their acquire fence on refcount = 0. + /// `Release` pairs with release-time acquire after refcount reaches zero. 
#[cfg(feature = "stats")] #[inline] pub(crate) fn set_wasted_at_retire(&self, n: u32) { @@ -111,15 +87,15 @@ impl SharedChunk { #[cfg_attr(test, mutants::skip)] pub(crate) const fn header_size() -> usize { // Under `stats`, `wasted_at_retire` is the last fixed-size field; - // otherwise it's `drop_entry_count`. The `[UnsafeCell]` tail - // has align 1 and sits flush against whichever it is. + // otherwise it's `next`. The `[UnsafeCell]` tail has align 1 and + // sits flush against whichever it is. #[cfg(feature = "stats")] { mem::offset_of!(Self, wasted_at_retire) + mem::size_of::() } #[cfg(not(feature = "stats"))] { - mem::offset_of!(Self, drop_entry_count) + mem::size_of::() + mem::offset_of!(Self, next) + mem::size_of::>() } } @@ -130,14 +106,8 @@ impl SharedChunk { if base >= CHUNK_ALIGN { base } else { CHUNK_ALIGN } } - /// The chunk type's own alignment (`align_of::()`, ignoring the - /// align-1 `[UnsafeCell]` tail): the max of `align_of::()` and - /// `align_of::()` (every other header field — the atomics and - /// the `Weak` pointer — has alignment `<= align_of::()`). - /// - /// Used to round the allocation *size* (vs. [`Self::struct_align`], - /// the larger *base*-address alignment). Pinned against the real - /// `align_of_val` by `value_align_matches_real_alignment`. + /// The chunk type's own alignment, used to round allocation size. This is + /// separate from [`Self::struct_align`], the base-address alignment. #[inline] #[cfg_attr(test, mutants::skip)] // pure layout constant pinned by a dedicated test pub(crate) const fn value_align() -> usize { @@ -146,13 +116,9 @@ impl SharedChunk { if a >= b { a } else { b } } - /// Recovers the chunk header (as a thin `*mut u8` carrying the - /// chunk allocation's provenance) from a pointer into the chunk's - /// payload by walking backwards through the chunk's `CHUNK_ALIGN` - /// tile. + /// Recovers a thin chunk-header pointer from an in-payload pointer. 
/// - /// Uses [`NonNull::byte_sub`] (provenance-preserving) rather than - /// reconstituting the header pointer from an integer. + /// Uses [`NonNull::byte_sub`] to preserve provenance. #[inline] #[cfg_attr(test, mutants::skip)] // mask mutations break refcount → OOM in mutant harness pub(crate) fn header_from_value_ptr(value: NonNull) -> NonNull { @@ -200,7 +166,6 @@ impl SharedChunk { ptr::write(&raw mut (*fat).provider, provider); ptr::write(&raw mut (*fat).capacity, payload_size); ptr::write(&raw mut (*fat).ref_count, AtomicUsize::new(1)); - ptr::write(&raw mut (*fat).drop_entry_count, AtomicU16::new(0)); ptr::write(&raw mut (*fat).next, AtomicPtr::new(ptr::null_mut())); #[cfg(feature = "stats")] ptr::write(&raw mut (*fat).wasted_at_retire, AtomicU32::new(0)); @@ -223,8 +188,8 @@ impl SharedChunk { let header = Self::header_size(); let header_ref = &*chunk.as_ptr(); let capacity = header_ref.capacity; - let drop_count = header_ref.drop_entry_count.load(Ordering::Acquire) as usize; - replay_drops(Self::payload_ptr(chunk).as_ptr(), capacity, drop_count); + // Shared chunks register no drop entries; per-`Arc` values drop on + // their last strong reference. let allocator: A = ptr::read(&raw const (*chunk.as_ptr()).allocator); ptr::drop_in_place(&raw mut (*chunk.as_ptr()).provider); let layout = crate::internal::chunk_alloc::chunk_layout(header, capacity, Self::value_align(), Self::struct_align()) @@ -234,10 +199,8 @@ impl SharedChunk { drop(allocator); } - /// Pointer to the chunk's intrusive cache-freelist link - /// (`AtomicPtr` storing a thin header pointer; cache stores - /// thin pointers since `*mut Self` is fat for the DST). The field - /// lives in the chunk header. + /// Pointer to the intrusive cache-freelist link storing a thin header + /// pointer. /// /// # Safety /// @@ -248,8 +211,8 @@ impl SharedChunk { &raw const (*chunk.as_ptr()).next } - /// Re-initializes a chunk popped from the cache: refcount → 1, - /// drop-entry count → 0. 
The caller becomes the +1 holder. + /// Re-initializes a chunk popped from the cache: refcount → 1. The caller + /// becomes the +1 holder. /// /// # Safety /// @@ -257,26 +220,10 @@ impl SharedChunk { /// chunk; the cache link is invalidated by this call. #[inline] pub(crate) unsafe fn reinit_for_acquire(chunk: NonNull) { - // SAFETY: caller owns the unique reference; atomics are safe to - // store unconditionally. + // SAFETY: caller owns the unique reference; the refcount store is + // safe to issue unconditionally. let r = &*chunk.as_ptr(); r.ref_count.store(1, Ordering::Relaxed); - r.drop_entry_count.store(0, Ordering::Relaxed); - } - - /// Loads the drop-entry count with `Acquire` ordering. - /// - /// The [`Chunk::drop_entry_count`](super::chunk::Chunk::drop_entry_count) - /// accessor uses `Relaxed`, which suffices for the owner thread. This - /// `Acquire` variant is for cross-thread readers (the deferred-init - /// commit in [`Arc`](crate::Arc)): it pairs with the owner thread's - /// `Release` publish in - /// [`set_drop_entry_count`](super::chunk::Chunk::set_drop_entry_count) - /// (via `ChunkMutator::publish_drop_count`) so the placeholder slot's - /// bytes are visible before the count is read. - #[inline] - pub(crate) fn drop_entry_count_acquire(&self) -> usize { - self.drop_entry_count.load(Ordering::Acquire) as usize } /// Overwrites the refcount. Test-only seam so unit tests can drive @@ -287,14 +234,9 @@ impl SharedChunk { self.ref_count.store(count, Ordering::Relaxed); } - /// Decrements `chunk`'s refcount on behalf of the caller, and if - /// that drops the count to zero, routes the chunk back through + /// Releases one strong ref and routes zero-ref chunks through /// [`teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release). /// - /// Used by smart-pointer drop paths ([`Box`](crate::Box), - /// [`Arc`](crate::Arc)) and by [`ChunkMutator`](super::ChunkMutator) - /// itself to share the "release one ref I am holding" sequence. 
- /// /// # Safety /// /// Caller must hold exactly one strong reference to `chunk` that @@ -313,14 +255,10 @@ impl SharedChunk { } } - /// Atomically reserves `n` additional strong references on this - /// chunk in a single `fetch_add`, in addition to whatever the - /// caller already holds. Aborts the process on overflow. + /// Atomically reserves `n` additional strong references. Aborts on + /// overflow. /// - /// Used by the arena's per-chunk surplus pre-credit: at chunk - /// install time the arena reserves a large surplus of refs so - /// per-allocation handouts can be tracked in a non-atomic local - /// counter; the unused portion is returned to the chunk via + /// Used by arena surplus pre-credit; unused refs are returned through /// [`Self::refund_refs`] when the chunk is retired. #[inline] pub(crate) fn pre_credit_refs(&self, n: usize) { @@ -339,12 +277,8 @@ impl SharedChunk { } } - /// Atomically returns `n` previously pre-credited but unused - /// refs to the chunk's counter via `fetch_sub` with `Release` - /// ordering. `Release` matches the existing per-ref `dec_ref` - /// ordering so any writes the arena thread performed into the - /// chunk are visible to other-thread holders that may observe - /// the lower count. + /// Atomically returns `n` pre-credited but unused refs with `Release` + /// ordering, matching [`Chunk::dec_ref`](super::Chunk::dec_ref). 
/// /// # Safety /// @@ -395,22 +329,6 @@ impl Chunk for SharedChunk { false } } - - #[inline] - fn drop_entry_count(&self) -> usize { - self.drop_entry_count.load(Ordering::Relaxed) as usize - } - - #[inline] - fn set_drop_entry_count(&self, count: usize) { - #[allow( - clippy::cast_possible_truncation, - reason = "a 64KiB chunk holds at most 4096 drop entries (« u16::MAX); round-trip asserted below" - )] - let narrowed = count as u16; - debug_assert_eq!(usize::from(narrowed), count, "drop-entry count exceeds u16 range"); - self.drop_entry_count.store(narrowed, Ordering::Release); - } } /// Largest payload byte count a shared chunk can offer to a bump allocator @@ -430,18 +348,16 @@ mod tests { /// `header_size` is `offset_of!() + size_of::<>()`. /// For `SharedChunk`, the header layout is fixed: /// 0 (allocator ZST) + 8 (provider `Weak`) + 8 (capacity) + - /// 8 (`ref_count`) + 8 (`next`) + 2 (`drop_entry_count`) = 34 bytes. - /// Under the `stats` feature an additional `wasted_at_retire: - /// AtomicU32` is appended after 2 pad bytes (offset 36) for 40 bytes - /// total. `next` is placed between `ref_count` and - /// `drop_entry_count` so the trailing `u16` packs against `data` - /// without padding when stats are off. + /// 8 (`ref_count`) + 8 (`next`) = 32 bytes. Under the `stats` feature an + /// additional `wasted_at_retire: AtomicU32` is appended (offset 32) for + /// 36 bytes total. Shared chunks carry no `drop_entry_count` — they never + /// register drop entries. 
#[test] fn header_size_for_global_matches_layout() { #[cfg(not(feature = "stats"))] - assert_eq!(SharedChunk::::header_size(), 34); + assert_eq!(SharedChunk::::header_size(), 32); #[cfg(feature = "stats")] - assert_eq!(SharedChunk::::header_size(), 40); + assert_eq!(SharedChunk::::header_size(), 36); } /// `struct_align` returns the max of `align_of::()`, @@ -460,11 +376,8 @@ mod tests { assert_eq!(got, super::super::constants::CHUNK_ALIGN); } - /// `chunk_layout` must round the allocation *size* up to - /// `value_align` (8) and set the *base* alignment to `struct_align` - /// (`CHUNK_ALIGN`), but must NOT round the size up to `CHUNK_ALIGN`. - /// Each cacheable size class must therefore produce an allocation - /// whose size equals the class bytes, not 64 KiB. + /// `chunk_layout` rounds size to `value_align` and base alignment to + /// `struct_align`, without inflating every class to `CHUNK_ALIGN`. #[test] fn chunk_layout_does_not_inflate_size_to_base_align() { use super::super::chunk_alloc::chunk_layout; diff --git a/crates/multitude/src/internal/thin_dst.rs b/crates/multitude/src/internal/thin_dst.rs index 59a0ea5a3..75bf23dab 100644 --- a/crates/multitude/src/internal/thin_dst.rs +++ b/crates/multitude/src/internal/thin_dst.rs @@ -10,14 +10,12 @@ //! [optional pad to align(T)][T::Metadata (unaligned)][T payload] //! ``` //! -//! The thin smart pointer stores a `NonNull` to the payload start. -//! Metadata (slice length, trait-object vtable, or `()` for sized T) -//! sits in `size_of::()` bytes immediately preceding the -//! payload and is read with [`ptr::read_unaligned`]. For -//! `T: Sized`, the metadata read is a zero-byte no-op. +//! Thin smart pointers store `NonNull` to the payload. Metadata sits +//! immediately before it and is read with [`ptr::read_unaligned`]. 
use core::mem; use core::ptr::{self, NonNull}; +use core::sync::atomic::AtomicU32; use ptr_meta::Pointee; @@ -30,6 +28,56 @@ pub(crate) const fn meta_bytes() -> usize { mem::size_of::<::Metadata>() } +/// Byte size of the per-[`Arc`](crate::Arc) strong reference count +/// (an [`AtomicU32`]) stored in the chunk prefix. +pub(crate) const STRONG_BYTES: usize = mem::size_of::(); + +/// Alignment of the per-`Arc` strong reference count. +pub(crate) const STRONG_ALIGN: usize = mem::align_of::(); + +/// Byte distance from an `Arc` value pointer back to its strong +/// reference count, given the value's alignment and metadata width. +/// +/// Layout of every chunk-resident `Arc` value: +/// +/// ```text +/// [strong (AtomicU32, at reservation base)][pad][T::Metadata (unaligned)][T payload] +/// ``` +/// +/// The strong count starts the reservation; metadata sits immediately before +/// the payload. The returned prefix keeps the payload `value_align`-aligned. +#[inline] +pub(crate) const fn strong_prefix_bytes_for(value_align: usize, meta: usize) -> usize { + (STRONG_BYTES + meta).next_multiple_of(value_align) +} + +/// Reservation alignment for an `Arc` value: at least [`STRONG_ALIGN`] and +/// at least `value_align`. +#[inline] +pub(crate) const fn arc_block_align(value_align: usize) -> usize { + if value_align >= STRONG_ALIGN { value_align } else { STRONG_ALIGN } +} + +/// Recovers the strong reference count of an `Arc` from its value +/// pointer. +/// +/// # Safety +/// +/// - `value_ptr` must reference the payload of an `Arc` value whose +/// chunk prefix was written by the strong-prefixed allocator path. +/// - `value_align` must equal the value's alignment (`align_of_val`). +/// - The hosting chunk must be kept alive by the caller for the +/// duration of the returned reference's use. 
+#[inline] +pub(crate) unsafe fn strong_ref<'a, T: ?Sized + Pointee>(value_ptr: NonNull, value_align: usize) -> &'a AtomicU32 { + let prefix = strong_prefix_bytes_for(value_align, meta_bytes::()); + // SAFETY: per caller. `prefix` bytes of strong + metadata + padding + // were reserved before the payload; the strong slot lives at the + // reservation base, which is `STRONG_ALIGN`-aligned, so the + // `AtomicU32` reference is well-aligned and within chunk provenance. + unsafe { value_ptr.byte_sub(prefix).cast::().as_ref() } +} + /// Reads `T`'s metadata word from the chunk prefix immediately preceding /// the payload at `value_ptr`. /// diff --git a/crates/multitude/src/internal/uninit.rs b/crates/multitude/src/internal/uninit.rs index 2a1fb3c51..8670ba6df 100644 --- a/crates/multitude/src/internal/uninit.rs +++ b/crates/multitude/src/internal/uninit.rs @@ -4,31 +4,23 @@ //! Safe "ticket" wrappers that turn raw [`InChunk`] storage into initialized //! arena allocations. //! -//! Each ticket type is constructed only by [`ChunkMutator`](super::ChunkMutator) -//! when it reserves storage. Consumers obtain a ticket and call the matching -//! `init*` method, which writes the value (and any drop entry) and returns a -//! safe reference. This isolates `unsafe` to a small number of methods in -//! this file; the higher layers of the crate (arena, smart pointers, vec, -//! strings) use only the safe ticket API. +//! [`ChunkMutator`](super::chunk_mutator::ChunkMutator) creates tickets for reserved storage. +//! `init*` methods write values, commit drop entries when needed, and return +//! safe references. use core::marker::PhantomData; -use core::mem::{self, MaybeUninit}; use core::ptr::{self, NonNull}; -use core::str; +use core::{mem, str}; use super::drop_entry::{DropEntry, DropFn, drop_shim}; use super::in_chunk::InChunk; /// Storage reserved for a value (or slice) that has no drop requirements. 
/// -/// Created by [`ChunkMutator::try_alloc_uninit`](super::ChunkMutator::try_alloc_uninit) -/// or [`try_alloc_uninit_slice`](super::ChunkMutator::try_alloc_uninit_slice). -/// Consume with [`init`](Self::init) (single value) or -/// [`init_copy_from_slice`](Self::init_copy_from_slice) (slice). +/// Created by [`ChunkMutator::try_alloc_uninit`](super::chunk_mutator::ChunkMutator::try_alloc_uninit) +/// or [`try_alloc_uninit_slice`](super::chunk_mutator::ChunkMutator::try_alloc_uninit_slice). /// -/// If the ticket is dropped without being initialized, the reserved bump -/// space is leaked until the owning chunk is torn down — but no unsafe -/// behavior occurs. +/// Dropping without initialization leaks the reservation until chunk teardown. pub(crate) struct Uninit<'a, T: ?Sized> { ptr: InChunk, _phantom: PhantomData<&'a mut T>, @@ -49,11 +41,7 @@ impl Uninit<'_, T> { /// /// # Safety /// - /// Caller asserts that the reserved storage backing this ticket - /// remains valid for the new lifetime `'b`. The intended use is - /// inside [`Arena`](crate::Arena), where the chunk that hosts the - /// slot is retained until the arena is reset or dropped — i.e. - /// at least for the `&Arena` borrow lifetime. + /// Caller guarantees the reserved storage remains valid for `'b`. #[inline] pub(crate) unsafe fn rebind<'b>(self) -> Uninit<'b, T> { Uninit { @@ -90,10 +78,7 @@ impl<'a, T> Uninit<'a, T> { unsafe { &mut *ptr.as_ptr() } } - /// Same as [`init`](Self::init) but returns a raw pointer with no - /// lifetime. Used by the arena layer when the resulting reference's - /// lifetime must be tied to `&Arena` rather than to the consumed - /// ticket's borrow scope. + /// Same as [`init`](Self::init) but returns a raw pointer with no lifetime. 
#[inline] pub(crate) fn init_raw(self, value: T) -> NonNull { let raw = self.ptr.as_ptr(); @@ -147,9 +132,7 @@ impl<'a, T> Uninit<'a, [T]> { unsafe { slice_ptr.as_mut() } } - /// Like [`Self::init_copy_from_slice`] but returns the raw - /// `NonNull<[T]>` with chunk-wide provenance. See - /// [`Uninit::init_with_ptr`] for the rationale. + /// Like [`Self::init_copy_from_slice`] but returns raw `NonNull<[T]>`. #[inline] pub(crate) fn init_copy_from_slice_ptr(self, src: &[T]) -> NonNull<[T]> where @@ -201,13 +184,8 @@ impl<'a, T> Uninit<'a, [T]> { unsafe { slice_ptr.as_mut() } } - /// Like [`Self::init_with`] but returns the raw `NonNull<[T]>` with - /// chunk-wide provenance instead of an `&mut [T]` retag. Callers - /// that hand the slice to a smart-pointer constructor (which then - /// recovers the chunk header via `byte_sub`) need the chunk-wide - /// provenance; rounding through `&mut [T]` would narrow the - /// borrow-stack tag to the slice payload and trip strict provenance - /// / Stacked Borrows when the header bytes are later read. + /// Like [`Self::init_with`] but returns raw `NonNull<[T]>` to preserve + /// chunk-wide provenance for smart-pointer header recovery. #[inline] #[cfg_attr(test, mutants::skip)] // `+= → *=` on counter ⇒ infinite loop pub(crate) fn init_with_ptr(self, mut f: F) -> NonNull<[T]> @@ -264,14 +242,10 @@ impl<'a, T> Uninit<'a, [T]> { }) } - /// Consume this slice ticket and return the raw start pointer plus - /// capacity. The caller takes over responsibility for tracking - /// which slots are initialized and for dropping the initialized - /// prefix before the chunk is torn down. + /// Consumes this slice ticket and returns the raw start pointer plus + /// capacity; caller tracks initialization and drops. /// - /// Intended for growable container backings (`Vec`, `String`) - /// where the reservation is filled in incrementally rather than in - /// a single `init_*` call. + /// Used by growable containers filled incrementally. 
#[inline] pub(crate) fn into_raw_buffer(self) -> (NonNull, usize) { let slice_ptr = self.ptr.as_non_null(); @@ -280,9 +254,7 @@ impl<'a, T> Uninit<'a, [T]> { } } -/// Drop-guard used by `init_with` / `init_clone_from_slice` / `init_from_iter` -/// implementations: if the producing closure panics part-way through, drop the -/// elements written so far. +/// Drops the initialized prefix if slice initialization panics. struct InitGuard { dst: *mut T, initialized: usize, @@ -303,13 +275,11 @@ impl Drop for InitGuard { /// Storage reserved for a value, paired with a pre-reserved drop entry slot. /// -/// Created by [`ChunkMutator::try_alloc_uninit_with_drop`](super::ChunkMutator::try_alloc_uninit_with_drop) -/// or [`try_alloc_uninit_slice_with_drop`](super::ChunkMutator::try_alloc_uninit_slice_with_drop). +/// Created by [`ChunkMutator::try_alloc_uninit_with_drop`](super::chunk_mutator::ChunkMutator::try_alloc_uninit_with_drop) +/// or [`try_alloc_uninit_slice_with_drop`](super::chunk_mutator::ChunkMutator::try_alloc_uninit_slice_with_drop). /// -/// On `init*`, the value is written into its storage and the drop entry is -/// committed (its `drop_fn` is set to a shim for `T`). If the ticket is -/// dropped without being initialized, the placeholder entry remains with no -/// drop shim — the replay loop will skip it. +/// `init*` writes the value and commits the drop entry. Dropping without +/// initialization leaves a skipped placeholder entry. pub(crate) struct UninitDrop<'a, T: ?Sized> { value: InChunk, drop_slot: InChunk, @@ -339,10 +309,7 @@ impl<'a, T> UninitDrop<'a, T> { // storage exclusively. unsafe { &mut *ptr.as_ptr() } } - /// Same as [`init`](Self::init) but returns a raw pointer with no - /// lifetime. Used by the arena layer when the resulting reference's - /// lifetime must be tied to `&Arena` rather than to the consumed - /// ticket's borrow scope. + /// Same as [`init`](Self::init) but returns a raw pointer with no lifetime. 
#[inline] pub(crate) fn init_raw(self, value: T) -> NonNull { let raw = self.value.as_ptr(); @@ -362,40 +329,14 @@ impl<'a, T> UninitDrop<'a, T> { NonNull::new_unchecked(raw) } } - - /// Writes a (possibly uninitialized) `MaybeUninit` into the value - /// slot and returns a pointer to it, leaving the pre-reserved drop entry - /// as an **uncommitted** placeholder. - /// - /// Used by the uninit-`Arc` allocation path: the entry is committed - /// later by [`Arc::>::assume_init`](crate::Arc) once the - /// value is initialized. If the resulting handle is dropped without - /// `assume_init`, the placeholder stays `None` and the replay loop skips - /// it, so no destructor runs on uninitialized memory. - #[inline] - pub(crate) fn into_uninit_placeholder(self, value: MaybeUninit) -> NonNull> { - let raw = self.value.as_ptr().cast::>(); - // SAFETY: `raw` is non-null, aligned for `T` (identical to - // `MaybeUninit`), and exclusively owned by this consumed ticket; - // the slot is uninitialized so `write` drops nothing. The drop slot - // is intentionally left as the placeholder written at reservation. - unsafe { - ptr::write(raw, value); - NonNull::new_unchecked(raw) - } - } } impl<'a, T> UninitDrop<'a, [T]> { - /// Initializes the reserved slice by cloning each element of `src`, - /// commits the drop entry, and returns a mutable reference bound by - /// the arena's lifetime. + /// Clones `src` into the reservation, commits the drop entry, and returns + /// the initialized slice. /// - /// If any `T::clone` panics, all previously-cloned elements are - /// dropped before the panic propagates; the drop entry is *not* - /// committed (the chunk's drop-replay loop will skip the placeholder), - /// so partially-initialized memory cannot be re-dropped at arena - /// teardown. + /// On panic, initialized elements are dropped and the placeholder remains + /// uncommitted. 
#[inline] pub(crate) fn init_clone_from_slice(self, src: &[T]) -> &'a mut [T] where @@ -409,11 +350,8 @@ impl<'a, T> UninitDrop<'a, [T]> { self.init_with(|i| src[i].clone()) } - /// Initializes the reserved slice by calling `f(i)` for each index - /// `i` in `0..len`, then commits the drop entry on success. If `f` - /// panics, already-initialized elements are dropped and the drop - /// entry is *not* committed (the chunk's drop-replay loop skips the - /// placeholder). + /// Initializes with `f(i)` and commits the drop entry on success. On panic, + /// initialized elements are dropped and the placeholder remains uncommitted. #[inline] pub(crate) fn init_with(self, f: F) -> &'a mut [T] where @@ -425,9 +363,7 @@ impl<'a, T> UninitDrop<'a, [T]> { unsafe { slice_ptr.as_mut() } } - /// Like [`Self::init_with`] but returns the raw `NonNull<[T]>` with - /// chunk-wide provenance. See [`Uninit::init_with_ptr`] for the - /// rationale. + /// Like [`Self::init_with`] but returns raw `NonNull<[T]>`. #[inline] #[cfg_attr(test, mutants::skip)] // counter mutation += → *= ⇒ infinite loop pub(crate) fn init_with_ptr(self, mut f: F) -> NonNull<[T]> @@ -453,11 +389,8 @@ impl<'a, T> UninitDrop<'a, [T]> { slice_ptr } - /// Initializes the reserved slice by pulling `len` values from - /// `iter` and commits the drop entry on success. Panics if `iter` - /// yields fewer elements than the reservation; in that case, - /// already-initialized elements are dropped and the drop entry is - /// not committed. + /// Pulls `len` values from `iter` and commits on success. If `iter` is + /// short, initialized elements are dropped and the entry is not committed. 
#[inline] pub(crate) fn init_from_iter(self, mut iter: I) -> &'a mut [T] where @@ -468,26 +401,4 @@ impl<'a, T> UninitDrop<'a, [T]> { .expect("iterator yielded fewer elements than ExactSizeIterator::len() reported") }) } - - /// Slice analogue of [`UninitDrop::into_uninit_placeholder`]: optionally - /// zero-fills the reserved elements and returns the buffer as - /// `MaybeUninit`s, leaving the pre-reserved drop entry **uncommitted**. - /// - /// The uninit-slice-`Arc` path commits the entry later via - /// [`Arc::<[MaybeUninit]>::assume_init`](crate::Arc). - #[inline] - pub(crate) fn into_uninit_slice_placeholder(self, zeroed: bool) -> NonNull<[MaybeUninit]> { - let slice_ptr = self.value.as_non_null(); - let len = slice_ptr.len(); - let base = slice_ptr.cast::>(); - if zeroed { - // SAFETY: `base` addresses `len` exclusively-owned `MaybeUninit` - // slots inside chunk storage reserved for this consumed ticket; - // zeroing their bytes leaves valid `MaybeUninit` values. - unsafe { - ptr::write_bytes(base.as_ptr().cast::(), 0, len.saturating_mul(mem::size_of::())); - } - } - NonNull::slice_from_raw_parts(base, len) - } } diff --git a/crates/multitude/src/strings/arc_utf16_str.rs b/crates/multitude/src/strings/arc_utf16_str.rs index 42241cc53..bc8e8d378 100644 --- a/crates/multitude/src/strings/arc_utf16_str.rs +++ b/crates/multitude/src/strings/arc_utf16_str.rs @@ -63,11 +63,16 @@ impl_utf16_str_common!(ArcUtf16Str); impl Clone for ArcUtf16Str { #[inline] fn clone(&self) -> Self { - // SAFETY: `self` owns a live +1 on its chunk so the chunk is - // alive; `clone_from_value_ptr` mints a fresh +1 via an - // atomic bump and returns a `ChunkRef` that owns it. - let r: ChunkRef = unsafe { ChunkRef::clone_from_value_ptr(self.ptr) }; - let _ = r.forget(); + // SAFETY: `self` keeps the payload (and its strong-count prefix) + // alive; the strong slot is aligned and within chunk provenance. 
+ // The conceptual value type is `[u16]` (element align 2, + // `usize` metadata), matching the allocator's strong-prefix + // layout. + let strong = unsafe { crate::internal::thin_dst::strong_ref::<[u16]>(self.ptr.cast::(), core::mem::align_of::()) }; + let prev = strong.fetch_add(1, core::sync::atomic::Ordering::Relaxed); + if prev > (u32::MAX >> 1) { + crate::internal::constants::refcount_overflow_abort(); + } Self { ptr: self.ptr, _phantom: PhantomData, @@ -75,6 +80,29 @@ impl Clone for ArcUtf16Str { } } +impl Drop for ArcUtf16Str { + #[inline] + fn drop(&mut self) { + // SAFETY: the payload (and its strong-count prefix) is live while + // this handle exists; the strong slot is aligned and in chunk + // provenance (conceptual value type `[u16]`). + let strong = unsafe { crate::internal::thin_dst::strong_ref::<[u16]>(self.ptr.cast::(), core::mem::align_of::()) }; + if strong.fetch_sub(1, core::sync::atomic::Ordering::Release) != 1 { + return; + } + core::sync::atomic::fence(core::sync::atomic::Ordering::Acquire); + // Last strong reference: release the chunk +1. The `[u16]` + // payload has no element destructor to run. + // + // SAFETY: `ptr` is hosted in a 64K-aligned `SharedChunk` holding + // exactly one outstanding +1 for this `Arc` family; + // `from_value_ptr` adopts and releases it. + unsafe { + let _ref: ChunkRef = ChunkRef::from_value_ptr(self.ptr); + } + } +} + impl From> for crate::Arc<[u16], A> { /// Convert an [`ArcUtf16Str`] into an [`Arc<[u16], A>`](crate::Arc). /// @@ -96,3 +124,77 @@ impl From> for crate::Arc<[u16], A> { unsafe { Self::from_raw(me.ptr.cast::()) } } } + +#[cfg(test)] +mod tests { + use core::sync::atomic::{AtomicU32, Ordering}; + + use super::*; + use crate::Arena; + use crate::internal::thin_dst::strong_ref; + + // The per-string strong count lives in the chunk prefix, accessed as + // an `[u16]` strong reference (element align 2) — exactly as the + // `Clone`/`Drop` impls do. 
+ fn strong_of(s: &ArcUtf16Str) -> &AtomicU32 { + // SAFETY: `s` keeps the payload and its strong-count prefix live, + // so the strong slot is aligned and within chunk provenance. + unsafe { strong_ref::<[u16]>(s.ptr.cast::(), core::mem::align_of::()) } + } + + // `Drop` must decrement the per-string strong count (and release the + // chunk on the last handle). Kills the `drop -> ()` mutant: cloning + // bumps the count, so dropping the clone must bring it back down. + #[test] + fn drop_decrements_strong_count() { + let arena = Arena::new(); + let s = arena.alloc_utf16_str_arc_from_str("hi"); + let strong = strong_of(&s); + let base = strong.load(Ordering::Relaxed); + let s2 = s.clone(); + assert_eq!(strong.load(Ordering::Relaxed), base + 1, "clone must bump the strong count"); + drop(s2); + assert_eq!(strong.load(Ordering::Relaxed), base, "drop must decrement the strong count"); + // `s` (still live) holds the chunk; it drops normally at scope end. + } + + // `Clone` checks `prev > (u32::MAX >> 1)` on the value returned by + // `fetch_add` (the count *before* the increment), so a clone + // observing `prev == u32::MAX >> 1` must NOT abort. Kills the + // `>` -> `==` and `>` -> `>=` mutants on that comparison. + #[test] + fn clone_at_max_refcount_threshold_does_not_abort() { + let arena = Arena::new(); + let s = arena.alloc_utf16_str_arc_from_str("hi"); + let strong = strong_of(&s); + strong.store(u32::MAX >> 1, Ordering::Relaxed); + let clone = s.clone(); + // Reached here without panic. Restore the true live-handle count + // (`s` + `clone`) so teardown releases the chunk instead of + // leaking the strong count above 1 forever. + strong.store(2, Ordering::Relaxed); + drop(clone); + } + + // A clone observing `prev > u32::MAX >> 1` MUST abort. Driving the + // strong count one past the threshold kills the `>` -> `==` mutant + // (it would not fire) and the `>>` -> `<<` mutant (which raises the + // threshold to `0xFFFF_FFFE`, so the guard would not fire here). 
+ #[test] + #[should_panic(expected = "refcount overflow")] + fn clone_above_max_refcount_threshold_aborts() { + let arena = Arena::new(); + let s = arena.alloc_utf16_str_arc_from_str("hi"); + let strong = strong_of(&s); + strong.store((u32::MAX >> 1) + 1, Ordering::Relaxed); + // The clone panics in its overflow guard before returning, so no + // clone is produced. Catch it, restore the real live-handle count + // (just `s`) so teardown releases the chunk instead of leaking + // (keeps Miri happy), then resume so `should_panic` sees it. + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let _c = s.clone(); + })); + strong.store(1, Ordering::Relaxed); + std::panic::resume_unwind(result.expect_err("clone past the threshold must panic")); + } +} diff --git a/crates/multitude/src/strings/box_utf16_str.rs b/crates/multitude/src/strings/box_utf16_str.rs index 9bf41f627..85a3069b9 100644 --- a/crates/multitude/src/strings/box_utf16_str.rs +++ b/crates/multitude/src/strings/box_utf16_str.rs @@ -79,6 +79,19 @@ impl BoxUtf16Str { impl_utf16_str_common!(BoxUtf16Str); +impl Drop for BoxUtf16Str { + #[inline] + fn drop(&mut self) { + // SAFETY: `ptr` is hosted in a 64K-aligned `SharedChunk` on + // which this single-owner `Box` holds a +1 strong reference; + // `from_value_ptr` adopts it and releases it on drop. The + // `[u16]` payload has no element destructor to run. + unsafe { + let _ref: crate::internal::chunk_ref::ChunkRef = crate::internal::chunk_ref::ChunkRef::from_value_ptr(self.ptr); + } + } +} + impl DerefMut for BoxUtf16Str { #[inline] fn deref_mut(&mut self) -> &mut Utf16Str { diff --git a/crates/multitude/src/strings/utf16_str_common.rs b/crates/multitude/src/strings/utf16_str_common.rs index 45ae8e4bc..c676b27ae 100644 --- a/crates/multitude/src/strings/utf16_str_common.rs +++ b/crates/multitude/src/strings/utf16_str_common.rs @@ -6,12 +6,12 @@ //! [`BoxUtf16Str`](super::BoxUtf16Str)). //! //! 
Both types share identical layout (`NonNull` + `PhantomData`), -//! prefix-length reading, payload borrowing, `Drop` semantics, and +//! prefix-length reading, payload borrowing, and //! formatting/comparison/hash/`Pointer`/`Serialize` impls. The macro //! below emits all of that for a given struct name; per-file blocks //! supply the items that legitimately differ (`Send`/`Sync` bounds, -//! `Clone` for `Arc`, `DerefMut`/`AsMut`/`BorrowMut` and -//! `as_mut_utf16_str` for `Box`). +//! `Clone` and `Drop` for `Arc`, `Drop` for `Box`, `DerefMut`/`AsMut`/ +//! `BorrowMut` and `as_mut_utf16_str` for `Box`). /// Emit shared inherent shape + trait impls for a single-pointer /// UTF-16 string type with field layout `{ ptr: NonNull, _phantom }`. @@ -55,18 +55,6 @@ macro_rules! impl_utf16_str_common { } } - impl Drop for $Ty { - #[inline] - fn drop(&mut self) { - // SAFETY: `ptr` is hosted in a 64K-aligned `SharedChunk` - // on which `self` owns a +1 strong reference. The - // `[u16]` payload has no element drop to run. - unsafe { - let _ref: $crate::internal::chunk_ref::ChunkRef = $crate::internal::chunk_ref::ChunkRef::from_value_ptr(self.ptr); - } - } - } - impl Unpin for $Ty {} impl core::ops::Deref for $Ty { diff --git a/crates/multitude/src/vec/freeze.rs b/crates/multitude/src/vec/freeze.rs index 7a2ccae6e..827023046 100644 --- a/crates/multitude/src/vec/freeze.rs +++ b/crates/multitude/src/vec/freeze.rs @@ -1,13 +1,10 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -//! Freeze a transient builder into arena-owned `Arc` or `Box` slices. +//! Freeze a transient vector into arena-owned `Arc` or `Box` slices. //! -//! The infallible freezes are exposed as `From>` impls on -//! [`Arc`](crate::Arc) / [`Box`](crate::Box) (mirroring `std`'s -//! `From> for Box<[T]>` / `Arc<[T]>`) plus the `std`-named -//! [`Vec::into_boxed_slice`] / [`Vec::leak`] methods. Fallible variants -//! 
([`Vec::try_into_arc`] / [`Vec::try_into_boxed_slice`]) have no `std` -//! counterpart and stay as inherent methods. +//! Infallible freezes use `From>` for [`Arc`](crate::Arc) / +//! [`Box`](crate::Box) plus [`Vec::into_boxed_slice`] / [`Vec::leak`]. +//! Fallible freezes are [`Vec::try_into_arc`] and [`Vec::try_into_boxed_slice`]. use core::mem::{self, ManuallyDrop}; use core::slice; @@ -15,10 +12,29 @@ use core::slice; use allocator_api2::alloc::{AllocError, Allocator}; use super::Vec; +use crate::Arena; use crate::arc::Arc; use crate::r#box::Box; +use crate::internal::arena_buf::DrainAll; impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> { + /// Shared body of the `Box`/`Arc` freeze paths: drain every element + /// into a fresh shared allocation built by `build`, then release this + /// `Vec`'s now-empty backing buffer. The old buffer is dropped only + /// *after* `build` consumes the drain iterator, so the moved-out + /// elements stay readable for the duration of the freeze. + #[inline] + fn drain_freeze(self, build: impl FnOnce(&'a Arena, DrainAll<'a, T>) -> R) -> R { + let arena = self.arena; + let mut me = ManuallyDrop::new(self); + let iter = me.buf.drain_all(); + let result = build(arena, iter); + // `drain_all` set `buf.len = 0`, so this only releases the (unused) + // backing buffer, never the moved-out elements. + drop(ManuallyDrop::into_inner(me)); + result + } + /// Freeze into a [`Box<[T], A>`](crate::Box). /// /// **O(n)** — moves the elements into a fresh shared allocation @@ -27,18 +43,10 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> { /// /// # Panics /// - /// Panics if the underlying allocator fails, or — for `T: Drop` — if - /// `len` exceeds `u16::MAX`. + /// Panics if the underlying allocator fails. 
#[must_use] pub fn into_boxed_slice(self) -> Box<[T], A> { - let arena = self.arena; - let mut me = ManuallyDrop::new(self); - let iter = me.buf.drain_all(); - let bx = arena.alloc_slice_fill_iter_box::(iter); - // `drain_all` set `buf.len = 0`, so `into_inner`'s normal `Drop` - // only releases the (unused) backing buffer. - drop(ManuallyDrop::into_inner(me)); - bx + self.drain_freeze(Arena::alloc_slice_fill_iter_box::) } /// Fallible variant of [`Self::into_boxed_slice`]. @@ -49,13 +57,7 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> { /// fails. On error, `self` is consumed and any elements remaining /// after a partial move are dropped before this function returns. pub fn try_into_boxed_slice(self) -> Result, AllocError> { - let arena = self.arena; - let mut me = ManuallyDrop::new(self); - let iter = me.buf.drain_all(); - let result = arena.try_alloc_slice_fill_iter_box::(iter); - // See `into_boxed_slice`. - drop(ManuallyDrop::into_inner(me)); - result + self.drain_freeze(Arena::try_alloc_slice_fill_iter_box::) } /// Fallible variant of the [`Arc<[T], A>`](crate::Arc) freeze @@ -71,24 +73,15 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> { T: Send + Sync, A: Send + Sync, { - let arena = self.arena; - let mut me = ManuallyDrop::new(self); - let iter = me.buf.drain_all(); - let result = arena.try_alloc_slice_fill_iter_arc::(iter); - // See `into_boxed_slice`. - drop(ManuallyDrop::into_inner(me)); - result + self.drain_freeze(Arena::try_alloc_slice_fill_iter_arc::) } /// Consume the `Vec`, returning an arena-lifetime mutable slice /// reference `&'a mut [T]`. Mirrors [`std::vec::Vec::leak`]. /// - /// **O(1) and allocation-free**: the existing buffer is reinterpreted - /// as a slice reference in place. No copy, no new allocation. 
The - /// unused tail (`cap - len`) is reclaimed back to the chunk's bump - /// cursor when this buffer is still the chunk's last allocation, so - /// later allocations can reuse it; otherwise it is left in the chunk - /// and reclaimed when the arena is dropped. + /// **O(1) and allocation-free**: the existing buffer becomes the returned + /// slice. The unused tail is reclaimed only while this buffer is still the + /// chunk's last allocation; otherwise arena teardown reclaims it. /// /// Available only when `T` does not need `Drop` (compile-time /// asserted). For drop types, freeze via [`Box::from`] / [`Arc::from`]. @@ -100,21 +93,16 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> { "Vec::leak requires T not to need Drop; freeze via Box::from / Arc::from instead", ); } - // Hand the unused capacity tail back to the chunk before pinning - // the live prefix as a slice. `[len, cap)` holds no initialized - // element, so reclaiming it is sound; the retained `[0, len)` - // prefix (and thus the returned slice) is untouched. + // Reclaim the uninitialized capacity tail before pinning the live + // prefix as the returned slice. let _ = self.reclaim_capacity_tail(self.buf.len()); let mut me = ManuallyDrop::new(self); let ptr = me.buf.as_mut_ptr(); let len = me.buf.len(); - // SAFETY: by `ArenaBuf`'s invariants, `ptr` addresses `len` - // initialized `T`s in an arena chunk that outlives `'a`. We - // `ManuallyDrop` the `Vec` so neither the `ArenaBuf` nor its - // contained elements are dropped here. Since `T` does not need - // `Drop` (const-asserted above), abandoning the buffer without - // registering a chunk drop entry is sound — the chunk storage - // itself is reclaimed at arena teardown. + // SAFETY: `ptr` addresses `len` initialized `T`s in an arena chunk + // that outlives `'a`. `ManuallyDrop` prevents dropping the buffer or + // elements here; `T: !Drop` (const-asserted above) lets arena teardown + // reclaim the raw chunk storage without a drop entry. 
unsafe { slice::from_raw_parts_mut(ptr, len) } } @@ -125,12 +113,6 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> { T: Send + Sync, A: Send + Sync, { - let arena = self.arena; - let mut me = ManuallyDrop::new(self); - let iter = me.buf.drain_all(); - let arc = arena.alloc_slice_fill_iter_arc::(iter); - // See `into_boxed_slice`. - drop(ManuallyDrop::into_inner(me)); - arc + self.drain_freeze(Arena::alloc_slice_fill_iter_arc::) } } diff --git a/crates/multitude/src/vec/mod.rs b/crates/multitude/src/vec/mod.rs index 1edf1377d..1e2174bda 100644 --- a/crates/multitude/src/vec/mod.rs +++ b/crates/multitude/src/vec/mod.rs @@ -132,7 +132,7 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> { if let Some(u) = self.arena.try_reserve_local_slice::(new_cap) { break u; } - if self.arena.is_oversized_local(refill_hint) { + if self.arena.is_oversized(refill_hint) { let (new_ptr, new_cap_actual) = self.arena.alloc_oversized_local_with(refill_hint, |mutator| { let ticket = mutator .try_alloc_uninit_slice::(new_cap) diff --git a/crates/multitude/src/vec/mutate.rs b/crates/multitude/src/vec/mutate.rs index 8945b116e..15cbdf41c 100644 --- a/crates/multitude/src/vec/mutate.rs +++ b/crates/multitude/src/vec/mutate.rs @@ -7,12 +7,11 @@ use core::mem; use allocator_api2::alloc::{AllocError, Allocator}; use super::Vec; +use crate::arena::panic_alloc; use crate::internal::arena_buf::ArenaBuf; -/// Rollback guard for `resize`/`resize_with`: if a user `clone` or -/// closure panics partway through a grow, the guard's `Drop` truncates -/// the buffer back to `old_len`, dropping every element written so far. -/// On the success path the caller disarms it via [`mem::forget`]. +/// Rollback guard for `resize`/`resize_with`. +/// On panic, truncates to `old_len`; success disarms it via [`mem::forget`]. struct ResizeGuard<'b, 'a, T> { buf: &'b mut ArenaBuf<'a, T>, old_len: usize, @@ -133,11 +132,8 @@ impl Vec<'_, T, A> { /// Shrink the capacity of the vector as much as possible. 
/// - /// O(1) reclamation when the buffer sits at the current bump cursor - /// of its chunk (no later allocation has moved the cursor past it): - /// the unused tail is returned to the chunk and the data pointer is - /// unchanged. Otherwise this is a no-op — the arena never relocates - /// or copies to shrink, so capacity simply stays put. + /// O(1) when the buffer is still at the chunk's bump cursor: returns the + /// unused tail to the chunk without moving data. Otherwise this is a no-op. #[inline] #[cfg_attr(test, mutants::skip)] // thin delegation; logic covered via `reclaim_capacity_tail` pub fn shrink_to_fit(&mut self) { @@ -146,10 +142,8 @@ impl Vec<'_, T, A> { /// Shrink the capacity with a lower bound. /// - /// The capacity will remain at least as large as both `self.len()` and - /// `min_capacity`. Reclamation only succeeds while the buffer still sits - /// at the chunk's bump cursor; otherwise this is a no-op (matching - /// [`std::vec::Vec::shrink_to`]'s "best-effort" contract). + /// Capacity remains at least `max(self.len(), min_capacity)`. Reclamation + /// only succeeds while the buffer is still at the chunk's bump cursor. #[cfg_attr(test, mutants::skip)] pub fn shrink_to(&mut self, min_capacity: usize) { if const { mem::size_of::() == 0 } { @@ -159,17 +153,12 @@ impl Vec<'_, T, A> { let _ = self.reclaim_capacity_tail(target); } - /// Reclaim the capacity tail `[target_cap, cap)` back to the chunk's - /// bump cursor when this buffer is still the chunk's last allocation - /// (an O(1) cursor rewind — no copy, data pointer unchanged). Returns - /// whether storage was reclaimed. A no-op when the buffer has been - /// overtaken by a later allocation, sits in a retired or oversized - /// chunk, or is a ZST. - /// - /// Callers must ensure the slots in `[target_cap, cap)` hold no live - /// element (either never initialized, or already dropped): the - /// reclaimed bytes return to the arena and may be overwritten by the - /// next allocation. 
+ /// Reclaim `[target_cap, cap)` with an O(1) cursor rewind when this buffer + /// is still the chunk's last allocation. Returns whether storage was + /// reclaimed; no-op if a later allocation overtook it, in retired/oversized chunks, or for ZSTs. + /// + /// Callers must ensure `[target_cap, cap)` contains no live elements + /// because the next arena allocation may overwrite it. #[inline] // Mutation testing is suppressed on the `total_bytes > max_normal_alloc` // early-return: `>` with `==` / `>=` mutations only differ at the exact @@ -190,14 +179,9 @@ impl Vec<'_, T, A> { } let elem = mem::size_of::(); let data_addr = self.buf.as_ptr() as usize; - // One-past-the-end address of the current allocation. The product - // is the buffer's real byte size, bounded by its chunk, so it - // cannot overflow. + // Buffer byte size is bounded by its chunk, so this cannot overflow. let total_bytes = cap * elem; - // Buffers large enough to have been served by an oversized chunk - // are never at the `current_local` bump cursor; skip them so the - // cheap cursor check below never spuriously reclaims a one-shot - // chunk's storage. + // Oversized buffers are never at the `current_local` bump cursor. if total_bytes > self.arena.max_normal_alloc() { return false; } @@ -259,8 +243,7 @@ impl Vec<'_, T, A> { assert!(start <= end, "extend_from_within: start > end"); assert!(end <= len, "extend_from_within: range end out of bounds"); let count = end - start; - // Reserve up front so the subsequent pushes cannot relocate the - // buffer (which would invalidate the source indices we read from). + // Reserve first so pushes cannot relocate the buffer being read from. self.try_reserve(count)?; for i in start..end { let cloned = self.buf.as_slice()[i].clone(); @@ -366,7 +349,13 @@ impl Vec<'_, T, A> { Ok(()) } - /// Reserve the minimum capacity for at least `additional` more elements. 
+ /// + /// Unlike [`Self::reserve`], this does not over-allocate via + /// amortized doubling: the resulting capacity is exactly + /// `len + additional` (modulo whatever the backing chunk's in-place + /// growth already provides). Prefer [`Self::reserve`] when more + /// elements are expected to be inserted afterwards. /// /// # Panics /// @@ -374,9 +363,9 @@ impl Vec<'_, T, A> { /// Use [`Self::try_reserve_exact`] for a fallible variant. #[inline] pub fn reserve_exact(&mut self, additional: usize) { - // No tighter guarantee than `reserve`: the arena's slice - // reservation policy already returns the requested capacity. - self.reserve(additional); + if self.try_reserve_exact(additional).is_err() { + panic_alloc!(); + } } /// Fallible variant of [`Self::reserve_exact`]. @@ -387,7 +376,13 @@ impl Vec<'_, T, A> { /// alignment is at least 32 KiB. #[inline] pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), AllocError> { - self.try_reserve(additional) + let needed = self.buf.len().checked_add(additional).ok_or(AllocError)?; + if needed <= self.buf.cap() { + return Ok(()); + } + // Grow to exactly `needed` (no amortized-doubling slack), matching + // `alloc::vec::Vec::reserve_exact` semantics. + self.try_grow_to(needed) } /// Resize the vector to `new_len`, cloning `value` to fill new slots. @@ -419,10 +414,7 @@ impl Vec<'_, T, A> { } let added = new_len - len; self.try_reserve(added)?; - // If a `clone` (or the final move) panics partway through, the - // guard rolls the length back to `len`, dropping every element - // written so far. This keeps the vector in a consistent state and - // never leaks the partially-grown tail. + // Roll back on panic so partially written elements are dropped. 
let guard = ResizeGuard { buf: &mut self.buf, old_len: len, @@ -459,8 +451,7 @@ impl Vec<'_, T, A> { } let added = new_len - len; self.try_reserve(added)?; - // See `resize`: roll back on a panic in `f` so the elements - // written before the panic are dropped and the length is restored. + // See `resize`: roll back on panic in `f`. let guard = ResizeGuard { buf: &mut self.buf, old_len: len, @@ -499,15 +490,12 @@ impl Vec<'_, T, A> { let len = self.buf.len(); assert!(at <= len, "split index out of bounds (at is {at}, len is {len})"); let tail_len = len - at; - // Copy/empty path for ZSTs, an unallocated head, or an empty - // tail: produce an independent tail and leave the head's storage - // (and capacity) intact. + // ZST, unallocated-head, and empty-tail cases produce an independent + // tail and leave the head's storage intact. if const { mem::size_of::() == 0 } || self.buf.cap() == 0 || tail_len == 0 { let mut tail = Self::try_with_capacity_in(tail_len, self.arena)?; - // Only ZSTs reach here with `tail_len > 0` (a non-ZST `cap == 0` - // forces `tail_len == 0`). ZSTs carry no data, so popping the - // suffix straight into `tail` — which reverses order — is fine; no - // staging buffer is needed. + // Only ZSTs reach here with `tail_len > 0`; reversing them while + // popping is unobservable. for _ in 0..tail_len { tail.buf .push_within_cap(self.buf.pop().expect("tail length matches")) @@ -516,9 +504,7 @@ impl Vec<'_, T, A> { } return Ok(tail); } - // Zero-copy split: the tail shares the same chunk storage as the - // head (storage is reclaimed only at arena teardown, which - // outlives both halves), so no elements are copied. + // Zero-copy split: both halves share chunk storage until arena teardown. 
let tail_buf = self.buf.split_off_buf(at); Ok(Self::from_buf(tail_buf, self.arena)) } diff --git a/crates/multitude/tests/alloc_ref.rs b/crates/multitude/tests/alloc_ref.rs index f935aa120..c0e425bb5 100644 --- a/crates/multitude/tests/alloc_ref.rs +++ b/crates/multitude/tests/alloc_ref.rs @@ -406,9 +406,14 @@ fn wasted_tail_grows_on_local_refill_and_clears_on_reset() { } /// **Conservation invariant**: across a full retire-and-release cycle, -/// the wasted-tail counter must return to exactly its starting value. -/// Catches off-by-one or asymmetric-arithmetic bugs (e.g., add 4 KiB, -/// subtract 4096) that observation-of-non-zero tests would miss. +/// the local wasted-tail counter must return to exactly its starting +/// value. Catches off-by-one or asymmetric-arithmetic bugs (e.g., add +/// 4 KiB, subtract 4096) that observation-of-non-zero tests would miss. +/// +/// Only local allocation paths are exercised here: `reset` governs local +/// chunks, so it is what takes the gauge back to zero. Shared-chunk +/// wasted tail is released by handle drop plus chunk turnover (not by +/// `reset`) and is covered by the drop/cache-reuse tests above. 
#[cfg(feature = "stats")] #[test] fn wasted_tail_returns_to_exactly_baseline_across_full_cycle() { @@ -416,20 +421,15 @@ fn wasted_tail_returns_to_exactly_baseline_across_full_cycle() { for cycle in 0..10 { let before = arena.stats().wasted_tail_bytes; assert_eq!(before, 0, "cycle {cycle}: baseline must be 0 before allocations begin"); - // Mix of all major allocation paths to exercise every retire- - // generating code path within a single cycle: for _ in 0..4 { let _: &mut u64 = arena.alloc(42); let _: &mut [u8] = arena.alloc_slice_fill_with(256, |_| 0); - drop(arena.alloc_arc::(1)); - drop(arena.alloc_box::(2)); - drop(arena.alloc_slice_copy_arc::(&[0_u8; 1024])); } arena.reset(); let after = arena.stats().wasted_tail_bytes; assert_eq!( after, 0, - "cycle {cycle}: after reset, the counter must return to exactly 0 \ + "cycle {cycle}: after reset, the local counter must return to exactly 0 \ (got {after}) — asymmetric add/subtract would leave a residue", ); } @@ -444,7 +444,7 @@ fn wasted_tail_returns_to_exactly_baseline_across_full_cycle() { fn wasted_tail_correct_after_cache_reuse_cycles() { let mut arena = Arena::new(); let mut acquired_chunks_total = 0u64; - for _ in 0..20 { + for _ in 0..8 { // Force at least one full chunk's worth of allocs so we cycle // through `current_local` AND populate the cache on reset. for _ in 0..64 { @@ -472,10 +472,12 @@ fn wasted_tail_decreases_monotonically_as_pinned_arcs_drop() { let arena = Arena::new(); let mut pins = std::vec::Vec::new(); // Build up several pinned chunks by interleaving a pin with allocs - // that force a shared refill. - for _ in 0..5 { + // that force a shared refill. A few moderately sized copies per pin + // overflow the (initially small) shared chunk, retiring it while the + // pin holds it — far fewer allocations than a long inner loop. 
+ for _ in 0..4 { pins.push(arena.alloc_arc::(99)); - for _ in 0..10 { + for _ in 0..3 { drop(arena.alloc_slice_copy_arc::(&[0_u8; 2048])); } } @@ -538,21 +540,36 @@ fn wasted_tail_handles_oversized_local_retire() { /// many times. If the subtract ever exceeded the matching add even by /// one byte, the running counter would underflow to a value near /// `u64::MAX`. +/// +/// The conservation bound is `wasted_tail_bytes <= total_bytes_allocated`: +/// the arena cannot waste more tail than it currently holds. This holds +/// regardless of whether the slack lives in local or (still-installed) +/// shared chunks, and an underflow would blow the wasted gauge far past +/// the total. `reset` only clears local wasted tail, so it is not +/// expected to drive the gauge to zero while a shared chunk is live. #[cfg(feature = "stats")] #[test] fn wasted_tail_never_underflows_under_stress() { let mut arena = Arena::new(); - for _ in 0..256 { + let filler = [0_u8; 64]; + for _ in 0..10 { let _: &mut u64 = arena.alloc(0); - let _: &mut [u8] = arena.alloc_slice_fill_with(64, |_| 0); + let _: &mut [u8] = arena.alloc_slice_copy(filler); drop(arena.alloc_arc::(0)); drop(arena.alloc_box::(0)); drop(arena.alloc_slice_copy_arc::(&[0_u8; 4096])); - // Always-positive: counter never observed as huge. 
- assert!(arena.stats().wasted_tail_bytes < u64::MAX / 2); + let stats = arena.stats(); + assert!( + stats.wasted_tail_bytes <= stats.total_bytes_allocated, + "wasted tail ({}) must never exceed total bytes outstanding ({}) — \ + an underflow would wrap it near u64::MAX", + stats.wasted_tail_bytes, + stats.total_bytes_allocated, + ); } arena.reset(); - assert_eq!(arena.stats().wasted_tail_bytes, 0); + let stats = arena.stats(); + assert!(stats.wasted_tail_bytes <= stats.total_bytes_allocated); } use crate::common::FailingAllocator; diff --git a/crates/multitude/tests/arena.rs b/crates/multitude/tests/arena.rs index 0c3bb64c3..4888387c0 100644 --- a/crates/multitude/tests/arena.rs +++ b/crates/multitude/tests/arena.rs @@ -408,12 +408,12 @@ mod reset { #[cfg(feature = "stats")] #[test] fn reset_works_with_pinned_chunks() { - // Force chunk rotation by allocating multiple buffers that fill the - // chunk. We seed the high-water to class 7 so the rotated chunks - // are eligible for caching when they return after `reset`. - // `alloc_uninit::>` skips per-byte init. + // Allocate a couple of near-max_normal_alloc buffers to put the + // (class-7, 64 KiB) starter chunk into use. `MaybeUninit<[u8; + // 4000]>` skips per-byte init; a couple of them is enough to + // exercise the reset→cache→reuse path without a long alloc loop. let mut arena: Arena = Arena::builder().max_normal_alloc(4 * 1024).with_capacity_local(64 * 1024).build(); - for _ in 0..5 { + for _ in 0..2 { let _ = arena.alloc(core::mem::MaybeUninit::<[u8; 4000]>::uninit()); } let chunks_before = arena.stats().normal_local_chunks_allocated; @@ -474,6 +474,89 @@ mod reset { // Arena still usable. let _ = arena.alloc_arc(11_u32); } + + /// Regression for the `reset`-retires-shared-chunks bug. + /// + /// `reset` must touch only local chunks. It used to also retire the + /// current shared chunk (reconcile its surplus + reinstall the empty + /// sentinel). 
That broke workloads that nest arena [`Arc`]s inside an + /// outer arena `Arc` in the same shared chunk: the inner arcs' drops are + /// deferred to chunk teardown (refcount reaching 0), but the outer arc's + /// own slice elements pin the chunk until then, so the chunk can never + /// reach 0 while it is the retired-but-referenced current chunk. Each + /// reset therefore allocated **one fresh shared chunk per cycle** + /// (linear growth, slope 1 — the benchmark saw a fresh ~64 KiB chunk + /// every iteration). + /// + /// With reset leaving shared state alone, shared chunks are bump-filled + /// across cycles and a new (larger) chunk is needed only occasionally as + /// the size class ratchets up, so the count grows strictly sub-linearly. + /// This test pins the slope: across a measured batch of `BATCH` reset + /// cycles the shared-chunk count must grow by far less than `BATCH` + /// (the buggy code grew by exactly `BATCH`). + #[cfg(feature = "stats")] + #[test] + fn reset_does_not_allocate_a_fresh_shared_chunk_per_cycle() { + // Each cycle just needs to *use* a shared chunk so that `reset`'s + // shared-chunk handling is exercised; a single `Arc` allocation + // does that. (The nested-structure variant is covered separately + // by `reset_keeps_nested_arc_structures_valid_across_cycles`.) + // Keeping the per-cycle work to one allocation bounds the Miri + // interpreter cost while still pinning the slope. + fn build(arena: &Arena) { + drop(arena.alloc_arc(0xAB_u64)); + } + + const WARMUP: usize = 16; + const BATCH: usize = 64; + + let mut arena = Arena::new(); + for _ in 0..WARMUP { + build(&arena); + arena.reset(); + } + let before = arena.stats().normal_shared_chunks_allocated; + for _ in 0..BATCH { + build(&arena); + arena.reset(); + } + let grew_by = arena.stats().normal_shared_chunks_allocated - before; + + // Buggy `reset` grew by exactly `BATCH` (one fresh chunk per cycle). 
+ // The correct behavior grows by only a handful (a few class-size + // bumps). A generous sub-linear ceiling cleanly separates the two. + assert!( + grew_by < BATCH as u64 / 8, + "reset must not allocate a fresh shared chunk per cycle: \ + {grew_by} new chunks over {BATCH} cycles (buggy code allocates ~{BATCH})", + ); + } + + /// Companion to the leak regression that needs no `stats` feature: + /// the nested-`Arc` structure must stay valid and drop cleanly across + /// repeated reset cycles. Builds the structure, reads it back, drops it, + /// resets, and repeats — confirming `reset` leaves outstanding + /// shared-chunk contents intact. + #[test] + fn reset_keeps_nested_arc_structures_valid_across_cycles() { + let mut arena = Arena::new(); + for cycle in 0..8_u8 { + let outer: Arc<[Arc<[u8]>]> = { + let mut v = arena.alloc_vec_with_capacity::>(4); + for i in 0_u8..4 { + v.push(arena.alloc_slice_copy_arc(&[cycle, i, 0xCD])); + } + v.try_into_arc().unwrap() + }; + assert_eq!(outer.len(), 4); + for (i, inner) in outer.iter().enumerate() { + let i = u8::try_from(i).unwrap(); + assert_eq!(&**inner, &[cycle, i, 0xCD]); + } + drop(outer); + arena.reset(); + } + } } mod large_alloc { @@ -535,12 +618,17 @@ mod large_alloc { #[test] fn alloc_slice_clone_above_chunk_boundary() { let arena = Arena::new(); - let n = CHUNK_BYTES / 8 + 4; // 65568 bytes - let src: Vec = (0..n as u64).collect(); - let s = arena.alloc_slice_clone::(&src); + // Use `u128` so the element count needed to exceed `CHUNK_BYTES` + // is 16x smaller than with `u8`, halving it again vs `u64` — the + // `alloc_slice_clone` path still clones every element across the + // oversized chunk, so fewer elements means far less Miri work for + // the same `> CHUNK_BYTES` byte threshold. 
+ let n = CHUNK_BYTES / 16 + 2; // 4098 u128 => > 64 KiB + let src: Vec = (0..n as u128).collect(); + let s = arena.alloc_slice_clone::(&src); assert_eq!(s.len(), src.len()); assert_eq!(s[0], 0); - assert_eq!(s[s.len() - 1], (s.len() - 1) as u64); + assert_eq!(s[s.len() - 1], (s.len() - 1) as u128); } #[test] @@ -645,12 +733,22 @@ mod large_alloc { #[test] fn alloc_vec_with_capacity_at_far_over_chunk() { let arena = Arena::new(); - let mut v = arena.alloc_vec_with_capacity::(FAR_OVER_CHUNK / 4); - for i in 0..(FAR_OVER_CHUNK / 4) { - v.push(i as u32); - } - assert_eq!(v.len(), FAR_OVER_CHUNK / 4); - assert_eq!(v[v.len() - 1], (v.len() - 1) as u32); + let cap = FAR_OVER_CHUNK / 4; + let mut v = arena.alloc_vec_with_capacity::(cap); + // Fill the (far-over-chunk) capacity in one bulk `extend_from_slice` + // (a single memcpy) rather than `cap` individual `push` calls — the + // per-`push` arena bookkeeping is what dominates under Miri. A + // bulk-zeroed source vec is itself a single allocation. + v.extend_from_slice(&std::vec![0_u32; cap]); + assert_eq!(v.len(), cap); + // The first, a mid-chunk, and the last slot must all be addressable + // and writable across the oversized backing chunk. + v[0] = 0xA1; + v[CHUNK_BYTES / 4] = 0xB2; + v[cap - 1] = 0xC3; + assert_eq!(v[0], 0xA1); + assert_eq!(v[CHUNK_BYTES / 4], 0xB2); + assert_eq!(v[cap - 1], 0xC3); } // ============================================================================ @@ -715,15 +813,20 @@ mod large_alloc { #[test] fn alloc_vec_extend_from_iter_past_chunk_boundary() { let arena = Arena::new(); - let mut v = arena.alloc_vec::(); - v.extend((0..(OVER_CHUNK / 2) as u16).map(|i| i.wrapping_mul(13))); - assert_eq!(v.len(), OVER_CHUNK / 2); + // Exercise the `Extend`-from-iterator growth path across the chunk + // boundary. Using `u128` reaches `> CHUNK_BYTES` with 8x fewer + // elements than `u16`, so the per-element interpreted `extend` + // loop (which a lazy `map` iterator forces) is 8x shorter. 
+ let mut v = arena.alloc_vec::(); + let n = OVER_CHUNK / 16 + 1; // > 64 KiB worth of u128 + v.extend((0..n as u128).map(|i| i.wrapping_mul(13))); + assert_eq!(v.len(), n); // Spot-check first, mid-chunk and last instead of iterating // every element; a chunk-boundary bug would manifest at any of // these positions equally and the per-element cost dominates // under Miri. - for i in [0, OVER_CHUNK / 4, OVER_CHUNK / 2 - 1] { - assert_eq!(v[i], (i as u16).wrapping_mul(13)); + for i in [0, n / 2, n - 1] { + assert_eq!(v[i], (i as u128).wrapping_mul(13)); } } @@ -845,15 +948,21 @@ mod large_alloc { #[test] fn many_oversized_allocations_in_one_arena() { // The property under test is that an arena tolerates *multiple* - // oversized one-shot chunks coexisting. Using `[u128; OVER_CHUNK/16]` - // gives the same byte-count threshold (above `MAX_CHUNK_BYTES`) but - // a 16× shorter `alloc_slice_fill_with` closure loop — a big win - // under Miri where each closure invocation is interpreted. + // oversized one-shot chunks coexisting. `[u128; OVER_CHUNK/16+1]` + // gives the byte-count threshold (above `MAX_CHUNK_BYTES`). Each + // round is a single bulk `alloc_slice_copy` (one memcpy) from a + // shared zeroed source rather than an `N_U128`-long fill closure + // loop; per-round sentinels written into the first and last slots + // preserve the distinct-content checks that prove the oversized + // chunks don't alias. 
const N_U128: usize = OVER_CHUNK / 16 + 1; // > 64 KiB worth of u128 let arena = Arena::new(); - let mut keepers: Vec<&[u128]> = Vec::with_capacity(8); + let src = std::vec![0_u128; N_U128]; + let mut keepers: Vec<&mut [u128]> = Vec::with_capacity(8); for round in 0..8u8 { - let s: &mut [u128] = arena.alloc_slice_fill_with::(N_U128, move |_| u128::from(round)); + let s: &mut [u128] = arena.alloc_slice_copy::(&src); + s[0] = u128::from(round); + s[N_U128 - 1] = u128::from(round); keepers.push(s); } for (round, s) in keepers.iter().enumerate() { @@ -1329,7 +1438,7 @@ mod fast_path_correctness { count += 1; assert!(count < 20_000, "should have triggered new chunk by now"); } - assert!(count > 50, "chunk should hold many Arcs"); + assert!(count > 10, "chunk should hold many Arcs"); } #[cfg(feature = "stats")] @@ -1572,22 +1681,23 @@ mod mutants_for_chunk_provider { // The property under test: the size-class ratchet caps at the // largest cacheable class (class 7 = 64 KiB total). After the // first few refills ratchet there, subsequent refills stay at - // class 7 — they don't keep doubling. To observe this we - // allocate a handful of 8 KiB boxes (just under MAX_NORMAL_ALLOC - // = 16 KiB, so still routed through the normal cache) and - // confirm none route to oversized. A 64 KiB class-7 chunk fits - // a couple of these, so 8 boxes span ≥ 2 chunks, proving the - // ratchet stays at class 7 rather than degrading or escaping. + // class 7 — they don't keep doubling. To observe this we allocate + // a handful of ~13 KiB boxes (under MAX_NORMAL_ALLOC = 16 KiB, so + // still routed through the normal cache) and confirm none route to + // oversized. Five ~13 KiB boxes total > 64 KiB, so they span ≥ 2 + // class-7 chunks, proving the ratchet stays at class 7 rather than + // degrading or escaping. Larger-but-fewer boxes keep the byte + // threshold while minimising the per-allocation Miri cost. 
let arena = Arena::new(); - let mut keep: Vec>> = Vec::new(); - for _ in 0..8 { - keep.push(arena.alloc_uninit_box::<[u8; 8 * 1024]>()); + let mut keep: Vec>> = Vec::new(); + for _ in 0..5 { + keep.push(arena.alloc_uninit_box::<[u8; 13 * 1024]>()); } let s = arena.stats(); assert_eq!(s.oversized_shared_chunks_allocated, 0); assert!( s.normal_shared_chunks_allocated >= 2, - "8 × 8 KiB boxes must span ≥ 2 class-7 chunks, got {}", + "5 × 13 KiB boxes must span ≥ 2 class-7 chunks, got {}", s.normal_shared_chunks_allocated ); } @@ -2316,23 +2426,24 @@ mod coverage_arena_gaps { } // ============================================================================ - // inner_slice.rs:441 — `alloc_slice_local_with_or_panic` `len > u16::MAX` - // with drop_fn panic. - // inner_slice.rs:1014 — shared sibling. + // Per-`Arc` reference counting removes the `u16` element-count cap on + // `Arc<[T]>` slices: a Drop-typed slice longer than `u16::MAX` is now + // dropped via `drop_in_place::<[T]>` in `Arc::drop`, not a counted + // chunk drop entry, so it allocates successfully. 
// ============================================================================ - #[cfg(feature = "std")] + #[cfg(all(feature = "std", not(miri)))] #[test] - #[should_panic(expected = "multitude: allocator returned AllocError")] - fn alloc_slice_fill_with_arc_drop_too_long_panics() { + fn alloc_slice_fill_with_arc_drop_long_succeeds() { #[derive(Clone)] struct D; - #[expect(clippy::empty_drop, reason = "Drop impl makes needs_drop::() true so a drop_fn is installed")] + #[expect(clippy::empty_drop, reason = "Drop impl makes needs_drop::() true")] impl Drop for D { fn drop(&mut self) {} } let arena = Arena::::new(); - let _ = arena.alloc_slice_fill_with_arc(u16::MAX as usize + 1, |_| D); + let arc = arena.alloc_slice_fill_with_arc(u16::MAX as usize + 1, |_| D); + assert_eq!(arc.len(), u16::MAX as usize + 1); } // ============================================================================ @@ -2878,12 +2989,12 @@ mod from_mutants_extras_stats { let arena = Arena::new(); // Ratchet the chunk class via a few large uninit fillers // (`alloc_uninit_arc` skips per-byte init cost). - for _ in 0..8 { + for _ in 0..4 { let _filler: Arc> = arena.alloc_uninit_arc::<[u8; 8 * 1024]>(); } // A short burst still exercises the small-allocation slow refill path // at the peak shared chunk class. - for i in 0_u32..32 { + for i in 0_u32..16 { let _a: Arc = arena.alloc_arc(i); } assert_eq!(arena.stats().oversized_shared_chunks_allocated, 0); diff --git a/crates/multitude/tests/audit_repro.rs b/crates/multitude/tests/audit_repro.rs index 3c5651fb4..78422af3f 100644 --- a/crates/multitude/tests/audit_repro.rs +++ b/crates/multitude/tests/audit_repro.rs @@ -46,15 +46,20 @@ fn alloc_box_of_maybeuninit_assume_init_drops_inner() { assert_eq!(counter.load(Ordering::Relaxed), 1); } -/// Arc variant of the panic-on-misuse fix. 
-#[cfg(not(miri))] +/// With per-`Arc` reference counting, `alloc_arc(MaybeUninit::new(x))` +/// followed by `assume_init` works correctly: `Arc::drop` runs the inner +/// value's destructor eagerly on the last clone (no chunk drop entry is +/// involved), so the previously-unsupported pattern is now sound. #[test] -#[should_panic(expected = "no drop entry reserved")] -fn alloc_arc_of_maybeuninit_assume_init_panics_when_unsupported() { +fn alloc_arc_of_maybeuninit_assume_init_drops_inner() { let counter = StdArc::new(AtomicUsize::new(0)); - let arena = Arena::new(); - let arc_uninit = arena.alloc_arc(MaybeUninit::new(DropCounter(counter.clone()))); - let _arc = unsafe { arc_uninit.assume_init() }; + { + let arena = Arena::new(); + let arc_uninit = arena.alloc_arc(MaybeUninit::new(DropCounter(counter.clone()))); + let arc = unsafe { arc_uninit.assume_init() }; + drop(arc); + } + assert_eq!(counter.load(Ordering::Relaxed), 1); } /// `arena.alloc_uninit_arc::()` followed by `assume_init` reserves the @@ -172,10 +177,12 @@ fn zst_shared_handouts_advance_cursor() { let bx2 = arena.alloc_box(()); assert_ne!(bx1.as_ptr(), bx2.as_ptr(), "ZST Box handouts must get distinct addresses"); - // Many create-and-drop cycles force the chunk to fill (1 byte each) - // and refill. Pre-fix the cursor never advanced, so this pattern - // could drive the live chunk's atomic refcount to zero. - for _ in 0..2_000 { + // A few hundred create-and-drop cycles still force the (512-byte + // starter) chunk to fill (1 byte each) and refill at least once. Pre-fix + // the cursor never advanced, so this pattern could drive the live + // chunk's atomic refcount to zero. A few hundred iterations exercise the + // refill the tag now forces without a multi-thousand Miri loop. 
+ for _ in 0..600 { drop(arena.alloc_arc(())); drop(arena.alloc_box(())); } diff --git a/crates/multitude/tests/coverage_extras.rs b/crates/multitude/tests/coverage_extras.rs index 953780d44..eea956cd4 100644 --- a/crates/multitude/tests/coverage_extras.rs +++ b/crates/multitude/tests/coverage_extras.rs @@ -1513,18 +1513,21 @@ mod coverage_more { } #[test] - #[should_panic(expected = "allocator returned AllocError")] - // Skipped under Miri: the test must register `u16::MAX + 1` drop - // entries to trigger the overflow panic, and Miri's per-allocation - // overhead pushes this past the 10-minute CI budget. The panic is a - // runtime-checked assertion, not a memory-safety property, so Miri - // adds no value beyond what `cargo test` already verifies. + // Skipped under Miri: building + dropping `u16::MAX + 1` elements + // (~65K) exceeds Miri's test budget. The lifted restriction is a + // runtime property, not a memory-safety one, so native + cargo-careful + // runs cover it. #[cfg_attr(miri, ignore)] - fn vec_into_box_panics_when_drop_slice_is_too_long_for_entry() { + fn vec_into_box_drop_slice_longer_than_u16_succeeds() { + // `Box<[T]>` drops via `drop_in_place::<[T]>` (no `u16`-counted + // drop entry), so a `T: Drop` slice longer than `u16::MAX` freezes + // into a `Box` without rejection. 
let arena = Arena::new(); let mut v = arena.alloc_vec::(); - v.extend((0..=u16::MAX).map(|_| Droppy("many"))); - let _ = v.into_boxed_slice(); + let len = (u16::MAX as usize) + 1; + v.extend((0..len).map(|_| Droppy("many"))); + let b = v.into_boxed_slice(); + assert_eq!(b.len(), len); } #[test] diff --git a/crates/multitude/tests/coverage_gaps.rs b/crates/multitude/tests/coverage_gaps.rs index 8b210a91d..6028414d2 100644 --- a/crates/multitude/tests/coverage_gaps.rs +++ b/crates/multitude/tests/coverage_gaps.rs @@ -750,24 +750,32 @@ mod drop_slice_over_u16_max_returns_err { assert!(a.try_alloc_slice_fill_iter::((0..TOO_LONG).map(|i| D(i as u8))).is_err()); } + // `Arc<[T]>` uninit/zeroed slices have no `u16` element-count cap + // under per-`Arc` reference counting (they drop via + // `drop_in_place::<[T]>`, not a `u16`-counted chunk entry), so a + // Drop-typed slice longer than `u16::MAX` now allocates successfully. + #[cfg(not(miri))] #[test] - fn try_alloc_uninit_slice_arc_over_u16_err() { + fn uninit_slice_arc_over_u16_succeeds() { struct D(u32); impl Drop for D { fn drop(&mut self) {} } let a = Arena::new(); - assert!(a.try_alloc_uninit_slice_arc::(TOO_LONG).is_err()); + let arc = a.try_alloc_uninit_slice_arc::(TOO_LONG).expect("Arc slices have no u16 cap"); + assert_eq!(arc.len(), TOO_LONG); } + #[cfg(not(miri))] #[test] - fn try_alloc_zeroed_slice_arc_over_u16_err() { + fn zeroed_slice_arc_over_u16_succeeds() { struct D(u32); impl Drop for D { fn drop(&mut self) {} } let a = Arena::new(); - assert!(a.try_alloc_zeroed_slice_arc::(TOO_LONG).is_err()); + let arc = a.try_alloc_zeroed_slice_arc::(TOO_LONG).expect("Arc slices have no u16 cap"); + assert_eq!(arc.len(), TOO_LONG); } } @@ -925,10 +933,10 @@ mod uninit_drop_init_from_iter { } // ============================================================================ -// internal/uninit.rs:487–489 — `into_uninit_slice_placeholder(zeroed=true)` -// exercised by `alloc_zeroed_slice_arc` for drop types. 
+// `alloc_zeroed_slice_arc` for a drop type zero-fills the payload (the +// `MaybeUninit::zeroed` fill path). // ============================================================================ -mod uninit_into_uninit_slice_placeholder_zeroed { +mod zeroed_slice_arc_zeroes_payload { use core::mem::MaybeUninit; use multitude::Arena; @@ -1103,34 +1111,39 @@ mod arc_borrow { } // ============================================================================ -// arc.rs — slice assume_init missing-drop-entry panic (287–290). +// arc.rs — slice assume_init is a pure reinterpret under per-`Arc` +// reference counting; element destructors run eagerly in `Arc::drop`. // ============================================================================ -mod arc_assume_init_slice_panics_when_drop_entry_missing { +mod arc_assume_init_slice_drops_each_element { use core::mem::MaybeUninit; - use std::panic::{AssertUnwindSafe, catch_unwind}; + use std::sync::Arc as StdArc; + use std::sync::atomic::{AtomicUsize, Ordering}; use multitude::Arena; #[test] - fn slice_assume_init_for_drop_type_without_placeholder_panics() { - // `MaybeUninit` is itself never-drop (MaybeUninit suppresses - // drops), so allocating an `Arc<[MaybeUninit]>` via the regular - // fill helper does NOT reserve a placeholder slice drop entry. - // Calling `assume_init` on the resulting handle then triggers the - // slice-side panic message because `needs_drop::()` is true. - #[derive(Clone)] - struct D(#[expect(dead_code, reason = "field gives the type a non-zero size")] u32); + fn slice_assume_init_for_drop_type_drops_each_element() { + // `alloc_slice_fill_with_arc::>` + `assume_init` + // used to be rejected (no placeholder drop entry). Now + // `assume_init` is a pure reinterpret and `Arc::drop` runs each + // element's destructor via `drop_in_place::<[D]>`. 
+ struct D(StdArc); impl Drop for D { - fn drop(&mut self) {} + fn drop(&mut self) { + self.0.fetch_add(1, Ordering::Relaxed); + } } - let arena = Arena::new(); - let r = catch_unwind(AssertUnwindSafe(|| { - let arc: multitude::Arc<[MaybeUninit]> = arena.alloc_slice_fill_with_arc(2, |_| MaybeUninit::new(D(0))); - // SAFETY: elements are initialized above; the panic comes from - // the missing placeholder drop entry, not from undefined behavior. - let _: multitude::Arc<[D]> = unsafe { arc.assume_init() }; - })); - assert!(r.is_err()); + let counter = StdArc::new(AtomicUsize::new(0)); + { + let arena = Arena::new(); + let arc: multitude::Arc<[MaybeUninit]> = + arena.alloc_slice_fill_with_arc(2, |_| MaybeUninit::new(D(StdArc::clone(&counter)))); + // SAFETY: both elements were initialized above. + let init: multitude::Arc<[D]> = unsafe { arc.assume_init() }; + assert_eq!(init.len(), 2); + drop(init); + } + assert_eq!(counter.load(Ordering::Relaxed), 2); } } @@ -1455,8 +1468,11 @@ mod allocator_impl_grow_to_zero_overlap { } // ============================================================================ -// alloc_unsized.rs — metadata-too-large rejection for `[D]` slice DSTs +// alloc_unsized.rs — metadata-too-large handling for `[D]` slice DSTs // and refill-failure path for `try_alloc_dst_box` (lines 229–230, 261). +// Neither the `Box` nor the `Arc` path packs slice metadata into a `u16` +// drop-list slot: both keep it full-width and run `drop_in_place` eagerly, +// so a `T: Drop` slice longer than `u16::MAX` is accepted by both paths. // Lives here rather than as a `src/` unit test so the empty `Drop` // impl on the probe type doesn't bloat src-coverage counts. 
// ============================================================================ @@ -1475,14 +1491,63 @@ mod alloc_unsized_extras { } #[test] - fn try_alloc_dst_arc_slice_drop_metadata_too_large_returns_err() { + // Skipped under Miri: writing + dropping `u16::MAX + 1` elements + // (~65K) to exercise the slice-length boundary exceeds Miri's test + // budget; the lifted restriction is a runtime property, not a + // memory-safety one, so native + cargo-careful runs cover it. + #[cfg_attr(miri, ignore)] + fn try_alloc_dst_box_slice_drop_metadata_too_large_succeeds() { + // Like the `Arc` path, the `Box` path stores slice metadata + // full-width in the chunk prefix and drops via + // `drop_in_place::<[D]>` (no `u16` drop-list slot), so a `T: Drop` + // slice longer than `u16::MAX` is accepted. let arena = Arena::new(); let len = (u16::MAX as usize) + 1; let layout = Layout::array::(len).unwrap(); - // SAFETY: the metadata-too-large rejection fires before `init` - // is invoked, so no actual initialization happens. - let r = unsafe { arena.try_alloc_dst_arc::<[D]>(layout, len, |_p: *mut [D]| {}) }; - assert!(r.is_err()); + // SAFETY: `layout` describes `[D; len]`; `init` writes a valid + // `D` into every element before the `Box` is observed, so the + // eager `drop_in_place::<[D]>` in `Box::drop` runs on live values. + let r = unsafe { + arena.try_alloc_dst_box::<[D]>(layout, len, |p: *mut [D]| { + let base = p.cast::(); + for i in 0..len { + // SAFETY: `base..base + len` is the freshly reserved + // `[D]` buffer described by `layout`. + base.add(i).write(D::default()); + } + }) + }; + assert!(r.is_ok()); + } + + #[test] + // Skipped under Miri: writing + dropping `u16::MAX + 1` elements + // (~65K) to exercise the slice-length boundary exceeds Miri's test + // budget (~8 min observed). The lifted-restriction behavior is a + // runtime property, not a memory-safety one; native + cargo-careful + // runs verify it on every CI execution. 
+ #[cfg_attr(miri, ignore)] + fn try_alloc_dst_arc_slice_drop_metadata_too_large_succeeds() { + // Like the `Box` path, the `Arc` path stores slice metadata + // verbatim (not in a `u16` drop-list slot), so a `T: Drop` + // slice longer than `u16::MAX` is accepted. + let arena = Arena::new(); + let len = (u16::MAX as usize) + 1; + let layout = Layout::array::(len).unwrap(); + // SAFETY: `layout` describes `[D; len]`; `init` writes a valid + // `D` into every element before the `Arc` is observed, so the + // eager `drop_in_place::<[D]>` on teardown runs on live values. + let r = unsafe { + arena.try_alloc_dst_arc::<[D]>(layout, len, |p: *mut [D]| { + let base = p.cast::(); + for i in 0..len { + // SAFETY: `base..base + len` is the freshly reserved + // `[D]` buffer described by `layout`. + base.add(i).write(D::default()); + } + }) + }; + assert!(r.is_ok()); } #[test] diff --git a/crates/multitude/tests/dst.rs b/crates/multitude/tests/dst.rs index 69fcca955..dd2f49764 100644 --- a/crates/multitude/tests/dst.rs +++ b/crates/multitude/tests/dst.rs @@ -466,13 +466,16 @@ mod dst_box { assert_eq!(COUNT.load(Ordering::SeqCst), before + 4); } - /// Regression: a slice DST with `len > u16::MAX` and `T: Drop` must be - /// rejected at allocation time (returns `AllocError`) so that a future - /// `Box::<[T]>::into_rc()` call cannot find itself with no drop entry - /// to retarget. Matches the non-DST slice-alloc paths which use the - /// same `entry_size != 0 && len > u16::MAX` guard. - #[test] - fn try_alloc_dst_box_rejects_drop_slice_with_overflowing_len() { + /// A slice DST with `len > u16::MAX` and `T: Drop` is accepted: a + /// `Box<[T]>` drops via `drop_in_place::<[T]>` on a full-width fat + /// pointer, so there is no `u16` element-count cap (matching the + /// `Arc<[T]>` family). Every element is constructed and, on drop, + /// every destructor runs. 
+ #[test] + // Skipped under Miri: building + dropping ~65K elements exceeds + // Miri's test budget; native + cargo-careful runs cover it. + #[cfg_attr(miri, ignore)] + fn try_alloc_dst_box_accepts_drop_slice_with_overflowing_len() { struct DropCounter(std::sync::Arc); impl Drop for DropCounter { fn drop(&mut self) { @@ -482,20 +485,28 @@ mod dst_box { let arena = Arena::new(); let n: usize = (u16::MAX as usize) + 1; - // Layout::array fits since u16::MAX+1 elements at small size are well under isize::MAX. let Ok(layout) = core::alloc::Layout::array::(n) else { // Allocator wouldn't even build the layout; the test isn't meaningful. return; }; + let counter = std::sync::Arc::new(AtomicUsize::new(0)); + let c = std::sync::Arc::clone(&counter); - // SAFETY: init would write all `n` elements; we never reach that point - // because the allocation is rejected up front by the new guard. - let result = unsafe { - arena.try_alloc_dst_box::<[DropCounter]>(layout, n, |_fat: *mut [DropCounter]| { - unreachable!("alloc must be rejected before init runs"); + // SAFETY: `layout` describes `[DropCounter; n]`; `init` writes a + // valid `DropCounter` into every slot before the `Box` is + // observed, so `drop_in_place::<[DropCounter]>` runs on live values. 
+ let b = unsafe { + arena.try_alloc_dst_box::<[DropCounter]>(layout, n, |fat: *mut [DropCounter]| { + let base = fat.cast::(); + for i in 0..n { + base.add(i).write(DropCounter(std::sync::Arc::clone(&c))); + } }) - }; - assert!(result.is_err(), "DST slice with len > u16::MAX and T: Drop must be rejected"); + } + .expect("DST slice with len > u16::MAX and T: Drop is accepted"); + assert_eq!(b.len(), n); + drop(b); + assert_eq!(counter.load(Ordering::Relaxed), n, "every element's destructor must run"); } } diff --git a/crates/multitude/tests/loom.rs b/crates/multitude/tests/loom.rs index a695f7cf6..1983edeb8 100644 --- a/crates/multitude/tests/loom.rs +++ b/crates/multitude/tests/loom.rs @@ -233,10 +233,11 @@ mod loom_arc { } #[test] - fn two_workers_clone_and_drop_during_eviction() { - // Eviction race: owner evicts a Shared chunk via `reset` while two - // workers drop their Arcs. The reconcile must produce a refcount - // that reaches 0 exactly once. + fn two_workers_clone_and_drop_during_reset_and_arena_drop() { + // Two workers drop their Arcs while the owner resets (a no-op on the + // shared chunk) and then drops the arena. The shared chunk is torn + // down when its last reference releases; reconcile-on-drop must + // produce a refcount that reaches 0 exactly once. loom::model(|| { let baseline = drop_counter().load(StdOrdering::Relaxed); @@ -259,9 +260,10 @@ mod loom_arc { } #[test] - fn worker_drop_racing_eviction_then_owner_drops_arena() { - // Variant of `deferred_reconciliation_race`: the arena is dropped - // after the eviction, so the worker's drop hits the + fn worker_drop_racing_reset_then_owner_drops_arena() { + // Variant of `deferred_reconciliation_race`: the owner resets (a + // no-op on the shared chunk) and then drops the arena, so the + // worker's drop may be the last reference and hit the // `outstanding_chunks` last-reclaimer path on the now-detached chunk. 
loom::model(|| { let baseline = drop_counter().load(StdOrdering::Relaxed); @@ -282,10 +284,11 @@ mod loom_arc { } #[test] - fn arena_drop_with_active_workers_and_chunk_cache_reuse() { - // Owner allocates an Arc, resets (chunk cached), allocates again - // (cache pop revives), all while a worker drops the first Arc. - // Stresses the cache-revive path against in-flight worker drops. + fn second_alloc_after_reset_reuses_installed_chunk_with_active_worker() { + // Owner allocates an Arc, resets (the shared chunk stays installed), + // then allocates a second Arc on that same chunk, all while a worker + // drops the first Arc. Stresses an allocation onto a live chunk + // against an in-flight worker drop of an earlier handle. loom::model(|| { let baseline = drop_counter().load(StdOrdering::Relaxed); @@ -369,10 +372,9 @@ mod loom_arc { #[test] fn arena_reset_concurrent_with_clone_and_drop() { - // Owner calls `arena.reset()` (NOT drop) while two workers race - // on Arc clone/drop. `reset` evicts in-place rather than tearing - // down `ArenaInner`, so the orderings exercised differ from the - // arena-drop case. + // Owner calls `arena.reset()` (NOT drop) while a worker drops an Arc + // clone. `reset` leaves the shared chunk untouched, so the chunk is + // torn down later at arena drop; Drop must still run exactly once. loom::model(|| { let baseline = drop_counter().load(StdOrdering::Relaxed); @@ -393,13 +395,11 @@ mod loom_arc { } #[test] - fn cache_pop_concurrent_with_prior_generation_worker_drop() { - // Owner allocates an Arc on chunk-gen-1, resets (chunk cached), - // then allocates a new Arc — which pops the cached chunk and - // re-initializes it (gen-2). Concurrently, a worker holding the - // gen-1 Arc drops it, hitting the now-revived chunk's refcount. - // Tests that cache-revive races a teardown decrement on the - // prior generation safely. 
+ fn second_alloc_after_reset_shares_chunk_with_prior_generation_worker_drop() { + // Owner allocates an Arc, resets (the shared chunk stays installed), + // then allocates a second Arc on the same chunk. Concurrently, a + // worker holding the first Arc drops it, hitting that chunk's + // refcount. Both payloads must drop exactly once. loom::model(|| { let baseline = drop_counter().load(StdOrdering::Relaxed); @@ -456,10 +456,11 @@ mod loom_arc { // re-store its `next` pointer. loom::model(|| { let arena = fresh_arena(); - // Each `Arc<[u32; 256]>` takes 1 KiB + drop entry; with - // `max_normal_alloc = 4 KiB` chunks, two of these allocate in - // separate chunks via refill, so dropping each on a different - // worker forces two independent `push_shared_cache` paths. + // Each `Arc<[u32; 256]>` takes ~1 KiB + the per-`Arc` strong + // prefix; with `max_normal_alloc = 4 KiB` chunks, two of these + // allocate in separate chunks via refill, so dropping each on a + // different worker forces two independent `push_shared_cache` + // paths. let a: Arc<[u32; 256]> = arena.alloc_arc([0_u32; 256]); let b: Arc<[u32; 256]> = arena.alloc_arc([0_u32; 256]); @@ -488,9 +489,10 @@ mod loom_arc { // installed node's `next` field after the push that installed it. loom::model(|| { let arena = fresh_arena(); - // Each `Arc<[u32; 256]>` takes 1 KiB + drop entry; with - // `max_normal_alloc = 4 KiB` chunks, these allocations refill - // into separate chunks so each drop/pop exercises cache traffic. + // Each `Arc<[u32; 256]>` takes ~1 KiB + the per-`Arc` strong + // prefix; with `max_normal_alloc = 4 KiB` chunks, these + // allocations refill into separate chunks so each drop/pop + // exercises cache traffic. 
let cached: Arc<[u32; 256]> = arena.alloc_arc([0_u32; 256]); let racing: Arc<[u32; 256]> = arena.alloc_arc([0_u32; 256]); diff --git a/crates/multitude/tests/mutant_kills_post_fix.rs b/crates/multitude/tests/mutant_kills_post_fix.rs index 95c24e27e..d39872407 100644 --- a/crates/multitude/tests/mutant_kills_post_fix.rs +++ b/crates/multitude/tests/mutant_kills_post_fix.rs @@ -6,15 +6,15 @@ use multitude::Arena; -// is_oversized_shared: threshold == max_normal_alloc routes via normal path +// is_oversized: threshold == max_normal_alloc routes via normal path #[test] -fn is_oversized_shared_routes_at_threshold_via_normal() { +fn is_oversized_routes_shared_at_threshold_via_normal() { const MNA: usize = 4 * 1024; let arena = Arena::builder().max_normal_alloc(MNA).build(); let before_normal = arena.stats().normal_shared_chunks_allocated; let before_oversized = arena.stats().oversized_shared_chunks_allocated; - // wcp = MNA (size MNA-1 + align 1). - let _arc = arena.alloc_arc([0_u8; MNA - 1]); + // wcp = MNA exactly: strong prefix (4) + value (MNA-8) + arc block align (4). + let _arc = arena.alloc_arc([0_u8; MNA - 8]); let after_normal = arena.stats().normal_shared_chunks_allocated; let after_oversized = arena.stats().oversized_shared_chunks_allocated; assert!(after_normal > before_normal); @@ -25,11 +25,12 @@ fn is_oversized_shared_routes_at_threshold_via_normal() { } #[test] -fn is_oversized_shared_routes_above_threshold_via_oversized() { +fn is_oversized_routes_shared_above_threshold_via_oversized() { const MNA: usize = 4 * 1024; let arena = Arena::builder().max_normal_alloc(MNA).build(); let before_oversized = arena.stats().oversized_shared_chunks_allocated; - let _arc = arena.alloc_arc([0_u8; MNA]); // wcp = MNA + 1 + // wcp = MNA + 1: strong prefix (4) + value (MNA-7) + arc block align (4). 
+ let _arc = arena.alloc_arc([0_u8; MNA - 7]); let after_oversized = arena.stats().oversized_shared_chunks_allocated; assert!( after_oversized > before_oversized, @@ -38,7 +39,7 @@ fn is_oversized_shared_routes_above_threshold_via_oversized() { } #[test] -fn is_oversized_local_routes_at_threshold_via_normal() { +fn is_oversized_routes_local_at_threshold_via_normal() { const MNA: usize = 4 * 1024; let arena = Arena::builder().max_normal_alloc(MNA).build(); let before_normal = arena.stats().normal_local_chunks_allocated; @@ -52,7 +53,7 @@ fn is_oversized_local_routes_at_threshold_via_normal() { } #[test] -fn is_oversized_local_routes_above_threshold_via_oversized() { +fn is_oversized_routes_local_above_threshold_via_oversized() { const MNA: usize = 4 * 1024; let arena = Arena::builder().max_normal_alloc(MNA).build(); let before_oversized = arena.stats().oversized_local_chunks_allocated; diff --git a/crates/multitude/tests/mutants_extras.rs b/crates/multitude/tests/mutants_extras.rs index 722cfa422..76e8754cd 100644 --- a/crates/multitude/tests/mutants_extras.rs +++ b/crates/multitude/tests/mutants_extras.rs @@ -2082,25 +2082,32 @@ mod mutants_for_audit { // tests above that refill across many chunk classes.) // ============================================================================ - // arena.rs:3036 / 3608 — `if entry_size != 0 && len > u16::MAX as usize` - // `> with ==` mutant: only panics when len exactly equals u16::MAX. - // `> with >=` mutant: panics at len == u16::MAX (one short of original). - // Kill: a Drop-aware slice of len == u16::MAX must succeed (original) - // and must panic for len > u16::MAX. + // Per-`Arc` reference counting removes the `u16` element-count cap on + // `Arc<[T]>` slices: a Drop-typed slice with `len > u16::MAX` now + // allocates (via the oversized path) and drops each element through + // `drop_in_place::<[T]>` in `Arc::drop`. 
// ============================================================================ + #[cfg(not(miri))] #[test] - fn alloc_slice_shared_drop_aware_above_u16_max_returns_err() { + fn alloc_slice_shared_drop_aware_above_u16_max_succeeds() { use std::sync::Arc as StdArc; - use std::sync::atomic::AtomicU32; - struct D(#[allow(dead_code)] StdArc); + use std::sync::atomic::{AtomicU32, Ordering}; + struct D(StdArc); impl Drop for D { - fn drop(&mut self) {} + fn drop(&mut self) { + self.0.fetch_add(1, Ordering::Relaxed); + } } let drops = StdArc::new(AtomicU32::new(0)); let arena = Arena::builder().max_normal_alloc(60 * 1024).build(); - let result = arena.try_alloc_slice_fill_with_arc(65_536, |_| D(drops.clone())); - assert!(result.is_err()); + let n = 65_536_usize; + let arc = arena + .try_alloc_slice_fill_with_arc(n, |_| D(drops.clone())) + .expect("Arc slices have no u16 element-count cap"); + assert_eq!(arc.len(), n); + drop(arc); + assert_eq!(drops.load(Ordering::Relaxed), n as u32); } // ============================================================================ diff --git a/crates/multitude/tests/utf16.rs b/crates/multitude/tests/utf16.rs index 7409cab00..54dbc0e70 100644 --- a/crates/multitude/tests/utf16.rs +++ b/crates/multitude/tests/utf16.rs @@ -3011,7 +3011,7 @@ mod from_coverage_extras_utf16 { #[test] fn alloc_utf16_str_arc_from_str_oversized_routes_via_oversized_shared() { - let len = 16 * 1024; + let len = 4096; let src = "a".repeat(len); // First exercise the default arena so any default-config code paths @@ -3020,11 +3020,13 @@ mod from_coverage_extras_utf16 { let arc = arena.alloc_utf16_str_arc_from_str(&src); assert_eq!(arc.len(), len); - // Then force a small `max_normal_alloc` (in bytes) so the ~32 KiB - // UTF-16 payload transcoded from a 16 KiB ASCII string (2 bytes per - // code unit, plus the length prefix) deterministically takes the - // oversized-shared branch regardless of any future change to the - // default threshold. 
+ // Then force a small `max_normal_alloc` (in bytes) so the 8 KiB + // UTF-16 payload transcoded from a 4096-char ASCII string (2 bytes + // per code unit, plus the length prefix) deterministically takes + // the oversized-shared branch regardless of any future change to + // the default threshold. (A shorter string than before keeps the + // one-shot transcode affordable under Miri while still clearing the + // 4 KiB threshold.) let arena = Arena::builder().max_normal_alloc(4096).build(); let arc = arena.alloc_utf16_str_arc_from_str(&src); assert_eq!(arc.len(), len); @@ -3034,7 +3036,7 @@ mod from_coverage_extras_utf16 { #[test] fn alloc_utf16_str_box_from_str_oversized_routes_via_oversized_shared() { - let len = 16 * 1024; + let len = 4096; let src = "a".repeat(len); // First exercise the default arena so any default-config code paths @@ -3043,11 +3045,13 @@ mod from_coverage_extras_utf16 { let b = arena.alloc_utf16_str_box_from_str(&src); assert_eq!(b.len(), len); - // Then force a small `max_normal_alloc` (in bytes) so the ~32 KiB - // UTF-16 payload transcoded from a 16 KiB ASCII string (2 bytes per - // code unit, plus the length prefix) deterministically takes the - // oversized-shared branch regardless of any future change to the - // default threshold. + // Then force a small `max_normal_alloc` (in bytes) so the 8 KiB + // UTF-16 payload transcoded from a 4096-char ASCII string (2 bytes + // per code unit, plus the length prefix) deterministically takes + // the oversized-shared branch regardless of any future change to + // the default threshold. (A shorter string than before keeps the + // one-shot transcode affordable under Miri while still clearing the + // 4 KiB threshold.) 
let arena = Arena::builder().max_normal_alloc(4096).build(); let b = arena.alloc_utf16_str_box_from_str(&src); assert_eq!(b.len(), len); @@ -3214,4 +3218,52 @@ mod from_mutants_extras_utf16_scattered { let actual: std::string::String = std::char::decode_utf16(s.as_slice().iter().copied()).map(|r| r.unwrap()).collect(); assert_eq!(actual, "Hello, Rust!"); } + + /// Regression guard for the prefixed shared-allocation routing + /// (`impl_alloc_prefixed_shared_arc`): an odd-length `u8` (`Arc`) + /// allocation leaves the shared bump cursor odd, then a `u16` + /// (`ArcUtf16Str`) allocation reserves a block aligned to 4 bytes (so + /// the per-`Arc` `AtomicU32` strong prefix is aligned, via + /// `arc_block_align(u16) = max(2, 4)`). The routing sizes the refill / + /// oversized hint with `worst_case_arc_slice_payload` (strong prefix + + /// length prefix + payload + front alignment slack), so sweeping `u16` + /// lengths across the `max_normal_alloc` boundary must always terminate + /// (an under-sized hint would spin the refill loop) and produce correct + /// contents. + #[test] + fn prefixed_shared_alloc_boundary_terminates_for_mixed_u8_u16() { + // `max_normal_alloc` must be >= MIN_MAX_NORMAL_ALLOC (4096), so the + // u16 normal/oversized boundary sits at `chars = mna / 2`. Sweep a + // few char lengths right around that boundary for an even and an + // odd `mna` (the parity drives the alignment edge case) plus one + // larger boundary position. Verifying length + a handful of + // sentinel code units (rather than decoding every unit) keeps the + // per-iteration cost down to the unavoidable one-shot transcode, + // which is what makes this affordable under Miri. + for &mna in &[4096_usize, 4097, 6144] { + let arena = Arena::builder().max_normal_alloc(mna).build(); + let center = mna / 2; + for chars in center.saturating_sub(1)..=(center + 1).min(mna) { + // Odd-length u8 (str) alloc to misalign the shared cursor. 
+ let narrow = "x".repeat(2 * (chars % 50) + 1); + let narrow_arc = arena.alloc_str_arc(&narrow); + assert_eq!(&*narrow_arc, narrow.as_str(), "str payload corrupted at mna={mna}, chars={chars}"); + // u16 (utf16) alloc right after at a boundary-spanning length. + let wide = "y".repeat(chars); + let wide_arc = arena.alloc_utf16_str_arc_from_str(&wide); + // Sentinel checks instead of a full decode: the payload is + // uniform ('y'), so a routing bug that returns the wrong + // length or corrupts an edge/middle unit is still caught, + // without an O(chars) decode loop per iteration. + assert_eq!(wide_arc.len(), chars, "utf16 length wrong at mna={mna}, chars={chars}"); + if chars > 0 { + let units = wide_arc.as_slice(); + let yy = u16::from(b'y'); + assert_eq!(units[0], yy, "utf16 head corrupted at mna={mna}, chars={chars}"); + assert_eq!(units[chars / 2], yy, "utf16 mid corrupted at mna={mna}, chars={chars}"); + assert_eq!(units[chars - 1], yy, "utf16 tail corrupted at mna={mna}, chars={chars}"); + } + } + } + } } diff --git a/crates/multitude/tests/zst_uninit_arc_fix.rs b/crates/multitude/tests/zst_uninit_arc_fix.rs index 5dd1a1d57..d9543393e 100644 --- a/crates/multitude/tests/zst_uninit_arc_fix.rs +++ b/crates/multitude/tests/zst_uninit_arc_fix.rs @@ -123,7 +123,7 @@ fn zst_alloc_arc_never_returns_one_past_chunk_end() { } /// Regression from post-fix audit: `impl_alloc_dst_box` used to check -/// `is_oversized_shared(total)` but refill with `total + align`. At +/// `is_oversized(total)` but refill with `total + align`. At /// `total == max_normal_alloc` but `total + align > max_normal_alloc`, /// the in-arena fast path failed, the oversized branch was skipped, /// and `refill_shared(refill_hint)` hit the new `debug_assert!` in