diff --git a/.spelling b/.spelling
index 96834b58c..27d7c4f13 100644
--- a/.spelling
+++ b/.spelling
@@ -608,3 +608,5 @@ u32
 POV
 lossy
 unrounded
+unpadded
+unyielded
diff --git a/Cargo.lock b/Cargo.lock
index 19cc31de8..2ab85d258 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2370,7 +2370,7 @@ dependencies = [
 
 [[package]]
 name = "multitude"
-version = "0.3.1"
+version = "0.3.2"
 dependencies = [
  "allocator-api2 0.4.0",
  "bolero",
diff --git a/Cargo.toml b/Cargo.toml
index c27799b3d..f827117cb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -45,7 +45,7 @@ fundle_macros = { path = "crates/fundle_macros", default-features = false, versi
 fundle_macros_impl = { path = "crates/fundle_macros_impl", default-features = false, version = "0.3.3" }
 http_extensions = { path = "crates/http_extensions", default-features = false, version = "0.6.2" }
 layered = { path = "crates/layered", default-features = false, version = "0.3.4" }
-multitude = { path = "crates/multitude", default-features = false, version = "0.3.1" }
+multitude = { path = "crates/multitude", default-features = false, version = "0.3.2" }
 ohno = { path = "crates/ohno", default-features = false, version = "0.3.6" }
 ohno_macros = { path = "crates/ohno_macros", default-features = false, version = "0.3.4" }
 recoverable = { path = "crates/recoverable", default-features = false, version = "0.1.6" }
diff --git a/crates/multitude/Cargo.toml b/crates/multitude/Cargo.toml
index cf52a9d36..5ea59ff16 100644
--- a/crates/multitude/Cargo.toml
+++ b/crates/multitude/Cargo.toml
@@ -3,7 +3,7 @@
 
 [package]
 name = "multitude"
-version = "0.3.1"
+version = "0.3.2"
 description = "Fast and flexible arena allocator."
 readme = "README.md"
 keywords = ["arena", "memory", "allocator", "bump"]
@@ -91,6 +91,10 @@ harness = false
 name = "criterion_drop"
 harness = false
 
+[[bench]]
+name = "criterion_arc_array"
+harness = false
+
 # Callgrind benches require Linux (Valgrind). The bench files are gated to compile
 # to a no-op on non-Linux targets, but the [[bench]] entry itself cannot be
 # cfg-gated, so it is unconditional here.
@@ -102,6 +106,10 @@ harness = false
 name = "gungraun_drop"
 harness = false
 
+[[bench]]
+name = "gungraun_arc_array"
+harness = false
+
 [[example]]
 name = "multitude_basic"
 
diff --git a/crates/multitude/README.md b/crates/multitude/README.md
index 0e511587e..a728bfa11 100644
--- a/crates/multitude/README.md
+++ b/crates/multitude/README.md
@@ -397,94 +397,94 @@ existing `_arc` slice methods).
 This crate was developed as part of <a href="../..">The Oxidizer Project</a>. Browse this crate's <a href="https://github.com/microsoft/oxidizer/tree/main/crates/multitude">source code</a>.
 </sub>
 
- [__cargo_doc2readme_dependencies_info]: ggGmYW0CYXZlMC43LjJhdIQbLiTyV0MU86EbZU15e0PmecoboQ9jo59bnAEbyDXw04U13GlhYvRhcoQbBzV3ofWgqIgbt8brW1MeN_Mb9N6Ac8XJFEIbIYjmnKUrOjRhZIWCaGJ5dGVtdWNrZjEuMjUuMIJlYnl0ZXNmMS4xMS4xgmhieXRlc2J1ZmUwLjUuNYJpbXVsdGl0dWRlZTAuMy4xgmh6ZXJvY29weWYwLjguNTA
+ [__cargo_doc2readme_dependencies_info]: ggGmYW0CYXZlMC43LjJhdIQbLiTyV0MU86EbZU15e0PmecoboQ9jo59bnAEbyDXw04U13GlhYvRhcoQbBzV3ofWgqIgbt8brW1MeN_Mb9N6Ac8XJFEIbIYjmnKUrOjRhZIWCaGJ5dGVtdWNrZjEuMjUuMIJlYnl0ZXNmMS4xMS4xgmhieXRlc2J1ZmUwLjUuNYJpbXVsdGl0dWRlZTAuMy4ygmh6ZXJvY29weWYwLjguNTA
  [__link0]: https://crates.io/crates/bumpalo
- [__link1]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link10]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec
+ [__link1]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link10]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec
  [__link11]: https://crates.io/crates/dst-factory
- [__link12]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::format
- [__link13]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::ArcUtf16Str
- [__link14]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::BoxUtf16Str
- [__link15]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::Utf16String
- [__link16]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::format_utf16
- [__link17]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link18]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link19]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena
- [__link2]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
+ [__link12]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::format
+ [__link13]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::ArcUtf16Str
+ [__link14]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::BoxUtf16Str
+ [__link15]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::Utf16String
+ [__link16]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::format_utf16
+ [__link17]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link18]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link19]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena
+ [__link2]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
  [__link20]: https://doc.rust-lang.org/stable/std/marker/trait.Send.html
- [__link21]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link22]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link23]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link24]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
+ [__link21]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link22]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link23]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link24]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
  [__link25]: https://doc.rust-lang.org/stable/alloc/?search=boxed::Box
- [__link26]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec
- [__link27]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String
+ [__link26]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec
+ [__link27]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String
  [__link28]: https://crates.io/crates/allocator-api2
- [__link29]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String
- [__link3]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link30]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec
- [__link31]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String::into_boxed_str
- [__link32]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link33]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link34]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec::into_boxed_slice
- [__link35]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link36]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link37]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link38]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link39]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec::leak
- [__link4]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
+ [__link29]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String
+ [__link3]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link30]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec
+ [__link31]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String::into_boxed_str
+ [__link32]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link33]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link34]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec::into_boxed_slice
+ [__link35]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link36]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link37]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link38]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link39]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec::leak
+ [__link4]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
  [__link40]: https://github.com/microsoft/oxidizer/blob/main/crates/multitude/BUMPALO.md
  [__link41]: https://crates.io/crates/bumpalo
- [__link42]: https://docs.rs/multitude/0.3.1/multitude/strings/index.html
- [__link43]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link44]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::ArcUtf16Str
- [__link45]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link46]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::BoxUtf16Str
- [__link47]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena
- [__link48]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String
- [__link49]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::Utf16String
- [__link5]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link50]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::format
- [__link51]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::format_utf16
- [__link52]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String
- [__link53]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String::into_boxed_str
- [__link54]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link55]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::Utf16String
- [__link56]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::Utf16String::into_boxed_utf16_str
- [__link57]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::BoxUtf16Str
- [__link58]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link59]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena
- [__link6]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link60]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::alloc_dst_arc
- [__link61]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::alloc_dst_box
+ [__link42]: https://docs.rs/multitude/0.3.2/multitude/strings/index.html
+ [__link43]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link44]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::ArcUtf16Str
+ [__link45]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link46]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::BoxUtf16Str
+ [__link47]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena
+ [__link48]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String
+ [__link49]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::Utf16String
+ [__link5]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link50]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::format
+ [__link51]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::format_utf16
+ [__link52]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String
+ [__link53]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String::into_boxed_str
+ [__link54]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link55]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::Utf16String
+ [__link56]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::Utf16String::into_boxed_utf16_str
+ [__link57]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::BoxUtf16Str
+ [__link58]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link59]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena
+ [__link6]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link60]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::alloc_dst_arc
+ [__link61]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::alloc_dst_box
  [__link62]: https://doc.rust-lang.org/stable/core/?search=alloc::Layout
  [__link63]: https://crates.io/crates/dst-factory
  [__link64]: https://doc.rust-lang.org/stable/std/?search=io::Write
- [__link65]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec
- [__link66]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link67]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link68]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String
- [__link69]: https://docs.rs/multitude/0.3.1/multitude/?search=vec::Vec
- [__link7]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link70]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::alloc_dst_arc
- [__link71]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::alloc_dst_box
- [__link72]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::ArcUtf16Str
- [__link73]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::BoxUtf16Str
- [__link74]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::Utf16String
- [__link75]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::format_utf16
+ [__link65]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec
+ [__link66]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link67]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link68]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String
+ [__link69]: https://docs.rs/multitude/0.3.2/multitude/?search=vec::Vec
+ [__link7]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link70]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::alloc_dst_arc
+ [__link71]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::alloc_dst_box
+ [__link72]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::ArcUtf16Str
+ [__link73]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::BoxUtf16Str
+ [__link74]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::Utf16String
+ [__link75]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::format_utf16
  [__link76]: https://crates.io/crates/widestring
- [__link77]: https://docs.rs/multitude/0.3.1/multitude/?search=zerocopy::ZerocopyView
+ [__link77]: https://docs.rs/multitude/0.3.2/multitude/?search=zerocopy::ZerocopyView
  [__link78]: https://docs.rs/zerocopy/0.8.50/zerocopy/?search=FromZeros
- [__link79]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::zerocopy
- [__link8]: https://docs.rs/multitude/0.3.1/multitude/?search=Box
- [__link80]: https://docs.rs/multitude/0.3.1/multitude/?search=bytemuck::BytemuckView
+ [__link79]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::zerocopy
+ [__link8]: https://docs.rs/multitude/0.3.2/multitude/?search=Box
+ [__link80]: https://docs.rs/multitude/0.3.2/multitude/?search=bytemuck::BytemuckView
  [__link81]: https://docs.rs/bytemuck/1.25.0/bytemuck/?search=Zeroable
- [__link82]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena::bytemuck
+ [__link82]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena::bytemuck
  [__link83]: https://doc.rust-lang.org/stable/std/convert/trait.From.html
- [__link84]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
- [__link85]: https://docs.rs/multitude/0.3.1/multitude/?search=Arc
+ [__link84]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
+ [__link85]: https://docs.rs/multitude/0.3.2/multitude/?search=Arc
  [__link86]: https://docs.rs/bytes/1.11.1/bytes/?search=Bytes
  [__link87]: https://docs.rs/bytesbuf/0.5.5/bytesbuf/?search=mem::Memory
- [__link88]: https://docs.rs/multitude/0.3.1/multitude/?search=Arena
+ [__link88]: https://docs.rs/multitude/0.3.2/multitude/?search=Arena
  [__link89]: https://docs.rs/bytesbuf/0.5.5/bytesbuf/?search=BytesBuf
- [__link9]: https://docs.rs/multitude/0.3.1/multitude/?search=strings::String
+ [__link9]: https://docs.rs/multitude/0.3.2/multitude/?search=strings::String
diff --git a/crates/multitude/benches/criterion_arc_array.rs b/crates/multitude/benches/criterion_arc_array.rs
new file mode 100644
index 000000000..a24c4337b
--- /dev/null
+++ b/crates/multitude/benches/criterion_arc_array.rs
@@ -0,0 +1,105 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Builds an `Arc<[Arc<[u8]>]>` of `PROPERTIES` binary blobs two ways and
+//! compares them: `std::sync::Arc` (global allocator) vs `multitude::Arc` (arena).
+#![allow(clippy::unwrap_used, reason = "benchmark code")]
+#![allow(clippy::missing_panics_doc, reason = "benchmark code")]
+#![allow(unused_results, reason = "benchmark code")]
+#![allow(clippy::std_instead_of_core, reason = "benchmark code")]
+#![allow(dead_code, reason = "array properties are held only to keep the allocation alive")]
+
+use std::hint::black_box;
+use std::sync::Arc as StdArc;
+
+use criterion::{Criterion, criterion_group, criterion_main};
+use multitude::{Arc as ArenaArc, Arena};
+
+// ---------------------------------------------------------------------------
+// Array shape: `PROPERTIES` binary blobs of `PROPERTY_SIZE` bytes each.
+// ---------------------------------------------------------------------------
+
+const PROPERTIES: usize = 8; // number of blobs in each built array
+const PROPERTY_SIZE: usize = 16; // byte length of each blob's payload
+
+// ---------------------------------------------------------------------------
+// Global-allocator array
+// ---------------------------------------------------------------------------
+
+fn build_global(payload: &[u8]) -> StdArc<[StdArc<[u8]>]> { // push-then-freeze build using `std::sync::Arc`
+    let mut properties = Vec::with_capacity(PROPERTIES);
+    for _ in 0..PROPERTIES {
+        properties.push(StdArc::<[u8]>::from(payload)); // each push copies `payload` into a fresh `Arc<[u8]>`
+    }
+    StdArc::from(properties) // converts the filled `Vec` into the outer `Arc<[_]>`
+}
+
+fn build_global_from_slice(properties: &[StdArc<[u8]>]) -> StdArc<[StdArc<[u8]>]> {
+    StdArc::from(properties) // clones each inner `Arc` handle into a new outer `Arc<[_]>`; no intermediate `Vec`
+}
+
+// ---------------------------------------------------------------------------
+// Arena-backed array
+// ---------------------------------------------------------------------------
+
+fn build_arena(arena: &Arena, payload: &[u8]) -> ArenaArc<[ArenaArc<[u8]>]> { // arena counterpart of `build_global`
+    let mut properties = arena.alloc_vec_with_capacity::<ArenaArc<[u8]>>(PROPERTIES);
+    for _ in 0..PROPERTIES {
+        properties.push(arena.alloc_slice_copy_arc(payload)); // one arena-backed blob per property
+    }
+    properties.try_into_arc().unwrap() // panics on `Err`; NOTE(review): failure conditions of `try_into_arc` are not visible here
+}
+
+fn build_arena_from_slice(arena: &Arena, properties: &[StdArc<[u8]>]) -> ArenaArc<[StdArc<[u8]>]> {
+    arena.alloc_slice_clone_arc(properties) // outer array is arena-backed; the elements stay std `Arc`s (see return type)
+}
+
+fn global_properties(payload: &[u8]) -> Vec<StdArc<[u8]>> { // pre-built input slice for the `*_from_slice` benchmarks
+    (0..PROPERTIES).map(|_| StdArc::<[u8]>::from(payload)).collect()
+}
+
+// ---------------------------------------------------------------------------
+// Criterion timing of the four build variants
+// ---------------------------------------------------------------------------
+
+fn bench_arc_array(c: &mut Criterion) { // registers the four `arc_array/*` benchmarks
+    let payload = vec![0xABu8; PROPERTY_SIZE];
+
+    let mut group = c.benchmark_group("arc_array");
+
+    group.bench_function("global", |b| {
+        b.iter(|| {
+            black_box(build_global(black_box(&payload)));
+        });
+    });
+
+    let arena = Arena::new(); // one arena shared by every iteration of the "arena" benchmark
+    black_box(build_arena(&arena, &payload)); // one untimed build before measuring (presumably to warm the arena; confirm)
+
+    group.bench_function("arena", |b| {
+        b.iter(|| {
+            black_box(build_arena(&arena, black_box(&payload)));
+        });
+    });
+
+    let global_props = global_properties(&payload); // input slice built once, outside the timed closures
+    group.bench_function("global_from_slice", |b| {
+        b.iter(|| {
+            black_box(build_global_from_slice(black_box(&global_props)));
+        });
+    });
+
+    let work_arena = Arena::new(); // separate arena for the from-slice variant
+    black_box(build_arena_from_slice(&work_arena, &global_props)); // one untimed build before measuring (presumably warm-up; confirm)
+
+    group.bench_function("arena_from_slice", |b| {
+        b.iter(|| {
+            black_box(build_arena_from_slice(&work_arena, black_box(&global_props)));
+        });
+    });
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_arc_array); // groups the benchmark function under the name `benches`
+criterion_main!(benches); // the bench target sets `harness = false`, so Criterion supplies `main`
diff --git a/crates/multitude/benches/gungraun_arc_array/linux.rs b/crates/multitude/benches/gungraun_arc_array/linux.rs
new file mode 100644
index 000000000..39846f4ad
--- /dev/null
+++ b/crates/multitude/benches/gungraun_arc_array/linux.rs
@@ -0,0 +1,172 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Instruction-precise `Arc<[Arc<[u8]>]>` build benchmarks for multitude.
+//!
+//! Mirrors `benches/criterion_arc_array.rs` 1:1: each gungraun function
+//! `<variant>` corresponds to a criterion benchmark `arc_array/<variant>`.
+//! Builds an `Arc<[Arc<[u8]>]>` of `PROPERTIES` binary blobs two ways and
+//! compares them: `std::sync::Arc` (global allocator) vs `multitude::Arc`
+//! (arena). Each is built with two strategies:
+//!
+//! - `*` — push freshly allocated properties through a growable vec, then
+//!   freeze it into the `Arc`.
+//! - `*_from_slice` — build directly from a pre-created slice of properties,
+//!   with no intermediate vec.
+//!
+//! # Allocation hygiene
+//!
+//! Following the same toggle hygiene as `gungraun_alloc`: setup (the arena
+//! warm-up, the payload, the pre-created property slice, and the pre-sized
+//! output `Vec`) runs outside the callgrind toggle via `#[bench::run(...)]`.
+//! The timed body only builds the structures and pushes the handles into the
+//! pre-sized output `Vec`, which is returned by value so its `Drop` runs
+//! outside the toggle. The only traffic counted is the build itself.
+
+#![allow(missing_docs, reason = "Benchmark")]
+#![allow(unused_results, reason = "black_box of bench input is intentional")]
+#![allow(clippy::unwrap_used, reason = "benchmark code")]
+#![allow(
+    clippy::needless_pass_by_value,
+    reason = "gungraun bench inputs are passed by value by the framework"
+)]
+#![allow(clippy::type_complexity, reason = "benchmark state tuples are inherently complex")]
+#![allow(clippy::too_many_lines, reason = "benchmark file")]
+
+use core::hint::black_box;
+use std::sync::Arc as StdArc;
+
+use gungraun::{Callgrind, LibraryBenchmarkConfig, library_benchmark, library_benchmark_group, main};
+use multitude::{Arc as ArenaArc, Arena};
+
+// Array shape: `PROPERTIES` binary blobs of `PROPERTY_SIZE` bytes each, built
+// `N` times per bench so the per-build instruction count is stable.
+const PROPERTIES: usize = 8; // number of blobs in each built array
+const PROPERTY_SIZE: usize = 16; // byte length of each blob's payload
+const N: usize = 1_000; // builds performed inside each benchmark body
+
+type GlobalArray = StdArc<[StdArc<[u8]>]>; // std outer array holding std blobs
+type ArenaArrayOfArena = ArenaArc<[ArenaArc<[u8]>]>; // arena outer array holding arena blobs
+type ArenaArrayOfGlobal = ArenaArc<[StdArc<[u8]>]>; // arena outer array holding std blobs
+
+// ===== shared builders (mirror criterion_arc_array.rs) =====
+
+fn build_global(payload: &[u8]) -> GlobalArray { // push-then-freeze build using `std::sync::Arc`
+    let mut properties = Vec::with_capacity(PROPERTIES);
+    for _ in 0..PROPERTIES {
+        properties.push(StdArc::<[u8]>::from(payload)); // each push copies `payload` into a fresh `Arc<[u8]>`
+    }
+    StdArc::from(properties) // converts the filled `Vec` into the outer `Arc<[_]>`
+}
+
+fn build_global_from_slice(properties: &[StdArc<[u8]>]) -> GlobalArray {
+    StdArc::from(properties) // clones each inner `Arc` handle into a new outer `Arc<[_]>`; no intermediate `Vec`
+}
+
+fn build_arena(arena: &Arena, payload: &[u8]) -> ArenaArrayOfArena { // arena counterpart of `build_global`
+    let mut properties = arena.alloc_vec_with_capacity::<ArenaArc<[u8]>>(PROPERTIES);
+    for _ in 0..PROPERTIES {
+        properties.push(arena.alloc_slice_copy_arc(payload)); // one arena-backed blob per property
+    }
+    properties.try_into_arc().unwrap() // panics on `Err`; NOTE(review): failure conditions of `try_into_arc` are not visible here
+}
+
+fn build_arena_from_slice(arena: &Arena, properties: &[StdArc<[u8]>]) -> ArenaArrayOfGlobal {
+    arena.alloc_slice_clone_arc(properties) // outer array is arena-backed; the elements stay std `Arc`s (see return type)
+}
+
+// ===== leaf setup helpers =====
+
+fn payload() -> Vec<u8> { // blob contents used for every property: `PROPERTY_SIZE` bytes of 0xAB
+    vec![0xAB_u8; PROPERTY_SIZE]
+}
+
+fn global_properties() -> Vec<StdArc<[u8]>> { // pre-built input slice for the `*_from_slice` benchmarks
+    let payload = payload();
+    (0..PROPERTIES).map(|_| StdArc::<[u8]>::from(payload.as_slice())).collect()
+}
+
+fn warm_arena() -> Arena {
+    // Warm: preallocate one chunk of the largest size class for each flavor
+    // AND prime the arena's current_local / current_shared mutators with a
+    // throwaway allocation, so the timed body never pays a cold `refill_*`.
+    // Mirrors `gungraun_alloc::warm_arena`.
+    let arena = Arena::builder()
+        .with_capacity_local(64 * 1024)
+        .with_capacity_shared(64 * 1024)
+        .build();
+    let _: &mut u64 = arena.alloc(0_u64); // throwaway allocation (presumably the local-chunk primer; confirm)
+    let _ = arena.alloc_arc(0_u64); // throwaway allocation (presumably the shared-chunk primer; confirm)
+    arena
+}
+
+// ===== composite setups (pre-allocate the output Vec to N) =====
+
+fn setup_global() -> (Vec<u8>, Vec<GlobalArray>) { // state for `global`: payload plus output `Vec`
+    (payload(), Vec::with_capacity(N)) // output pre-sized to `N`, the number of pushes in the bench body
+}
+
+fn setup_arena() -> (Arena, Vec<u8>, Vec<ArenaArrayOfArena>) { // state for `arena`: warmed arena, payload, output `Vec`
+    (warm_arena(), payload(), Vec::with_capacity(N)) // output pre-sized to `N`, the number of pushes in the bench body
+}
+
+fn setup_global_from_slice() -> (Vec<StdArc<[u8]>>, Vec<GlobalArray>) { // state for `global_from_slice`
+    (global_properties(), Vec::with_capacity(N)) // output pre-sized to `N`, the number of pushes in the bench body
+}
+
+fn setup_arena_from_slice() -> (Arena, Vec<StdArc<[u8]>>, Vec<ArenaArrayOfGlobal>) { // state for `arena_from_slice`
+    (warm_arena(), global_properties(), Vec::with_capacity(N)) // output pre-sized to `N`, the number of pushes in the bench body
+}
+
+// ===== bench bodies — only the build is inside the toggle =====
+
+#[library_benchmark]
+#[bench::run(setup_global())] // setup runs outside the callgrind toggle (see module docs)
+fn global(state: (Vec<u8>, Vec<GlobalArray>)) -> (Vec<u8>, Vec<GlobalArray>) {
+    let (payload, mut out) = state;
+    for _ in 0..N {
+        out.push(black_box(build_global(black_box(&payload)))); // keep each built array alive in the pre-sized output
+    }
+    (payload, out) // returned by value so the drops run outside the toggle (see module docs)
+}
+
+#[library_benchmark]
+#[bench::run(setup_arena())] // setup runs outside the callgrind toggle (see module docs)
+fn arena(state: (Arena, Vec<u8>, Vec<ArenaArrayOfArena>)) -> (Arena, Vec<u8>, Vec<ArenaArrayOfArena>) {
+    let (arena, payload, mut out) = state;
+    for _ in 0..N {
+        out.push(black_box(build_arena(&arena, black_box(&payload)))); // keep each built array alive in the pre-sized output
+    }
+    (arena, payload, out) // returned by value so the drops run outside the toggle (see module docs)
+}
+
+#[library_benchmark]
+#[bench::run(setup_global_from_slice())] // setup runs outside the callgrind toggle (see module docs)
+fn global_from_slice(state: (Vec<StdArc<[u8]>>, Vec<GlobalArray>)) -> (Vec<StdArc<[u8]>>, Vec<GlobalArray>) {
+    let (properties, mut out) = state;
+    for _ in 0..N {
+        out.push(black_box(build_global_from_slice(black_box(&properties)))); // keep each built array alive in the pre-sized output
+    }
+    (properties, out) // returned by value so the drops run outside the toggle (see module docs)
+}
+
+#[library_benchmark]
+#[bench::run(setup_arena_from_slice())] // setup runs outside the callgrind toggle (see module docs)
+fn arena_from_slice(state: (Arena, Vec<StdArc<[u8]>>, Vec<ArenaArrayOfGlobal>)) -> (Arena, Vec<StdArc<[u8]>>, Vec<ArenaArrayOfGlobal>) {
+    let (arena, properties, mut out) = state;
+    for _ in 0..N {
+        out.push(black_box(build_arena_from_slice(&arena, black_box(&properties)))); // keep each built array alive in the pre-sized output
+    }
+    (arena, properties, out) // returned by value so the drops run outside the toggle (see module docs)
+}
+
+library_benchmark_group!(
+    name = arc_array_group; // group name referenced by the `main!` invocations here and in `main.rs`
+    benchmarks = global, arena, global_from_slice, arena_from_slice // one entry per variant, same names as the criterion benches
+);
+
+main!( // NOTE(review): `main.rs` also invokes `gungraun::main!` with this same config and group; confirm both are needed
+    config = LibraryBenchmarkConfig::default()
+        .tool(Callgrind::with_args(["--branch-sim=yes"])); // passes `--branch-sim=yes` to Callgrind
+    library_benchmark_groups = arc_array_group
+);
diff --git a/crates/multitude/benches/gungraun_arc_array/main.rs b/crates/multitude/benches/gungraun_arc_array/main.rs
new file mode 100644
index 000000000..f28534120
--- /dev/null
+++ b/crates/multitude/benches/gungraun_arc_array/main.rs
@@ -0,0 +1,46 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Instruction-precise `Arc<[Arc<[u8]>]>` build benchmarks for multitude.
+//!
+//! Mirrors `benches/criterion_arc_array.rs` 1:1: each gungraun function
+//! `<variant>` corresponds to a criterion benchmark `arc_array/<variant>`.
+//!
+//! Run with `cargo bench --bench gungraun_arc_array` on a Linux host with
+//! Valgrind.
+
+#![allow(missing_docs, reason = "Benchmark")]
+#![allow(unused_results, reason = "black_box of bench input is intentional")]
+#![allow(
+    clippy::needless_pass_by_value,
+    reason = "gungraun bench inputs are passed by value by the framework"
+)]
+#![allow(clippy::type_complexity, reason = "benchmark state tuples are inherently complex")]
+#![allow(clippy::too_many_lines, reason = "benchmark file")]
+#![cfg_attr(
+    target_os = "linux",
+    expect(
+        clippy::exit,
+        clippy::missing_docs_in_private_items,
+        unused_qualifications,
+        reason = "Triggered by Gungraun macro expansion. Upstream tracking issues are pending."
+    )
+)]
+
+// Gungraun requires Valgrind, which is Linux-only. On other platforms this
+// bench target compiles to a no-op so `cargo build --all-targets` still works.
+#[cfg(not(target_os = "linux"))]
+fn main() {} // stub entry point for non-Linux targets
+
+#[cfg(target_os = "linux")]
+mod linux; // the benchmark functions and group live in `linux.rs`
+
+#[cfg(target_os = "linux")]
+use linux::*; // NOTE(review): presumably what brings `arc_array_group` into scope for the macro below; confirm
+
+#[cfg(target_os = "linux")]
+gungraun::main!( // NOTE(review): `linux.rs` ends with its own `main!` using the same config and group; confirm both are needed
+    config = gungraun::LibraryBenchmarkConfig::default()
+        .tool(gungraun::Callgrind::with_args(["--branch-sim=yes"])); // passes `--branch-sim=yes` to Callgrind
+    library_benchmark_groups = arc_array_group
+);
diff --git a/crates/multitude/docs/BUMPALO.md b/crates/multitude/docs/BUMPALO.md
index 1229dde9e..287413c36 100644
--- a/crates/multitude/docs/BUMPALO.md
+++ b/crates/multitude/docs/BUMPALO.md
@@ -12,7 +12,7 @@ spirit; here's how multitude differs.
 | Refcounted smart pointers                               | ❌ (raw `&'bump T`)                                                          | ✅ `Arc` (atomic; thread-safe sharing)                                                                                                                                                                 |
 | Smart pointers outlive the arena                        | ❌                                                                           | ✅ (`Arc` / `Box` and their `str` variants — simple references are lifetime-bound)                                                                                                                     |
 | Cross-thread sharing of individual values               | ❌                                                                           | ✅ via `Arc`                                                                                                                                                                                           |
-| Automatic per-object `Drop`                             | Only via `bumpalo::boxed::Box`                                              | ✅ Automatic (refcount smart pointers drop at chunk teardown; `Box` drops at smart pointer drop; simple references drop at arena drop)                                                                 |
+| Automatic per-object `Drop`                             | Only via `bumpalo::boxed::Box`                                              | ✅ Automatic (`Arc` drops at last-clone drop, `Box` drops at smart-pointer drop, simple references drop at arena reset/drop)                                                                 |
 | Owned single smart pointer (`Drop` on drop)             | `bumpalo::boxed::Box`                                                       | `Box`                                                                                                                                                                                                 |
 | Smart-pointer width                                     | 16 bytes for fat DSTs (`&str`, `Bump-allocated boxed slice` are 2-word)     | **8 bytes uniformly** — `Arc<T>` / `Box<T>` are thin even for DST `T` (slice / `str` / `dyn Trait` / custom `Pointee`); DST metadata is stored unaligned in a chunk prefix                            |
 | Single-pointer string smart pointers                    | ❌ (`&str` is 16 bytes)                                                      | ✅ `Arc<str>` / `Box<str>` / `ArcUtf16Str` / `BoxUtf16Str` are all 8 bytes (length stored unaligned in a `usize` prefix in the chunk; zero per-string padding)                                         |
diff --git a/crates/multitude/docs/DESIGN.md b/crates/multitude/docs/DESIGN.md
index bdb688334..f58983365 100644
--- a/crates/multitude/docs/DESIGN.md
+++ b/crates/multitude/docs/DESIGN.md
@@ -72,13 +72,17 @@ the shared chunk's `AtomicUsize` refcount on every allocation would be a
 hot-path atomic. Instead, at install time the arena pre-credits the
 chunk's atomic `ref_count` with `LARGE_SHARED_REF_SURPLUS` (2^30) and
 tracks per-allocation handouts in the non-atomic `local_shared_count`
-(`Cell<u32>`). At retire (refill / reset / arena drop) the surplus is
-reconciled with a single
+(`Cell<u32>`). At retire (`refill_shared` or `Arena::drop`) the surplus
+is reconciled with a single
 `fetch_sub(LARGE_SHARED_REF_SURPLUS - local_shared_count)`, leaving the
 chunk's atomic count equal to the number of escaped handles. The 2^30
 surplus is large enough that concurrent `Arc::drop` on other threads
-cannot underflow it, while the `u32` counter leaves ~2^30 headroom
-against `Arc::clone` overflow.
+cannot underflow it. `Arc::clone` does not touch this count —
+each `Arc` family takes exactly one chunk refcount at allocation and
+releases it when its last clone drops (clones bump only the per-`Arc`
+strong count; see *Per-`Arc` reference counting*). `Arena::reset` does
+not reconcile or detach the installed shared chunk — it resets only
+local-chunk state, so shared allocations continue on the same chunk.
 
 **Size-class ratchet.** Each successful refill bumps the matching
 `next_*_class` toward the largest cacheable class (`NUM_CHUNK_CLASSES
@@ -155,7 +159,6 @@ pub(crate) struct SharedChunk<A: Allocator + Clone> {
     capacity:  usize,
     ref_count: AtomicUsize,
     next:      AtomicPtr<u8>,            // intrusive cache-freelist link
-    drop_entry_count: AtomicU16,
     #[cfg(feature = "stats")]
     wasted_at_retire: AtomicU32,
     data: [UnsafeCell<u8>],
@@ -209,7 +212,10 @@ is a **single 8-byte raw pointer** into the chunk's `data` tail. DST
 metadata (slice length, vtable) lives unaligned in the chunk prefix
 immediately preceding the value payload, read with
 `core::ptr::read_unaligned`. For `T: Sized` the metadata is `()` so
-there's no prefix overhead.
+there's no prefix overhead. `Arc<T>` additionally stores its
+per-`Arc` strong count (an `AtomicU32`) in the prefix, before the
+metadata (see *Per-`Arc` reference counting*); `Box` stores no strong
+count, so its prefix holds only the DST metadata (if any).
 
 To recover the owning chunk's header from a smart-pointer value, each
 smart-pointer type **masks the low bits to the 64 KiB boundary**
@@ -237,23 +243,64 @@ Two consequences of the masking scheme:
   refill path if a ZST would otherwise land at the one-past-end
   boundary.
 
+## Per-`Arc` reference counting
+
+Each `Arc<T>` carries **its own** strong reference count — an
+`AtomicU32` stored in the chunk payload immediately *before* the value
+(and before the DST metadata, if any). The layout of an `Arc` value is:
+
+```text
+[strong (AtomicU32, at reservation base)][pad][T::Metadata (unaligned)][T payload]
+                                                                        ^ value pointer
+```
+
+The reservation is aligned to `max(align_of::<T>(), 4)` so the leading
+strong slot is 4-byte aligned; the value pointer is `align_of::<T>()`
+aligned and the metadata sits immediately before it (recovered with
+`read_unaligned`, exactly as for `Box`). The strong count is recovered
+from the value pointer by subtracting a fixed prefix
+(`thin_dst::strong_prefix_bytes_for`) and is accessed only as an
+`AtomicU32` — never through a reference that spans the (possibly
+uninitialized) payload, which keeps the scheme sound under Miri.
+
+The accounting works as follows:
+
+- **Allocation** writes `strong = 1` and takes **one** refcount on the
+  hosting chunk for the whole `Arc` family (via the pre-credited
+  surplus, as for any shared allocation).
+- **`Arc::clone`** bumps only the per-`Arc` `strong` with a single
+  `Relaxed` increment — it does **not** touch the chunk refcount.
+- **`Arc::drop`** does a `Release` decrement of `strong`; on the
+  `strong → 0` transition it runs an `Acquire` fence, drops the value
+  in place (`drop_in_place::<T>`, which natively handles `?Sized`),
+  and releases the family's single chunk refcount (adopted *before*
+  the value drop, so a panicking destructor still releases the chunk).
+
+Because the value's destructor runs eagerly on the last `Arc` (rather
+than being deferred to chunk teardown), nested arena `Arc`s — e.g.
+`Arc<[Arc<T>]>` whose inner and outer handles share a chunk — release
+their storage promptly instead of forming a self-pinning cycle.
+
+`Arc::<MaybeUninit<T>>::assume_init` is a pure reinterpretation: `MaybeUninit<T>`
+and `T` share size, alignment, and metadata, so the strong-prefix layout
+is identical and the strong count is untouched.
+
 ## `DropEntry`
 
-`DropEntry` records the deferred destructor work for values whose
-`Drop` cannot be run by the smart pointer itself — i.e. arena
-references (`&mut T` / `&mut [T]`, which have no `Drop` of their own)
-and `Arc<T>` (whose value must be dropped by whichever handle observes
-the last refcount, a moment only the chunk can detect). **No `Box`
-variant registers a drop entry**: `Box::drop` runs `drop_in_place` on
-the (re-fattened) value pointer eagerly, which natively handles `?Sized`
-`T`, so sized `Box<T>`, slice `Box<[T]>`, and DST `Box<dyn Trait>` all
-need no entry.
-
-Each such allocation reserves **both** `size_of::<T>()` at the front
-of the free region *and* one `DropEntry` slot at the back. The
-effective remaining capacity is `drop_top - bump`; overflow is
-detected when those two meet. Allocations of `T: !Drop` skip the
-reservation entirely.
+`DropEntry` records the deferred destructor work for **local arena
+references only** — `Arena::alloc -> &mut T` and `&mut [T]`, which have
+no `Drop` of their own and whose backing chunk runs the destructor at
+teardown. **Neither `Box` nor `Arc` registers a drop entry, and shared
+chunks never carry one**: `Box::drop` runs `drop_in_place` eagerly on
+the (re-fattened) value pointer, and `Arc::drop` does the same on the
+last strong reference (see *Per-`Arc` reference counting* above). Drop
+entries therefore live exclusively on `LocalChunk`s.
+
+Each such reference allocation reserves **both** `size_of::<T>()` at the
+front of the free region *and* one `DropEntry` slot at the back. The
+effective remaining capacity is `drop_top - bump`; overflow is detected
+when those two meet. Allocations of `T: !Drop` skip the reservation
+entirely.
 
 ```rust
 #[repr(C)]
@@ -266,9 +313,11 @@ struct DropEntry {
 }
 ```
 
-`len` is a `u16`; slice/DST allocations whose `needs_drop` count
+`len` is a `u16`; local slice references whose `needs_drop` count
 exceeds `u16::MAX` are rejected up front by their `alloc_*` orchestrator
-so the placeholder never overflows.
+so the placeholder never overflows. (The `Arc<[T]>` family has **no**
+such cap, since it drops via `drop_in_place::<[T]>` rather than a
+counted entry.)
 
 **Two-phase write.** Allocation paths reserve a *placeholder* (null
 `drop_fn`, real `value_offset`/`len`) up front. After the value is
@@ -279,26 +328,27 @@ initialization closure panicked or whose `Uninit` ticket was dropped
 without `init`. Storing as `AtomicPtr<()>` (not `AtomicUsize`)
 preserves function-pointer provenance under Miri's strict provenance.
 
-The commit is idempotent: concurrent `Arc::<MaybeUninit<T>>::assume_init`
-on cloned handles all install the same `T`-determined shim.
-
-**Replay.** When the chunk's last refcount drops, the chunk walks its
-drop-entry stack **newest-first** (LIFO, matching Rust drop order) and
-invokes `(drop_fn)(data + value_offset, len)` on each committed
-entry. A panic in any shim is contained; replay continues so remaining
-destructors still run.
+**Replay.** When a `LocalChunk`'s refcount drops to zero (at
+`Arena::reset` / `Arena::drop`), the chunk walks its drop-entry stack
+**newest-first** (LIFO, matching Rust drop order) and invokes
+`(drop_fn)(data + value_offset, len)` on each committed entry. Shared
+chunks skip this step entirely. A panic in any shim is contained;
+replay continues so remaining destructors still run.
 
 **Closure-panic safety.** The smart-pointer construction paths take a
 protective `ChunkRef` (`+1` guard) before invoking the user closure.
 On unwinding, the `ChunkRef`'s `Drop` releases the +1; on success the
 caller calls `ChunkRef::forget` to transfer the +1 into the
 freshly-constructed smart pointer. Combined with the two-phase
-placeholder, a panicking closure leaves no `T::drop` queued on
+placeholder (for local references) and eager `drop_in_place` (for
+`Box`/`Arc`), a panicking closure leaves no `T::drop` queued on
 uninitialized memory and no refcount leaked.
 
-**Refcount overflow.** Both `inc_ref` paths check against the
-wraparound boundary and abort (`std::process::abort` or a forced
-double-panic under `no_std`) if exceeded. The abort helper is
-`#[cold] #[inline(never)]` so the hot-path call site stays small.
-This mirrors `std::sync::Arc`: a wraparound would race live pointers
-with a free, and the only sound response is to terminate.
+**Refcount overflow.** Both the chunk `inc_ref` paths and `Arc::clone`'s
+per-`Arc` `strong` increment check against the wraparound boundary and
+abort (`std::process::abort` or a forced double-panic under `no_std`) if
+exceeded. The abort helper is `#[cold] #[inline(never)]` so the hot-path
+call site stays small. This mirrors `std::sync::Arc`: a wraparound would
+race live pointers with a free, and the only sound response is to
+terminate.
+
diff --git a/crates/multitude/docs/PERF.md b/crates/multitude/docs/PERF.md
index 173e127e6..e41eb71d5 100644
--- a/crates/multitude/docs/PERF.md
+++ b/crates/multitude/docs/PERF.md
@@ -13,98 +13,98 @@ Bench names are aligned between criterion and gungraun via the `GROUPS` table in
 
 | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses |
 |---|---:|---:|---:|---:|
-| `multitude_new` | 38 ns | 316 | 9 | 457 |
+| `multitude_new` | 37 ns | 316 | 8 | 457 |
 | `bumpalo_new` | 1 ns | 16 | 1 | 26 |
 
 ## `alloc_u64`
 
 | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses |
 |---|---:|---:|---:|---:|
-| `alloc` | 6.45 µs | 14,026 | 6 | 21,043 |
-| `alloc_with` | 6.53 µs | 14,024 | 11 | 21,040 |
-| `alloc_box` | 5.24 µs | 23,043 | 9 | 37,078 |
-| `alloc_box_with` | 5.18 µs | 24,043 | 9 | 38,078 |
-| `alloc_uninit_box` | 2.33 µs | 20,043 | 9 | 31,078 |
-| `alloc_zeroed_box` | 4.85 µs | 21,043 | 9 | 33,078 |
-| `alloc_arc` | 5.36 µs | 23,043 | 7 | 37,078 |
-| `alloc_arc_with` | 5.19 µs | 24,043 | 9 | 38,078 |
-| `alloc_uninit_arc` | 2.33 µs | 20,043 | 9 | 31,078 |
-| `alloc_zeroed_arc` | 4.96 µs | 21,043 | 9 | 33,078 |
-| `bumpalo_alloc` | 5.97 µs | 19,022 | 4 | 27,037 |
-| `bumpalo_alloc_with` | 6.08 µs | 19,020 | 4 | 27,034 |
+| `alloc` | 6.58 µs | 14,026 | 6 | 21,043 |
+| `alloc_with` | 6.62 µs | 14,024 | 9 | 21,040 |
+| `alloc_box` | 5.83 µs | 23,043 | 9 | 37,078 |
+| `alloc_box_with` | 5.94 µs | 24,043 | 9 | 38,078 |
+| `alloc_uninit_box` | 3.10 µs | 20,043 | 9 | 31,078 |
+| `alloc_zeroed_box` | 5.58 µs | 21,043 | 9 | 33,078 |
+| `alloc_arc` | 9.48 µs | 25,043 | 9 | 40,078 |
+| `alloc_arc_with` | 9.78 µs | 26,043 | 8 | 41,078 |
+| `alloc_uninit_arc` | 9.26 µs | 22,043 | 9 | 34,078 |
+| `alloc_zeroed_arc` | 9.53 µs | 23,043 | 9 | 36,078 |
+| `bumpalo_alloc` | 6.57 µs | 19,022 | 6 | 27,037 |
+| `bumpalo_alloc_with` | 6.58 µs | 19,020 | 4 | 27,034 |
 
 ## `alloc_str`
 
 | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses |
 |---|---:|---:|---:|---:|
-| `alloc_str` | 8.24 µs | 51,053 | 10 | 76,098 |
-| `alloc_str_box` | 11.83 µs | 59,053 | 11 | 85,098 |
-| `alloc_str_arc` | 11.89 µs | 59,053 | 11 | 85,098 |
-| `bumpalo_alloc_str` | 9.13 µs | 50,048 | 13 | 75,088 |
+| `alloc_str` | 8.36 µs | 51,053 | 10 | 76,098 |
+| `alloc_str_box` | 12.64 µs | 59,053 | 11 | 85,098 |
+| `alloc_str_arc` | 14.00 µs | 58,054 | 11 | 84,099 |
+| `bumpalo_alloc_str` | 9.56 µs | 50,048 | 13 | 75,088 |
 
 ## `alloc_slice`
 
 | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses |
 |---|---:|---:|---:|---:|
-| `alloc_slice_copy` | 22.82 µs | 41,049 | 4 | 57,090 |
-| `alloc_slice_clone` | 22.50 µs | 45,050 | 10 | 58,091 |
-| `alloc_slice_fill_with` | 24.07 µs | 38,026 | 11 | 68,043 |
-| `alloc_slice_fill_iter` | 24.18 µs | 38,027 | 11 | 68,044 |
-| `alloc_slice_copy_box` | 41.99 µs | 55,646 | 33 | 83,916 |
-| `alloc_slice_clone_box` | 42.18 µs | 68,646 | 40 | 92,915 |
-| `alloc_slice_fill_with_box` | 43.59 µs | 48,585 | 40 | 86,809 |
-| `alloc_slice_fill_iter_box` | 43.84 µs | 50,585 | 39 | 90,809 |
-| `alloc_uninit_slice_box` | 39.83 µs | 23,585 | 40 | 36,809 |
-| `alloc_zeroed_slice_box` | 40.73 µs | 27,585 | 40 | 43,809 |
-| `alloc_slice_copy_arc` | 42.49 µs | 53,647 | 34 | 80,917 |
-| `alloc_slice_clone_arc` | 42.37 µs | 59,645 | 39 | 80,914 |
-| `alloc_slice_fill_with_arc` | 44.47 µs | 46,585 | 41 | 82,809 |
-| `alloc_slice_fill_iter_arc` | 43.77 µs | 47,585 | 40 | 84,809 |
-| `alloc_uninit_slice_arc` | 40.01 µs | 22,585 | 40 | 34,809 |
-| `alloc_zeroed_slice_arc` | 41.27 µs | 25,585 | 40 | 39,809 |
-| `bumpalo_alloc_slice_copy` | 23.49 µs | 38,042 | 4 | 55,076 |
-| `bumpalo_alloc_slice_clone` | 24.38 µs | 60,046 | 9 | 74,083 |
-| `bumpalo_alloc_slice_fill_with` | 25.44 µs | 40,020 | 5 | 70,033 |
-| `bumpalo_alloc_slice_fill_iter` | 25.43 µs | 40,020 | 5 | 70,033 |
+| `alloc_slice_copy` | 33.81 µs | 41,049 | 3 | 57,090 |
+| `alloc_slice_clone` | 33.49 µs | 45,050 | 10 | 58,091 |
+| `alloc_slice_fill_with` | 35.52 µs | 38,026 | 10 | 68,043 |
+| `alloc_slice_fill_iter` | 35.82 µs | 38,027 | 9 | 68,044 |
+| `alloc_slice_copy_box` | 50.31 µs | 55,624 | 28 | 83,885 |
+| `alloc_slice_clone_box` | 49.03 µs | 68,624 | 36 | 92,884 |
+| `alloc_slice_fill_with_box` | 51.45 µs | 48,563 | 31 | 86,778 |
+| `alloc_slice_fill_iter_box` | 52.38 µs | 50,563 | 34 | 90,778 |
+| `alloc_uninit_slice_box` | 46.89 µs | 23,563 | 34 | 36,778 |
+| `alloc_zeroed_slice_box` | 48.11 µs | 27,563 | 34 | 43,778 |
+| `alloc_slice_copy_arc` | 54.53 µs | 55,625 | 28 | 83,886 |
+| `alloc_slice_clone_arc` | 54.06 µs | 61,623 | 36 | 83,883 |
+| `alloc_slice_fill_with_arc` | 56.68 µs | 47,563 | 33 | 84,778 |
+| `alloc_slice_fill_iter_arc` | 55.84 µs | 48,563 | 32 | 86,778 |
+| `alloc_uninit_slice_arc` | 51.35 µs | 23,563 | 34 | 36,778 |
+| `alloc_zeroed_slice_arc` | 51.95 µs | 26,563 | 33 | 41,778 |
+| `bumpalo_alloc_slice_copy` | 36.94 µs | 38,042 | 7 | 55,076 |
+| `bumpalo_alloc_slice_clone` | 36.81 µs | 60,046 | 10 | 74,083 |
+| `bumpalo_alloc_slice_fill_with` | 36.03 µs | 40,020 | 5 | 70,033 |
+| `bumpalo_alloc_slice_fill_iter` | 37.52 µs | 40,020 | 5 | 70,033 |
 
 ## `string_builder`
 
 | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses |
 |---|---:|---:|---:|---:|
-| `alloc_string` | 8.05 µs | 36,836 | 32 | 51,184 |
-| `alloc_string_with_capacity` | 7.64 µs | 37,194 | 21 | 52,304 |
-| `bumpalo_string_new_in` | 9.20 µs | 35,843 | 76 | 50,867 |
-| `bumpalo_string_with_capacity_in` | 10.62 µs | 34,708 | 28 | 49,159 |
+| `alloc_string` | 8.23 µs | 36,849 | 28 | 51,203 |
+| `alloc_string_with_capacity` | 8.09 µs | 37,210 | 20 | 52,325 |
+| `bumpalo_string_new_in` | 12.16 µs | 35,843 | 74 | 50,867 |
+| `bumpalo_string_with_capacity_in` | 11.79 µs | 34,708 | 30 | 49,159 |
 
 ## `vec_builder`
 
 | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses |
 |---|---:|---:|---:|---:|
-| `alloc_vec` | 1.25 µs | 11,765 | 31 | 17,053 |
-| `alloc_vec_with_capacity` | 1.23 µs | 12,132 | 8 | 18,215 |
-| `bumpalo_vec_new_in` | 3.72 µs | 12,281 | 61 | 18,888 |
-| `bumpalo_vec_with_capacity_in` | 3.48 µs | 11,069 | 2 | 17,116 |
+| `alloc_vec` | 1.29 µs | 11,792 | 30 | 17,087 |
+| `alloc_vec_with_capacity` | 1.23 µs | 12,139 | 10 | 18,221 |
+| `bumpalo_vec_new_in` | 3.89 µs | 12,281 | 61 | 18,888 |
+| `bumpalo_vec_with_capacity_in` | 3.63 µs | 11,069 | 2 | 17,116 |
 
 ## `drop`
 
 | Variant | Time (criterion) | Instructions | Branch misses | Mem accesses |
 |---|---:|---:|---:|---:|
-| `box_u64` | 8.42 µs | 10,309 | 55 | 13,904 |
-| `rc_u64` | 8.18 µs | 10,309 | 55 | 13,904 |
-| `arc_u64` | 8.36 µs | 10,309 | 55 | 13,904 |
-| `box_droppy` | 22.06 µs | 186,161 | 77 | 272,621 |
-| `rc_droppy` | 27.37 µs | 219,386 | 80 | 320,930 |
-| `arc_droppy` | 27.25 µs | 219,386 | 80 | 320,930 |
-| `str_box` | 7.59 µs | 10,309 | 55 | 13,904 |
-| `str_rc` | 7.68 µs | 10,309 | 55 | 13,904 |
-| `str_arc` | 7.71 µs | 10,309 | 55 | 13,904 |
-| `slice_box_u64` | 13.96 µs | 10,819 | 58 | 14,639 |
-| `slice_rc_u64` | 12.30 µs | 10,819 | 58 | 14,639 |
-| `slice_arc_u64` | 12.59 µs | 10,819 | 58 | 14,639 |
-| `slice_box_droppy` | 115.72 µs | 1,520,210 | 1,848 | 2,214,775 |
-| `slice_rc_droppy` | 122.93 µs | 1,546,283 | 1,110 | 2,253,860 |
-| `slice_arc_droppy` | 122.05 µs | 1,546,283 | 1,110 | 2,253,860 |
-| `alloc` | 686 ns | 337 | 15 | 504 |
+| `box_u64` | 8.05 µs | 10,660 | 68 | 14,433 |
+| `rc_u64` | 12.85 µs | 13,005 | 64 | 18,929 |
+| `arc_u64` | 12.53 µs | 13,005 | 64 | 18,929 |
+| `box_droppy` | 15.19 µs | 186,501 | 86 | 273,127 |
+| `rc_droppy` | 15.53 µs | 188,852 | 83 | 277,632 |
+| `arc_droppy` | 20.20 µs | 188,852 | 83 | 277,632 |
+| `str_box` | 7.50 µs | 10,660 | 68 | 14,433 |
+| `str_rc` | 12.30 µs | 13,005 | 70 | 18,929 |
+| `str_arc` | 12.13 µs | 13,005 | 70 | 18,929 |
+| `slice_box_u64` | 14.68 µs | 11,395 | 68 | 15,498 |
+| `slice_rc_u64` | 19.22 µs | 13,390 | 63 | 19,490 |
+| `slice_arc_u64` | 19.27 µs | 13,390 | 63 | 19,490 |
+| `slice_box_droppy` | 123.93 µs | 1,480,204 | 1,362 | 2,162,703 |
+| `slice_rc_droppy` | 122.57 µs | 1,482,204 | 1,107 | 2,166,702 |
+| `slice_arc_droppy` | 123.82 µs | 1,482,204 | 1,107 | 2,166,702 |
+| `alloc` | 970 ns | 345 | 13 | 514 |
 
 ## Multitude vs Bumpalo Head-to-Head
 
@@ -112,14 +112,14 @@ Direct comparisons of multitude versus bumpalo on identical workloads (the multi
 
 | Workload | Multitude time | Bumpalo time | Δ time | Multitude instr | Bumpalo instr | Δ instr |
 |---|---:|---:|---:|---:|---:|---:|
-| `alloc` vs `bumpalo_alloc` | 6.45 µs | 5.97 µs | +8.1% | 14,026 | 19,022 | -26.3% |
-| `alloc_str` vs `bumpalo_alloc_str` | 8.24 µs | 9.13 µs | -9.7% | 51,053 | 50,048 | +2.0% |
-| `alloc_slice_copy` vs `bumpalo_alloc_slice_copy` | 22.82 µs | 23.49 µs | -2.9% | 41,049 | 38,042 | +7.9% |
-| `alloc_slice_clone` vs `bumpalo_alloc_slice_clone` | 22.50 µs | 24.38 µs | -7.7% | 45,050 | 60,046 | -25.0% |
-| `alloc_slice_fill_with` vs `bumpalo_alloc_slice_fill_with` | 24.07 µs | 25.44 µs | -5.4% | 38,026 | 40,020 | -5.0% |
-| `alloc_slice_fill_iter` vs `bumpalo_alloc_slice_fill_iter` | 24.18 µs | 25.43 µs | -4.9% | 38,027 | 40,020 | -5.0% |
-| `alloc_string` vs `bumpalo_string_new_in` | 8.05 µs | 9.20 µs | -12.5% | 36,836 | 35,843 | +2.8% |
-| `alloc_string_with_capacity` vs `bumpalo_string_with_capacity_in` | 7.64 µs | 10.62 µs | -28.0% | 37,194 | 34,708 | +7.2% |
-| `alloc_vec` vs `bumpalo_vec_new_in` | 1.25 µs | 3.72 µs | -66.3% | 11,765 | 12,281 | -4.2% |
-| `alloc_vec_with_capacity` vs `bumpalo_vec_with_capacity_in` | 1.23 µs | 3.48 µs | -64.6% | 12,132 | 11,069 | +9.6% |
+| `alloc` vs `bumpalo_alloc` | 6.58 µs | 6.57 µs | +0.2% | 14,026 | 19,022 | -26.3% |
+| `alloc_str` vs `bumpalo_alloc_str` | 8.36 µs | 9.56 µs | -12.5% | 51,053 | 50,048 | +2.0% |
+| `alloc_slice_copy` vs `bumpalo_alloc_slice_copy` | 33.81 µs | 36.94 µs | -8.5% | 41,049 | 38,042 | +7.9% |
+| `alloc_slice_clone` vs `bumpalo_alloc_slice_clone` | 33.49 µs | 36.81 µs | -9.0% | 45,050 | 60,046 | -25.0% |
+| `alloc_slice_fill_with` vs `bumpalo_alloc_slice_fill_with` | 35.52 µs | 36.03 µs | -1.4% | 38,026 | 40,020 | -5.0% |
+| `alloc_slice_fill_iter` vs `bumpalo_alloc_slice_fill_iter` | 35.82 µs | 37.52 µs | -4.5% | 38,027 | 40,020 | -5.0% |
+| `alloc_string` vs `bumpalo_string_new_in` | 8.23 µs | 12.16 µs | -32.3% | 36,849 | 35,843 | +2.8% |
+| `alloc_string_with_capacity` vs `bumpalo_string_with_capacity_in` | 8.09 µs | 11.79 µs | -31.3% | 37,210 | 34,708 | +7.2% |
+| `alloc_vec` vs `bumpalo_vec_new_in` | 1.29 µs | 3.89 µs | -66.8% | 11,792 | 12,281 | -4.0% |
+| `alloc_vec_with_capacity` vs `bumpalo_vec_with_capacity_in` | 1.23 µs | 3.63 µs | -66.1% | 12,139 | 11,069 | +9.7% |
 
diff --git a/crates/multitude/src/allocator_impl.rs b/crates/multitude/src/allocator_impl.rs
index 4c7f2c67c..daef0e31e 100644
--- a/crates/multitude/src/allocator_impl.rs
+++ b/crates/multitude/src/allocator_impl.rs
@@ -56,7 +56,7 @@ unsafe impl<A: Allocator + Clone> Allocator for &Arena<A> {
                 let _ = chunk_ref.forget();
                 return Ok(NonNull::slice_from_raw_parts(ptr, layout.size()));
             }
-            if self.is_oversized_shared(refill_hint) {
+            if self.is_oversized(refill_hint) {
                 return self.alloc_oversized_shared_with(refill_hint, |mutator, chunk_ptr| {
                     let (slot, _chunk) = mutator
                         .try_alloc_with_chunk(layout.size(), layout.align())
diff --git a/crates/multitude/src/arc.rs b/crates/multitude/src/arc.rs
index 37a51e2cb..afed3b0b9 100644
--- a/crates/multitude/src/arc.rs
+++ b/crates/multitude/src/arc.rs
@@ -10,35 +10,43 @@ use core::marker::PhantomData;
 use core::mem::{self, MaybeUninit};
 use core::pin::Pin;
 use core::ptr::{self, NonNull};
+use core::sync::atomic::{Ordering, fence};
 
 use allocator_api2::alloc::{Allocator, Global};
 use ptr_meta::Pointee;
 
-use crate::internal::chunk::Chunk;
 use crate::internal::chunk_ref::ChunkRef;
-use crate::internal::drop_entry::{self, DropFn};
-use crate::internal::shared_chunk::SharedChunk;
+use crate::internal::constants::refcount_overflow_abort;
 use crate::internal::thin_dst;
 use crate::thin_smart_ptr_common::impl_thin_smart_ptr_common;
 use crate::vec::Vec;
 
+/// Strong-count saturation threshold. Cloning past this aborts the
+/// process, mirroring `std::sync::Arc`'s `MAX_REFCOUNT` guard (using
+/// the `u32` strong counter's half-range instead of `isize::MAX`).
+const MAX_STRONG_REFCOUNT: u32 = u32::MAX >> 1;
+
 /// A thread-safe reference-counted smart pointer to a `T` stored in an [`Arena`](crate::Arena).
 ///
 /// Safe to share across threads when `T: Send + Sync`.
 ///
 /// Created via [`Arena::alloc_arc`](crate::Arena::alloc_arc). Cloning is
-/// **O(1)** and uses a single Relaxed atomic increment (matching
-/// `std::sync::Arc`). Dropping a clone is one Release decrement plus,
-/// on the final dec to zero, an Acquire fence before chunk teardown.
+/// **O(1)** and uses a single Relaxed atomic increment of the `Arc`'s
+/// own strong count (matching `std::sync::Arc`). Dropping a clone is one
+/// Release decrement plus, on the final decrement to zero, an Acquire fence,
+/// the value's destructor (`T::drop`), and the release of the chunk
+/// reference.
 ///
-/// `Arc` keeps its containing chunk alive by holding a +1 refcount on
-/// it, so the smart pointer can outlive the arena it came from and
-/// survives [`Arena::reset`](crate::Arena::reset). For `T: Drop`, a
-/// drop entry is registered at allocation time and `T::drop` runs at
-/// chunk teardown (when the chunk's last reference is released); for
-/// `T: !Drop` (the common case for strings, numbers, slices, etc.),
-/// no drop entry is reserved and the only per-allocation cost beyond
-/// the value itself is the chunk's atomic refcount.
+/// Each `Arc` carries its own strong reference count — an
+/// [`AtomicU32`](core::sync::atomic::AtomicU32) stored in the chunk's
+/// payload immediately before the value. The allocation also holds
+/// **one** refcount on its containing chunk for the whole `Arc` family
+/// (all clones share it); that chunk reference is released only when the
+/// last `Arc` drops. This keeps the value alive across
+/// [`Arena::reset`](crate::Arena::reset) and lets the `Arc` outlive the
+/// arena, while running `T::drop` eagerly on the last drop — so nested
+/// `Arc`s (e.g. `Arc<[Arc<T>]>`) release their storage promptly instead
+/// of deferring to chunk teardown.
 ///
 /// # Pinning
 ///
@@ -86,13 +94,17 @@ impl<T: ?Sized + Pointee, A: Allocator + Clone> Arc<T, A> {
     ///
     /// - `thin` must reference the payload of a fully-initialized `T`
     ///   whose storage was bump-allocated from a [`SharedChunk<A>`] via
-    ///   the thin-DST allocator path. For DST `T` the chunk prefix
-    ///   must carry the matching `T::Metadata`. For `T: Drop`, a drop
-    ///   entry must already be registered so the destructor runs at
-    ///   chunk teardown.
+    ///   the strong-prefixed `Arc` allocator path: a per-`Arc`
+    ///   [`AtomicU32`](core::sync::atomic::AtomicU32) strong count must
+    ///   already be initialized in the chunk prefix (see
+    ///   [`thin_dst::strong_ref`](crate::internal::thin_dst::strong_ref)),
+    ///   and for DST `T` the prefix must also carry the matching
+    ///   `T::Metadata`.
     /// - The caller must have just acquired a +1 refcount on that chunk
-    ///   in the new `Arc`'s name; the returned `Arc` takes ownership of
-    ///   that +1 and releases it in [`Drop`].
+    ///   for the new `Arc` family, and the strong count must account for
+    ///   this handle; the returned `Arc` owns that strong reference and
+    ///   releases the chunk +1 (plus runs `T::drop`) when the strong
+    ///   count reaches zero.
     /// - `thin` must lie within the first `CHUNK_ALIGN` bytes of the
     ///   chunk so the header-from-mask helper recovers the chunk
     ///   address correctly.
@@ -134,33 +146,17 @@ impl<T, A: Allocator + Clone> Arc<MaybeUninit<T>, A> {
     /// The `MaybeUninit<T>` must contain a fully-initialized, valid
     /// `T`. The allocation must come from
     /// [`Arena::alloc_uninit_arc`](crate::Arena::alloc_uninit_arc) or
-    /// [`Arena::alloc_zeroed_arc`](crate::Arena::alloc_zeroed_arc) so a
-    /// drop entry was reserved up front;
-    /// `Arena::alloc_arc(MaybeUninit::new(...))` does not reserve one
-    /// and panics here for `T: Drop`.
-    ///
-    /// # Panics
-    ///
-    /// Panics for `T: Drop` when no drop entry is found in the chunk
-    /// — see the safety contract above.
+    /// [`Arena::alloc_zeroed_arc`](crate::Arena::alloc_zeroed_arc).
     #[inline]
     #[must_use]
     pub unsafe fn assume_init(self) -> Arc<T, A> {
-        if const { mem::needs_drop::<T>() } {
-            // SAFETY: `self.ptr` references a live value inside a
-            // `SharedChunk<A>` this `Arc` holds a +1 on; `alloc_uninit_arc`
-            // reserved a placeholder drop entry for it. Commit the real shim
-            // so `T::drop` runs at chunk teardown.
-            unsafe {
-                commit_uninit_drop_entry::<A>(self.ptr, 1, drop_entry::drop_shim::<T>, false);
-            }
-        }
         let thin = self.ptr;
         mem::forget(self);
-        // SAFETY: `thin` carries the +1 the consumed handle held; the value is
-        // now a valid `T` per the caller's contract. `Arc<MaybeUninit<T>>` and
-        // `Arc<T>` for sized `T` share the same chunk layout (no metadata
-        // prefix), so no prefix rewrite is needed.
+        // SAFETY: `thin` carries the strong-count prefix and the live
+        // reference the consumed handle held; the value is now a valid
+        // `T` per the caller's contract. `MaybeUninit<T>` and `T` share
+        // size, alignment, and (empty) metadata, so the strong-prefix
+        // chunk layout is identical and no rewrite is needed.
         unsafe { Arc::from_raw(thin) }
     }
 
@@ -198,33 +194,17 @@ impl<T, A: Allocator + Clone> Arc<[MaybeUninit<T>], A> {
     /// [`Arena::alloc_uninit_slice_arc`](crate::Arena::alloc_uninit_slice_arc)
     /// or
     /// [`Arena::alloc_zeroed_slice_arc`](crate::Arena::alloc_zeroed_slice_arc).
-    ///
-    /// # Panics
-    ///
-    /// Panics for `T: Drop` when no drop entry is found in the chunk.
     #[inline]
     #[must_use]
     pub unsafe fn assume_init(self) -> Arc<[T], A> {
-        // SAFETY: `Arc<[MaybeUninit<T>]>` and `Arc<[T]>` share an
-        // identical chunk prefix layout (the slice length, written as
-        // `usize` by the allocator); read the length from the prefix
-        // directly rather than relying on the (now-thin) `self.ptr`.
-        let len: usize = unsafe { thin_dst::read_metadata::<[T]>(self.ptr) };
-        if const { mem::needs_drop::<T>() } {
-            // SAFETY: see the scalar `assume_init`; the placeholder slice
-            // drop entry reserved by `alloc_uninit_slice_arc` is committed to
-            // `drop_shim::<T>` so all `len` elements drop at chunk teardown.
-            unsafe {
-                commit_uninit_drop_entry::<A>(self.ptr, len, drop_entry::drop_shim::<T>, true);
-            }
-        }
         let thin = self.ptr;
         mem::forget(self);
-        // SAFETY: `thin` carries the +1 the consumed handle held; every
-        // element is now a valid `T` per the caller's contract.
-        // `Arc<[MaybeUninit<T>]>` and `Arc<[T]>` share the same chunk
-        // prefix layout, so the length already stored there matches the
-        // new fat pointer's metadata.
+        // SAFETY: `thin` carries the strong-count prefix and the live
+        // reference the consumed handle held; every element is now a
+        // valid `T`. `[MaybeUninit<T>]` and `[T]` share an identical
+        // chunk prefix layout (the slice length, stored as `usize`), so
+        // the metadata already in the prefix matches the new fat
+        // pointer.
         unsafe { Arc::from_raw(thin) }
     }
 
@@ -249,67 +229,29 @@ impl<T, A: Allocator + Clone> Arc<[MaybeUninit<T>], A> {
     }
 }
 
-/// Locates the placeholder [`DropEntry`](crate::internal::drop_entry) that
-/// `Arena::alloc_uninit_arc` / `alloc_uninit_slice_arc` reserved for the
-/// value at `value` and commits `drop_fn` into it, so the value's destructor
-/// runs when the hosting chunk is torn down.
-///
-/// `len` is `1` for a scalar value or the element count for a slice.
-/// `is_slice` only selects the panic message.
-///
-/// # Safety
-///
-/// - `value` must point at a value reserved via the uninit-`Arc` path, living
-///   in the first `CHUNK_ALIGN` bytes of a live `SharedChunk<A>` on which the
-///   caller holds a strong reference.
-/// - `assume_init` must be called at most once per allocation (the placeholder
-///   commit is a non-atomic write; concurrent commits on cloned handles are
-///   not supported).
-#[inline]
-unsafe fn commit_uninit_drop_entry<A: Allocator + Clone>(value: NonNull<u8>, len: usize, drop_fn: DropFn, is_slice: bool) {
-    let header = SharedChunk::<A>::header_from_value_ptr(value);
-    // SAFETY: `header` has full chunk provenance via `with_addr`;
-    // reconstruct the fat DST pointer for typed field access.
-    let chunk = unsafe { NonNull::new_unchecked(SharedChunk::<A>::header_to_fat(header.as_ptr())) };
-    // SAFETY: `chunk` is a live `SharedChunk<A>` (caller holds a +1).
-    let chunk_ref = unsafe { chunk.as_ref() };
-    // SAFETY: `chunk` is live; `payload_ptr` returns its payload start.
-    let payload = unsafe { SharedChunk::<A>::payload_ptr(chunk) }.as_ptr();
-    let payload_len = chunk_ref.capacity();
-    let value_offset = (value.as_ptr() as usize) - (payload as usize);
-    // Acquire pairs with the owner thread's Release publish of the count in
-    // `ChunkMutator::publish_drop_count`, so the placeholder slot's bytes are
-    // visible to this (possibly different) thread before we read/commit it.
-    let count = chunk_ref.drop_entry_count_acquire();
-    // SAFETY: `payload`, `payload_len`, and `count` describe the live chunk's
-    // drop region; we hold a +1 and the contract forbids concurrent commits.
-    let committed = unsafe { drop_entry::commit_placeholder_drop_fn(payload, payload_len, count, value_offset, len, drop_fn) };
-    assert!(
-        committed,
-        "{}",
-        if is_slice {
-            "Arc::<[MaybeUninit<T>]>::assume_init: no drop entry reserved for this allocation. \
-             Use `Arena::alloc_uninit_slice_arc::<T>()` / `alloc_zeroed_slice_arc`; allocating \
-             a `MaybeUninit<T>` slice via the ordinary slice-Arc helpers does not reserve one \
-             and would silently leak each `T::drop`."
-        } else {
-            "Arc::<MaybeUninit<T>>::assume_init: no drop entry reserved for this allocation. \
-             Use `Arena::alloc_uninit_arc::<T>()` / `alloc_zeroed_arc`; \
-             `Arena::alloc_arc(MaybeUninit::new(...))` does not reserve an entry and would \
-             silently leak `T::drop`."
-        }
-    );
+/// Saturation guard for [`Arc::clone`]: aborts the process when the
+/// strong count would overflow, mirroring `std::sync::Arc`.
+#[cfg_attr(coverage_nightly, coverage(off))]
+#[inline(never)]
+#[cold]
+fn strong_overflow_abort() -> ! {
+    refcount_overflow_abort()
 }
 
 impl<T: ?Sized + Pointee, A: Allocator + Clone> Clone for Arc<T, A> {
     #[inline]
     fn clone(&self) -> Self {
-        // SAFETY: `self` owns a live +1 on its chunk so the chunk is
-        // alive; `clone_from_value_ptr` mints a fresh +1 via an
-        // atomic bump and returns a `ChunkRef` that owns it. We
-        // `forget` that `ChunkRef`, handing the +1 to the new `Arc`.
-        let chunk_ref = unsafe { ChunkRef::<A>::clone_from_value_ptr(self.ptr) };
-        let _ = chunk_ref.forget();
+        let value_align = mem::align_of_val::<T>(&**self);
+        // SAFETY: `self` keeps the value (and its strong-count prefix)
+        // alive, so the strong slot is live, aligned, and within the
+        // chunk's provenance.
+        let strong = unsafe { thin_dst::strong_ref::<T>(self.ptr, value_align) };
+        // Relaxed suffices (as `std::sync::Arc`): the new handle need not
+        // synchronize until it is dropped.
+        let prev = strong.fetch_add(1, Ordering::Relaxed);
+        if prev > MAX_STRONG_REFCOUNT {
+            strong_overflow_abort();
+        }
         Self {
             ptr: self.ptr,
             _phantom: PhantomData,
@@ -320,15 +262,30 @@ impl<T: ?Sized + Pointee, A: Allocator + Clone> Clone for Arc<T, A> {
 impl<T: ?Sized + Pointee, A: Allocator + Clone> Drop for Arc<T, A> {
     #[inline]
     fn drop(&mut self) {
-        // SAFETY: `ptr` is hosted in a 64K-aligned SharedChunk we
-        // hold a +1 strong reference on. `ChunkRef::from_value_ptr`
-        // adopts that +1 and releases it on its own drop. We do not
-        // invoke `T::drop` here — for `T: Drop`, a drop entry was
-        // registered at allocation time so the chunk's teardown runs
-        // `T::drop` when the last reference releases the chunk; for
-        // `T: !Drop` no destructor is needed.
+        let value_align = mem::align_of_val::<T>(&**self);
+        // SAFETY: the value (and its strong-count prefix) is still live
+        // while this handle exists; the strong slot is aligned and
+        // within chunk provenance.
+        let strong = unsafe { thin_dst::strong_ref::<T>(self.ptr, value_align) };
+        // Release so prior accesses happen-before teardown (as in `std::sync::Arc`).
+        if strong.fetch_sub(1, Ordering::Release) != 1 {
+            return;
+        }
+        // Last strong reference: Acquire-fence so other handles' writes are
+        // visible before we drop the value and release the chunk.
+        fence(Ordering::Acquire);
+        // Adopt the chunk's +1 *before* `T::drop` so a panicking destructor
+        // still releases the chunk via `ChunkRef`'s `Drop` (the in-chunk slot
+        // leaks, per the `alloc_arc*` panic semantics).
+        //
+        // SAFETY: `ptr` is hosted in a 64K-aligned `SharedChunk` that
+        // holds exactly one outstanding +1 for this whole allocation;
+        // `from_value_ptr` adopts it. The value is a valid `T` and is
+        // dropped exactly once (only on the strong → 0 transition).
         unsafe {
-            let _ref: ChunkRef<A> = ChunkRef::from_value_ptr(self.ptr);
+            let _chunk: ChunkRef<A> = ChunkRef::from_value_ptr(self.ptr);
+            let fat = self.as_fat_ptr();
+            ptr::drop_in_place(fat.as_ptr());
         }
     }
 }
@@ -351,3 +308,72 @@ where
         v.freeze_into_arc()
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::Arena;
+
+    // Pins the saturation threshold to the `u32` half-range, killing the
+    // mutant that swaps `>>` for `<<` in the constant (which would yield
+    // `0xFFFF_FFFE`). Behavioral tests cannot reach this — the boundary
+    // sits ~2 billion clones away — so assert the value directly.
+    #[test]
+    fn max_strong_refcount_is_u32_half_range() {
+        assert_eq!(MAX_STRONG_REFCOUNT, u32::MAX >> 1);
+        assert_eq!(MAX_STRONG_REFCOUNT, 0x7FFF_FFFF);
+    }
+
+    // `Arc::clone` checks `prev > MAX_STRONG_REFCOUNT` on the value
+    // returned by `fetch_add` (the count *before* the increment), so a
+    // clone observing `prev == MAX_STRONG_REFCOUNT` must NOT abort.
+    // Driving the strong count to exactly the threshold and cloning kills
+    // the `>` -> `==` and `>` -> `>=` mutants on that comparison: both
+    // would abort the process here.
+    #[test]
+    fn clone_at_max_refcount_threshold_does_not_abort() {
+        let arena = Arena::new();
+        let arc = arena.alloc_arc(0xABCD_u32);
+        // SAFETY: `arc` keeps the value and its strong-count prefix live,
+        // so the strong slot is aligned and within chunk provenance.
+        let strong = unsafe { thin_dst::strong_ref::<u32>(arc.thin_ptr(), mem::align_of::<u32>()) };
+        // Force the next clone to observe `prev == MAX_STRONG_REFCOUNT`.
+        strong.store(MAX_STRONG_REFCOUNT, Ordering::Relaxed);
+        #[expect(
+            clippy::redundant_clone,
+            reason = "exercising Arc::clone's overflow guard at the threshold is the point of the test"
+        )]
+        let clone = arc.clone();
+        assert_eq!(*clone, 0xABCD);
+        // Restore the true live-handle count (`arc` + `clone`) so the two
+        // drops tear the value and chunk down correctly instead of
+        // leaking the strong count above 1 forever.
+        strong.store(2, Ordering::Relaxed);
+    }
+
+    // A clone observing `prev > MAX_STRONG_REFCOUNT` MUST abort. Driving
+    // the strong count one past the threshold reaches the
+    // `strong_overflow_abort()` call site in `Arc::clone` (which panics
+    // instead of aborting under `cfg(test)`), covering that guard and
+    // killing the `>` -> `==` mutant (which would not fire here).
+    #[test]
+    #[should_panic(expected = "refcount overflow")]
+    fn clone_above_max_refcount_threshold_aborts() {
+        let arena = Arena::new();
+        let arc = arena.alloc_arc(0xABCD_u32);
+        // SAFETY: `arc` keeps the value and its strong-count prefix live,
+        // so the strong slot is aligned and within chunk provenance.
+        let strong = unsafe { thin_dst::strong_ref::<u32>(arc.thin_ptr(), mem::align_of::<u32>()) };
+        strong.store(MAX_STRONG_REFCOUNT + 1, Ordering::Relaxed);
+        // The clone panics in its overflow guard before returning, so no
+        // clone is produced (but `fetch_add` already bumped the count).
+        // Catch it, restore the real live-handle count (just `arc`) so
+        // teardown releases the chunk instead of leaking (keeps Miri
+        // happy), then resume so `should_panic` observes the panic.
+        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+            let _c = arc.clone();
+        }));
+        strong.store(1, Ordering::Relaxed);
+        std::panic::resume_unwind(result.expect_err("clone past the threshold must panic"));
+    }
+}
diff --git a/crates/multitude/src/arena/alloc_prefixed.rs b/crates/multitude/src/arena/alloc_prefixed.rs
index ee29c271f..1c2bee081 100644
--- a/crates/multitude/src/arena/alloc_prefixed.rs
+++ b/crates/multitude/src/arena/alloc_prefixed.rs
@@ -83,6 +83,15 @@ impl<A: Allocator + Clone> Arena<A> {
         // recovery invariant used by the smart pointers' `Drop`.
         let payload_bytes = len.checked_mul(elem_size).ok_or(AllocError)?.max(elem_align);
         let total = PREFIX_BYTES.checked_add(payload_bytes).ok_or(AllocError)?;
+        // `total` is an exact reservation size, not a worst-case hint: unlike
+        // the slice paths (which permit over-aligned `T` and so add `elem_align`
+        // of front-padding slack to their routing hint), the const-assert above
+        // bounds `elem_align <= align_of::<usize>() <= value_align`. A fresh
+        // chunk's payload base is `value_align`-aligned, so an `elem_align`
+        // reservation on a freshly refilled chunk never consumes front padding.
+        // Routing/refilling with `total` therefore always yields a chunk into
+        // which the retry's `try_alloc_with_chunk(total, elem_align)` fits — no
+        // `total` vs `total + elem_align` boundary loop is possible here.
         loop {
             // Allocate `total` bytes aligned to `align_of::<T>()` so the
             // payload (at offset PREFIX_BYTES, a multiple of any align
@@ -94,7 +103,7 @@ impl<A: Allocator + Clone> Arena<A> {
                 let _ = chunk_ref.forget();
                 return Ok(payload);
             }
-            if self.is_oversized_shared(total) {
+            if self.is_oversized(total) {
                 return self.alloc_oversized_shared_with(total, |mutator, chunk_ptr| {
                     let (base, _chunk_unused) = mutator
                         .try_alloc_with_chunk(total, elem_align)
@@ -110,6 +119,69 @@ impl<A: Allocator + Clone> Arena<A> {
     }
 }
 
+impl<A: Allocator + Clone> Arena<A> {
+    /// Strong-prefixed [`Arc`](crate::Arc) variant of
+    /// [`Self::impl_alloc_prefixed_shared`]: reserves a per-`Arc`
+    /// [`AtomicU32`](core::sync::atomic::AtomicU32) strong count and a
+    /// `usize` length metadata word immediately before the payload,
+    /// initializes the strong count to `1`, writes the length and the
+    /// payload, takes one chunk refcount for the new `Arc` family, and
+    /// returns a thin `NonNull<T>` to the first payload element.
+    ///
+    /// `T` must have `align_of::<T>() <= align_of::<usize>()`; see
+    /// module docs.
+    #[inline(always)]
+    pub(crate) fn impl_alloc_prefixed_shared_arc<T: Copy>(&self, src: &[T]) -> Result<NonNull<T>, AllocError> {
+        const {
+            assert!(
+                mem::align_of::<T>() <= mem::align_of::<usize>(),
+                "impl_alloc_prefixed_shared_arc: T's align must not exceed usize's align",
+            );
+        }
+        let len = src.len();
+        // `src` is a live `&[T]`, so `size_of_val(src)` is a valid usize.
+        let payload_bytes = mem::size_of_val(src);
+        let bytes_needed = worst_case_arc_slice_payload::<T>(len);
+        loop {
+            // SAFETY: `payload_bytes == size_of_val(src) == size_of::<T>() * len`.
+            let reserved = unsafe { self.try_reserve_arc_slice_with_size::<T>(len, payload_bytes) };
+            if let Some((uninit, chunk_ptr)) = reserved {
+                let chunk_ref: ChunkRef<A> = self.acquire_current_shared_chunk_ref(chunk_ptr);
+                let slice_ptr = uninit.init_copy_from_slice_ptr(src);
+                let _ = chunk_ref.forget();
+                return Ok(slice_ptr.cast::<T>());
+            }
+            if self.is_oversized(bytes_needed) {
+                return self.alloc_oversized_shared_with(bytes_needed, |mutator, chunk_ptr| {
+                    let (ticket, _chunk) = mutator
+                        .try_alloc_arc_slice::<T>(len)
+                        .expect("dedicated oversized chunk sized to fit prefixed Arc payload");
+                    let chunk_ref: ChunkRef<A> = acquire_shared_chunk_ref::<A>(chunk_ptr);
+                    let slice_ptr = ticket.init_copy_from_slice_ptr(src);
+                    let _ = chunk_ref.forget();
+                    slice_ptr.cast::<T>()
+                });
+            }
+            self.refill_shared(bytes_needed)?;
+        }
+    }
+}
+
+/// Worst-case byte budget for a strong-prefixed `Arc` slice/prefixed
+/// payload of `len` elements: per-`Arc` strong count + slice-length
+/// prefix + payload (at least 1 byte) + front alignment slack, each sum
+/// saturating. Shared by the `Arc<[T]>`, `Arc<str>`, and `ArcUtf16Str` paths.
+#[cfg_attr(test, mutants::skip)] // underestimating refill hint ⇒ refill spin
+#[inline]
+pub(crate) fn worst_case_arc_slice_payload<T>(len: usize) -> usize {
+    use crate::internal::thin_dst;
+    let align = mem::align_of::<T>();
+    let value_bytes = mem::size_of::<T>().saturating_mul(len).max(1);
+    thin_dst::strong_prefix_bytes_for(align, mem::size_of::<usize>())
+        .saturating_add(value_bytes)
+        .saturating_add(thin_dst::arc_block_align(align))
+}
+
 /// Write the length prefix (unaligned `usize`) at `base` and copy
 /// `src` immediately after, returning a thin pointer to the first
 /// payload element.
diff --git a/crates/multitude/src/arena/alloc_slice_arc.rs b/crates/multitude/src/arena/alloc_slice_arc.rs
index c0a2b6b0c..9d6fcdcb3 100644
--- a/crates/multitude/src/arena/alloc_slice_arc.rs
+++ b/crates/multitude/src/arena/alloc_slice_arc.rs
@@ -10,7 +10,7 @@ use core::pin::Pin;
 
 use allocator_api2::alloc::{AllocError, Allocator};
 
-use super::alloc_prefixed::worst_case_thin_slice_payload;
+use super::alloc_prefixed::worst_case_arc_slice_payload;
 use super::alloc_value::{MAX_SMART_PTR_ALIGN, acquire_shared_chunk_ref};
 use super::{Arena, ExpectAlloc};
 use crate::arc::Arc;
@@ -149,34 +149,34 @@ impl<A: Allocator + Clone> Arena<A> {
     }
 
     /// Arc + Copy: no element-drop runs, but we still take an Arc-owned
-    /// refcount on the chunk.
+    /// refcount on the chunk and reserve the strong-count prefix.
     #[inline]
     fn impl_alloc_slice_arc_copy<T: Copy>(&self, src: &[T]) -> Result<Arc<[T], A>, AllocError> {
-        check_slice_arc_layout::<T>(src.len())?;
+        check_slice_arc_layout::<T>()?;
         let len = src.len();
-        // Copy is never `Drop`, so use the no-drop reservation.
-        let bytes_needed = worst_case_thin_slice_payload::<T>(len);
+        let bytes_needed = worst_case_arc_slice_payload::<T>(len);
         // `src` is a live `&[T]`, so `size_of_val(src)` is a valid
         // `usize`. Hoisting the precomputed byte size lets the inner
         // reservation helper skip the `checked_mul` overflow guard.
         let payload_bytes = mem::size_of_val(src);
         loop {
             // SAFETY: `payload_bytes == size_of_val(src) == size_of::<T>() * len`.
-            let reserved = unsafe { self.try_reserve_shared_slice_with_size::<T>(len, payload_bytes) };
+            let reserved = unsafe { self.try_reserve_arc_slice_with_size::<T>(len, payload_bytes) };
             if let Some((uninit, chunk_ptr)) = reserved {
                 let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
                 let slice_ptr = uninit.init_copy_from_slice_ptr(src);
                 let _ = chunk_ref.forget();
                 // SAFETY: `slice_ptr` points to `len` initialized `T`s in a
-                // shared chunk with a fresh +1; `Arc::from_raw` adopts that
-                // +1. Chunk-wide provenance preserved via `init_copy_from_slice_ptr`.
+                // shared chunk with a fresh +1 and an initialized strong
+                // prefix; `Arc::from_raw` adopts that family. Chunk-wide
+                // provenance preserved via `init_copy_from_slice_ptr`.
                 return Ok(unsafe { Arc::from_raw(slice_ptr.cast::<u8>()) });
             }
-            if self.is_oversized_shared(bytes_needed) {
+            if self.is_oversized(bytes_needed) {
                 return self.alloc_oversized_shared_with(bytes_needed, |mutator, chunk_ptr| {
-                    let ticket = mutator
-                        .try_alloc_uninit_slice_prefixed::<T>(len)
-                        .expect("dedicated oversized chunk sized to fit slice");
+                    let (ticket, _chunk) = mutator
+                        .try_alloc_arc_slice::<T>(len)
+                        .expect("dedicated oversized chunk sized to fit slice + strong prefix");
                     let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
                     let slice_ptr = ticket.init_copy_from_slice_ptr(src);
                     let _ = chunk_ref.forget();
@@ -188,32 +188,16 @@ impl<A: Allocator + Clone> Arena<A> {
         }
     }
 
-    /// Arc + closure fill: records a chunk drop entry when `T: Drop`,
-    /// so the chunk's teardown runs `T::drop` on each element after the
-    /// last `Arc` releases.
+    /// Arc + closure fill: `T::drop` (if any) runs eagerly in
+    /// [`Arc::drop`](crate::Arc) on the last reference via
+    /// `drop_in_place::<[T]>`, so no chunk drop entry is reserved.
     #[inline]
     fn impl_alloc_slice_arc_with<T, F: FnMut(usize) -> T>(&self, len: usize, f: F) -> Result<Arc<[T], A>, AllocError> {
-        check_slice_arc_layout::<T>(len)?;
-        // Refill hint accounts for the length prefix, payload alignment
-        // slack, payload bytes, and (for `T: Drop`) a drop-entry slot.
-        let bytes_needed = worst_case_thin_slice_payload::<T>(len);
+        check_slice_arc_layout::<T>()?;
+        let bytes_needed = worst_case_arc_slice_payload::<T>(len);
         let mut f = Some(f);
         loop {
-            // Branch on needs_drop at const time so monomorphizations
-            // pick the right reservation helper.
-            if const { mem::needs_drop::<T>() } {
-                if let Some((uninit, chunk_ptr)) = self.try_reserve_shared_slice_with_drop::<T>(len) {
-                    let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
-                    let f = f.take().expect("with closure taken twice");
-                    let slice_ptr = uninit.init_with_ptr(f);
-                    let _ = chunk_ref.forget();
-                    // SAFETY: see `impl_alloc_slice_arc_copy`; the drop entry
-                    // was committed by `init_with_ptr` for the chunk-teardown
-                    // path. `slice_ptr` carries chunk-wide provenance so the
-                    // Arc's later `byte_sub` to the chunk header is sound.
-                    return Ok(unsafe { Arc::from_raw(slice_ptr.cast::<u8>()) });
-                }
-            } else if let Some((uninit, chunk_ptr)) = self.try_reserve_shared_slice::<T>(len) {
+            if let Some((uninit, chunk_ptr)) = self.try_reserve_arc_slice::<T>(len) {
                 let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
                 let f = f.take().expect("with closure taken twice");
                 let slice_ptr = uninit.init_with_ptr(f);
@@ -222,27 +206,16 @@ impl<A: Allocator + Clone> Arena<A> {
                 // provenance preserved via `init_with_ptr`.
                 return Ok(unsafe { Arc::from_raw(slice_ptr.cast::<u8>()) });
             }
-            if self.is_oversized_shared(bytes_needed) {
+            if self.is_oversized(bytes_needed) {
                 let fclosure = f.take().expect("with closure taken twice");
                 return self.alloc_oversized_shared_with(bytes_needed, |mutator, chunk_ptr| {
-                    let slice_ptr = if const { mem::needs_drop::<T>() } {
-                        let ticket = mutator
-                            .try_alloc_uninit_slice_with_drop_prefixed::<T>(len)
-                            .expect("dedicated oversized chunk sized to fit slice + drop entry");
-                        let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
-                        let p = ticket.init_with_ptr(fclosure);
-                        let _ = chunk_ref.forget();
-                        p
-                    } else {
-                        let ticket = mutator
-                            .try_alloc_uninit_slice_prefixed::<T>(len)
-                            .expect("dedicated oversized chunk sized to fit slice");
-                        let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
-                        let p = ticket.init_with_ptr(fclosure);
-                        let _ = chunk_ref.forget();
-                        p
-                    };
-                    // SAFETY: see the non-oversized branches above.
+                    let (ticket, _chunk) = mutator
+                        .try_alloc_arc_slice::<T>(len)
+                        .expect("dedicated oversized chunk sized to fit slice + strong prefix");
+                    let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
+                    let slice_ptr = ticket.init_with_ptr(fclosure);
+                    let _ = chunk_ref.forget();
+                    // SAFETY: see the non-oversized branch above.
                     unsafe { Arc::from_raw(slice_ptr.cast::<u8>()) }
                 });
             }
@@ -289,25 +262,15 @@ impl<A: Allocator + Clone> Arena<A> {
     }
 }
 
-/// Common up-front checks for the `Arc<[T]>` slice family. Rejects
-/// over-aligned `T` (would break the smart-pointer header recovery) and
-/// `T: Drop` slices whose `len > u16::MAX` (the chunk drop entry packs
-/// the element count into a `u16`).
-//
-// Mutation testing is suppressed here: any mutation that bypasses the
-// `len > u16::MAX` rejection (e.g. `&&`→`||`, `>`→`==`) sends the
-// caller's refill loop into an unbounded chunk-allocation spin (see the
-// detailed note in `alloc_slice_ref::reject_drop_slice_too_long`).
-// Correctness is exercised by integration tests in `coverage_gaps.rs`,
-// `arena.rs`, and `mutants_extras.rs`.
-#[cfg_attr(test, mutants::skip)]
+/// Up-front check for the `Arc<[T]>` slice family. Rejects over-aligned
+/// `T` (would break the smart-pointer header recovery). Unlike the
+/// old drop-entry design, there is no `len > u16::MAX` restriction:
+/// element destructors run via `drop_in_place::<[T]>` in
+/// [`Arc::drop`](crate::Arc), not a `u16`-counted chunk drop entry.
 #[inline]
-fn check_slice_arc_layout<T>(len: usize) -> Result<(), AllocError> {
+fn check_slice_arc_layout<T>() -> Result<(), AllocError> {
     if mem::align_of::<T>() >= MAX_SMART_PTR_ALIGN {
         return Err(AllocError);
     }
-    if mem::needs_drop::<T>() && len > u16::MAX as usize {
-        return Err(AllocError);
-    }
     Ok(())
 }
diff --git a/crates/multitude/src/arena/alloc_slice_box.rs b/crates/multitude/src/arena/alloc_slice_box.rs
index dcfa0553f..e76956411 100644
--- a/crates/multitude/src/arena/alloc_slice_box.rs
+++ b/crates/multitude/src/arena/alloc_slice_box.rs
@@ -163,19 +163,15 @@ impl<A: Allocator + Clone> Arena<A> {
     fn impl_alloc_slice_box_copy<T: Copy>(&self, src: &[T]) -> Result<Box<[T], A>, AllocError> {
         check_slice_box_layout::<T>(src.len())?;
         let len = src.len();
-        // `src` is a live `&[T]`, so `size_of_val(src)` is a valid
-        // `usize`. Hoisting it past the refill loop spares the inner
-        // reservation a `checked_mul` overflow guard.
+        // Precompute byte size so the reservation helper skips checked_mul.
         let payload_bytes = mem::size_of_val(src);
         let ptr = self.reserve_slice_box::<T>(len, payload_bytes, |slot_ptr| {
             // SAFETY: `slot_ptr` is the reservation start; `len` elements
             // of `T` fit by construction.
             unsafe { ptr::copy_nonoverlapping(src.as_ptr(), slot_ptr, len) };
         })?;
-        // `ptr` points to `len` initialized `T`s in a shared chunk that
-        // has a fresh +1; `Box::from_raw` adopts that +1 and `Box::drop` runs
-        // `drop_in_place` on the slice when the smart pointer is dropped.
-        // SAFETY: see above.
+        // SAFETY: `ptr` points to `len` initialized `T`s in a shared
+        // chunk with a fresh +1; `Box::from_raw` adopts that +1.
         Ok(unsafe { Box::from_raw(ptr.cast::<u8>()) })
     }
 
@@ -184,10 +180,7 @@ impl<A: Allocator + Clone> Arena<A> {
     #[inline]
     fn impl_alloc_slice_box_with<T, F: FnMut(usize) -> T>(&self, len: usize, mut f: F) -> Result<Box<[T], A>, AllocError> {
         check_slice_box_layout::<T>(len)?;
-        // Caller-provided `len`: must overflow-check the payload size
-        // up front so the hot loop can skip the `checked_mul`. On
-        // overflow we report `AllocError` immediately rather than spin
-        // refilling.
+        // Check overflow before the refill loop.
         let payload_bytes = mem::size_of::<T>().checked_mul(len).ok_or(AllocError)?;
         let ptr = self.reserve_slice_box::<T>(len, payload_bytes, |slot_ptr| {
             // SAFETY: `slot_ptr` is the reservation start; we init `len` slots
@@ -243,7 +236,7 @@ impl<A: Allocator + Clone> Arena<A> {
                 let _ = chunk_ref.forget();
                 return Ok(base);
             }
-            if self.is_oversized_shared(bytes_needed) {
+            if self.is_oversized(bytes_needed) {
                 let init_owned = init.take().expect("reserve_slice_box init taken twice");
                 return self.alloc_oversized_shared_with(bytes_needed, |mutator, chunk_ptr| {
                     let ticket = mutator
@@ -292,29 +285,18 @@ impl<A: Allocator + Clone> Arena<A> {
     }
 }
 
-/// Common up-front checks for the `Box<[T]>` slice family. `Box::drop`
-/// runs `drop_in_place` on the entire slice eagerly, so no chunk drop
-/// entry is recorded; however we still reject `T: Drop` slices with
-/// `len > u16::MAX` so a future `Box<[T]> -> Arc<[T]>` conversion has
-/// a slot to populate (parity with the `alloc_dst_box` guard).
-//
-// Mutation testing is suppressed: bypassing the `len > u16::MAX`
-// rejection sends the caller's refill loop into an unbounded
-// chunk-allocation spin (see `alloc_slice_ref::reject_drop_slice_too_long`).
-#[cfg_attr(test, mutants::skip)]
+/// Up-front check for `Box<[T]>`: reject alignments that break
+/// smart-pointer header recovery. `_len` is unused: the chunk prefix
+/// stores the slice length full-width, so no length cap applies.
 #[inline]
-fn check_slice_box_layout<T>(len: usize) -> Result<(), AllocError> {
+fn check_slice_box_layout<T>(_len: usize) -> Result<(), AllocError> {
     if mem::align_of::<T>() >= MAX_SMART_PTR_ALIGN {
         return Err(AllocError);
     }
-    if mem::needs_drop::<T>() && len > u16::MAX as usize {
-        return Err(AllocError);
-    }
     Ok(())
 }
 
-/// Drop-guard for partial init in `alloc_slice_*_box`. Mirrors the
-/// `InitGuard` in `internal::uninit`.
+/// Drop guard for partially initialized boxed slices.
 struct InitGuard<T> {
     dst: *mut T,
     initialized: usize,
diff --git a/crates/multitude/src/arena/alloc_slice_ref.rs b/crates/multitude/src/arena/alloc_slice_ref.rs
index ea96a67e3..ee0ee4d4a 100644
--- a/crates/multitude/src/arena/alloc_slice_ref.rs
+++ b/crates/multitude/src/arena/alloc_slice_ref.rs
@@ -290,7 +290,7 @@ impl<A: Allocator + Clone> Arena<A> {
     #[cfg_attr(test, mutants::skip)]
     fn refill_or_alloc_oversized_slice_copy<T: Copy>(&self, src: &[T]) -> Result<Option<&mut [T]>, AllocError> {
         let refill_hint = worst_case_slice_payload::<T>(src.len());
-        if self.is_oversized_local(refill_hint) {
+        if self.is_oversized(refill_hint) {
             return Ok(Some(self.alloc_oversized_slice_copy::<T>(refill_hint, src)?));
         }
         self.refill_local(refill_hint)?;
@@ -363,7 +363,7 @@ impl<A: Allocator + Clone> Arena<A> {
     fn refill_or_alloc_oversized_slice_clone<T: Clone>(&self, src: &[T]) -> Result<Option<&mut [T]>, AllocError> {
         let len = src.len();
         let refill_hint = worst_case_slice_payload::<T>(len);
-        if self.is_oversized_local(refill_hint) {
+        if self.is_oversized(refill_hint) {
             let mut ptr = self.alloc_oversized_local_with(refill_hint, |mutator| {
                 if const { mem::needs_drop::<T>() } {
                     let ticket = mutator
@@ -413,7 +413,7 @@ impl<A: Allocator + Clone> Arena<A> {
                 let f = f.take().expect("with closure taken twice");
                 return Ok(u.init_with(f));
             }
-            if self.is_oversized_local(refill_hint) {
+            if self.is_oversized(refill_hint) {
                 let f = f.take().expect("with closure taken twice");
                 let mut ptr = self.alloc_oversized_local_with(refill_hint, |mutator| {
                     if const { mem::needs_drop::<T>() } {
@@ -468,7 +468,7 @@ impl<A: Allocator + Clone> Arena<A> {
                 let it = iter.take().expect("iterator taken twice");
                 return Ok(u.init_from_iter(it));
             }
-            if self.is_oversized_local(refill_hint) {
+            if self.is_oversized(refill_hint) {
                 let mut it = iter.take().expect("iterator taken twice");
                 let mut ptr = self.alloc_oversized_local_with(refill_hint, |mutator| {
                     if const { mem::needs_drop::<T>() } {
diff --git a/crates/multitude/src/arena/alloc_str.rs b/crates/multitude/src/arena/alloc_str.rs
index efe2ab9b3..08eb368d7 100644
--- a/crates/multitude/src/arena/alloc_str.rs
+++ b/crates/multitude/src/arena/alloc_str.rs
@@ -120,7 +120,7 @@ impl<A: Allocator + Clone> Arena<A> {
     where
         A: Send + Sync,
     {
-        self.impl_alloc_prefixed_shared::<u8>(s.as_ref().as_bytes()).map(|ptr|
+        self.impl_alloc_prefixed_shared_arc::<u8>(s.as_ref().as_bytes()).map(|ptr|
             // SAFETY: see `Self::alloc_str_arc`.
             unsafe { Arc::from_raw(ptr) })
     }
@@ -150,7 +150,7 @@ impl<A: Allocator + Clone> Arena<A> {
             if let Some(u) = self.try_reserve_local_bytes(len) {
                 return Ok(u.init_copy_from_str(s));
             }
-            if self.is_oversized_local(len) {
+            if self.is_oversized(len) {
                 let ptr = self.alloc_oversized_local_with(len, |mutator| {
                     let ticket = mutator.try_alloc_bytes(len).expect("dedicated oversized chunk sized to fit string");
                     // `init_copy_from_str` returns `&mut str` bound to the
diff --git a/crates/multitude/src/arena/alloc_uninit.rs b/crates/multitude/src/arena/alloc_uninit.rs
index f05c47eb3..a11e61b5c 100644
--- a/crates/multitude/src/arena/alloc_uninit.rs
+++ b/crates/multitude/src/arena/alloc_uninit.rs
@@ -7,13 +7,12 @@
 //! groups the `alloc_uninit_*` / `alloc_zeroed_*` family together to
 //! keep the central `mod.rs` smaller.
 
-use core::mem;
 use core::mem::MaybeUninit;
 use core::pin::Pin;
 
 use allocator_api2::alloc::{AllocError, Allocator};
 
-use super::{Arena, ExpectAlloc};
+use super::Arena;
 use crate::arc::Arc;
 use crate::r#box::Box;
 
@@ -185,9 +184,11 @@ impl<A: Allocator + Clone> Arena<A> {
     /// Allocate uninitialized space for a `T` and return an
     /// [`Arc<MaybeUninit<T>, A>`](crate::Arc).
     ///
-    /// For `T: Drop`, this reserves a placeholder drop entry. Dropping
-    /// `Arc<MaybeUninit<T>>` without `assume_init` is sound; `assume_init`
-    /// commits the entry so a later `Arc<T>` drop runs `T::drop`.
+    /// No drop entry is reserved. Dropping `Arc<MaybeUninit<T>>` without
+    /// `assume_init` is sound (`MaybeUninit<T>` has no drop glue); after
+    /// `assume_init`, dropping the last `Arc<T>` runs `T::drop` eagerly
+    /// via `drop_in_place::<T>` (see [`Arc`](crate::Arc)'s per-pointer
+    /// reference counting).
     ///
     /// # Panics
     ///
@@ -200,11 +201,7 @@ impl<A: Allocator + Clone> Arena<A> {
         A: Send + Sync,
         T: Send + Sync,
     {
-        if const { mem::needs_drop::<T>() } {
-            (self.impl_alloc_uninit_arc::<T>(false)).expect_alloc()
-        } else {
-            self.alloc_arc_with::<MaybeUninit<T>, _>(MaybeUninit::uninit)
-        }
+        self.alloc_arc_with::<MaybeUninit<T>, _>(MaybeUninit::uninit)
     }
 
     /// Fallible variant of [`Self::alloc_uninit_arc`].
@@ -219,11 +216,7 @@ impl<A: Allocator + Clone> Arena<A> {
         A: Send + Sync,
         T: Send + Sync,
     {
-        if const { mem::needs_drop::<T>() } {
-            self.impl_alloc_uninit_arc::<T>(false)
-        } else {
-            self.try_alloc_arc_with::<MaybeUninit<T>, _>(MaybeUninit::uninit)
-        }
+        self.try_alloc_arc_with::<MaybeUninit<T>, _>(MaybeUninit::uninit)
     }
 
     /// Like [`Self::alloc_uninit_arc`] but the value bytes are zeroed.
@@ -239,11 +232,7 @@ impl<A: Allocator + Clone> Arena<A> {
         A: Send + Sync,
         T: Send + Sync,
     {
-        if const { mem::needs_drop::<T>() } {
-            (self.impl_alloc_uninit_arc::<T>(true)).expect_alloc()
-        } else {
-            self.alloc_arc_with::<MaybeUninit<T>, _>(MaybeUninit::zeroed)
-        }
+        self.alloc_arc_with::<MaybeUninit<T>, _>(MaybeUninit::zeroed)
     }
 
     /// Fallible variant of [`Self::alloc_zeroed_arc`].
@@ -258,20 +247,17 @@ impl<A: Allocator + Clone> Arena<A> {
         A: Send + Sync,
         T: Send + Sync,
     {
-        if const { mem::needs_drop::<T>() } {
-            self.impl_alloc_uninit_arc::<T>(true)
-        } else {
-            self.try_alloc_arc_with::<MaybeUninit<T>, _>(MaybeUninit::zeroed)
-        }
+        self.try_alloc_arc_with::<MaybeUninit<T>, _>(MaybeUninit::zeroed)
     }
 
     /// Allocate `len` uninitialized `T` slots and return an
     /// [`Arc<[MaybeUninit<T>], A>`](crate::Arc).
     ///
-    /// For `T: Drop`, this reserves a placeholder slice drop entry.
-    /// Dropping `Arc<[MaybeUninit<T>]>` without `assume_init` is sound;
-    /// `assume_init` commits the entry so dropping `Arc<[T]>` runs element
-    /// destructors.
+    /// No drop entry is reserved. Dropping `Arc<[MaybeUninit<T>]>`
+    /// without `assume_init` is sound (`MaybeUninit<T>` has no drop
+    /// glue); after `assume_init`, dropping the last `Arc<[T]>` runs the
+    /// element destructors eagerly via `drop_in_place::<[T]>` (see
+    /// [`Arc`](crate::Arc)'s per-pointer reference counting).
     ///
     /// # Panics
     ///
@@ -284,11 +270,7 @@ impl<A: Allocator + Clone> Arena<A> {
         A: Send + Sync,
         T: Send + Sync,
     {
-        if const { mem::needs_drop::<T>() } {
-            (self.impl_alloc_uninit_slice_arc::<T>(len, false)).expect_alloc()
-        } else {
-            self.alloc_slice_fill_with_arc::<MaybeUninit<T>, _>(len, |_| MaybeUninit::uninit())
-        }
+        self.alloc_slice_fill_with_arc::<MaybeUninit<T>, _>(len, |_| MaybeUninit::uninit())
     }
 
     /// Fallible variant of [`Self::alloc_uninit_slice_arc`].
@@ -303,11 +285,7 @@ impl<A: Allocator + Clone> Arena<A> {
         A: Send + Sync,
         T: Send + Sync,
     {
-        if const { mem::needs_drop::<T>() } {
-            self.impl_alloc_uninit_slice_arc::<T>(len, false)
-        } else {
-            self.try_alloc_slice_fill_with_arc::<MaybeUninit<T>, _>(len, |_| MaybeUninit::uninit())
-        }
+        self.try_alloc_slice_fill_with_arc::<MaybeUninit<T>, _>(len, |_| MaybeUninit::uninit())
     }
 
     /// Like [`Self::alloc_uninit_slice_arc`] but the slice bytes are zeroed.
@@ -323,11 +301,7 @@ impl<A: Allocator + Clone> Arena<A> {
         A: Send + Sync,
         T: Send + Sync,
     {
-        if const { mem::needs_drop::<T>() } {
-            (self.impl_alloc_uninit_slice_arc::<T>(len, true)).expect_alloc()
-        } else {
-            self.alloc_slice_fill_with_arc::<MaybeUninit<T>, _>(len, |_| MaybeUninit::zeroed())
-        }
+        self.alloc_slice_fill_with_arc::<MaybeUninit<T>, _>(len, |_| MaybeUninit::zeroed())
     }
 
     /// Fallible variant of [`Self::alloc_zeroed_slice_arc`].
@@ -342,11 +316,7 @@ impl<A: Allocator + Clone> Arena<A> {
         A: Send + Sync,
         T: Send + Sync,
     {
-        if const { mem::needs_drop::<T>() } {
-            self.impl_alloc_uninit_slice_arc::<T>(len, true)
-        } else {
-            self.try_alloc_slice_fill_with_arc::<MaybeUninit<T>, _>(len, |_| MaybeUninit::zeroed())
-        }
+        self.try_alloc_slice_fill_with_arc::<MaybeUninit<T>, _>(len, |_| MaybeUninit::zeroed())
     }
 
     /// Allocate `len` uninitialized `T` slots and return an
diff --git a/crates/multitude/src/arena/alloc_unsized.rs b/crates/multitude/src/arena/alloc_unsized.rs
index 1d30ce9e0..c11706a71 100644
--- a/crates/multitude/src/arena/alloc_unsized.rs
+++ b/crates/multitude/src/arena/alloc_unsized.rs
@@ -4,13 +4,15 @@
 //! DST (unsized) value allocation API on [`Arena`].
 //!
 //! Implements `alloc_dst_arc`, `alloc_dst_box` and their `try_*`
-//! variants under the `dst` Cargo feature. The trailing drop entry
-//! stores the pointer-metadata as a `u16`, which limits supported DSTs
-//! to those whose pointer-metadata is either zero-sized (sized `T`) or
-//! `usize`-sized AND fits in `u16` (slices of length up to
-//! `u16::MAX`). For drop-aware slices with more than `u16::MAX`
-//! elements, the non-DST `alloc_slice_arc` / `_box` family stores the
-//! length in a separate prefix word and has no such cap.
+//! variants under the `dst` Cargo feature. The pointer-metadata is
+//! stored verbatim in the chunk prefix (immediately before the
+//! payload), so supported types are those whose metadata is either
+//! zero-sized (sized `T`) or `usize`-sized (slice DSTs and trait
+//! objects). `Arc` runs `T`'s destructor eagerly on the last clone via
+//! `drop_in_place::<T>`; `Box` does so in its own `Drop`. Neither
+//! family caps the metadata width for `T: Drop`: both drop via
+//! `drop_in_place` on a full-width fat pointer, so a `Drop` trait
+//! object or a slice longer than `u16::MAX` is accepted by both.
 
 use core::alloc::Layout;
 use core::mem;
@@ -25,7 +27,6 @@ use super::{Arena, ExpectAlloc};
 use crate::arc::Arc;
 use crate::r#box::Box;
 use crate::internal::constants::max_smart_ptr_align;
-use crate::internal::drop_entry::DropFn;
 
 /// Maximum `layout.align()` accepted by smart-pointer allocations.
 /// Mirrors the constant of the same name in [`alloc_value`](super::alloc_value):
@@ -38,8 +39,10 @@ impl<A: Allocator + Clone> Arena<A> {
     ///
     /// The closure `init` receives a typed fat pointer to the buffer
     /// (built from `(thin_ptr, metadata)`) and is responsible for
-    /// writing a valid `T` through it. multitude reconstructs the same
-    /// metadata at chunk teardown so `T`'s destructor runs correctly.
+    /// writing a valid `T` through it. The metadata is stored in the
+    /// chunk prefix and recovered on demand, so `T`'s destructor runs
+    /// eagerly (via `drop_in_place::<T>`) when the last `Arc` clone is
+    /// dropped.
     ///
     /// For sized `T`, prefer [`Self::alloc_arc`] / [`Self::alloc_arc_with`].
     ///
@@ -57,11 +60,9 @@ impl<A: Allocator + Clone> Arena<A> {
     /// - `init` must initialize all bytes covered by `layout` to a valid `T`.
     /// - `metadata` must be valid for the value just written.
     /// - `T::Metadata` must be either zero-sized (sized `T`) or
-    ///   `usize`-sized AND fit in `u16` after reinterpretation. This
-    ///   means **slices** (`[U]`, where the metadata is the slice
-    ///   length) and **sized** `T` are supported; trait objects (`dyn
-    ///   Trait`) and other DSTs whose metadata cannot be packed into
-    ///   `u16` are **not** supported.
+    ///   `usize`-sized (slice DSTs `[U]` and trait objects `dyn Trait`,
+    ///   whose metadata — slice length or vtable pointer — is stored
+    ///   verbatim in the chunk prefix).
     #[cfg_attr(docsrs, doc(cfg(feature = "dst")))]
     pub unsafe fn alloc_dst_arc<T: ?Sized + Send + Sync + Pointee>(
         &self,
@@ -107,8 +108,9 @@ impl<A: Allocator + Clone> Arena<A> {
     /// Allocate a possibly-unsized `T` and return a [`Box<T, A>`](crate::Box).
     /// See [`Self::alloc_dst_arc`] for the contract.
     ///
-    /// Unlike the refcount variants, the resulting [`Box`](crate::Box) runs
-    /// `T`'s destructor immediately when the smart pointer is dropped.
+    /// The resulting [`Box`](crate::Box) is the sole owner, so it runs
+    /// `T`'s destructor when it is dropped (the `Arc` variants run it
+    /// when the last clone is dropped; both are eager).
     ///
     /// # Panics
     ///
@@ -149,12 +151,8 @@ impl<A: Allocator + Clone> Arena<A> {
 
     /// Shared implementation for `alloc_dst_arc` / `try_alloc_dst_arc`.
     ///
-    /// Reserves `layout.size()` bytes aligned to `layout.align()` in
-    /// the current shared chunk, places a drop-entry placeholder (if
-    /// `T` requires drop), invokes `init` on the typed fat pointer,
-    /// commits the drop shim, and wraps the result in an [`Arc`].
-    ///
-    /// `TRY` selects the panic / error arm.
+    /// Reserves a strong-prefixed shared slot, invokes `init` on the
+    /// typed fat pointer, and wraps the result in an [`Arc`].
     ///
     /// # Safety
     ///
@@ -178,10 +176,10 @@ impl<A: Allocator + Clone> Arena<A> {
     }
 
     /// Shared implementation for `alloc_dst_box` / `try_alloc_dst_box`.
-    /// Mirrors `impl_alloc_dst_arc` but skips drop-entry reservation:
-    /// [`Box::drop`] runs `drop_in_place::<T>` on the value pointer
-    /// (which natively handles `?Sized`), so no chunk-teardown drop
-    /// entry is needed.
+    /// Like `impl_alloc_dst_arc` but without the per-`Arc` strong-count
+    /// prefix: [`Box::drop`] runs `drop_in_place::<T>` on the value
+    /// pointer (which natively handles `?Sized`). Neither variant
+    /// reserves a chunk drop entry.
     ///
     /// # Safety
     ///
@@ -196,48 +194,30 @@ impl<A: Allocator + Clone> Arena<A> {
         if layout.align() >= MAX_SMART_PTR_ALIGN {
             return Err(AllocError);
         }
-        // Guard parity with the Arc path: even though `Box::drop` runs
-        // `T::drop` eagerly (no chunk-teardown drop entry needed), reject
-        // DST values with `T: Drop` whose metadata cannot pack into the
-        // chunk drop-list's `u16` slot. This keeps the Box convertible
-        // to `Arc<T, A>` later via `into_arc`-style APIs and matches the
-        // non-DST `alloc_slice_box` family.
-        if mem::needs_drop::<T>() && !metadata_fits_u16::<T>(metadata) {
-            return Err(AllocError);
-        }
         let meta_bytes = mem::size_of::<T::Metadata>();
         // Payload starts at the lowest layout-aligned offset >=
         // meta_bytes. For sized T (meta_bytes = 0) payload starts at 0.
         let payload_offset = if meta_bytes == 0 { 0 } else { meta_bytes.max(layout.align()) };
-        // Floor the value byte count to 1 so the returned payload pointer
-        // (at offset `payload_offset` within the reservation) is strictly
-        // less than `reservation_end`, never landing at
-        // `chunk_base + CHUNK_ALIGN` for `layout.size() == 0`.
+        // Floor to 1 byte so a zero-size payload pointer stays inside the reservation.
         let value_bytes = layout.size().max(1);
         let total = payload_offset.checked_add(value_bytes).ok_or(AllocError)?;
-        // Refill hint must include `layout.align() - 1` bytes of slack
-        // so `try_alloc(total, align)` always succeeds inside a chunk
-        // sized for this allocation. The same hint drives the oversized
-        // routing check so the dedicated chunk also has the slack.
+        // Include alignment slack so the retry fits the chosen chunk.
         let refill_hint = total.saturating_add(layout.align());
         let mut init = Some(init);
         loop {
             if let Some((reservation, chunk_ptr)) = self.current_shared().try_alloc_with_chunk(total, layout.align().max(1)) {
                 let init = init.take().expect("init taken twice");
                 let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
-                // SAFETY: see `write_dst_prefix_and_init` — `reservation`
-                // is the freshly reserved exclusive storage; we write
-                // metadata at `payload - meta_bytes` and hand `init` a
-                // fat pointer to the payload.
+                // SAFETY: `reservation` is fresh exclusive storage; metadata
+                // is written before `init` receives the fat payload pointer.
                 let payload_nn =
                     unsafe { write_dst_prefix_and_init::<T>(reservation.as_non_null(), payload_offset, meta_bytes, metadata, init) };
                 let _ = chunk_ref.forget();
-                // SAFETY: `payload_nn` references a fully-initialized
-                // `T` whose metadata is in the chunk prefix; the
-                // hosting chunk now holds +1 in the new `Box`'s name.
+                // SAFETY: `payload_nn` references initialized `T`; the
+                // hosting chunk holds the new `Box`'s +1.
                 return Ok(unsafe { Box::from_raw(payload_nn) });
             }
-            if self.is_oversized_shared(refill_hint) {
+            if self.is_oversized(refill_hint) {
                 let init = init.take().expect("init taken twice");
                 return self.alloc_oversized_shared_with(refill_hint, |mutator, chunk_ptr| {
                     let (reservation, _chunk) = mutator
@@ -256,10 +236,12 @@ impl<A: Allocator + Clone> Arena<A> {
         }
     }
 
-    /// Reserve raw storage + drop entry in the current shared chunk,
-    /// run `init` on a typed fat pointer, commit the DST drop shim,
-    /// and return the fat `NonNull<T>`. Skips the drop entry when `T`
-    /// is drop-free.
+    /// Reserve a strong-prefixed `Arc<T>` slot in the current shared
+    /// chunk (per-`Arc` strong count + `T::Metadata` prefix + payload),
+    /// run `init` on a typed fat pointer, and return the thin payload
+    /// pointer. No chunk drop entry is reserved:
+    /// [`Arc::drop`](crate::Arc) runs `drop_in_place::<T>` (which natively
+    /// handles `?Sized`) on the last reference.
     ///
     /// # Safety
     ///
@@ -274,72 +256,33 @@ impl<A: Allocator + Clone> Arena<A> {
         if layout.align() >= MAX_SMART_PTR_ALIGN {
             return Err(AllocError);
         }
-
-        let needs_drop = mem::needs_drop::<T>();
-
-        // For DST values that need drop, the drop entry packs `metadata`
-        // into a `u16`. Reject metadata that doesn't fit before doing
-        // any allocation.
-        if needs_drop && !metadata_fits_u16::<T>(metadata) {
-            return Err(AllocError);
-        }
-        let metadata_u16 = if needs_drop { encode_metadata_u16::<T>(metadata) } else { 0 };
         let meta_bytes = mem::size_of::<T::Metadata>();
-        // Payload starts at the lowest layout-aligned offset >=
-        // meta_bytes. For sized T (meta_bytes = 0) payload starts at 0.
-        let payload_offset = if meta_bytes == 0 { 0 } else { meta_bytes.max(layout.align()) };
-        // Floor the value byte count to 1 so the returned payload pointer
-        // is strictly inside the reservation; see `impl_alloc_dst_box`.
-        let value_bytes = layout.size().max(1);
-        let total = payload_offset.checked_add(value_bytes).ok_or(AllocError)?;
+        let value_align = layout.align().max(1);
+        // Floor to 1 byte so a zero-size payload pointer stays inside the reservation.
+        let payload_bytes = layout.size().max(1);
+        let refill_hint = worst_case_arc_dst(payload_bytes, value_align, meta_bytes);
 
         let mut init = Some(init);
         loop {
-            let reservation = self.current_shared().try_alloc_thin_dst_smart_with_chunk(
-                total,
-                layout.align().max(1),
-                payload_offset,
-                needs_drop,
-                metadata_u16,
-            );
-
-            if let Some((base_in_chunk, drop_slot_opt, chunk_ptr)) = reservation {
+            if let Some((value_ptr, chunk_ptr)) = self.current_shared().try_alloc_arc_dst(payload_bytes, value_align, meta_bytes) {
                 let init = init.take().expect("init taken twice");
                 let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
-                // SAFETY: see `write_dst_prefix_and_init`.
-                let payload_nn =
-                    unsafe { write_dst_prefix_and_init::<T>(base_in_chunk.as_non_null(), payload_offset, meta_bytes, metadata, init) };
-                if let Some(slot) = drop_slot_opt {
-                    // SAFETY: `slot.as_ptr()` references a freshly
-                    // placed `DropEntry::placeholder` we own
-                    // exclusively until commit.
-                    unsafe {
-                        (*slot.as_ptr()).commit_drop_fn(dst_drop_shim::<T> as DropFn);
-                    }
-                }
+                // SAFETY: `value_ptr` is fresh payload storage with a
+                // strong prefix; metadata is written before `init`.
+                let payload_nn = unsafe { write_dst_meta_and_init::<T>(value_ptr, meta_bytes, metadata, init) };
                 let _ = chunk_ref.forget();
                 return Ok(payload_nn);
             }
 
-            let refill_hint = total
-                .saturating_add(layout.align())
-                .saturating_add(mem::size_of::<crate::internal::drop_entry::DropEntry>());
-            if self.is_oversized_shared(refill_hint) {
+            if self.is_oversized(refill_hint) {
                 let init = init.take().expect("init taken twice");
                 return self.alloc_oversized_shared_with(refill_hint, |mutator, chunk_ptr| {
-                    let (base_in_chunk, drop_slot_opt) = mutator
-                        .try_alloc_thin_dst_smart(total, layout.align().max(1), payload_offset, needs_drop, metadata_u16)
-                        .expect("dedicated oversized chunk sized to fit DST value + optional drop entry");
+                    let (value_ptr, _chunk) = mutator
+                        .try_alloc_arc_dst(payload_bytes, value_align, meta_bytes)
+                        .expect("dedicated oversized chunk sized to fit DST value + strong prefix");
                     let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
                     // SAFETY: see the in-arena branch above.
-                    let payload_nn =
-                        unsafe { write_dst_prefix_and_init::<T>(base_in_chunk.as_non_null(), payload_offset, meta_bytes, metadata, init) };
-                    if let Some(slot) = drop_slot_opt {
-                        // SAFETY: see the in-arena branch above.
-                        unsafe {
-                            (*slot.as_ptr()).commit_drop_fn(dst_drop_shim::<T> as DropFn);
-                        }
-                    }
+                    let payload_nn = unsafe { write_dst_meta_and_init::<T>(value_ptr, meta_bytes, metadata, init) };
                     let _ = chunk_ref.forget();
                     payload_nn
                 });
@@ -455,66 +398,62 @@ impl<A: Allocator + Clone> Arena<A> {
     }
 }
 
-/// Reinterpret the pointer-metadata for `T` as a `u16`.
-///
-/// Returns the low 16 bits of the metadata value when interpreted as a
-/// `usize`. For metadata kinds we don't support packing
-/// (vtable-bearing trait objects), the returned value is meaningless;
-/// [`metadata_fits_u16`] gates this.
-///
-/// For sized `T` (`Metadata = ()`), returns `0`.
+/// Worst-case byte budget for a single strong-prefixed `Arc<T>` DST
+/// allocation: per-`Arc` strong count + `T::Metadata` prefix + payload +
+/// front alignment slack.
+#[cfg_attr(test, mutants::skip)] // underestimating refill hint ⇒ refill spin
 #[inline]
-#[cfg_attr(test, mutants::skip)] // saturating cast; callers gate via `metadata_fits_u16`
-fn encode_metadata_u16<T: ?Sized + Pointee>(metadata: T::Metadata) -> u16 {
-    if mem::size_of::<T::Metadata>() == 0 {
-        return 0;
-    }
-    debug_assert_eq!(
-        mem::size_of::<T::Metadata>(),
-        mem::size_of::<usize>(),
-        "alloc_dst_*: T::Metadata must be either ZST or usize-sized"
-    );
-    // SAFETY: branch above ensures `T::Metadata` is `usize`-sized; we
-    // read it through a `usize` window, which is layout-compatible for
-    // the supported subset (`[U]` slices: metadata is the length).
-    let raw: usize = unsafe { mem::transmute_copy::<T::Metadata, usize>(&metadata) };
-    // Saturating cast: if the value exceeds u16::MAX we set u16::MAX
-    // and `metadata_fits_u16` will reject it.
-    #[allow(
-        clippy::cast_possible_truncation,
-        reason = "saturating cast: value > u16::MAX is guarded by the branch above"
-    )]
-    if raw > u16::MAX as usize { u16::MAX } else { raw as u16 }
+fn worst_case_arc_dst(payload_bytes: usize, value_align: usize, meta_bytes: usize) -> usize {
+    use crate::internal::thin_dst;
+    thin_dst::strong_prefix_bytes_for(value_align, meta_bytes)
+        .saturating_add(payload_bytes)
+        .saturating_add(thin_dst::arc_block_align(value_align))
 }
 
-/// Returns whether `metadata` packs losslessly into a `u16`.
-#[cfg_attr(test, mutants::skip)] // see `alloc_slice_ref::reject_drop_slice_too_long`
-#[inline]
-fn metadata_fits_u16<T: ?Sized + Pointee>(metadata: T::Metadata) -> bool {
-    if mem::size_of::<T::Metadata>() == 0 {
-        return true;
-    }
-    if mem::size_of::<T::Metadata>() != mem::size_of::<usize>() {
-        return false;
-    }
-    // SAFETY: branch above ensures `T::Metadata` is `usize`-sized.
-    let raw: usize = unsafe { mem::transmute_copy::<T::Metadata, usize>(&metadata) };
-    u16::try_from(raw).is_ok()
+/// Write metadata, call `init` on the reconstructed fat pointer, and
+/// return the thin payload pointer. Used by strong-prefixed `Arc<T>` DSTs.
+///
+/// # Safety
+///
+/// - `value_ptr` must be the payload pointer of a strong-prefixed `Arc`
+///   reservation whose prefix has room for `meta_bytes` immediately
+///   before it.
+/// - `init` must initialize a valid `T` through the fat pointer it
+///   receives.
+#[inline(always)]
+unsafe fn write_dst_meta_and_init<T: ?Sized + Pointee>(
+    value_ptr: NonNull<u8>,
+    meta_bytes: usize,
+    metadata: T::Metadata,
+    init: impl FnOnce(*mut T),
+) -> NonNull<u8> {
+    // SAFETY: per the function contract. The metadata word sits in
+    // `[value_ptr - meta_bytes, value_ptr)`, inside the reservation
+    // prefix; `write_unaligned` tolerates any alignment. For sized T
+    // (meta_bytes == 0) the write is skipped.
+    let fat = unsafe {
+        if meta_bytes != 0 {
+            let prefix_ptr = value_ptr.as_ptr().sub(meta_bytes).cast::<T::Metadata>();
+            ptr::write_unaligned(prefix_ptr, metadata);
+        }
+        ptr_meta::from_raw_parts_mut::<T>(value_ptr.as_ptr().cast::<()>(), metadata)
+    };
+    // Caller's contract: `init` writes a valid `T` through `fat`. If it
+    // panics, callers' `ChunkRef` guard releases the chunk's `+1`.
+    init(fat);
+    value_ptr
 }
 
-/// Write `T::Metadata` (if any) at `base + payload_offset - meta_bytes`,
-/// reconstruct the fat `*mut T`, run the caller-provided `init` on
-/// it, and return the thin payload pointer adopted by the smart
-/// pointer (metadata is recovered on demand from the chunk prefix).
+/// `Box` DST variant of [`write_dst_meta_and_init`]. `Box` has no strong-count
+/// prefix: the metadata is written at `base + payload_offset - meta_bytes`.
 ///
 /// # Safety
 ///
 /// - `base` must reference `payload_offset + layout.size()` bytes of
 ///   exclusively-owned chunk storage aligned to `layout.align()`.
-/// - `payload_offset` must equal the value computed at the call site
-///   (i.e. `meta_bytes.max(layout.align())` for DST or `0` for sized).
-/// - `init` must initialize a valid `T` through the fat pointer it
-///   receives.
+/// - `payload_offset` must equal `meta_bytes.max(layout.align())` for
+///   DST or `0` for sized `T`.
+/// - `init` must initialize a valid `T` through the fat pointer.
 #[inline(always)]
 unsafe fn write_dst_prefix_and_init<T: ?Sized + Pointee>(
     base: NonNull<u8>,
@@ -525,10 +464,8 @@ unsafe fn write_dst_prefix_and_init<T: ?Sized + Pointee>(
 ) -> NonNull<u8> {
     // SAFETY: per the function contract. `byte_add(payload_offset)`
     // stays within the reservation. The prefix at `payload - meta_bytes`
-    // lies in `[base, base + payload_offset)` (low-align T fills the
-    // prefix region; high-align T leaves the prefix in the padding).
-    // For sized T (meta_bytes == 0) the prefix write is a no-op.
-    // `from_raw_parts_mut` rebuilds the fat pointer for `init`'s call.
+    // lies in `[base, base + payload_offset)`. For sized T (meta_bytes
+    // == 0) the prefix write is a no-op.
     let (payload_nn, fat) = unsafe {
         let payload_nn = base.byte_add(payload_offset);
         if meta_bytes != 0 {
@@ -538,50 +475,20 @@ unsafe fn write_dst_prefix_and_init<T: ?Sized + Pointee>(
         let fat = ptr_meta::from_raw_parts_mut::<T>(payload_nn.as_ptr().cast::<()>(), metadata);
         (payload_nn, fat)
     };
-    // Caller's contract: `init` writes a valid `T` through `fat`. If
-    // it panics, callers' `ChunkRef` guard releases the chunk's `+1`.
+    // Caller's contract: `init` writes a valid `T` through `fat`. If it
+    // panics, callers' `ChunkRef` guard releases the chunk's `+1`.
     init(fat);
     payload_nn
 }
 
-/// Drop shim used by the DST path. Reconstructs the fat `*mut T` from
-/// `(thin, metadata_u16)` and runs `drop_in_place::<T>` on it.
-///
-/// # Safety
-///
-/// - `thin` must point at a fully-initialized `T` whose size/alignment
-///   match the [`Layout`] used at allocation time.
-/// - `T::Metadata` must be either zero-sized or `usize`-sized
-///   (enforced at the public API by `encode_metadata_u16` /
-///   `metadata_fits_u16`).
-/// - `metadata_raw`, when interpreted as `T::Metadata`, must equal the
-///   metadata that was paired with the value at allocation time.
-unsafe fn dst_drop_shim<T: ?Sized + Pointee>(thin: *mut u8, metadata_raw: usize) {
-    // Recover `T::Metadata` from the stored `usize`. For sized `T`
-    // (Metadata = `()`), the read is a zero-byte no-op.
-    let metadata: T::Metadata = if mem::size_of::<T::Metadata>() == 0 {
-        // SAFETY: `T::Metadata` is zero-sized; read produces the
-        // single uninhabited-by-data unit value.
-        unsafe { mem::zeroed() }
-    } else {
-        // SAFETY: by the function's safety contract.
-        unsafe { mem::transmute_copy::<usize, T::Metadata>(&metadata_raw) }
-    };
-    let fat: *mut T = ptr_meta::from_raw_parts_mut(thin.cast::<()>(), metadata);
-    // SAFETY: by the function's safety contract `fat` references a
-    // fully-initialized `T`; we hold exclusive access (chunk refcount
-    // is zero on the teardown path that invokes this shim).
-    unsafe { ptr::drop_in_place(fat) };
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::Arena as TestArena;
 
-    /// Cover `encode_metadata_u16` / `metadata_fits_u16` zero-sized
-    /// branches (lines 434, 458) and `dst_drop_shim`'s `Metadata = ()`
-    /// branch (line 486) via an `alloc_dst_arc` of a sized drop-bearing `T`.
+    /// Exercises `alloc_dst_arc` of a sized drop-bearing `T`: the value's
+    /// destructor must run eagerly when the last `Arc` clone drops
+    /// (before the arena is torn down).
     #[test]
     fn dst_arc_sized_drop_type_metadata_zero_sized_paths() {
         use std::sync::Arc as StdArc;
@@ -606,27 +513,4 @@ mod tests {
         drop(arena);
         assert_eq!(counter.load(Ordering::Relaxed), 1);
     }
-
-    // A `?Sized` type whose `ptr_meta` pointer metadata (`u8`) is neither
-    // zero-sized (as for `Sized` `T`) nor `usize`-sized (as for slices, `str`,
-    // and trait objects). No DST produced by real allocations has such
-    // metadata, so this exercises the otherwise-unreachable reject branch in
-    // `metadata_fits_u16`.
-    #[allow(dead_code, reason = "exists only to provide a non-usize Pointee::Metadata type")]
-    struct OddMetadataDst(str);
-
-    // SAFETY: `OddMetadataDst` is never constructed, and no pointer to it is
-    // ever formed or split via `ptr_meta`. The impl exists solely to give
-    // `metadata_fits_u16` a metadata type (`u8`) whose size is neither 0 nor
-    // `size_of::<usize>()`.
-    unsafe impl Pointee for OddMetadataDst {
-        type Metadata = u8;
-    }
-
-    /// Cover `metadata_fits_u16`'s non-`usize`-sized metadata reject branch:
-    /// `size_of::<u8>()` is 1, which is neither 0 nor `size_of::<usize>()`.
-    #[test]
-    fn metadata_fits_u16_rejects_non_usize_metadata() {
-        assert!(!metadata_fits_u16::<OddMetadataDst>(0u8));
-    }
 }
diff --git a/crates/multitude/src/arena/alloc_utf16.rs b/crates/multitude/src/arena/alloc_utf16.rs
index 6a626686f..bb42d9e89 100644
--- a/crates/multitude/src/arena/alloc_utf16.rs
+++ b/crates/multitude/src/arena/alloc_utf16.rs
@@ -42,7 +42,7 @@ impl<A: Allocator + Clone> Arena<A> {
     where
         A: Send + Sync,
     {
-        self.impl_alloc_prefixed_shared::<u16>(s.as_ref().as_slice()).map(|ptr|
+        self.impl_alloc_prefixed_shared_arc::<u16>(s.as_ref().as_slice()).map(|ptr|
             // SAFETY: see `Self::alloc_utf16_str_arc`.
             unsafe { ArcUtf16Str::from_raw(ptr) })
     }
@@ -90,7 +90,7 @@ impl<A: Allocator + Clone> Arena<A> {
     where
         A: Send + Sync,
     {
-        self.impl_alloc_utf16_prefixed_from_str(s.as_ref()).map(|ptr|
+        self.impl_alloc_utf16_prefixed_from_str_arc(s.as_ref()).map(|ptr|
             // SAFETY: see `Self::alloc_utf16_str_arc`.
             unsafe { ArcUtf16Str::from_raw(ptr) })
     }
@@ -182,7 +182,7 @@ impl<A: Allocator + Clone> Arena<A> {
                 let _ = chunk_ref.forget();
                 return Ok(payload);
             }
-            if self.is_oversized_shared(total) {
+            if self.is_oversized(total) {
                 return self.alloc_oversized_shared_with(total, |mutator, chunk_ptr| {
                     let (base, _chunk_unused) = mutator
                         .try_alloc_with_chunk(total, elem_align)
@@ -196,6 +196,38 @@ impl<A: Allocator + Clone> Arena<A> {
             self.refill_shared(total)?;
         }
     }
+
+    /// Strong-prefixed [`ArcUtf16Str`](crate::strings::ArcUtf16Str)
+    /// variant of [`Self::impl_alloc_utf16_prefixed_from_str`]: reserves
+    /// a per-`Arc` strong count and slice-length prefix, transcodes `s`
+    /// into the `u16` payload, and returns a thin pointer to the first
+    /// payload element.
+    #[inline(always)]
+    #[cfg_attr(test, mutants::skip)] // size-hint mutation ⇒ refill spin (OOM)
+    fn impl_alloc_utf16_prefixed_from_str_arc(&self, s: &str) -> Result<NonNull<u16>, AllocError> {
+        let exact: usize = s.chars().map(char::len_utf16).sum();
+        let bytes_needed = super::alloc_prefixed::worst_case_arc_slice_payload::<u16>(exact);
+        loop {
+            if let Some((uninit, chunk_ptr)) = self.try_reserve_arc_slice::<u16>(exact) {
+                let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
+                let payload = uninit.init_from_iter_ptr(s.encode_utf16());
+                let _ = chunk_ref.forget();
+                return Ok(payload.cast::<u16>());
+            }
+            if self.is_oversized(bytes_needed) {
+                return self.alloc_oversized_shared_with(bytes_needed, |mutator, chunk_ptr| {
+                    let (ticket, _chunk) = mutator
+                        .try_alloc_arc_slice::<u16>(exact)
+                        .expect("dedicated oversized chunk sized to fit utf-16 Arc payload");
+                    let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
+                    let payload = ticket.init_from_iter_ptr(s.encode_utf16());
+                    let _ = chunk_ref.forget();
+                    payload.cast::<u16>()
+                });
+            }
+            self.refill_shared(bytes_needed)?;
+        }
+    }
 }
 
 /// Writes the `usize` element-count prefix at `base`, transcodes `s`
diff --git a/crates/multitude/src/arena/alloc_value.rs b/crates/multitude/src/arena/alloc_value.rs
index 67b870887..93d532874 100644
--- a/crates/multitude/src/arena/alloc_value.rs
+++ b/crates/multitude/src/arena/alloc_value.rs
@@ -14,12 +14,12 @@ use allocator_api2::alloc::{AllocError, Allocator};
 use super::{Arena, ExpectAlloc};
 use crate::arc::Arc;
 use crate::r#box::Box;
-use crate::internal::Chunk;
 use crate::internal::chunk_ref::ChunkRef;
 use crate::internal::constants::max_smart_ptr_align;
 use crate::internal::drop_entry::DropEntry;
 use crate::internal::shared_chunk::SharedChunk;
-use crate::internal::uninit::{Uninit, UninitDrop};
+use crate::internal::uninit::Uninit;
+use crate::internal::{Chunk, thin_dst};
 
 /// Worst-case bytes consumed by a single value allocation of type `T` in
 /// a chunk: value bytes + alignment padding, plus one [`DropEntry`] slot
@@ -35,6 +35,18 @@ const fn worst_case_payload<T>() -> usize {
     }
 }
 
+/// Worst-case bytes consumed by a single `Arc<T>` value allocation: the
+/// per-`Arc` strong-count prefix + value bytes + front alignment slack.
+#[cfg_attr(test, mutants::skip)] // under-sized hint ⇒ refill loop spin (OOM)
+#[inline]
+const fn worst_case_arc_payload<T>() -> usize {
+    let align = mem::align_of::<T>();
+    let value_bytes = if mem::size_of::<T>() == 0 { 1 } else { mem::size_of::<T>() };
+    thin_dst::strong_prefix_bytes_for(align, 0)
+        .saturating_add(value_bytes)
+        .saturating_add(thin_dst::arc_block_align(align))
+}
+
 /// Maximum `align_of::<T>()` accepted by smart-pointer allocations.
 ///
 /// Boxes recover their chunk header by subtracting the value pointer's
@@ -445,7 +457,7 @@ impl<A: Allocator + Clone> Arena<A> {
                 return Ok(u.init(f()));
             }
             let wcp = worst_case_payload::<T>();
-            if self.is_oversized_local(wcp) {
+            if self.is_oversized(wcp) {
                 return self.alloc_oversized_value_with::<T, F>(wcp, f);
             }
             self.refill_local(wcp)?;
@@ -497,45 +509,53 @@ impl<A: Allocator + Clone> Arena<A> {
     fn impl_alloc_box_with<T, F: FnOnce() -> T>(&self, f: F) -> Result<Box<T, A>, AllocError> {
         // SAFETY: `impl_alloc_smart_with` returns a `NonNull<T>` to a
         // freshly-written `T` whose containing chunk has just been
-        // bumped by +1 in the new smart pointer's name. `Box` runs
-        // `T::drop` eagerly in its own `Drop`, so it does *not* register
-        // a chunk drop entry (`REGISTER_DROP = false`); otherwise the
-        // value would be dropped twice (once by `Box::drop`, once by the
-        // chunk teardown replay). `Box::from_raw` adopts that +1.
-        self.impl_alloc_smart_with::<T, F, false>(f)
+        // bumped by +1 in the new `Box`'s name. `Box` runs `T::drop`
+        // eagerly in its own `Drop` and adopts that +1 via
+        // `Box::from_raw`.
+        self.impl_alloc_smart_with::<T, F>(f)
             .map(|ptr| unsafe { Box::from_raw(ptr.cast::<u8>()) })
     }
 
-    /// Shared fast-path body for the `alloc_arc` family. Identical
-    /// shape to [`Self::impl_alloc_box_with`] — the only differences
-    /// between `Box` and `Arc` live in their `Clone`/`Send`/`Sync`
-    /// impls, not at allocation time.
+    /// Shared fast-path body for the `alloc_arc` family.
+    ///
+    /// Unlike [`Box`], an [`Arc`] reserves its own strong reference count
+    /// in a prefix of its reservation (initialized to `1`), takes one chunk
+    /// refcount for the whole `Arc` family, and runs `T::drop` eagerly
+    /// when the strong count reaches zero — never via a chunk
+    /// drop-entry.
     #[inline(always)]
+    #[cfg_attr(test, mutants::skip)] // routing-predicate mutations ⇒ refill spin (OOM)
     fn impl_alloc_arc_with<T, F: FnOnce() -> T>(&self, f: F) -> Result<Arc<T, A>, AllocError>
     where
         A: Send + Sync,
         T: Send + Sync,
     {
-        // SAFETY: see `Self::impl_alloc_box_with` — `Arc::from_raw`
-        // adopts the fresh +1 on the containing chunk. Unlike `Box`,
-        // `Arc` keeps the value alive until the chunk is torn down, so a
-        // drop entry IS registered for `T: Drop` (`REGISTER_DROP = true`).
-        self.impl_alloc_smart_with::<T, F, true>(f)
-            .map(|ptr| unsafe { Arc::from_raw(ptr.cast::<u8>()) })
+        if const { mem::align_of::<T>() >= MAX_SMART_PTR_ALIGN } {
+            return Err(AllocError);
+        }
+        let mut f = Some(f);
+        loop {
+            if let Some((uninit, chunk_ptr)) = self.try_reserve_arc_value::<T>() {
+                let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
+                let f = f.take().expect("closure taken twice");
+                let ptr = init_smart_slot::<T, A, _>(uninit, chunk_ref, f);
+                // SAFETY: the strong prefix was written (count = 1) and the
+                // chunk holds a fresh +1 for this `Arc` family.
+                return Ok(unsafe { Arc::from_raw(ptr.cast::<u8>()) });
+            }
+            let wcp = worst_case_arc_payload::<T>();
+            if self.is_oversized(wcp) {
+                let f = f.take().expect("closure taken twice");
+                return self.alloc_oversized_arc_with::<T, F>(wcp, f);
+            }
+            self.refill_shared(wcp)?;
+        }
     }
 
-    /// Bump-allocates `T` in the arena's current shared chunk, takes a
-    /// +1 refcount on that chunk for the resulting smart pointer, and
-    /// writes the value into the reservation. When `REGISTER_DROP` is
-    /// `true` and `T` needs drop, a drop entry is committed so the
-    /// chunk's teardown runs `T::drop` when the last reference releases
-    /// the chunk ([`Arc`] semantics). [`Box`] passes `REGISTER_DROP =
-    /// false` because it runs `T::drop` eagerly in its own `Drop`;
-    /// registering an entry as well would drop the value twice.
-    ///
-    /// The returned `NonNull<T>` carries no ownership marker; the
-    /// caller wraps it in the appropriate smart pointer ([`Box`] or
-    /// [`Arc`]) and that wrapper owns the +1.
+    /// Bump-allocates `T` in the arena's current shared chunk for a
+    /// [`Box`], takes a +1 refcount on that chunk, and writes the value
+    /// into the reservation. [`Box`] runs `T::drop` eagerly in its own
+    /// `Drop`, so no chunk drop entry is reserved.
     ///
     /// Rejects alignments at or above [`MAX_SMART_PTR_ALIGN`]: such
     /// values cannot live inside the first [`CHUNK_ALIGN`] bytes of a
@@ -543,222 +563,70 @@ impl<A: Allocator + Clone> Arena<A> {
     /// smart pointers' `Drop` impls.
     #[inline(always)]
     #[cfg_attr(test, mutants::skip)] // routing-predicate mutations ⇒ refill spin (OOM)
-    fn impl_alloc_smart_with<T, F: FnOnce() -> T, const REGISTER_DROP: bool>(&self, f: F) -> Result<NonNull<T>, AllocError> {
+    fn impl_alloc_smart_with<T, F: FnOnce() -> T>(&self, f: F) -> Result<NonNull<T>, AllocError> {
         if const { mem::align_of::<T>() >= MAX_SMART_PTR_ALIGN } {
             return Err(AllocError);
         }
         loop {
-            // A ZST whose allocation reserves no drop entry does not
-            // advance the bump cursor (`try_alloc(0, _)` is a no-op on
-            // the cursor), so back-to-back handouts would never refill
-            // the chunk. The per-allocation handout count is tracked in
-            // the non-atomic `local_shared_count` and draws down the
-            // pre-credited ref surplus; an unbounded run from a single
-            // chunk could exhaust that surplus, driving the chunk's
-            // atomic refcount to zero while it is still installed
-            // (use-after-free) or underflowing the surplus reconciliation
-            // at retire (double-free). Pre-reserve a 1-byte tag so each
-            // such handout advances the cursor, bounding per-chunk
-            // handouts to the chunk capacity (well below the surplus).
-            // The drop-entry path below already advances `drop_top`, so
-            // drop-registering reservations need no tag. Mirrors the
-            // guard in `impl_alloc_uninit_smart`.
-            if const { mem::size_of::<T>() == 0 && !(REGISTER_DROP && mem::needs_drop::<T>()) }
-                && self.current_shared().try_alloc(1, 1).is_none()
-            {
+            // A ZST allocation, `Drop` or not, does not advance the bump cursor
+            // (`try_alloc(0, _)` is a no-op), so back-to-back handouts
+            // would never refill the chunk. The per-allocation handout
+            // count draws down the pre-credited ref surplus; an unbounded
+            // run could exhaust it (use-after-free) or underflow the
+            // surplus reconciliation at retire (double-free). Pre-reserve
+            // a 1-byte tag so each such handout advances the cursor,
+            // bounding per-chunk handouts to the chunk capacity.
+            if const { mem::size_of::<T>() == 0 } && self.current_shared().try_alloc(1, 1).is_none() {
                 self.refill_shared(worst_case_payload::<T>())?;
                 continue;
             }
-            if const { REGISTER_DROP && mem::needs_drop::<T>() } {
-                if let Some((uninit, chunk_ptr)) = self.try_reserve_shared_with_drop::<T>() {
-                    let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
-                    return Ok(init_smart_slot_with_drop::<T, A, F>(uninit, chunk_ref, f));
-                }
-            } else if let Some((uninit, chunk_ptr)) = self.try_reserve_shared::<T>() {
+            if let Some((uninit, chunk_ptr)) = self.try_reserve_shared::<T>() {
                 let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
                 return Ok(init_smart_slot::<T, A, F>(uninit, chunk_ref, f));
             }
-            // Worst-case payload includes a drop entry for `T: Drop`
-            // so refill always sizes the chunk for the with-drop
-            // reservation above.
             let wcp = worst_case_payload::<T>();
-            if self.is_oversized_shared(wcp) {
-                return self.alloc_oversized_smart_with::<T, F, REGISTER_DROP>(wcp, f);
+            if self.is_oversized(wcp) {
+                return self.alloc_oversized_smart_with::<T, F>(wcp, f);
             }
             self.refill_shared(wcp)?;
         }
     }
 
-    /// Cold oversized-smart-pointer fallback for
-    /// [`Self::impl_alloc_smart_with`].
-    ///
-    /// Kept `#[inline(never)]` for the same reason as
-    /// [`Self::alloc_oversized_value_with`]: the fast-path body must
-    /// stay small enough for the public smart-pointer entry points to
-    /// inline; closure-free in `f` to avoid spilling the user closure's
-    /// environment to memory on the hot path.
+    /// Cold oversized-`Box` fallback for [`Self::impl_alloc_smart_with`].
     #[cold]
     #[inline(never)]
-    fn alloc_oversized_smart_with<T, F: FnOnce() -> T, const REGISTER_DROP: bool>(
-        &self,
-        wcp: usize,
-        f: F,
-    ) -> Result<NonNull<T>, AllocError> {
+    fn alloc_oversized_smart_with<T, F: FnOnce() -> T>(&self, wcp: usize, f: F) -> Result<NonNull<T>, AllocError> {
         let (mutator, chunk_ptr) = self.acquire_oversized_shared_mutator(wcp)?;
-        let ptr = if const { REGISTER_DROP && mem::needs_drop::<T>() } {
-            let ticket = mutator
-                .try_alloc_uninit_with_drop::<T>()
-                .expect("dedicated oversized chunk sized to fit one value + drop entry");
-            let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
-            init_smart_slot_with_drop::<T, A, F>(ticket, chunk_ref, f)
-        } else {
-            let ticket = mutator
-                .try_alloc_uninit::<T>()
-                .expect("dedicated oversized chunk sized to fit one value");
-            let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
-            init_smart_slot::<T, A, F>(ticket, chunk_ref, f)
-        };
+        let ticket = mutator
+            .try_alloc_uninit::<T>()
+            .expect("dedicated oversized chunk sized to fit one value");
+        let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
+        let ptr = init_smart_slot::<T, A, F>(ticket, chunk_ref, f);
         // `mutator` drops here, releasing its `+1`. The smart-pointer
         // `chunk_ref` taken above owns the surviving `+1`.
         drop(mutator);
         Ok(ptr)
     }
 
-    /// Shared body for the uninit/zeroed `Arc<MaybeUninit<T>>` family,
-    /// **for `T: Drop` only** (callers route `T: !Drop` to the ordinary
-    /// no-entry value-Arc path).
-    ///
-    /// Reserves a placeholder [`DropEntry`] alongside the value, writes the
-    /// uninitialized (or zeroed) `MaybeUninit<T>` without committing the
-    /// entry, and eagerly publishes the chunk's drop-entry count so a later
-    /// [`Arc::<MaybeUninit<T>>::assume_init`](crate::Arc) can locate and
-    /// commit it while the chunk is still the arena's active chunk.
-    #[inline]
-    #[cfg_attr(test, mutants::skip)] // ZST tag branch && → || ⇒ refill spin
-    pub(crate) fn impl_alloc_uninit_arc<T>(&self, zeroed: bool) -> Result<Arc<mem::MaybeUninit<T>, A>, AllocError>
-    where
-        A: Send + Sync,
-        T: Send + Sync,
-    {
-        if const { mem::align_of::<T>() >= MAX_SMART_PTR_ALIGN } {
-            return Err(AllocError);
-        }
-        loop {
-            // For ZST `T: Drop`, `size_of::<T>() == 0`, so the bump
-            // cursor doesn't advance per allocation. Back-to-back
-            // `alloc_uninit_arc<ZST_Drop>()` calls would otherwise
-            // produce placeholders that share `(value_offset, len = 1)`,
-            // and `commit_placeholder_drop_fn`'s lookup (which matches
-            // on that key) would re-commit the first placeholder on
-            // every subsequent `assume_init`, silently leaving the
-            // others uncommitted and skipping their destructors.
-            //
-            // Pre-reserve a 1-byte tag so each placeholder lands at a
-            // distinct `value_offset`. For ZST `T` the returned
-            // value-pointer points one byte past the previous cursor,
-            // which is fine because writes/reads/drops of a ZST touch
-            // zero bytes — the pointer's address only serves as the
-            // placeholder's lookup key.
-            if const { mem::size_of::<T>() == 0 } && self.current_shared().try_alloc(1, 1).is_none() {
-                self.refill_shared(worst_case_payload::<T>())?;
-                continue;
-            }
-            if let Some((uninit, chunk_ptr)) = self.try_reserve_shared_with_drop::<T>() {
-                let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
-                let value = if zeroed {
-                    mem::MaybeUninit::<T>::zeroed()
-                } else {
-                    mem::MaybeUninit::<T>::uninit()
-                };
-                let ptr = uninit.into_uninit_placeholder(value);
-                let _ = chunk_ref.forget();
-                // Publish the just-written placeholder so `assume_init` sees it.
-                self.current_shared().publish_drop_count();
-                // SAFETY: the chunk was bumped +1 for this `Arc` and a
-                // placeholder drop entry is reserved and published;
-                // `assume_init` commits the real shim once the value is set.
-                return Ok(unsafe { Arc::from_raw(ptr.cast::<u8>()) });
-            }
-            let wcp = worst_case_payload::<T>();
-            if self.is_oversized_shared(wcp) {
-                return self.alloc_oversized_shared_with(wcp, |mutator, chunk_ptr| {
-                    let ticket = mutator
-                        .try_alloc_uninit_with_drop::<T>()
-                        .expect("dedicated oversized chunk sized to fit one value + drop entry");
-                    let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
-                    let value = if zeroed {
-                        mem::MaybeUninit::<T>::zeroed()
-                    } else {
-                        mem::MaybeUninit::<T>::uninit()
-                    };
-                    let ptr = ticket.into_uninit_placeholder(value);
-                    let _ = chunk_ref.forget();
-                    // SAFETY: see the non-oversized branch above. The
-                    // temporary mutator's `Drop` publishes the drop-entry
-                    // count before this function returns, so `assume_init`
-                    // can locate the placeholder via the chunk header.
-                    unsafe { Arc::from_raw(ptr.cast::<u8>()) }
-                });
-            }
-            self.refill_shared(wcp)?;
-        }
-    }
-
-    /// Slice mirror of [`Self::impl_alloc_uninit_arc`], **for `T: Drop`
-    /// only**. Reserves a placeholder slice drop entry, fills the buffer
-    /// (uninitialized or zeroed) without committing, and publishes the
-    /// drop-entry count for a later
-    /// [`Arc::<[MaybeUninit<T>]>::assume_init`](crate::Arc).
-    #[inline]
-    pub(crate) fn impl_alloc_uninit_slice_arc<T>(&self, len: usize, zeroed: bool) -> Result<Arc<[mem::MaybeUninit<T>], A>, AllocError>
+    /// Cold oversized-`Arc` fallback for [`Self::impl_alloc_arc_with`].
+    #[cold]
+    #[inline(never)]
+    fn alloc_oversized_arc_with<T, F: FnOnce() -> T>(&self, wcp: usize, f: F) -> Result<Arc<T, A>, AllocError>
     where
         A: Send + Sync,
         T: Send + Sync,
     {
-        if const { mem::align_of::<T>() >= MAX_SMART_PTR_ALIGN } {
-            return Err(AllocError);
-        }
-        reject_uninit_slice_arc_too_long(len)?;
-        // Refill hint accounts for prefix + payload alignment slack +
-        // payload bytes + drop entry.
-        let min_payload = super::alloc_prefixed::worst_case_thin_slice_payload::<T>(len);
-        loop {
-            if let Some((uninit, chunk_ptr)) = self.try_reserve_shared_slice_with_drop::<T>(len) {
-                let chunk_ref = self.acquire_current_shared_chunk_ref(chunk_ptr);
-                let ptr = uninit.into_uninit_slice_placeholder(zeroed);
-                let _ = chunk_ref.forget();
-                self.current_shared().publish_drop_count();
-                // SAFETY: as in `impl_alloc_uninit_arc`; the placeholder slice
-                // drop entry is reserved and published for `assume_init`.
-                return Ok(unsafe { Arc::from_raw(ptr.cast::<u8>()) });
-            }
-            if self.is_oversized_shared(min_payload) {
-                return self.alloc_oversized_shared_with(min_payload, |mutator, chunk_ptr| {
-                    let ticket = mutator
-                        .try_alloc_uninit_slice_with_drop_prefixed::<T>(len)
-                        .expect("dedicated oversized chunk sized to fit slice + drop entry");
-                    let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
-                    let ptr = ticket.into_uninit_slice_placeholder(zeroed);
-                    let _ = chunk_ref.forget();
-                    // SAFETY: see the non-oversized branch above.
-                    unsafe { Arc::from_raw(ptr.cast::<u8>()) }
-                });
-            }
-            self.refill_shared(min_payload)?;
-        }
-    }
-}
-/// Reject slice-arc uninit requests whose `len > u16::MAX`: the chunk
-/// drop entry packs the element count into a `u16`, so a longer slice
-/// can never be encoded and the caller's refill loop would otherwise
-/// spin allocating chunks until OOM.
-#[cfg_attr(test, mutants::skip)] // see `alloc_slice_ref::reject_drop_slice_too_long`
-#[inline]
-fn reject_uninit_slice_arc_too_long(len: usize) -> Result<(), AllocError> {
-    if len > u16::MAX as usize {
-        return Err(AllocError);
+        let (mutator, chunk_ptr) = self.acquire_oversized_shared_mutator(wcp)?;
+        let (ticket, _chunk) = mutator
+            .try_alloc_arc_value::<T>()
+            .expect("dedicated oversized chunk sized to fit one Arc value + strong prefix");
+        let chunk_ref = acquire_shared_chunk_ref::<A>(chunk_ptr);
+        let ptr = init_smart_slot::<T, A, F>(ticket, chunk_ref, f);
+        drop(mutator);
+        // SAFETY: the strong prefix was written (count = 1) and the chunk
+        // holds a fresh +1 for this `Arc` family.
+        Ok(unsafe { Arc::from_raw(ptr.cast::<u8>()) })
     }
-    Ok(())
 }
 
 /// writes the value produced by `f` into the reservation. Factored out
@@ -771,20 +639,6 @@ fn init_smart_slot<T, A: Allocator + Clone, F: FnOnce() -> T>(uninit: Uninit<'_,
     uninit.init_raw(value)
 }
 
-/// Parallel to [`init_smart_slot`] but consumes a
-/// [`UninitDrop`](crate::internal::uninit::UninitDrop) ticket so the
-/// value's `Drop` runs from the chunk's drop-list at teardown.
-#[inline(always)]
-fn init_smart_slot_with_drop<T, A: Allocator + Clone, F: FnOnce() -> T>(
-    uninit: UninitDrop<'_, T>,
-    chunk_ref: ChunkRef<A>,
-    f: F,
-) -> NonNull<T> {
-    let value = f();
-    let _ = chunk_ref.forget();
-    uninit.init_raw(value)
-}
-
 /// Bumps the strong refcount on `chunk_ptr` and returns a
 /// [`ChunkRef`](crate::internal::chunk_ref::ChunkRef) that owns the
 /// fresh +1. Shared by [`Arena::init_box_slot`] and
diff --git a/crates/multitude/src/arena/mod.rs b/crates/multitude/src/arena/mod.rs
index f348407d0..b9af72dc2 100644
--- a/crates/multitude/src/arena/mod.rs
+++ b/crates/multitude/src/arena/mod.rs
@@ -319,26 +319,20 @@ impl<A: Allocator + Clone> Arena<A> {
         self.relocations.set(self.relocations.get() + 1);
     }
 
-    /// Reset the arena to a fresh state, ready for a new allocation phase.
+    /// Reset the arena's local-chunk state for a new allocation phase:
+    /// the current local chunk and all retired local chunks are released
+    /// (running any pending drop entries) and their bytes returned to the
+    /// chunk cache.
     ///
     /// Given that this takes `&mut self`, the borrow checker ensures no
-    /// outstanding simple references can still be live. Outstanding `Arc`s
-    /// from shared chunks continue to hold their backing chunks alive
-    /// independently.
-    ///
-    /// The reset is lazy: the current chunk slots are returned to the
-    /// empty state and a fresh chunk is acquired on the first subsequent
-    /// allocation, mirroring the lazy semantics of [`Self::new`].
+    /// outstanding simple references can still be live. The currently
+    /// installed shared chunk is **not** detached or rewound — shared
+    /// allocations continue on it — and outstanding `Arc`s from shared
+    /// chunks keep their backing chunks alive independently.
     #[cold]
     pub fn reset(&mut self) {
-        // Reconcile the surplus on the current shared chunk before
-        // the mutator's Drop fires its own dec_ref — keeps the
-        // chunk's atomic refcount in sync with the number of escaped
-        // handles.
-        self.reconcile_shared_surplus();
         self.retired_local.clear();
         *self.current_local.get_mut() = ChunkMutator::<LocalChunk<A>>::empty();
-        *self.current_shared.get_mut() = ChunkMutator::<SharedChunk<A>>::empty();
     }
 
     /// Returns a [`ZerocopyView`](crate::zerocopy::ZerocopyView)
@@ -381,24 +375,19 @@ impl<A: Allocator + Clone> Arena<A> {
         self.provider.config().max_normal_alloc()
     }
 
-    /// True iff a shared-chunk allocation request of `min_payload` bytes
-    /// must be routed to a one-shot oversized chunk instead of the normal
-    /// size-class pool. Callers that detect this case should use
-    /// [`Self::alloc_oversized_shared_with`] rather than
-    /// [`Self::refill_shared`].
+    /// True iff an allocation request of `min_payload` bytes must be routed
+    /// to a one-shot oversized chunk instead of the normal size-class pool.
+    /// Callers that detect this case should use the matching oversized path
+    /// ([`Self::alloc_oversized_shared_with`] /
+    /// [`Self::alloc_oversized_local_with`]) rather than the normal refill.
     ///
-    /// `ArenaBuilder` caps `max_normal_alloc` at `max_bump_extent`
-    /// (`MAX_CHUNK_BYTES - header_size`), so `min_payload <=
-    /// max_normal_alloc` always implies `header + min_payload <=
-    /// MAX_CHUNK_BYTES` — a single threshold check is enough.
+    /// The threshold is the same for local and shared chunks: `ArenaBuilder`
+    /// caps `max_normal_alloc` at `max_bump_extent` (`MAX_CHUNK_BYTES -
+    /// header_size`), so `min_payload <= max_normal_alloc` always implies
+    /// `header + min_payload <= MAX_CHUNK_BYTES` — a single threshold check
+    /// is enough for both flavors.
     #[inline]
-    pub(crate) fn is_oversized_shared(&self, min_payload: usize) -> bool {
-        min_payload > self.max_normal_alloc()
-    }
-
-    /// Local mirror of [`Self::is_oversized_shared`].
-    #[inline]
-    pub(crate) fn is_oversized_local(&self, min_payload: usize) -> bool {
+    pub(crate) fn is_oversized(&self, min_payload: usize) -> bool {
         min_payload > self.max_normal_alloc()
     }
 
@@ -444,7 +433,7 @@ impl<A: Allocator + Clone> Arena<A> {
     /// `min_payload` bytes. The previous mutator is dropped immediately —
     /// any outstanding `Arc`s independently keep the prior chunk alive.
     ///
-    /// The caller must have verified `!self.is_oversized_shared(min_payload)`
+    /// The caller must have verified `!self.is_oversized(min_payload)`
     /// before invoking this; oversized requests must go through
     /// [`Self::alloc_oversized_shared_with`] so they don't replace (and
     /// thus waste) the current chunk.
@@ -461,19 +450,15 @@ impl<A: Allocator + Clone> Arena<A> {
         // the replacement so a now-unreferenced chunk frees its bytes and
         // lets the new reservation reuse the budget.
         self.current_shared.drop_replace(ChunkMutator::<SharedChunk<A>>::empty());
-        // The previous `drop_replace` may have run user-supplied drop
-        // shims (chunk teardown). Those can re-enter the arena via
-        // `alloc_arc`/`alloc_box` which call `refill_shared`
-        // recursively and install a fresh chunk into `current_shared`.
-        // Honor that installation as-is: returning `Ok` lets the
-        // caller's retry loop re-attempt the allocation against the
-        // reentry-installed chunk. If it doesn't fit `min_payload`,
-        // the caller will simply call us again and we'll reconcile +
-        // replace that chunk in turn (its own `local_shared_count`
-        // already tracks any nested handouts).
-        if self.current_shared.borrow().chunk_ptr().is_some() {
-            return Ok(());
-        }
+        // Unlike `refill_local`, this `drop_replace` cannot re-enter the
+        // arena: shared chunks register no drop entries, and a refcount-zero
+        // shared chunk is cached (never deallocated) here, so its teardown
+        // runs no user code. `current_shared` is therefore always empty at
+        // this point.
+        debug_assert!(
+            self.current_shared.borrow().chunk_ptr().is_none(),
+            "shared drop_replace cannot install a chunk: shared teardown runs no user code",
+        );
         let new_chunk = self.provider.acquire_shared(min_payload, self.next_shared_class.get())?;
         // Pre-credit a large surplus of refs on the new chunk so the
         // per-allocation hot path can just bump a non-atomic local
diff --git a/crates/multitude/src/arena/reserve.rs b/crates/multitude/src/arena/reserve.rs
index 4a93d6f2f..43d1687d7 100644
--- a/crates/multitude/src/arena/reserve.rs
+++ b/crates/multitude/src/arena/reserve.rs
@@ -123,40 +123,14 @@ impl<A: Allocator + Clone> Arena<A> {
         Some(unsafe { (ticket.rebind(), mutator.chunk_ptr_unchecked()) })
     }
 
-    /// Try to reserve uninitialized storage for one `T` plus a drop
-    /// entry slot in the current shared chunk.
-    #[inline(always)]
-    #[cfg_attr(test, mutants::skip)] // see `try_reserve_shared`
-    pub(crate) fn try_reserve_shared_with_drop<T>(&self) -> Option<(UninitDrop<'_, T>, NonNull<SharedChunk<A>>)> {
-        let mutator = self.current_shared();
-        let ticket = mutator.try_alloc_uninit_with_drop::<T>()?;
-        // SAFETY: see `try_reserve_shared`.
-        Some(unsafe { (ticket.rebind(), mutator.chunk_ptr_unchecked()) })
-    }
-
     /// Try to reserve uninitialized storage for `len` consecutive `T`s
-    /// in the current shared chunk.
+    /// in the current shared chunk, taking the precomputed payload byte
+    /// size; the slice-copy fast paths hold an existing `&[T]` and
+    /// compute `size_of_val(src)` once outside the refill loop, sparing
+    /// the inner reservation a `checked_mul` overflow guard.
     ///
     /// Includes a thin-pointer DST length prefix immediately before
     /// the payload — see [`ChunkMutator::try_alloc_uninit_slice_prefixed`].
-    #[inline(always)]
-    #[cfg_attr(test, mutants::skip)] // see `try_reserve_shared`
-    #[allow(
-        clippy::type_complexity,
-        reason = "ticket + chunk-ptr tuple is the natural shape; type alias would obscure rather than clarify"
-    )]
-    pub(crate) fn try_reserve_shared_slice<T>(&self, len: usize) -> Option<(Uninit<'_, [T]>, NonNull<SharedChunk<A>>)> {
-        let mutator = self.current_shared();
-        let ticket = mutator.try_alloc_uninit_slice_prefixed::<T>(len)?;
-        // SAFETY: see `try_reserve_shared`.
-        Some(unsafe { (ticket.rebind(), mutator.chunk_ptr_unchecked()) })
-    }
-
-    /// Like [`Self::try_reserve_shared_slice`] but takes the precomputed
-    /// payload byte size; the slice-copy fast paths hold an existing
-    /// `&[T]` and compute `size_of_val(src)` once outside the refill
-    /// loop, sparing the inner reservation a `checked_mul` overflow
-    /// guard.
     ///
     /// # Safety
     ///
@@ -180,19 +154,51 @@ impl<A: Allocator + Clone> Arena<A> {
         Some(unsafe { (ticket.rebind(), mutator.chunk_ptr_unchecked()) })
     }
 
-    /// Try to reserve uninitialized storage for `len` consecutive `T`s
-    /// plus a drop entry slot in the current shared chunk. Includes a
-    /// thin-pointer DST length prefix immediately before the payload.
+    /// Try to reserve storage for one strong-prefixed `Arc<T>` value in
+    /// the current shared chunk. The returned ticket addresses the
+    /// payload (the strong count is already initialized to `1`).
+    #[inline(always)]
+    #[cfg_attr(test, mutants::skip)] // see `try_reserve_shared`
+    pub(crate) fn try_reserve_arc_value<T>(&self) -> Option<(Uninit<'_, T>, NonNull<SharedChunk<A>>)> {
+        let (ticket, chunk) = self.current_shared().try_alloc_arc_value::<T>()?;
+        // SAFETY: see `try_reserve_shared`.
+        Some(unsafe { (ticket.rebind(), chunk) })
+    }
+
+    /// Slice form of [`Self::try_reserve_arc_value`]: reserves a strong
+    /// prefix, slice-length metadata, and `len` `T`s.
     #[inline(always)]
     #[cfg_attr(test, mutants::skip)] // see `try_reserve_shared`
     #[allow(
         clippy::type_complexity,
         reason = "ticket + chunk-ptr tuple is the natural shape; type alias would obscure rather than clarify"
     )]
-    pub(crate) fn try_reserve_shared_slice_with_drop<T>(&self, len: usize) -> Option<(UninitDrop<'_, [T]>, NonNull<SharedChunk<A>>)> {
-        let mutator = self.current_shared();
-        let ticket = mutator.try_alloc_uninit_slice_with_drop_prefixed::<T>(len)?;
+    pub(crate) fn try_reserve_arc_slice<T>(&self, len: usize) -> Option<(Uninit<'_, [T]>, NonNull<SharedChunk<A>>)> {
+        let (ticket, chunk) = self.current_shared().try_alloc_arc_slice::<T>(len)?;
         // SAFETY: see `try_reserve_shared`.
-        Some(unsafe { (ticket.rebind(), mutator.chunk_ptr_unchecked()) })
+        Some(unsafe { (ticket.rebind(), chunk) })
+    }
+
+    /// Like [`Self::try_reserve_arc_slice`] but takes the precomputed
+    /// payload byte size (held by callers with a live `&[T]`).
+    ///
+    /// # Safety
+    ///
+    /// `payload_bytes` must equal `size_of::<T>() * len` (without overflow).
+    #[inline(always)]
+    #[cfg_attr(test, mutants::skip)] // see `try_reserve_shared`
+    #[allow(
+        clippy::type_complexity,
+        reason = "ticket + chunk-ptr tuple is the natural shape; type alias would obscure rather than clarify"
+    )]
+    pub(crate) unsafe fn try_reserve_arc_slice_with_size<T>(
+        &self,
+        len: usize,
+        payload_bytes: usize,
+    ) -> Option<(Uninit<'_, [T]>, NonNull<SharedChunk<A>>)> {
+        // SAFETY: forwarded to the caller.
+        let (ticket, chunk) = unsafe { self.current_shared().try_alloc_arc_slice_with_size::<T>(len, payload_bytes) }?;
+        // SAFETY: see `try_reserve_shared`.
+        Some(unsafe { (ticket.rebind(), chunk) })
     }
 }
diff --git a/crates/multitude/src/box.rs b/crates/multitude/src/box.rs
index e06c5f857..4956703fd 100644
--- a/crates/multitude/src/box.rs
+++ b/crates/multitude/src/box.rs
@@ -33,9 +33,10 @@ use crate::thin_smart_ptr_common::impl_thin_smart_ptr_common;
 ///
 /// Like [`Arc`](crate::Arc), `Box` keeps its containing chunk alive by
 /// holding a +1 refcount, so it can outlive the arena it came from and
-/// survives [`Arena::reset`](crate::Arena::reset). Unlike `Arc`, the
-/// `T` destructor runs eagerly when the `Box` itself is dropped
-/// (single owner), not at chunk teardown.
+/// survives [`Arena::reset`](crate::Arena::reset), and it runs `T`'s
+/// destructor eagerly — never deferred to chunk teardown. As the sole
+/// owner, `Box` drops `T` when the `Box` itself is dropped, whereas
+/// `Arc` drops `T` when its last clone is dropped.
 ///
 /// # `Send` and `Sync`
 ///
diff --git a/crates/multitude/src/internal/arena_buf.rs b/crates/multitude/src/internal/arena_buf.rs
index b5bb357d0..24f8a430e 100644
--- a/crates/multitude/src/internal/arena_buf.rs
+++ b/crates/multitude/src/internal/arena_buf.rs
@@ -3,16 +3,10 @@
 
 //! Growable, arena-backed buffer of `T`.
 //!
-//! `ArenaBuf<T>` is the internal storage primitive that backs the public
-//! `Vec<'a, T, A>`, `String<'a, A>`, and `Utf16String<'a, A>` types. It owns
-//! an in-chunk pointer plus a length and capacity, and exposes safe slice
-//! accessors. Growth (in-place when possible, copy-to-new-allocation
-//! otherwise) is mediated by [`ChunkMutator`](super::ChunkMutator) so this
-//! type stays free of allocator concerns.
-//!
-//! All `unsafe` related to the `(ptr, len, cap)` invariant of an
-//! arena-backed buffer lives in this file. Higher layers (`vec/*`,
-//! `strings/*`) compose `ArenaBuf` via its safe methods.
+//! Backing storage for `Vec<'a, T, A>`, `String<'a, A>`, and
+//! `Utf16String<'a, A>`. Growth is mediated by
+//! [`ChunkMutator`](super::chunk_mutator::ChunkMutator); this file owns the unsafe
+//! `(ptr, len, cap)` invariant.
 
 use core::iter::FusedIterator;
 use core::marker::PhantomData;
@@ -43,10 +37,8 @@ impl<T> ArenaBuf<'_, T> {
     ///
     /// # Safety
     ///
-    /// The `(ptr, len, cap)` triple must satisfy the type's invariants for
-    /// some live arena chunk that outlives `'a` — e.g. parts taken from
-    /// another `ArenaBuf` (possibly reinterpreted, as in
-    /// [`Vec::into_flattened`](crate::vec::Vec::into_flattened)).
+    /// `(ptr, len, cap)` must satisfy the type invariants for storage in a
+    /// live arena chunk that outlives `'a`.
     #[inline]
     pub(crate) const unsafe fn from_raw_parts(ptr: NonNull<T>, len: usize, cap: usize) -> Self {
         Self {
@@ -59,9 +51,8 @@ impl<T> ArenaBuf<'_, T> {
 }
 
 impl<'a, T> ArenaBuf<'a, T> {
-    /// Creates an empty buffer with no backing storage. ZSTs are
-    /// initialized with `cap = usize::MAX` since no real storage is
-    /// ever needed for them.
+    /// Creates an empty buffer. ZST buffers use `cap = usize::MAX`
+    /// because they need no backing storage.
     #[inline]
     pub(crate) const fn new() -> Self {
         let cap = if mem::size_of::<T>() == 0 { usize::MAX } else { 0 };
@@ -171,10 +162,8 @@ impl<'a, T> ArenaBuf<'a, T> {
         unsafe { self.replace_buffer_raw(new_ptr, new_cap) };
     }
 
-    /// Raw-pointer variant of [`Self::replace_buffer`]. Used by the
-    /// oversized-chunk growth path in [`crate::vec::Vec`], where the
-    /// fresh reservation comes from a temporary [`ChunkMutator`] whose
-    /// ticket lifetime can't be rebound to `'a` through the public API.
+    /// Raw-pointer variant of [`Self::replace_buffer`] for oversized
+    /// growth through a temporary [`ChunkMutator`](super::chunk_mutator::ChunkMutator).
     ///
     /// # Safety
     ///
@@ -329,12 +318,8 @@ impl<'a, T> ArenaBuf<'a, T> {
     /// Splits the buffer at `at`, keeping `[0, at)` in `self` and
     /// returning a new buffer that owns `[at, len)`.
     ///
-    /// The returned buffer shares the same chunk storage as `self`; no
-    /// elements are copied. After the split, `self`'s capacity is capped
-    /// at `at` (so a later push reallocates rather than overwriting the
-    /// tail), and the tail buffer covers the remaining capacity. This is
-    /// sound because chunk storage is reclaimed only at arena teardown,
-    /// which outlives both buffers (lifetime `'a`).
+    /// No elements are copied. `self.cap` is capped at `at`, and the returned
+    /// tail owns the remaining capacity in the same arena chunk.
     ///
     /// Caller must ensure `at <= len`.
     #[inline]
@@ -368,13 +353,9 @@ impl<'a, T> ArenaBuf<'a, T> {
     /// Attempts to absorb `other`'s storage in O(1) when it directly
     /// abuts the end of `self`'s storage in the same chunk.
     ///
-    /// Succeeds only when `self` is exactly full (`len == cap`, so there
-    /// is no uninitialized gap before `other`) and `other`'s buffer
-    /// begins exactly at `self`'s one-past-the-end address. On success,
-    /// `self` grows to cover `other`'s elements and capacity, and `other`
-    /// is reset to empty without dropping its elements (ownership moves
-    /// to `self`). Returns `false` (leaving both buffers untouched) when
-    /// the buffers are not adjacent. Not used for ZSTs.
+    /// Succeeds only when `self` is full and `other` starts at `self`'s
+    /// one-past-end address; otherwise returns `false`. On success, `self`
+    /// owns both ranges and `other` is reset to empty without dropping elements.
     #[inline]
     pub(crate) fn try_absorb_adjacent(&mut self, other: &mut Self) -> bool {
         debug_assert!(mem::size_of::<T>() != 0, "try_absorb_adjacent: not for ZSTs");
@@ -382,15 +363,8 @@ impl<'a, T> ArenaBuf<'a, T> {
             return false;
         }
         let self_end = self.ptr.as_ptr().wrapping_add(self.cap);
-        // The exact pointer-equality test below is also a proof that `other`
-        // lives in the *same chunk* as `self` (so `self.ptr`'s chunk-wide
-        // provenance legitimately covers the absorbed region). A distinct
-        // chunk's payload always begins `header_size > 0` bytes after its
-        // base, and chunk allocations never overlap, so a buffer in another
-        // chunk can never start exactly at `self`'s one-past-the-end address:
-        // that would require the other chunk's base to fall *inside* `self`'s
-        // chunk. Hence `ptr::eq(self_end, other.ptr)` can only hold when both
-        // buffers were carved from one chunk's bump region.
+        // Pointer equality proves same-chunk adjacency: another chunk's
+        // payload cannot begin exactly at this chunk's one-past-end address.
         if !ptr::eq(self_end.cast_const(), other.ptr.as_ptr().cast_const()) {
             return false;
         }
@@ -433,25 +407,14 @@ impl<'a, T> ArenaBuf<'a, T> {
         }
     }
 
-    /// Returns an owning, double-ended iterator that yields the live
-    /// elements in order, leaving the buffer empty. The iterator's
-    /// `Drop` drops any elements that were not yielded. The iterator
-    /// is bound to the arena lifetime `'a` of the buffer.
+    /// Returns an owning iterator over the live elements and leaves the
+    /// buffer empty. Dropping the iterator drops any unyielded elements.
     ///
     /// # Caller contract
     ///
-    /// The returned [`DrainAll`] is deliberately bound to the arena
-    /// lifetime `'a` rather than to the `&mut self` borrow, so that an
-    /// owning [`IntoIter`](crate::vec::IntoIter) can hold it past the
-    /// `ManuallyDrop<Vec>` that produced it. Because the borrow checker
-    /// therefore does **not** tie the iterator to this buffer, the
-    /// caller MUST NOT touch `self` (push, grow, drain again, drop the
-    /// elements, etc.) until the returned iterator has been fully
-    /// consumed or dropped: the iterator keeps a *copy* of `self.ptr`
-    /// and still logically owns `[0, len)`, so any concurrent write or
-    /// re-read of those slots would alias and double-own the elements.
-    /// All current callers consume the iterator immediately and never
-    /// reuse the buffer afterwards.
+    /// [`DrainAll`] is bound to arena lifetime `'a`, not the `&mut self`
+    /// borrow. The caller must not touch `self` until the iterator is
+    /// consumed or dropped, because the iterator owns `[0, len)`.
     #[inline]
     pub(crate) fn drain_all(&mut self) -> DrainAll<'a, T> {
         let len = self.len;
@@ -467,9 +430,7 @@ impl<'a, T> ArenaBuf<'a, T> {
     }
 }
 
-/// Owning iterator over every element of an [`ArenaBuf`], in order.
-/// Bound to the arena lifetime `'a` rather than to the buffer that
-/// produced it, so the iterator can outlive the `ArenaBuf`.
+/// Owning iterator over an [`ArenaBuf`]'s live elements.
 pub(crate) struct DrainAll<'a, T> {
     ptr: NonNull<T>,
     head: usize,
diff --git a/crates/multitude/src/internal/chunk.rs b/crates/multitude/src/internal/chunk.rs
index 9ae7a4636..59245c516 100644
--- a/crates/multitude/src/internal/chunk.rs
+++ b/crates/multitude/src/internal/chunk.rs
@@ -5,18 +5,9 @@
 
 /// A contiguous block of memory that an arena carves bump allocations out of.
 ///
-/// Both [`LocalChunk`](super::LocalChunk) and [`SharedChunk`](super::SharedChunk)
-/// implement this trait. They differ in how the chunk and its allocations are
-/// owned and shared:
-///
-/// - `LocalChunk` is used for allocations whose lifetime is tied to the arena
-///   itself and never crosses thread boundaries; no synchronization is needed.
-/// - `SharedChunk` is used for allocations whose lifetime can outlive the
-///   arena (reference-counted handles), and uses atomics for cross-thread
-///   refcounting.
-///
-/// Implementors are dynamically-sized types: the struct ends with a `[u8]`
-/// payload that holds the actual bump-allocation buffer.
+/// Implemented by [`LocalChunk`](super::local_chunk::LocalChunk) and
+/// [`SharedChunk`](super::shared_chunk::SharedChunk). Both are DSTs with a payload tail;
+/// local chunks are arena-thread confined, shared chunks use atomic refcounts.
 pub(crate) trait Chunk {
     /// Returns the chunk's payload capacity in bytes (i.e. `data.len()`).
     fn capacity(&self) -> usize;
@@ -33,12 +24,4 @@ pub(crate) trait Chunk {
     /// responsible for tearing down the chunk (running drop entries and
     /// routing the backing memory back to the provider or deallocator).
     fn dec_ref(&self) -> bool;
-
-    /// Returns the number of drop entries currently stored at the tail of the
-    /// chunk.
-    fn drop_entry_count(&self) -> usize;
-
-    /// Sets the number of drop entries currently stored at the tail of the
-    /// chunk.
-    fn set_drop_entry_count(&self, count: usize);
 }
diff --git a/crates/multitude/src/internal/chunk_alloc.rs b/crates/multitude/src/internal/chunk_alloc.rs
index 6830ebe32..3bb196eea 100644
--- a/crates/multitude/src/internal/chunk_alloc.rs
+++ b/crates/multitude/src/internal/chunk_alloc.rs
@@ -1,36 +1,24 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
-//! Shared raw-allocation helpers used by `LocalChunk::allocate` and
-//! `SharedChunk::allocate`. Both build a `header + payload_size` byte
-//! allocation aligned for the chunk header, then write fields through a
-//! freshly-constructed fat DST pointer.
+//! Shared raw-allocation helpers for chunk `allocate` / `destroy` paths.
+//! They centralize layout size and alignment.
 
 use core::alloc::Layout;
 use core::ptr::NonNull;
 
 use allocator_api2::alloc::{AllocError, Allocator};
 
-/// Computes the canonical `Layout` for a chunk allocation, the single
-/// source of truth shared by every `allocate`/`destroy` pair so the two
-/// can never disagree (a mismatched `deallocate` layout is UB).
+/// Computes the canonical `Layout` for a chunk allocation.
 ///
-/// Two *distinct* alignments are at play and must not be conflated:
+/// Two distinct alignments are involved:
 ///
 /// * `value_align` — the chunk type's own alignment (`align_of::<Self>()`,
-///   ignoring the `[UnsafeCell<u8>]` tail which is align-1). Rust rounds
-///   the size of any value up to a multiple of its alignment, so a
-///   reference built from the fat pointer covers `round_up(total,
-///   value_align)` bytes. The allocation's **size** is rounded up to this
-///   so the reference's footprint matches the allocation exactly (a
-///   shortfall is UB, caught by Miri; an excess silently over-allocates).
+///   ignoring the align-1 tail). The allocation size is rounded up to this.
 ///
 /// * `base_align` — the alignment of the allocation's **base address**,
-///   which may be far larger than `value_align` (e.g. `CHUNK_ALIGN` =
-///   64 KiB for shared chunks, so the chunk header is recoverable by
-///   masking the low bits of any interior pointer). This governs only the
-///   `Layout` alignment; the **size is never rounded up to it**, otherwise
-///   every shared chunk would inflate to a full `CHUNK_ALIGN`.
+///   which may be much larger for shared chunks. This governs only
+///   `Layout::align`; the size is not rounded up to it.
 ///
 /// `base_align >= value_align` and both must be powers of two.
 #[allow(
@@ -47,13 +35,9 @@ pub(crate) fn chunk_layout(header_size: usize, payload_size: usize, value_align:
     Layout::from_size_align(rounded, base_align).map_err(|_| AllocError)
 }
 
-/// The exact byte footprint of a chunk allocation — the rounded
-/// `Layout::size()` that [`chunk_layout`] produces. This is the single
-/// source of truth for both the allocation `Layout` and the provider's
-/// byte-budget accounting, so the two can never disagree (accounting that
-/// used the unrounded `header_size + payload_size` would under-report the
-/// real allocator footprint when `header_size + payload_size` is not a
-/// multiple of `value_align`, e.g. for oversized chunks).
+/// Exact byte footprint of a chunk allocation: the rounded `Layout::size()`
+/// produced by [`chunk_layout`]. Used for both allocation and byte-budget
+/// accounting.
 #[inline]
 pub(crate) fn chunk_alloc_size(header_size: usize, payload_size: usize, value_align: usize) -> Result<usize, AllocError> {
     debug_assert!(value_align.is_power_of_two(), "value_align must be a power of two");
@@ -62,15 +46,10 @@ pub(crate) fn chunk_alloc_size(header_size: usize, payload_size: usize, value_al
     Ok(total.checked_add(mask).ok_or(AllocError)? & !mask)
 }
 
-/// Allocate a `header + payload_size` byte allocation whose base address
-/// is `base_align`-aligned and whose size is rounded up to `value_align`
-/// (see [`chunk_layout`]).
+/// Allocates a chunk backing allocation using [`chunk_layout`].
 ///
-/// Returns `(raw_u8_ptr, layout)` on success. The pointer carries
-/// provenance over the full allocation and is suitable as the data field
-/// of a slice-DST fat pointer with metadata `payload_size`. The layout is
-/// the exact one passed to `allocator.allocate`, suitable for a matching
-/// `deallocate` call (reproduced by [`chunk_layout`] at destroy time).
+/// Returns `(raw_u8_ptr, layout)`. The pointer covers the full allocation and
+/// can be used as the data field of a slice-DST fat pointer.
 ///
 /// On size-overflow or end-of-address-space overflow, the allocation is
 /// freed and `AllocError` is returned.
@@ -103,18 +82,12 @@ pub(crate) fn alloc_chunk_raw<A: Allocator>(
 mod tests {
     use super::chunk_layout;
 
-    /// `chunk_layout` must round the allocation *size* up to `value_align`.
-    /// Pins the exact round-up so the `value_align - 1` mask can't be
-    /// mutated to `value_align + 1` or `value_align / 1` — both corrupt the
-    /// rounding for totals that aren't already `value_align`-aligned (the
-    /// size-class tests use pre-aligned totals, so they can't catch this).
+    /// `chunk_layout` must round allocation size up to `value_align`.
     #[test]
     fn rounds_size_up_to_value_align() {
-        // A large power-of-two base (mirrors shared chunks); it governs the
-        // layout *alignment* only and must not affect the size rounding.
+        // Large base alignment must not affect size rounding.
         const BASE: usize = 65_536;
-        // (header, payload, value_align, expected_size). Totals are chosen
-        // to be NON-multiples of `value_align` so the mask actually rounds.
+        // Totals are non-multiples of `value_align`, so the mask must actually round.
         let cases = [
             (10_usize, 7_usize, 8_usize, 24_usize), // total 17 -> 24
             (34, 16, 8, 56),                        // total 50 -> 56
diff --git a/crates/multitude/src/internal/chunk_mutator.rs b/crates/multitude/src/internal/chunk_mutator.rs
index 365e42bb7..473a72304 100644
--- a/crates/multitude/src/internal/chunk_mutator.rs
+++ b/crates/multitude/src/internal/chunk_mutator.rs
@@ -3,14 +3,10 @@
 
 //! Bump allocator over a single chunk.
 //!
-//! [`ChunkMutator<C>`] owns one strong reference to a chunk and exposes safe
-//! allocation primitives that hand out [`InChunk`] pointers, [`Uninit`]
-//! tickets, and [`UninitDrop`] tickets. All `unsafe` interaction with the
-//! chunk's raw memory is concentrated here.
-//!
-//! When the mutator is dropped it decrements the chunk's refcount; if that
-//! drops the count to zero it replays pending drop entries and routes the
-//! chunk back through [`ChunkOps::teardown_and_release`].
+//! [`ChunkMutator<C>`] owns one strong chunk reference and hands out
+//! [`InChunk`], [`Uninit`], and [`UninitDrop`] tickets. Drop publishes pending
+//! drop entries, releases the refcount, and may trigger
+//! [`ChunkOps::teardown_and_release`].
 
 use core::cell::Cell;
 use core::ptr::{self, NonNull};
@@ -24,37 +20,21 @@ use super::uninit::{Uninit, UninitDrop};
 /// Owns one strong reference to a chunk and tracks the bump cursor and the
 /// growing-down drop-entry top.
 ///
-/// Hot-path layout is intentionally minimal: only `chunk`, `bump`, and
-/// `drop_top` are stored. The payload start/end addresses are re-derived
-/// from `chunk` in the cold paths that need them (capacity reporting,
-/// drop-publish on `Drop`, drop-entry rollback, value-offset encoding for
-/// drop-requiring types).
+/// Hot-path layout stores only `chunk`, `bump`, and `drop_top`; cold paths
+/// re-derive payload bounds from `chunk`.
 pub(crate) struct ChunkMutator<C: ?Sized + ChunkOps> {
     chunk: Option<NonNull<C>>,
-    /// Bump cursor stored as a pointer so that derivations preserve
-    /// the chunk's full provenance under Stacked / Tree Borrows.
-    /// Storing it as a `usize` and recovering the pointer via
-    /// `addr as *mut u8` would produce a no-provenance pointer that
-    /// would fail Miri whenever a derived value pointer is later read
-    /// back (e.g., during drop-entry replay).
+    /// Bump cursor stored as a pointer to preserve chunk provenance under
+    /// Stacked / Tree Borrows.
     bump: Cell<NonNull<u8>>,
     /// Top of the drop-entry region (entries grow downward). Same
     /// pointer-preserves-provenance rationale as `bump`.
     drop_top: Cell<NonNull<u8>>,
 }
 
-// SAFETY: `ChunkMutator` owns one strong refcount on its chunk and
-// accesses its payload via interior-mutable cells; the underlying
-// `NonNull<C>` is the only field that prevents auto-derivation of
-// `Send`. Both implementors of `ChunkOps` (`LocalChunk`, `SharedChunk`)
-// support cross-thread ownership transfer of a single owning reference
-// (atomic refcount for shared, single-thread invariant for local that
-// follows the owning thread). The `?Sized` bound matters: both chunk
-// types are DSTs with a `[UnsafeCell<u8>]` tail, so a `C: Sized` bound
-// would exclude every real instantiation and silently break
-// `Arena: Send` (because the `Send` impl wouldn't apply). `Sync` is
-// intentionally NOT implemented: the `Cell` fields make the mutator
-// unsuitable for concurrent shared access.
+// SAFETY: the mutator owns one strong chunk ref and moves that ownership
+// across threads only when `C: Send`. `LocalChunk` follows the owning thread;
+// `SharedChunk` uses atomics. The `Cell` fields intentionally make this `!Sync`.
 unsafe impl<C: ?Sized + ChunkOps + Send> Send for ChunkMutator<C> {}
 
 impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
@@ -82,17 +62,13 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         }
     }
 
-    /// Builds an empty mutator that owns no chunk. Every `try_alloc*`
-    /// returns `None`, so the arena's hot path falls through to a
-    /// `refill_*` call that installs a real chunk. Used to defer the
-    /// first chunk allocation until the first user-visible alloc.
+    /// Builds an empty mutator. Every `try_alloc*` returns `None`, deferring
+    /// chunk allocation until the first user-visible allocation.
     pub(crate) const fn empty() -> Self {
         Self {
             chunk: None,
-            // Sentinels: `bump > drop_top` so every `try_alloc*` falls
-            // through to the refill path via the bound check without
-            // any explicit `self.chunk?` test. Both `dangling()` values
-            // are non-null, fit in `isize`, and are never dereferenced.
+            // Sentinels: `bump > drop_top`, so bound checks fail without an
+            // explicit `self.chunk?`. These pointers are never dereferenced.
             bump: Cell::new(NonNull::<u16>::dangling().cast::<u8>()),
             drop_top: Cell::new(NonNull::<u8>::dangling()),
         }
@@ -110,12 +86,8 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
     }
 
     /// Free byte count between the bump cursor and the drop-entry top.
-    /// Used by stats accounting at retire (`ChunkMutator::Drop` and
-    /// `ChunkMutator::forget_into_chunk`) and by `Arena::stats` to fold
-    /// the currently-active chunks' unused tails into
-    /// `ArenaStats::wasted_tail_bytes`. The empty-mutator sentinel
-    /// returns 0 (saturating). The value is reported as `u32` since
-    /// chunk capacity is bounded well below `u32::MAX`.
+    /// Stats helper; empty-mutator sentinels saturate to 0. Reported as `u32`
+    /// because chunk capacity is far below `u32::MAX`.
     #[cfg(feature = "stats")]
     #[inline]
     pub(crate) fn wasted_tail_for_stats(&self) -> u32 {
@@ -128,9 +100,8 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
     ///
     /// # Panics
     ///
-    /// Panics on the empty mutator. Only the dead-code `capacity` /
-    /// `free_bytes` helpers can hit that path; all hot-path callers
-    /// invoke this after a successful `try_reserve_*`.
+    /// Panics on the empty mutator; hot-path callers invoke this only after a
+    /// successful reservation.
     #[inline]
     fn payload_range(&self) -> (usize, usize) {
         let chunk = self.chunk.expect("payload_range: chunk must be set");
@@ -150,11 +121,17 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         // SAFETY: caller asserts `chunk` is live.
         let (start, cap) = unsafe { (C::payload_ptr(chunk), chunk.as_ref().capacity()) };
         let start_addr = start.as_ptr() as usize;
-        // Align the reported end down to `align_of::<DropEntry>()` so the
-        // drop-entry region (entries grow down from this point) stays
-        // naturally aligned regardless of payload-start alignment.
-        let entry_align = mem::align_of::<DropEntry>();
-        let end_addr = (start_addr + cap) & !(entry_align - 1);
+        let end_addr = if C::REGISTERS_DROPS {
+            // Align the reported end down to `align_of::<DropEntry>()` so the
+            // drop-entry region (entries grow down from this point) stays
+            // naturally aligned regardless of payload-start alignment.
+            let entry_align = mem::align_of::<DropEntry>();
+            (start_addr + cap) & !(entry_align - 1)
+        } else {
+            // Flavors that never register drop entries let the bump cursor use
+            // the whole payload — no tail region is reserved.
+            start_addr + cap
+        };
         (start_addr, end_addr)
     }
 
@@ -186,11 +163,9 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
     ///
     /// # Overflow safety
     ///
-    /// `cur_addr` is asserted to fit in `isize` so the alignment math
-    /// has no overflow guard; chunks live in the lower half of the
-    /// address space on every realistic 64-bit platform. `size` is *not*
-    /// constrained, so the `aligned_addr + size` step uses `checked_add`
-    /// to refuse oversized requests.
+    /// On 64-bit targets `cur_addr` is asserted to fit in `isize`, allowing
+    /// overflow-free alignment math. `aligned_addr + size` still uses
+    /// `checked_add` because `size` is caller-controlled.
     #[inline]
     // Mutation testing is suppressed: any mutation that always rejects
     // sends callers into an infinite refill spin (OOM).
@@ -204,12 +179,8 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         // SAFETY: see the overflow-safety note above.
         unsafe {
             hint::assert_unchecked(cur_addr > 0);
-            // On 64-bit targets every valid address is far below
-            // `isize::MAX`, so this also holds and lets the optimizer treat
-            // the align-up below as overflow-free. It is only asserted where
-            // guaranteed: on a target where an address may exceed
-            // `isize::MAX` (e.g. 32-bit upper half) the assertion could be
-            // false (→ UB), so we drop the hint and use checked arithmetic.
+            // On 64-bit targets this lets the optimizer treat align-up as
+            // overflow-free. Narrower targets use checked arithmetic.
             #[cfg(target_pointer_width = "64")]
             hint::assert_unchecked(isize::try_from(cur_addr).is_ok());
         }
@@ -217,16 +188,9 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         let aligned_addr = (cur_addr + (align - 1)) & !(align - 1);
         #[cfg(not(target_pointer_width = "64"))]
         let aligned_addr = (cur_addr.checked_add(align - 1)?) & !(align - 1);
-        // For zero-size allocations, probe one extra byte: a ZST alloc
-        // at the chunk tail (`cur_addr == drop_top_addr`) would otherwise
-        // return a value pointer equal to `chunk_base + CHUNK_ALIGN` for
-        // the largest chunk class, which masks to the next 64 KiB tile
-        // and breaks the smart-pointer chunk-header recovery. Non-zero-
-        // size allocs already satisfy this because the last payload byte
-        // sits at `end - 1`, strictly inside `[chunk_base, drop_top)`.
-        // `size.max(1)` is a branchless CMOV; the checked_add cannot
-        // overflow under the `cur_addr + (align - 1)` debug assertion
-        // above.
+        // ZST smart-pointer values must still point strictly inside the chunk;
+        // a tail one-past pointer would mask to the next 64 KiB tile.
+        // `size.max(1)` probes that byte without changing the ZST bump.
         let probe_end = aligned_addr.checked_add(size.max(1))?;
         if probe_end > drop_top_addr {
             return None;
@@ -262,29 +226,6 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         Some((in_chunk, unsafe { self.chunk_ptr_unchecked() }))
     }
 
-    /// [`Self::try_alloc_thin_dst_smart`] paired with the owning chunk
-    /// pointer. See [`Self::try_alloc_with_chunk`].
-    #[inline]
-    #[cfg_attr(test, mutants::skip)] // see `try_alloc`
-    #[allow(
-        clippy::type_complexity,
-        reason = "matches try_alloc_thin_dst_smart's shape plus the chunk pointer"
-    )]
-    #[cfg(feature = "dst")]
-    pub(crate) fn try_alloc_thin_dst_smart_with_chunk(
-        &self,
-        total: usize,
-        align: usize,
-        payload_offset: usize,
-        needs_drop: bool,
-        metadata_u16: u16,
-    ) -> Option<(InChunk<u8>, Option<InChunk<DropEntry>>, NonNull<C>)> {
-        let (base, drop_slot) = self.try_alloc_thin_dst_smart(total, align, payload_offset, needs_drop, metadata_u16)?;
-        // SAFETY: a successful reservation proves the mutator owns a
-        // chunk.
-        Some((base, drop_slot, unsafe { self.chunk_ptr_unchecked() }))
-    }
-
     /// Byte-slice fast path: skips the alignment mask, `checked_mul`,
     /// and ZST branch. Only valid for `T = u8` (align 1, size 1).
     #[inline]
@@ -312,16 +253,11 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         Some(Uninit::new(bytes.into_slice::<T>(len)))
     }
 
-    /// Like [`Self::try_alloc_uninit_slice`] but takes the precomputed
-    /// byte size, skipping the `size_of::<T>().checked_mul(len)`
-    /// overflow guard.
+    /// Like [`Self::try_alloc_uninit_slice`], but trusts a precomputed byte size.
     ///
     /// # Safety
     ///
-    /// `size` must equal `size_of::<T>() * len` (without overflow).
-    /// Callers holding an existing `&[T]` satisfy this trivially via
-    /// [`core::mem::size_of_val`] (which is an unchecked intrinsic
-    /// guaranteed not to overflow for any live slice).
+    /// `size` must equal `size_of::<T>() * len` without overflow.
     #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin
     pub(crate) unsafe fn try_alloc_uninit_slice_with_size<T>(&self, len: usize, size: usize) -> Option<Uninit<'_, [T]>> {
         debug_assert_eq!(size, mem::size_of::<T>().wrapping_mul(len));
@@ -352,15 +288,12 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         Some(Uninit::new(payload))
     }
 
-    /// Like [`Self::try_alloc_uninit_slice_prefixed`] but takes the
-    /// precomputed payload byte size, skipping the
-    /// `size_of::<T>().checked_mul(len)` overflow guard.
+    /// Like [`Self::try_alloc_uninit_slice_prefixed`] with a precomputed
+    /// payload byte size.
     ///
     /// # Safety
     ///
-    /// `payload_bytes` must equal `size_of::<T>() * len` (without
-    /// overflow). Callers holding an existing `&[T]` satisfy this via
-    /// [`core::mem::size_of_val`].
+    /// `payload_bytes` must equal `size_of::<T>() * len` without overflow.
     #[cfg_attr(test, mutants::skip)] // see `try_alloc`
     #[allow(
         clippy::cast_ptr_alignment,
@@ -372,9 +305,114 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         Some(Uninit::new(payload))
     }
 
-    /// Layout + alloc + prefix-write for "thin DST slice" reservations.
-    /// On success returns the payload ticket and the absolute payload
-    /// address (used by drop-tracked callers to encode `value_offset`).
+    /// Reserve storage for one `Arc<T>`-style value with a leading
+    /// per-`Arc` strong reference count.
+    ///
+    /// Layout: `[strong][pad][metadata][payload]`. Initializes strong count
+    /// to 1 and returns the payload pointer.
+    ///
+    /// `payload_bytes` is floored to 1 so the value pointer stays inside the
+    /// chunk and preserves header recovery by mask.
+    #[inline]
+    #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin
+    #[allow(
+        clippy::cast_ptr_alignment,
+        reason = "reservation is aligned to >= STRONG_ALIGN, so the leading strong slot is aligned for AtomicU32"
+    )]
+    fn try_alloc_arc_prefixed(&self, payload_bytes: usize, value_align: usize, meta_bytes: usize) -> Option<NonNull<u8>> {
+        use super::thin_dst::{arc_block_align, strong_prefix_bytes_for};
+        let prefix = strong_prefix_bytes_for(value_align, meta_bytes);
+        let total = prefix.checked_add(payload_bytes.max(1))?;
+        let base = self.try_alloc(total, arc_block_align(value_align))?;
+        // SAFETY: `base` is aligned to `arc_block_align(value_align)` (>=
+        // STRONG_ALIGN), so the leading `AtomicU32` write is aligned and
+        // in chunk provenance; `base + prefix` is `value_align`-aligned
+        // and stays within the reservation.
+        unsafe {
+            base.as_ptr()
+                .cast::<core::sync::atomic::AtomicU32>()
+                .write(core::sync::atomic::AtomicU32::new(1));
+            Some(NonNull::new_unchecked(base.as_ptr().add(prefix)))
+        }
+    }
+
+    /// [`Self::try_alloc_arc_prefixed`] plus the owning chunk pointer.
+    #[inline]
+    #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin
+    pub(crate) fn try_alloc_arc_value<T>(&self) -> Option<(Uninit<'_, T>, NonNull<C>)> {
+        let value_ptr = self.try_alloc_arc_prefixed(mem::size_of::<T>(), mem::align_of::<T>(), 0)?;
+        // SAFETY: a successful reservation proves the mutator owns a chunk.
+        Some((Uninit::new(InChunk::from_raw(value_ptr).cast::<T>()), unsafe {
+            self.chunk_ptr_unchecked()
+        }))
+    }
+
+    /// Slice form of [`Self::try_alloc_arc_value`], including the strong
+    /// prefix and slice-length metadata word.
+    #[inline]
+    #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin
+    #[allow(
+        clippy::type_complexity,
+        reason = "ticket + chunk-ptr tuple is the natural shape; type alias would obscure rather than clarify"
+    )]
+    pub(crate) fn try_alloc_arc_slice<T>(&self, len: usize) -> Option<(Uninit<'_, [T]>, NonNull<C>)> {
+        let payload_bytes = mem::size_of::<T>().checked_mul(len)?;
+        // SAFETY: `payload_bytes == size_of::<T>() * len` (just checked).
+        unsafe { self.try_alloc_arc_slice_with_size::<T>(len, payload_bytes) }
+    }
+
+    /// Like [`Self::try_alloc_arc_slice`] with a precomputed payload byte size.
+    ///
+    /// # Safety
+    ///
+    /// `payload_bytes` must equal `size_of::<T>() * len` (without overflow).
+    #[inline]
+    #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin
+    #[allow(
+        clippy::type_complexity,
+        reason = "ticket + chunk-ptr tuple is the natural shape; type alias would obscure rather than clarify"
+    )]
+    #[allow(
+        clippy::cast_ptr_alignment,
+        reason = "slice-length metadata is written/read unaligned immediately before the payload"
+    )]
+    pub(crate) unsafe fn try_alloc_arc_slice_with_size<T>(
+        &self,
+        len: usize,
+        payload_bytes: usize,
+    ) -> Option<(Uninit<'_, [T]>, NonNull<C>)> {
+        debug_assert_eq!(payload_bytes, mem::size_of::<T>().wrapping_mul(len));
+        let value_ptr = self.try_alloc_arc_prefixed(payload_bytes, mem::align_of::<T>(), mem::size_of::<usize>())?;
+        // SAFETY: the reservation placed `size_of::<usize>()` metadata
+        // bytes immediately before the payload; `write_unaligned`
+        // tolerates any alignment.
+        unsafe {
+            ptr::write_unaligned(value_ptr.as_ptr().sub(mem::size_of::<usize>()).cast::<usize>(), len);
+        }
+        // SAFETY: a successful reservation proves the mutator owns a chunk.
+        Some((Uninit::new(InChunk::from_raw(value_ptr).into_slice::<T>(len)), unsafe {
+            self.chunk_ptr_unchecked()
+        }))
+    }
+
+    /// DST form of [`Self::try_alloc_arc_value`]. The caller writes metadata
+    /// before the returned value pointer and initializes the payload.
+    #[inline]
+    #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin
+    #[cfg(feature = "dst")]
+    pub(crate) fn try_alloc_arc_dst(
+        &self,
+        payload_bytes: usize,
+        value_align: usize,
+        meta_bytes: usize,
+    ) -> Option<(NonNull<u8>, NonNull<C>)> {
+        let value_ptr = self.try_alloc_arc_prefixed(payload_bytes, value_align, meta_bytes)?;
+        // SAFETY: a successful reservation proves the mutator owns a chunk.
+        Some((value_ptr, unsafe { self.chunk_ptr_unchecked() }))
+    }
+
+    /// Thin-DST slice reservation; returns the payload ticket and absolute
+    /// payload address for drop-entry `value_offset` encoding.
     #[inline]
     #[cfg_attr(test, mutants::skip)] // see `try_alloc`
     fn try_alloc_prefixed_slice_payload<T>(&self, len: usize) -> Option<(InChunk<[T]>, usize)> {
@@ -404,11 +442,8 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         // `prefix_size`. Both values are powers of two so `max` gives
         // the right answer.
         let payload_offset = prefix_size.max(elem_align);
-        // Floor the payload byte count to 1 so the returned payload
-        // pointer is strictly less than the reservation's end. Without
-        // this, an empty slice (`len == 0` or ZST element) at the chunk
-        // tail could return a payload pointer at `chunk_base +
-        // CHUNK_ALIGN`, masking to the wrong tile on smart-pointer Drop.
+        // Empty slices/ZSTs still need an in-chunk payload address for
+        // smart-pointer header recovery.
         let payload_bytes = payload_bytes.max(1);
         let total = payload_offset.checked_add(payload_bytes)?;
         let base_in_chunk = self.try_alloc(total, elem_align.max(1))?;
@@ -458,11 +493,8 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
     #[cfg_attr(test, mutants::skip)] // see `try_alloc`: body→None ⇒ refill spin
     pub(crate) fn try_alloc_uninit_slice_with_drop<T>(&self, len: usize) -> Option<UninitDrop<'_, [T]>> {
         let size = mem::size_of::<T>().checked_mul(len)?;
-        // The drop entry encodes the element count in a `u16`; reject longer
-        // slices up front, before committing any reservation, so we never
-        // leave a counted-but-uninitialized drop slot behind. Callers also
-        // guard this earlier (see `alloc_slice_*` layout checks) to convert
-        // it into a clean `AllocError`/panic rather than a refill spin.
+        // Drop entries store length as `u16`; reject larger slices before any
+        // reservation is committed.
         let len_u16 = u16::try_from(len).ok()?;
         let drop_slot = self.try_reserve_drop_entry()?;
         let Some(value_bytes_ptr) = self.try_alloc(size, mem::align_of::<T>()) else {
@@ -478,44 +510,11 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         Some(UninitDrop::new(value, drop_slot))
     }
 
-    /// Like [`Self::try_alloc_uninit_slice_with_drop`] but additionally
-    /// writes a thin-pointer DST length prefix (`size_of::<usize>()`
-    /// bytes, unaligned) immediately before the payload. See
-    /// [`Self::try_alloc_uninit_slice_prefixed`].
-    #[cfg_attr(test, mutants::skip)] // see `try_alloc`
-    #[allow(
-        clippy::cast_ptr_alignment,
-        reason = "prefix slot may be unaligned for T's whose align < align_of::<usize>(); paired with write_unaligned/read_unaligned"
-    )]
-    pub(crate) fn try_alloc_uninit_slice_with_drop_prefixed<T>(&self, len: usize) -> Option<UninitDrop<'_, [T]>> {
-        // `len` must fit in the drop entry's `u16` element-count field.
-        let len_u16 = u16::try_from(len).ok()?;
-        let drop_slot = self.try_reserve_drop_entry()?;
-        let Some((value, payload_addr)) = self.try_alloc_prefixed_slice_payload::<T>(len) else {
-            self.unwind_drop_entry();
-            return None;
-        };
-        // The drop entry's `value_offset` encodes the *payload* address
-        // (post-prefix) so `replay_drops` runs `drop_in_place::<[T]>`
-        // on the real elements.
-        let value_offset = self.offset_or_unwind(payload_addr)?;
-        // SAFETY: `drop_slot` is freshly reserved, aligned, exclusively
-        // owned slot in the chunk's drop region.
-        unsafe {
-            ptr::write(drop_slot.as_ptr(), DropEntry::placeholder(value_offset, len_u16));
-        }
-        Some(UninitDrop::new(value, drop_slot))
-    }
-
     /// Attempts to reclaim the unused tail of the most recent bump
     /// allocation in O(1).
     ///
-    /// When `end_addr` (the one-past-the-end address of an allocation)
-    /// equals the current bump cursor — i.e. nothing has been allocated
-    /// after it — the cursor is rewound by `bytes`, returning that span to
-    /// the chunk, and `true` is returned. Returns `false` (leaving the
-    /// cursor untouched) when the allocation is not at the cursor or the
-    /// mutator owns no chunk.
+    /// Rewinds the bump cursor by `bytes` when `end_addr` is the current
+    /// cursor. Returns `false` if the allocation is not at the tail.
     #[inline]
     pub(crate) fn try_reclaim_tail(&self, end_addr: usize, bytes: usize) -> bool {
         if self.chunk.is_none() {
@@ -569,49 +568,6 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         true
     }
 
-    /// Thin-DST smart-pointer reservation. Reserves `total` bytes
-    /// aligned to `align`, optionally pre-reserves a drop entry that
-    /// will point at the *payload* address (i.e. `reservation_start +
-    /// payload_offset`, not the reservation start), and returns the
-    /// reservation start plus the drop slot. The caller is responsible
-    /// for writing the metadata prefix at `[0, payload_offset)` and the
-    /// payload at `[payload_offset, total)`.
-    ///
-    /// Used by the thin generic-DST smart-pointer alloc paths
-    /// ([`Arc<T>`](crate::Arc) / [`Box<T>`](crate::Box) for `T: ?Sized`).
-    #[cfg_attr(test, mutants::skip)] // see `try_alloc`
-    #[cfg(feature = "dst")]
-    pub(crate) fn try_alloc_thin_dst_smart(
-        &self,
-        total: usize,
-        align: usize,
-        payload_offset: usize,
-        needs_drop: bool,
-        metadata_u16: u16,
-    ) -> Option<(InChunk<u8>, Option<InChunk<DropEntry>>)> {
-        debug_assert!(align.is_power_of_two(), "align must be a power of two");
-        debug_assert!(payload_offset <= total, "payload_offset must fit inside the reservation");
-        if needs_drop {
-            let drop_slot = self.try_reserve_drop_entry()?;
-            let Some(base) = self.try_alloc(total, align) else {
-                self.unwind_drop_entry();
-                return None;
-            };
-            // Drop entry encodes the payload address (post-prefix), so
-            // `replay_drops` runs `drop_in_place::<T>` on the real
-            // value bytes.
-            let payload_addr = base.addr().wrapping_add(payload_offset);
-            let value_offset = self.offset_or_unwind(payload_addr)?;
-            // SAFETY: freshly reserved, aligned, exclusively owned slot.
-            unsafe {
-                ptr::write(drop_slot.as_ptr(), DropEntry::placeholder(value_offset, metadata_u16));
-            }
-            Some((base, Some(drop_slot)))
-        } else {
-            let base = self.try_alloc(total, align)?;
-            Some((base, None))
-        }
-    }
     /// Reserves a [`DropEntry`]-sized slot at the top of the drop-entry
     /// region. Entries are packed end-to-end from the payload's high end
     /// downward, matching the layout walked by
@@ -631,11 +587,8 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         Some(InChunk::from_raw(new_top).cast::<DropEntry>())
     }
 
-    /// Reverses the most recent `try_reserve_drop_entry`. Used when a
-    /// downstream allocation in the same compound operation fails.
-    ///
-    /// Cold: this fires only on the compound-reservation failure path,
-    /// which co-occurs with a refill miss and is by definition rare.
+    /// Reverses the most recent `try_reserve_drop_entry` after a compound
+    /// reservation failure.
     #[cold]
     #[inline(never)]
     #[cfg_attr(test, mutants::skip)] // only observable via skip'd callers
@@ -652,10 +605,7 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
         self.drop_top.set(clamped);
     }
 
-    /// Cold helper: roll back the most recently reserved drop entry and
-    /// return `None`. Out-of-line from compound-reservation paths so the
-    /// genuinely-unreachable `u16::try_from(...) == Err` arm is a single
-    /// line at the call site.
+    /// Rolls back the most recently reserved drop entry and returns `None`.
     #[cold]
     #[inline(never)]
     #[cfg_attr(coverage_nightly, coverage(off))]
@@ -690,38 +640,25 @@ impl<C: ?Sized + ChunkOps> ChunkMutator<C> {
     }
 
     /// Publishes the locally-tracked drop-entry count to the chunk header
-    /// eagerly, before the mutator is dropped.
-    ///
-    /// The count is normally published exactly once in [`Drop`]; teardown
-    /// reads it only after the refcount reaches zero, so the deferred publish
-    /// is sufficient for the common path. The uninit-`Arc` reservation path
-    /// (`Arena::alloc_uninit_arc`) calls this after writing a placeholder so
-    /// that `Arc::<MaybeUninit<T>>::assume_init` can locate the entry while
-    /// the chunk is still the arena's active chunk. It must be invoked only
-    /// after the placeholder slot it counts has been fully written.
+    /// eagerly, before the mutator is dropped. A no-op for chunk flavors that
+    /// never register drop entries ([`ChunkOps::REGISTERS_DROPS`] is `false`).
     #[inline]
     pub(crate) fn publish_drop_count(&self) {
+        if !C::REGISTERS_DROPS {
+            return;
+        }
         let Some(chunk) = self.chunk else { return };
         // SAFETY: the mutator owns a +1 on `chunk` for its whole lifetime,
         // so the header is live for this store.
         unsafe {
-            chunk.as_ref().set_drop_entry_count(self.local_drop_entry_count());
+            C::publish_drop_entry_count(chunk, self.local_drop_entry_count());
         }
     }
 
-    /// Consumes the mutator, publishing the locally-tracked drop-entry
-    /// count to the chunk header and returning the chunk pointer with
-    /// the mutator's `+1` retained ownership transferred to the caller.
-    /// The mutator's `Drop` (which would otherwise release the `+1`) is
-    /// bypassed.
+    /// Consumes the mutator and returns the owned chunk ref without running
+    /// this mutator's `Drop`.
     ///
-    /// Under the `stats` feature, this is also a "retire" event for
-    /// wasted-tail accounting: the chunk's free tail is recorded and
-    /// added to the provider's wasted-tail counter (the matching subtract
-    /// happens in `release_*` when the chunk is eventually cached or
-    /// destroyed). This matters for the `retired_local` push path, where
-    /// the chunk is removed from `current_local` (so its tail is wasted
-    /// from the user's POV) but the mutator's `Drop` is bypassed.
+    /// Under `stats`, also records wasted tail before transferring the chunk.
     ///
     /// Returns `None` for the empty (sentinel) mutator that has no
     /// chunk installed.
@@ -745,24 +682,20 @@ impl<C: ?Sized + ChunkOps> Drop for ChunkMutator<C> {
         let Some(chunk) = self.chunk else {
             return;
         };
-        // SAFETY: chunk is live; we hold one of its refcount tickets.
-        // Publish the locally-tracked drop-entry count to the chunk header
-        // before releasing our refcount: if dec_ref returns true we own the
-        // unique remaining reference, and `teardown_and_release` will read
-        // the count to walk the drop list.
+        // SAFETY: chunk is live; we hold one refcount ticket. Publish the
+        // drop-entry count before releasing it so teardown can replay drops.
         unsafe {
             #[cfg(feature = "stats")]
             {
-                // Record the wasted-tail at retire BEFORE dec_ref so that
-                // (a) the chunk header carries the value for the eventual
-                // `release_*` subtract (handles may outlive us), and (b) the
-                // provider counter goes up before any potential immediate
-                // release-driven subtract.
+                // Record wasted tail before `dec_ref`; release may happen
+                // immediately and subtract the stashed value.
                 let wasted = self.wasted_tail_for_stats();
                 C::record_retire(chunk, wasted);
             }
             let chunk_ref = chunk.as_ref();
-            chunk_ref.set_drop_entry_count(self.local_drop_entry_count());
+            // Publish the locally-tracked drop count; a no-op for flavors that
+            // never register drop entries (see `ChunkOps::publish_drop_entry_count`).
+            C::publish_drop_entry_count(chunk, self.local_drop_entry_count());
             if chunk_ref.dec_ref() {
                 C::teardown_and_release(chunk);
             }
@@ -962,4 +895,65 @@ mod tests {
             "overflowing new_len must fail",
         );
     }
+
+    // Covers `try_alloc_uninit_with_drop`'s value-allocation rollback
+    // (the `unwind_drop_entry` arm): the drop slot is reserved from the
+    // top, but the value itself doesn't fit, so the reserved slot must be
+    // unwound and the call must report failure. We size the remaining
+    // free space into the window `[entry_size, entry_size + value_bytes)`
+    // so the drop-slot reservation succeeds while the value alloc fails.
+    #[test]
+    fn try_alloc_uninit_with_drop_rolls_back_when_value_does_not_fit() {
+        struct BigDrop([u8; 64]);
+        impl Drop for BigDrop {
+            fn drop(&mut self) {
+                core::hint::black_box(&self.0);
+            }
+        }
+        let arena = crate::Arena::new();
+        // Force the first refill so `current_local` carries a live chunk.
+        let _ = arena.alloc(0_u8);
+        let m = arena.current_local();
+
+        let entry_size = mem::size_of::<DropEntry>();
+        // Leave exactly one byte of headroom past the drop slot so the
+        // value (64 bytes, align 1) cannot fit after the slot is reserved.
+        let target = entry_size + 1;
+        let free = m.free_bytes();
+        assert!(free >= target, "post-refill chunk must have room for the setup");
+        let _ = m.try_alloc_bytes(free - target).expect("setup fill");
+        assert_eq!(m.free_bytes(), target);
+
+        // Drop slot fits (free >= entry_size) but the value does not
+        // (free - entry_size == 1 < 64), driving the rollback path.
+        assert!(
+            m.try_alloc_uninit_with_drop::<BigDrop>().is_none(),
+            "value that doesn't fit must report failure",
+        );
+        // The reserved drop slot must have been unwound, restoring free space.
+        assert_eq!(m.free_bytes(), target, "unwind_drop_entry must restore the reserved drop slot");
+
+        // Also exercise `BigDrop`'s destructor end-to-end: allocate one into
+        // a fresh arena and let teardown replay the drop entry, so the drop
+        // shim actually runs.
+        let drop_arena = crate::Arena::new();
+        let _ = drop_arena.alloc(BigDrop([0_u8; 64]));
+        drop(drop_arena);
+    }
+
+    // Covers `publish_drop_count`'s early return for chunk flavors that never
+    // register drop entries (`REGISTERS_DROPS == false`): publishing the count
+    // on a shared mutator is a no-op that must leave the arena fully usable.
+    #[test]
+    fn publish_drop_count_is_noop_for_shared_mutator() {
+        let arena = crate::Arena::new();
+        // Force a live shared chunk into `current_shared`.
+        let a = arena.alloc_arc(1_u32);
+        assert_eq!(*a, 1);
+        // Shared chunks register no drop entries, so this returns early.
+        arena.current_shared().publish_drop_count();
+        // The arena keeps working afterward.
+        let b = arena.alloc_arc(2_u32);
+        assert_eq!(*b, 2);
+    }
 }
diff --git a/crates/multitude/src/internal/chunk_ops.rs b/crates/multitude/src/internal/chunk_ops.rs
index 61e8c88be..12d615bf6 100644
--- a/crates/multitude/src/internal/chunk_ops.rs
+++ b/crates/multitude/src/internal/chunk_ops.rs
@@ -3,12 +3,9 @@
 
 //! Per-flavor chunk lifecycle and access operations.
 //!
-//! [`ChunkOps`] is the trait that [`ChunkMutator`](super::ChunkMutator) uses
-//! to manipulate either a [`LocalChunk`](super::LocalChunk) or a
-//! [`SharedChunk`](super::SharedChunk) without caring which flavor it has.
-//! It also drives the "refcount hit zero" teardown path, which depends on
-//! the flavor: local chunks return to the provider's single-threaded cache,
-//! shared chunks return to the provider's lock-free cache.
+//! [`ChunkOps`] lets [`ChunkMutator`](super::chunk_mutator::ChunkMutator) handle
+//! [`LocalChunk`](super::local_chunk::LocalChunk) and [`SharedChunk`](super::shared_chunk::SharedChunk)
+//! through one lifecycle interface.
 
 // All trait methods are `unsafe fn` with documented safety contracts at the
 // function level; the inner unsafe wrappers required by edition 2024 add
@@ -18,21 +15,56 @@
 
 use core::ptr::NonNull;
 
-use allocator_api2::alloc::Allocator;
+use allocator_api2::alloc::{AllocError, Allocator};
 
 use super::chunk::Chunk;
+use super::chunk_alloc::chunk_alloc_size;
 use super::local_chunk::LocalChunk;
 use super::shared_chunk::SharedChunk;
 
 /// Operations every chunk flavor must support.
 ///
-/// Implemented for [`LocalChunk<A>`] and [`SharedChunk<A>`]. The associated
-/// `Allocator` type lets generic callers recover the provider type for
-/// release-routing.
+/// Implemented for [`LocalChunk<A>`] and [`SharedChunk<A>`].
 pub(crate) trait ChunkOps: Chunk {
     /// Allocator type used to back this chunk flavor's underlying storage.
     type Allocator: Allocator + Clone;
 
+    /// Whether this chunk flavor stores per-allocation drop entries packed at
+    /// its payload tail.
+    ///
+    /// `true` only for [`LocalChunk`]: plain arena references (`&mut T` /
+    /// `&mut [T]`) have no destructor of their own, so the chunk runs them at
+    /// teardown. `false` for [`SharedChunk`], whose values are owned by `Box`
+    /// or `Arc` and dropped eagerly on their last reference. The
+    /// [`ChunkMutator`](super::chunk_mutator::ChunkMutator) keys all its
+    /// drop-entry bookkeeping off this const so the shared monomorphization
+    /// compiles the dead paths away.
+    const REGISTERS_DROPS: bool;
+
+    /// Header size in bytes for this chunk flavor.
+    fn header_size() -> usize;
+
+    /// Publishes the mutator's locally-tracked drop-entry count into the chunk
+    /// header so teardown can replay them. A no-op for flavors that never
+    /// register drop entries ([`Self::REGISTERS_DROPS`] is `false`).
+    ///
+    /// # Safety
+    ///
+    /// `chunk` must reference a live chunk the caller holds a reference to.
+    unsafe fn publish_drop_entry_count(chunk: NonNull<Self>, count: usize);
+
+    /// Payload alignment for this chunk flavor.
+    fn value_align() -> usize;
+
+    /// Rounded backing-allocation size (`Layout::size()`) of a chunk whose
+    /// payload holds `payload` bytes. The single source of truth for chunk
+    /// byte accounting: every reserve/release/cache path routes through here
+    /// so the rounded footprint stays balanced.
+    #[inline]
+    fn footprint(payload: usize) -> Result<usize, AllocError> {
+        chunk_alloc_size(Self::header_size(), payload, Self::value_align())
+    }
+
     /// Returns a pointer to the first byte of the chunk's payload.
     ///
     /// # Safety
@@ -48,12 +80,8 @@ pub(crate) trait ChunkOps: Chunk {
     /// Caller must hold the unique remaining reference to `chunk`.
     unsafe fn teardown_and_release(chunk: NonNull<Self>);
 
-    /// Stashes `wasted` on the chunk header and adds it to the provider's
-    /// wasted-tail counter. Called from `ChunkMutator::Drop` at retire-time
-    /// (i.e., as the mutator's `+1` is about to be released). The matching
-    /// subtract happens in [`ChunkProvider::release_local`] /
-    /// [`ChunkProvider::release_shared`] when the chunk is later cached
-    /// or destroyed.
+    /// Records wasted tail on retire; the provider subtracts it when the
+    /// chunk is later cached or destroyed.
     ///
     /// # Safety
     ///
@@ -70,6 +98,24 @@ pub(crate) trait ChunkOps: Chunk {
 impl<A: Allocator + Clone> ChunkOps for LocalChunk<A> {
     type Allocator = A;
 
+    const REGISTERS_DROPS: bool = true;
+
+    #[inline]
+    fn header_size() -> usize {
+        LocalChunk::<A>::header_size()
+    }
+
+    #[inline]
+    unsafe fn publish_drop_entry_count(chunk: NonNull<Self>, count: usize) {
+        // SAFETY: caller holds a live reference to `chunk`.
+        chunk.as_ref().set_drop_entry_count(count);
+    }
+
+    #[inline]
+    fn value_align() -> usize {
+        LocalChunk::<A>::value_align()
+    }
+
     #[inline]
     unsafe fn payload_ptr(chunk: NonNull<Self>) -> NonNull<u8> {
         // SAFETY: delegated to the inherent `LocalChunk::payload_ptr`.
@@ -91,15 +137,8 @@ impl<A: Allocator + Clone> ChunkOps for LocalChunk<A> {
             super::drop_entry::replay_drops(payload, capacity, drop_count);
             chunk_ref.set_drop_entry_count(0);
         }
-        // Route the just-released chunk back to the provider. The provider
-        // is guaranteed to outlive every local-chunk teardown: the arena's
-        // `provider: Arc<ChunkProvider>` field is declared after the
-        // chunk-holding fields, so on `Arena::drop` the local mutators
-        // tear down first (running this code) while the provider Arc is
-        // still alive; chunks parked in the provider's own cache are torn
-        // down directly via `LocalChunk::destroy` in `drain_all` and do
-        // not reach this code path. See the type-level doc on
-        // `LocalChunk`.
+        // Local chunks tear down while the arena provider is still alive; cached
+        // local chunks are destroyed directly from provider drop.
         let provider = chunk_ref.provider();
         debug_assert!(!provider.is_null(), "local-chunk provider back-pointer is null in teardown");
         (*provider).release_local(chunk);
@@ -122,6 +161,23 @@ impl<A: Allocator + Clone> ChunkOps for LocalChunk<A> {
 impl<A: Allocator + Clone> ChunkOps for SharedChunk<A> {
     type Allocator = A;
 
+    const REGISTERS_DROPS: bool = false;
+
+    #[inline]
+    fn header_size() -> usize {
+        SharedChunk::<A>::header_size()
+    }
+
+    #[inline]
+    unsafe fn publish_drop_entry_count(_chunk: NonNull<Self>, _count: usize) {
+        // Shared chunks never register drop entries; nothing to publish.
+    }
+
+    #[inline]
+    fn value_align() -> usize {
+        SharedChunk::<A>::value_align()
+    }
+
     #[inline]
     unsafe fn payload_ptr(chunk: NonNull<Self>) -> NonNull<u8> {
         // SAFETY: delegated to the inherent `SharedChunk::payload_ptr`.
@@ -131,20 +187,11 @@ impl<A: Allocator + Clone> ChunkOps for SharedChunk<A> {
     #[cold]
     #[inline(never)]
     unsafe fn teardown_and_release(chunk: NonNull<Self>) {
-        // SAFETY: see local variant. Replay drops + clear count before the
-        // chunk is recycled to the shared cache (where its payload's first
-        // bytes are reused as a Treiber-stack next-link).
+        // SAFETY: caller owns the unique remaining reference. Shared chunks
+        // register no drop entries; per-`Arc` values drop on their last strong
+        // reference before the chunk reaches the cache.
         let chunk_ref = &*chunk.as_ptr();
-        let drop_count = chunk_ref.drop_entry_count();
-        if drop_count != 0 {
-            let payload = SharedChunk::payload_ptr(chunk).as_ptr();
-            let capacity = chunk_ref.capacity();
-            super::drop_entry::replay_drops(payload, capacity, drop_count);
-            chunk_ref.set_drop_entry_count(0);
-        }
-        // Shared chunks CAN outlive their provider (an Arc<T> backed by
-        // a shared chunk can be held past Arena::drop), so we still need
-        // the Weak::upgrade dance here.
+        // Shared chunks can outlive their provider, so release through `Weak`.
         if let Some(provider) = chunk_ref.provider().upgrade() {
             provider.release_shared(chunk);
         } else {
@@ -156,18 +203,34 @@ impl<A: Allocator + Clone> ChunkOps for SharedChunk<A> {
     unsafe fn record_retire(chunk: NonNull<Self>, wasted: u32) {
         let chunk_ref = &*chunk.as_ptr();
         chunk_ref.set_wasted_at_retire(wasted);
-        // If the provider has already been dropped (shared chunks can
-        // outlive their arena), there is no counter left to update;
-        // the stashed `wasted_at_retire` will simply never be read.
+        // If the provider is gone, no stats counter remains to update.
         if let Some(provider) = chunk_ref.provider().upgrade() {
             provider.record_wasted_tail(u64::from(wasted));
         }
     }
 }
 
-// Note: the prior `orphan_local_chunk_is_destroyed_on_mutator_drop` test
-// (which exercised the now-removed `destroy_orphan_local` defensive arm)
-// is gone — that branch was eliminated when `LocalChunk` switched from a
-// `Weak<ChunkProvider>` to a non-owning raw back-pointer. See the
-// type-level doc on `LocalChunk` for the soundness argument and
-// `teardown_and_release` above for the simplified routing.
+#[cfg(test)]
+mod tests {
+    use allocator_api2::alloc::Global;
+
+    use super::*;
+
+    // Kills the `value_align -> 1` mutants on both `ChunkOps` impls: the
+    // trait method must report the real payload alignment
+    // (`align_of::<usize>()`), which every footprint computation depends on.
+    // The inherent `value_align` tests don't cover the trait impls.
+    #[test]
+    fn chunk_ops_value_align_reports_real_payload_alignment() {
+        assert_eq!(
+            <LocalChunk<Global> as ChunkOps>::value_align(),
+            core::mem::align_of::<usize>(),
+            "LocalChunk trait value_align must match the real payload alignment"
+        );
+        assert_eq!(
+            <SharedChunk<Global> as ChunkOps>::value_align(),
+            core::mem::align_of::<usize>(),
+            "SharedChunk trait value_align must match the real payload alignment"
+        );
+    }
+}
diff --git a/crates/multitude/src/internal/chunk_provider.rs b/crates/multitude/src/internal/chunk_provider.rs
index 5597adb1d..c23247c93 100644
--- a/crates/multitude/src/internal/chunk_provider.rs
+++ b/crates/multitude/src/internal/chunk_provider.rs
@@ -4,35 +4,20 @@
 //! Per-arena chunk cache and allocation source.
 //!
 //! [`ChunkProvider`] owns the arena's allocator clone, enforces a byte
-//! budget, and maintains a freed-chunk cache of the **current floor
-//! class**, so steady-state allocate/release pairs avoid hitting the
-//! system allocator.
+//! budget, and maintains freed-chunk caches at the current class floor.
 //!
-//! Each cache holds at most one freelist. The associated **class floor**
-//! (`local_cache_class` / `shared_cache_class`) ratchets monotonically
-//! upward as the arena progresses to larger chunks. Chunks released
-//! below the floor are returned to the system; cached chunks below the
-//! floor are evicted at the next floor bump. The intent is that the
-//! arena settles into the largest class it needs with the minimum
-//! number of chunks retained.
+//! Each cache holds one freelist. The class floor ratchets upward as the
+//! arena needs larger chunks; below-floor chunks are evicted or destroyed.
 //!
 //! Two cache shapes coexist:
 //!
-//! - **Local cache**: single freelist guarded by an [`OwnerThreadCell`].
-//!   The provider's owning thread is the arena's thread; only that
-//!   thread allocates from or releases into the local cache.
-//! - **Shared cache**: lock-free Treiber-style stack of
-//!   `AtomicPtr<SharedChunk<A>>`. Any thread can push a chunk (when its
-//!   last refcount handle drops); only the owning thread pops. A
-//!   concurrent push by a thread that has yet to observe the latest
-//!   floor bump may add a below-floor chunk; that straggler is destroyed
-//!   when the owner thread pops it (see [`ChunkProvider::pop_shared`]).
-
-// `release_local`, `release_shared`, `pop_shared`, `push_shared`, and the
-// `destroy_or_cache_just_acquired` helpers are `unsafe fn` with their full
-// safety contracts documented on the items themselves; the inner unsafe
-// wrappers edition 2024 would otherwise require do not add a safety
-// boundary, so we drop them.
+//! - Local: single freelist in [`OwnerThreadCell`], accessed only by the
+//!   arena thread.
+//! - Shared: lock-free Treiber stack; any thread can push, only the owner
+//!   pops. Below-floor stragglers are destroyed by [`ChunkProvider::pop_shared`].
+
+// These `unsafe fn`s have item-level safety contracts; inner unsafe blocks
+// would not add a boundary here.
 #![allow(unsafe_op_in_unsafe_fn, reason = "see module doc: inner unsafe blocks in unsafe fn add noise here")]
 #![allow(clippy::unnecessary_safety_comment, reason = "safety rationale documented at function level")]
 
@@ -46,7 +31,7 @@ use core::sync::atomic::{AtomicPtr, AtomicU8, AtomicUsize, Ordering};
 use allocator_api2::alloc::{AllocError, Allocator};
 
 use super::chunk::Chunk;
-use super::chunk_alloc::chunk_alloc_size;
+use super::chunk_ops::ChunkOps;
 use super::constants::{MAX_CHUNK_BYTES, MAX_NORMAL_ALLOC, MIN_CHUNK_BYTES, SizeClass};
 use super::drop_entry::DropEntry;
 use super::local_chunk::LocalChunk;
@@ -135,17 +120,10 @@ pub(crate) struct ChunkProvider<A: Allocator + Clone> {
     /// Bytes currently outstanding (allocated, not yet freed). Updated via
     /// `AcqRel` speculative-add.
     bytes_outstanding: AtomicUsize,
-    /// Single-thread local-chunk cache: thin `*mut u8` header pointer to
-    /// the freelist head (chunks linked via [`LocalChunk::set_next`]
-    /// / [`LocalChunk::next`]). Holds at most one freelist for
-    /// the **current class floor** ([`Self::local_cache_class`]); chunks
-    /// below the floor are destroyed instead of cached.
+    /// Local-cache freelist head as a thin header pointer. Holds chunks at or
+    /// above [`Self::local_cache_class`].
     local_cache: OwnerThreadCell<*mut u8>,
-    /// Current class floor for the local cache. Only chunks at class
-    /// greater than or equal to `local_cache_class` are cached; the
-    /// floor ratchets monotonically upward as the arena allocates
-    /// progressively larger chunks, and stale below-floor chunks are
-    /// evicted at each bump.
+    /// Current class floor for the local cache; below-floor chunks are evicted.
     local_cache_class: AtomicU8,
     /// Lock-free shared-chunk cache: single Treiber-stack head for the
     /// current class floor ([`Self::shared_cache_class`]).
@@ -167,11 +145,8 @@ pub(crate) struct ChunkProvider<A: Allocator + Clone> {
     /// Lifetime count of oversized one-shot shared chunks allocated.
     #[cfg(feature = "stats")]
     oversized_shared_chunks_allocated: AtomicU64,
-    /// Bytes currently "wasted" in the unused free region of chunks that have
-    /// been retired from an arena's `current_*` slot but have not yet been
-    /// returned to the cache or freed back to the underlying allocator. Bumped
-    /// up when a chunk is retired, bumped back down when the same chunk is
-    /// later cached or destroyed.
+    /// Unused tail bytes in retired chunks not yet cached or freed. Retire
+    /// increments; cache/destroy decrements.
     #[cfg(feature = "stats")]
     wasted_tail_bytes: AtomicU64,
 }
@@ -270,17 +245,10 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         &self.allocator
     }
 
-    /// Acquires a normal-class local chunk whose payload has at least
-    /// `min_payload` bytes. The caller MUST have already verified the
-    /// request is not oversized (i.e. `min_payload <= max_normal_alloc`
-    /// and total fits in `MAX_CHUNK_BYTES`); use
-    /// [`Self::acquire_oversized_local`] otherwise. Returns with refcount
-    /// = 1.
+    /// Acquires a normal-class local chunk with at least `min_payload` bytes.
+    /// Caller must route oversized requests to [`Self::acquire_oversized_local`].
     ///
-    /// `ratchet_class` is the caller's size-class floor (the refill
-    /// ratchet): the chosen chunk is sized to the larger of the class
-    /// needed for `min_payload` and `ratchet_class`, so the chunk can
-    /// grow with arena usage.
+    /// `ratchet_class` is the caller's size-class floor for refill growth.
     pub(crate) fn acquire_local(&self, min_payload: usize, ratchet_class: SizeClass) -> Result<NonNull<LocalChunk<A>>, AllocError> {
         let header = LocalChunk::<A>::header_size();
         let needed_total = header.checked_add(min_payload).ok_or(AllocError)?;
@@ -291,13 +259,9 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         self.acquire_normal_local(SizeClass::min_for_bytes(needed_total).max(ratchet_class))
     }
 
-    /// Acquires a normal (cacheable) local chunk in the given size `class`,
-    /// reusing a cached chunk when available. Never routes to oversized; the
-    /// caller is responsible for any oversized decision.
+    /// Acquires a cacheable local chunk in `class`, reusing cache when possible.
     ///
-    /// If `class` exceeds the cache's current class floor, the floor is
-    /// bumped (monotonically) and stale below-floor chunks in the cache
-    /// are destroyed before the pop attempt.
+    /// Bumps the cache floor and evicts stale chunks when `class` is higher.
     //
     // Mutation testing is suppressed on the `class > floor` branch:
     // `>` with `<` / `==` only changes when the floor advances; the
@@ -334,9 +298,8 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         self.allocate_fresh_local(class)
     }
 
-    /// Sets the local cache floor to `new_class` and destroys every cached
-    /// chunk whose total allocation is smaller than the new floor.
-    /// Idempotent: caller already verified `new_class > current_floor`.
+    /// Sets the local cache floor and destroys cached chunks below it.
+    /// Caller already verified `new_class > current_floor`.
     ///
     /// # Safety
     ///
@@ -356,7 +319,8 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
                     let fat = LocalChunk::<A>::header_to_fat(cur);
                     let chunk_nn = NonNull::new_unchecked(fat);
                     let next = LocalChunk::next(chunk_nn);
-                    let total = LocalChunk::<A>::header_size() + (*chunk_nn.as_ptr()).capacity();
+                    let total = LocalChunk::<A>::footprint((*chunk_nn.as_ptr()).capacity())
+                        .expect("evicted chunk's layout was valid when it was allocated");
                     if total >= new_min_total {
                         LocalChunk::set_next(chunk_nn, new_head);
                         new_head = cur;
@@ -371,11 +335,7 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         }
     }
 
-    /// Allocates a brand-new normal local chunk of the given size `class`,
-    /// bypassing the cache. Increments the lifetime allocation counter.
-    /// Used both on a cache miss in [`acquire_normal_local`](Self::acquire_normal_local)
-    /// and by [`preallocate_local`](Self::preallocate_local) (which must add
-    /// fresh chunks to the cache rather than recycle existing ones).
+    /// Allocates a fresh normal local chunk, bypassing the cache.
     fn allocate_fresh_local(&self, class: SizeClass) -> Result<NonNull<LocalChunk<A>>, AllocError> {
         let header = LocalChunk::<A>::header_size();
         let total = class.bytes();
@@ -394,11 +354,8 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         }
     }
 
-    /// Acquires a normal-class shared chunk whose payload has at least
-    /// `min_payload` bytes. The caller MUST have already verified the
-    /// request is not oversized; use [`Self::acquire_oversized_shared`]
-    /// otherwise. See [`Self::acquire_local`] for `ratchet_class`
-    /// semantics. Returns with refcount = 1.
+    /// Acquires a normal-class shared chunk with at least `min_payload` bytes.
+    /// Caller must route oversized requests to [`Self::acquire_oversized_shared`].
     pub(crate) fn acquire_shared(&self, min_payload: usize, ratchet_class: SizeClass) -> Result<NonNull<SharedChunk<A>>, AllocError> {
         let header = SharedChunk::<A>::header_size();
         let needed_total = header.checked_add(min_payload).ok_or(AllocError)?;
@@ -409,10 +366,8 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         self.acquire_normal_shared(SizeClass::min_for_bytes(needed_total).max(ratchet_class))
     }
 
-    /// Acquires a normal (cacheable) shared chunk in the given size `class`.
-    /// If `class` exceeds the cache's current class floor, the floor is
-    /// bumped (monotonically) and stale below-floor chunks in the cache
-    /// are destroyed before the pop attempt.
+    /// Acquires a cacheable shared chunk in `class`, bumping the floor first
+    /// when needed.
     //
     // Mutation testing is suppressed on the `class > floor` branch for
     // the same reason as `acquire_normal_local`.
@@ -435,12 +390,8 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         self.allocate_fresh_shared(class)
     }
 
-    /// Sets the shared cache floor to `new_class` and destroys every
-    /// cached chunk whose total allocation is smaller than the new floor.
-    /// Called only by the owning thread; concurrent pushers (releasing
-    /// threads) may race a below-floor chunk into the cache after the
-    /// floor is observed-as-lower — those stragglers are caught by the
-    /// pop-time class check in [`Self::pop_shared`].
+    /// Sets the shared cache floor and destroys detached chunks below it.
+    /// Racing below-floor pushes are handled by [`Self::pop_shared`].
     ///
     /// # Safety
     ///
@@ -453,9 +404,7 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         // subsequent Acquire load sees it.
         self.shared_cache_class.store(new_class.raw(), Ordering::Release);
         let new_min_total = new_class.bytes();
-        // Atomically detach the whole freelist. Concurrent pushers will
-        // push onto the now-empty head; the post-bump pushers may push
-        // either above-floor (kept) or below-floor (caught at pop) chunks.
+        // Detach the freelist; racing pushers target the empty head.
         let mut cur = self.shared_cache.swap(ptr::null_mut(), Ordering::AcqRel);
         // SAFETY: each linked chunk is a refcount-zero, uniquely-owned
         // chunk we just detached; we walk the list, re-push survivors,
@@ -466,7 +415,8 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
                 let chunk_nn = NonNull::new_unchecked(fat);
                 let link = SharedChunk::cache_link(chunk_nn);
                 let next = (*link).load(Ordering::Acquire);
-                let total = SharedChunk::<A>::header_size() + (*chunk_nn.as_ptr()).capacity();
+                let total = SharedChunk::<A>::footprint((*chunk_nn.as_ptr()).capacity())
+                    .expect("evicted chunk's layout was valid when it was allocated");
                 if total >= new_min_total {
                     self.push_shared(chunk_nn);
                 } else {
@@ -478,8 +428,7 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         }
     }
 
-    /// Allocates a brand-new normal shared chunk of the given size `class`,
-    /// bypassing the cache. See [`allocate_fresh_local`](Self::allocate_fresh_local).
+    /// Allocates a fresh normal shared chunk, bypassing the cache.
     #[cfg_attr(test, mutants::skip)] // `total - header → total / header` ⇒ runaway allocations
     fn allocate_fresh_shared(&self, class: SizeClass) -> Result<NonNull<SharedChunk<A>>, AllocError> {
         let header = SharedChunk::<A>::header_size();
@@ -512,27 +461,16 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         // outright or push the chunk onto the (single-threaded) cache by
         // writing its cache-link slot.
         let capacity = (*chunk.as_ptr()).capacity();
-        // Match the rounded `Layout::size()` that `chunk_layout` allocated, so
-        // the byte budget / `total_bytes_allocated` track the real footprint
-        // (the round-up matters for oversized chunks whose header+capacity is
-        // not `value_align`-aligned).
-        let total = chunk_alloc_size(LocalChunk::<A>::header_size(), capacity, LocalChunk::<A>::value_align())
-            .expect("released chunk's layout was valid when it was allocated");
+        let total = LocalChunk::<A>::footprint(capacity).expect("released chunk's layout was valid when it was allocated");
         #[cfg(feature = "stats")]
         {
-            // Decrement the wasted-tail counter by the value stashed on
-            // the chunk header at retire time (0 for chunks that never
-            // went through a mutator, e.g. preallocated cache fills).
+            // Subtract the retire-time wasted-tail value, if any.
             let wasted = u64::from((*chunk.as_ptr()).wasted_at_retire());
             if wasted != 0 {
                 self.release_wasted_tail(wasted);
             }
         }
-        // Bypass the cache for non-class-size totals (oversized one-shots
-        // whose total isn't a power of two) and for chunks below the
-        // current cache class floor. The floor ratchets monotonically as
-        // the arena moves to larger chunks; smaller chunks released
-        // afterward are returned to the system so the cache stays uniform.
+        // Bypass the cache for oversized/non-class totals and below-floor chunks.
         if !is_cacheable_size(total) || total < SizeClass::new(self.local_cache_class.load(Ordering::Relaxed)).bytes() {
             LocalChunk::destroy(chunk, &self.allocator);
             self.release_bytes(total);
@@ -552,15 +490,10 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
     pub(crate) unsafe fn release_shared(&self, chunk: NonNull<SharedChunk<A>>) {
         // SAFETY: chunk is live and uniquely owned by caller.
         let capacity = (*chunk.as_ptr()).capacity();
-        // See `release_local`: round to the allocated `Layout::size()` so byte
-        // accounting matches the real footprint for oversized chunks.
-        let total = chunk_alloc_size(SharedChunk::<A>::header_size(), capacity, SharedChunk::<A>::value_align())
-            .expect("released chunk's layout was valid when it was allocated");
+        let total = SharedChunk::<A>::footprint(capacity).expect("released chunk's layout was valid when it was allocated");
         #[cfg(feature = "stats")]
         {
-            // See `release_local` for the symmetric subtract semantics.
-            // Acquire load on the shared chunk's atomic — the store may
-            // have happened on a different thread (last `Arc::drop`).
+            // Acquire load: the retire-time store may have run on another thread.
             let wasted = u64::from((*chunk.as_ptr()).wasted_at_retire());
             if wasted != 0 {
                 self.release_wasted_tail(wasted);
@@ -577,10 +510,7 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
 
     /// Pre-warms the local cache with one chunk in the given size class.
     ///
-    /// Always allocates through the normal (cacheable) class path: a
-    /// preallocated chunk is a size-classed chunk regardless of the
-    /// configured `max_normal_alloc`, so it must never route to the
-    /// oversized (one-shot, non-cacheable) path even when its payload
+    /// Always allocates through the normal class path, even when the payload
     /// exceeds `max_normal_alloc`.
     pub(crate) fn preallocate_local(&self, class: SizeClass) -> Result<(), AllocError> {
         let chunk = self.allocate_fresh_local(class)?;
@@ -626,25 +556,16 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         self.bytes_outstanding.fetch_sub(n, Ordering::AcqRel);
     }
 
-    /// Allocates a one-shot oversized local chunk whose payload is sized
-    /// to fit a single allocation of `min_payload` bytes (plus rounding
-    /// for drop-entry alignment). The chunk bypasses the size-class cache.
+    /// Allocates a one-shot oversized local chunk sized for `min_payload`.
     ///
-    /// Used by [`Arena`](crate::Arena) for allocations whose worst-case
-    /// payload exceeds `max_normal_alloc`: the caller wraps the chunk
-    /// in a temporary [`ChunkMutator`](super::ChunkMutator), performs the
-    /// single allocation, and the current chunk is left untouched so
-    /// subsequent small allocations continue to use it.
+    /// The caller uses a temporary [`ChunkMutator`](super::chunk_mutator::ChunkMutator), so
+    /// the current chunk remains available for later small allocations.
     pub(crate) fn acquire_oversized_local(&self, min_payload: usize) -> Result<NonNull<LocalChunk<A>>, AllocError> {
-        // Add `oversized_payload_align_slack()` to absorb the worst-case
-        // alignment skew the bump cursor pays at the start of an unaligned
-        // payload (chunk headers do not pad the payload to be 8-aligned).
-        // Callers requesting an `elem_align > align_of::<DropEntry>()` must
-        // pre-size `min_payload` to cover the extra skew themselves.
+        // Add worst-case payload-start alignment skew. Callers whose element
+        // alignment exceeds `align_of::<DropEntry>()` pre-size `min_payload` themselves.
         let payload = round_up_to_drop_align(min_payload.checked_add(oversized_payload_align_slack()).ok_or(AllocError)?)?;
-        // Reserve the rounded `Layout::size()` that `LocalChunk::allocate`
-        // will actually request, so the byte budget is enforced accurately.
-        let total = chunk_alloc_size(LocalChunk::<A>::header_size(), payload, LocalChunk::<A>::value_align())?;
+        // Reserve the exact rounded allocation size.
+        let total = LocalChunk::<A>::footprint(payload)?;
         self.reserve_bytes(total)?;
         match LocalChunk::<A>::allocate(&self.allocator, ptr::from_ref(self), payload) {
             Ok(chunk) => {
@@ -664,7 +585,7 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         // See `acquire_oversized_local` for the alignment-slack rationale.
         let payload = round_up_to_drop_align(min_payload.checked_add(oversized_payload_align_slack()).ok_or(AllocError)?)?;
         // See `acquire_oversized_local`: reserve the rounded allocation size.
-        let total = chunk_alloc_size(SharedChunk::<A>::header_size(), payload, SharedChunk::<A>::value_align())?;
+        let total = SharedChunk::<A>::footprint(payload)?;
         self.reserve_bytes(total)?;
         match SharedChunk::<A>::allocate(self.allocator.clone(), Weak::clone(&self.weak_self), payload) {
             Ok(chunk) => {
@@ -679,10 +600,8 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         }
     }
 
-    /// Pops a cached shared chunk at or above the current class floor.
-    /// Stale below-floor chunks (pushed by a release thread that raced
-    /// against [`Self::advance_shared_cache_floor`]) are destroyed and
-    /// the next chunk is tried.
+    /// Pops a cached shared chunk at or above the current class floor,
+    /// destroying below-floor stragglers.
     ///
     /// # Safety
     ///
@@ -706,7 +625,8 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
             let Ok(popped) = updated else { return None };
             let fat = SharedChunk::<A>::header_to_fat(popped);
             let chunk_nn = NonNull::new_unchecked(fat);
-            let total = SharedChunk::<A>::header_size() + (*chunk_nn.as_ptr()).capacity();
+            let total = SharedChunk::<A>::footprint((*chunk_nn.as_ptr()).capacity())
+                .expect("popped chunk's layout was valid when it was allocated");
             if total >= floor_min_total {
                 return Some(chunk_nn);
             }
@@ -726,16 +646,8 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         let head = &self.shared_cache;
         let link = SharedChunk::cache_link(chunk);
         let new = chunk.cast::<u8>().as_ptr();
-        // The chunk is exclusively ours until the publishing CAS below
-        // succeeds, so the link can be initialized via a non-atomic
-        // pointer write through `AtomicPtr::as_ptr()`. Doing the first
-        // write atomically triggers a Miri weak-memory ICE
-        // ("cannot have empty store buffer when previous write was
-        // atomic") on freshly-allocated chunk payload bytes; the
-        // non-atomic init sidesteps it. After the CAS, all subsequent
-        // mutations to the link go through atomic ops, and any popper
-        // observes the link via `head.load(Acquire)` which
-        // synchronizes-with the `Release` half of our CAS.
+        // Exclusive ownership permits non-atomic link initialization before
+        // the publishing CAS; later link changes use atomics.
         let mut cur = head.load(Ordering::Acquire);
         loop {
             ptr::write((*link).as_ptr(), cur);
@@ -752,7 +664,7 @@ impl<A: Allocator + Clone> ChunkProvider<A> {
         }
     }
 
-    /// Drains every cached chunk and deallocates its backing memory.
+    /// Drains cached chunks and deallocates their backing memory.
     fn drain_all(&self) {
         // SAFETY: drain runs in Drop with no outstanding mutators; the
         // provider is single-owner at this point, so the OwnerThreadCell
@@ -799,10 +711,8 @@ pub(crate) fn is_cacheable_size(total: usize) -> bool {
-/// `align_of::<DropEntry>()`. Returns `None` on overflow.
+/// `align_of::<DropEntry>()`. Returns `Err(AllocError)` on overflow.
 ///
 /// [`ChunkMutator::from_owned`](super::chunk_mutator::ChunkMutator::from_owned)
-/// aligns the chunk's `drop_top` *down* to `align_of::<DropEntry>()`,
-/// shaving up to `align - 1` bytes off the usable payload. Without this
-/// rounding the usable capacity could fall below `min_payload` and
-/// `impl_alloc_*`'s reserve/refill loop would spin until OOM.
+/// aligns `drop_top` down, so rounding prevents usable capacity from falling
+/// below `min_payload`.
 #[cfg_attr(test, mutants::skip)] // mask mutations underfit payload → OOM spin
 #[inline]
 fn round_up_to_drop_align(min_payload: usize) -> Result<usize, AllocError> {
@@ -834,9 +744,7 @@ fn exceeds_max_chunk_bytes(needed_total: usize) -> bool {
 // --- Helpers wired into chunk types via inherent impls ------------------------
 
 impl<A: Allocator + Clone> LocalChunk<A> {
-    /// Used by `preallocate_local`: route a just-acquired refcount-1 chunk
-    /// back to its provider's cache (refcount → 0) without going through
-    /// `ChunkMutator`.
+    /// Routes a just-acquired refcount-1 chunk to the provider cache.
     ///
     /// # Safety
     ///
@@ -889,11 +797,7 @@ mod tests {
         static PUSH_RETRY_COUNT: Cell<usize> = const { Cell::new(0) };
     }
 
-    /// Test hook invoked by `push_shared` just before its CAS. If the
-    /// thread-local injection slot is armed, splice that chunk onto the
-    /// stack as if a concurrent pusher had installed it: link it to the
-    /// value the pusher loaded, then publish it as the new head so the
-    /// pending CAS (still expecting `cur`) fails exactly once.
+    /// Test hook that injects a competing shared-cache push before the CAS.
     ///
     /// # Safety
     ///
@@ -924,12 +828,8 @@ mod tests {
         assert_eq!(c.max_normal_alloc(), MAX_NORMAL_ALLOC);
     }
 
-    // Covers `pop_shared`'s below-floor straggler arm: a cached shared
-    // chunk smaller than the current class floor is destroyed (not
-    // returned) and the pop continues. Single-threaded code never caches
-    // a below-floor chunk via `release_shared`, so we model the
-    // push-races-floor-bump state directly: raise the floor on an empty
-    // cache, then inject a small (class-0) chunk via `push_shared`.
+    // Covers `pop_shared`'s below-floor straggler arm by raising the floor,
+    // then pushing a smaller chunk.
     #[test]
     fn pop_shared_destroys_below_floor_straggler() {
         let provider = ChunkProvider::<Global>::new(Global, ChunkProviderConfig::default());
@@ -969,13 +869,8 @@ mod tests {
         assert!(!is_cacheable_size(0));
     }
 
-    // Covers `push_shared`'s contended CAS retry arm (the `Err(actual)`
-    // branch). A real concurrent push is non-deterministic, so we model
-    // the race directly: arm a thread-local injection so the test hook
-    // publishes a competing chunk onto the stack head between our load and
-    // CAS, forcing the pending CAS to fail and the loop to retry exactly
-    // once before it settles. Thread-local state keeps this isolated from
-    // other tests running in parallel.
+    // Covers `push_shared`'s contended CAS retry arm via deterministic
+    // thread-local race injection.
     #[test]
     fn push_shared_retries_on_contended_cas() {
         let provider = ChunkProvider::<Global>::new(Global, ChunkProviderConfig::default());
diff --git a/crates/multitude/src/internal/chunk_ref.rs b/crates/multitude/src/internal/chunk_ref.rs
index 5efc8a64f..30adea109 100644
--- a/crates/multitude/src/internal/chunk_ref.rs
+++ b/crates/multitude/src/internal/chunk_ref.rs
@@ -4,10 +4,8 @@
 //! [`ChunkRef`] — a RAII handle for a single strong reference on a
 //! [`SharedChunk`].
 //!
-//! Centralizes the "+1 on a chunk that must be released exactly once,
-//! even on panic" pattern used by smart pointers and in-flight slot
-//! initialization. One machine word, `!Send`/`!Sync`, and inhibits
-//! implicit `Copy`/`Clone` so the +1 ownership is linear.
+//! Centralizes linear "+1" chunk ownership used by smart pointers and
+//! in-flight slot initialization.
 
 use core::marker::PhantomData;
 use core::mem;
@@ -15,7 +13,6 @@ use core::ptr::NonNull;
 
 use allocator_api2::alloc::Allocator;
 
-use super::chunk::Chunk;
 use super::shared_chunk::SharedChunk;
 
 /// Owns a single strong reference on a [`SharedChunk`]; releases the
@@ -65,32 +62,7 @@ impl<A: Allocator + Clone> ChunkRef<A> {
         }
     }
 
-    /// Bumps the strong refcount on the chunk containing `value` and
-    /// returns a [`ChunkRef`] owning the new +1.
-    ///
-    /// # Safety
-    ///
-    /// Same as [`Self::from_value_ptr`], plus caller must already hold
-    /// a live strong reference on the chunk.
-    #[inline]
-    pub(crate) unsafe fn clone_from_value_ptr<T: ?Sized>(value: NonNull<T>) -> Self {
-        // SAFETY: see from_value_ptr; caller's pre-existing +1 prevents
-        // teardown races.
-        unsafe {
-            let header = SharedChunk::<A>::header_from_value_ptr(value.cast::<u8>());
-            let chunk_fat = SharedChunk::<A>::header_to_fat(header.as_ptr());
-            let chunk = NonNull::new_unchecked(chunk_fat);
-            chunk.as_ref().inc_ref();
-            Self {
-                chunk,
-                _phantom: PhantomData,
-            }
-        }
-    }
-
-    /// Cancels release-on-drop and returns the raw chunk pointer with
-    /// the +1 still live. Use when ownership of the +1 is being
-    /// handed to another holder (e.g. a freshly-constructed `Box`).
+    /// Cancels release-on-drop and returns the chunk pointer with +1 live.
     #[inline]
     pub(crate) fn forget(self) -> NonNull<SharedChunk<A>> {
         let chunk = self.chunk;
diff --git a/crates/multitude/src/internal/constants.rs b/crates/multitude/src/internal/constants.rs
index 3999a7d50..9abdb06dc 100644
--- a/crates/multitude/src/internal/constants.rs
+++ b/crates/multitude/src/internal/constants.rs
@@ -13,13 +13,8 @@ pub(crate) const MIN_CHUNK_BYTES: usize = 512;
 pub(crate) const MAX_CHUNK_BYTES: usize = 65_536;
 
 /// Required alignment for every [`SharedChunk`](super::shared_chunk::SharedChunk)
-/// allocation. Matches [`MAX_CHUNK_BYTES`] so that for any pointer to a
-/// non-oversized value in the chunk, the chunk header's address can be
-/// recovered by subtracting the low `CHUNK_ALIGN - 1` bits of the pointer.
-///
-/// This in turn allows [`Box`](crate::Box) and similar smart pointers
-/// to store a single value pointer without separately tracking the
-/// chunk header.
+/// allocation. Matching [`MAX_CHUNK_BYTES`] lets smart pointers recover the
+/// chunk header from any non-oversized in-chunk value pointer.
 pub(crate) const CHUNK_ALIGN: usize = MAX_CHUNK_BYTES;
 
 /// Maximum alignment accepted by smart-pointer / `Allocator::allocate`
@@ -42,12 +37,9 @@ pub(crate) const MAX_NORMAL_ALLOC: usize = 16 * 1024;
 
 /// Cache size-class index, range `0..NUM_CHUNK_CLASSES`.
 ///
-/// Wraps the raw `u8` to make invalid classes harder to construct
-/// accidentally and to centralize the
-/// [`bytes`](Self::bytes)/[`saturating_inc`](Self::saturating_inc)
-/// helpers. `#[repr(transparent)]` so that `AtomicU8` cache slots in
-/// [`ChunkProvider`](super::chunk_provider::ChunkProvider) can keep
-/// storing the raw byte without conversion.
+/// `#[repr(transparent)]` wrapper around the raw `u8` used by
+/// [`ChunkProvider`](super::chunk_provider::ChunkProvider)'s atomic cache
+/// floor slots.
 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
 #[repr(transparent)]
 pub(crate) struct SizeClass(u8);
@@ -132,11 +124,8 @@ impl SizeClass {
 #[cfg_attr(coverage_nightly, coverage(off))]
 #[cfg_attr(test, mutants::skip)] // unreachable: refcount overflow requires usize::MAX live refs
 pub(crate) fn refcount_overflow_abort() -> ! {
-    // Under `cfg(test)` we panic instead of aborting so the overflow-guard
-    // call sites (otherwise unreachable without `usize::MAX` live references)
-    // can be exercised by `#[should_panic]` unit tests. Production builds are
-    // never compiled with `cfg(test)`, so the abort behavior below is the only
-    // one that ships.
+    // In tests, panic so overflow guards can be asserted with `#[should_panic]`.
+    // Non-test builds abort.
     #[cfg(test)]
     {
         panic!("multitude: refcount overflow (test)");
diff --git a/crates/multitude/src/internal/current_chunk.rs b/crates/multitude/src/internal/current_chunk.rs
index 55737f10f..0f6d24f9d 100644
--- a/crates/multitude/src/internal/current_chunk.rs
+++ b/crates/multitude/src/internal/current_chunk.rs
@@ -3,17 +3,15 @@
 
 //! Single-slot interior-mutable holder for a [`ChunkMutator`].
 //!
-//! [`CurrentChunk`] is a `repr(transparent)` newtype over
-//! `UnsafeCell<ChunkMutator<C>>` that encapsulates the `unsafe` access
-//! patterns needed by [`Arena`](crate::Arena)'s hot path.
+//! [`CurrentChunk`] wraps `UnsafeCell<ChunkMutator<C>>` for
+//! [`Arena`](crate::Arena)'s hot path.
 //!
 //! # Soundness contract
 //!
-//! `CurrentChunk` does **not** track borrows at runtime. The holder
-//! (currently [`Arena`](crate::Arena)) must obey two invariants:
+//! `CurrentChunk` does not track borrows at runtime. The holder must ensure:
 //!
-//! 1. *Single-threaded access*: the holder is `!Sync`.
-//! 2. *No re-entry during borrow*: the shared reference returned by
+//! 1. Single-threaded access (`!Sync` holder).
+//! 2. No re-entry during borrow: the shared reference returned by
 //!    [`borrow`](CurrentChunk::borrow) must not be held across any
 //!    `replace`/`drop_replace` on the same cell.
 
@@ -35,11 +33,7 @@ impl<C: ?Sized + ChunkOps> CurrentChunk<C> {
         Self(UnsafeCell::new(mutator))
     }
 
-    /// Borrow the contained mutator. Hot-path entry; inlines fully.
-    ///
-    /// The returned reference is valid only until the next
-    /// `replace`/`drop_replace` on this cell. See module docs for the
-    /// soundness contract.
+    /// Borrows the mutator; valid only until the next `replace` / `drop_replace`.
     #[expect(clippy::inline_always, reason = "hot-path entry; must inline fully for arena performance")]
     #[inline(always)]
     pub(crate) fn borrow(&self) -> &ChunkMutator<C> {
diff --git a/crates/multitude/src/internal/drop_entry.rs b/crates/multitude/src/internal/drop_entry.rs
index 6b6a80d5f..21929a4fe 100644
--- a/crates/multitude/src/internal/drop_entry.rs
+++ b/crates/multitude/src/internal/drop_entry.rs
@@ -43,31 +43,18 @@ const PAD_BYTES: usize = pad_bytes();
 
 /// A single entry in a chunk's trailing drop list.
 ///
-/// Drop entries are appended at the high end of the chunk's payload, growing
-/// downward, while bump allocations grow upward from the low end. The chunk's
-/// `drop_entry_count` counts the number of entries written at the tail.
+/// Entries grow downward from the high end of the payload; bump allocations
+/// grow upward from the low end.
 ///
 /// # Two-phase commit
 ///
-/// Each entry is created in two phases:
-///
-/// 1. At allocation time, [`DropEntry::placeholder`] is written into the
-///    slot and `drop_entry_count` is incremented. `drop_fn` is `None`, so the
-///    replay loop will skip the slot if it is never committed.
-/// 2. When the corresponding value is initialized,
-///    [`DropEntry::commit_drop_fn`] is invoked to fill in the real shim
-///    pointer.
-///
-/// This two-phase scheme means out-of-order initialization is safe: a slot
-/// whose `Uninit` was dropped without `init` simply stays in the placeholder
-/// state and is harmless.
+/// Allocation writes a [`DropEntry::placeholder`] and increments the chunk's
+/// `drop_entry_count`; initialization later calls [`DropEntry::commit_drop_fn`].
+/// Uncommitted placeholders are skipped during replay.
 #[repr(C)]
 pub(crate) struct DropEntry {
-    /// Type-erased shim. Stored as `AtomicPtr<()>` so the function
-    /// pointer's provenance survives the atomic store/load round-trip
-    /// (an `AtomicUsize` with `fn-as-usize` casts would lose provenance
-    /// under Miri's Stacked Borrows and the recovered function pointer
-    /// would be unresolvable when called). A null value means
+    /// Type-erased shim. `AtomicPtr<()>` preserves function-pointer
+    /// provenance across the store/load round-trip. Null means
     /// "uncommitted placeholder".
     ///
     /// The placeholder → committed transition is race-safe because
@@ -103,8 +90,7 @@ impl DropEntry {
     /// Fills in the real drop shim pointer. Idempotent under races: when
     /// two threads commit the same slot, both writes are the same value
     /// (the shim is determined by `T`), so a relaxed-store is sufficient
-    /// once paired with the `Acquire` load in [`replay_drops`] /
-    /// [`commit_placeholder_drop_fn`].
+    /// once paired with the `Acquire` load in [`replay_drops`].
     #[inline]
     pub(crate) fn commit_drop_fn(&self, drop_fn: DropFn) {
         // Cast the fn pointer to `*mut ()` for atomic storage; this
@@ -148,74 +134,6 @@ impl DropEntry {
     }
 }
 
-/// Scans the `drop_entry_count` `DropEntry`s packed against the high end of
-/// `payload` for the unique uncommitted placeholder whose `value_offset` and
-/// `len` match, and commits `drop_fn` into it. Returns `true` if such an
-/// entry was found and committed, `false` otherwise.
-///
-/// Used by `Arc::<MaybeUninit<T>>::assume_init` to retarget the placeholder
-/// reserved by `Arena::alloc_uninit_arc` once the value is initialized. The
-/// entry walk mirrors [`replay_drops`] exactly so the located slot is the
-/// same one the teardown replay will later read.
-///
-/// # Safety
-///
-/// - `payload` / `payload_len` / `drop_entry_count` carry the same contract
-///   as [`replay_drops`]: they must describe the live chunk's payload and the
-///   number of entries previously written by the allocator at the tail.
-/// - The caller must own a strong reference on the chunk (so it stays live)
-///   and must not let another thread commit the same placeholder concurrently
-///   (see the `assume_init` "called at most once per allocation" contract).
-#[allow(
-    clippy::cast_ptr_alignment,
-    reason = "caller guarantees entries are naturally aligned within the payload; see DropEntry layout"
-)]
-pub(crate) unsafe fn commit_placeholder_drop_fn(
-    payload: *mut u8,
-    payload_len: usize,
-    drop_entry_count: usize,
-    value_offset: usize,
-    len: usize,
-    drop_fn: DropFn,
-) -> bool {
-    let entry_size = mem::size_of::<DropEntry>();
-    let entry_align = mem::align_of::<DropEntry>();
-    // Align the *absolute* payload-end address down to `entry_align`,
-    // matching `ChunkMutator::from_owned`'s `aligned_end_addr` formula.
-    // Doing the alignment on absolute addresses (rather than on
-    // `payload_len` alone) keeps the entry positions valid even when
-    // `payload` itself is not `entry_align`-aligned — the chunk
-    // headers don't pad their payload start anymore.
-    let payload_addr = payload as usize;
-    let aligned_end_offset = ((payload_addr.wrapping_add(payload_len)) & !(entry_align - 1)).wrapping_sub(payload_addr);
-    // Find the placeholder by (value_offset, len) and unconditionally
-    // store the real shim. Concurrent `assume_init` calls on cloned
-    // handles for the same allocation race here; both calls compute
-    // the same `drop_fn` (the monomorphisation of `drop_shim_*` for
-    // `T`), so racing atomic stores are idempotent and well-defined.
-    //
-    // A two-phase "check-then-write" alternative would have to compare
-    // the stored function pointer to a freshly-cast `drop_fn as *mut ()`
-    // on the loser's path, which is fragile under Miri: the
-    // fn-pointer-to-data-pointer cast can synthesise distinct data
-    // addresses across invocations of the same function. The single-
-    // pass unconditional store sidesteps the comparison entirely.
-    for i in 0..drop_entry_count {
-        let entry_off = aligned_end_offset - (i + 1) * entry_size;
-        // SAFETY: `entry_off + entry_size <= aligned_end_offset <= payload_len`,
-        // so the entry lies inside the payload; the caller guarantees an
-        // initialized `DropEntry` was written there. We hold a chunk
-        // reference, so the slot stays live for this read/write.
-        let entry = &*(payload.add(entry_off).cast::<DropEntry>());
-        if entry.value_offset() as usize != value_offset || entry.len() as usize != len {
-            continue;
-        }
-        entry.commit_drop_fn(drop_fn);
-        return true;
-    }
-    false
-}
-
 /// A type-erased drop shim for `count` consecutive `T`s.
 ///
 /// `ptr` must be aligned for `T` and point at `count` initialized `T`s. This
@@ -232,18 +150,11 @@ pub(crate) unsafe fn drop_shim<T>(ptr: *mut u8, count: usize) {
     ptr::drop_in_place(slice);
 }
 
-/// Walks the `drop_entry_count` `DropEntry`s packed against the high end of
-/// `payload` and invokes each committed shim against the entry's value
-/// region (`value_offset` bytes into `payload`, `len` elements).
+/// Replays committed drop entries packed against the high end of `payload`.
 ///
-/// Entries are stored growing downward from the payload end. Entry `i`
-/// (0-based, oldest first) sits at byte range
-/// `[payload.len() - (i + 1) * size_of::<DropEntry>(), payload.len() - i * size_of::<DropEntry>())`.
-/// We iterate in reverse-of-allocation order (LIFO) so child values are
-/// dropped before their parents, matching Rust's drop semantics.
-///
-/// Entries whose `drop_fn` is `None` (placeholder entries whose tickets were
-/// dropped without being initialized) are skipped.
+/// Entries grow downward from the payload end and are replayed newest-first
+/// (LIFO), so child values drop before parents. Placeholder entries with no
+/// `drop_fn` are skipped.
 ///
 /// # Safety
 ///
@@ -265,28 +176,16 @@ pub(crate) unsafe fn replay_drops(payload: *mut u8, payload_len: usize, drop_ent
     }
     let entry_size = mem::size_of::<DropEntry>();
     let entry_align = mem::align_of::<DropEntry>();
-    // Align the *absolute* payload-end address down to `entry_align`,
-    // matching `ChunkMutator::from_owned`'s `aligned_end_addr` formula
-    // (which the allocator uses when reserving drop entries). Computing
-    // the alignment on absolute addresses keeps drop-entry positions
-    // valid even when `payload` itself is not `entry_align`-aligned —
-    // chunk headers do not pad the payload start.
+    // Align the absolute payload end so entry positions stay valid even when
+    // the payload start is not `entry_align`-aligned.
     let payload_addr = payload as usize;
     let aligned_end_offset = ((payload_addr.wrapping_add(payload_len)) & !(entry_align - 1)).wrapping_sub(payload_addr);
-    // Iterate newest-first (LIFO) so child values drop before their
-    // parents, matching Rust's drop semantics. Entries grow downward
-    // from the aligned payload end, so the newest (last-written) entry
-    // sits at the lowest address (`aligned_end - count * entry_size`)
-    // and the oldest at the highest (`aligned_end - entry_size`).
-    // Visiting `i` from `count - 1` down to `0` walks newest -> oldest.
+    // Entries grow downward, so reverse index order visits newest -> oldest.
     for i in (0..drop_entry_count).rev() {
         let entry_off = aligned_end_offset - (i + 1) * entry_size;
         // SAFETY: `entry_off + entry_size <= aligned_end_offset <= payload_len`,
-        // so the entry lies inside the payload allocation; the caller
-        // guarantees that an initialized `DropEntry` was previously
-        // written there. If committed, the entry's
-        // `value_off + count * size_of::<T>()` slice is also inside the
-        // payload and contains initialized `T`s matching the shim type.
+        // so the entry lies inside the payload. The caller guarantees the
+        // entry and any committed value range are initialized and type-matched.
         let entry = &*(payload.add(entry_off).cast::<DropEntry>());
         if let Some(shim) = entry.drop_fn() {
             let value_off = entry.value_offset() as usize;
@@ -301,77 +200,11 @@ pub(crate) unsafe fn replay_drops(payload: *mut u8, payload_len: usize, drop_ent
 mod tests {
     use super::*;
 
-    /// Direct test: when `drop_entry_count == 0`, the single-pass walk
-    /// of `commit_placeholder_drop_fn` skips its loop and returns
-    /// `false`.
-    #[test]
-    fn commit_placeholder_drop_fn_returns_false_when_count_is_zero() {
-        let mut buf = [0u8; 64];
-        let shim_fn = drop_shim::<u8> as DropFn;
-        // SAFETY: buffer is exclusively owned and the count is 0 so no entry
-        // is read from it; we only need a valid pointer/length pair.
-        let result = unsafe { commit_placeholder_drop_fn(buf.as_mut_ptr(), buf.len(), 0, 0, 1, shim_fn) };
-        assert!(!result);
-    }
-
-    /// Direct test: the single-pass walk skips a non-matching
-    /// `(value_offset, len)` entry (`continue`) and commits the next
-    /// matching entry (return `true`). Covers both the skip arm and the
-    /// success arm of the loop body.
+    /// [`replay_drops`] must locate entries by absolute payload-end alignment
+    /// even when `payload_ptr` is not `DropEntry`-aligned. Only committed
+    /// entries run.
     #[test]
-    fn commit_placeholder_drop_fn_skips_non_matching_then_commits_match() {
-        let entry_size = mem::size_of::<DropEntry>();
-        let entry_align = mem::align_of::<DropEntry>();
-        let buf_size = entry_size * 4;
-        let mut buf = std::vec![0u8; buf_size + entry_align];
-        let base_addr = buf.as_mut_ptr() as usize;
-        let aligned_base = (base_addr + entry_align - 1) & !(entry_align - 1);
-        let payload_offset = aligned_base - base_addr;
-        // SAFETY: `payload_offset` is within `buf`'s allocation by construction.
-        let payload_ptr = unsafe { buf.as_mut_ptr().add(payload_offset) };
-        let payload_len = buf_size;
-        let aligned_len = payload_len & !(entry_align - 1);
-
-        let shim_fn = drop_shim::<u8> as DropFn;
-        let value_offset: u16 = 0;
-        let len: u16 = 1;
-
-        // Top slot: a *non-matching* placeholder (different value_offset).
-        let top_off = aligned_len - entry_size;
-        // Second slot: the matching placeholder.
-        let next_off = aligned_len - 2 * entry_size;
-        // SAFETY: see above; placements are within the aligned region and
-        // both writes target `DropEntry`-aligned addresses.
-        unsafe {
-            let top_ptr = payload_ptr.add(top_off).cast::<DropEntry>();
-            ptr::write(top_ptr, DropEntry::placeholder(99, 1));
-            let next_ptr = payload_ptr.add(next_off).cast::<DropEntry>();
-            ptr::write(next_ptr, DropEntry::placeholder(value_offset, len));
-        }
-
-        // SAFETY: the buffer contains 2 placeholder `DropEntry`s, the
-        // second one matching `(value_offset, len)`.
-        let result = unsafe { commit_placeholder_drop_fn(payload_ptr, payload_len, 2, value_offset as usize, len as usize, shim_fn) };
-        assert!(result);
-
-        // The matching slot now has the real drop fn installed.
-        // SAFETY: `next_ptr` was initialized above and stays valid for
-        // the test's lifetime.
-        let next_ptr = unsafe { payload_ptr.add(next_off).cast::<DropEntry>() };
-        // SAFETY: the slot is initialized.
-        let installed = unsafe { (*next_ptr).drop_fn() };
-        assert!(installed.is_some());
-    }
-
-    /// When `payload_ptr` is **not** `align_of::<DropEntry>()`-aligned,
-    /// both `commit_placeholder_drop_fn` and `replay_drops` must still
-    /// place drop entries at absolutely-aligned addresses near the
-    /// payload tail. The buffer below intentionally offsets the payload
-    /// start by `entry_align - 1` bytes from an aligned base, so the
-    /// payload start address is 1-aligned but the *end* of the
-    /// reserved payload still lands on an `entry_align` multiple.
-    #[test]
-    fn replay_and_commit_tolerate_unaligned_payload_start() {
+    fn replay_tolerates_unaligned_payload_start() {
         use std::sync::atomic::{AtomicUsize, Ordering};
         static CALLS: AtomicUsize = AtomicUsize::new(0);
         fn counting_shim(_p: *mut u8, _n: usize) {
@@ -400,41 +233,30 @@ mod tests {
         let aligned_end_addr = (payload_start_addr + payload_len) & !(entry_align - 1);
         let aligned_end_offset = aligned_end_addr - payload_start_addr;
 
-        let value_offset: u16 = 0;
-        let len: u16 = 1;
         let shim_fn = counting_shim as DropFn;
 
-        // Write two placeholders at the correctly-aligned offsets.
+        // Write a committed entry and an uncommitted placeholder at the
+        // correctly-aligned offsets.
         // SAFETY: both offsets are within the payload buffer and produce
         // entry_align-aligned addresses by construction.
         unsafe {
             let top_off = aligned_end_offset - entry_size;
             let next_off = aligned_end_offset - 2 * entry_size;
-            // Top: non-matching placeholder.
+            // Top: placeholder left uncommitted (no shim).
             ptr::write(payload_ptr.add(top_off).cast::<DropEntry>(), DropEntry::placeholder(99, 1));
-            // Below: matching placeholder.
-            ptr::write(
-                payload_ptr.add(next_off).cast::<DropEntry>(),
-                DropEntry::placeholder(value_offset, len),
-            );
+            // Below: placeholder committed to the counting shim.
+            let next_ptr = payload_ptr.add(next_off).cast::<DropEntry>();
+            ptr::write(next_ptr, DropEntry::placeholder(0, 1));
+            (*next_ptr).commit_drop_fn(shim_fn);
         }
 
-        // Commit phase must locate the matching entry and install the shim.
-        // SAFETY: both entries are initialized; payload_len includes them.
-        let committed = unsafe { commit_placeholder_drop_fn(payload_ptr, payload_len, 2, value_offset as usize, len as usize, shim_fn) };
-        assert!(committed);
-
-        // Replay phase must invoke the installed shim exactly once
-        // (the non-matching placeholder still has no shim).
+        // Only the committed shim runs.
         // SAFETY: payload_ptr + payload_len bounds the live buffer.
         unsafe { replay_drops(payload_ptr, payload_len, 2) };
         assert_eq!(CALLS.load(Ordering::Relaxed), 1);
     }
 
-    /// `raw_used` returns the byte sum of the un-padded `DropEntry`
-    /// fields: a `DropFn` (function pointer, `usize`-sized) + two
-    /// `u16`s. Pin the exact value so additive/multiplicative mutations
-    /// flip it.
+    /// `raw_used` returns the unpadded field-size sum.
     #[test]
     fn raw_used_is_sum_of_field_sizes() {
         let expected = mem::size_of::<DropFn>() + mem::size_of::<u16>() + mem::size_of::<u16>();
diff --git a/crates/multitude/src/internal/in_chunk.rs b/crates/multitude/src/internal/in_chunk.rs
index 621e7547d..3ef767002 100644
--- a/crates/multitude/src/internal/in_chunk.rs
+++ b/crates/multitude/src/internal/in_chunk.rs
@@ -6,29 +6,18 @@
 use core::marker::PhantomData;
 use core::ptr::NonNull;
 
-/// A non-null, well-aligned pointer that — by construction — addresses
-/// storage inside the payload of a live arena chunk (with one narrow
-/// exception for ZSTs, see below).
-///
-/// `InChunk<T>` is the fundamental "I came from the allocator" pointer
-/// abstraction. The rest of the crate carries these around instead of raw
-/// `NonNull<T>` so that the difference between "any pointer" and "a pointer
-/// the allocator handed out" is visible in the type system.
+/// A non-null, well-aligned pointer produced by the chunk allocator.
 ///
 /// # Invariants
 ///
 /// - `self.ptr` is non-null and well-aligned for `T`.
-/// - If `core::mem::size_of_val(&*self.ptr) > 0`, the pointed-to region lies
-///   entirely within the payload of an arena chunk whose lifetime exceeds the
-///   use of this `InChunk`. (Liveness is enforced externally by the holder of
-///   the chunk's `Arc`.)
+/// - If the pointed-to region has nonzero size, it lies entirely within the
+///   payload of a live arena chunk.
 /// - For zero-sized values (ZSTs and empty slices) the pointer is permitted
 ///   to be a dangling, well-aligned non-null address. There is no payload
 ///   storage to reference in that case.
 ///
-/// `InChunk` is `Copy` because copying a pointer cannot violate any of the
-/// above. Mutability and aliasing discipline are enforced by the wrappers
-/// (`Uninit`, `UninitDrop`, `ArenaBuf`, etc.) that consume `InChunk`s.
+/// Aliasing discipline is enforced by wrappers that consume `InChunk`.
 pub(crate) struct InChunk<T: ?Sized> {
     ptr: NonNull<T>,
     _phantom: PhantomData<*const T>,
@@ -47,9 +36,7 @@ impl<T: ?Sized> Copy for InChunk<T> {}
 impl<T: ?Sized> InChunk<T> {
     /// Wraps a raw `NonNull<T>` that satisfies the type invariants above.
     ///
-    /// This constructor is `pub(super)` so only sibling modules in
-    /// `internal/` (notably `ChunkMutator`) can mint `InChunk` values; the
-    /// rest of the crate may only obtain them through allocator outputs.
+    /// Only sibling internal modules can mint `InChunk` values.
     #[inline]
     pub(super) fn from_raw(ptr: NonNull<T>) -> Self {
         Self {
@@ -95,9 +82,8 @@ impl InChunk<u8> {
     /// Builds an `InChunk<[T]>` describing `len` consecutive `T`s starting at
     /// this byte address.
     ///
-    /// The caller (always `ChunkMutator`) is responsible for ensuring that
-    /// the address is aligned for `T` and that `len * size_of::<T>()` bytes
-    /// of valid in-chunk storage start here.
+    /// The caller must ensure alignment for `T` and enough in-chunk storage for
+    /// `len` elements.
     #[inline]
     pub(crate) fn into_slice<T>(self, len: usize) -> InChunk<[T]> {
         let slice = NonNull::slice_from_raw_parts(self.ptr.cast::<T>(), len);
diff --git a/crates/multitude/src/internal/local_chunk.rs b/crates/multitude/src/internal/local_chunk.rs
index 491200874..156fee9fe 100644
--- a/crates/multitude/src/internal/local_chunk.rs
+++ b/crates/multitude/src/internal/local_chunk.rs
@@ -3,10 +3,8 @@
 
 //! Single-threaded reference-counted arena chunk.
 
-// All methods on chunks that touch raw memory are themselves `unsafe fn`s
-// with documented safety contracts at the function level. Wrapping each line
-// of their body in an additional `unsafe { ... }` block adds noise without
-// adding any safety boundary, so we let edition-2024's lint slide here.
+// Raw-memory methods are `unsafe fn` with item-level safety contracts; inner
+// unsafe blocks would not add a boundary here.
 #![allow(unsafe_op_in_unsafe_fn, reason = "see module doc: inner unsafe blocks in unsafe fn add noise here")]
 #![allow(clippy::unnecessary_safety_comment, reason = "safety rationale documented at function level")]
 
@@ -22,38 +20,20 @@ use super::drop_entry::replay_drops;
 
 /// A bump-allocation chunk used by a single arena thread.
 ///
-/// The chunk is laid out as a fixed-size header immediately followed in
-/// memory by `capacity` bytes of payload. The header type is `Sized` so it
-/// can be referenced via thin `NonNull` pointers; payload addresses are
-/// recovered with `payload_ptr`.
+/// Fixed header followed by `capacity` payload bytes.
 ///
 /// # Provider back-pointer
 ///
-/// `provider` is a non-owning raw pointer rather than a `Weak<ChunkProvider>`.
-/// This is sound because a `LocalChunk` is single-owner (its refcount is only
-/// ever 0 or 1; [`Chunk::inc_ref`] is `unreachable!()`) and reachable only via
-/// the owning [`Arena`](crate::Arena)'s `current_local` / `retired_local` /
-/// the provider's own `local_cache`. The arena's `provider: Arc<ChunkProvider>`
-/// field is declared **after** the chunk-holding fields, so when the arena is
-/// dropped the local mutators tear down first while the provider is still
-/// live; chunks in the cache are destroyed directly from the provider's own
-/// `Drop` (`drain_all`) without going through the back-pointer. The provider
-/// therefore strictly outlives every local-chunk teardown that dereferences
-/// this pointer, so no Weak refcount or orphan-handling branch is needed.
+/// `provider` is a non-owning raw pointer. A `LocalChunk` is single-owner and
+/// reachable only through the owning arena or provider cache; arena field
+/// order keeps the provider alive for all local-chunk teardown paths.
 #[repr(C)]
 pub(crate) struct LocalChunk<A: Allocator + Clone> {
-    /// Non-owning back-pointer to the chunk's provider. See the type-level
-    /// doc for the soundness argument. Never dereferenced from
-    /// [`Self::destroy`] (the caller — provider methods or the provider's
-    /// own drop — supplies the allocator); only read from
-    /// [`ChunkOps::teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release)
-    /// to route the chunk back to the cache.
+    /// Non-owning provider back-pointer, used only by
+    /// [`ChunkOps::teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release).
     provider: *const ChunkProvider<A>,
     capacity: usize,
-    /// Intrusive next-link, used in two disjoint phases of the chunk's
-    /// life. Stored as a thin `*mut u8` header pointer (`null` for end-
-    /// of-list); the fat DST pointer is recovered via
-    /// [`Self::header_to_fat`] when consumers walk the list.
+    /// Intrusive next-link as a thin `*mut u8` header pointer.
     ///
     /// * While the chunk is **retired** (refcount = 1, sitting on
     ///   [`RetiredLocalChunks`](crate::arena::retired_local::RetiredLocalChunks))
@@ -61,10 +41,7 @@ pub(crate) struct LocalChunk<A: Allocator + Clone> {
     /// * While the chunk is **cached** (refcount = 0, sitting on the
     ///   provider's local freelist) it links the next cached chunk.
     ///
-    /// Those two phases are mutually exclusive in time, so a single
-    /// field serves both purposes. Placed after `capacity` (both
-    /// `usize`-aligned) so the smaller `ref_count` / `drop_entry_count`
-    /// fields can pack into the tail without trailing padding.
+    /// The phases are mutually exclusive, so one field serves both lists.
     next: Cell<*mut u8>,
     ref_count: Cell<u8>,
     drop_entry_count: Cell<u16>,
@@ -75,14 +52,9 @@ pub(crate) struct LocalChunk<A: Allocator + Clone> {
     /// never went through a mutator (e.g. preallocated cache fills).
     #[cfg(feature = "stats")]
     wasted_at_retire: Cell<u32>,
-    /// Bump-payload tail. `data.len() == capacity`. Declared as
-    /// `[UnsafeCell<u8>]` (same layout as `[u8]`) so that shared
-    /// borrows of the chunk allow interior-mutable writes into the
-    /// payload, and so that `NonNull<LocalChunk<A>>` is a **fat
-    /// pointer** carrying provenance over the full chunk allocation
-    /// (essential for Miri's Stacked / Tree Borrows: a sized-struct
-    /// header pointer would have provenance for only the header bytes,
-    /// making any payload-derivation undefined behavior).
+    /// Bump-payload tail. `[UnsafeCell<u8>]` permits payload writes through
+    /// shared chunk borrows and keeps fat-pointer provenance over the full
+    /// allocation.
     ///
     /// The payload start is **not** required to be `DropEntry`-aligned:
     /// [`replay_drops`](super::drop_entry::replay_drops) computes drop-
@@ -91,13 +63,9 @@ pub(crate) struct LocalChunk<A: Allocator + Clone> {
     data: [UnsafeCell<u8>],
 }
 
-// SAFETY: `LocalChunk` would auto-derive `Send` when `A: Send` but for the
-// raw `*const ChunkProvider<A>` back-pointer, which the compiler conservatively
-// treats as `!Send`. The pointer references a `ChunkProvider<A>` that is owned
-// by the same `Arena` that owns this chunk (via `Arc<ChunkProvider<A>>`), so
-// moving the arena between threads moves both the chunk and its provider
-// together: the address stays valid and the data behind it is `Send` (asserted
-// by the `Send` impl on `ChunkProvider<A>` when `A: Send`).
+// SAFETY: the raw provider back-pointer moves with the arena that owns both
+// the chunk and `Arc<ChunkProvider<A>>`; when `A: Send`, the provider data is
+// also `Send`.
 unsafe impl<A: Allocator + Clone + Send> Send for LocalChunk<A> {}
 
 impl<A: Allocator + Clone> LocalChunk<A> {
@@ -126,15 +94,10 @@ impl<A: Allocator + Clone> LocalChunk<A> {
         }
     }
 
-    /// Alignment to use when allocating/deallocating a chunk's backing memory.
-    /// `A` is not stored in the chunk header, so only the header fields'
-    /// alignment matters (max is `usize`, 8 bytes on 64-bit). The chunk
-    /// pointer therefore doesn't need to be over-aligned for `A`.
+    /// Alignment for the chunk backing allocation.
     ///
-    /// Unlike [`SharedChunk`](super::shared_chunk::SharedChunk), local
-    /// chunks need no `CHUNK_ALIGN` base alignment (they hand out no
-    /// header-recovering smart pointers), so the base and value alignments
-    /// coincide.
+    /// Local chunks need no `CHUNK_ALIGN` base alignment; base and value
+    /// alignments coincide.
     #[inline]
     pub(crate) const fn struct_align() -> usize {
         Self::value_align()
@@ -152,12 +115,8 @@ impl<A: Allocator + Clone> LocalChunk<A> {
     /// Allocates a fresh chunk with `payload_size` payload bytes and
     /// refcount 1.
     ///
-    /// `allocator` is borrowed only to perform the actual allocation; it is
-    /// not stored. `provider` is stashed as a non-owning back-pointer (see
-    /// the type-level doc for the soundness argument); pass `ptr::null()`
-    /// for stand-alone chunks that will be destroyed directly via
-    /// [`Self::destroy`] without going through
-    /// [`teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release).
+    /// `allocator` is not stored. `provider` may be null for standalone chunks
+    /// destroyed directly via [`Self::destroy`].
     #[allow(
         clippy::cast_ptr_alignment,
         reason = "raw_u8_ptr came from `allocator.allocate(layout)` with `Self`'s alignment; the *mut [u8] -> *mut Self cast preserves the byte address with its full provenance"
@@ -194,11 +153,8 @@ impl<A: Allocator + Clone> LocalChunk<A> {
         }
     }
 
-    /// Non-owning back-pointer to the chunk's provider. See the type-level
-    /// doc for the soundness argument: the provider strictly outlives every
-    /// teardown that calls this. Only used by
-    /// [`ChunkOps::teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release)
-    /// to route the chunk back to the cache.
+    /// Non-owning provider back-pointer used by
+    /// [`ChunkOps::teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release).
     #[inline]
     pub(crate) fn provider(&self) -> *const ChunkProvider<A> {
         self.provider
@@ -229,11 +185,7 @@ impl<A: Allocator + Clone> LocalChunk<A> {
     /// `chunk` must reference a live (still allocated) chunk.
     #[inline]
     pub(crate) unsafe fn payload_ptr(chunk: NonNull<Self>) -> NonNull<u8> {
-        // Project through the DST's slice-tail field directly. This
-        // avoids the fat-to-thin cast (`chunk.as_ptr().cast::<u8>()`)
-        // whose provenance treatment in Miri is inconsistent — going
-        // through `&raw mut (*chunk).data` keeps the slice's provenance
-        // intact (covers payload_size bytes).
+        // Project through the DST tail so the pointer keeps payload provenance.
         let data_slice_ptr: *mut [UnsafeCell<u8>] = &raw mut (*chunk.as_ptr()).data;
         // SAFETY: `data_slice_ptr` is non-null and points at the first
         // payload byte.
@@ -268,13 +220,9 @@ impl<A: Allocator + Clone> LocalChunk<A> {
     /// passed to [`Self::allocate`] when this chunk was created.
     pub(crate) unsafe fn destroy(chunk: NonNull<Self>, allocator: &A) {
         let header = Self::header_size();
-        // SAFETY: caller owns the only reference; we read trivial fields,
-        // replay drops in the payload, then deallocate using the caller-
-        // supplied allocator. The layout exactly matches the one returned
-        // by `allocator.allocate` in `allocate` (both go through
-        // `chunk_layout`). The header carries no Drop-implementing field
-        // (the provider back-pointer is a plain raw pointer), so nothing
-        // else needs to be dropped in place before deallocation.
+        // SAFETY: caller owns the only reference. We replay payload drops and
+        // deallocate with the matching `chunk_layout`; the header has no
+        // Drop-implementing fields, so nothing else needs dropping in place.
         let header_ref = &*chunk.as_ptr();
         let capacity = header_ref.capacity;
         let drop_count = header_ref.drop_entry_count.get() as usize;
@@ -285,11 +233,8 @@ impl<A: Allocator + Clone> LocalChunk<A> {
         allocator.deallocate(NonNull::new_unchecked(raw_ptr), layout);
     }
 
-    /// Reads the intrusive next-link without modifying it. The chunk
-    /// participates in two singly-linked lists at different points in
-    /// its lifecycle — the arena's retired list and the provider's
-    /// cache freelist — and this field encodes both. Returns a thin
-    /// `*mut u8` header pointer (`null` for end-of-list).
+    /// Reads the intrusive next-link as a thin header pointer. The field is
+    /// shared by the retired list and provider cache freelist.
     ///
     /// # Safety
     ///
@@ -331,6 +276,26 @@ impl<A: Allocator + Clone> LocalChunk<A> {
         r.drop_entry_count.set(0);
     }
 
+    /// Returns the number of drop entries currently stored at the tail of the
+    /// chunk.
+    #[inline]
+    pub(crate) fn drop_entry_count(&self) -> usize {
+        self.drop_entry_count.get() as usize
+    }
+
+    /// Sets the number of drop entries currently stored at the tail of the
+    /// chunk.
+    #[inline]
+    pub(crate) fn set_drop_entry_count(&self, count: usize) {
+        #[allow(
+            clippy::cast_possible_truncation,
+            reason = "a 64KiB chunk holds at most 4096 drop entries (« u16::MAX); round-trip asserted below"
+        )]
+        let narrowed = count as u16;
+        debug_assert_eq!(usize::from(narrowed), count, "drop-entry count exceeds u16 range");
+        self.drop_entry_count.set(narrowed);
+    }
+
     /// Overwrites the refcount. Test-only seam so unit tests can drive
     /// refcount-dependent paths without poking the field directly.
     #[cfg(test)]
@@ -360,12 +325,8 @@ impl<A: Allocator + Clone> Chunk for LocalChunk<A> {
     #[inline]
     #[cfg_attr(coverage_nightly, coverage(off))]
     fn inc_ref(&self) {
-        // Local chunks host arena-lifetime allocations, which are single-owner:
-        // the arena holds the sole +1 and plain arena allocations hand back
-        // borrows without cloning the refcount. Only smart pointers (Arc/Box)
-        // clone a chunk reference, and those live exclusively in `SharedChunk`.
-        // So this is never reached in production; the `Chunk` trait only
-        // requires it to keep the local/shared chunk machinery uniform.
+        // Local chunks are single-owner; smart pointers use `SharedChunk`.
+        // This exists only to satisfy the shared `Chunk` trait.
         unreachable!("LocalChunk refcount is never incremented; smart pointers use SharedChunk")
     }
 
@@ -375,22 +336,6 @@ impl<A: Allocator + Clone> Chunk for LocalChunk<A> {
         self.ref_count.set(new);
         new == 0
     }
-
-    #[inline]
-    fn drop_entry_count(&self) -> usize {
-        self.drop_entry_count.get() as usize
-    }
-
-    #[inline]
-    fn set_drop_entry_count(&self, count: usize) {
-        #[allow(
-            clippy::cast_possible_truncation,
-            reason = "a 64KiB chunk holds at most 4096 drop entries (« u16::MAX); round-trip asserted below"
-        )]
-        let narrowed = count as u16;
-        debug_assert_eq!(usize::from(narrowed), count, "drop-entry count exceeds u16 range");
-        self.drop_entry_count.set(narrowed);
-    }
 }
 
 #[cfg(test)]
@@ -483,15 +428,9 @@ mod tests {
     }
 
     /// `header_size` is `offset_of!(<last field>) + size_of::<<last field>>()`.
-    /// For `LocalChunk<Global>`, the header layout is fixed:
-    /// 8 (provider) + 8 (capacity) + 8 (`next`) + 1 (`ref_count`) +
-    /// 1 pad + 2 (`drop_entry_count`) = 28 bytes. Under the `stats`
-    /// feature an additional `wasted_at_retire: Cell<u32>` field is
-    /// appended (after 0 pad bytes since the prior offset is already
-    /// 4-aligned at 28), for 32 bytes total. Reordering moved `next`
-    /// ahead of the small fields so the trailing
-    /// `ref_count` / `drop_entry_count` pair packs into 4 bytes
-    /// without end-of-struct padding.
+    /// For `LocalChunk<Global>`, the fixed header is 28 bytes; with `stats`,
+    /// appended `wasted_at_retire: Cell<u32>` makes it 32 bytes. `next`
+    /// precedes the small fields so they pack without trailing padding.
     #[test]
     fn header_size_for_global_matches_layout() {
         #[cfg(not(feature = "stats"))]
@@ -500,13 +439,8 @@ mod tests {
         assert_eq!(LocalChunk::<Global>::header_size(), 32);
     }
 
-    /// `Chunk::inc_ref` on a local chunk is unreachable in production — local
-    /// chunks have at most one owner (the arena). The trait impl exists only
-    /// to keep the `Chunk` interface uniform between local and shared chunks;
-    /// invoking it must abort/panic so that any future caller that wrongly
-    /// routes a local refcount bump through this path fails loudly. A test
-    /// invoking the trait method and expecting a panic kills a mutant that
-    /// replaces the body with `()`.
+    /// `Chunk::inc_ref` on a local chunk must panic; local chunks have at most
+    /// one owner and refcount bumps belong to `SharedChunk`.
     #[test]
     #[should_panic(expected = "LocalChunk refcount is never incremented")]
     fn local_chunk_inc_ref_is_unreachable() {
diff --git a/crates/multitude/src/internal/owner_thread_cell.rs b/crates/multitude/src/internal/owner_thread_cell.rs
index bbc844679..c9bf46d76 100644
--- a/crates/multitude/src/internal/owner_thread_cell.rs
+++ b/crates/multitude/src/internal/owner_thread_cell.rs
@@ -1,16 +1,11 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
-//! Owner-thread-confined cell: shared-Sync wrapper around an `UnsafeCell`.
+//! Owner-thread-confined `UnsafeCell`.
 //!
-//! The cell is `Sync` so it can live inside a struct that is itself shared
-//! across threads, but every access goes through `unsafe fn with`. The
-//! `unsafe` caller asserts that the call happens on the cell's logical
-//! "owner thread"; concurrent access is undefined behavior.
-//!
-//! Used by [`ChunkProvider`](super::ChunkProvider) to hold the local-chunk
-//! cache head and local high-water mark — both touched exclusively by the
-//! arena's owning thread, even though the provider itself is `Sync`.
+//! The cell is `Sync`, but every access goes through `unsafe fn with`; callers
+//! assert owner-thread, exclusive access. Used by
+//! [`ChunkProvider`](super::chunk_provider::ChunkProvider) to hold its local cache state.
 
 use core::cell::UnsafeCell;
 
diff --git a/crates/multitude/src/internal/shared_chunk.rs b/crates/multitude/src/internal/shared_chunk.rs
index a604a5b1c..14cae630c 100644
--- a/crates/multitude/src/internal/shared_chunk.rs
+++ b/crates/multitude/src/internal/shared_chunk.rs
@@ -3,9 +3,8 @@
 
 //! Multi-threaded reference-counted arena chunk.
 
-// See note in `local_chunk.rs`: methods touching raw memory are `unsafe fn`
-// with module-level safety contracts; we don't repeat the inner unsafe
-// wrappers that edition 2024 requires by default.
+// Raw-memory methods are `unsafe fn` with item-level safety contracts; inner
+// unsafe blocks would not add a boundary here.
 #![allow(unsafe_op_in_unsafe_fn, reason = "see module doc: inner unsafe blocks in unsafe fn add noise here")]
 #![allow(clippy::unnecessary_safety_comment, reason = "safety rationale documented at function level")]
 
@@ -15,14 +14,13 @@ use core::mem;
 use core::ptr::{self, NonNull};
 #[cfg(feature = "stats")]
 use core::sync::atomic::AtomicU32;
-use core::sync::atomic::{AtomicPtr, AtomicU16, AtomicUsize, Ordering, fence};
+use core::sync::atomic::{AtomicPtr, AtomicUsize, Ordering, fence};
 
 use allocator_api2::alloc::{AllocError, Allocator};
 
 use super::chunk::Chunk;
 use super::chunk_provider::ChunkProvider;
 use super::constants::{CHUNK_ALIGN, refcount_overflow_abort};
-use super::drop_entry::replay_drops;
 
 /// A bump-allocation chunk whose allocations can outlive the arena.
 ///
@@ -36,66 +34,44 @@ pub(crate) struct SharedChunk<A: Allocator + Clone> {
     provider: Weak<ChunkProvider<A>>,
     capacity: usize,
     ref_count: AtomicUsize,
-    /// Intrusive cache-freelist link, used while the chunk sits on
-    /// the provider's shared cache (refcount = 0). CAS-pushed and
-    /// CAS-popped from any thread, so the storage is atomic. `null`
-    /// when not on the list. Placed after `ref_count` (both 8-aligned)
-    /// so the trailing `drop_entry_count` (`u16`) packs against
-    /// `data` without end-of-struct padding.
+    /// Intrusive shared-cache freelist link. Atomic because releases can push
+    /// from any thread; null when not cached.
     ///
-    /// Unlike `LocalChunk::next`, this slot is *only* used for the
-    /// cache freelist: shared chunks don't have a retired-list phase
-    /// since handouts outlive the arena and chunks transition
-    /// directly from refcount = 1 → 0 → cached (or destroyed).
+    /// Shared chunks use this only for the cache freelist, not a retired list.
     next: AtomicPtr<u8>,
-    drop_entry_count: AtomicU16,
-    /// Free bytes between the bump cursor and the drop-entry top at the
-    /// time this chunk was retired from a `ChunkMutator`. Set in the
-    /// mutator's `Drop` and read by [`ChunkProvider::release_shared`]
-    /// to decrement the wasted-tail counter. Stays at 0 for chunks that
-    /// never went through a mutator (e.g. preallocated cache fills).
+    /// Wasted-tail bytes recorded when a `ChunkMutator` retires this chunk; read
+    /// by [`ChunkProvider::release_shared`] to decrement the wasted-tail counter.
     ///
-    /// Read in `release_shared` after the chunk's atomic refcount has
-    /// dropped to zero (with an acquire fence); the mutator's `Drop`
-    /// performs the `set` before its own `dec_ref`, so the store is
-    /// visible.
+    /// Release/acquire ordering makes the recorded value visible after
+    /// refcount reaches zero.
     #[cfg(feature = "stats")]
     wasted_at_retire: AtomicU32,
     /// Bump-payload tail. See `LocalChunk` for the
-    /// [`UnsafeCell<u8>]` provenance rationale. The payload start is
-    /// **not** required to be `DropEntry`-aligned:
-    /// [`replay_drops`](super::drop_entry::replay_drops) aligns drop-
-    /// entry positions via the absolute payload-end address.
+    /// `[UnsafeCell<u8>]` provenance rationale. Shared chunks register no
+    /// drop entries (values are owned by `Box`/`Arc` and dropped eagerly),
+    /// so the whole payload is available to the bump cursor.
     data: [UnsafeCell<u8>],
 }
 
 impl<A: Allocator + Clone> SharedChunk<A> {
-    /// Borrow the non-owning back-pointer to the chunk's provider. The
-    /// provider may have been dropped (a shared chunk can outlive its
-    /// arena), so callers must `upgrade()` to use it.
+    /// Borrow the weak provider back-pointer; callers must `upgrade()`.
     #[inline]
     pub(crate) fn provider(&self) -> &Weak<ChunkProvider<A>> {
         &self.provider
     }
 
-    /// Reads the free byte count stashed by the owning `ChunkMutator`'s
-    /// `Drop` (the gap between bump cursor and drop-entry top at retire).
-    /// `0` for chunks that never went through a mutator.
+    /// Reads the wasted-tail count stashed at retire time.
     #[cfg(feature = "stats")]
     #[inline]
     pub(crate) fn wasted_at_retire(&self) -> u32 {
-        // Acquire pairs with the `Release` store in `set_wasted_at_retire`;
-        // shared chunks may be inspected on a different thread than the
-        // one that performed the retire (the last `Arc::drop`).
+        // Acquire pairs with `set_wasted_at_retire`'s Release store; release
+        // may run on a different thread than retire.
         self.wasted_at_retire.load(Ordering::Acquire)
     }
 
-    /// Stashes the chunk's wasted-tail bytes at retire time, to be
-    /// subtracted from the provider's wasted-tail counter when the chunk
-    /// is eventually released to the cache or destroyed.
+    /// Stashes wasted-tail bytes for release-time stats subtraction.
     ///
-    /// `Release` so cross-thread `release_shared` callers observe the
-    /// stored value after their acquire fence on refcount = 0.
+    /// `Release` pairs with release-time acquire after refcount reaches zero.
     #[cfg(feature = "stats")]
     #[inline]
     pub(crate) fn set_wasted_at_retire(&self, n: u32) {
@@ -111,15 +87,15 @@ impl<A: Allocator + Clone> SharedChunk<A> {
     #[cfg_attr(test, mutants::skip)]
     pub(crate) const fn header_size() -> usize {
         // Under `stats`, `wasted_at_retire` is the last fixed-size field;
-        // otherwise it's `drop_entry_count`. The `[UnsafeCell<u8>]` tail
-        // has align 1 and sits flush against whichever it is.
+        // otherwise it's `next`. The `[UnsafeCell<u8>]` tail has align 1 and
+        // sits flush against whichever it is.
         #[cfg(feature = "stats")]
         {
             mem::offset_of!(Self, wasted_at_retire) + mem::size_of::<AtomicU32>()
         }
         #[cfg(not(feature = "stats"))]
         {
-            mem::offset_of!(Self, drop_entry_count) + mem::size_of::<AtomicU16>()
+            mem::offset_of!(Self, next) + mem::size_of::<AtomicPtr<u8>>()
         }
     }
 
@@ -130,14 +106,8 @@ impl<A: Allocator + Clone> SharedChunk<A> {
         if base >= CHUNK_ALIGN { base } else { CHUNK_ALIGN }
     }
 
-    /// The chunk type's own alignment (`align_of::<Self>()`, ignoring the
-    /// align-1 `[UnsafeCell<u8>]` tail): the max of `align_of::<A>()` and
-    /// `align_of::<usize>()` (every other header field — the atomics and
-    /// the `Weak` pointer — has alignment `<= align_of::<usize>()`).
-    ///
-    /// Used to round the allocation *size* (vs. [`Self::struct_align`],
-    /// the larger *base*-address alignment). Pinned against the real
-    /// `align_of_val` by `value_align_matches_real_alignment`.
+    /// The chunk type's own alignment, used to round allocation size. This is
+    /// separate from [`Self::struct_align`], the base-address alignment.
     #[inline]
     #[cfg_attr(test, mutants::skip)] // pure layout constant pinned by a dedicated test
     pub(crate) const fn value_align() -> usize {
@@ -146,13 +116,9 @@ impl<A: Allocator + Clone> SharedChunk<A> {
         if a >= b { a } else { b }
     }
 
-    /// Recovers the chunk header (as a thin `*mut u8` carrying the
-    /// chunk allocation's provenance) from a pointer into the chunk's
-    /// payload by walking backwards through the chunk's `CHUNK_ALIGN`
-    /// tile.
+    /// Recovers a thin chunk-header pointer from an in-payload pointer.
     ///
-    /// Uses [`NonNull::byte_sub`] (provenance-preserving) rather than
-    /// reconstituting the header pointer from an integer.
+    /// Uses [`NonNull::byte_sub`] to preserve provenance.
     #[inline]
     #[cfg_attr(test, mutants::skip)] // mask mutations break refcount → OOM in mutant harness
     pub(crate) fn header_from_value_ptr(value: NonNull<u8>) -> NonNull<u8> {
@@ -200,7 +166,6 @@ impl<A: Allocator + Clone> SharedChunk<A> {
             ptr::write(&raw mut (*fat).provider, provider);
             ptr::write(&raw mut (*fat).capacity, payload_size);
             ptr::write(&raw mut (*fat).ref_count, AtomicUsize::new(1));
-            ptr::write(&raw mut (*fat).drop_entry_count, AtomicU16::new(0));
             ptr::write(&raw mut (*fat).next, AtomicPtr::new(ptr::null_mut()));
             #[cfg(feature = "stats")]
             ptr::write(&raw mut (*fat).wasted_at_retire, AtomicU32::new(0));
@@ -223,8 +188,8 @@ impl<A: Allocator + Clone> SharedChunk<A> {
         let header = Self::header_size();
         let header_ref = &*chunk.as_ptr();
         let capacity = header_ref.capacity;
-        let drop_count = header_ref.drop_entry_count.load(Ordering::Acquire) as usize;
-        replay_drops(Self::payload_ptr(chunk).as_ptr(), capacity, drop_count);
+        // Shared chunks register no drop entries; per-`Arc` values drop on
+        // their last strong reference.
         let allocator: A = ptr::read(&raw const (*chunk.as_ptr()).allocator);
         ptr::drop_in_place(&raw mut (*chunk.as_ptr()).provider);
         let layout = crate::internal::chunk_alloc::chunk_layout(header, capacity, Self::value_align(), Self::struct_align())
@@ -234,10 +199,8 @@ impl<A: Allocator + Clone> SharedChunk<A> {
         drop(allocator);
     }
 
-    /// Pointer to the chunk's intrusive cache-freelist link
-    /// (`AtomicPtr<u8>` storing a thin header pointer; cache stores
-    /// thin pointers since `*mut Self` is fat for the DST). The field
-    /// lives in the chunk header.
+    /// Pointer to the intrusive cache-freelist link storing a thin header
+    /// pointer.
     ///
     /// # Safety
     ///
@@ -248,8 +211,8 @@ impl<A: Allocator + Clone> SharedChunk<A> {
         &raw const (*chunk.as_ptr()).next
     }
 
-    /// Re-initializes a chunk popped from the cache: refcount → 1,
-    /// drop-entry count → 0. The caller becomes the +1 holder.
+    /// Re-initializes a chunk popped from the cache: refcount → 1. The caller
+    /// becomes the +1 holder.
     ///
     /// # Safety
     ///
@@ -257,26 +220,10 @@ impl<A: Allocator + Clone> SharedChunk<A> {
     /// chunk; the cache link is invalidated by this call.
     #[inline]
     pub(crate) unsafe fn reinit_for_acquire(chunk: NonNull<Self>) {
-        // SAFETY: caller owns the unique reference; atomics are safe to
-        // store unconditionally.
+        // SAFETY: caller owns the unique reference; the refcount store is
+        // safe to issue unconditionally.
         let r = &*chunk.as_ptr();
         r.ref_count.store(1, Ordering::Relaxed);
-        r.drop_entry_count.store(0, Ordering::Relaxed);
-    }
-
-    /// Loads the drop-entry count with `Acquire` ordering.
-    ///
-    /// The [`Chunk::drop_entry_count`](super::chunk::Chunk::drop_entry_count)
-    /// accessor uses `Relaxed`, which suffices for the owner thread. This
-    /// `Acquire` variant is for cross-thread readers (the deferred-init
-    /// commit in [`Arc`](crate::Arc)): it pairs with the owner thread's
-    /// `Release` publish in
-    /// [`set_drop_entry_count`](super::chunk::Chunk::set_drop_entry_count)
-    /// (via `ChunkMutator::publish_drop_count`) so the placeholder slot's
-    /// bytes are visible before the count is read.
-    #[inline]
-    pub(crate) fn drop_entry_count_acquire(&self) -> usize {
-        self.drop_entry_count.load(Ordering::Acquire) as usize
     }
 
     /// Overwrites the refcount. Test-only seam so unit tests can drive
@@ -287,14 +234,9 @@ impl<A: Allocator + Clone> SharedChunk<A> {
         self.ref_count.store(count, Ordering::Relaxed);
     }
 
-    /// Decrements `chunk`'s refcount on behalf of the caller, and if
-    /// that drops the count to zero, routes the chunk back through
+    /// Releases one strong ref and routes zero-ref chunks through
     /// [`teardown_and_release`](super::chunk_ops::ChunkOps::teardown_and_release).
     ///
-    /// Used by smart-pointer drop paths ([`Box`](crate::Box),
-    /// [`Arc`](crate::Arc)) and by [`ChunkMutator`](super::ChunkMutator)
-    /// itself to share the "release one ref I am holding" sequence.
-    ///
     /// # Safety
     ///
     /// Caller must hold exactly one strong reference to `chunk` that
@@ -313,14 +255,10 @@ impl<A: Allocator + Clone> SharedChunk<A> {
         }
     }
 
-    /// Atomically reserves `n` additional strong references on this
-    /// chunk in a single `fetch_add`, in addition to whatever the
-    /// caller already holds. Aborts the process on overflow.
+    /// Atomically reserves `n` additional strong references. Aborts on
+    /// overflow.
     ///
-    /// Used by the arena's per-chunk surplus pre-credit: at chunk
-    /// install time the arena reserves a large surplus of refs so
-    /// per-allocation handouts can be tracked in a non-atomic local
-    /// counter; the unused portion is returned to the chunk via
+    /// Used by arena surplus pre-credit; unused refs are returned through
     /// [`Self::refund_refs`] when the chunk is retired.
     #[inline]
     pub(crate) fn pre_credit_refs(&self, n: usize) {
@@ -339,12 +277,8 @@ impl<A: Allocator + Clone> SharedChunk<A> {
         }
     }
 
-    /// Atomically returns `n` previously pre-credited but unused
-    /// refs to the chunk's counter via `fetch_sub` with `Release`
-    /// ordering. `Release` matches the existing per-ref `dec_ref`
-    /// ordering so any writes the arena thread performed into the
-    /// chunk are visible to other-thread holders that may observe
-    /// the lower count.
+    /// Atomically returns `n` pre-credited but unused refs with `Release`
+    /// ordering, matching [`Chunk::dec_ref`](super::chunk::Chunk::dec_ref).
     ///
     /// # Safety
     ///
@@ -395,22 +329,6 @@ impl<A: Allocator + Clone> Chunk for SharedChunk<A> {
             false
         }
     }
-
-    #[inline]
-    fn drop_entry_count(&self) -> usize {
-        self.drop_entry_count.load(Ordering::Relaxed) as usize
-    }
-
-    #[inline]
-    fn set_drop_entry_count(&self, count: usize) {
-        #[allow(
-            clippy::cast_possible_truncation,
-            reason = "a 64KiB chunk holds at most 4096 drop entries (« u16::MAX); round-trip asserted below"
-        )]
-        let narrowed = count as u16;
-        debug_assert_eq!(usize::from(narrowed), count, "drop-entry count exceeds u16 range");
-        self.drop_entry_count.store(narrowed, Ordering::Release);
-    }
 }
 
 /// Largest payload byte count a shared chunk can offer to a bump allocator
@@ -430,18 +348,16 @@ mod tests {
     /// `header_size` is `offset_of!(<last field>) + size_of::<<last field>>()`.
     /// For `SharedChunk<Global>`, the header layout is fixed:
     /// 0 (allocator ZST) + 8 (provider `Weak`) + 8 (capacity) +
-    /// 8 (`ref_count`) + 8 (`next`) + 2 (`drop_entry_count`) = 34 bytes.
-    /// Under the `stats` feature an additional `wasted_at_retire:
-    /// AtomicU32` is appended after 2 pad bytes (offset 36) for 40 bytes
-    /// total. `next` is placed between `ref_count` and
-    /// `drop_entry_count` so the trailing `u16` packs against `data`
-    /// without padding when stats are off.
+    /// 8 (`ref_count`) + 8 (`next`) = 32 bytes. Under the `stats` feature an
+    /// additional `wasted_at_retire: AtomicU32` is appended (offset 32) for
+    /// 36 bytes total. Shared chunks carry no `drop_entry_count` — they never
+    /// register drop entries.
     #[test]
     fn header_size_for_global_matches_layout() {
         #[cfg(not(feature = "stats"))]
-        assert_eq!(SharedChunk::<Global>::header_size(), 34);
+        assert_eq!(SharedChunk::<Global>::header_size(), 32);
         #[cfg(feature = "stats")]
-        assert_eq!(SharedChunk::<Global>::header_size(), 40);
+        assert_eq!(SharedChunk::<Global>::header_size(), 36);
     }
 
     /// `struct_align` returns the max of `align_of::<A>()`,
@@ -460,11 +376,8 @@ mod tests {
         assert_eq!(got, super::super::constants::CHUNK_ALIGN);
     }
 
-    /// `chunk_layout` must round the allocation *size* up to
-    /// `value_align` (8) and set the *base* alignment to `struct_align`
-    /// (`CHUNK_ALIGN`), but must NOT round the size up to `CHUNK_ALIGN`.
-    /// Each cacheable size class must therefore produce an allocation
-    /// whose size equals the class bytes, not 64 KiB.
+    /// `chunk_layout` rounds the size up to `value_align` and sets the base
+    /// alignment to `struct_align`, without inflating the size to `CHUNK_ALIGN`.
     #[test]
     fn chunk_layout_does_not_inflate_size_to_base_align() {
         use super::super::chunk_alloc::chunk_layout;
diff --git a/crates/multitude/src/internal/thin_dst.rs b/crates/multitude/src/internal/thin_dst.rs
index 59a0ea5a3..75bf23dab 100644
--- a/crates/multitude/src/internal/thin_dst.rs
+++ b/crates/multitude/src/internal/thin_dst.rs
@@ -10,14 +10,12 @@
 //! [optional pad to align(T)][T::Metadata (unaligned)][T payload]
 //! ```
 //!
-//! The thin smart pointer stores a `NonNull<u8>` to the payload start.
-//! Metadata (slice length, trait-object vtable, or `()` for sized T)
-//! sits in `size_of::<T::Metadata>()` bytes immediately preceding the
-//! payload and is read with [`ptr::read_unaligned`]. For
-//! `T: Sized`, the metadata read is a zero-byte no-op.
+//! Thin smart pointers store a `NonNull<u8>` to the payload start. Metadata
+//! sits immediately before it and is read with [`ptr::read_unaligned`].
 
 use core::mem;
 use core::ptr::{self, NonNull};
+use core::sync::atomic::AtomicU32;
 
 use ptr_meta::Pointee;
 
@@ -30,6 +28,56 @@ pub(crate) const fn meta_bytes<T: ?Sized + Pointee>() -> usize {
     mem::size_of::<<T as Pointee>::Metadata>()
 }
 
+/// Byte size of the per-[`Arc`](crate::Arc) strong reference count
+/// (an [`AtomicU32`]) stored in the chunk prefix.
+pub(crate) const STRONG_BYTES: usize = mem::size_of::<AtomicU32>();
+
+/// Alignment of the per-`Arc` strong reference count.
+pub(crate) const STRONG_ALIGN: usize = mem::align_of::<AtomicU32>();
+
+/// Byte distance from an `Arc<T>` value pointer back to its strong
+/// reference count, given the value's alignment and metadata width.
+///
+/// Layout of every chunk-resident `Arc<T>` value:
+///
+/// ```text
+/// [strong (AtomicU32, at reservation base)][pad][T::Metadata (unaligned)][T payload]
+/// ```
+///
+/// The strong count starts the reservation; metadata sits immediately before
+/// the payload. The returned prefix keeps the payload `value_align`-aligned.
+#[inline]
+pub(crate) const fn strong_prefix_bytes_for(value_align: usize, meta: usize) -> usize {
+    (STRONG_BYTES + meta).next_multiple_of(value_align)
+}
+
+/// Reservation alignment for an `Arc<T>` value: at least [`STRONG_ALIGN`] and
+/// at least `value_align`.
+#[inline]
+pub(crate) const fn arc_block_align(value_align: usize) -> usize {
+    if value_align >= STRONG_ALIGN { value_align } else { STRONG_ALIGN }
+}
+
+/// Recovers the strong reference count of an `Arc<T>` from its value
+/// pointer.
+///
+/// # Safety
+///
+/// - `value_ptr` must reference the payload of an `Arc<T>` value whose
+///   chunk prefix was written by the strong-prefixed allocator path.
+/// - `value_align` must equal the value's alignment (`align_of_val`).
+/// - The hosting chunk must be kept alive by the caller for the
+///   duration of the returned reference's use.
+#[inline]
+pub(crate) unsafe fn strong_ref<'a, T: ?Sized + Pointee>(value_ptr: NonNull<u8>, value_align: usize) -> &'a AtomicU32 {
+    let prefix = strong_prefix_bytes_for(value_align, meta_bytes::<T>());
+    // SAFETY: per caller. `prefix` bytes of strong + metadata + padding
+    // were reserved before the payload; the strong slot lives at the
+    // reservation base, which is `STRONG_ALIGN`-aligned, so the
+    // `AtomicU32` reference is well-aligned and within chunk provenance.
+    unsafe { value_ptr.byte_sub(prefix).cast::<AtomicU32>().as_ref() }
+}
+
 /// Reads `T`'s metadata word from the chunk prefix immediately preceding
 /// the payload at `value_ptr`.
 ///
diff --git a/crates/multitude/src/internal/uninit.rs b/crates/multitude/src/internal/uninit.rs
index 2a1fb3c51..8670ba6df 100644
--- a/crates/multitude/src/internal/uninit.rs
+++ b/crates/multitude/src/internal/uninit.rs
@@ -4,31 +4,23 @@
 //! Safe "ticket" wrappers that turn raw [`InChunk`] storage into initialized
 //! arena allocations.
 //!
-//! Each ticket type is constructed only by [`ChunkMutator`](super::ChunkMutator)
-//! when it reserves storage. Consumers obtain a ticket and call the matching
-//! `init*` method, which writes the value (and any drop entry) and returns a
-//! safe reference. This isolates `unsafe` to a small number of methods in
-//! this file; the higher layers of the crate (arena, smart pointers, vec,
-//! strings) use only the safe ticket API.
+//! [`ChunkMutator`](super::chunk_mutator::ChunkMutator) creates tickets for reserved storage.
+//! `init*` methods write values, commit drop entries when needed, and return
+//! safe references.
 
 use core::marker::PhantomData;
-use core::mem::{self, MaybeUninit};
 use core::ptr::{self, NonNull};
-use core::str;
+use core::{mem, str};
 
 use super::drop_entry::{DropEntry, DropFn, drop_shim};
 use super::in_chunk::InChunk;
 
 /// Storage reserved for a value (or slice) that has no drop requirements.
 ///
-/// Created by [`ChunkMutator::try_alloc_uninit`](super::ChunkMutator::try_alloc_uninit)
-/// or [`try_alloc_uninit_slice`](super::ChunkMutator::try_alloc_uninit_slice).
-/// Consume with [`init`](Self::init) (single value) or
-/// [`init_copy_from_slice`](Self::init_copy_from_slice) (slice).
+/// Created by [`ChunkMutator::try_alloc_uninit`](super::chunk_mutator::ChunkMutator::try_alloc_uninit)
+/// or [`try_alloc_uninit_slice`](super::chunk_mutator::ChunkMutator::try_alloc_uninit_slice).
 ///
-/// If the ticket is dropped without being initialized, the reserved bump
-/// space is leaked until the owning chunk is torn down — but no unsafe
-/// behavior occurs.
+/// Dropping without initialization leaks the reservation until chunk teardown.
 pub(crate) struct Uninit<'a, T: ?Sized> {
     ptr: InChunk<T>,
     _phantom: PhantomData<&'a mut T>,
@@ -49,11 +41,7 @@ impl<T: ?Sized> Uninit<'_, T> {
     ///
     /// # Safety
     ///
-    /// Caller asserts that the reserved storage backing this ticket
-    /// remains valid for the new lifetime `'b`. The intended use is
-    /// inside [`Arena`](crate::Arena), where the chunk that hosts the
-    /// slot is retained until the arena is reset or dropped — i.e.
-    /// at least for the `&Arena` borrow lifetime.
+    /// Caller guarantees the reserved storage remains valid for `'b`.
     #[inline]
     pub(crate) unsafe fn rebind<'b>(self) -> Uninit<'b, T> {
         Uninit {
@@ -90,10 +78,7 @@ impl<'a, T> Uninit<'a, T> {
         unsafe { &mut *ptr.as_ptr() }
     }
 
-    /// Same as [`init`](Self::init) but returns a raw pointer with no
-    /// lifetime. Used by the arena layer when the resulting reference's
-    /// lifetime must be tied to `&Arena` rather than to the consumed
-    /// ticket's borrow scope.
+    /// Same as [`init`](Self::init) but returns a raw pointer with no lifetime.
     #[inline]
     pub(crate) fn init_raw(self, value: T) -> NonNull<T> {
         let raw = self.ptr.as_ptr();
@@ -147,9 +132,7 @@ impl<'a, T> Uninit<'a, [T]> {
         unsafe { slice_ptr.as_mut() }
     }
 
-    /// Like [`Self::init_copy_from_slice`] but returns the raw
-    /// `NonNull<[T]>` with chunk-wide provenance. See
-    /// [`Uninit::init_with_ptr`] for the rationale.
+    /// Like [`Self::init_copy_from_slice`] but returns raw `NonNull<[T]>`.
     #[inline]
     pub(crate) fn init_copy_from_slice_ptr(self, src: &[T]) -> NonNull<[T]>
     where
@@ -201,13 +184,8 @@ impl<'a, T> Uninit<'a, [T]> {
         unsafe { slice_ptr.as_mut() }
     }
 
-    /// Like [`Self::init_with`] but returns the raw `NonNull<[T]>` with
-    /// chunk-wide provenance instead of an `&mut [T]` retag. Callers
-    /// that hand the slice to a smart-pointer constructor (which then
-    /// recovers the chunk header via `byte_sub`) need the chunk-wide
-    /// provenance; rounding through `&mut [T]` would narrow the
-    /// borrow-stack tag to the slice payload and trip strict provenance
-    /// / Stacked Borrows when the header bytes are later read.
+    /// Like [`Self::init_with`] but returns raw `NonNull<[T]>` to preserve
+    /// chunk-wide provenance for smart-pointer header recovery.
     #[inline]
     #[cfg_attr(test, mutants::skip)] // `+= → *=` on counter ⇒ infinite loop
     pub(crate) fn init_with_ptr<F>(self, mut f: F) -> NonNull<[T]>
@@ -264,14 +242,10 @@ impl<'a, T> Uninit<'a, [T]> {
         })
     }
 
-    /// Consume this slice ticket and return the raw start pointer plus
-    /// capacity. The caller takes over responsibility for tracking
-    /// which slots are initialized and for dropping the initialized
-    /// prefix before the chunk is torn down.
+    /// Consumes this slice ticket and returns the raw start pointer plus
+    /// capacity; caller tracks initialization and drops.
     ///
-    /// Intended for growable container backings (`Vec`, `String`)
-    /// where the reservation is filled in incrementally rather than in
-    /// a single `init_*` call.
+    /// Used by growable containers filled incrementally.
     #[inline]
     pub(crate) fn into_raw_buffer(self) -> (NonNull<T>, usize) {
         let slice_ptr = self.ptr.as_non_null();
@@ -280,9 +254,7 @@ impl<'a, T> Uninit<'a, [T]> {
     }
 }
 
-/// Drop-guard used by `init_with` / `init_clone_from_slice` / `init_from_iter`
-/// implementations: if the producing closure panics part-way through, drop the
-/// elements written so far.
+/// Drops the initialized prefix if slice initialization panics.
 struct InitGuard<T> {
     dst: *mut T,
     initialized: usize,
@@ -303,13 +275,11 @@ impl<T> Drop for InitGuard<T> {
 
 /// Storage reserved for a value, paired with a pre-reserved drop entry slot.
 ///
-/// Created by [`ChunkMutator::try_alloc_uninit_with_drop`](super::ChunkMutator::try_alloc_uninit_with_drop)
-/// or [`try_alloc_uninit_slice_with_drop`](super::ChunkMutator::try_alloc_uninit_slice_with_drop).
+/// Created by [`ChunkMutator::try_alloc_uninit_with_drop`](super::chunk_mutator::ChunkMutator::try_alloc_uninit_with_drop)
+/// or [`try_alloc_uninit_slice_with_drop`](super::chunk_mutator::ChunkMutator::try_alloc_uninit_slice_with_drop).
 ///
-/// On `init*`, the value is written into its storage and the drop entry is
-/// committed (its `drop_fn` is set to a shim for `T`). If the ticket is
-/// dropped without being initialized, the placeholder entry remains with no
-/// drop shim — the replay loop will skip it.
+/// `init*` writes the value and commits the drop entry. Dropping without
+/// initialization leaves a skipped placeholder entry.
 pub(crate) struct UninitDrop<'a, T: ?Sized> {
     value: InChunk<T>,
     drop_slot: InChunk<DropEntry>,
@@ -339,10 +309,7 @@ impl<'a, T> UninitDrop<'a, T> {
         // storage exclusively.
         unsafe { &mut *ptr.as_ptr() }
     }
-    /// Same as [`init`](Self::init) but returns a raw pointer with no
-    /// lifetime. Used by the arena layer when the resulting reference's
-    /// lifetime must be tied to `&Arena` rather than to the consumed
-    /// ticket's borrow scope.
+    /// Same as [`init`](Self::init) but returns a raw pointer with no lifetime.
     #[inline]
     pub(crate) fn init_raw(self, value: T) -> NonNull<T> {
         let raw = self.value.as_ptr();
@@ -362,40 +329,14 @@ impl<'a, T> UninitDrop<'a, T> {
             NonNull::new_unchecked(raw)
         }
     }
-
-    /// Writes a (possibly uninitialized) `MaybeUninit<T>` into the value
-    /// slot and returns a pointer to it, leaving the pre-reserved drop entry
-    /// as an **uncommitted** placeholder.
-    ///
-    /// Used by the uninit-`Arc` allocation path: the entry is committed
-    /// later by [`Arc::<MaybeUninit<T>>::assume_init`](crate::Arc) once the
-    /// value is initialized. If the resulting handle is dropped without
-    /// `assume_init`, the placeholder stays `None` and the replay loop skips
-    /// it, so no destructor runs on uninitialized memory.
-    #[inline]
-    pub(crate) fn into_uninit_placeholder(self, value: MaybeUninit<T>) -> NonNull<MaybeUninit<T>> {
-        let raw = self.value.as_ptr().cast::<MaybeUninit<T>>();
-        // SAFETY: `raw` is non-null, aligned for `T` (identical to
-        // `MaybeUninit<T>`), and exclusively owned by this consumed ticket;
-        // the slot is uninitialized so `write` drops nothing. The drop slot
-        // is intentionally left as the placeholder written at reservation.
-        unsafe {
-            ptr::write(raw, value);
-            NonNull::new_unchecked(raw)
-        }
-    }
 }
 
 impl<'a, T> UninitDrop<'a, [T]> {
-    /// Initializes the reserved slice by cloning each element of `src`,
-    /// commits the drop entry, and returns a mutable reference bound by
-    /// the arena's lifetime.
+    /// Clones `src` into the reservation, commits the drop entry, and returns
+    /// the initialized slice.
     ///
-    /// If any `T::clone` panics, all previously-cloned elements are
-    /// dropped before the panic propagates; the drop entry is *not*
-    /// committed (the chunk's drop-replay loop will skip the placeholder),
-    /// so partially-initialized memory cannot be re-dropped at arena
-    /// teardown.
+    /// On panic, initialized elements are dropped and the placeholder remains
+    /// uncommitted.
     #[inline]
     pub(crate) fn init_clone_from_slice(self, src: &[T]) -> &'a mut [T]
     where
@@ -409,11 +350,8 @@ impl<'a, T> UninitDrop<'a, [T]> {
         self.init_with(|i| src[i].clone())
     }
 
-    /// Initializes the reserved slice by calling `f(i)` for each index
-    /// `i` in `0..len`, then commits the drop entry on success. If `f`
-    /// panics, already-initialized elements are dropped and the drop
-    /// entry is *not* committed (the chunk's drop-replay loop skips the
-    /// placeholder).
+    /// Initializes with `f(i)` and commits the drop entry on success. On panic,
+    /// initialized elements are dropped and the placeholder remains uncommitted.
     #[inline]
     pub(crate) fn init_with<F>(self, f: F) -> &'a mut [T]
     where
@@ -425,9 +363,7 @@ impl<'a, T> UninitDrop<'a, [T]> {
         unsafe { slice_ptr.as_mut() }
     }
 
-    /// Like [`Self::init_with`] but returns the raw `NonNull<[T]>` with
-    /// chunk-wide provenance. See [`Uninit::init_with_ptr`] for the
-    /// rationale.
+    /// Like [`Self::init_with`] but returns raw `NonNull<[T]>`.
     #[inline]
     #[cfg_attr(test, mutants::skip)] // counter mutation += → *= ⇒ infinite loop
     pub(crate) fn init_with_ptr<F>(self, mut f: F) -> NonNull<[T]>
@@ -453,11 +389,8 @@ impl<'a, T> UninitDrop<'a, [T]> {
         slice_ptr
     }
 
-    /// Initializes the reserved slice by pulling `len` values from
-    /// `iter` and commits the drop entry on success. Panics if `iter`
-    /// yields fewer elements than the reservation; in that case,
-    /// already-initialized elements are dropped and the drop entry is
-    /// not committed.
+    /// Pulls `len` values from `iter` and commits on success. Panics if `iter`
+    /// is short; initialized elements are dropped and the entry is not committed.
     #[inline]
     pub(crate) fn init_from_iter<I>(self, mut iter: I) -> &'a mut [T]
     where
@@ -468,26 +401,4 @@ impl<'a, T> UninitDrop<'a, [T]> {
                 .expect("iterator yielded fewer elements than ExactSizeIterator::len() reported")
         })
     }
-
-    /// Slice analogue of [`UninitDrop::into_uninit_placeholder`]: optionally
-    /// zero-fills the reserved elements and returns the buffer as
-    /// `MaybeUninit<T>`s, leaving the pre-reserved drop entry **uncommitted**.
-    ///
-    /// The uninit-slice-`Arc` path commits the entry later via
-    /// [`Arc::<[MaybeUninit<T>]>::assume_init`](crate::Arc).
-    #[inline]
-    pub(crate) fn into_uninit_slice_placeholder(self, zeroed: bool) -> NonNull<[MaybeUninit<T>]> {
-        let slice_ptr = self.value.as_non_null();
-        let len = slice_ptr.len();
-        let base = slice_ptr.cast::<MaybeUninit<T>>();
-        if zeroed {
-            // SAFETY: `base` addresses `len` exclusively-owned `MaybeUninit<T>`
-            // slots inside chunk storage reserved for this consumed ticket;
-            // zeroing their bytes leaves valid `MaybeUninit<T>` values.
-            unsafe {
-                ptr::write_bytes(base.as_ptr().cast::<u8>(), 0, len.saturating_mul(mem::size_of::<T>()));
-            }
-        }
-        NonNull::slice_from_raw_parts(base, len)
-    }
 }
diff --git a/crates/multitude/src/strings/arc_utf16_str.rs b/crates/multitude/src/strings/arc_utf16_str.rs
index 42241cc53..bc8e8d378 100644
--- a/crates/multitude/src/strings/arc_utf16_str.rs
+++ b/crates/multitude/src/strings/arc_utf16_str.rs
@@ -63,11 +63,16 @@ impl_utf16_str_common!(ArcUtf16Str);
 impl<A: Allocator + Clone> Clone for ArcUtf16Str<A> {
     #[inline]
     fn clone(&self) -> Self {
-        // SAFETY: `self` owns a live +1 on its chunk so the chunk is
-        // alive; `clone_from_value_ptr` mints a fresh +1 via an
-        // atomic bump and returns a `ChunkRef` that owns it.
-        let r: ChunkRef<A> = unsafe { ChunkRef::clone_from_value_ptr(self.ptr) };
-        let _ = r.forget();
+        // SAFETY: `self` keeps the payload (and its strong-count prefix)
+        // alive; the strong slot is aligned and within chunk provenance.
+        // The conceptual value type is `[u16]` (element align 2,
+        // `usize` metadata), matching the allocator's strong-prefix
+        // layout.
+        let strong = unsafe { crate::internal::thin_dst::strong_ref::<[u16]>(self.ptr.cast::<u8>(), core::mem::align_of::<u16>()) };
+        let prev = strong.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
+        if prev > (u32::MAX >> 1) {
+            crate::internal::constants::refcount_overflow_abort();
+        }
         Self {
             ptr: self.ptr,
             _phantom: PhantomData,
@@ -75,6 +80,29 @@ impl<A: Allocator + Clone> Clone for ArcUtf16Str<A> {
     }
 }
 
+impl<A: Allocator + Clone> Drop for ArcUtf16Str<A> {
+    #[inline]
+    fn drop(&mut self) {
+        // SAFETY: the payload (and its strong-count prefix) is live while
+        // this handle exists; the strong slot is aligned and in chunk
+        // provenance (conceptual value type `[u16]`).
+        let strong = unsafe { crate::internal::thin_dst::strong_ref::<[u16]>(self.ptr.cast::<u8>(), core::mem::align_of::<u16>()) };
+        if strong.fetch_sub(1, core::sync::atomic::Ordering::Release) != 1 {
+            return;
+        }
+        core::sync::atomic::fence(core::sync::atomic::Ordering::Acquire);
+        // Last strong reference: the `Acquire` fence above pairs with the
+        // `Release` decrements of the other handles. Release the chunk +1;
+        // the `[u16]` payload has no element destructor to run.
+        // SAFETY: `ptr` is hosted in a 64K-aligned `SharedChunk` holding
+        // exactly one outstanding +1 for this `Arc` family;
+        // `from_value_ptr` adopts and releases it.
+        unsafe {
+            let _ref: ChunkRef<A> = ChunkRef::from_value_ptr(self.ptr);
+        }
+    }
+}
+
 impl<A: Allocator + Clone> From<ArcUtf16Str<A>> for crate::Arc<[u16], A> {
     /// Convert an [`ArcUtf16Str<A>`] into an [`Arc<[u16], A>`](crate::Arc).
     ///
@@ -96,3 +124,77 @@ impl<A: Allocator + Clone> From<ArcUtf16Str<A>> for crate::Arc<[u16], A> {
         unsafe { Self::from_raw(me.ptr.cast::<u8>()) }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use core::sync::atomic::{AtomicU32, Ordering};
+
+    use super::*;
+    use crate::Arena;
+    use crate::internal::thin_dst::strong_ref;
+
+    // The per-string strong count lives in the chunk prefix, accessed as
+    // an `[u16]` strong reference (element align 2) — exactly as the
+    // `Clone`/`Drop` impls do.
+    fn strong_of<A: Allocator + Clone>(s: &ArcUtf16Str<A>) -> &AtomicU32 {
+        // SAFETY: `s` keeps the payload and its strong-count prefix live,
+        // so the strong slot is aligned and within chunk provenance.
+        unsafe { strong_ref::<[u16]>(s.ptr.cast::<u8>(), core::mem::align_of::<u16>()) }
+    }
+
+    // `Drop` must decrement the per-string strong count (and release the
+    // chunk on the last handle). Kills the `drop -> ()` mutant: cloning
+    // bumps the count, so dropping the clone must bring it back down.
+    #[test]
+    fn drop_decrements_strong_count() {
+        let arena = Arena::new();
+        let s = arena.alloc_utf16_str_arc_from_str("hi");
+        let strong = strong_of(&s);
+        let base = strong.load(Ordering::Relaxed);
+        let s2 = s.clone();
+        assert_eq!(strong.load(Ordering::Relaxed), base + 1, "clone must bump the strong count");
+        drop(s2);
+        assert_eq!(strong.load(Ordering::Relaxed), base, "drop must decrement the strong count");
+        // `s` (still live) holds the chunk; it drops normally at scope end.
+    }
+
+    // `Clone` checks `prev > (u32::MAX >> 1)` on the value returned by
+    // `fetch_add` (the count *before* the increment), so a clone
+    // observing `prev == u32::MAX >> 1` must NOT abort. Kills the
+    // `>` -> `==` and `>` -> `>=` mutants on that comparison.
+    #[test]
+    fn clone_at_max_refcount_threshold_does_not_abort() {
+        let arena = Arena::new();
+        let s = arena.alloc_utf16_str_arc_from_str("hi");
+        let strong = strong_of(&s);
+        strong.store(u32::MAX >> 1, Ordering::Relaxed);
+        let clone = s.clone();
+        // Reached here without panic. Restore the true live-handle count
+        // (`s` + `clone`) so teardown releases the chunk instead of
+        // leaking the strong count above 1 forever.
+        strong.store(2, Ordering::Relaxed);
+        drop(clone);
+    }
+
+    // A clone observing `prev > u32::MAX >> 1` MUST abort. Driving the
+    // strong count one past the threshold kills the `>` -> `==` mutant
+    // (it would not fire) and the `>>` -> `<<` mutant (which raises the
+    // threshold to `0xFFFF_FFFE`, so the guard would not fire here).
+    #[test]
+    #[should_panic(expected = "refcount overflow")]
+    fn clone_above_max_refcount_threshold_aborts() {
+        let arena = Arena::new();
+        let s = arena.alloc_utf16_str_arc_from_str("hi");
+        let strong = strong_of(&s);
+        strong.store((u32::MAX >> 1) + 1, Ordering::Relaxed);
+        // The clone's `fetch_add` bumps the count before its overflow guard panics,
+        // so no clone is produced. Catch the panic, overwrite the count with the
+        // real live-handle count (just `s`) so teardown releases the chunk instead
+        // of leaking (keeps Miri happy), then resume so `should_panic` sees it.
+        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+            let _c = s.clone();
+        }));
+        strong.store(1, Ordering::Relaxed);
+        std::panic::resume_unwind(result.expect_err("clone past the threshold must panic"));
+    }
+}
diff --git a/crates/multitude/src/strings/box_utf16_str.rs b/crates/multitude/src/strings/box_utf16_str.rs
index 9bf41f627..85a3069b9 100644
--- a/crates/multitude/src/strings/box_utf16_str.rs
+++ b/crates/multitude/src/strings/box_utf16_str.rs
@@ -79,6 +79,19 @@ impl<A: Allocator + Clone> BoxUtf16Str<A> {
 
 impl_utf16_str_common!(BoxUtf16Str);
 
+impl<A: Allocator + Clone> Drop for BoxUtf16Str<A> {
+    #[inline]
+    fn drop(&mut self) {
+        // SAFETY: `ptr` is hosted in a 64K-aligned `SharedChunk` on
+        // which this single-owner `Box` holds a +1 strong reference;
+        // `from_value_ptr` adopts it into `_ref`, which releases it when it goes
+        // out of scope below. The `[u16]` payload has no element destructor to run.
+        unsafe {
+            let _ref: crate::internal::chunk_ref::ChunkRef<A> = crate::internal::chunk_ref::ChunkRef::from_value_ptr(self.ptr);
+        }
+    }
+}
+
 impl<A: Allocator + Clone> DerefMut for BoxUtf16Str<A> {
     #[inline]
     fn deref_mut(&mut self) -> &mut Utf16Str {
diff --git a/crates/multitude/src/strings/utf16_str_common.rs b/crates/multitude/src/strings/utf16_str_common.rs
index 45ae8e4bc..c676b27ae 100644
--- a/crates/multitude/src/strings/utf16_str_common.rs
+++ b/crates/multitude/src/strings/utf16_str_common.rs
@@ -6,12 +6,12 @@
 //! [`BoxUtf16Str`](super::BoxUtf16Str)).
 //!
 //! Both types share identical layout (`NonNull<u16>` + `PhantomData`),
-//! prefix-length reading, payload borrowing, `Drop` semantics, and
+//! prefix-length reading, payload borrowing, and
 //! formatting/comparison/hash/`Pointer`/`Serialize` impls. The macro
 //! below emits all of that for a given struct name; per-file blocks
 //! supply the items that legitimately differ (`Send`/`Sync` bounds,
-//! `Clone` for `Arc`, `DerefMut`/`AsMut`/`BorrowMut` and
-//! `as_mut_utf16_str` for `Box`).
+//! `Clone` and `Drop` for `Arc`, `Drop` for `Box`, `DerefMut`/`AsMut`/
+//! `BorrowMut` and `as_mut_utf16_str` for `Box`).
 
 /// Emit shared inherent shape + trait impls for a single-pointer
 /// UTF-16 string type with field layout `{ ptr: NonNull<u16>, _phantom }`.
@@ -55,18 +55,6 @@ macro_rules! impl_utf16_str_common {
             }
         }
 
-        impl<A: allocator_api2::alloc::Allocator + Clone> Drop for $Ty<A> {
-            #[inline]
-            fn drop(&mut self) {
-                // SAFETY: `ptr` is hosted in a 64K-aligned `SharedChunk`
-                // on which `self` owns a +1 strong reference. The
-                // `[u16]` payload has no element drop to run.
-                unsafe {
-                    let _ref: $crate::internal::chunk_ref::ChunkRef<A> = $crate::internal::chunk_ref::ChunkRef::from_value_ptr(self.ptr);
-                }
-            }
-        }
-
         impl<A: allocator_api2::alloc::Allocator + Clone> Unpin for $Ty<A> {}
 
         impl<A: allocator_api2::alloc::Allocator + Clone> core::ops::Deref for $Ty<A> {
diff --git a/crates/multitude/src/vec/freeze.rs b/crates/multitude/src/vec/freeze.rs
index 7a2ccae6e..827023046 100644
--- a/crates/multitude/src/vec/freeze.rs
+++ b/crates/multitude/src/vec/freeze.rs
@@ -1,13 +1,10 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
-//! Freeze a transient builder into arena-owned `Arc` or `Box` slices.
+//! Freeze a transient vector into arena-owned `Arc` or `Box` slices.
 //!
-//! The infallible freezes are exposed as `From<Vec<…>>` impls on
-//! [`Arc`](crate::Arc) / [`Box`](crate::Box) (mirroring `std`'s
-//! `From<Vec<T>> for Box<[T]>` / `Arc<[T]>`) plus the `std`-named
-//! [`Vec::into_boxed_slice`] / [`Vec::leak`] methods. Fallible variants
-//! ([`Vec::try_into_arc`] / [`Vec::try_into_boxed_slice`]) have no `std`
-//! counterpart and stay as inherent methods.
+//! Infallible freezes use `From<Vec<…>>` for [`Arc`](crate::Arc) /
+//! [`Box`](crate::Box) plus [`Vec::into_boxed_slice`] / [`Vec::leak`].
+//! Fallible freezes are [`Vec::try_into_arc`] and [`Vec::try_into_boxed_slice`].
 
 use core::mem::{self, ManuallyDrop};
 use core::slice;
@@ -15,10 +12,29 @@ use core::slice;
 use allocator_api2::alloc::{AllocError, Allocator};
 
 use super::Vec;
+use crate::Arena;
 use crate::arc::Arc;
 use crate::r#box::Box;
+use crate::internal::arena_buf::DrainAll;
 
 impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> {
+    /// Shared body of the `Box`/`Arc` freeze paths: drains every element into
+    /// a fresh shared allocation built by `build(arena, drain_iter)`, releases
+    /// this `Vec`'s now-empty backing buffer, and returns `build`'s result. The
+    /// old buffer is dropped only *after* `build` consumes the drain iterator,
+    /// so the moved-out elements stay readable for the duration of the freeze.
+    #[inline]
+    fn drain_freeze<R>(self, build: impl FnOnce(&'a Arena<A>, DrainAll<'a, T>) -> R) -> R {
+        let arena = self.arena;
+        let mut me = ManuallyDrop::new(self);
+        let iter = me.buf.drain_all();
+        let result = build(arena, iter);
+        // `drain_all` set `buf.len = 0`, so this only releases the (unused)
+        // backing buffer, never the moved-out elements.
+        drop(ManuallyDrop::into_inner(me));
+        result
+    }
+
     /// Freeze into a [`Box<[T], A>`](crate::Box).
     ///
     /// **O(n)** — moves the elements into a fresh shared allocation
@@ -27,18 +43,10 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> {
     ///
     /// # Panics
     ///
-    /// Panics if the underlying allocator fails, or — for `T: Drop` — if
-    /// `len` exceeds `u16::MAX`.
+    /// Panics if the underlying allocator fails.
     #[must_use]
     pub fn into_boxed_slice(self) -> Box<[T], A> {
-        let arena = self.arena;
-        let mut me = ManuallyDrop::new(self);
-        let iter = me.buf.drain_all();
-        let bx = arena.alloc_slice_fill_iter_box::<T, _>(iter);
-        // `drain_all` set `buf.len = 0`, so `into_inner`'s normal `Drop`
-        // only releases the (unused) backing buffer.
-        drop(ManuallyDrop::into_inner(me));
-        bx
+        self.drain_freeze(Arena::alloc_slice_fill_iter_box::<T, _>)
     }
 
     /// Fallible variant of [`Self::into_boxed_slice`].
@@ -49,13 +57,7 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> {
     /// fails. On error, `self` is consumed and any elements remaining
     /// after a partial move are dropped before this function returns.
     pub fn try_into_boxed_slice(self) -> Result<Box<[T], A>, AllocError> {
-        let arena = self.arena;
-        let mut me = ManuallyDrop::new(self);
-        let iter = me.buf.drain_all();
-        let result = arena.try_alloc_slice_fill_iter_box::<T, _>(iter);
-        // See `into_boxed_slice`.
-        drop(ManuallyDrop::into_inner(me));
-        result
+        self.drain_freeze(Arena::try_alloc_slice_fill_iter_box::<T, _>)
     }
 
     /// Fallible variant of the [`Arc<[T], A>`](crate::Arc) freeze
@@ -71,24 +73,15 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> {
         T: Send + Sync,
         A: Send + Sync,
     {
-        let arena = self.arena;
-        let mut me = ManuallyDrop::new(self);
-        let iter = me.buf.drain_all();
-        let result = arena.try_alloc_slice_fill_iter_arc::<T, _>(iter);
-        // See `into_boxed_slice`.
-        drop(ManuallyDrop::into_inner(me));
-        result
+        self.drain_freeze(Arena::try_alloc_slice_fill_iter_arc::<T, _>)
     }
 
     /// Consume the `Vec`, returning an arena-lifetime mutable slice
     /// reference `&'a mut [T]`. Mirrors [`std::vec::Vec::leak`].
     ///
-    /// **O(1) and allocation-free**: the existing buffer is reinterpreted
-    /// as a slice reference in place. No copy, no new allocation. The
-    /// unused tail (`cap - len`) is reclaimed back to the chunk's bump
-    /// cursor when this buffer is still the chunk's last allocation, so
-    /// later allocations can reuse it; otherwise it is left in the chunk
-    /// and reclaimed when the arena is dropped.
+    /// **O(1) and allocation-free**: the existing buffer becomes the returned
+    /// slice. The unused tail is reclaimed only while this buffer is still the
+    /// chunk's last allocation; otherwise arena teardown reclaims it.
     ///
     /// Available only when `T` does not need `Drop` (compile-time
     /// asserted). For drop types, freeze via [`Box::from`] / [`Arc::from`].
@@ -100,21 +93,16 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> {
                 "Vec::leak requires T not to need Drop; freeze via Box::from / Arc::from instead",
             );
         }
-        // Hand the unused capacity tail back to the chunk before pinning
-        // the live prefix as a slice. `[len, cap)` holds no initialized
-        // element, so reclaiming it is sound; the retained `[0, len)`
-        // prefix (and thus the returned slice) is untouched.
+        // Reclaim the uninitialized capacity tail before pinning the live
+        // prefix as the returned slice.
         let _ = self.reclaim_capacity_tail(self.buf.len());
         let mut me = ManuallyDrop::new(self);
         let ptr = me.buf.as_mut_ptr();
         let len = me.buf.len();
-        // SAFETY: by `ArenaBuf`'s invariants, `ptr` addresses `len`
-        // initialized `T`s in an arena chunk that outlives `'a`. We
-        // `ManuallyDrop` the `Vec` so neither the `ArenaBuf` nor its
-        // contained elements are dropped here. Since `T` does not need
-        // `Drop` (const-asserted above), abandoning the buffer without
-        // registering a chunk drop entry is sound — the chunk storage
-        // itself is reclaimed at arena teardown.
+        // SAFETY: `ptr` addresses `len` initialized `T`s in an arena chunk
+        // that outlives `'a`. `ManuallyDrop` prevents dropping the buffer or
+        // elements here; `T: !Drop` (const-asserted above) lets arena teardown
+        // reclaim the raw chunk storage without a drop entry.
         unsafe { slice::from_raw_parts_mut(ptr, len) }
     }
 
@@ -125,12 +113,6 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> {
         T: Send + Sync,
         A: Send + Sync,
     {
-        let arena = self.arena;
-        let mut me = ManuallyDrop::new(self);
-        let iter = me.buf.drain_all();
-        let arc = arena.alloc_slice_fill_iter_arc::<T, _>(iter);
-        // See `into_boxed_slice`.
-        drop(ManuallyDrop::into_inner(me));
-        arc
+        self.drain_freeze(Arena::alloc_slice_fill_iter_arc::<T, _>)
     }
 }
diff --git a/crates/multitude/src/vec/mod.rs b/crates/multitude/src/vec/mod.rs
index 1edf1377d..1e2174bda 100644
--- a/crates/multitude/src/vec/mod.rs
+++ b/crates/multitude/src/vec/mod.rs
@@ -132,7 +132,7 @@ impl<'a, T, A: Allocator + Clone> Vec<'a, T, A> {
             if let Some(u) = self.arena.try_reserve_local_slice::<T>(new_cap) {
                 break u;
             }
-            if self.arena.is_oversized_local(refill_hint) {
+            if self.arena.is_oversized(refill_hint) {
                 let (new_ptr, new_cap_actual) = self.arena.alloc_oversized_local_with(refill_hint, |mutator| {
                     let ticket = mutator
                         .try_alloc_uninit_slice::<T>(new_cap)
diff --git a/crates/multitude/src/vec/mutate.rs b/crates/multitude/src/vec/mutate.rs
index 8945b116e..15cbdf41c 100644
--- a/crates/multitude/src/vec/mutate.rs
+++ b/crates/multitude/src/vec/mutate.rs
@@ -7,12 +7,11 @@ use core::mem;
 use allocator_api2::alloc::{AllocError, Allocator};
 
 use super::Vec;
+use crate::arena::panic_alloc;
 use crate::internal::arena_buf::ArenaBuf;
 
-/// Rollback guard for `resize`/`resize_with`: if a user `clone` or
-/// closure panics partway through a grow, the guard's `Drop` truncates
-/// the buffer back to `old_len`, dropping every element written so far.
-/// On the success path the caller disarms it via [`mem::forget`].
+/// Rollback guard for `resize`/`resize_with`.
+/// On panic, truncates to `old_len`; success disarms it via [`mem::forget`].
 struct ResizeGuard<'b, 'a, T> {
     buf: &'b mut ArenaBuf<'a, T>,
     old_len: usize,
@@ -133,11 +132,8 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
 
     /// Shrink the capacity of the vector as much as possible.
     ///
-    /// O(1) reclamation when the buffer sits at the current bump cursor
-    /// of its chunk (no later allocation has moved the cursor past it):
-    /// the unused tail is returned to the chunk and the data pointer is
-    /// unchanged. Otherwise this is a no-op — the arena never relocates
-    /// or copies to shrink, so capacity simply stays put.
+    /// O(1) when the buffer is still at the chunk's bump cursor: returns the
+    /// unused tail without moving data. Otherwise this is a no-op.
     #[inline]
     #[cfg_attr(test, mutants::skip)] // thin delegation; logic covered via `reclaim_capacity_tail`
     pub fn shrink_to_fit(&mut self) {
@@ -146,10 +142,8 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
 
     /// Shrink the capacity with a lower bound.
     ///
-    /// The capacity will remain at least as large as both `self.len()` and
-    /// `min_capacity`. Reclamation only succeeds while the buffer still sits
-    /// at the chunk's bump cursor; otherwise this is a no-op (matching
-    /// [`std::vec::Vec::shrink_to`]'s "best-effort" contract).
+    /// Capacity remains at least `max(self.len(), min_capacity)`. Reclamation
+    /// only succeeds while the buffer is still at the chunk's bump cursor.
     #[cfg_attr(test, mutants::skip)]
     pub fn shrink_to(&mut self, min_capacity: usize) {
         if const { mem::size_of::<T>() == 0 } {
@@ -159,17 +153,12 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
         let _ = self.reclaim_capacity_tail(target);
     }
 
-    /// Reclaim the capacity tail `[target_cap, cap)` back to the chunk's
-    /// bump cursor when this buffer is still the chunk's last allocation
-    /// (an O(1) cursor rewind — no copy, data pointer unchanged). Returns
-    /// whether storage was reclaimed. A no-op when the buffer has been
-    /// overtaken by a later allocation, sits in a retired or oversized
-    /// chunk, or is a ZST.
-    ///
-    /// Callers must ensure the slots in `[target_cap, cap)` hold no live
-    /// element (either never initialized, or already dropped): the
-    /// reclaimed bytes return to the arena and may be overwritten by the
-    /// next allocation.
+    /// Reclaim `[target_cap, cap)` with an O(1) cursor rewind when this buffer
+    /// is still the chunk's last allocation. Returns whether storage was
+    /// reclaimed; no-op for later allocations, retired/oversized chunks, or ZSTs.
+    ///
+    /// Callers must ensure `[target_cap, cap)` contains no live elements
+    /// because the next arena allocation may overwrite it.
     #[inline]
     // Mutation testing is suppressed on the `total_bytes > max_normal_alloc`
     // early-return: `>` with `==` / `>=` mutations only differ at the exact
@@ -190,14 +179,9 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
         }
         let elem = mem::size_of::<T>();
         let data_addr = self.buf.as_ptr() as usize;
-        // One-past-the-end address of the current allocation. The product
-        // is the buffer's real byte size, bounded by its chunk, so it
-        // cannot overflow.
+        // Buffer byte size is bounded by its chunk, so this cannot overflow.
         let total_bytes = cap * elem;
-        // Buffers large enough to have been served by an oversized chunk
-        // are never at the `current_local` bump cursor; skip them so the
-        // cheap cursor check below never spuriously reclaims a one-shot
-        // chunk's storage.
+        // Oversized buffers are never at the `current_local` bump cursor.
         if total_bytes > self.arena.max_normal_alloc() {
             return false;
         }
@@ -259,8 +243,7 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
         assert!(start <= end, "extend_from_within: start > end");
         assert!(end <= len, "extend_from_within: range end out of bounds");
         let count = end - start;
-        // Reserve up front so the subsequent pushes cannot relocate the
-        // buffer (which would invalidate the source indices we read from).
+        // Reserve first so pushes cannot relocate the source indices.
         self.try_reserve(count)?;
         for i in start..end {
             let cloned = self.buf.as_slice()[i].clone();
@@ -366,7 +349,13 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
         Ok(())
     }
 
-    /// Reserve the minimum capacity for at least `additional` more elements.
+    /// Reserve the minimum capacity needed for at least `additional` more elements.
+    ///
+    /// Unlike [`Self::reserve`], this does not over-allocate via amortized
+    /// doubling: when growth is needed, the requested capacity is exactly
+    /// `len + additional` (modulo whatever the backing chunk's in-place growth
+    /// already provides); capacity that already suffices is left untouched.
+    /// Prefer [`Self::reserve`] when more elements are expected to be inserted afterwards.
     ///
     /// # Panics
     ///
@@ -374,9 +363,9 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
     /// Use [`Self::try_reserve_exact`] for a fallible variant.
     #[inline]
     pub fn reserve_exact(&mut self, additional: usize) {
-        // No tighter guarantee than `reserve`: the arena's slice
-        // reservation policy already returns the requested capacity.
-        self.reserve(additional);
+        if self.try_reserve_exact(additional).is_err() {
+            panic_alloc!();
+        }
     }
 
     /// Fallible variant of [`Self::reserve_exact`].
@@ -387,7 +376,13 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
     /// alignment is at least 32 KiB.
     #[inline]
     pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), AllocError> {
-        self.try_reserve(additional)
+        let needed = self.buf.len().checked_add(additional).ok_or(AllocError)?;
+        if needed <= self.buf.cap() {
+            return Ok(());
+        }
+        // Grow to exactly `needed` (no amortized-doubling slack), matching
+        // `alloc::vec::Vec::reserve_exact` semantics.
+        self.try_grow_to(needed)
     }
 
     /// Resize the vector to `new_len`, cloning `value` to fill new slots.
@@ -419,10 +414,7 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
         }
         let added = new_len - len;
         self.try_reserve(added)?;
-        // If a `clone` (or the final move) panics partway through, the
-        // guard rolls the length back to `len`, dropping every element
-        // written so far. This keeps the vector in a consistent state and
-        // never leaks the partially-grown tail.
+        // Roll back on panic so partially written elements are dropped.
         let guard = ResizeGuard {
             buf: &mut self.buf,
             old_len: len,
@@ -459,8 +451,7 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
         }
         let added = new_len - len;
         self.try_reserve(added)?;
-        // See `resize`: roll back on a panic in `f` so the elements
-        // written before the panic are dropped and the length is restored.
+        // See `resize`: roll back on panic in `f`.
         let guard = ResizeGuard {
             buf: &mut self.buf,
             old_len: len,
@@ -499,15 +490,12 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
         let len = self.buf.len();
         assert!(at <= len, "split index out of bounds (at is {at}, len is {len})");
         let tail_len = len - at;
-        // Copy/empty path for ZSTs, an unallocated head, or an empty
-        // tail: produce an independent tail and leave the head's storage
-        // (and capacity) intact.
+        // ZST, unallocated-head, and empty-tail cases produce an independent
+        // tail and leave the head's storage intact.
         if const { mem::size_of::<T>() == 0 } || self.buf.cap() == 0 || tail_len == 0 {
             let mut tail = Self::try_with_capacity_in(tail_len, self.arena)?;
-            // Only ZSTs reach here with `tail_len > 0` (a non-ZST `cap == 0`
-            // forces `tail_len == 0`). ZSTs carry no data, so popping the
-            // suffix straight into `tail` — which reverses order — is fine; no
-            // staging buffer is needed.
+            // Only ZSTs reach here with `tail_len > 0`; reversing them while
+            // popping is unobservable.
             for _ in 0..tail_len {
                 tail.buf
                     .push_within_cap(self.buf.pop().expect("tail length matches"))
@@ -516,9 +504,7 @@ impl<T, A: Allocator + Clone> Vec<'_, T, A> {
             }
             return Ok(tail);
         }
-        // Zero-copy split: the tail shares the same chunk storage as the
-        // head (storage is reclaimed only at arena teardown, which
-        // outlives both halves), so no elements are copied.
+        // Zero-copy split: both halves share chunk storage until arena teardown.
         let tail_buf = self.buf.split_off_buf(at);
         Ok(Self::from_buf(tail_buf, self.arena))
     }
diff --git a/crates/multitude/tests/alloc_ref.rs b/crates/multitude/tests/alloc_ref.rs
index f935aa120..c0e425bb5 100644
--- a/crates/multitude/tests/alloc_ref.rs
+++ b/crates/multitude/tests/alloc_ref.rs
@@ -406,9 +406,14 @@ fn wasted_tail_grows_on_local_refill_and_clears_on_reset() {
 }
 
 /// **Conservation invariant**: across a full retire-and-release cycle,
-/// the wasted-tail counter must return to exactly its starting value.
-/// Catches off-by-one or asymmetric-arithmetic bugs (e.g., add 4 KiB,
-/// subtract 4096) that observation-of-non-zero tests would miss.
+/// the local wasted-tail counter must return to exactly its starting
+/// value. Catches off-by-one or asymmetric-arithmetic bugs (e.g., add
+/// 4 KiB, subtract 4096) that observation-of-non-zero tests would miss.
+///
+/// Only local allocation paths are exercised here: `reset` governs local
+/// chunks, so it is what takes the gauge back to zero. Shared-chunk
+/// wasted tail is released by handle drop plus chunk turnover (not by
+/// `reset`) and is covered by the dedicated drop/cache-reuse tests in this file.
 #[cfg(feature = "stats")]
 #[test]
 fn wasted_tail_returns_to_exactly_baseline_across_full_cycle() {
@@ -416,20 +421,15 @@ fn wasted_tail_returns_to_exactly_baseline_across_full_cycle() {
     for cycle in 0..10 {
         let before = arena.stats().wasted_tail_bytes;
         assert_eq!(before, 0, "cycle {cycle}: baseline must be 0 before allocations begin");
-        // Mix of all major allocation paths to exercise every retire-
-        // generating code path within a single cycle:
         for _ in 0..4 {
             let _: &mut u64 = arena.alloc(42);
             let _: &mut [u8] = arena.alloc_slice_fill_with(256, |_| 0);
-            drop(arena.alloc_arc::<u64>(1));
-            drop(arena.alloc_box::<u64>(2));
-            drop(arena.alloc_slice_copy_arc::<u8>(&[0_u8; 1024]));
         }
         arena.reset();
         let after = arena.stats().wasted_tail_bytes;
         assert_eq!(
             after, 0,
-            "cycle {cycle}: after reset, the counter must return to exactly 0 \
+            "cycle {cycle}: after reset, the local counter must return to exactly 0 \
              (got {after}) — asymmetric add/subtract would leave a residue",
         );
     }
@@ -444,7 +444,7 @@ fn wasted_tail_returns_to_exactly_baseline_across_full_cycle() {
 fn wasted_tail_correct_after_cache_reuse_cycles() {
     let mut arena = Arena::new();
     let mut acquired_chunks_total = 0u64;
-    for _ in 0..20 {
+    for _ in 0..8 {
         // Force at least one full chunk's worth of allocs so we cycle
         // through `current_local` AND populate the cache on reset.
         for _ in 0..64 {
@@ -472,10 +472,12 @@ fn wasted_tail_decreases_monotonically_as_pinned_arcs_drop() {
     let arena = Arena::new();
     let mut pins = std::vec::Vec::new();
     // Build up several pinned chunks by interleaving a pin with allocs
-    // that force a shared refill.
-    for _ in 0..5 {
+    // that force a shared refill. A few moderately sized copies per pin
+    // overflow the (initially small) shared chunk, retiring it while the
+    // pin holds it — far fewer allocations than a long inner loop.
+    for _ in 0..4 {
         pins.push(arena.alloc_arc::<u64>(99));
-        for _ in 0..10 {
+        for _ in 0..3 {
             drop(arena.alloc_slice_copy_arc::<u8>(&[0_u8; 2048]));
         }
     }
@@ -538,21 +540,36 @@ fn wasted_tail_handles_oversized_local_retire() {
 /// many times. If the subtract ever exceeded the matching add even by
 /// one byte, the running counter would underflow to a value near
 /// `u64::MAX`.
+///
+/// The conservation bound is `wasted_tail_bytes <= total_bytes_allocated`:
+/// the arena cannot waste more tail than it currently holds. This holds
+/// regardless of whether the slack lives in local or (still-installed)
+/// shared chunks, and an underflow would blow the wasted gauge far past
+/// the total. `reset` only clears local wasted tail, so it is not
+/// expected to drive the gauge to zero while a shared chunk is live.
 #[cfg(feature = "stats")]
 #[test]
 fn wasted_tail_never_underflows_under_stress() {
     let mut arena = Arena::new();
-    for _ in 0..256 {
+    let filler = [0_u8; 64];
+    for _ in 0..10 {
         let _: &mut u64 = arena.alloc(0);
-        let _: &mut [u8] = arena.alloc_slice_fill_with(64, |_| 0);
+        let _: &mut [u8] = arena.alloc_slice_copy(filler);
         drop(arena.alloc_arc::<u64>(0));
         drop(arena.alloc_box::<u64>(0));
         drop(arena.alloc_slice_copy_arc::<u8>(&[0_u8; 4096]));
-        // Always-positive: counter never observed as huge.
-        assert!(arena.stats().wasted_tail_bytes < u64::MAX / 2);
+        let stats = arena.stats();
+        assert!(
+            stats.wasted_tail_bytes <= stats.total_bytes_allocated,
+            "wasted tail ({}) must never exceed total bytes outstanding ({}) — \
+             an underflow would wrap it near u64::MAX",
+            stats.wasted_tail_bytes,
+            stats.total_bytes_allocated,
+        );
     }
     arena.reset();
-    assert_eq!(arena.stats().wasted_tail_bytes, 0);
+    let stats = arena.stats();
+    assert!(stats.wasted_tail_bytes <= stats.total_bytes_allocated);
 }
 
 use crate::common::FailingAllocator;
diff --git a/crates/multitude/tests/arena.rs b/crates/multitude/tests/arena.rs
index 0c3bb64c3..4888387c0 100644
--- a/crates/multitude/tests/arena.rs
+++ b/crates/multitude/tests/arena.rs
@@ -408,12 +408,12 @@ mod reset {
     #[cfg(feature = "stats")]
     #[test]
     fn reset_works_with_pinned_chunks() {
-        // Force chunk rotation by allocating multiple buffers that fill the
-        // chunk. We seed the high-water to class 7 so the rotated chunks
-        // are eligible for caching when they return after `reset`.
-        // `alloc_uninit::<MaybeUninit<[u8; 4000]>>` skips per-byte init.
+        // Allocate a couple of near-max_normal_alloc buffers to put the
+        // (class-7, 64 KiB) starter chunk into use. `MaybeUninit<[u8;
+        // 4000]>` skips per-byte init; a couple of them is enough to
+        // exercise the reset→cache→reuse path without a long alloc loop.
         let mut arena: Arena = Arena::builder().max_normal_alloc(4 * 1024).with_capacity_local(64 * 1024).build();
-        for _ in 0..5 {
+        for _ in 0..2 {
             let _ = arena.alloc(core::mem::MaybeUninit::<[u8; 4000]>::uninit());
         }
         let chunks_before = arena.stats().normal_local_chunks_allocated;
@@ -474,6 +474,89 @@ mod reset {
         // Arena still usable.
         let _ = arena.alloc_arc(11_u32);
     }
+
+    /// Regression for the `reset`-retires-shared-chunks bug.
+    ///
+    /// `reset` must touch only local chunks. It used to also retire the
+    /// current shared chunk (reconcile its surplus + reinstall the empty
+    /// sentinel). That broke workloads that nest arena [`Arc`]s inside an
+    /// outer arena `Arc` in the same shared chunk: the inner arcs' drops are
+    /// deferred to chunk teardown (refcount reaching 0), but the outer arc's
+    /// own slice elements pin the chunk until then, so the chunk can never
+    /// reach 0 while it is the retired-but-referenced current chunk. Each
+    /// reset therefore allocated **one fresh shared chunk per cycle**
+    /// (linear growth, slope 1 — the benchmark saw a fresh ~64 KiB chunk
+    /// every iteration).
+    ///
+    /// With reset leaving shared state alone, shared chunks are bump-filled
+    /// across cycles and a new (larger) chunk is needed only occasionally as
+    /// the size class ratchets up, so the count grows strictly sub-linearly.
+    /// This test pins the slope: across a measured batch of `BATCH` reset
+    /// cycles the shared-chunk count must grow by far less than `BATCH`
+    /// (the buggy code grew by exactly `BATCH`).
+    #[cfg(feature = "stats")]
+    #[test]
+    fn reset_does_not_allocate_a_fresh_shared_chunk_per_cycle() {
+        // Each cycle just needs to *use* a shared chunk so that `reset`'s
+        // shared-chunk handling is exercised; a single `Arc` allocation
+        // does that. (The nested-structure variant is covered separately
+        // by `reset_keeps_nested_arc_structures_valid_across_cycles`.)
+        // Keeping the per-cycle work to one allocation bounds the Miri
+        // interpreter cost while still pinning the slope.
+        fn build(arena: &Arena) {
+            drop(arena.alloc_arc(0xAB_u64));
+        }
+
+        const WARMUP: usize = 16;
+        const BATCH: usize = 64;
+
+        let mut arena = Arena::new();
+        for _ in 0..WARMUP {
+            build(&arena);
+            arena.reset();
+        }
+        let before = arena.stats().normal_shared_chunks_allocated;
+        for _ in 0..BATCH {
+            build(&arena);
+            arena.reset();
+        }
+        let grew_by = arena.stats().normal_shared_chunks_allocated - before;
+
+        // Buggy `reset` grew by exactly `BATCH` (one fresh chunk per cycle).
+        // The correct behavior grows by only a handful (a few class-size
+        // bumps). A generous sub-linear ceiling cleanly separates the two.
+        assert!(
+            grew_by < BATCH as u64 / 8,
+            "reset must not allocate a fresh shared chunk per cycle: \
+             {grew_by} new chunks over {BATCH} cycles (buggy code allocates ~{BATCH})",
+        );
+    }
+
+    /// Companion to the leak regression above, runnable without `stats`:
+    /// the nested-`Arc` structure must stay valid and drop cleanly across
+    /// repeated reset cycles. Builds the structure, reads it back, drops it,
+    /// resets, and repeats — confirming `reset` leaves outstanding
+    /// shared-chunk contents intact.
+    #[test]
+    fn reset_keeps_nested_arc_structures_valid_across_cycles() {
+        let mut arena = Arena::new();
+        for cycle in 0..8_u8 {
+            let outer: Arc<[Arc<[u8]>]> = {
+                let mut v = arena.alloc_vec_with_capacity::<Arc<[u8]>>(4);
+                for i in 0_u8..4 {
+                    v.push(arena.alloc_slice_copy_arc(&[cycle, i, 0xCD]));
+                }
+                v.try_into_arc().unwrap()
+            };
+            assert_eq!(outer.len(), 4);
+            for (i, inner) in outer.iter().enumerate() {
+                let i = u8::try_from(i).unwrap();
+                assert_eq!(&**inner, &[cycle, i, 0xCD]);
+            }
+            drop(outer);
+            arena.reset();
+        }
+    }
 }
 
 mod large_alloc {
@@ -535,12 +618,17 @@ mod large_alloc {
     #[test]
     fn alloc_slice_clone_above_chunk_boundary() {
         let arena = Arena::new();
-        let n = CHUNK_BYTES / 8 + 4; // 65568 bytes
-        let src: Vec<u64> = (0..n as u64).collect();
-        let s = arena.alloc_slice_clone::<u64>(&src);
+        // Use `u128` so the element count needed to exceed `CHUNK_BYTES`
+        // is half of what `u64` needs (and 16x smaller than with `u8`) — the
+        // `alloc_slice_clone` path still clones every element across the
+        // oversized chunk, so fewer elements means far less Miri work for
+        // the same `> CHUNK_BYTES` byte threshold.
+        let n = CHUNK_BYTES / 16 + 2; // 4098 u128 => > 64 KiB
+        let src: Vec<u128> = (0..n as u128).collect();
+        let s = arena.alloc_slice_clone::<u128>(&src);
         assert_eq!(s.len(), src.len());
         assert_eq!(s[0], 0);
-        assert_eq!(s[s.len() - 1], (s.len() - 1) as u64);
+        assert_eq!(s[s.len() - 1], (s.len() - 1) as u128);
     }
 
     #[test]
@@ -645,12 +733,22 @@ mod large_alloc {
     #[test]
     fn alloc_vec_with_capacity_at_far_over_chunk() {
         let arena = Arena::new();
-        let mut v = arena.alloc_vec_with_capacity::<u32>(FAR_OVER_CHUNK / 4);
-        for i in 0..(FAR_OVER_CHUNK / 4) {
-            v.push(i as u32);
-        }
-        assert_eq!(v.len(), FAR_OVER_CHUNK / 4);
-        assert_eq!(v[v.len() - 1], (v.len() - 1) as u32);
+        let cap = FAR_OVER_CHUNK / 4;
+        let mut v = arena.alloc_vec_with_capacity::<u32>(cap);
+        // Fill the (far-over-chunk) capacity in one bulk `extend_from_slice`
+        // (a single memcpy) rather than `cap` individual `push` calls — the
+        // per-`push` arena bookkeeping is what dominates under Miri. A
+        // bulk-zeroed source vec is itself a single allocation.
+        v.extend_from_slice(&std::vec![0_u32; cap]);
+        assert_eq!(v.len(), cap);
+        // The first slot, the slot at byte offset `CHUNK_BYTES`, and the last
+        // slot must all be addressable and writable across the oversized chunk.
+        v[0] = 0xA1;
+        v[CHUNK_BYTES / 4] = 0xB2;
+        v[cap - 1] = 0xC3;
+        assert_eq!(v[0], 0xA1);
+        assert_eq!(v[CHUNK_BYTES / 4], 0xB2);
+        assert_eq!(v[cap - 1], 0xC3);
     }
 
     // ============================================================================
@@ -715,15 +813,20 @@ mod large_alloc {
     #[test]
     fn alloc_vec_extend_from_iter_past_chunk_boundary() {
         let arena = Arena::new();
-        let mut v = arena.alloc_vec::<u16>();
-        v.extend((0..(OVER_CHUNK / 2) as u16).map(|i| i.wrapping_mul(13)));
-        assert_eq!(v.len(), OVER_CHUNK / 2);
+        // Exercise the `Extend`-from-iterator growth path across the chunk
+        // boundary. Using `u128` reaches `> CHUNK_BYTES` with 8x fewer
+        // elements than `u16`, so the per-element interpreted `extend`
+        // loop (which a lazy `map` iterator forces) is 8x shorter.
+        let mut v = arena.alloc_vec::<u128>();
+        let n = OVER_CHUNK / 16 + 1; // > 64 KiB worth of u128
+        v.extend((0..n as u128).map(|i| i.wrapping_mul(13)));
+        assert_eq!(v.len(), n);
         // Spot-check first, mid-chunk and last instead of iterating
         // every element; a chunk-boundary bug would manifest at any of
         // these positions equally and the per-element cost dominates
         // under Miri.
-        for i in [0, OVER_CHUNK / 4, OVER_CHUNK / 2 - 1] {
-            assert_eq!(v[i], (i as u16).wrapping_mul(13));
+        for i in [0, n / 2, n - 1] {
+            assert_eq!(v[i], (i as u128).wrapping_mul(13));
         }
     }
 
@@ -845,15 +948,21 @@ mod large_alloc {
     #[test]
     fn many_oversized_allocations_in_one_arena() {
         // The property under test is that an arena tolerates *multiple*
-        // oversized one-shot chunks coexisting. Using `[u128; OVER_CHUNK/16]`
-        // gives the same byte-count threshold (above `MAX_CHUNK_BYTES`) but
-        // a 16× shorter `alloc_slice_fill_with` closure loop — a big win
-        // under Miri where each closure invocation is interpreted.
+        // oversized one-shot chunks coexisting. `[u128; OVER_CHUNK/16+1]`
+        // gives the byte-count threshold (above `MAX_CHUNK_BYTES`). Each
+        // round is a single bulk `alloc_slice_copy` (one memcpy) from a
+        // shared zeroed source rather than an `N_U128`-long fill closure
+        // loop; per-round sentinels written into the first and last slots
+        // preserve the distinct-content checks that prove the oversized
+        // chunks don't alias.
         const N_U128: usize = OVER_CHUNK / 16 + 1; // > 64 KiB worth of u128
         let arena = Arena::new();
-        let mut keepers: Vec<&[u128]> = Vec::with_capacity(8);
+        let src = std::vec![0_u128; N_U128];
+        let mut keepers: Vec<&mut [u128]> = Vec::with_capacity(8);
         for round in 0..8u8 {
-            let s: &mut [u128] = arena.alloc_slice_fill_with::<u128, _>(N_U128, move |_| u128::from(round));
+            let s: &mut [u128] = arena.alloc_slice_copy::<u128>(&src);
+            s[0] = u128::from(round);
+            s[N_U128 - 1] = u128::from(round);
             keepers.push(s);
         }
         for (round, s) in keepers.iter().enumerate() {
@@ -1329,7 +1438,7 @@ mod fast_path_correctness {
             count += 1;
             assert!(count < 20_000, "should have triggered new chunk by now");
         }
-        assert!(count > 50, "chunk should hold many Arc<u64>s");
+        assert!(count > 10, "chunk should hold many Arc<u64>s");
     }
 
     #[cfg(feature = "stats")]
@@ -1572,22 +1681,23 @@ mod mutants_for_chunk_provider {
         // The property under test: the size-class ratchet caps at the
         // largest cacheable class (class 7 = 64 KiB total). After the
         // first few refills ratchet there, subsequent refills stay at
-        // class 7 — they don't keep doubling. To observe this we
-        // allocate a handful of 8 KiB boxes (just under MAX_NORMAL_ALLOC
-        // = 16 KiB, so still routed through the normal cache) and
-        // confirm none route to oversized. A 64 KiB class-7 chunk fits
-        // a couple of these, so 8 boxes span ≥ 2 chunks, proving the
-        // ratchet stays at class 7 rather than degrading or escaping.
+        // class 7 — they don't keep doubling. To observe this we allocate
+        // a handful of ~13 KiB boxes (under MAX_NORMAL_ALLOC = 16 KiB, so
+        // still routed through the normal cache) and confirm none route to
+        // oversized. Five ~13 KiB boxes total > 64 KiB, so they span ≥ 2
+        // class-7 chunks, proving the ratchet stays at class 7 rather than
+        // degrading or escaping. Larger-but-fewer boxes keep the byte
+        // threshold while minimising the per-allocation Miri cost.
         let arena = Arena::new();
-        let mut keep: Vec<Box<core::mem::MaybeUninit<[u8; 8 * 1024]>>> = Vec::new();
-        for _ in 0..8 {
-            keep.push(arena.alloc_uninit_box::<[u8; 8 * 1024]>());
+        let mut keep: Vec<Box<core::mem::MaybeUninit<[u8; 13 * 1024]>>> = Vec::new();
+        for _ in 0..5 {
+            keep.push(arena.alloc_uninit_box::<[u8; 13 * 1024]>());
         }
         let s = arena.stats();
         assert_eq!(s.oversized_shared_chunks_allocated, 0);
         assert!(
             s.normal_shared_chunks_allocated >= 2,
-            "8 × 8 KiB boxes must span ≥ 2 class-7 chunks, got {}",
+            "5 × 13 KiB boxes must span ≥ 2 class-7 chunks, got {}",
             s.normal_shared_chunks_allocated
         );
     }
@@ -2316,23 +2426,24 @@ mod coverage_arena_gaps {
     }
 
     // ============================================================================
-    // inner_slice.rs:441 — `alloc_slice_local_with_or_panic` `len > u16::MAX`
-    // with drop_fn panic.
-    // inner_slice.rs:1014 — shared sibling.
+    // Per-`Arc` reference counting removes the `u16` element-count cap on
+    // `Arc<[T]>` slices: a Drop-typed slice longer than `u16::MAX` is now
+    // dropped via `drop_in_place::<[T]>` in `Arc::drop`, not a counted
+    // chunk drop entry, so it allocates successfully.
     // ============================================================================
 
-    #[cfg(feature = "std")]
+    #[cfg(all(feature = "std", not(miri)))]
     #[test]
-    #[should_panic(expected = "multitude: allocator returned AllocError")]
-    fn alloc_slice_fill_with_arc_drop_too_long_panics() {
+    fn alloc_slice_fill_with_arc_drop_long_succeeds() {
         #[derive(Clone)]
         struct D;
-        #[expect(clippy::empty_drop, reason = "Drop impl makes needs_drop::<D>() true so a drop_fn is installed")]
+        #[expect(clippy::empty_drop, reason = "Drop impl makes needs_drop::<D>() true")]
         impl Drop for D {
             fn drop(&mut self) {}
         }
         let arena = Arena::<Global>::new();
-        let _ = arena.alloc_slice_fill_with_arc(u16::MAX as usize + 1, |_| D);
+        let arc = arena.alloc_slice_fill_with_arc(u16::MAX as usize + 1, |_| D);
+        assert_eq!(arc.len(), u16::MAX as usize + 1);
     }
 
     // ============================================================================
@@ -2878,12 +2989,12 @@ mod from_mutants_extras_stats {
         let arena = Arena::new();
         // Ratchet the chunk class via a few large uninit fillers
         // (`alloc_uninit_arc` skips per-byte init cost).
-        for _ in 0..8 {
+        for _ in 0..4 {
             let _filler: Arc<core::mem::MaybeUninit<[u8; 8 * 1024]>> = arena.alloc_uninit_arc::<[u8; 8 * 1024]>();
         }
         // A short burst still exercises the small-allocation slow refill path
         // at the peak shared chunk class.
-        for i in 0_u32..32 {
+        for i in 0_u32..16 {
             let _a: Arc<u32> = arena.alloc_arc(i);
         }
         assert_eq!(arena.stats().oversized_shared_chunks_allocated, 0);
diff --git a/crates/multitude/tests/audit_repro.rs b/crates/multitude/tests/audit_repro.rs
index 3c5651fb4..78422af3f 100644
--- a/crates/multitude/tests/audit_repro.rs
+++ b/crates/multitude/tests/audit_repro.rs
@@ -46,15 +46,20 @@ fn alloc_box_of_maybeuninit_assume_init_drops_inner() {
     assert_eq!(counter.load(Ordering::Relaxed), 1);
 }
 
-/// Arc variant of the panic-on-misuse fix.
-#[cfg(not(miri))]
+/// With per-`Arc` reference counting, `alloc_arc(MaybeUninit::new(x))`
+/// followed by `assume_init` works correctly: `Arc::drop` runs the inner
+/// value's destructor eagerly on the last clone (no chunk drop entry is
+/// involved), so the previously-unsupported pattern is now sound.
 #[test]
-#[should_panic(expected = "no drop entry reserved")]
-fn alloc_arc_of_maybeuninit_assume_init_panics_when_unsupported() {
+fn alloc_arc_of_maybeuninit_assume_init_drops_inner() {
     let counter = StdArc::new(AtomicUsize::new(0));
-    let arena = Arena::new();
-    let arc_uninit = arena.alloc_arc(MaybeUninit::new(DropCounter(counter.clone())));
-    let _arc = unsafe { arc_uninit.assume_init() };
+    {
+        let arena = Arena::new();
+        let arc_uninit = arena.alloc_arc(MaybeUninit::new(DropCounter(counter.clone())));
+        let arc = unsafe { arc_uninit.assume_init() };
+        drop(arc);
+    }
+    assert_eq!(counter.load(Ordering::Relaxed), 1);
 }
 
 /// `arena.alloc_uninit_arc::<U>()` followed by `assume_init` reserves the
@@ -172,10 +177,12 @@ fn zst_shared_handouts_advance_cursor() {
     let bx2 = arena.alloc_box(());
     assert_ne!(bx1.as_ptr(), bx2.as_ptr(), "ZST Box handouts must get distinct addresses");
 
-    // Many create-and-drop cycles force the chunk to fill (1 byte each)
-    // and refill. Pre-fix the cursor never advanced, so this pattern
-    // could drive the live chunk's atomic refcount to zero.
-    for _ in 0..2_000 {
+    // A few hundred create-and-drop cycles still force the (512-byte
+    // starter) chunk to fill (1 byte each) and refill at least once. Pre-fix
+    // the cursor never advanced, so this pattern could drive the live
+    // chunk's atomic refcount to zero. A few hundred iterations exercise the
+    // refill the tag now forces without a multi-thousand Miri loop.
+    for _ in 0..600 {
         drop(arena.alloc_arc(()));
         drop(arena.alloc_box(()));
     }
diff --git a/crates/multitude/tests/coverage_extras.rs b/crates/multitude/tests/coverage_extras.rs
index 953780d44..eea956cd4 100644
--- a/crates/multitude/tests/coverage_extras.rs
+++ b/crates/multitude/tests/coverage_extras.rs
@@ -1513,18 +1513,21 @@ mod coverage_more {
     }
 
     #[test]
-    #[should_panic(expected = "allocator returned AllocError")]
-    // Skipped under Miri: the test must register `u16::MAX + 1` drop
-    // entries to trigger the overflow panic, and Miri's per-allocation
-    // overhead pushes this past the 10-minute CI budget. The panic is a
-    // runtime-checked assertion, not a memory-safety property, so Miri
-    // adds no value beyond what `cargo test` already verifies.
+    // Skipped under Miri: building + dropping `u16::MAX + 1` elements
+    // (~65K) exceeds Miri's test budget. The lifted restriction is a
+    // runtime property, not a memory-safety one, so native + cargo-careful
+    // runs cover it.
     #[cfg_attr(miri, ignore)]
-    fn vec_into_box_panics_when_drop_slice_is_too_long_for_entry() {
+    fn vec_into_box_drop_slice_longer_than_u16_succeeds() {
+        // `Box<[T]>` drops via `drop_in_place::<[T]>` (no `u16`-counted
+        // drop entry), so a `T: Drop` slice longer than `u16::MAX` freezes
+        // into a `Box` without rejection.
         let arena = Arena::new();
         let mut v = arena.alloc_vec::<Droppy>();
-        v.extend((0..=u16::MAX).map(|_| Droppy("many")));
-        let _ = v.into_boxed_slice();
+        let len = (u16::MAX as usize) + 1;
+        v.extend((0..len).map(|_| Droppy("many")));
+        let b = v.into_boxed_slice();
+        assert_eq!(b.len(), len);
     }
 
     #[test]
diff --git a/crates/multitude/tests/coverage_gaps.rs b/crates/multitude/tests/coverage_gaps.rs
index 8b210a91d..6028414d2 100644
--- a/crates/multitude/tests/coverage_gaps.rs
+++ b/crates/multitude/tests/coverage_gaps.rs
@@ -750,24 +750,32 @@ mod drop_slice_over_u16_max_returns_err {
         assert!(a.try_alloc_slice_fill_iter::<D, _>((0..TOO_LONG).map(|i| D(i as u8))).is_err());
     }
 
+    // `Arc<[T]>` uninit/zeroed slices have no `u16` element-count cap
+    // under per-`Arc` reference counting (they drop via
+    // `drop_in_place::<[T]>`, not a `u16`-counted chunk entry), so a
+    // Drop-typed slice longer than `u16::MAX` now allocates successfully.
+    #[cfg(not(miri))]
     #[test]
-    fn try_alloc_uninit_slice_arc_over_u16_err() {
+    fn uninit_slice_arc_over_u16_succeeds() {
         struct D(u32);
         impl Drop for D {
             fn drop(&mut self) {}
         }
         let a = Arena::new();
-        assert!(a.try_alloc_uninit_slice_arc::<D>(TOO_LONG).is_err());
+        let arc = a.try_alloc_uninit_slice_arc::<D>(TOO_LONG).expect("Arc slices have no u16 cap");
+        assert_eq!(arc.len(), TOO_LONG);
     }
 
+    #[cfg(not(miri))]
     #[test]
-    fn try_alloc_zeroed_slice_arc_over_u16_err() {
+    fn zeroed_slice_arc_over_u16_succeeds() {
         struct D(u32);
         impl Drop for D {
             fn drop(&mut self) {}
         }
         let a = Arena::new();
-        assert!(a.try_alloc_zeroed_slice_arc::<D>(TOO_LONG).is_err());
+        let arc = a.try_alloc_zeroed_slice_arc::<D>(TOO_LONG).expect("Arc slices have no u16 cap");
+        assert_eq!(arc.len(), TOO_LONG);
     }
 }
 
@@ -925,10 +933,10 @@ mod uninit_drop_init_from_iter {
 }
 
 // ============================================================================
-// internal/uninit.rs:487–489 — `into_uninit_slice_placeholder(zeroed=true)`
-// exercised by `alloc_zeroed_slice_arc` for drop types.
+// `alloc_zeroed_slice_arc` for a drop type zero-fills the payload (the
+// `MaybeUninit::zeroed` fill path).
 // ============================================================================
-mod uninit_into_uninit_slice_placeholder_zeroed {
+mod zeroed_slice_arc_zeroes_payload {
     use core::mem::MaybeUninit;
 
     use multitude::Arena;
@@ -1103,34 +1111,39 @@ mod arc_borrow {
 }
 
 // ============================================================================
-// arc.rs — slice assume_init missing-drop-entry panic (287–290).
+// arc.rs — slice assume_init is a pure reinterpret under per-`Arc`
+// reference counting; element destructors run eagerly in `Arc::drop`.
 // ============================================================================
-mod arc_assume_init_slice_panics_when_drop_entry_missing {
+mod arc_assume_init_slice_drops_each_element {
     use core::mem::MaybeUninit;
-    use std::panic::{AssertUnwindSafe, catch_unwind};
+    use std::sync::Arc as StdArc;
+    use std::sync::atomic::{AtomicUsize, Ordering};
 
     use multitude::Arena;
 
     #[test]
-    fn slice_assume_init_for_drop_type_without_placeholder_panics() {
-        // `MaybeUninit<D>` is itself never-drop (MaybeUninit suppresses
-        // drops), so allocating an `Arc<[MaybeUninit<D>]>` via the regular
-        // fill helper does NOT reserve a placeholder slice drop entry.
-        // Calling `assume_init` on the resulting handle then triggers the
-        // slice-side panic message because `needs_drop::<D>()` is true.
-        #[derive(Clone)]
-        struct D(#[expect(dead_code, reason = "field gives the type a non-zero size")] u32);
+    fn slice_assume_init_for_drop_type_drops_each_element() {
+        // `alloc_slice_fill_with_arc::<MaybeUninit<D>>` + `assume_init`
+        // used to be rejected (no placeholder drop entry). Now
+        // `assume_init` is a pure reinterpret and `Arc::drop` runs each
+        // element's destructor via `drop_in_place::<[D]>`.
+        struct D(StdArc<AtomicUsize>);
         impl Drop for D {
-            fn drop(&mut self) {}
+            fn drop(&mut self) {
+                self.0.fetch_add(1, Ordering::Relaxed);
+            }
         }
-        let arena = Arena::new();
-        let r = catch_unwind(AssertUnwindSafe(|| {
-            let arc: multitude::Arc<[MaybeUninit<D>]> = arena.alloc_slice_fill_with_arc(2, |_| MaybeUninit::new(D(0)));
-            // SAFETY: elements are initialized above; the panic comes from
-            // the missing placeholder drop entry, not from undefined behavior.
-            let _: multitude::Arc<[D]> = unsafe { arc.assume_init() };
-        }));
-        assert!(r.is_err());
+        let counter = StdArc::new(AtomicUsize::new(0));
+        {
+            let arena = Arena::new();
+            let arc: multitude::Arc<[MaybeUninit<D>]> =
+                arena.alloc_slice_fill_with_arc(2, |_| MaybeUninit::new(D(StdArc::clone(&counter))));
+            // SAFETY: both elements were initialized above.
+            let init: multitude::Arc<[D]> = unsafe { arc.assume_init() };
+            assert_eq!(init.len(), 2);
+            drop(init);
+        }
+        assert_eq!(counter.load(Ordering::Relaxed), 2);
     }
 }
 
@@ -1455,8 +1468,11 @@ mod allocator_impl_grow_to_zero_overlap {
 }
 
 // ============================================================================
-// alloc_unsized.rs — metadata-too-large rejection for `[D]` slice DSTs
+// alloc_unsized.rs — metadata-too-large handling for `[D]` slice DSTs
 // and refill-failure path for `try_alloc_dst_box` (lines 229–230, 261).
+// Neither the `Box` nor the `Arc` path packs `T: Drop` slice metadata
+// into the chunk drop-list's `u16` slot; both keep it full-width and run
+// `drop_in_place` eagerly, so neither caps the length at `u16::MAX`.
 // Lives here rather than as a `src/` unit test so the empty `Drop`
 // impl on the probe type doesn't bloat src-coverage counts.
 // ============================================================================
@@ -1475,14 +1491,63 @@ mod alloc_unsized_extras {
     }
 
     #[test]
-    fn try_alloc_dst_arc_slice_drop_metadata_too_large_returns_err() {
+    // Skipped under Miri: writing + dropping `u16::MAX + 1` elements
+    // (~65K) to exercise the slice-length boundary exceeds Miri's test
+    // budget; the lifted restriction is a runtime property, not a
+    // memory-safety one, so native + cargo-careful runs cover it.
+    #[cfg_attr(miri, ignore)]
+    fn try_alloc_dst_box_slice_drop_metadata_too_large_succeeds() {
+        // Like the `Arc` path, the `Box` path stores slice metadata
+        // full-width in the chunk prefix and drops via
+        // `drop_in_place::<[D]>` (no `u16` drop-list slot), so a `T: Drop`
+        // slice longer than `u16::MAX` is accepted.
         let arena = Arena::new();
         let len = (u16::MAX as usize) + 1;
         let layout = Layout::array::<D>(len).unwrap();
-        // SAFETY: the metadata-too-large rejection fires before `init`
-        // is invoked, so no actual initialization happens.
-        let r = unsafe { arena.try_alloc_dst_arc::<[D]>(layout, len, |_p: *mut [D]| {}) };
-        assert!(r.is_err());
+        // SAFETY: `layout` describes `[D; len]`; `init` writes a valid
+        // `D` into every element before the `Box` is observed, so the
+        // eager `drop_in_place::<[D]>` in `Box::drop` runs on live values.
+        let r = unsafe {
+            arena.try_alloc_dst_box::<[D]>(layout, len, |p: *mut [D]| {
+                let base = p.cast::<D>();
+                for i in 0..len {
+                    // SAFETY: `base..base + len` is the freshly reserved
+                    // `[D]` buffer described by `layout`.
+                    base.add(i).write(D::default());
+                }
+            })
+        };
+        assert!(r.is_ok());
+    }
+
+    #[test]
+    // Skipped under Miri: writing + dropping `u16::MAX + 1` elements
+    // (~65K) to exercise the slice-length boundary exceeds Miri's test
+    // budget (~8 min observed). The lifted-restriction behavior is a
+    // runtime property, not a memory-safety one; native + cargo-careful
+    // runs verify it on every CI execution.
+    #[cfg_attr(miri, ignore)]
+    fn try_alloc_dst_arc_slice_drop_metadata_too_large_succeeds() {
+        // Like the `Box` path, the `Arc` path stores slice metadata
+        // verbatim (not in a `u16` drop-list slot), so a `T: Drop`
+        // slice longer than `u16::MAX` is accepted.
+        let arena = Arena::new();
+        let len = (u16::MAX as usize) + 1;
+        let layout = Layout::array::<D>(len).unwrap();
+        // SAFETY: `layout` describes `[D; len]`; `init` writes a valid
+        // `D` into every element before the `Arc` is observed, so the
+        // eager `drop_in_place::<[D]>` on teardown runs on live values.
+        let r = unsafe {
+            arena.try_alloc_dst_arc::<[D]>(layout, len, |p: *mut [D]| {
+                let base = p.cast::<D>();
+                for i in 0..len {
+                    // SAFETY: `base..base + len` is the freshly reserved
+                    // `[D]` buffer described by `layout`.
+                    base.add(i).write(D::default());
+                }
+            })
+        };
+        assert!(r.is_ok());
     }
 
     #[test]
diff --git a/crates/multitude/tests/dst.rs b/crates/multitude/tests/dst.rs
index 69fcca955..dd2f49764 100644
--- a/crates/multitude/tests/dst.rs
+++ b/crates/multitude/tests/dst.rs
@@ -466,13 +466,16 @@ mod dst_box {
         assert_eq!(COUNT.load(Ordering::SeqCst), before + 4);
     }
 
-    /// Regression: a slice DST with `len > u16::MAX` and `T: Drop` must be
-    /// rejected at allocation time (returns `AllocError`) so that a future
-    /// `Box::<[T]>::into_rc()` call cannot find itself with no drop entry
-    /// to retarget. Matches the non-DST slice-alloc paths which use the
-    /// same `entry_size != 0 && len > u16::MAX` guard.
-    #[test]
-    fn try_alloc_dst_box_rejects_drop_slice_with_overflowing_len() {
+    /// A slice DST with `len > u16::MAX` and `T: Drop` is accepted: a
+    /// `Box<[T]>` drops via `drop_in_place::<[T]>` on a full-width fat
+    /// pointer, so there is no `u16` element-count cap (matching the
+    /// `Arc<[T]>` family). Every element is constructed and, on drop,
+    /// every destructor runs.
+    #[test]
+    // Skipped under Miri: building + dropping ~65K elements exceeds
+    // Miri's test budget; native + cargo-careful runs cover it.
+    #[cfg_attr(miri, ignore)]
+    fn try_alloc_dst_box_accepts_drop_slice_with_overflowing_len() {
         struct DropCounter(std::sync::Arc<AtomicUsize>);
         impl Drop for DropCounter {
             fn drop(&mut self) {
@@ -482,20 +485,28 @@ mod dst_box {
 
         let arena = Arena::new();
         let n: usize = (u16::MAX as usize) + 1;
-        // Layout::array fits since u16::MAX+1 elements at small size are well under isize::MAX.
         let Ok(layout) = core::alloc::Layout::array::<DropCounter>(n) else {
             // Allocator wouldn't even build the layout; the test isn't meaningful.
             return;
         };
+        let counter = std::sync::Arc::new(AtomicUsize::new(0));
+        let c = std::sync::Arc::clone(&counter);
 
-        // SAFETY: init would write all `n` elements; we never reach that point
-        // because the allocation is rejected up front by the new guard.
-        let result = unsafe {
-            arena.try_alloc_dst_box::<[DropCounter]>(layout, n, |_fat: *mut [DropCounter]| {
-                unreachable!("alloc must be rejected before init runs");
+        // SAFETY: `layout` describes `[DropCounter; n]`; `init` writes a
+        // valid `DropCounter` into every slot before the `Box` is
+        // observed, so `drop_in_place::<[DropCounter]>` runs on live values.
+        let b = unsafe {
+            arena.try_alloc_dst_box::<[DropCounter]>(layout, n, |fat: *mut [DropCounter]| {
+                let base = fat.cast::<DropCounter>();
+                for i in 0..n {
+                    base.add(i).write(DropCounter(std::sync::Arc::clone(&c)));
+                }
             })
-        };
-        assert!(result.is_err(), "DST slice with len > u16::MAX and T: Drop must be rejected");
+        }
+        .expect("DST slice with len > u16::MAX and T: Drop is accepted");
+        assert_eq!(b.len(), n);
+        drop(b);
+        assert_eq!(counter.load(Ordering::Relaxed), n, "every element's destructor must run");
     }
 }
 
diff --git a/crates/multitude/tests/loom.rs b/crates/multitude/tests/loom.rs
index a695f7cf6..1983edeb8 100644
--- a/crates/multitude/tests/loom.rs
+++ b/crates/multitude/tests/loom.rs
@@ -233,10 +233,11 @@ mod loom_arc {
     }
 
     #[test]
-    fn two_workers_clone_and_drop_during_eviction() {
-        // Eviction race: owner evicts a Shared chunk via `reset` while two
-        // workers drop their Arcs. The reconcile must produce a refcount
-        // that reaches 0 exactly once.
+    fn two_workers_clone_and_drop_during_reset_and_arena_drop() {
+        // Two workers drop their Arcs while the owner resets (a no-op on the
+        // shared chunk) and then drops the arena. The shared chunk is torn
+        // down when its last reference releases; reconcile-on-drop must
+        // produce a refcount that reaches 0 exactly once.
         loom::model(|| {
             let baseline = drop_counter().load(StdOrdering::Relaxed);
 
@@ -259,9 +260,10 @@ mod loom_arc {
     }
 
     #[test]
-    fn worker_drop_racing_eviction_then_owner_drops_arena() {
-        // Variant of `deferred_reconciliation_race`: the arena is dropped
-        // after the eviction, so the worker's drop hits the
+    fn worker_drop_racing_reset_then_owner_drops_arena() {
+        // Variant of `deferred_reconciliation_race`: the owner resets (a
+        // no-op on the shared chunk) and then drops the arena, so the
+        // worker's drop may be the last reference and hit the
         // `outstanding_chunks` last-reclaimer path on the now-detached chunk.
         loom::model(|| {
             let baseline = drop_counter().load(StdOrdering::Relaxed);
@@ -282,10 +284,11 @@ mod loom_arc {
     }
 
     #[test]
-    fn arena_drop_with_active_workers_and_chunk_cache_reuse() {
-        // Owner allocates an Arc, resets (chunk cached), allocates again
-        // (cache pop revives), all while a worker drops the first Arc.
-        // Stresses the cache-revive path against in-flight worker drops.
+    fn second_alloc_after_reset_reuses_installed_chunk_with_active_worker() {
+        // Owner allocates an Arc, resets (the shared chunk stays installed),
+        // then allocates a second Arc on that same chunk, all while a worker
+        // drops the first Arc. Stresses an allocation onto a live chunk
+        // against an in-flight worker drop of an earlier handle.
         loom::model(|| {
             let baseline = drop_counter().load(StdOrdering::Relaxed);
 
@@ -369,10 +372,9 @@ mod loom_arc {
 
     #[test]
     fn arena_reset_concurrent_with_clone_and_drop() {
-        // Owner calls `arena.reset()` (NOT drop) while two workers race
-        // on Arc clone/drop. `reset` evicts in-place rather than tearing
-        // down `ArenaInner`, so the orderings exercised differ from the
-        // arena-drop case.
+        // Owner calls `arena.reset()` (NOT drop) while a worker drops an Arc
+        // clone. `reset` leaves the shared chunk untouched, so the chunk is
+        // torn down later at arena drop; Drop must still run exactly once.
         loom::model(|| {
             let baseline = drop_counter().load(StdOrdering::Relaxed);
 
@@ -393,13 +395,11 @@ mod loom_arc {
     }
 
     #[test]
-    fn cache_pop_concurrent_with_prior_generation_worker_drop() {
-        // Owner allocates an Arc on chunk-gen-1, resets (chunk cached),
-        // then allocates a new Arc — which pops the cached chunk and
-        // re-initializes it (gen-2). Concurrently, a worker holding the
-        // gen-1 Arc drops it, hitting the now-revived chunk's refcount.
-        // Tests that cache-revive races a teardown decrement on the
-        // prior generation safely.
+    fn second_alloc_after_reset_shares_chunk_with_prior_generation_worker_drop() {
+        // Owner allocates an Arc, resets (the shared chunk stays installed),
+        // then allocates a second Arc on the same chunk. Concurrently, a
+        // worker holding the first Arc drops it, hitting that chunk's
+        // refcount. Both payloads must drop exactly once.
         loom::model(|| {
             let baseline = drop_counter().load(StdOrdering::Relaxed);
 
@@ -456,10 +456,11 @@ mod loom_arc {
         // re-store its `next` pointer.
         loom::model(|| {
             let arena = fresh_arena();
-            // Each `Arc<[u32; 256]>` takes 1 KiB + drop entry; with
-            // `max_normal_alloc = 4 KiB` chunks, two of these allocate in
-            // separate chunks via refill, so dropping each on a different
-            // worker forces two independent `push_shared_cache` paths.
+            // Each `Arc<[u32; 256]>` takes ~1 KiB + the per-`Arc` strong
+            // prefix; with `max_normal_alloc = 4 KiB` chunks, two of these
+            // allocate in separate chunks via refill, so dropping each on a
+            // different worker forces two independent `push_shared_cache`
+            // paths.
             let a: Arc<[u32; 256]> = arena.alloc_arc([0_u32; 256]);
             let b: Arc<[u32; 256]> = arena.alloc_arc([0_u32; 256]);
 
@@ -488,9 +489,10 @@ mod loom_arc {
         // installed node's `next` field after the push that installed it.
         loom::model(|| {
             let arena = fresh_arena();
-            // Each `Arc<[u32; 256]>` takes 1 KiB + drop entry; with
-            // `max_normal_alloc = 4 KiB` chunks, these allocations refill
-            // into separate chunks so each drop/pop exercises cache traffic.
+            // Each `Arc<[u32; 256]>` takes ~1 KiB + the per-`Arc` strong
+            // prefix; with `max_normal_alloc = 4 KiB` chunks, these
+            // allocations refill into separate chunks so each drop/pop
+            // exercises cache traffic.
             let cached: Arc<[u32; 256]> = arena.alloc_arc([0_u32; 256]);
             let racing: Arc<[u32; 256]> = arena.alloc_arc([0_u32; 256]);
 
diff --git a/crates/multitude/tests/mutant_kills_post_fix.rs b/crates/multitude/tests/mutant_kills_post_fix.rs
index 95c24e27e..d39872407 100644
--- a/crates/multitude/tests/mutant_kills_post_fix.rs
+++ b/crates/multitude/tests/mutant_kills_post_fix.rs
@@ -6,15 +6,15 @@
 
 use multitude::Arena;
 
-// is_oversized_shared: threshold == max_normal_alloc routes via normal path
+// is_oversized: threshold == max_normal_alloc routes via normal path
 #[test]
-fn is_oversized_shared_routes_at_threshold_via_normal() {
+fn is_oversized_routes_shared_at_threshold_via_normal() {
     const MNA: usize = 4 * 1024;
     let arena = Arena::builder().max_normal_alloc(MNA).build();
     let before_normal = arena.stats().normal_shared_chunks_allocated;
     let before_oversized = arena.stats().oversized_shared_chunks_allocated;
-    // wcp = MNA (size MNA-1 + align 1).
-    let _arc = arena.alloc_arc([0_u8; MNA - 1]);
+    // wcp = MNA exactly: strong prefix (4) + value (MNA-8) + arc block align (4).
+    let _arc = arena.alloc_arc([0_u8; MNA - 8]);
     let after_normal = arena.stats().normal_shared_chunks_allocated;
     let after_oversized = arena.stats().oversized_shared_chunks_allocated;
     assert!(after_normal > before_normal);
@@ -25,11 +25,12 @@ fn is_oversized_shared_routes_at_threshold_via_normal() {
 }
 
 #[test]
-fn is_oversized_shared_routes_above_threshold_via_oversized() {
+fn is_oversized_routes_shared_above_threshold_via_oversized() {
     const MNA: usize = 4 * 1024;
     let arena = Arena::builder().max_normal_alloc(MNA).build();
     let before_oversized = arena.stats().oversized_shared_chunks_allocated;
-    let _arc = arena.alloc_arc([0_u8; MNA]); // wcp = MNA + 1
+    // wcp = MNA + 1: strong prefix (4) + value (MNA-7) + arc block align (4).
+    let _arc = arena.alloc_arc([0_u8; MNA - 7]);
     let after_oversized = arena.stats().oversized_shared_chunks_allocated;
     assert!(
         after_oversized > before_oversized,
@@ -38,7 +39,7 @@ fn is_oversized_shared_routes_above_threshold_via_oversized() {
 }
 
 #[test]
-fn is_oversized_local_routes_at_threshold_via_normal() {
+fn is_oversized_routes_local_at_threshold_via_normal() {
     const MNA: usize = 4 * 1024;
     let arena = Arena::builder().max_normal_alloc(MNA).build();
     let before_normal = arena.stats().normal_local_chunks_allocated;
@@ -52,7 +53,7 @@ fn is_oversized_local_routes_at_threshold_via_normal() {
 }
 
 #[test]
-fn is_oversized_local_routes_above_threshold_via_oversized() {
+fn is_oversized_routes_local_above_threshold_via_oversized() {
     const MNA: usize = 4 * 1024;
     let arena = Arena::builder().max_normal_alloc(MNA).build();
     let before_oversized = arena.stats().oversized_local_chunks_allocated;
diff --git a/crates/multitude/tests/mutants_extras.rs b/crates/multitude/tests/mutants_extras.rs
index 722cfa422..76e8754cd 100644
--- a/crates/multitude/tests/mutants_extras.rs
+++ b/crates/multitude/tests/mutants_extras.rs
@@ -2082,25 +2082,32 @@ mod mutants_for_audit {
     // tests above that refill across many chunk classes.)
 
     // ============================================================================
-    // arena.rs:3036 / 3608 — `if entry_size != 0 && len > u16::MAX as usize`
-    // `> with ==` mutant: only panics when len exactly equals u16::MAX.
-    // `> with >=` mutant: panics at len == u16::MAX (one short of original).
-    // Kill: a Drop-aware slice of len == u16::MAX must succeed (original)
-    // and must panic for len > u16::MAX.
+    // Per-`Arc` reference counting removes the `u16` element-count cap on
+    // `Arc<[T]>` slices: a Drop-typed slice with `len > u16::MAX` now
+    // allocates (via the oversized path) and drops each element through
+    // `drop_in_place::<[T]>` in `Arc::drop`.
     // ============================================================================
 
+    #[cfg(not(miri))]
     #[test]
-    fn alloc_slice_shared_drop_aware_above_u16_max_returns_err() {
+    fn alloc_slice_shared_drop_aware_above_u16_max_succeeds() {
         use std::sync::Arc as StdArc;
-        use std::sync::atomic::AtomicU32;
-        struct D(#[allow(dead_code)] StdArc<AtomicU32>);
+        use std::sync::atomic::{AtomicU32, Ordering};
+        struct D(StdArc<AtomicU32>);
         impl Drop for D {
-            fn drop(&mut self) {}
+            fn drop(&mut self) {
+                self.0.fetch_add(1, Ordering::Relaxed);
+            }
         }
         let drops = StdArc::new(AtomicU32::new(0));
         let arena = Arena::builder().max_normal_alloc(60 * 1024).build();
-        let result = arena.try_alloc_slice_fill_with_arc(65_536, |_| D(drops.clone()));
-        assert!(result.is_err());
+        let n = 65_536_usize;
+        let arc = arena
+            .try_alloc_slice_fill_with_arc(n, |_| D(drops.clone()))
+            .expect("Arc slices have no u16 element-count cap");
+        assert_eq!(arc.len(), n);
+        drop(arc);
+        assert_eq!(drops.load(Ordering::Relaxed), n as u32);
     }
 
     // ============================================================================
diff --git a/crates/multitude/tests/utf16.rs b/crates/multitude/tests/utf16.rs
index 7409cab00..54dbc0e70 100644
--- a/crates/multitude/tests/utf16.rs
+++ b/crates/multitude/tests/utf16.rs
@@ -3011,7 +3011,7 @@ mod from_coverage_extras_utf16 {
 
     #[test]
     fn alloc_utf16_str_arc_from_str_oversized_routes_via_oversized_shared() {
-        let len = 16 * 1024;
+        let len = 4096;
         let src = "a".repeat(len);
 
         // First exercise the default arena so any default-config code paths
@@ -3020,11 +3020,13 @@ mod from_coverage_extras_utf16 {
         let arc = arena.alloc_utf16_str_arc_from_str(&src);
         assert_eq!(arc.len(), len);
 
-        // Then force a small `max_normal_alloc` (in bytes) so the ~32 KiB
-        // UTF-16 payload transcoded from a 16 KiB ASCII string (2 bytes per
-        // code unit, plus the length prefix) deterministically takes the
-        // oversized-shared branch regardless of any future change to the
-        // default threshold.
+        // Then force a small `max_normal_alloc` (in bytes) so the 8 KiB
+        // UTF-16 payload transcoded from a 4096-char ASCII string (2 bytes
+        // per code unit, plus the length prefix) deterministically takes
+        // the oversized-shared branch regardless of any future change to
+        // the default threshold. (A shorter string than before keeps the
+        // one-shot transcode affordable under Miri while still clearing the
+        // 4 KiB threshold.)
         let arena = Arena::builder().max_normal_alloc(4096).build();
         let arc = arena.alloc_utf16_str_arc_from_str(&src);
         assert_eq!(arc.len(), len);
@@ -3034,7 +3036,7 @@ mod from_coverage_extras_utf16 {
 
     #[test]
     fn alloc_utf16_str_box_from_str_oversized_routes_via_oversized_shared() {
-        let len = 16 * 1024;
+        let len = 4096;
         let src = "a".repeat(len);
 
         // First exercise the default arena so any default-config code paths
@@ -3043,11 +3045,13 @@ mod from_coverage_extras_utf16 {
         let b = arena.alloc_utf16_str_box_from_str(&src);
         assert_eq!(b.len(), len);
 
-        // Then force a small `max_normal_alloc` (in bytes) so the ~32 KiB
-        // UTF-16 payload transcoded from a 16 KiB ASCII string (2 bytes per
-        // code unit, plus the length prefix) deterministically takes the
-        // oversized-shared branch regardless of any future change to the
-        // default threshold.
+        // Then force a small `max_normal_alloc` (in bytes) so the 8 KiB
+        // UTF-16 payload transcoded from a 4096-char ASCII string (2 bytes
+        // per code unit, plus the length prefix) deterministically takes
+        // the oversized-shared branch regardless of any future change to
+        // the default threshold. (A shorter string than before keeps the
+        // one-shot transcode affordable under Miri while still clearing the
+        // 4 KiB threshold.)
         let arena = Arena::builder().max_normal_alloc(4096).build();
         let b = arena.alloc_utf16_str_box_from_str(&src);
         assert_eq!(b.len(), len);
@@ -3214,4 +3218,52 @@ mod from_mutants_extras_utf16_scattered {
         let actual: std::string::String = std::char::decode_utf16(s.as_slice().iter().copied()).map(|r| r.unwrap()).collect();
         assert_eq!(actual, "Hello, Rust!");
     }
+
+    /// Regression guard for the prefixed shared-allocation routing
+    /// (`impl_alloc_prefixed_shared_arc`): an odd-length `u8` (`Arc<str>`)
+    /// allocation leaves the shared bump cursor odd, then a `u16`
+    /// (`ArcUtf16Str`) allocation reserves a block aligned to 4 bytes (so
+    /// the per-`Arc` `AtomicU32` strong prefix is aligned, via
+    /// `arc_block_align(u16) = max(2, 4)`). The routing sizes the refill /
+    /// oversized hint with `worst_case_arc_slice_payload` (strong prefix +
+    /// length prefix + payload + front alignment slack), so sweeping `u16`
+    /// lengths across the `max_normal_alloc` boundary must always terminate
+    /// (an under-sized hint would spin the refill loop) and produce correct
+    /// contents.
+    #[test]
+    fn prefixed_shared_alloc_boundary_terminates_for_mixed_u8_u16() {
+        // `max_normal_alloc` must be >= MIN_MAX_NORMAL_ALLOC (4096), so the
+        // u16 normal/oversized boundary sits at `chars = mna / 2`. Sweep a
+        // few char lengths right around that boundary for an even and an
+        // odd `mna` (the parity drives the alignment edge case) plus one
+        // larger boundary position. Verifying length + a handful of
+        // sentinel code units (rather than decoding every unit) keeps the
+        // per-iteration cost down to the unavoidable one-shot transcode,
+        // which is what makes this affordable under Miri.
+        for &mna in &[4096_usize, 4097, 6144] {
+            let arena = Arena::builder().max_normal_alloc(mna).build();
+            let center = mna / 2;
+            for chars in center.saturating_sub(1)..=(center + 1).min(mna) {
+                // Odd-length u8 (str) alloc to misalign the shared cursor.
+                let narrow = "x".repeat(2 * (chars % 50) + 1);
+                let narrow_arc = arena.alloc_str_arc(&narrow);
+                assert_eq!(&*narrow_arc, narrow.as_str(), "str payload corrupted at mna={mna}, chars={chars}");
+                // u16 (utf16) alloc right after at a boundary-spanning length.
+                let wide = "y".repeat(chars);
+                let wide_arc = arena.alloc_utf16_str_arc_from_str(&wide);
+                // Sentinel checks instead of a full decode: the payload is
+                // uniform ('y'), so a routing bug that returns the wrong
+                // length or corrupts an edge/middle unit is still caught,
+                // without an O(chars) decode loop per iteration.
+                assert_eq!(wide_arc.len(), chars, "utf16 length wrong at mna={mna}, chars={chars}");
+                if chars > 0 {
+                    let units = wide_arc.as_slice();
+                    let yy = u16::from(b'y');
+                    assert_eq!(units[0], yy, "utf16 head corrupted at mna={mna}, chars={chars}");
+                    assert_eq!(units[chars / 2], yy, "utf16 mid corrupted at mna={mna}, chars={chars}");
+                    assert_eq!(units[chars - 1], yy, "utf16 tail corrupted at mna={mna}, chars={chars}");
+                }
+            }
+        }
+    }
 }
diff --git a/crates/multitude/tests/zst_uninit_arc_fix.rs b/crates/multitude/tests/zst_uninit_arc_fix.rs
index 5dd1a1d57..d9543393e 100644
--- a/crates/multitude/tests/zst_uninit_arc_fix.rs
+++ b/crates/multitude/tests/zst_uninit_arc_fix.rs
@@ -123,7 +123,7 @@ fn zst_alloc_arc_never_returns_one_past_chunk_end() {
 }
 
 /// Regression from post-fix audit: `impl_alloc_dst_box` used to check
-/// `is_oversized_shared(total)` but refill with `total + align`. At
+/// `is_oversized(total)` but refill with `total + align`. At
 /// `total == max_normal_alloc` but `total + align > max_normal_alloc`,
 /// the in-arena fast path failed, the oversized branch was skipped,
 /// and `refill_shared(refill_hint)` hit the new `debug_assert!` in