From 8f941b84751b1f3743be7ea915ab269378063e2d Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Mon, 8 Jun 2026 13:50:16 +0100 Subject: [PATCH 01/18] chore(release): cut rc-2026.6.2 --- Cargo.lock | 17 +++++++---------- Cargo.toml | 6 +++--- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45e89fe..4cc5df5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -809,7 +809,7 @@ dependencies = [ [[package]] name = "ant-node" -version = "0.12.0" +version = "0.12.1-rc.1" dependencies = [ "alloy", "ant-protocol", @@ -861,9 +861,8 @@ dependencies = [ [[package]] name = "ant-protocol" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e950d12c9f6d08d0ea560573729d93f15e105d53b669defa682f5e6f92da4b1" +version = "2.1.3-rc.1" +source = "git+https://github.com/WithAutonomi/ant-protocol?branch=rc-2026.6.2#053be60ced998c6a4c0833b4560f1ddb89a86514" dependencies = [ "blake3", "bytes", @@ -4866,9 +4865,8 @@ dependencies = [ [[package]] name = "saorsa-core" -version = "0.24.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c0f8952fc5a4d37eb0bca7de0740830f40347f9da663effde3ddd6b68bcd2fb" +version = "0.25.0-rc.1" +source = "git+https://github.com/saorsa-labs/saorsa-core?branch=rc-2026.6.2#088c3552bf7257ee10b5a670cd4990d1c0e7b8af" dependencies = [ "anyhow", "async-trait", @@ -4981,9 +4979,8 @@ dependencies = [ [[package]] name = "saorsa-transport" -version = "0.34.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852400712537856ab6fec5293be4290daf0130df0dbcb249a6e8280f9257665f" +version = "0.35.0-rc.1" +source = "git+https://github.com/saorsa-labs/saorsa-transport?branch=rc-2026.6.2#48b0874adaefcf26650634a5f93a9618715518ca" dependencies = [ "anyhow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 1175dc0..26810de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ant-node" -version = "0.12.0" +version = 
"0.12.1-rc.1" edition = "2021" authors = ["David Irvine "] description = "Pure quantum-proof network node for the Autonomi decentralized network" @@ -39,10 +39,10 @@ mimalloc = "0.1" # Until then, the git pin tracks the matching saorsa-core lineage # (the rc-2026.4.2 branch) so Cargo can unify the wire types here # with ant-protocol's re-exports. -ant-protocol = "2.1.2" +ant-protocol = { git = "https://github.com/WithAutonomi/ant-protocol", branch = "rc-2026.6.2" } # Core (provides EVERYTHING: networking, DHT, security, trust, storage) -saorsa-core = "0.24.5" +saorsa-core = { git = "https://github.com/saorsa-labs/saorsa-core", branch = "rc-2026.6.2" } saorsa-pqc = "0.5" # Payment verification - autonomi network lookup + EVM payment From 8bc7f84a014c75a617d447ea243a570b6cc424ef Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Mon, 8 Jun 2026 21:37:05 +0100 Subject: [PATCH 02/18] fix(storage): disk-space pre-check before payment verification in PUT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A disk-full node previously ran full payment verification — ML-DSA pool checks, a Kademlia closeness lookup, and an on-chain Arbitrum RPC — on every PUT before discovering at storage time that it had no space, only to reject the chunk. At fleet scale this wasted RPC/network load and delayed the client's fallback to another peer (V2-411). Add a cheap disk-space pre-check that runs immediately after the already-exists check and before payment verification: - Expose `LmdbStorage::check_capacity()` wrapping the existing cached `check_disk_space_cached()` (passing results only, so freed space is still detected promptly; effectively free per-PUT). - In `handle_put_inner`, call it before `verify_payment`; on insufficient space, emit an info-level reject log (target `ant_node::storage::disk_precheck`) and return the existing `ProtocolError::StorageFailed` early, preserving client behaviour. 
- Keep the store-path check as defence-in-depth (pre-check↔store race, replication writes). The reject log is greppable for testnet verification. Backwards compatible: no wire/response-variant change. A dedicated OutOfCapacity/TryElsewhere response is left to V2-469. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/storage/handler.rs | 85 ++++++++++++++++++++++++++++++++++++++++-- src/storage/lmdb.rs | 17 +++++++++ 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/src/storage/handler.rs b/src/storage/handler.rs index 25c1178..64846e4 100644 --- a/src/storage/handler.rs +++ b/src/storage/handler.rs @@ -243,7 +243,24 @@ impl AntProtocol { Ok(false) => {} } - // 4. Verify payment. This node is the storer being paid right now, so + // 4. Cheap disk-space pre-check — runs BEFORE the expensive payment + // verification path (ML-DSA pool checks, a Kademlia closeness + // lookup, and an on-chain Arbitrum RPC). A disk-full node can never + // satisfy this PUT, so reject it here rather than burning that work + // only to fail the reserve check inside `storage.put` (V2-411). The + // check is cached (passing results only), so this is effectively + // free per-PUT and still detects freed space promptly. The store + // path keeps its own check as defence-in-depth. + if let Err(e) = self.storage.check_capacity() { + info!( + target: "ant_node::storage::disk_precheck", + addr = %addr_hex, + "Rejecting PUT before payment verification: {e}" + ); + return ChunkPutResponse::Error(ProtocolError::StorageFailed(e.to_string())); + } + + // 5. Verify payment. This node is the storer being paid right now, so // the full ClientPut check set applies (own-quote price freshness, // local recipient, merkle candidate closeness). let payment_result = self @@ -269,7 +286,7 @@ impl AntProtocol { } } - // 5. Store chunk + // 6. 
Store chunk match self.storage.put(&address, &request.content).await { Ok(_) => { let content_len = request.content.len(); @@ -281,7 +298,7 @@ impl AntProtocol { // fallback path stays roughly accurate. self.quote_generator.record_store(); - // 6. Notify replication engine for fresh fan-out. + // 7. Notify replication engine for fresh fan-out. // Only emit when a real proof is present — cached-as-verified // PUTs have no proof to forward, and the chunk would have // already replicated on the original write that carried one. @@ -528,10 +545,22 @@ mod tests { use tempfile::TempDir; async fn create_test_protocol() -> (AntProtocol, TempDir) { + // `test_default()` sets `disk_reserve: 0`, so the disk pre-check always + // passes for the regular tests. + create_test_protocol_with_reserve(0).await + } + + /// Build a test protocol whose storage enforces the given disk reserve. + /// + /// A very large reserve (e.g. `u64::MAX`) makes `available < reserve` + /// always true, so the disk-space pre-check in `handle_put_inner` fails — + /// used to exercise the V2-411 early-return path. + async fn create_test_protocol_with_reserve(disk_reserve: u64) -> (AntProtocol, TempDir) { let temp_dir = TempDir::new().expect("create temp dir"); let storage_config = LmdbStorageConfig { root_dir: temp_dir.path().to_path_buf(), + disk_reserve, ..LmdbStorageConfig::test_default() }; let storage = Arc::new( @@ -727,6 +756,56 @@ mod tests { } } + /// V2-411: a disk-full node must reject a PUT with the disk-space error + /// *before* running payment verification. + /// + /// The chunk is intentionally **not** cache-inserted, so if the handler + /// reached `verify_payment` it would return `PaymentRequired`/`PaymentFailed` + /// (an uncached chunk with no proof). Observing the `StorageFailed` disk + /// error instead proves the disk pre-check short-circuited ahead of + /// verification — there is no on-chain path to reach. 
+ #[tokio::test] + async fn test_put_rejected_on_insufficient_disk_before_verification() { + // u64::MAX reserve guarantees `available < reserve`, so the cached + // disk-space check always fails. + let (protocol, _temp) = create_test_protocol_with_reserve(u64::MAX).await; + + let content = b"chunk for a disk-full node"; + let address = LmdbStorage::compute_address(content); + + let put_request = ChunkPutRequest::new(address, Bytes::copy_from_slice(content)); + let put_msg = ChunkMessage { + request_id: 41, + body: ChunkMessageBody::PutRequest(put_request), + }; + let put_bytes = put_msg.encode().expect("encode put"); + + let response_bytes = protocol + .try_handle_request(&put_bytes) + .await + .expect("handle put") + .expect("expected response"); + let response = ChunkMessage::decode(&response_bytes).expect("decode response"); + + assert_eq!(response.request_id, 41); + match response.body { + ChunkMessageBody::PutResponse(ChunkPutResponse::Error( + ProtocolError::StorageFailed(msg), + )) => { + assert!( + msg.contains("Insufficient disk space"), + "expected disk-space error, got: {msg}" + ); + } + other => { + panic!("expected StorageFailed disk error before verification, got: {other:?}") + } + } + + // And nothing was stored. + assert!(!protocol.exists(&address).expect("exists check")); + } + #[tokio::test] async fn test_put_already_exists() { let (protocol, _temp) = create_test_protocol().await; diff --git a/src/storage/lmdb.rs b/src/storage/lmdb.rs index 5e2c31d..ee15041 100644 --- a/src/storage/lmdb.rs +++ b/src/storage/lmdb.rs @@ -583,6 +583,23 @@ impl LmdbStorage { value } + /// Cheap capacity pre-check for callers that want to reject work *before* + /// doing expensive setup (e.g. the PUT handler skipping payment + /// verification on a disk-full node — see `V2-411`). + /// + /// Delegates to [`Self::check_disk_space_cached`], so it shares the same + /// TTL cache and only ever performs an `fs2::available_space` syscall on a + /// cache miss. 
Returns the same `Insufficient disk space …` error the + /// store path raises, keeping caller behaviour identical. + /// + /// # Errors + /// + /// Returns [`Error::Storage`] when available space is below the configured + /// reserve, or when the disk-space query itself fails. + pub fn check_capacity(&self) -> Result<()> { + self.check_disk_space_cached() + } + /// Check available disk space, skipping the syscall if a recent check passed. /// /// Only caches *passing* results — a low-space condition is always From 0dc5926d8946ba6da2b41415063ab5e615a18440 Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Mon, 8 Jun 2026 22:38:14 +0100 Subject: [PATCH 03/18] fix(replication): disk-space pre-check before payment verification on fresh offers DEV-02 testnet verification of the PUT-handler pre-check (the previous commit) showed full nodes still doing wasted payment verification on the replication push path: ~491 "EVM payment verified" log lines on the two full VMs, each followed by "Failed to store fetched record ...: Insufficient disk space". That work runs in handle_fresh_offer, which is not covered by the handle_put_inner reordering. Mirror V2-411 into the fresh-offer path: call LmdbStorage::check_capacity() immediately after the responsibility check and before verify_payment. On insufficient space, emit the same info-level reject log (target ant_node::storage::disk_precheck, with a replication-specific message) and return a Rejected response, instead of verifying a payment for a record the node can never store. The store-path check inside put() remains as defence-in-depth. The pull/fetch path (FetchResponse store) does no payment verification, so it is left unchanged. No behavioural change for nodes with free space. Verification: re-run the same small testnet and confirm the full hosts' "EVM payment verified" volume drops while disk_precheck reject logs cover the replication path. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/replication/mod.rs | 26 ++++++++++++++++++++++++++ src/storage/lmdb.rs | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/replication/mod.rs b/src/replication/mod.rs index 81ea8ec..5be29f1 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -1151,6 +1151,32 @@ async fn handle_fresh_offer( return Ok(()); } + // Disk-space pre-check — mirror the PUT handler (V2-411). A full node can + // never store this record, so reject it before the expensive payment + // verification (EVM on-chain query / merkle pool work) rather than verifying + // and only then failing at `storage.put` below. Reuses the cached capacity + // check (passing results only, so freed space is detected promptly), and the + // store path keeps its own check as defence-in-depth. + if let Err(e) = storage.check_capacity() { + info!( + target: "ant_node::storage::disk_precheck", + key = %hex::encode(offer.key), + "Rejecting fresh replication offer before payment verification: {e}" + ); + send_replication_response( + source, + p2p_node, + request_id, + ReplicationMessageBody::FreshReplicationResponse(FreshReplicationResponse::Rejected { + key: offer.key, + reason: format!("Storage error: {e}"), + }), + rr_message_id, + ) + .await; + return Ok(()); + } + // Gap 1: Validate PoP via PaymentVerifier. This is an already-settled // receipt handed over by a neighbour, not a live sale — Replication // context skips the storer-being-paid-now checks (own-quote price diff --git a/src/storage/lmdb.rs b/src/storage/lmdb.rs index ee15041..99f1e78 100644 --- a/src/storage/lmdb.rs +++ b/src/storage/lmdb.rs @@ -587,7 +587,7 @@ impl LmdbStorage { /// doing expensive setup (e.g. the PUT handler skipping payment /// verification on a disk-full node — see `V2-411`). 
/// - /// Delegates to [`Self::check_disk_space_cached`], so it shares the same + /// Delegates to the private `check_disk_space_cached`, so it shares the same /// TTL cache and only ever performs an `fs2::available_space` syscall on a /// cache miss. Returns the same `Insufficient disk space …` error the /// store path raises, keeping caller behaviour identical. From f4e4ec572d24ca13eaf74ad13e7e2d5f4ceb4c92 Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Mon, 8 Jun 2026 23:32:48 +0100 Subject: [PATCH 04/18] fix: address Copilot review comments on disk-space pre-check - Narrow `LmdbStorage::check_capacity` from `pub` to `pub(crate)`: it is only used in-crate (PUT handler + replication), so it should not widen the lib API. - Drop the redundant "Storage error: " prefix from the replication rejection reason: `Error::Storage` already renders as "storage error: ...", so `format!("Storage error: {e}")` produced "Storage error: storage error: ...". Use the error's own message instead, here and on the adjacent pre-existing store-failure branch for consistency. - Reword the PUT pre-check comment: the cached check is free per-PUT on a healthy node, but a disk-full node re-runs a cheap `available_space` syscall each PUT (still negligible next to the verification it avoids). No behavioural change. cargo fmt/clippy/doc clean; replication 233/233, storage 30/30 tests pass. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/replication/mod.rs | 4 ++-- src/storage/handler.rs | 8 +++++--- src/storage/lmdb.rs | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/replication/mod.rs b/src/replication/mod.rs index 5be29f1..0e0995c 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -1169,7 +1169,7 @@ async fn handle_fresh_offer( request_id, ReplicationMessageBody::FreshReplicationResponse(FreshReplicationResponse::Rejected { key: offer.key, - reason: format!("Storage error: {e}"), + reason: e.to_string(), }), rr_message_id, ) @@ -1262,7 +1262,7 @@ async fn handle_fresh_offer( ReplicationMessageBody::FreshReplicationResponse( FreshReplicationResponse::Rejected { key: offer.key, - reason: format!("Storage error: {e}"), + reason: e.to_string(), }, ), rr_message_id, diff --git a/src/storage/handler.rs b/src/storage/handler.rs index 64846e4..26d20d7 100644 --- a/src/storage/handler.rs +++ b/src/storage/handler.rs @@ -248,9 +248,11 @@ impl AntProtocol { // lookup, and an on-chain Arbitrum RPC). A disk-full node can never // satisfy this PUT, so reject it here rather than burning that work // only to fail the reserve check inside `storage.put` (V2-411). The - // check is cached (passing results only), so this is effectively - // free per-PUT and still detects freed space promptly. The store - // path keeps its own check as defence-in-depth. + // check caches passing results, so it is free per-PUT on a healthy + // node; a disk-full node re-runs a cheap `available_space` syscall + // each PUT (still negligible next to the verification it avoids) and + // so detects freed space promptly. The store path keeps its own + // check as defence-in-depth. 
if let Err(e) = self.storage.check_capacity() { info!( target: "ant_node::storage::disk_precheck", diff --git a/src/storage/lmdb.rs b/src/storage/lmdb.rs index 99f1e78..acda5ea 100644 --- a/src/storage/lmdb.rs +++ b/src/storage/lmdb.rs @@ -596,7 +596,7 @@ impl LmdbStorage { /// /// Returns [`Error::Storage`] when available space is below the configured /// reserve, or when the disk-space query itself fails. - pub fn check_capacity(&self) -> Result<()> { + pub(crate) fn check_capacity(&self) -> Result<()> { self.check_disk_space_cached() } From fb5eb31bbfe5a128228fadf4390a2cceb7316c10 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Thu, 28 May 2026 15:19:40 +0200 Subject: [PATCH 05/18] feat!: depend on bootstrap-cache removal chain BREAKING CHANGE: removes ant-node bootstrap-cache config, CLI flags, and manager integration while updating ant-node to the ant-protocol and saorsa-core bootstrap-cache removal branches. --- Cargo.lock | 90 ++++++++++++++++++++--------------- docs/DESIGN.md | 2 - src/bin/ant-node/cli.rs | 24 +--------- src/config.rs | 103 ++++++++++------------------------------ src/lib.rs | 2 +- src/node.rs | 56 ---------------------- 6 files changed, 82 insertions(+), 195 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4cc5df5..6e2129c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1315,9 +1315,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "2.12.1" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" dependencies = [ "serde_core", ] @@ -1489,9 +1489,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.63" +version = "1.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" +checksum = 
"a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" dependencies = [ "find-msvc-tools", "jobserver", @@ -2960,9 +2960,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.10.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" +checksum = "eb92f162bf56536459fc83c79b974bb12837acfed43d6bc370a7916d0ae15ecc" dependencies = [ "atomic-waker", "bytes", @@ -3011,7 +3011,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.4", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -3158,9 +3158,9 @@ dependencies = [ [[package]] name = "igd-next" -version = "0.17.1" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de7238d487a9aff61f81b5ab41c0a841532a115a398b5fa92a2fadd0885e2581" +checksum = "bac9a3c8278f43b4cd8463380f4a25653ac843e5b177e1d3eaf849cc9ba10d4d" dependencies = [ "attohttpc", "bytes", @@ -3519,9 +3519,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.31" +version = "0.4.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "113b30b4cd05f7c06868fdb2854f66a7b9fece9a48425351cd532e810d74024f" +checksum = "616ec5685824bcc94416c6d4a7a446eea774a31efd7062c8480ba6fd06d7a6e5" [[package]] name = "lru" @@ -3621,9 +3621,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.2.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -4228,7 +4228,7 @@ dependencies = [ "quinn-udp 0.5.14", "rustc-hash", "rustls", - "socket2 0.6.4", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -4266,7 +4266,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.4", + "socket2 
0.6.3", "tracing", "windows-sys 0.60.2", ] @@ -4279,7 +4279,7 @@ checksum = "76150b617afc75e6e21ac5f39bc196e80b65415ae48d62dbef8e2519d040ce42" dependencies = [ "cfg_aliases", "libc", - "socket2 0.6.4", + "socket2 0.6.3", "tracing", "windows-sys 0.61.2", ] @@ -4721,9 +4721,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.4" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -5036,6 +5036,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "scc" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc" +dependencies = [ + "sdd", +] + [[package]] name = "schannel" version = "0.1.29" @@ -5075,6 +5084,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sdd" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" + [[package]] name = "sec1" version = "0.7.3" @@ -5307,23 +5322,24 @@ dependencies = [ [[package]] name = "serial_test" -version = "3.5.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "699f4197115b8a7e7ff19c9a315a4bd6fffec26cc4626ef45ecaea389e081c6d" +checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f" dependencies = [ "futures-executor", "futures-util", "log", "once_cell", "parking_lot", + "scc", "serial_test_derive", ] [[package]] name = "serial_test_derive" -version = "3.5.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "94e153fc76e1c6a068703d6d29c508a0b15c061c4b7e43da59cc097bc342673c" +checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" dependencies = [ "proc-macro2", "quote", @@ -5393,9 +5409,9 @@ dependencies = [ [[package]] name = "shlex" -version = "2.0.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" @@ -5472,9 +5488,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.4" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", "windows-sys 0.61.2", @@ -5801,7 +5817,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.4", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] @@ -6087,9 +6103,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "typenum" -version = "1.20.1" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" [[package]] name = "ucd-trie" @@ -6123,9 +6139,9 @@ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" -version = "1.13.3" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" @@ -6188,9 
+6204,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.23.2" +version = "1.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" +checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -7086,18 +7102,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.50" +version = "0.8.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" +checksum = "bce33a6288fa3f072a8c2c7d0f2fdbb90e28298f0135c1f99b96c3db2efcc60b" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.50" +version = "0.8.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" +checksum = "8fd425244944f4ab65ccff928e7323354c5a018c75838362fdce749dfad2ee1e" dependencies = [ "proc-macro2", "quote", diff --git a/docs/DESIGN.md b/docs/DESIGN.md index 3d71990..94c39fd 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -185,7 +185,6 @@ use saorsa_core::{ P2PNode, NodeConfig, NodeMode, adaptive::trust::TrustEngine, adaptive::dht::AdaptiveDhtConfig, - BootstrapConfig, BootstrapManager, IPDiversityConfig, identity::peer_id::PeerId, }; @@ -194,7 +193,6 @@ pub struct RunningNode { shutdown_sender: watch::Sender, // USE ANT-CORE DIRECTLY - NO REIMPLEMENTATION! node: Arc, // Integrates ALL components - bootstrap: Arc, // 30,000 peer cache // Events node_events_channel: NodeEventsChannel, root_dir_path: PathBuf, diff --git a/src/bin/ant-node/cli.rs b/src/bin/ant-node/cli.rs index b1d68c6..9d1c635 100644 --- a/src/bin/ant-node/cli.rs +++ b/src/bin/ant-node/cli.rs @@ -1,8 +1,8 @@ //! Command-line interface definition. 
use ant_node::config::{ - BootstrapCacheConfig, BootstrapPeersConfig, BootstrapSource, EvmNetworkConfig, NetworkMode, - NodeConfig, PaymentConfig, UpgradeChannel, + BootstrapPeersConfig, BootstrapSource, EvmNetworkConfig, NetworkMode, NodeConfig, + PaymentConfig, UpgradeChannel, }; use clap::{Parser, ValueEnum}; use std::net::SocketAddr; @@ -133,18 +133,6 @@ pub struct Cli { /// that will restart the process automatically. #[arg(long)] pub stop_on_upgrade: bool, - - /// Disable persistent bootstrap cache. - #[arg(long)] - pub disable_bootstrap_cache: bool, - - /// Directory for bootstrap cache files. - #[arg(long, env = "ANT_BOOTSTRAP_CACHE_DIR")] - pub bootstrap_cache_dir: Option, - - /// Maximum peers to cache in the bootstrap cache. - #[arg(long, default_value = "10000", env = "ANT_BOOTSTRAP_CACHE_CAPACITY")] - pub bootstrap_cache_capacity: usize, } /// Upgrade channel CLI enum. @@ -282,14 +270,6 @@ impl Cli { metrics_port: self.metrics_port, }; - // Bootstrap cache config - config.bootstrap_cache = BootstrapCacheConfig { - enabled: !self.disable_bootstrap_cache, - cache_dir: self.bootstrap_cache_dir, - max_contacts: self.bootstrap_cache_capacity, - ..config.bootstrap_cache - }; - // Determine bootstrap source and apply auto-discovery if needed. let bootstrap_source = if cli_bootstrap_provided { BootstrapSource::Cli diff --git a/src/config.rs b/src/config.rs index e1d9d74..2319f96 100644 --- a/src/config.rs +++ b/src/config.rs @@ -116,10 +116,6 @@ pub struct NodeConfig { #[serde(default)] pub payment: PaymentConfig, - /// Bootstrap cache configuration for persistent peer storage. - #[serde(default)] - pub bootstrap_cache: BootstrapCacheConfig, - /// Storage configuration for chunk persistence. 
#[serde(default)] pub storage: StorageConfig, @@ -282,7 +278,6 @@ impl Default for NodeConfig { testnet: TestnetConfig::default(), upgrade: UpgradeConfig::default(), payment: PaymentConfig::default(), - bootstrap_cache: BootstrapCacheConfig::default(), storage: StorageConfig::default(), close_group_cache_dir: None, max_message_size: default_max_message_size(), @@ -405,63 +400,6 @@ const fn default_staged_rollout_hours() -> u64 { 24 // 24 hour window for staged rollout } -// ============================================================================ -// Bootstrap Cache Configuration -// ============================================================================ - -/// Bootstrap cache configuration for persistent peer storage. -/// -/// The bootstrap cache stores discovered peers across node restarts, -/// ranking them by quality metrics (success rate, latency, recency). -/// This reduces dependency on hardcoded bootstrap nodes and enables -/// faster network reconnection after restarts. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct BootstrapCacheConfig { - /// Enable persistent bootstrap cache. - /// Default: true - #[serde(default = "default_bootstrap_cache_enabled")] - pub enabled: bool, - - /// Directory for cache files. - /// Default: `{root_dir}/bootstrap_cache/` - #[serde(default)] - pub cache_dir: Option, - - /// Maximum contacts to store in the cache. - /// Default: 10,000 - #[serde(default = "default_bootstrap_max_contacts")] - pub max_contacts: usize, - - /// Stale contact threshold in days. - /// Contacts older than this are removed during cleanup. 
- /// Default: 7 days - #[serde(default = "default_bootstrap_stale_days")] - pub stale_threshold_days: u64, -} - -impl Default for BootstrapCacheConfig { - fn default() -> Self { - Self { - enabled: default_bootstrap_cache_enabled(), - cache_dir: None, - max_contacts: default_bootstrap_max_contacts(), - stale_threshold_days: default_bootstrap_stale_days(), - } - } -} - -const fn default_bootstrap_cache_enabled() -> bool { - true -} - -const fn default_bootstrap_max_contacts() -> usize { - 10_000 -} - -const fn default_bootstrap_stale_days() -> u64 { - 7 -} - // ============================================================================ // Storage Configuration // ============================================================================ @@ -537,8 +475,6 @@ pub const BOOTSTRAP_PEERS_ENV: &str = "ANT_BOOTSTRAP_PEERS_PATH"; /// Bootstrap peers loaded from a shipped configuration file. /// /// This file provides initial peers for first-time network joins. -/// It is separate from the bootstrap *cache* (which stores quality-ranked -/// peers discovered at runtime). #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BootstrapPeersConfig { /// The bootstrap peer socket addresses. @@ -583,24 +519,37 @@ impl BootstrapPeersConfig { /// Returns `None` if no file is found in any location. 
#[must_use] pub fn discover() -> Option<(Self, PathBuf)> { - let candidates = Self::search_paths(); - for path in candidates { - if path.is_file() { - match Self::from_file(&path) { - Ok(config) if !config.peers.is_empty() => return Some((config, path)), - Ok(_) => {} - Err(err) => { - crate::logging::warn!( - "Failed to load bootstrap peers from {}: {err}", - path.display(), - ); - } - } + if let Ok(env_path) = std::env::var(BOOTSTRAP_PEERS_ENV) { + return Self::load_non_empty_candidate(PathBuf::from(env_path)); + } + + for path in Self::search_paths() { + if let Some(discovered) = Self::load_non_empty_candidate(path) { + return Some(discovered); } } + None } + fn load_non_empty_candidate(path: PathBuf) -> Option<(Self, PathBuf)> { + if !path.is_file() { + return None; + } + + match Self::from_file(&path) { + Ok(config) if !config.peers.is_empty() => Some((config, path)), + Ok(_) => None, + Err(err) => { + crate::logging::warn!( + "Failed to load bootstrap peers from {}: {err}", + path.display(), + ); + None + } + } + } + /// Build the ordered list of candidate paths to search. 
fn search_paths() -> Vec { let mut paths = Vec::new(); diff --git a/src/lib.rs b/src/lib.rs index bcad485..38cc909 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,7 +64,7 @@ pub use client::{ compute_address, hex_node_id_to_encoded_peer_id, peer_id_to_xor_name, xor_distance, DataChunk, XorName, }; -pub use config::{BootstrapCacheConfig, NodeConfig, StorageConfig}; +pub use config::{NodeConfig, StorageConfig}; pub use devnet::{Devnet, DevnetConfig, DevnetEvmInfo, DevnetManifest}; pub use error::{Error, Result}; pub use event::{NodeEvent, NodeEventsChannel}; diff --git a/src/node.rs b/src/node.rs index e63ec27..0926df2 100644 --- a/src/node.rs +++ b/src/node.rs @@ -20,7 +20,6 @@ use crate::upgrade::{ use rand::Rng; use saorsa_core::identity::NodeIdentity; use saorsa_core::{ - BootstrapConfig as CoreBootstrapConfig, BootstrapManager, IPDiversityConfig as CoreDiversityConfig, MultiAddr, NodeConfig as CoreNodeConfig, P2PEvent, P2PNode, }; @@ -108,14 +107,6 @@ impl NodeBuilder { Some(Self::build_upgrade_monitor(&self.config, node_id_seed)) }; - // Initialize bootstrap cache manager if enabled - let bootstrap_manager = if self.config.bootstrap_cache.enabled { - Self::build_bootstrap_manager(&self.config).await - } else { - info!("Bootstrap cache disabled"); - None - }; - // Initialize ANT protocol handler for chunk storage and // wire the fresh-write channel so PUTs trigger replication. let (ant_protocol, fresh_write_rx) = if self.config.storage.enabled { @@ -173,7 +164,6 @@ impl NodeBuilder { events_tx, events_rx: Some(events_rx), upgrade_monitor, - bootstrap_manager, ant_protocol, replication_engine, protocol_task: None, @@ -409,41 +399,6 @@ impl NodeBuilder { Ok(protocol) } - - /// Build the bootstrap cache manager from config. 
- async fn build_bootstrap_manager(config: &NodeConfig) -> Option { - let cache_dir = config - .bootstrap_cache - .cache_dir - .clone() - .unwrap_or_else(|| config.root_dir.join("bootstrap_cache")); - - // Create cache directory - if let Err(e) = std::fs::create_dir_all(&cache_dir) { - warn!("Failed to create bootstrap cache directory: {e}"); - return None; - } - - let bootstrap_config = CoreBootstrapConfig { - cache_dir, - max_peers: config.bootstrap_cache.max_contacts, - ..CoreBootstrapConfig::default() - }; - - match BootstrapManager::with_config(bootstrap_config).await { - Ok(manager) => { - info!( - "Bootstrap cache initialized with {} max contacts", - config.bootstrap_cache.max_contacts - ); - Some(manager) - } - Err(e) => { - warn!("Failed to initialize bootstrap cache: {e}"); - None - } - } - } } /// A running Ant node. @@ -454,8 +409,6 @@ pub struct RunningNode { events_tx: NodeEventsSender, events_rx: Option, upgrade_monitor: Option, - /// Bootstrap cache manager for persistent peer storage. - bootstrap_manager: Option, /// ANT protocol handler for chunk storage. ant_protocol: Option>, /// Replication engine (manages neighbor sync, verification, audits). @@ -690,15 +643,6 @@ impl RunningNode { // Run the main event loop with signal handling self.run_event_loop().await?; - // Log bootstrap cache stats before shutdown - if let Some(ref manager) = self.bootstrap_manager { - let stats = manager.stats().await; - info!( - "Bootstrap cache shutdown: {} peers, avg quality {:.2}", - stats.total_peers, stats.average_quality - ); - } - // Shutdown replication engine before P2P so background tasks don't // use a dead P2P layer, and Arc references are released. 
if let Some(ref mut engine) = self.replication_engine { From c3032ff1c6298bc8bad3ce5961c47e7b0932dfa1 Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Thu, 4 Jun 2026 18:39:27 +0100 Subject: [PATCH 06/18] chore: update Cargo.lock after rebase Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 98 +++++++++++++++++++++++------------------------------- 1 file changed, 41 insertions(+), 57 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6e2129c..d52976f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1315,9 +1315,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "2.11.1" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a" dependencies = [ "serde_core", ] @@ -1489,9 +1489,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.62" +version = "1.2.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" dependencies = [ "find-msvc-tools", "jobserver", @@ -1554,9 +1554,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.44" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" dependencies = [ "iana-time-zone", "js-sys", @@ -2960,9 +2960,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb92f162bf56536459fc83c79b974bb12837acfed43d6bc370a7916d0ae15ecc" +checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" dependencies = [ "atomic-waker", 
"bytes", @@ -3011,7 +3011,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.3", + "socket2 0.6.4", "tokio", "tower-service", "tracing", @@ -3158,9 +3158,9 @@ dependencies = [ [[package]] name = "igd-next" -version = "0.17.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac9a3c8278f43b4cd8463380f4a25653ac843e5b177e1d3eaf849cc9ba10d4d" +checksum = "de7238d487a9aff61f81b5ab41c0a841532a115a398b5fa92a2fadd0885e2581" dependencies = [ "attohttpc", "bytes", @@ -3519,9 +3519,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.30" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616ec5685824bcc94416c6d4a7a446eea774a31efd7062c8480ba6fd06d7a6e5" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "lru" @@ -3621,9 +3621,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" dependencies = [ "libc", "wasi", @@ -4228,7 +4228,7 @@ dependencies = [ "quinn-udp 0.5.14", "rustc-hash", "rustls", - "socket2 0.6.3", + "socket2 0.6.4", "thiserror 2.0.18", "tokio", "tracing", @@ -4266,7 +4266,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.3", + "socket2 0.6.4", "tracing", "windows-sys 0.60.2", ] @@ -4279,7 +4279,7 @@ checksum = "76150b617afc75e6e21ac5f39bc196e80b65415ae48d62dbef8e2519d040ce42" dependencies = [ "cfg_aliases", "libc", - "socket2 0.6.3", + "socket2 0.6.4", "tracing", "windows-sys 0.61.2", ] @@ -4721,9 +4721,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -5036,15 +5036,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "scc" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc" -dependencies = [ - "sdd", -] - [[package]] name = "schannel" version = "0.1.29" @@ -5084,12 +5075,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "sdd" -version = "3.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" - [[package]] name = "sec1" version = "0.7.3" @@ -5322,24 +5307,23 @@ dependencies = [ [[package]] name = "serial_test" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f" +checksum = "699f4197115b8a7e7ff19c9a315a4bd6fffec26cc4626ef45ecaea389e081c6d" dependencies = [ "futures-executor", "futures-util", "log", "once_cell", "parking_lot", - "scc", "serial_test_derive", ] [[package]] name = "serial_test_derive" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" +checksum = "94e153fc76e1c6a068703d6d29c508a0b15c061c4b7e43da59cc097bc342673c" dependencies = [ "proc-macro2", "quote", @@ -5409,9 +5393,9 @@ dependencies = [ [[package]] name = "shlex" -version = "1.3.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 
+checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" [[package]] name = "signal-hook-registry" @@ -5488,9 +5472,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", "windows-sys 0.61.2", @@ -5817,7 +5801,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.3", + "socket2 0.6.4", "tokio-macros", "windows-sys 0.61.2", ] @@ -6103,9 +6087,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "typenum" -version = "1.20.0" +version = "1.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" [[package]] name = "ucd-trie" @@ -6139,9 +6123,9 @@ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" -version = "1.13.2" +version = "1.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" +checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8" [[package]] name = "unicode-width" @@ -6204,9 +6188,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.23.1" +version = "1.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -7079,9 +7063,9 @@ 
dependencies = [ [[package]] name = "yoke" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +checksum = "709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -7102,18 +7086,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.49" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bce33a6288fa3f072a8c2c7d0f2fdbb90e28298f0135c1f99b96c3db2efcc60b" +checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.49" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd425244944f4ab65ccff928e7323354c5a018c75838362fdce749dfad2ee1e" +checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" dependencies = [ "proc-macro2", "quote", From c188cbe5e41e9cc15ea057154fd9939871fb7774 Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Fri, 5 Jun 2026 19:11:52 +0100 Subject: [PATCH 07/18] chore: update Cargo.lock after rebase Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d52976f..6287d74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5252,9 +5252,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.20.0" +version = "3.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e72c1c2cb7b223fafb600a619537a871c2818583d619401b785e7c0b746ccde2" +checksum = "76a5c54c7310e7b8b9577c286d7e399ddd876c3e12b3ed917a8aabc4b96e9e8c" dependencies = [ "base64", "bs58", @@ -5272,9 +5272,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.20.0" +version = "3.21.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b90c488738ecb4fb0262f41f43bc40efc5868d9fb744319ddf5f5317f417bfac" +checksum = "84d57bc0c8b9a17920c178daa6bb924850d54a9c97ab45194bb8c17ad66bb660" dependencies = [ "darling", "proc-macro2", From d04c5aa6c4943ecb17d040721235c00d3c2f4e1c Mon Sep 17 00:00:00 2001 From: grumbach Date: Thu, 11 Jun 2026 13:32:35 +0900 Subject: [PATCH 08/18] fix(pruning): tolerate one lagging peer in the prune proof gate Prune deletion required every peer in the current close group to prove possession, but fresh replication is fire-and-forget and uploads/repair only guarantee QUORUM_THRESHOLD placement (4 of 7). One lagging peer vetoed the prune forever, leaving prod nodes unable to free disk while audit-proof WARNs accumulated. A record that is out of the node's closest 7 now prunes once all but one of the current close group (6 of 7 at production parameters) prove they hold the exact bytes. This keeps a single absent peer from blocking deletion forever while still requiring near-full placement before the extra copy is dropped. Groups of one or two peers still require every proof. The mature-repair-proof precondition follows the same rule: a never-synced close-group peer reduces the audit pool instead of vetoing the prune, and only hinted peers are ever audited. PaidForList entry pruning gets its own remote gate: an out-of-range entry was previously removed on local state alone once the hysteresis elapsed; it is now retained until three quarters of the current paid close group (15 of 20 at production parameters) confirm the key in their own paid lists. Confirmations are revalidated against the current paid close group after the network round, the scan rotates through a dedicated cursor so capped passes cannot starve later entries, and chunk pruning checks only chunk possession while paid pruning checks only paid-list confirmations. 
Also split the failed-proof WARN into key-absent vs digest-mismatch so the two failure modes can be told apart in production logs. --- src/replication/mod.rs | 2 + src/replication/pruning.rs | 546 +++++++++++++++++++++++++++++++++---- src/replication/quorum.rs | 2 +- src/replication/types.rs | 4 + tests/e2e/replication.rs | 253 +++++++++++++++++ 5 files changed, 760 insertions(+), 47 deletions(-) diff --git a/src/replication/mod.rs b/src/replication/mod.rs index 81ea8ec..a19fb57 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -1695,11 +1695,13 @@ async fn run_neighbor_sync_round( let old_bootstrap_claims = std::mem::take(&mut state.bootstrap_claims); let old_bootstrap_claim_history = std::mem::take(&mut state.bootstrap_claim_history); let old_prune_cursor = state.prune_cursor; + let old_paid_prune_cursor = state.paid_prune_cursor; *state = NeighborSyncState::new_cycle(neighbors); state.last_sync_times = old_sync_times; state.bootstrap_claims = old_bootstrap_claims; state.bootstrap_claim_history = old_bootstrap_claim_history; state.prune_cursor = old_prune_cursor; + state.paid_prune_cursor = old_paid_prune_cursor; } } diff --git a/src/replication/pruning.rs b/src/replication/pruning.rs index 4618ab0..19177d9 100644 --- a/src/replication/pruning.rs +++ b/src/replication/pruning.rs @@ -26,13 +26,22 @@ use crate::replication::protocol::{ compute_audit_digest, AuditChallenge, AuditResponse, ReplicationMessage, ReplicationMessageBody, ABSENT_KEY_DIGEST, }; -use crate::replication::types::{BootstrapClaimObservation, NeighborSyncState, RepairProofs}; +use crate::replication::quorum::{self, VerificationTargets}; +use crate::replication::types::{ + BootstrapClaimObservation, KeyVerificationEvidence, NeighborSyncState, PaidListEvidence, + RepairProofs, +}; use crate::storage::LmdbStorage; use super::REPLICATION_TRUST_WEIGHT; const MAX_CONCURRENT_PRUNE_AUDIT_CHALLENGES: usize = 32; +/// Maximum expired `PaidForList` entries verified against the paid close 
+/// group per prune pass. Bounds the per-pass verification fan-out the same +/// way `MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS` bounds record audits. +const MAX_PAID_PRUNE_VERIFICATIONS_PER_PASS: usize = 32; + // --------------------------------------------------------------------------- // Result type // --------------------------------------------------------------------------- @@ -140,13 +149,15 @@ struct PruneAuditReportState { /// For each stored record K: /// - If `IsResponsible(self, K)`: clear `RecordOutOfRangeFirstSeen`. /// - If not responsible: set timestamp if not already set; delete if the -/// timestamp is at least `PRUNE_HYSTERESIS_DURATION` old and the current -/// close group proves it stores the record. +/// timestamp is at least `PRUNE_HYSTERESIS_DURATION` old and all but one +/// of the current close group prove they store the record. /// /// For each `PaidForList` entry K: /// - If self is in `PaidCloseGroup(K)`: clear `PaidOutOfRangeFirstSeen`. /// - If not in group: set timestamp if not already set; remove entry if the -/// timestamp is at least `PRUNE_HYSTERESIS_DURATION` old. +/// timestamp is at least `PRUNE_HYSTERESIS_DURATION` old and three +/// quarters of the current paid close group (15 of 20 at production +/// parameters) confirm the key in their own `PaidForList`. /// /// Compatibility wrapper for callers that have not adopted repair-proof /// tracking. 
It preserves the original public signature, but it has no proof @@ -182,8 +193,15 @@ pub async fn run_prune_pass( pub async fn run_prune_pass_with_context(ctx: PrunePassContext<'_>) -> PruneResult { let (stored_count, record_stats) = prune_stored_records(&ctx).await; let now = Instant::now(); - let (paid_count, paid_stats) = - prune_paid_entries(ctx.self_id, ctx.paid_list, ctx.p2p_node, ctx.config, now).await; + let (paid_count, paid_stats) = prune_paid_entries( + ctx.self_id, + ctx.paid_list, + ctx.p2p_node, + ctx.config, + ctx.sync_state, + now, + ) + .await; let result = PruneResult { records_pruned: record_stats.pruned, @@ -346,32 +364,37 @@ async fn evaluate_record_prune_key( return outcome; } + // Only peers we have hinted (mature repair proof) may be audited; the + // proof threshold must be reachable among them. A never-synced peer in + // the close group reduces the audit pool instead of vetoing the prune. let current_close_peers: HashSet = closest.iter().map(|node| node.peer_id).collect(); - if !target_peers_have_mature_repair_proofs( + let audit_targets = peers_with_mature_repair_proofs( key, &target_peers, ¤t_close_peers, ctx.repair_proofs, ctx.current_sync_epoch, ) - .await - { + .await; + let proofs_needed = prune_proofs_needed(target_peers.len()); + if proofs_needed == 0 || audit_targets.len() < proofs_needed { debug!( - "Deferring prune for {} until current close group has mature repair proofs", + "Deferring prune for {} until enough of the close group has mature \ + repair proofs", hex::encode(key) ); return outcome; } - if target_peers.len() > *audit_challenge_budget { + if audit_targets.len() > *audit_challenge_budget { outcome.state = RecordPruneKeyState::BudgetDeferred; return outcome; } - *audit_challenge_budget -= target_peers.len(); + *audit_challenge_budget -= audit_targets.len(); outcome.state = RecordPruneKeyState::Candidate(RecordPruneCandidate { key: *key, - target_peers, + target_peers: audit_targets, }); outcome } @@ -381,6 +404,7 @@ 
async fn prune_paid_entries( paid_list: &Arc, p2p_node: &Arc, config: &ReplicationConfig, + sync_state: &Arc>, now: Instant, ) -> (usize, PaidPruneStats) { let paid_keys = match paid_list.all_keys() { @@ -393,9 +417,17 @@ async fn prune_paid_entries( let dht = p2p_node.dht_manager(); let mut stats = PaidPruneStats::default(); - let mut paid_keys_to_delete = Vec::new(); + let mut expired_candidates: Vec<(XorName, Vec)> = Vec::new(); + let mut verification_deferred = 0usize; + // Rotate the scan start so expired entries beyond the per-pass cap are + // not starved by the same head-of-list entries every pass. + let scan_start = paid_prune_scan_start(sync_state, paid_keys.len()).await; + let mut last_selected_offset = None; - for key in &paid_keys { + for offset in 0..paid_keys.len() { + let Some(key) = paid_keys.get((scan_start + offset) % paid_keys.len()) else { + continue; + }; let closest: Vec = dht .find_closest_nodes_local_with_self(key, config.paid_list_close_group_size) .await; @@ -417,12 +449,46 @@ async fn prune_paid_entries( .checked_duration_since(first_seen) .unwrap_or(Duration::ZERO); if elapsed >= config.prune_hysteresis_duration { - paid_keys_to_delete.push(*key); + if expired_candidates.len() < MAX_PAID_PRUNE_VERIFICATIONS_PER_PASS { + let target_peers = remote_close_group_peers(&closest, self_id); + expired_candidates.push((*key, target_peers)); + last_selected_offset = Some(offset); + } else { + verification_deferred = verification_deferred.saturating_add(1); + } } } } } + advance_paid_prune_cursor( + sync_state, + paid_keys.len(), + scan_start, + last_selected_offset, + ) + .await; + + if verification_deferred > 0 { + debug!( + "Deferred {verification_deferred} expired PaidForList entries beyond the \ + per-pass verification cap ({MAX_PAID_PRUNE_VERIFICATIONS_PER_PASS})" + ); + } + + let confirmed_by_key = + collect_paid_prune_confirmations(&expired_candidates, p2p_node, config).await; + let (paid_keys_to_delete, revalidated_cleared) = 
revalidated_paid_prune_keys( + &expired_candidates, + &confirmed_by_key, + self_id, + paid_list, + p2p_node, + config, + ) + .await; + stats.cleared += revalidated_cleared; + if !paid_keys_to_delete.is_empty() { match paid_list.remove_batch(&paid_keys_to_delete).await { Ok(count) => { @@ -438,6 +504,104 @@ async fn prune_paid_entries( (paid_keys.len(), stats) } +async fn paid_prune_scan_start( + sync_state: &Arc>, + paid_key_count: usize, +) -> usize { + if paid_key_count == 0 { + return 0; + } + sync_state.read().await.paid_prune_cursor % paid_key_count +} + +async fn advance_paid_prune_cursor( + sync_state: &Arc>, + paid_key_count: usize, + scan_start: usize, + last_selected_offset: Option, +) { + if paid_key_count == 0 { + sync_state.write().await.paid_prune_cursor = 0; + return; + } + + let advance_by = last_selected_offset.map_or(1, |offset| offset.saturating_add(1)); + sync_state.write().await.paid_prune_cursor = (scan_start + advance_by) % paid_key_count; +} + +/// Re-check each confirmed candidate against current local state before +/// deletion. +/// +/// The network round in [`collect_paid_prune_confirmations`] takes time; +/// the paid close group may have changed underneath it, including self +/// moving back into range. Mirrors [`revalidated_record_prune_keys`]: +/// confirmations only count from peers still in the current paid close +/// group, against a threshold computed from that current group. 
+async fn revalidated_paid_prune_keys( + expired_candidates: &[(XorName, Vec)], + confirmed_by_key: &HashMap>, + self_id: &PeerId, + paid_list: &Arc, + p2p_node: &Arc, + config: &ReplicationConfig, +) -> (Vec, usize) { + let dht = p2p_node.dht_manager(); + let mut keys_to_delete = Vec::new(); + let mut cleared = 0; + let now = Instant::now(); + + for (key, _) in expired_candidates { + let closest: Vec = dht + .find_closest_nodes_local_with_self(key, config.paid_list_close_group_size) + .await; + + if closest.iter().any(|n| n.peer_id == *self_id) { + if paid_list.paid_out_of_range_since(key).is_some() { + paid_list.clear_paid_out_of_range(key); + cleared += 1; + } + continue; + } + + let Some(first_seen) = paid_list.paid_out_of_range_since(key) else { + continue; + }; + let elapsed = now + .checked_duration_since(first_seen) + .unwrap_or(Duration::ZERO); + if elapsed < config.prune_hysteresis_duration { + continue; + } + + let current_target_peers = remote_close_group_peers(&closest, self_id); + if current_target_peers.is_empty() { + warn!( + "Cannot prune paid entry {}: current paid close group has no remote peers", + hex::encode(key) + ); + continue; + } + + let confirmations_needed = paid_prune_confirmations_needed(current_target_peers.len()); + if target_peers_reported_present( + key, + ¤t_target_peers, + confirmed_by_key, + confirmations_needed, + ) { + keys_to_delete.push(*key); + } else { + debug!( + "Deferring paid-entry prune for {} until enough of the current paid \ + close group confirm it", + hex::encode(key) + ); + } + } + + (keys_to_delete, cleared) +} + fn remote_close_group_peers(close_group: &[DHTNode], self_id: &PeerId) -> Vec { close_group .iter() @@ -446,17 +610,112 @@ fn remote_close_group_peers(close_group: &[DHTNode], self_id: &PeerId) -> Vec usize { + (3 * group_size).div_ceil(4) +} + +/// Ask the current paid close group whether they track each expired key in +/// their `PaidForList`, and return the confirming peers per key. 
+/// +/// The deletion decision happens afterwards in +/// [`revalidated_paid_prune_keys`], against the paid close group as it +/// stands once the network round has completed. +async fn collect_paid_prune_confirmations( + expired_candidates: &[(XorName, Vec)], + p2p_node: &Arc, + config: &ReplicationConfig, +) -> HashMap> { + if expired_candidates.is_empty() { + return HashMap::new(); + } + + let mut targets = VerificationTargets::default(); + let mut keys = Vec::new(); + for (key, target_peers) in expired_candidates { + if target_peers.is_empty() { + warn!( + "Cannot prune paid entry {}: current paid close group has no remote peers", + hex::encode(key) + ); + continue; + } + keys.push(*key); + for peer in target_peers { + targets.all_peers.insert(*peer); + targets.peer_to_keys.entry(*peer).or_default().push(*key); + targets + .peer_to_paid_keys + .entry(*peer) + .or_default() + .insert(*key); + } + targets.paid_targets.insert(*key, target_peers.clone()); + targets.paid_group_sizes.insert(*key, target_peers.len()); + } + for keys_list in targets.peer_to_keys.values_mut() { + keys_list.sort_unstable(); + keys_list.dedup(); + } + + let evidence = quorum::run_verification_round(&keys, &targets, p2p_node, config).await; + paid_confirmations_by_key(expired_candidates, &evidence) +} + +/// Aggregate `Confirmed` paid-list evidence into per-key peer sets. +/// +/// Only peers in the candidate's own target set count; `NotFound` and +/// `Unresolved` answers never confirm. 
+fn paid_confirmations_by_key( + expired_candidates: &[(XorName, Vec)], + evidence: &HashMap, +) -> HashMap> { + let mut confirmed_by_key: HashMap> = HashMap::new(); + for (key, target_peers) in expired_candidates { + let Some(key_evidence) = evidence.get(key) else { + continue; + }; + let confirmed: HashSet = key_evidence + .paid_list + .iter() + .filter(|(peer, status)| { + **status == PaidListEvidence::Confirmed && target_peers.contains(peer) + }) + .map(|(peer, _)| *peer) + .collect(); + if !confirmed.is_empty() { + confirmed_by_key.insert(*key, confirmed); + } + } + confirmed_by_key +} + +/// Filter `target_peers` down to those with a mature repair proof for `key`. +/// +/// Per design rule 20, peers without a key-specific mature repair hint proof +/// are never audited for that key. +async fn peers_with_mature_repair_proofs( key: &XorName, target_peers: &[PeerId], current_close_peers: &HashSet, repair_proofs: &Arc>, current_sync_epoch: u64, -) -> bool { +) -> Vec { let mut proofs = repair_proofs.write().await; - target_peers.iter().all(|peer| { - proofs.has_mature_replica_hint(peer, key, current_close_peers, current_sync_epoch) - }) + target_peers + .iter() + .filter(|peer| { + proofs.has_mature_replica_hint(peer, key, current_close_peers, current_sync_epoch) + }) + .copied() + .collect() } async fn prune_scan_start( @@ -512,10 +771,14 @@ async fn delete_stored_records( /// Collect positive presence reports for prune candidates. /// -/// Peers that fail to prove storage block pruning for their keys. The -/// retained local record continues to participate in normal neighbor-sync -/// repair because replica hint construction walks all locally stored keys, -/// including out-of-range keys retained by hysteresis. +/// A key is deleted once all but one of the current close group prove +/// possession ([`prune_proofs_needed`]). 
Requiring unanimous proofs left +/// out-of-range records undeletable whenever a single close-group peer +/// lagged, while the all-but-one threshold still demands more copies than +/// the storage quorum used elsewhere. Keys below the proof threshold stay +/// local, and the retained record continues to participate in normal +/// neighbor-sync repair because replica hint construction walks all locally +/// stored keys, including out-of-range keys retained by hysteresis. async fn collect_record_prune_proofs( candidates: &[RecordPruneCandidate], storage: &Arc, @@ -600,11 +863,18 @@ async fn revalidated_record_prune_keys( continue; } - if target_peers_reported_present(&candidate.key, ¤t_target_peers, present_by_key) { + let proofs_needed = prune_proofs_needed(current_target_peers.len()); + if target_peers_reported_present( + &candidate.key, + ¤t_target_peers, + present_by_key, + proofs_needed, + ) { keys_to_delete.push(candidate.key); } else { debug!( - "Deferring prune for {} until current close group reports it", + "Deferring prune for {} until all but one of the current close group \ + report it", hex::encode(candidate.key) ); } @@ -627,25 +897,63 @@ fn build_peer_audit_challenges(candidates: &[RecordPruneCandidate]) -> Vec<(Peer fn confirmed_keys_from_presence( candidates: &[RecordPruneCandidate], present_by_key: &HashMap>, + proofs_needed: usize, ) -> Vec { candidates .iter() .filter(|candidate| { - target_peers_reported_present(&candidate.key, &candidate.target_peers, present_by_key) + target_peers_reported_present( + &candidate.key, + &candidate.target_peers, + present_by_key, + proofs_needed, + ) }) .map(|candidate| candidate.key) .collect() } +/// Proofs required before deleting an out-of-range record: all but one of +/// the close group (6 of 7 at production parameters). 
+/// +/// Stricter than the storage quorum (`QuorumNeeded`) because pruning only +/// runs after `PRUNE_HYSTERESIS_DURATION` out of range, by which time many +/// sync cycles should have replicated the record across the whole close +/// group. Tolerating exactly one lagging peer keeps a single absent peer +/// from vetoing deletion forever without accepting under-replication. +/// Groups of one or two peers require every proof: tolerating a miss there +/// would allow deletion on a single attestation. +fn prune_proofs_needed(group_size: usize) -> usize { + if group_size <= 2 { + group_size + } else { + group_size - 1 + } +} + +/// Whether enough target peers proved possession to allow deletion. +/// +/// `proofs_needed == 0` means confirmation is impossible (no targets), not +/// trivially met. fn target_peers_reported_present( key: &XorName, target_peers: &[PeerId], present_by_key: &HashMap>, + proofs_needed: usize, ) -> bool { + if proofs_needed == 0 { + return false; + } let Some(present_peers) = present_by_key.get(key) else { return false; }; - target_peers.iter().all(|peer| present_peers.contains(peer)) + // Count distinct proven peers: iterating the present set keeps a + // duplicated entry in `target_peers` from being counted twice. + let proven = present_peers + .iter() + .filter(|peer| target_peers.contains(peer)) + .count(); + proven >= proofs_needed } /// Challenge a peer to prove it holds the exact record bytes for `key`. 
@@ -782,19 +1090,25 @@ fn prune_audit_response_status( warn!("Prune audit challenge ID mismatch from {peer}"); return PruneAuditStatus::Failed; } - if digests.len() != 1 { + let [digest] = digests.as_slice() else { warn!( "Prune audit response from {peer} returned {} digests for one challenged key", digests.len(), ); return PruneAuditStatus::Failed; + }; + if *digest == ABSENT_KEY_DIGEST { + warn!( + "Prune audit proof from {peer} failed for {}: peer reports key absent", + hex::encode(key) + ); + return PruneAuditStatus::Failed; } - - if audit_digest_proves_key(peer, key, nonce, local_bytes, &digests[0]) { + if audit_digest_proves_key(peer, key, nonce, local_bytes, digest) { PruneAuditStatus::Proven } else { warn!( - "Prune audit proof from {peer} failed for {}", + "Prune audit proof from {peer} failed for {}: digest mismatch", hex::encode(key) ); PruneAuditStatus::Failed @@ -1027,38 +1341,178 @@ mod tests { } #[test] - fn confirmed_keys_require_all_target_peers_present() { + fn confirmed_keys_require_quorum_of_target_peers_present() { let peer_a = peer_id_from_byte(1); let peer_b = peer_id_from_byte(2); + let peer_c = peer_id_from_byte(3); let key = key_from_byte(0xC); - let candidates = vec![candidate(key, vec![peer_a, peer_b])]; + let candidates = vec![candidate(key, vec![peer_a, peer_b, peer_c])]; let mut present_by_key = HashMap::new(); present_by_key.insert(key, HashSet::from([peer_a, peer_b])); - let confirmed = confirmed_keys_from_presence(&candidates, &present_by_key); - + // Two of three proofs meet a quorum of 2 even though one peer is + // missing — unanimity is not required. + let confirmed = confirmed_keys_from_presence(&candidates, &present_by_key, 2); assert_eq!(confirmed, vec![key]); + + // The same evidence fails a quorum of 3. 
+ let confirmed = confirmed_keys_from_presence(&candidates, &present_by_key, 3); + assert!(confirmed.is_empty()); } #[test] - fn confirmed_keys_defer_absent_or_missing_peer_evidence() { + fn confirmed_keys_defer_below_quorum_or_missing_peer_evidence() { let peer_a = peer_id_from_byte(1); let peer_b = peer_id_from_byte(2); - let complete_key = key_from_byte(0xD); - let absent_key = key_from_byte(0xE); + let quorum_key = key_from_byte(0xD); + let below_quorum_key = key_from_byte(0xE); let missing_key = key_from_byte(0xF); let candidates = vec![ - candidate(complete_key, vec![peer_a, peer_b]), - candidate(absent_key, vec![peer_a, peer_b]), + candidate(quorum_key, vec![peer_a, peer_b]), + candidate(below_quorum_key, vec![peer_a, peer_b]), candidate(missing_key, vec![peer_a, peer_b]), ]; let mut present_by_key = HashMap::new(); - present_by_key.insert(complete_key, HashSet::from([peer_a, peer_b])); - present_by_key.insert(absent_key, HashSet::from([peer_a])); + present_by_key.insert(quorum_key, HashSet::from([peer_a, peer_b])); + present_by_key.insert(below_quorum_key, HashSet::from([peer_a])); + + let confirmed = confirmed_keys_from_presence(&candidates, &present_by_key, 2); - let confirmed = confirmed_keys_from_presence(&candidates, &present_by_key); + assert_eq!(confirmed, vec![quorum_key]); + } - assert_eq!(confirmed, vec![complete_key]); + #[test] + fn prune_proofs_needed_tolerates_exactly_one_lagging_peer() { + assert_eq!(prune_proofs_needed(0), 0); + // Tiny groups require every proof. + assert_eq!(prune_proofs_needed(1), 1); + assert_eq!(prune_proofs_needed(2), 2); + assert_eq!(prune_proofs_needed(3), 2); + assert_eq!(prune_proofs_needed(5), 4); + // Production close group: 6 of 7 proofs required. 
+ assert_eq!(prune_proofs_needed(7), 6); + } + + #[test] + fn paid_prune_confirmations_are_three_quarters_rounded_up() { + assert_eq!(paid_prune_confirmations_needed(0), 0); + assert_eq!(paid_prune_confirmations_needed(1), 1); + assert_eq!(paid_prune_confirmations_needed(2), 2); + assert_eq!(paid_prune_confirmations_needed(4), 3); + // Production paid close group: 15 of 20 confirmations required. + assert_eq!(paid_prune_confirmations_needed(20), 15); + } + + #[test] + fn paid_confirmations_count_only_confirmed_target_peers() { + let confirmed_peer = peer_id_from_byte(1); + let not_found_peer = peer_id_from_byte(2); + let unresolved_peer = peer_id_from_byte(3); + let outsider = peer_id_from_byte(4); + let key = key_from_byte(0x21); + let candidates = vec![(key, vec![confirmed_peer, not_found_peer, unresolved_peer])]; + + let mut evidence = HashMap::new(); + evidence.insert( + key, + KeyVerificationEvidence { + presence: HashMap::new(), + paid_list: HashMap::from([ + (confirmed_peer, PaidListEvidence::Confirmed), + (not_found_peer, PaidListEvidence::NotFound), + (unresolved_peer, PaidListEvidence::Unresolved), + // Confirmation from a peer outside the target set. 
+ (outsider, PaidListEvidence::Confirmed), + ]), + }, + ); + + let confirmed_by_key = paid_confirmations_by_key(&candidates, &evidence); + + assert_eq!( + confirmed_by_key.get(&key), + Some(&HashSet::from([confirmed_peer])), + "only Confirmed answers from target peers may count", + ); + } + + #[test] + fn paid_confirmations_skip_keys_without_evidence() { + let peer = peer_id_from_byte(1); + let key = key_from_byte(0x22); + let candidates = vec![(key, vec![peer])]; + + let confirmed_by_key = paid_confirmations_by_key(&candidates, &HashMap::new()); + + assert!(confirmed_by_key.is_empty()); + } + + #[tokio::test] + async fn paid_prune_cursor_advances_past_selected_window() { + let state = Arc::new(RwLock::new(NeighborSyncState::new_cycle(vec![]))); + state.write().await.paid_prune_cursor = 2; + + let start = paid_prune_scan_start(&state, 10).await; + advance_paid_prune_cursor(&state, 10, start, Some(3)).await; + + assert_eq!(state.read().await.paid_prune_cursor, 6); + } + + #[tokio::test] + async fn paid_prune_cursor_advances_even_when_nothing_selected() { + let state = Arc::new(RwLock::new(NeighborSyncState::new_cycle(vec![]))); + state.write().await.paid_prune_cursor = 9; + + let start = paid_prune_scan_start(&state, 10).await; + advance_paid_prune_cursor(&state, 10, start, None).await; + + assert_eq!(state.read().await.paid_prune_cursor, 0); + } + + #[test] + fn zero_quorum_never_confirms() { + let peer_a = peer_id_from_byte(1); + let key = key_from_byte(0x10); + let mut present_by_key = HashMap::new(); + present_by_key.insert(key, HashSet::from([peer_a])); + + assert!(!target_peers_reported_present( + &key, + &[peer_a], + &present_by_key, + 0 + )); + } + + #[test] + fn proofs_from_non_target_peers_do_not_count_toward_quorum() { + let target = peer_id_from_byte(1); + let outsider = peer_id_from_byte(2); + let key = key_from_byte(0x11); + let mut present_by_key = HashMap::new(); + present_by_key.insert(key, HashSet::from([outsider])); + + 
assert!(!target_peers_reported_present( + &key, + &[target], + &present_by_key, + 1 + )); + } + + #[test] + fn duplicated_target_peer_counts_once_toward_quorum() { + let peer = peer_id_from_byte(1); + let key = key_from_byte(0x12); + let mut present_by_key = HashMap::new(); + present_by_key.insert(key, HashSet::from([peer])); + + assert!(!target_peers_reported_present( + &key, + &[peer, peer], + &present_by_key, + 2 + )); } #[test] diff --git a/src/replication/quorum.rs b/src/replication/quorum.rs index 5f4d99a..4a3f140 100644 --- a/src/replication/quorum.rs +++ b/src/replication/quorum.rs @@ -22,7 +22,7 @@ use crate::replication::types::{KeyVerificationEvidence, PaidListEvidence, Prese // --------------------------------------------------------------------------- /// Targets for verifying a set of keys. -#[derive(Debug)] +#[derive(Debug, Default)] pub struct VerificationTargets { /// Per-key: closest `CLOSE_GROUP_SIZE` peers (excluding self) for presence /// quorum. diff --git a/src/replication/types.rs b/src/replication/types.rs index ec74e76..a2f6019 100644 --- a/src/replication/types.rs +++ b/src/replication/types.rs @@ -455,6 +455,9 @@ pub struct NeighborSyncState { /// Cursor used by post-cycle pruning to rotate through stored records when /// the per-pass prune-confirmation budget is exhausted. pub prune_cursor: usize, + /// Cursor used by post-cycle pruning to rotate through `PaidForList` + /// entries when the per-pass paid-verification cap is exhausted. 
+ pub paid_prune_cursor: usize, } impl NeighborSyncState { @@ -468,6 +471,7 @@ impl NeighborSyncState { bootstrap_claims: HashMap::new(), bootstrap_claim_history: HashMap::new(), prune_cursor: 0, + paid_prune_cursor: 0, } } diff --git a/tests/e2e/replication.rs b/tests/e2e/replication.rs index 83fc792..121cc8c 100644 --- a/tests/e2e/replication.rs +++ b/tests/e2e/replication.rs @@ -626,6 +626,259 @@ async fn test_prune_pass_requires_remote_confirmation_before_delete() { harness.teardown().await.expect("teardown"); } +/// The prune proof gate tolerates exactly one lagging close-group peer, at +/// production parameters (close group 7, 6 proofs required). +/// +/// Fresh replication is fire-and-forget and uploads/repair succeed at +/// `QUORUM_THRESHOLD` (4 of 7), so a record's placement routinely sits below +/// full unanimity. When the prune gate demanded unanimous proofs, such +/// records were audited every pass, failed (absent peers answer +/// `ABSENT_KEY_DIGEST`), and were never deleted — the production "pruning is +/// hardly taking place" incident. The all-but-one threshold keeps a single +/// absent peer from vetoing deletion while still demanding near-full +/// placement before the local copy is dropped. +/// +/// This test pins both sides of the gate: +/// - below the threshold (5 of 7 proofs) the record must never be deleted, +/// no matter how many passes run; +/// - at the threshold (6 of 7 proofs) the record prunes even though one +/// peer still lacks the bytes. +/// +/// Repair proofs are recorded only for the eventual holders: the +/// never-hinted peer must reduce the audit pool rather than veto the prune +/// at the repair-proof gate. +#[tokio::test] +#[serial] +async fn prune_deletes_at_proof_threshold_and_retains_below_it() { + const HINT_EPOCH: u64 = 7; + const CURRENT_EPOCH: u64 = HINT_EPOCH + 1; + /// Production close-group size (`CLOSE_GROUP_SIZE` in ant-protocol). 
+ const PROD_CLOSE_GROUP_SIZE: usize = 7; + /// Prune proof threshold at production parameters: all but one, 6 of 7. + const PRUNE_PROOFS_NEEDED: usize = 6; + + let harness = TestHarness::setup_small().await.expect("setup"); + harness.warmup_dht().await.expect("warmup"); + + let pruner_idx = 3; + let config = ReplicationConfig { + close_group_size: PROD_CLOSE_GROUP_SIZE, + paid_list_close_group_size: 1, + prune_hysteresis_duration: Duration::ZERO, + ..ReplicationConfig::default() + }; + let sync_state = Arc::new(RwLock::new(NeighborSyncState::new_cycle(vec![]))); + let repair_proofs = Arc::new(RwLock::new(RepairProofs::new())); + + let pruner = harness.test_node(pruner_idx).expect("pruner"); + let pruner_p2p = Arc::clone(pruner.p2p_node.as_ref().expect("pruner p2p")); + let pruner_protocol = pruner.ant_protocol.as_ref().expect("pruner protocol"); + let pruner_storage = pruner_protocol.storage(); + let pruner_paid_list = Arc::clone( + pruner + .replication_engine + .as_ref() + .expect("pruner replication engine") + .paid_list(), + ); + let pruner_peer = *pruner_p2p.peer_id(); + + let (content, address, targets) = + find_remote_prune_candidate(&harness, pruner_idx, PROD_CLOSE_GROUP_SIZE, "quorum-stored") + .await; + pruner_storage + .put(&address, &content) + .await + .expect("put record on pruner"); + + // Replicate below the threshold: only 5 of 7 peers hold the bytes. + // Repair proofs cover only the eventual holders; the remaining + // close-group peer was never hinted and stays outside the audit pool. + store_record_on_peers( + &harness, + &targets[..PRUNE_PROOFS_NEEDED - 1], + &address, + &content, + ) + .await; + record_repair_proofs_for_peers( + &repair_proofs, + &pruner_p2p, + &config, + &targets[..PRUNE_PROOFS_NEEDED], + &address, + HINT_EPOCH, + ) + .await; + + // Below the threshold the local copy is load-bearing: deleting it would + // shrink the proven replica set past the prune safety bar, so every + // pass must retain it. 
+ for pass in 0..3 { + let result = pruning::run_prune_pass_with_context(pruning::PrunePassContext { + self_id: &pruner_peer, + storage: &pruner_storage, + paid_list: &pruner_paid_list, + p2p_node: &pruner_p2p, + config: &config, + sync_state: &sync_state, + repair_proofs: &repair_proofs, + current_sync_epoch: CURRENT_EPOCH, + allow_remote_prune_audits: true, + }) + .await; + assert_eq!( + result.records_pruned, 0, + "pass {pass}: a record below the proof threshold must never prune", + ); + assert!( + pruner_storage.exists(&address).expect("exists"), + "pass {pass}: record should remain on the out-of-range node", + ); + } + + // One more holder reaches the threshold (6 of 7). The prune must now + // proceed even though one close-group peer still lacks the bytes: + // demanding unanimity left prod fleets unable to prune at all. + store_record_on_peer( + &harness, + targets + .get(PRUNE_PROOFS_NEEDED - 1) + .expect("threshold target"), + &address, + &content, + ) + .await; + let at_threshold = pruning::run_prune_pass_with_context(pruning::PrunePassContext { + self_id: &pruner_peer, + storage: &pruner_storage, + paid_list: &pruner_paid_list, + p2p_node: &pruner_p2p, + config: &config, + sync_state: &sync_state, + repair_proofs: &repair_proofs, + current_sync_epoch: CURRENT_EPOCH, + allow_remote_prune_audits: true, + }) + .await; + assert_eq!( + at_threshold.records_pruned, 1, + "a record proven on all but one of the close group must prune", + ); + assert!(!pruner_storage.exists(&address).expect("exists")); + + harness.teardown().await.expect("teardown"); +} + +/// Paid-list entry pruning requires confirmations from the current paid +/// close group (three quarters rounded up, 15 of 20 at production +/// parameters), independent of chunk possession. +/// +/// An out-of-range `PaidForList` entry used to be removed on local state +/// alone once the hysteresis elapsed. 
It is now retained until enough of +/// the current paid close group confirm they track the key in their own +/// paid lists, so a node never forgets an authorization the group has not +/// already absorbed. Chunk pruning and paid pruning check their own gates +/// only: this test stores no chunk anywhere. +/// +/// Run with a 2-peer paid close group, where the threshold is both peers. +#[tokio::test] +#[serial] +async fn paid_prune_requires_paid_close_group_confirmations() { + const PAID_GROUP: usize = 2; + + let harness = TestHarness::setup_minimal().await.expect("setup"); + harness.warmup_dht().await.expect("warmup"); + + let pruner_idx = 3; + let config = ReplicationConfig { + close_group_size: 2, + quorum_threshold: 1, + paid_list_close_group_size: PAID_GROUP, + prune_hysteresis_duration: Duration::ZERO, + ..ReplicationConfig::default() + }; + let sync_state = Arc::new(RwLock::new(NeighborSyncState::new_cycle(vec![]))); + let repair_proofs = Arc::new(RwLock::new(RepairProofs::new())); + + let pruner = harness.test_node(pruner_idx).expect("pruner"); + let pruner_p2p = Arc::clone(pruner.p2p_node.as_ref().expect("pruner p2p")); + let pruner_protocol = pruner.ant_protocol.as_ref().expect("pruner protocol"); + let pruner_storage = pruner_protocol.storage(); + let pruner_peer = *pruner_p2p.peer_id(); + + // Standalone paid list so the engine's own paid state stays untouched. + let paid_dir = tempfile::tempdir().expect("tempdir"); + let paid_list = Arc::new( + ant_node::replication::paid_list::PaidList::new(paid_dir.path()) + .await + .expect("paid list"), + ); + + let (_content, address, targets) = + find_remote_prune_candidate(&harness, pruner_idx, PAID_GROUP, "paid-prune").await; + paid_list.insert(&address).await.expect("insert paid key"); + + // The paid close group does not track the key yet: the entry must be + // retained even though it is out of range and past hysteresis. 
+ let unconfirmed = pruning::run_prune_pass_with_context(pruning::PrunePassContext { + self_id: &pruner_peer, + storage: &pruner_storage, + paid_list: &paid_list, + p2p_node: &pruner_p2p, + config: &config, + sync_state: &sync_state, + repair_proofs: &repair_proofs, + current_sync_epoch: 1, + allow_remote_prune_audits: true, + }) + .await; + assert_eq!( + unconfirmed.paid_entries_pruned, 0, + "a paid entry without paid close-group confirmations must never prune", + ); + assert!( + paid_list.contains(&address).expect("contains"), + "unconfirmed paid entry should remain tracked", + ); + + // Once the whole paid close group confirms the key in their paid lists, + // the entry prunes. + for peer in &targets { + let idx = node_index_for_peer(&harness, peer).expect("target in harness"); + let engine = harness + .test_node(idx) + .expect("target node") + .replication_engine + .as_ref() + .expect("target engine"); + engine.paid_list().insert(&address).await.expect("insert"); + } + + let confirmed = pruning::run_prune_pass_with_context(pruning::PrunePassContext { + self_id: &pruner_peer, + storage: &pruner_storage, + paid_list: &paid_list, + p2p_node: &pruner_p2p, + config: &config, + sync_state: &sync_state, + repair_proofs: &repair_proofs, + current_sync_epoch: 1, + allow_remote_prune_audits: true, + }) + .await; + assert_eq!( + confirmed.paid_entries_pruned, 1, + "a paid entry confirmed by the paid close group must prune", + ); + assert!( + !paid_list.contains(&address).expect("contains"), + "confirmed paid entry should be removed", + ); + + harness.teardown().await.expect("teardown"); +} + /// Fetch not-found returns `NotFound`. 
/// /// Request a key that does not exist on the target node and verify From 11976d9d953424ba42c5d07d409213ceeedc4c41 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Thu, 11 Jun 2026 15:38:52 +0200 Subject: [PATCH 09/18] fix(pruning): keep paid prune cursor internal --- src/replication/mod.rs | 2 - src/replication/paid_list.rs | 80 ++++++++++++++++++++++++++++++++++++ src/replication/pruning.rs | 69 ++----------------------------- src/replication/types.rs | 4 -- 4 files changed, 84 insertions(+), 71 deletions(-) diff --git a/src/replication/mod.rs b/src/replication/mod.rs index a19fb57..81ea8ec 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -1695,13 +1695,11 @@ async fn run_neighbor_sync_round( let old_bootstrap_claims = std::mem::take(&mut state.bootstrap_claims); let old_bootstrap_claim_history = std::mem::take(&mut state.bootstrap_claim_history); let old_prune_cursor = state.prune_cursor; - let old_paid_prune_cursor = state.paid_prune_cursor; *state = NeighborSyncState::new_cycle(neighbors); state.last_sync_times = old_sync_times; state.bootstrap_claims = old_bootstrap_claims; state.bootstrap_claim_history = old_bootstrap_claim_history; state.prune_cursor = old_prune_cursor; - state.paid_prune_cursor = old_paid_prune_cursor; } } diff --git a/src/replication/paid_list.rs b/src/replication/paid_list.rs index 8676036..e329ff8 100644 --- a/src/replication/paid_list.rs +++ b/src/replication/paid_list.rs @@ -54,6 +54,9 @@ pub struct PaidList { /// In-memory: when each stored record first went out of /// storage-responsibility range. record_out_of_range: RwLock>, + /// Cursor used by paid-list pruning to rotate through expired entries when + /// the per-pass remote confirmation cap is exhausted. 
+ paid_prune_cursor: RwLock, } impl PaidList { @@ -107,6 +110,7 @@ impl PaidList { db, paid_out_of_range: RwLock::new(HashMap::new()), record_out_of_range: RwLock::new(HashMap::new()), + paid_prune_cursor: RwLock::new(0), }; let count = paid_list.count()?; @@ -325,6 +329,36 @@ impl PaidList { self.record_out_of_range.read().get(key).copied() } + /// Starting offset for the next paid-list prune scan. + /// + /// LMDB iteration order is stable, so a bounded prune pass must rotate its + /// verification window or later expired entries can be starved behind + /// earlier unconfirmed entries. + pub(crate) fn paid_prune_scan_start(&self, paid_key_count: usize) -> usize { + if paid_key_count == 0 { + return 0; + } + + *self.paid_prune_cursor.read() % paid_key_count + } + + /// Advance the paid-list prune cursor after one pass. + pub(crate) fn advance_paid_prune_cursor( + &self, + paid_key_count: usize, + scan_start: usize, + last_selected_offset: Option, + ) { + let mut cursor = self.paid_prune_cursor.write(); + if paid_key_count == 0 { + *cursor = 0; + return; + } + + let advance_by = last_selected_offset.map_or(1, |offset| offset.saturating_add(1)); + *cursor = (scan_start + advance_by) % paid_key_count; + } + /// Remove multiple keys in a single write transaction. /// /// Also clears any in-memory out-of-range timestamps for removed keys. 
@@ -642,6 +676,52 @@ mod tests { assert_eq!(removed, 0); } + #[tokio::test] + async fn paid_prune_cursor_advances_past_selected_window() { + const PAID_KEY_COUNT: usize = 10; + const START_CURSOR: usize = 2; + const LAST_SELECTED_OFFSET: usize = 3; + const EXPECTED_CURSOR: usize = 6; + + let (pl, _temp) = create_test_paid_list().await; + *pl.paid_prune_cursor.write() = START_CURSOR; + + let scan_start = pl.paid_prune_scan_start(PAID_KEY_COUNT); + pl.advance_paid_prune_cursor(PAID_KEY_COUNT, scan_start, Some(LAST_SELECTED_OFFSET)); + + assert_eq!(*pl.paid_prune_cursor.read(), EXPECTED_CURSOR); + } + + #[tokio::test] + async fn paid_prune_cursor_advances_even_without_selected_entry() { + const PAID_KEY_COUNT: usize = 10; + const START_CURSOR: usize = 9; + const EXPECTED_CURSOR: usize = 0; + + let (pl, _temp) = create_test_paid_list().await; + *pl.paid_prune_cursor.write() = START_CURSOR; + + let scan_start = pl.paid_prune_scan_start(PAID_KEY_COUNT); + pl.advance_paid_prune_cursor(PAID_KEY_COUNT, scan_start, None); + + assert_eq!(*pl.paid_prune_cursor.read(), EXPECTED_CURSOR); + } + + #[tokio::test] + async fn paid_prune_cursor_resets_for_empty_paid_list() { + const STALE_CURSOR: usize = 7; + const EMPTY_PAID_KEY_COUNT: usize = 0; + const EXPECTED_CURSOR: usize = 0; + + let (pl, _temp) = create_test_paid_list().await; + *pl.paid_prune_cursor.write() = STALE_CURSOR; + + let scan_start = pl.paid_prune_scan_start(EMPTY_PAID_KEY_COUNT); + pl.advance_paid_prune_cursor(EMPTY_PAID_KEY_COUNT, scan_start, Some(STALE_CURSOR)); + + assert_eq!(*pl.paid_prune_cursor.read(), EXPECTED_CURSOR); + } + // -- Scenario tests ------------------------------------------------------- /// #50: Key goes out of range. `set_record_out_of_range` called. 
diff --git a/src/replication/pruning.rs b/src/replication/pruning.rs index 19177d9..6280946 100644 --- a/src/replication/pruning.rs +++ b/src/replication/pruning.rs @@ -193,15 +193,8 @@ pub async fn run_prune_pass( pub async fn run_prune_pass_with_context(ctx: PrunePassContext<'_>) -> PruneResult { let (stored_count, record_stats) = prune_stored_records(&ctx).await; let now = Instant::now(); - let (paid_count, paid_stats) = prune_paid_entries( - ctx.self_id, - ctx.paid_list, - ctx.p2p_node, - ctx.config, - ctx.sync_state, - now, - ) - .await; + let (paid_count, paid_stats) = + prune_paid_entries(ctx.self_id, ctx.paid_list, ctx.p2p_node, ctx.config, now).await; let result = PruneResult { records_pruned: record_stats.pruned, @@ -404,7 +397,6 @@ async fn prune_paid_entries( paid_list: &Arc, p2p_node: &Arc, config: &ReplicationConfig, - sync_state: &Arc>, now: Instant, ) -> (usize, PaidPruneStats) { let paid_keys = match paid_list.all_keys() { @@ -421,7 +413,7 @@ async fn prune_paid_entries( let mut verification_deferred = 0usize; // Rotate the scan start so expired entries beyond the per-pass cap are // not starved by the same head-of-list entries every pass. 
- let scan_start = paid_prune_scan_start(sync_state, paid_keys.len()).await; + let scan_start = paid_list.paid_prune_scan_start(paid_keys.len()); let mut last_selected_offset = None; for offset in 0..paid_keys.len() { @@ -461,13 +453,7 @@ async fn prune_paid_entries( } } - advance_paid_prune_cursor( - sync_state, - paid_keys.len(), - scan_start, - last_selected_offset, - ) - .await; + paid_list.advance_paid_prune_cursor(paid_keys.len(), scan_start, last_selected_offset); if verification_deferred > 0 { debug!( @@ -504,31 +490,6 @@ async fn prune_paid_entries( (paid_keys.len(), stats) } -async fn paid_prune_scan_start( - sync_state: &Arc>, - paid_key_count: usize, -) -> usize { - if paid_key_count == 0 { - return 0; - } - sync_state.read().await.paid_prune_cursor % paid_key_count -} - -async fn advance_paid_prune_cursor( - sync_state: &Arc>, - paid_key_count: usize, - scan_start: usize, - last_selected_offset: Option, -) { - if paid_key_count == 0 { - sync_state.write().await.paid_prune_cursor = 0; - return; - } - - let advance_by = last_selected_offset.map_or(1, |offset| offset.saturating_add(1)); - sync_state.write().await.paid_prune_cursor = (scan_start + advance_by) % paid_key_count; -} - /// Re-check each confirmed candidate against current local state before /// deletion. 
/// @@ -1447,28 +1408,6 @@ mod tests { assert!(confirmed_by_key.is_empty()); } - #[tokio::test] - async fn paid_prune_cursor_advances_past_selected_window() { - let state = Arc::new(RwLock::new(NeighborSyncState::new_cycle(vec![]))); - state.write().await.paid_prune_cursor = 2; - - let start = paid_prune_scan_start(&state, 10).await; - advance_paid_prune_cursor(&state, 10, start, Some(3)).await; - - assert_eq!(state.read().await.paid_prune_cursor, 6); - } - - #[tokio::test] - async fn paid_prune_cursor_advances_even_when_nothing_selected() { - let state = Arc::new(RwLock::new(NeighborSyncState::new_cycle(vec![]))); - state.write().await.paid_prune_cursor = 9; - - let start = paid_prune_scan_start(&state, 10).await; - advance_paid_prune_cursor(&state, 10, start, None).await; - - assert_eq!(state.read().await.paid_prune_cursor, 0); - } - #[test] fn zero_quorum_never_confirms() { let peer_a = peer_id_from_byte(1); diff --git a/src/replication/types.rs b/src/replication/types.rs index a2f6019..ec74e76 100644 --- a/src/replication/types.rs +++ b/src/replication/types.rs @@ -455,9 +455,6 @@ pub struct NeighborSyncState { /// Cursor used by post-cycle pruning to rotate through stored records when /// the per-pass prune-confirmation budget is exhausted. pub prune_cursor: usize, - /// Cursor used by post-cycle pruning to rotate through `PaidForList` - /// entries when the per-pass paid-verification cap is exhausted. 
- pub paid_prune_cursor: usize, } impl NeighborSyncState { @@ -471,7 +468,6 @@ impl NeighborSyncState { bootstrap_claims: HashMap::new(), bootstrap_claim_history: HashMap::new(), prune_cursor: 0, - paid_prune_cursor: 0, } } From 03d91e811a1cb698d16ef537ade967890346431c Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Thu, 11 Jun 2026 15:54:02 +0200 Subject: [PATCH 10/18] fix(replication): require aged repair hints before audits --- src/replication/audit.rs | 42 ++++++-- src/replication/config.rs | 7 ++ src/replication/mod.rs | 5 +- src/replication/pruning.rs | 6 +- src/replication/types.rs | 209 +++++++++++++++++++++++++++++++------ tests/e2e/replication.rs | 15 ++- 6 files changed, 238 insertions(+), 46 deletions(-) diff --git a/src/replication/audit.rs b/src/replication/audit.rs index f074b9c..3bbeaff 100644 --- a/src/replication/audit.rs +++ b/src/replication/audit.rs @@ -4,6 +4,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; +use std::time::Instant; use crate::logging::{debug, info, warn}; use rand::seq::SliceRandom; @@ -167,11 +168,13 @@ pub async fn audit_tick_with_repair_proofs( let peer_keys = { let mut proofs = repair_proofs.write().await; + let now = Instant::now(); mature_audit_keys_for_peer( &challenged_peer, sampled_key_groups, &mut proofs, current_sync_epoch, + now, ) }; @@ -349,12 +352,19 @@ fn mature_audit_keys_for_peer( sampled_key_groups: Vec<(XorName, HashSet)>, repair_proofs: &mut RepairProofs, current_sync_epoch: u64, + now: Instant, ) -> Vec { sampled_key_groups .into_iter() .filter_map(|(key, close_peers)| { repair_proofs - .has_mature_replica_hint(challenged_peer, &key, &close_peers, current_sync_epoch) + .has_mature_replica_hint( + challenged_peer, + &key, + &close_peers, + current_sync_epoch, + now, + ) .then_some(key) }) .collect() @@ -720,6 +730,7 @@ pub async fn handle_audit_challenge( #[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] mod tests { use super::*; + use 
crate::replication::config::REPAIR_HINT_MIN_AGE; use crate::replication::protocol::compute_audit_digest; use crate::replication::types::{BootstrapClaimObservation, NeighborSyncState}; use crate::storage::LmdbStorageConfig; @@ -1367,6 +1378,7 @@ mod tests { const MISSING_PROOF_KEY_BYTE: u8 = 0xB3; const STABLE_CHURN_KEY_BYTE: u8 = 0xB4; const EVICTED_KEY_BYTE: u8 = 0xB5; + const FRESH_HINT_KEY_BYTE: u8 = 0xB6; const XOR_NAME_LEN: usize = 32; let challenged_peer = peer_id_from_bytes([CHALLENGED_PEER_BYTE; XOR_NAME_LEN]); @@ -1377,34 +1389,50 @@ mod tests { let missing_proof_key = [MISSING_PROOF_KEY_BYTE; XOR_NAME_LEN]; let stable_churn_key = [STABLE_CHURN_KEY_BYTE; XOR_NAME_LEN]; let evicted_key = [EVICTED_KEY_BYTE; XOR_NAME_LEN]; + let fresh_hint_key = [FRESH_HINT_KEY_BYTE; XOR_NAME_LEN]; let close_group = HashSet::from([challenged_peer, other_peer]); let changed_close_group = HashSet::from([challenged_peer, new_peer]); let evicted_close_group = HashSet::from([other_peer, new_peer]); let mut repair_proofs = RepairProofs::new(); + let mature_hinted_at = Instant::now(); + let now = mature_hinted_at + .checked_add(REPAIR_HINT_MIN_AGE) + .unwrap_or(mature_hinted_at); - assert!(repair_proofs.record_replica_hint_sent( + assert!(repair_proofs.record_replica_hint_sent_at( challenged_peer, mature_key, &close_group, HINT_EPOCH, + mature_hinted_at, )); - assert!(repair_proofs.record_replica_hint_sent( + assert!(repair_proofs.record_replica_hint_sent_at( challenged_peer, same_epoch_key, &close_group, CURRENT_EPOCH, + mature_hinted_at, )); - assert!(repair_proofs.record_replica_hint_sent( + assert!(repair_proofs.record_replica_hint_sent_at( challenged_peer, stable_churn_key, &close_group, HINT_EPOCH, + mature_hinted_at, )); - assert!(repair_proofs.record_replica_hint_sent( + assert!(repair_proofs.record_replica_hint_sent_at( challenged_peer, evicted_key, &close_group, HINT_EPOCH, + mature_hinted_at, + )); + assert!(repair_proofs.record_replica_hint_sent_at( + challenged_peer, + 
fresh_hint_key, + &close_group, + HINT_EPOCH, + now, )); let sampled_key_groups = vec![ @@ -1413,18 +1441,20 @@ mod tests { (missing_proof_key, close_group.clone()), (stable_churn_key, changed_close_group), (evicted_key, evicted_close_group), + (fresh_hint_key, close_group.clone()), ]; let peer_keys = mature_audit_keys_for_peer( &challenged_peer, sampled_key_groups, &mut repair_proofs, CURRENT_EPOCH, + now, ); assert_eq!( peer_keys, vec![mature_key, stable_churn_key], - "mature proofs for stable close-group peers should become audit keys, while same-epoch, missing, and evicted-peer proofs should not" + "mature proofs for stable close-group peers should become audit keys, while same-epoch, fresh, missing, and evicted-peer proofs should not" ); } diff --git a/src/replication/config.rs b/src/replication/config.rs index 1ca8b3d..3337cf2 100644 --- a/src/replication/config.rs +++ b/src/replication/config.rs @@ -58,6 +58,13 @@ const NEIGHBOR_SYNC_COOLDOWN_SECS: u64 = 60 * 60; // 1 hour /// Per-peer minimum spacing between successive syncs with the same peer. pub const NEIGHBOR_SYNC_COOLDOWN: Duration = Duration::from_secs(NEIGHBOR_SYNC_COOLDOWN_SECS); +/// Minimum age for a replica repair hint before the hinted peer can be audited +/// for that key. +const REPAIR_HINT_MIN_AGE_SECS: u64 = 60 * 60; // 1 hour +/// Minimum age for a replica repair hint before the hinted peer can be audited +/// for that key. +pub const REPAIR_HINT_MIN_AGE: Duration = Duration::from_secs(REPAIR_HINT_MIN_AGE_SECS); + /// Minimum self-lookup cadence. const SELF_LOOKUP_INTERVAL_MIN_SECS: u64 = 5 * 60; /// Maximum self-lookup cadence. diff --git a/src/replication/mod.rs b/src/replication/mod.rs index 0e0995c..050d9db 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -1675,8 +1675,9 @@ async fn run_neighbor_sync_round( // prune pass and DHT snapshot so other tasks are not starved. 
let cycle_complete = sync_state.read().await.is_cycle_complete(); if cycle_complete { - // A completed local neighbor-sync cycle matures key-specific repair - // proofs recorded in earlier epochs. + // A completed local neighbor-sync cycle advances the epoch component + // of repair-proof maturity. The per-key wall-clock minimum age is + // checked when audits are selected. { let mut history = sync_history.write().await; for record in history.values_mut() { diff --git a/src/replication/pruning.rs b/src/replication/pruning.rs index 4618ab0..fb3737e 100644 --- a/src/replication/pruning.rs +++ b/src/replication/pruning.rs @@ -70,7 +70,7 @@ pub struct PrunePassContext<'a> { pub sync_state: &'a Arc>, /// Key-specific repair proofs used to gate prune-confirmation audits. pub repair_proofs: &'a Arc>, - /// Current local neighbor-sync cycle epoch. + /// Current local neighbor-sync cycle epoch for repair-proof maturity. pub current_sync_epoch: u64, /// Whether remote prune-confirmation audits are allowed this pass. 
pub allow_remote_prune_audits: bool, @@ -353,6 +353,7 @@ async fn evaluate_record_prune_key( &current_close_peers, ctx.repair_proofs, ctx.current_sync_epoch, + now, ) .await { @@ -452,10 +453,11 @@ async fn target_peers_have_mature_repair_proofs( current_close_peers: &HashSet, repair_proofs: &Arc>, current_sync_epoch: u64, + now: Instant, ) -> bool { let mut proofs = repair_proofs.write().await; target_peers.iter().all(|peer| { - proofs.has_mature_replica_hint(peer, key, current_close_peers, current_sync_epoch) + proofs.has_mature_replica_hint(peer, key, current_close_peers, current_sync_epoch, now) }) } diff --git a/src/replication/types.rs b/src/replication/types.rs index ec74e76..0b1838e 100644 --- a/src/replication/types.rs +++ b/src/replication/types.rs @@ -11,6 +11,7 @@ use std::time::{Duration, Instant}; use serde::{Deserialize, Serialize}; use crate::ant_protocol::XorName; +use crate::replication::config::REPAIR_HINT_MIN_AGE; use saorsa_core::identity::PeerId; // --------------------------------------------------------------------------- @@ -273,6 +274,8 @@ impl PeerSyncRecord { struct RepairProof { /// Local neighbor-sync cycle epoch when the hint was sent. hinted_at_epoch: u64, + /// Monotonic local time when the hint was sent. + hinted_at: Instant, } /// Repair proofs for one key, tied to the close-group snapshot they were @@ -325,6 +328,41 @@ impl RepairProofs { key: XorName, current_close_peers: &HashSet, hinted_at_epoch: u64, + ) -> bool { + self.insert_replica_hint_sent( + peer, + key, + current_close_peers, + hinted_at_epoch, + Instant::now(), + ) + } + + /// Record that `peer` was sent a replica repair hint at a caller-provided + /// time. + /// + /// This is exposed only for deterministic tests and test harnesses. Normal + /// production callers use [`Self::record_replica_hint_sent`] so the proof + /// timestamp is captured internally at send-recording time. 
+ #[cfg(any(test, feature = "test-utils"))] + pub fn record_replica_hint_sent_at( + &mut self, + peer: PeerId, + key: XorName, + current_close_peers: &HashSet, + hinted_at_epoch: u64, + hinted_at: Instant, + ) -> bool { + self.insert_replica_hint_sent(peer, key, current_close_peers, hinted_at_epoch, hinted_at) + } + + fn insert_replica_hint_sent( + &mut self, + peer: PeerId, + key: XorName, + current_close_peers: &HashSet, + hinted_at_epoch: u64, + hinted_at: Instant, ) -> bool { self.reconcile_key_close_group(&key, current_close_peers); @@ -341,9 +379,13 @@ impl RepairProofs { return false; } - entry - .peer_proofs - .insert(peer, RepairProof { hinted_at_epoch }); + entry.peer_proofs.insert( + peer, + RepairProof { + hinted_at_epoch, + hinted_at, + }, + ); true } @@ -351,20 +393,25 @@ impl RepairProofs { /// /// The check invalidates proofs for peers that have left the current /// self-inclusive close group. A proof is mature only after at least one - /// later local sync-cycle epoch. + /// later local sync-cycle epoch and the repair hint is at least + /// [`REPAIR_HINT_MIN_AGE`] old. pub fn has_mature_replica_hint( &mut self, peer: &PeerId, key: &XorName, current_close_peers: &HashSet, current_epoch: u64, + now: Instant, ) -> bool { self.reconcile_key_close_group(key, current_close_peers); self.proofs_by_key .get(key) .and_then(|entry| entry.peer_proofs.get(peer)) - .is_some_and(|proof| proof.hinted_at_epoch < current_epoch) + .is_some_and(|proof| { + proof.hinted_at_epoch < current_epoch + && now.saturating_duration_since(proof.hinted_at) >= REPAIR_HINT_MIN_AGE + }) } /// Remove all repair proofs for a key, e.g. after local deletion. 
@@ -594,6 +641,14 @@ mod tests { PeerId::from_bytes(bytes) } + fn mature_hint_times() -> (Instant, Instant) { + let hinted_at = Instant::now(); + let now = hinted_at + .checked_add(REPAIR_HINT_MIN_AGE) + .unwrap_or(hinted_at); + (hinted_at, now) + } + // -- FetchCandidate ordering ------------------------------------------- #[test] @@ -748,12 +803,13 @@ mod tests { let peer = peer_id_from_byte(1); let close_peers = HashSet::from([peer, peer_id_from_byte(2), peer_id_from_byte(3)]); let mut proofs = RepairProofs::new(); + let (hinted_at, now) = mature_hint_times(); - assert!(proofs.record_replica_hint_sent(peer, key, &close_peers, HINT_EPOCH)); + assert!(proofs.record_replica_hint_sent_at(peer, key, &close_peers, HINT_EPOCH, hinted_at,)); assert!( - proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH), - "sent hint should make key auditable for that peer" + proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH, now), + "old sent hint should make key auditable for that peer" ); } @@ -766,11 +822,18 @@ mod tests { let peer = peer_id_from_byte(1); let close_peers = HashSet::from([peer_id_from_byte(2), peer_id_from_byte(3)]); let mut proofs = RepairProofs::new(); + let (hinted_at, now) = mature_hint_times(); - assert!(!proofs.record_replica_hint_sent(peer, key, &close_peers, HINT_EPOCH)); + assert!(!proofs.record_replica_hint_sent_at( + peer, + key, + &close_peers, + HINT_EPOCH, + hinted_at, + )); assert!( - !proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH), + !proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH, now), "peers outside current close group must not get repair proof" ); } @@ -784,16 +847,48 @@ mod tests { let peer = peer_id_from_byte(1); let close_peers = HashSet::from([peer, peer_id_from_byte(2), peer_id_from_byte(3)]); let mut proofs = RepairProofs::new(); + let (hinted_at, now) = mature_hint_times(); - assert!(proofs.record_replica_hint_sent(peer, key, &close_peers, 
HINT_EPOCH)); + assert!(proofs.record_replica_hint_sent_at(peer, key, &close_peers, HINT_EPOCH, hinted_at,)); assert!( - !proofs.has_mature_replica_hint(&peer, &key, &close_peers, HINT_EPOCH), + !proofs.has_mature_replica_hint(&peer, &key, &close_peers, HINT_EPOCH, now), "same-cycle proof should not be audit-eligible" ); assert!( - proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH), - "proof should mature after a later local sync-cycle epoch" + proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH, now), + "old proof should mature after a later local sync-cycle epoch" + ); + } + + #[test] + fn repair_proofs_require_min_hint_age() { + const HINT_EPOCH: u64 = 7; + const CURRENT_EPOCH: u64 = HINT_EPOCH + 1; + + let key = [0xA8; 32]; + let peer = peer_id_from_byte(1); + let close_peers = HashSet::from([peer, peer_id_from_byte(2), peer_id_from_byte(3)]); + let mut proofs = RepairProofs::new(); + let hinted_at = Instant::now(); + + assert!(proofs.record_replica_hint_sent_at(peer, key, &close_peers, HINT_EPOCH, hinted_at)); + + assert!( + !proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH, hinted_at), + "fresh repair hints should not be audit-eligible" + ); + assert!( + proofs.has_mature_replica_hint( + &peer, + &key, + &close_peers, + CURRENT_EPOCH, + hinted_at + .checked_add(REPAIR_HINT_MIN_AGE) + .unwrap_or(hinted_at), + ), + "repair hints should mature once they are at least the minimum age" ); } @@ -806,14 +901,15 @@ mod tests { let peer = peer_id_from_byte(1); let close_peers = HashSet::from([peer, peer_id_from_byte(2), peer_id_from_byte(3)]); let mut proofs = RepairProofs::new(); + let (hinted_at, now) = mature_hint_times(); - assert!(proofs.record_replica_hint_sent(peer, key, &close_peers, HINT_EPOCH)); + assert!(proofs.record_replica_hint_sent_at(peer, key, &close_peers, HINT_EPOCH, hinted_at,)); assert!( - !proofs.record_replica_hint_sent(peer, key, &close_peers, REPEATED_HINT_EPOCH), + 
!proofs.record_replica_hint_sent_at(peer, key, &close_peers, REPEATED_HINT_EPOCH, now), "duplicate hint in the same close group should keep existing proof" ); assert!( - proofs.has_mature_replica_hint(&peer, &key, &close_peers, REPEATED_HINT_EPOCH), + proofs.has_mature_replica_hint(&peer, &key, &close_peers, REPEATED_HINT_EPOCH, now), "duplicate hint must not reset an already mature proof" ); } @@ -831,20 +927,39 @@ mod tests { let old_group = HashSet::from([stable_peer, departing_peer, retained_peer]); let changed_group = HashSet::from([stable_peer, retained_peer, new_peer]); let mut proofs = RepairProofs::new(); + let (hinted_at, now) = mature_hint_times(); - assert!(proofs.record_replica_hint_sent(stable_peer, key, &old_group, HINT_EPOCH)); - assert!(proofs.record_replica_hint_sent(departing_peer, key, &old_group, HINT_EPOCH)); + assert!(proofs.record_replica_hint_sent_at( + stable_peer, + key, + &old_group, + HINT_EPOCH, + hinted_at, + )); + assert!(proofs.record_replica_hint_sent_at( + departing_peer, + key, + &old_group, + HINT_EPOCH, + hinted_at, + )); assert!( - proofs.has_mature_replica_hint(&stable_peer, &key, &changed_group, CURRENT_EPOCH), + proofs.has_mature_replica_hint(&stable_peer, &key, &changed_group, CURRENT_EPOCH, now), "stable peers should keep mature repair proofs across unrelated close-group churn" ); assert!( - !proofs.has_mature_replica_hint(&departing_peer, &key, &changed_group, CURRENT_EPOCH), + !proofs.has_mature_replica_hint( + &departing_peer, + &key, + &changed_group, + CURRENT_EPOCH, + now, + ), "peers that left the close group should lose repair proofs" ); assert!( - !proofs.has_mature_replica_hint(&new_peer, &key, &changed_group, CURRENT_EPOCH), + !proofs.has_mature_replica_hint(&new_peer, &key, &changed_group, CURRENT_EPOCH, now), "new close-group peers need their own repair hint before auditing" ); } @@ -861,26 +976,40 @@ mod tests { let old_group = HashSet::from([returning_peer, peer_id_from_byte(2), peer_id_from_byte(3)]); let 
changed_group = HashSet::from([new_peer, peer_id_from_byte(2), peer_id_from_byte(3)]); let mut proofs = RepairProofs::new(); + let (hinted_at, now) = mature_hint_times(); - assert!(proofs.record_replica_hint_sent(returning_peer, key, &old_group, FIRST_HINT_EPOCH,)); + assert!(proofs.record_replica_hint_sent_at( + returning_peer, + key, + &old_group, + FIRST_HINT_EPOCH, + hinted_at, + )); assert!( - !proofs.has_mature_replica_hint(&new_peer, &key, &changed_group, SECOND_HINT_EPOCH), + !proofs.has_mature_replica_hint( + &new_peer, + &key, + &changed_group, + SECOND_HINT_EPOCH, + now + ), "new close-group peer should not inherit another peer's repair proof" ); assert!( - !proofs.has_mature_replica_hint(&returning_peer, &key, &old_group, CURRENT_EPOCH), + !proofs.has_mature_replica_hint(&returning_peer, &key, &old_group, CURRENT_EPOCH, now), "a peer that re-enters must receive a fresh repair hint" ); - assert!(proofs.record_replica_hint_sent( + assert!(proofs.record_replica_hint_sent_at( returning_peer, key, &old_group, SECOND_HINT_EPOCH, + hinted_at, )); assert!( - proofs.has_mature_replica_hint(&returning_peer, &key, &old_group, CURRENT_EPOCH), + proofs.has_mature_replica_hint(&returning_peer, &key, &old_group, CURRENT_EPOCH, now), "fresh repair hint after re-entry should be eligible once mature" ); } @@ -895,18 +1024,31 @@ mod tests { let peer = peer_id_from_byte(1); let close_peers = HashSet::from([peer, peer_id_from_byte(2), peer_id_from_byte(3)]); let mut proofs = RepairProofs::new(); + let (hinted_at, now) = mature_hint_times(); - assert!(proofs.record_replica_hint_sent(peer, key, &close_peers, FIRST_HINT_EPOCH)); + assert!(proofs.record_replica_hint_sent_at( + peer, + key, + &close_peers, + FIRST_HINT_EPOCH, + hinted_at, + )); proofs.remove_peer(&peer); assert!( - !proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH), + !proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH, now), "routing-table removal should clear proof 
even if peer re-enters same close group" ); - assert!(proofs.record_replica_hint_sent(peer, key, &close_peers, SECOND_HINT_EPOCH)); + assert!(proofs.record_replica_hint_sent_at( + peer, + key, + &close_peers, + SECOND_HINT_EPOCH, + hinted_at, + )); assert!( - proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH), + proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH, now), "fresh hint after re-entry should become eligible after a later epoch" ); } @@ -920,12 +1062,13 @@ mod tests { let peer = peer_id_from_byte(1); let close_peers = HashSet::from([peer]); let mut proofs = RepairProofs::new(); + let (hinted_at, now) = mature_hint_times(); - assert!(proofs.record_replica_hint_sent(peer, key, &close_peers, HINT_EPOCH)); + assert!(proofs.record_replica_hint_sent_at(peer, key, &close_peers, HINT_EPOCH, hinted_at,)); proofs.remove_key(&key); assert!( - !proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH), + !proofs.has_mature_replica_hint(&peer, &key, &close_peers, CURRENT_EPOCH, now), "deleted local key should not retain repair proof entries" ); } diff --git a/tests/e2e/replication.rs b/tests/e2e/replication.rs index 83fc792..27ff559 100644 --- a/tests/e2e/replication.rs +++ b/tests/e2e/replication.rs @@ -7,7 +7,7 @@ use super::TestHarness; use ant_node::client::compute_address; -use ant_node::replication::config::REPLICATION_PROTOCOL_ID; +use ant_node::replication::config::{REPAIR_HINT_MIN_AGE, REPLICATION_PROTOCOL_ID}; use ant_node::replication::protocol::{ compute_audit_digest, AuditChallenge, AuditResponse, FetchRequest, FetchResponse, FreshReplicationOffer, FreshReplicationResponse, NeighborSyncRequest, ReplicationMessage, @@ -22,7 +22,7 @@ use saorsa_core::{P2PNode, TrustEvent}; use serial_test::serial; use std::collections::HashSet; use std::sync::Arc; -use std::time::Duration; +use std::time::{Duration, Instant}; use tokio::sync::RwLock; /// Maximum time to wait for replication propagation in tests. 
@@ -148,9 +148,18 @@ async fn record_repair_proofs_for_peers( .map(|node| node.peer_id) .collect(); let mut proofs = repair_proofs.write().await; + let hinted_at = Instant::now() + .checked_sub(REPAIR_HINT_MIN_AGE) + .unwrap_or_else(Instant::now); for peer in peers { assert!( - proofs.record_replica_hint_sent(*peer, *key, &close_peers, hinted_at_epoch), + proofs.record_replica_hint_sent_at( + *peer, + *key, + &close_peers, + hinted_at_epoch, + hinted_at + ), "test target should be in close group for repair-proof recording" ); } From bdbfdfc2ae88ccb6607012032363e6814ae73e1b Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Thu, 11 Jun 2026 17:12:30 +0200 Subject: [PATCH 11/18] fix(replication): make repair proof pruning test deterministic --- src/replication/mod.rs | 2 ++ src/replication/pruning.rs | 11 ++++++++++- tests/e2e/replication.rs | 18 ++++++++++++------ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/replication/mod.rs b/src/replication/mod.rs index 050d9db..0a20c7d 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -1703,6 +1703,8 @@ async fn run_neighbor_sync_round( sync_state, repair_proofs, current_sync_epoch, + #[cfg(any(test, feature = "test-utils"))] + repair_proof_now: None, allow_remote_prune_audits, }) .await; diff --git a/src/replication/pruning.rs b/src/replication/pruning.rs index fb3737e..536ca25 100644 --- a/src/replication/pruning.rs +++ b/src/replication/pruning.rs @@ -72,6 +72,9 @@ pub struct PrunePassContext<'a> { pub repair_proofs: &'a Arc>, /// Current local neighbor-sync cycle epoch for repair-proof maturity. pub current_sync_epoch: u64, + /// Test-only clock override for repair-proof maturity checks. + #[cfg(any(test, feature = "test-utils"))] + pub repair_proof_now: Option, /// Whether remote prune-confirmation audits are allowed this pass. 
pub allow_remote_prune_audits: bool, } @@ -173,6 +176,8 @@ pub async fn run_prune_pass( sync_state, repair_proofs: &repair_proofs, current_sync_epoch: 0, + #[cfg(any(test, feature = "test-utils"))] + repair_proof_now: None, allow_remote_prune_audits, }) .await @@ -347,13 +352,17 @@ async fn evaluate_record_prune_key( } let current_close_peers: HashSet = closest.iter().map(|node| node.peer_id).collect(); + #[cfg(any(test, feature = "test-utils"))] + let repair_proof_now = ctx.repair_proof_now.unwrap_or(now); + #[cfg(not(any(test, feature = "test-utils")))] + let repair_proof_now = now; if !target_peers_have_mature_repair_proofs( key, &target_peers, &current_close_peers, ctx.repair_proofs, ctx.current_sync_epoch, - now, + repair_proof_now, ) .await { diff --git a/tests/e2e/replication.rs b/tests/e2e/replication.rs index 27ff559..6302d3f 100644 --- a/tests/e2e/replication.rs +++ b/tests/e2e/replication.rs @@ -139,7 +139,7 @@ async fn record_repair_proofs_for_peers( peers: &[PeerId], key: &[u8; 32], hinted_at_epoch: u64, -) { +) -> Instant { let close_peers: HashSet = p2p_node .dht_manager() .find_closest_nodes_local_with_self(key, config.close_group_size) @@ -148,9 +148,10 @@ async fn record_repair_proofs_for_peers( .map(|node| node.peer_id) .collect(); let mut proofs = repair_proofs.write().await; - let hinted_at = Instant::now() - .checked_sub(REPAIR_HINT_MIN_AGE) - .unwrap_or_else(Instant::now); + let hinted_at = Instant::now(); + let repair_proof_now = hinted_at + .checked_add(REPAIR_HINT_MIN_AGE) + .unwrap_or(hinted_at); for peer in peers { assert!( proofs.record_replica_hint_sent_at( @@ -164,6 +165,7 @@ async fn record_repair_proofs_for_peers( ); } drop(proofs); + repair_proof_now } /// Fresh write happy path (Section 18 #1). 
@@ -517,7 +519,7 @@ async fn test_prune_pass_requires_remote_confirmation_before_delete() { .await .expect("put gate record on pruner"); store_record_on_peers(&harness, &gate_targets, &gate_address, &gate_content).await; - record_repair_proofs_for_peers( + let gate_repair_proof_now = record_repair_proofs_for_peers( &repair_proofs, &pruner_p2p, &config, @@ -536,6 +538,7 @@ async fn test_prune_pass_requires_remote_confirmation_before_delete() { sync_state: &sync_state, repair_proofs: &repair_proofs, current_sync_epoch: CURRENT_EPOCH, + repair_proof_now: Some(gate_repair_proof_now), allow_remote_prune_audits: false, }) .await; @@ -554,6 +557,7 @@ async fn test_prune_pass_requires_remote_confirmation_before_delete() { sync_state: &sync_state, repair_proofs: &repair_proofs, current_sync_epoch: CURRENT_EPOCH, + repair_proof_now: Some(gate_repair_proof_now), allow_remote_prune_audits: true, }) .await; @@ -578,7 +582,7 @@ async fn test_prune_pass_requires_remote_confirmation_before_delete() { &missing_content, ) .await; - record_repair_proofs_for_peers( + let missing_repair_proof_now = record_repair_proofs_for_peers( &repair_proofs, &pruner_p2p, &config, @@ -597,6 +601,7 @@ async fn test_prune_pass_requires_remote_confirmation_before_delete() { sync_state: &sync_state, repair_proofs: &repair_proofs, current_sync_epoch: CURRENT_EPOCH, + repair_proof_now: Some(missing_repair_proof_now), allow_remote_prune_audits: true, }) .await; @@ -623,6 +628,7 @@ async fn test_prune_pass_requires_remote_confirmation_before_delete() { sync_state: &sync_state, repair_proofs: &repair_proofs, current_sync_epoch: CURRENT_EPOCH, + repair_proof_now: Some(missing_repair_proof_now), allow_remote_prune_audits: true, }) .await; From 1570733d9887c728b013e850f314a87269e86109 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Thu, 11 Jun 2026 17:19:06 +0200 Subject: [PATCH 12/18] fix(pruning): bound paid prune verification --- src/replication/pruning.rs | 237 ++++++++++++++++++++++++++++++++----- 1 file 
changed, 205 insertions(+), 32 deletions(-) diff --git a/src/replication/pruning.rs b/src/replication/pruning.rs index 6280946..ff97f1f 100644 --- a/src/replication/pruning.rs +++ b/src/replication/pruning.rs @@ -37,10 +37,12 @@ use super::REPLICATION_TRUST_WEIGHT; const MAX_CONCURRENT_PRUNE_AUDIT_CHALLENGES: usize = 32; -/// Maximum expired `PaidForList` entries verified against the paid close -/// group per prune pass. Bounds the per-pass verification fan-out the same -/// way `MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS` bounds record audits. +/// Maximum expired `PaidForList` entries selected for verification per prune +/// pass. The unique peer fan-out for those entries is capped separately. const MAX_PAID_PRUNE_VERIFICATIONS_PER_PASS: usize = 32; +/// Maximum unique peers contacted for paid-list verification per prune pass. +/// `quorum::run_verification_round` sends one request per target peer. +const MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS: usize = MAX_CONCURRENT_PRUNE_AUDIT_CHALLENGES; // --------------------------------------------------------------------------- // Result type @@ -106,6 +108,41 @@ struct PaidPruneStats { pruned: usize, } +#[derive(Debug, Default)] +struct PaidPruneDeferredCounts { + entry_budget: usize, + remote_gate: usize, + peer_budget: usize, +} + +impl PaidPruneDeferredCounts { + fn log(&self) { + if self.entry_budget > 0 { + debug!( + "Deferred {} expired PaidForList entries beyond the per-pass verification cap \ + ({MAX_PAID_PRUNE_VERIFICATIONS_PER_PASS})", + self.entry_budget, + ); + } + + if self.remote_gate > 0 { + debug!( + "Deferred {} expired PaidForList entries until bootstrap drain allows remote \ + paid-prune verification", + self.remote_gate, + ); + } + + if self.peer_budget > 0 { + debug!( + "Deferred {} expired PaidForList entries beyond the per-pass paid-prune peer cap \ + ({MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS})", + self.peer_budget, + ); + } + } +} + #[derive(Debug, Clone)] struct RecordPruneCandidate { key: 
XorName, @@ -134,6 +171,14 @@ enum RecordPruneKeyState { Candidate(RecordPruneCandidate), } +enum PaidPruneKeyState { + None, + RemoteDeferred, + EntryBudgetDeferred, + PeerBudgetDeferred, + Candidate(Vec), +} + #[derive(Default)] struct PruneAuditReportState { audit_failures: RwLock>, @@ -193,8 +238,15 @@ pub async fn run_prune_pass( pub async fn run_prune_pass_with_context(ctx: PrunePassContext<'_>) -> PruneResult { let (stored_count, record_stats) = prune_stored_records(&ctx).await; let now = Instant::now(); - let (paid_count, paid_stats) = - prune_paid_entries(ctx.self_id, ctx.paid_list, ctx.p2p_node, ctx.config, now).await; + let (paid_count, paid_stats) = prune_paid_entries( + ctx.self_id, + ctx.paid_list, + ctx.p2p_node, + ctx.config, + now, + ctx.allow_remote_prune_audits, + ) + .await; let result = PruneResult { records_pruned: record_stats.pruned, @@ -398,6 +450,7 @@ async fn prune_paid_entries( p2p_node: &Arc, config: &ReplicationConfig, now: Instant, + allow_remote_prune_audits: bool, ) -> (usize, PaidPruneStats) { let paid_keys = match paid_list.all_keys() { Ok(keys) => keys, @@ -410,7 +463,8 @@ async fn prune_paid_entries( let dht = p2p_node.dht_manager(); let mut stats = PaidPruneStats::default(); let mut expired_candidates: Vec<(XorName, Vec)> = Vec::new(); - let mut verification_deferred = 0usize; + let mut deferred_counts = PaidPruneDeferredCounts::default(); + let mut selected_verification_peers = HashSet::new(); // Rotate the scan start so expired entries beyond the per-pass cap are // not starved by the same head-of-list entries every pass. 
let scan_start = paid_list.paid_prune_scan_start(paid_keys.len()); @@ -441,12 +495,31 @@ async fn prune_paid_entries( .checked_duration_since(first_seen) .unwrap_or(Duration::ZERO); if elapsed >= config.prune_hysteresis_duration { - if expired_candidates.len() < MAX_PAID_PRUNE_VERIFICATIONS_PER_PASS { - let target_peers = remote_close_group_peers(&closest, self_id); - expired_candidates.push((*key, target_peers)); - last_selected_offset = Some(offset); - } else { - verification_deferred = verification_deferred.saturating_add(1); + match select_paid_prune_candidate( + key, + &closest, + self_id, + allow_remote_prune_audits, + expired_candidates.len(), + &mut selected_verification_peers, + ) { + PaidPruneKeyState::None => {} + PaidPruneKeyState::RemoteDeferred => { + deferred_counts.remote_gate = + deferred_counts.remote_gate.saturating_add(1); + } + PaidPruneKeyState::EntryBudgetDeferred => { + deferred_counts.entry_budget = + deferred_counts.entry_budget.saturating_add(1); + } + PaidPruneKeyState::PeerBudgetDeferred => { + deferred_counts.peer_budget = + deferred_counts.peer_budget.saturating_add(1); + } + PaidPruneKeyState::Candidate(target_peers) => { + expired_candidates.push((*key, target_peers)); + last_selected_offset = Some(offset); + } } } } @@ -454,13 +527,7 @@ async fn prune_paid_entries( } paid_list.advance_paid_prune_cursor(paid_keys.len(), scan_start, last_selected_offset); - - if verification_deferred > 0 { - debug!( - "Deferred {verification_deferred} expired PaidForList entries beyond the \ - per-pass verification cap ({MAX_PAID_PRUNE_VERIFICATIONS_PER_PASS})" - ); - } + deferred_counts.log(); let confirmed_by_key = collect_paid_prune_confirmations(&expired_candidates, p2p_node, config).await; @@ -474,20 +541,58 @@ async fn prune_paid_entries( ) .await; stats.cleared += revalidated_cleared; + stats.pruned = delete_paid_entries(&paid_keys_to_delete, paid_list).await; - if !paid_keys_to_delete.is_empty() { - match 
paid_list.remove_batch(&paid_keys_to_delete).await { - Ok(count) => { - stats.pruned = count; - debug!("Pruned {count} out-of-range PaidForList entries"); - } - Err(e) => { - warn!("Failed to prune PaidForList entries: {e}"); - } - } + (paid_keys.len(), stats) +} + +fn select_paid_prune_candidate( + key: &XorName, + closest: &[DHTNode], + self_id: &PeerId, + allow_remote_prune_audits: bool, + selected_candidate_count: usize, + selected_verification_peers: &mut HashSet, +) -> PaidPruneKeyState { + if !allow_remote_prune_audits { + return PaidPruneKeyState::RemoteDeferred; } - (paid_keys.len(), stats) + let target_peers = remote_close_group_peers(closest, self_id); + if target_peers.is_empty() { + warn!( + "Cannot prune paid entry {}: current paid close group has no remote peers", + hex::encode(key) + ); + return PaidPruneKeyState::None; + } + + if selected_candidate_count >= MAX_PAID_PRUNE_VERIFICATIONS_PER_PASS { + return PaidPruneKeyState::EntryBudgetDeferred; + } + + if !reserve_paid_prune_peer_budget(&target_peers, selected_verification_peers) { + return PaidPruneKeyState::PeerBudgetDeferred; + } + + PaidPruneKeyState::Candidate(target_peers) +} + +async fn delete_paid_entries(keys_to_delete: &[XorName], paid_list: &Arc) -> usize { + if keys_to_delete.is_empty() { + return 0; + } + + match paid_list.remove_batch(keys_to_delete).await { + Ok(count) => { + debug!("Pruned {count} out-of-range PaidForList entries"); + count + } + Err(e) => { + warn!("Failed to prune PaidForList entries: {e}"); + 0 + } + } } /// Re-check each confirmed candidate against current local state before @@ -583,6 +688,26 @@ fn paid_prune_confirmations_needed(group_size: usize) -> usize { (3 * group_size).div_ceil(4) } +fn reserve_paid_prune_peer_budget( + target_peers: &[PeerId], + selected_verification_peers: &mut HashSet, +) -> bool { + let new_peer_count = target_peers + .iter() + .filter(|peer| !selected_verification_peers.contains(peer)) + .count(); + if selected_verification_peers + 
.len() + .saturating_add(new_peer_count) + > MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS + { + return false; + } + + selected_verification_peers.extend(target_peers.iter().copied()); + true +} + /// Ask the current paid close group whether they track each expired key in /// their `PaidForList`, and return the confirming peers per key. /// @@ -646,8 +771,8 @@ fn paid_confirmations_by_key( let confirmed: HashSet = key_evidence .paid_list .iter() - .filter(|(peer, status)| { - **status == PaidListEvidence::Confirmed && target_peers.contains(peer) + .filter(|&(peer, status)| { + *status == PaidListEvidence::Confirmed && target_peers.contains(peer) }) .map(|(peer, _)| *peer) .collect(); @@ -1278,6 +1403,12 @@ mod tests { [b; 32] } + fn peer_ids(count: usize) -> Vec { + (0..count) + .map(|idx| peer_id_from_byte(u8::try_from(idx + 1).expect("peer byte"))) + .collect() + } + fn candidate(key: XorName, target_peers: Vec) -> RecordPruneCandidate { RecordPruneCandidate { key, target_peers } } @@ -1364,6 +1495,48 @@ mod tests { assert_eq!(paid_prune_confirmations_needed(20), 15); } + #[test] + fn paid_prune_peer_budget_allows_overlapping_targets() { + let peers = peer_ids(MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS); + let mut selected_peers = HashSet::new(); + + assert!(reserve_paid_prune_peer_budget(&peers, &mut selected_peers)); + assert_eq!( + selected_peers.len(), + MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS, + ); + + let overlapping_targets = vec![peers[0], peers[1]]; + assert!(reserve_paid_prune_peer_budget( + &overlapping_targets, + &mut selected_peers, + )); + assert_eq!( + selected_peers.len(), + MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS, + ); + } + + #[test] + fn paid_prune_peer_budget_rejects_new_peers_past_cap() { + let peers = peer_ids(MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS + 1); + let mut selected_peers = HashSet::new(); + + assert!(reserve_paid_prune_peer_budget( + &peers[..MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS], + &mut selected_peers, + )); + 
assert!(!reserve_paid_prune_peer_budget( + &peers[MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS..], + &mut selected_peers, + )); + assert_eq!( + selected_peers.len(), + MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS, + ); + assert!(!selected_peers.contains(&peers[MAX_PAID_PRUNE_VERIFICATION_PEERS_PER_PASS])); + } + #[test] fn paid_confirmations_count_only_confirmed_target_peers() { let confirmed_peer = peer_id_from_byte(1); From 479aceda4c7f0e29f6252d15b9f8e6ec9b35eb51 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Thu, 11 Jun 2026 17:59:29 +0200 Subject: [PATCH 13/18] chore(pruning): clean up paid prune review nits --- src/replication/pruning.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/replication/pruning.rs b/src/replication/pruning.rs index ff97f1f..d72a750 100644 --- a/src/replication/pruning.rs +++ b/src/replication/pruning.rs @@ -471,9 +471,7 @@ async fn prune_paid_entries( let mut last_selected_offset = None; for offset in 0..paid_keys.len() { - let Some(key) = paid_keys.get((scan_start + offset) % paid_keys.len()) else { - continue; - }; + let key = &paid_keys[(scan_start + offset) % paid_keys.len()]; let closest: Vec = dht .find_closest_nodes_local_with_self(key, config.paid_list_close_group_size) .await; @@ -1017,7 +1015,7 @@ fn prune_proofs_needed(group_size: usize) -> usize { } } -/// Whether enough target peers proved possession to allow deletion. +/// Whether enough target peers supplied positive evidence to allow deletion. /// /// `proofs_needed == 0` means confirmation is impossible (no targets), not /// trivially met. From 692a4a04bfdf49a660c01ca011e2023098a283a3 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Thu, 11 Jun 2026 17:18:07 +0200 Subject: [PATCH 14/18] feat(payment): accept flexible single-node proof bundles Verify paid single-node quotes from 1..=CLOSE_GROUP_SIZE supplied quotes while preserving the existing 0x01 proof format and 3x payment requirement. 
Run direct client PUTs and fresh chunk replication through the full ClientPut check set, including local close-group receiver membership. Run fresh paid-list admission through the same live payment checks with K-wide receiver membership, and keep verified-payment cache entries scoped by verification strength. --- src/payment/cache.rs | 125 ++- src/payment/quote.rs | 35 +- src/payment/verifier.rs | 2083 ++++++++++++++++++++--------------- src/replication/mod.rs | 86 +- src/storage/handler.rs | 22 +- tests/e2e/merkle_payment.rs | 44 +- tests/e2e/replication.rs | 58 + 7 files changed, 1514 insertions(+), 939 deletions(-) diff --git a/src/payment/cache.rs b/src/payment/cache.rs index 75994d9..c351889 100644 --- a/src/payment/cache.rs +++ b/src/payment/cache.rs @@ -19,23 +19,33 @@ const DEFAULT_CACHE_CAPACITY: usize = 100_000; /// This cache stores `XorName` values that have been verified to exist on the /// autonomi network, avoiding repeated network queries for the same data. /// -/// Each entry carries a flag recording whether the verification that inserted -/// it ran the full client-PUT check set (`true`) or only the -/// receipt-authenticity subset used for replication (`false`). A -/// replication-verified entry must not satisfy a later client-PUT fast-path — -/// the context-gated checks (own-quote freshness, local recipient, merkle -/// candidate closeness) were never run for it — while either kind of entry -/// satisfies a later replication check. +/// Each entry records which fresh proof verification level inserted it. A +/// paid-list entry must not satisfy a later client-PUT fast-path because +/// paid-list membership checks K closest peers while client PUTs require the +/// close group. Stronger entries satisfy weaker lookups. #[derive(Clone)] pub struct VerifiedCache { - /// Value: `true` if the entry was verified under the full client-PUT - /// check set, `false` if only under the replication subset. 
- inner: Arc>>, + inner: Arc>>, hits: Arc, misses: Arc, additions: Arc, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum VerificationLevel { + PaidList, + ClientPut, +} + +impl VerificationLevel { + fn satisfies(self, required: Self) -> bool { + matches!( + (self, required), + (Self::PaidList, Self::PaidList) | (Self::ClientPut, Self::PaidList | Self::ClientPut) + ) + } +} + /// Cache statistics for monitoring. #[derive(Debug, Default, Clone, Copy)] pub struct CacheStats { @@ -86,11 +96,10 @@ impl VerifiedCache { } } - /// Check if a `XorName` is in the cache (verified under either check set). + /// Check if a `XorName` is in the cache (verified under any fresh check set). /// /// Returns `true` if the `XorName` is cached (verified to exist on autonomi). - /// Sufficient for replication-context lookups; client-PUT lookups must use - /// [`Self::contains_client_put_verified`]. + /// Paid-list and client-PUT lookups must use their stricter helpers. #[must_use] pub fn contains(&self, xorname: &XorName) -> bool { let found = self.inner.lock().get(xorname).is_some(); @@ -104,14 +113,42 @@ impl VerifiedCache { found } + /// Check if a `XorName` is cached AND its verification ran at least the + /// paid-list admission check set. + /// + /// A client-PUT entry returns `true` here because it passed the stricter + /// closest-7 receiver membership check. + #[must_use] + pub fn contains_paid_list_verified(&self, xorname: &XorName) -> bool { + let found = self + .inner + .lock() + .get(xorname) + .copied() + .is_some_and(|level| level.satisfies(VerificationLevel::PaidList)); + + if found { + self.hits.fetch_add(1, Ordering::Relaxed); + } else { + self.misses.fetch_add(1, Ordering::Relaxed); + } + + found + } + /// Check if a `XorName` is cached AND its verification ran the full - /// client-PUT check set. + /// client-PUT close-group check set. 
/// - /// A replication-verified entry returns `false` here: it never passed the - /// client-PUT-only checks, so it must not let a later client PUT skip them. + /// Paid-list entries return `false` here because they did not pass the + /// client-PUT close-group membership check. #[must_use] pub fn contains_client_put_verified(&self, xorname: &XorName) -> bool { - let found = self.inner.lock().get(xorname).copied() == Some(true); + let found = self + .inner + .lock() + .get(xorname) + .copied() + .is_some_and(|level| level.satisfies(VerificationLevel::ClientPut)); if found { self.hits.fetch_add(1, Ordering::Relaxed); @@ -125,27 +162,32 @@ impl VerifiedCache { /// Add a `XorName` verified under the full client-PUT check set. /// /// This should be called after verifying that data exists on the autonomi network. - /// Also upgrades an existing replication-verified entry. + /// Also upgrades an existing paid-list-verified entry. pub fn insert(&self, xorname: XorName) { - self.inner.lock().put(xorname, true); - self.additions.fetch_add(1, Ordering::Relaxed); + self.insert_with_level(xorname, VerificationLevel::ClientPut); } - /// Add a `XorName` verified under the replication (receipt-authenticity) - /// subset only. + /// Add a `XorName` verified under paid-list admission checks. /// - /// Never downgrades an existing client-PUT-verified entry — the stronger - /// verification already happened, and replication re-offers of the same - /// key are routine. - pub fn insert_replication_verified(&self, xorname: XorName) { + /// Never downgrades an existing client-PUT-verified entry. + pub fn insert_paid_list_verified(&self, xorname: XorName) { + self.insert_with_level(xorname, VerificationLevel::PaidList); + } + + fn insert_with_level(&self, xorname: XorName, level: VerificationLevel) { let added = { let mut inner = self.inner.lock(); // `get_mut` refreshes LRU recency for existing entries of either kind. 
- if inner.get_mut(&xorname).is_none() { - inner.put(xorname, false); - true - } else { + if inner.get(&xorname).is_some() { + if let Some(existing) = inner.get_mut(&xorname) { + if !existing.satisfies(level) { + *existing = level; + } + } false + } else { + inner.put(xorname, level); + true } }; if added { @@ -216,6 +258,29 @@ mod tests { assert_eq!(cache.len(), 2); } + #[test] + fn test_cache_verification_levels_do_not_downgrade_or_over_authorize() { + let cache = VerifiedCache::new(); + let paid_list = [2u8; 32]; + let client_put = [3u8; 32]; + + cache.insert_paid_list_verified(paid_list); + assert!(cache.contains(&paid_list)); + assert!(cache.contains_paid_list_verified(&paid_list)); + assert!(!cache.contains_client_put_verified(&paid_list)); + + cache.insert(paid_list); + assert!(cache.contains_client_put_verified(&paid_list)); + + cache.insert(client_put); + assert!(cache.contains(&client_put)); + assert!(cache.contains_paid_list_verified(&client_put)); + assert!(cache.contains_client_put_verified(&client_put)); + + cache.insert_paid_list_verified(client_put); + assert!(cache.contains_client_put_verified(&client_put)); + } + #[test] fn test_cache_stats() { let cache = VerifiedCache::new(); diff --git a/src/payment/quote.rs b/src/payment/quote.rs index 5a1a44d..eb5419f 100644 --- a/src/payment/quote.rs +++ b/src/payment/quote.rs @@ -45,12 +45,11 @@ pub struct QuoteGenerator { /// /// When attached, quote prices are computed from /// [`LmdbStorage::current_chunks()`] — the **same** count the - /// [`PaymentVerifier`](crate::payment::PaymentVerifier) freshness gate - /// compares the quote against. Keeping pricing and freshness on one source - /// means a quote priced at record count `N` is later checked against a - /// current count that differs only by genuine in-flight growth, instead of - /// by the standing client-PUT-vs-replication gap that rejected every - /// payment when pricing read the side counter and freshness read the store. 
+ /// [`PaymentVerifier`](crate::payment::PaymentVerifier) price-floor check + /// compares the paid quote against. Keeping pricing and verification on one + /// source means a quote priced at record count `N` is later checked against + /// a current count that differs only by genuine in-flight growth, instead of + /// by a side-counter-vs-store gap. /// `None` until [`Self::attach_storage`] is called. storage: RwLock>>, /// Signing function provided by the node. @@ -84,10 +83,10 @@ impl QuoteGenerator { /// authoritative on-disk record count. /// /// This MUST be wired to the same `LmdbStorage` the - /// [`PaymentVerifier`](crate::payment::PaymentVerifier) freshness gate reads - /// via `current_chunks()`; otherwise pricing and freshness diverge and the - /// gate rejects healthy payments. Idempotent: calling twice replaces the - /// handle. Uses interior mutability so it can be called on an `Arc`. + /// [`PaymentVerifier`](crate::payment::PaymentVerifier) price-floor check + /// reads via `current_chunks()`; otherwise pricing and verification diverge + /// and healthy payments can be rejected. Idempotent: calling twice replaces + /// the handle. Uses interior mutability so it can be called on an `Arc`. pub fn attach_storage(&self, storage: Arc) { *self.storage.write() = Some(storage); debug!("QuoteGenerator: LmdbStorage attached for current-records pricing"); @@ -97,7 +96,7 @@ impl QuoteGenerator { /// /// Prefers the attached `LmdbStorage` count (authoritative — counts client /// PUTs, replication stores, and repair fetches alike, exactly matching the - /// verifier's freshness source). Falls back to the in-memory + /// verifier's price-floor source). Falls back to the in-memory /// `metrics_tracker` when no storage is attached or the read fails, so /// pricing never panics or stalls. 
fn pricing_records_stored(&self) -> usize { @@ -184,7 +183,7 @@ impl QuoteGenerator { let timestamp = SystemTime::now(); // Calculate price from the authoritative current record count (the same - // count the verifier's freshness gate reads), falling back to the + // count the verifier's price-floor check reads), falling back to the // in-memory counter only when no storage is attached. let price = calculate_price(self.pricing_records_stored()); @@ -370,13 +369,13 @@ mod tests { generator } - /// Regression test for the STG-01 quote-freshness rejection: pricing must - /// read the attached store's `current_chunks()`, NOT the side counter. + /// Regression test for the STG-01 quote-pricing mismatch: pricing must read + /// the attached store's `current_chunks()`, NOT the side counter. /// /// Before the fix, the price came from `metrics_tracker` (client-PUT count - /// only) while the verifier's freshness gate read `current_chunks()` (all - /// records, including replicated ones). On a replicating network the store - /// count ran far ahead of the side counter, so every quote looked "stale". + /// only) while verifier checks read `current_chunks()` (all records, + /// including replicated ones). On a replicating network the store count ran + /// far ahead of the side counter, so every quote looked underpriced. /// Here we attach a store, write records WITHOUT touching the side counter /// (mimicking replication stores), and assert the quote prices off the /// store count — i.e. the two sources now agree. 
@@ -441,7 +440,7 @@ mod tests { derive_records_stored_from_price(quote.price), 25, "verifier's price-inverse must recover the store count, keeping the \ - freshness delta at ~0 for a freshly issued quote" + local price comparison aligned for a freshly issued quote" ); } diff --git a/src/payment/verifier.rs b/src/payment/verifier.rs index b310184..633481b 100644 --- a/src/payment/verifier.rs +++ b/src/payment/verifier.rs @@ -11,13 +11,14 @@ use crate::payment::pricing::{calculate_price, derive_records_stored_from_price} use crate::payment::proof::{ deserialize_merkle_proof, deserialize_proof, detect_proof_type, ProofType, }; -use crate::payment::single_node::SingleNodePayment; +use crate::replication::config::K_BUCKET_SIZE; use crate::storage::lmdb::LmdbStorage; use ant_protocol::payment::verify::{verify_quote_content, verify_quote_signature}; -use evmlib::common::Amount; +use evmlib::common::{Amount, QuoteHash}; use evmlib::contract::payment_vault; use evmlib::merkle_batch_payment::{OnChainPaymentInfo, PoolHash}; use evmlib::Network as EvmNetwork; +use evmlib::PaymentQuote; use evmlib::ProofOfPayment; use evmlib::RewardsAddress; use lru::LruCache; @@ -25,6 +26,8 @@ use parking_lot::{Mutex, RwLock}; use saorsa_core::identity::node_identity::peer_id_from_public_key_bytes; use saorsa_core::identity::PeerId; use saorsa_core::P2PNode; +#[cfg(any(test, feature = "test-utils"))] +use std::collections::HashMap; use std::num::NonZeroUsize; use std::sync::Arc; @@ -42,25 +45,38 @@ pub const MIN_PAYMENT_PROOF_SIZE_BYTES: usize = 32; /// 256 KB provides headroom while still capping memory during verification. pub const MAX_PAYMENT_PROOF_SIZE_BYTES: usize = 262_144; -/// Maximum percentage by which a quote's paid price may fall *below* the node's -/// current price before the quote is rejected as stale. +/// Maximum percentage by which the median-paid quote may fall below this +/// verifier's current local price before a client PUT is rejected. 
/// -/// The freshness gate is one-directional and price-based, not a symmetric -/// record-count delta: -/// -/// - **Over-payment is always accepted.** If the client paid at least the -/// node's current price (e.g. the node pruned records and is now cheaper), -/// the quote is fine — a node has no reason to reject money. -/// - **Only meaningful under-payment is rejected.** A quote priced below the -/// current price by more than this percentage is rejected as stale. -/// -/// Comparing prices instead of raw record counts makes the tolerance -/// self-scaling against the quadratic pricing curve: at low/moderate fill the -/// curve is nearly flat, so normal in-flight churn (the node storing a handful -/// of replicated records between quoting and verifying) is a negligible price -/// change and passes; at high fill the curve is steep, so the same percentage -/// still catches genuinely stale, underpriced quotes. -const QUOTE_PRICE_STALENESS_PCT_TOLERANCE: u64 = 25; +/// A 20% floor means a paid quote must be at least `0.8 * P_v`, so an +/// attacker who controls a real close-group issuer still pays at least +/// `0.8 * P_v * 3` for an honest verifier. Honest median-paid bundles have +/// a structural majority guarantee: the four nodes at or below the median +/// accept unless their own price grows more than `1 / 0.8 = 1.25x` between +/// quote and PUT. Above-median nodes may reject when `P_v > 1.25 * median`; +/// those records are backfilled by replication, which deliberately skips +/// this present-tense floor. 
+const PAID_QUOTE_PRICE_FLOOR_TOLERANCE_PCT: u64 = 20; + +const PERCENT_DENOMINATOR: u64 = 100; +const PAID_QUOTE_PAYMENT_MULTIPLIER: u64 = 3; + +#[derive(Clone, Copy)] +struct LegacyMedianCandidate<'a> { + encoded_peer_id: &'a evmlib::EncodedPeerId, + quote: &'a PaymentQuote, + expected_amount: Amount, +} + +fn price_floor(current_price: Amount, tolerance_pct: u64) -> Amount { + current_price.saturating_mul(Amount::from( + PERCENT_DENOMINATOR.saturating_sub(tolerance_pct), + )) / Amount::from(PERCENT_DENOMINATOR) +} + +fn median_quote_index(quote_count: usize) -> usize { + quote_count / 2 +} /// Configuration for EVM payment verification. /// @@ -91,66 +107,47 @@ pub struct PaymentVerifierConfig { /// Cache capacity (number of `XorName` values to cache). pub cache_capacity: usize, /// Local node's rewards address. - /// The verifier rejects payments that don't include this node as a recipient. + /// + /// Kept in the verifier config for payment policies that bind receipts to + /// this node's payout address. pub local_rewards_address: RewardsAddress, } -/// The situation a payment proof is being verified in. +/// The fresh admission path a payment proof is being verified for. /// -/// A proof-of-payment is a *receipt*: it records a sale that closed at some -/// earlier moment, at that moment's prices, between the client and the close -/// group of that moment. Two very different callers present receipts: +/// - **`ClientPut`** — the node is the storer being paid *right now*. The +/// node must be in the local close group (`CLOSE_GROUP_SIZE`) for the +/// address, and every live payment check applies. +/// - **`PaidListAdmission`** — the node is admitting fresh paid-list metadata. +/// It runs the same live payment checks as `ClientPut`, but the receiver +/// membership check uses the local K closest peers because paid-list +/// tracking spans a wider group than storage. /// -/// - **`ClientPut`** — the node is the storer being paid *right now*. 
Every -/// check applies, including the ones that interrogate the present: "is the -/// price on this receipt still fair for my current fullness?" (own-quote -/// freshness) and "am I actually one of the paid recipients?" (local -/// recipient / merkle candidate closeness). -/// - **`Replication`** — a neighbour is handing over an already-paid record -/// (fresh-write fan-out, paid-notify, repair). The sale closed long ago; the -/// network's job now is to keep the record at target redundancy for the rest -/// of its life. Re-asking the present-tense questions of a receipt is a -/// category error with a guaranteed failure mode: record counts only grow, -/// so every receipt's quoted price eventually drops below the verifier's -/// live floor, and close groups churn, so the receiving node eventually -/// isn't a quoted recipient at all. On DEV-01 (2026-06-05) this rejected -/// nearly 100% of replication proof-of-payment transfers within an hour of -/// launch (4M+ -/// rejections at ~300k/hour), pinned records below target redundancy, and -/// drove a permanent ~500 MB/s fleet-wide re-offer storm. +/// Immediate fresh chunk replication is different: the receiver is about to +/// store the newly written chunk as if the client PUT it there directly, so +/// that call site deliberately uses `ClientPut`. /// -/// Under `Replication` the verifier therefore skips only the -/// storer-being-paid-now checks. Everything that makes the receipt a receipt -/// still runs: quote structure, content binding to this exact address, -/// peer-ID/pub-key bindings, ML-DSA signatures, and the on-chain settlement -/// lookup. A record cannot be admitted via replication without an authentic, -/// settled payment for that record. 
-/// -/// The verified-`XorName` cache is context-aware to match: an entry inserted -/// by a `Replication` verification satisfies later replication lookups but -/// NOT a later `ClientPut` fast-path, so a replication receipt can never let -/// a client PUT bypass the checks this enum gates. -/// -/// Trade-off (deliberate, documented): skipping the recipient/closeness -/// checks for replication means a payer who self-deals — minting a quote pool -/// from peers they control and settling the median payment to their own -/// wallet on-chain — can present that receipt to honest nodes via the -/// replication protocol, paying only gas plus a recycled self-payment instead -/// of paying real storers. The client-PUT path still rejects such pools, and -/// replication admission still requires the receiving node to be responsible -/// for the key, so the abuse costs a settled on-chain payment per chunk and -/// buys only what storage already costs; closing it properly belongs in quote -/// issuance / payment policy, not in the replication hot path, where the -/// equivalent defence provably destroys the network's ability to heal. +/// Later neighbour-sync repair does not include proof-of-payment bytes and +/// does not call this verifier. It authorizes repair from network evidence: +/// majority storage among the closest 7, or majority paid-list membership +/// among the closest K. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum VerificationContext { - /// The node is the storer being paid right now: all checks apply. + /// The node is the storer being paid right now: all checks apply, + /// including receiver membership in the local close group. ClientPut, - /// An already-settled receipt presented during replication/repair: skip - /// the storer-being-paid-now checks (own-quote price freshness, local - /// recipient, merkle candidate closeness); keep all receipt-authenticity - /// checks. 
- Replication, + /// The node is admitting fresh paid-list metadata: same payment checks as + /// `ClientPut`, but receiver membership is local K closest peers. + PaidListAdmission, +} + +impl VerificationContext { + fn receiver_membership_width(self) -> usize { + match self { + Self::ClientPut => CLOSE_GROUP_SIZE, + Self::PaidListAdmission => K_BUCKET_SIZE, + } + } } /// Status returned by payment verification. @@ -202,30 +199,42 @@ pub struct PaymentVerifier { /// amplification to one lookup per unique `pool_hash` regardless of /// concurrency. inflight_closeness: Mutex>>, - /// P2P node handle, attached post-construction so merkle verification can - /// check that candidate `pub_keys` map to peers actually close to the pool - /// midpoint in the live DHT. `None` in unit tests that don't exercise - /// merkle verification; production startup MUST call [`attach_p2p_node`]. + /// P2P node handle, attached post-construction so client PUT verification + /// can check receiver membership and paid-quote issuer closeness, and + /// merkle verification can check that candidate `pub_keys` map to peers + /// actually close to the pool midpoint in the live DHT. `None` in unit + /// tests that don't exercise live-DHT checks; production startup MUST call + /// [`attach_p2p_node`]. p2p_node: RwLock>>, - /// LMDB storage handle, attached post-construction so the storage-delta - /// freshness check can read the authoritative on-disk record count without + /// LMDB storage handle, attached post-construction so the paid-quote + /// price-floor check can read the authoritative on-disk record count without /// depending on a side counter that may drift from replication/repair/prune /// paths. `None` in unit tests that pre-set [`Self::test_records_override`]; /// production startup MUST call [`attach_storage`]. storage: RwLock>>, - /// Test-only override for the storage-delta freshness check. + /// Test-only override for the paid-quote local price floor. 
/// - /// When `Some(n)`, `validate_quote_freshness` uses `n` as the current - /// record count instead of querying `storage.current_chunks()`. Set via + /// When `Some(n)`, `validate_paid_quote_price_floor` uses `n` as the + /// current record count instead of querying `storage.current_chunks()`. Set via /// [`Self::set_records_stored_for_tests`] so unit tests that don't wire a - /// real `LmdbStorage` can still drive the freshness logic. + /// real `LmdbStorage` can still drive the price-floor logic. test_records_override: RwLock>, - /// Test-only override for this node's own peer ID, used by - /// `validate_quote_freshness` to pick out the node's own quote from the - /// payment bundle. Production code derives it from the attached - /// [`P2PNode`]; set via [`Self::set_peer_id_for_tests`] so unit tests can - /// drive the freshness logic without wiring a real `P2PNode`. - test_peer_id_override: RwLock>, + /// Test-only override for the paid-quote known-peer check. + /// + /// Production code derives closest peers from the attached [`P2PNode`]. + #[cfg(any(test, feature = "test-utils"))] + test_paid_quote_known_peers_override: RwLock>>, + /// Test-only override for `completedPayments(quote_hash)`. + /// + /// Production always queries the payment vault; unit tests use this to + /// exercise the full verifier path without starting an EVM chain. + #[cfg(any(test, feature = "test-utils"))] + test_completed_payments_override: RwLock>, + /// Test-only override for receiver-membership checks. + /// + /// Production derives membership from the attached [`P2PNode`]. + #[cfg(any(test, feature = "test-utils"))] + test_receiver_membership_override: RwLock>, /// Configuration. 
config: PaymentVerifierConfig, } @@ -325,13 +334,13 @@ impl PaymentVerifier { info!("Payment verifier initialized (cache_capacity={cache_capacity}, evm=always-on, pool_cache={DEFAULT_POOL_CACHE_CAPACITY})"); // Loud warning if a production binary was accidentally built with - // `test-utils`: that feature flips the closeness-check fail-open - // switch, disabling the pay-yourself defence when P2PNode isn't - // attached. Safe in tests, never intended for prod. + // `test-utils`: that feature flips the live-DHT payment-check + // fail-open switches when P2PNode isn't attached. Safe in tests, never + // intended for prod. #[cfg(feature = "test-utils")] crate::logging::error!( - "PaymentVerifier: built with `test-utils` feature — merkle closeness \ - defence falls back to fail-open when no P2PNode is attached. This \ + "PaymentVerifier: built with `test-utils` feature — payment live-DHT \ + checks fall back to fail-open when no P2PNode is attached. This \ feature is for test binaries only; production nodes must be built \ without it." ); @@ -344,38 +353,44 @@ impl PaymentVerifier { p2p_node: RwLock::new(None), storage: RwLock::new(None), test_records_override: RwLock::new(None), - test_peer_id_override: RwLock::new(None), + #[cfg(any(test, feature = "test-utils"))] + test_paid_quote_known_peers_override: RwLock::new(None), + #[cfg(any(test, feature = "test-utils"))] + test_completed_payments_override: RwLock::new(HashMap::new()), + #[cfg(any(test, feature = "test-utils"))] + test_receiver_membership_override: RwLock::new(None), config, } } - /// Attach the node's [`P2PNode`] handle so merkle-payment verification can - /// check candidate `pub_keys` against the DHT's actual closest peers to the - /// pool midpoint. 
+ /// Attach the node's [`P2PNode`] handle so client-PUT verification can + /// check receiver membership and paid-quote issuer closeness, and + /// merkle-payment verification can check candidate `pub_keys` against the + /// DHT's actual closest peers to the pool midpoint. /// /// Production startup MUST call this once the `P2PNode` exists. Without - /// it, the closeness check fails CLOSED in release builds (rejects the - /// PUT with a visible error) and fails open in test builds. Idempotent: + /// it, live-DHT payment checks fail CLOSED in release builds (reject the + /// PUT with a visible error) and fail open in test builds. Idempotent: /// calling twice replaces the handle. pub fn attach_p2p_node(&self, node: Arc) { *self.p2p_node.write() = Some(node); - debug!("PaymentVerifier: P2PNode attached for merkle closeness checks"); + debug!("PaymentVerifier: P2PNode attached for payment live-DHT checks"); } - /// Attach the node's [`LmdbStorage`] handle so storage-delta freshness + /// Attach the node's [`LmdbStorage`] handle so paid-quote price-floor /// checks can query the authoritative on-disk record count. /// /// Production startup MUST call this once the storage exists; otherwise - /// `validate_quote_freshness` falls back to treating the current count as - /// zero, which will reject all non-trivial quotes. Idempotent: calling - /// twice replaces the handle. + /// client PUTs using paid-quote verification are rejected because + /// the local economic floor cannot be checked. Idempotent: calling twice + /// replaces the handle. pub fn attach_storage(&self, storage: Arc) { *self.storage.write() = Some(storage); - debug!("PaymentVerifier: LmdbStorage attached for storage-delta freshness checks"); + debug!("PaymentVerifier: LmdbStorage attached for paid-quote price-floor checks"); } - /// Test-only setter for the current record count used by storage-delta - /// freshness checks. 
Lets unit tests drive the freshness logic without + /// Test-only setter for the current record count used by paid-quote + /// price-floor checks. Lets unit tests drive the floor logic without /// wiring a real `LmdbStorage`. Has no effect in production code because /// production code is expected to call [`Self::attach_storage`] instead. #[cfg(any(test, feature = "test-utils"))] @@ -383,45 +398,42 @@ impl PaymentVerifier { *self.test_records_override.write() = Some(count); } - /// Test-only setter for the node's own peer ID used by the quote - /// freshness check. Lets unit tests mark which quote in a payment bundle - /// is "ours" without wiring a real `P2PNode`. Has no effect in production - /// code because production code is expected to call - /// [`Self::attach_p2p_node`] instead. + /// Test-only setter for local closest peers used by the paid-quote + /// known-peer check. #[cfg(any(test, feature = "test-utils"))] - pub fn set_peer_id_for_tests(&self, peer_id_bytes: [u8; 32]) { - *self.test_peer_id_override.write() = Some(peer_id_bytes); + pub fn set_paid_quote_known_peers_for_tests(&self, peer_ids: Vec<[u8; 32]>) { + *self.test_paid_quote_known_peers_override.write() = Some(peer_ids); } - /// Snapshot this node's own peer ID for the quote freshness check. - /// - /// Prefers the attached [`P2PNode`] (authoritative). Falls back to a test - /// override if one was set. Returns `None` only when no source is - /// available (mis-configured production startup); the caller treats that - /// as "unknown" and skips the freshness gate rather than rejecting — the - /// same fail-open posture as a missing record-count source. - fn self_peer_id_bytes(&self) -> Option<[u8; 32]> { - if let Some(node) = self.p2p_node.read().as_ref() { - return Some(*node.peer_id().as_bytes()); - } - *self.test_peer_id_override.read() + /// Test-only setter for an on-chain completed payment amount. 
+ #[cfg(any(test, feature = "test-utils"))] + pub fn set_completed_payment_for_tests(&self, quote_hash: QuoteHash, amount: Amount) { + self.test_completed_payments_override + .write() + .insert(quote_hash, amount); + } + + /// Test-only setter for receiver-membership checks. + #[cfg(any(test, feature = "test-utils"))] + pub fn set_receiver_membership_for_tests(&self, is_member: bool) { + *self.test_receiver_membership_override.write() = Some(is_member); } - /// Snapshot the current record count for freshness comparisons. + /// Snapshot the current record count for paid-quote price-floor checks. /// /// Prefers the attached `LmdbStorage` (authoritative — covers client PUTs, /// replication stores, repair fetches, and prune deletes by definition). /// Falls back to a test override if one was set. Returns `None` only when - /// no source is available (mis-configured production startup); the caller - /// treats that as "unknown" and skips storage-delta gating rather than - /// rejecting all quotes outright. + /// no source is available (mis-configured production startup). The + /// paid-quote floor rejects client PUTs because the local floor is + /// the economic security gate for this proof policy. fn current_records_stored(&self) -> Option { if let Some(storage) = self.storage.read().as_ref() { match storage.current_chunks() { Ok(n) => return Some(n), Err(e) => { warn!( - "PaymentVerifier: failed to read current_chunks() for freshness check: {e}" + "PaymentVerifier: failed to read current_chunks() for price-floor check: {e}" ); return None; } @@ -436,11 +448,9 @@ impl PaymentVerifier { /// 1. Check LRU cache (fast path) /// 2. If not cached, payment is required /// - /// The fast path is context-aware: a `ClientPut` lookup is satisfied only - /// by an entry whose verification ran the full client-PUT check set. 
An - /// entry inserted by a `Replication` verification (which skips the - /// storer-being-paid-now checks) must not let a later client PUT bypass - /// those checks. A `Replication` lookup accepts either kind of entry. + /// The fast path is context-aware. A `ClientPut` lookup is satisfied only + /// by a close-group store verification. A `PaidListAdmission` lookup is + /// satisfied by either a paid-list or client-PUT verification. /// /// # Arguments /// @@ -459,7 +469,9 @@ impl PaymentVerifier { // Check LRU cache (fast path) let cached = match context { VerificationContext::ClientPut => self.cache.contains_client_put_verified(xorname), - VerificationContext::Replication => self.cache.contains(xorname), + VerificationContext::PaidListAdmission => { + self.cache.contains_paid_list_verified(xorname) + } }; if cached { if crate::logging::enabled!(crate::logging::Level::DEBUG) { @@ -488,9 +500,8 @@ impl PaymentVerifier { /// /// * `xorname` - The content-addressed name of the data /// * `payment_proof` - Optional payment proof (required if not in cache) - /// * `context` - Whether the proof backs a live client PUT or an - /// already-settled receipt presented during replication — see - /// [`VerificationContext`] for which checks each context runs + /// * `context` - Which fresh admission path is verifying the proof — see + /// [`VerificationContext`] for receiver-membership width /// /// # Returns /// @@ -506,6 +517,8 @@ impl PaymentVerifier { payment_proof: Option<&[u8]>, context: VerificationContext, ) -> Result { + self.validate_receiver_membership(xorname, context).await?; + // First check if payment is required let status = self.check_payment_required(xorname, context); @@ -562,15 +575,13 @@ impl PaymentVerifier { } } - // Cache the verified xorname, recording which check set - // ran. 
A Replication-verified entry satisfies later - // replication lookups (re-offers of the same key are - // routine) but not a later ClientPut fast-path — the - // context-gated checks were never run for it. + // Cache the verified xorname at the context's verification + // strength. Stronger entries satisfy weaker future lookups, + // but not the reverse. match context { VerificationContext::ClientPut => self.cache.insert(*xorname), - VerificationContext::Replication => { - self.cache.insert_replication_verified(*xorname); + VerificationContext::PaidListAdmission => { + self.cache.insert_paid_list_verified(*xorname); } } @@ -589,6 +600,68 @@ impl PaymentVerifier { } } + async fn validate_receiver_membership( + &self, + xorname: &XorName, + context: VerificationContext, + ) -> Result<()> { + let width = context.receiver_membership_width(); + + #[cfg(any(test, feature = "test-utils"))] + { + let membership_override = *self.test_receiver_membership_override.read(); + if let Some(is_member) = membership_override { + if is_member { + return Ok(()); + } + return Err(Error::Payment(format!( + "{context:?} receiver is not in the required local peer set for key {}", + hex::encode(xorname) + ))); + } + } + + let attached = self.p2p_node.read().as_ref().map(Arc::clone); + let Some(p2p_node) = attached else { + #[cfg(any(test, feature = "test-utils"))] + { + crate::logging::warn!( + "PaymentVerifier: no P2PNode attached; {context:?} receiver \ + membership check SKIPPED (test build). Production startup \ + MUST call PaymentVerifier::attach_p2p_node." + ); + return Ok(()); + } + #[cfg(not(any(test, feature = "test-utils")))] + { + crate::logging::error!( + "PaymentVerifier: no P2PNode attached; rejecting {context:?}. \ + This is a node-startup bug — PaymentVerifier::attach_p2p_node \ + must be called before any PUT handler runs." + ); + return Err(Error::Payment(format!( + "{context:?} rejected: verifier is not wired to the P2P \ + layer; cannot verify receiver membership." 
+ ))); + } + }; + + let self_id = *p2p_node.peer_id(); + let closest = p2p_node + .dht_manager() + .find_closest_nodes_local_with_self(xorname, width) + .await; + if closest.iter().any(|node| node.peer_id == self_id) { + return Ok(()); + } + + Err(Error::Payment(format!( + "{context:?} receiver {} is not among this node's local {width} closest peers for {}", + self_id.to_hex(), + hex::encode(xorname) + ))) + } + /// Get cache statistics. #[must_use] pub fn cache_stats(&self) -> CacheStats { @@ -624,24 +697,23 @@ impl PaymentVerifier { /// Verify a single-node EVM payment proof. /// /// Verification steps: - /// 1. Exactly `CLOSE_GROUP_SIZE` quotes are present - /// 2. All quotes target the correct content address (xorname binding) - /// 3. This node's own quote price is fresh (`ClientPut` only — a - /// replication receipt's price was fixed at the original sale and the - /// node's record count has legitimately grown since) - /// 4. Peer ID bindings match the ML-DSA-65 public keys - /// 5. This node is among the quoted recipients (`ClientPut` only — a - /// post-churn close-group member receiving a record via replication - /// was never a payee on the original receipt) - /// 6. All ML-DSA-65 signatures are valid (offloaded to `spawn_blocking`) - /// 7. The median-priced quote was paid at least 3x its price on-chain - /// (looked up via `completedPayments(quoteHash)` on the payment vault) + /// 1. Between 1 and `CLOSE_GROUP_SIZE` quotes are present + /// 2. Median-priced candidate quotes are derived from the supplied bundle + /// 3. Each candidate is checked for content binding, peer binding, and a + /// valid ML-DSA-65 signature + /// 4. Each candidate must also come from a locally known close peer and + /// satisfy the paid-quote price floor + /// 5. A candidate is accepted only if `completedPayments(quoteHash)` is at + /// least 3x the median price /// - /// See [`VerificationContext`] for why steps 3 and 5 are context-gated. 
+ /// Non-median quotes are parsed only to locate the median. Their content, + /// peer bindings, and signatures are deliberately ignored: the paid + /// quote's content hash, quote hash, signature, local floor, known-peer + /// check, and on-chain settlement are the authority. A one-quote proof is + /// valid when that single quote passes these checks and was paid 3x. /// - /// For unit tests that don't need on-chain verification, pre-populate - /// the cache so `verify_payment` returns `CachedAsVerified` before - /// reaching this method. + /// See [`VerificationContext`] for the receiver-membership difference + /// between fresh chunk stores and fresh paid-list admission. async fn verify_evm_payment( &self, xorname: &XorName, @@ -657,233 +729,301 @@ impl PaymentVerifier { } Self::validate_quote_structure(payment)?; - Self::validate_quote_content(payment, xorname)?; - if context == VerificationContext::ClientPut { - self.validate_quote_freshness(payment)?; - } - Self::validate_peer_bindings(payment)?; - if context == VerificationContext::ClientPut { - self.validate_local_recipient(payment)?; - } + let candidates = Self::legacy_median_candidates(payment)?; + let mut failures = Vec::with_capacity(candidates.len()); + let mut verified_paid_quote = false; - // Verify quote signatures (CPU-bound, run off async runtime) - let peer_quotes = payment.peer_quotes.clone(); - tokio::task::spawn_blocking(move || { - for (encoded_peer_id, quote) in &peer_quotes { - if !verify_quote_signature(quote) { - return Err(Error::Payment( - format!("Quote ML-DSA-65 signature verification failed for peer {encoded_peer_id:?}"), - )); + for candidate in candidates { + match self + .verify_legacy_median_candidate(xorname, candidate) + .await + { + Ok(()) => { + verified_paid_quote = true; + break; } + Err(err) => failures.push(err.to_string()), } - Ok(()) - }) - .await - .map_err(|e| Error::Payment(format!("Signature verification task failed: {e}")))??; + } + + if !verified_paid_quote { + 
let xorname_hex = hex::encode(xorname); + let details = if failures.is_empty() { + "no median-priced candidates were available".to_string() + } else { + failures.join("; ") + }; + return Err(Error::Payment(format!( + "Median quote payment verification failed for {xorname_hex}: {details}" + ))); + } + + if crate::logging::enabled!(crate::logging::Level::INFO) { + let xorname_hex = hex::encode(xorname); + info!("EVM payment verified for {xorname_hex}"); + } + Ok(()) + } - // Reconstruct the SingleNodePayment to identify the median quote. - // from_quotes() sorts by price and marks the median for 3x payment. - let quotes_with_prices: Vec<_> = payment + fn legacy_median_candidates( + payment: &ProofOfPayment, + ) -> Result>> { + let mut sorted_quotes: Vec<(&evmlib::EncodedPeerId, &PaymentQuote)> = payment .peer_quotes .iter() - .map(|(_, quote)| (quote.clone(), quote.price)) + .map(|(encoded_peer_id, quote)| (encoded_peer_id, quote)) .collect(); - let single_payment = SingleNodePayment::from_quotes(quotes_with_prices).map_err(|e| { - Error::Payment(format!( - "Failed to reconstruct payment for verification: {e}" - )) - })?; - - // Verify the median quote was paid at least 3x its price on-chain - // via completedPayments(quoteHash) on the payment vault contract. - let verified_amount = single_payment - .verify(&self.config.evm.network) - .await - .map_err(|e| { - let xorname_hex = hex::encode(xorname); + sorted_quotes.sort_by_key(|(_, quote)| quote.price); + let quote_count = sorted_quotes.len(); + let median_index = median_quote_index(quote_count); + let median_price = sorted_quotes + .get(median_index) + .ok_or_else(|| { + Error::Payment(format!("Missing paid quote at median index {median_index}")) + })? 
+ .1 + .price; + let expected_amount = median_price + .checked_mul(Amount::from(PAID_QUOTE_PAYMENT_MULTIPLIER)) + .ok_or_else(|| { Error::Payment(format!( - "Median quote payment verification failed for {xorname_hex}: {e}" + "Median quote payment amount overflow for price {median_price}" )) })?; - if crate::logging::enabled!(crate::logging::Level::INFO) { - let xorname_hex = hex::encode(xorname); - info!("EVM payment verified for {xorname_hex} (median paid {verified_amount} atto)"); + if expected_amount == Amount::ZERO || median_price == Amount::ZERO { + return Err(Error::Payment(format!( + "Median quote has zero price/amount (price={median_price}, amount={expected_amount}); refusing to verify as paid" + ))); } - Ok(()) + + Ok(sorted_quotes + .into_iter() + .filter(|(_, quote)| quote.price == median_price) + .map(|(encoded_peer_id, quote)| LegacyMedianCandidate { + encoded_peer_id, + quote, + expected_amount, + }) + .collect()) } - /// Validate quote count, uniqueness, and basic structure. 
- fn validate_quote_structure(payment: &ProofOfPayment) -> Result<()> { - if payment.peer_quotes.is_empty() { - return Err(Error::Payment("Payment has no quotes".to_string())); + async fn verify_legacy_median_candidate( + &self, + xorname: &XorName, + candidate: LegacyMedianCandidate<'_>, + ) -> Result<()> { + Self::validate_paid_quote_content(xorname, candidate)?; + let issuer_peer_id = + Self::validate_paid_quote_peer_binding(candidate.encoded_peer_id, candidate.quote)?; + + self.validate_paid_quote_known_peer(xorname, &issuer_peer_id) + .await?; + self.validate_paid_quote_price_floor(candidate.quote)?; + + Self::validate_paid_quote_signature(candidate).await?; + + let on_chain_amount = self + .completed_payment_amount(candidate.quote.hash()) + .await?; + if on_chain_amount >= candidate.expected_amount { + return Ok(()); } - let quote_count = payment.peer_quotes.len(); - if quote_count != CLOSE_GROUP_SIZE { - return Err(Error::Payment(format!( - "Payment must have exactly {CLOSE_GROUP_SIZE} quotes, got {quote_count}" - ))); + Err(Error::Payment(format!( + "Median-priced quote for peer {:?} was not paid enough: expected at least {}, got {on_chain_amount}", + candidate.encoded_peer_id, candidate.expected_amount + ))) + } + + fn validate_paid_quote_content( + xorname: &XorName, + candidate: LegacyMedianCandidate<'_>, + ) -> Result<()> { + if verify_quote_content(candidate.quote, xorname) { + return Ok(()); } - let mut seen: Vec<&evmlib::EncodedPeerId> = Vec::with_capacity(quote_count); - for (encoded_peer_id, _) in &payment.peer_quotes { - if seen.contains(&encoded_peer_id) { + let expected_hex = hex::encode(xorname); + let actual_hex = hex::encode(candidate.quote.content.0); + Err(Error::Payment(format!( + "Paid quote content address mismatch for peer {:?}: expected {expected_hex}, got {actual_hex}", + candidate.encoded_peer_id + ))) + } + + async fn validate_paid_quote_signature(candidate: LegacyMedianCandidate<'_>) -> Result<()> { + let quote_for_signature = 
candidate.quote.clone(); + let peer_id_for_error = candidate.encoded_peer_id.clone(); + tokio::task::spawn_blocking(move || { + if !verify_quote_signature("e_for_signature) { return Err(Error::Payment(format!( - "Duplicate peer ID in payment quotes: {encoded_peer_id:?}" + "Paid quote ML-DSA-65 signature verification failed for peer {peer_id_for_error:?}" ))); } - seen.push(encoded_peer_id); + Ok(()) + }) + .await + .map_err(|e| Error::Payment(format!("Signature verification task failed: {e}")))? + } + + async fn completed_payment_amount(&self, quote_hash: QuoteHash) -> Result { + #[cfg(any(test, feature = "test-utils"))] + { + let completed_payment_override = { + self.test_completed_payments_override + .read() + .get("e_hash) + .copied() + }; + if let Some(amount) = completed_payment_override { + return Ok(amount); + } } - Ok(()) + let provider = evmlib::utils::http_provider(self.config.evm.network.rpc_url().clone()); + let vault_address = *self.config.evm.network.payment_vault_address(); + let contract = payment_vault::interface::IPaymentVault::new(vault_address, provider); + + let result = contract + .completedPayments(quote_hash) + .call() + .await + .map_err(|e| Error::Payment(format!("completedPayments lookup failed: {e}")))?; + + Ok(Amount::from(result.amount)) } - /// Verify all quotes target the correct content address. 
- fn validate_quote_content(payment: &ProofOfPayment, xorname: &XorName) -> Result<()> { - for (encoded_peer_id, quote) in &payment.peer_quotes { - if !verify_quote_content(quote, xorname) { - let expected_hex = hex::encode(xorname); - let actual_hex = hex::encode(quote.content.0); - return Err(Error::Payment(format!( - "Quote content address mismatch for peer {encoded_peer_id:?}: expected {expected_hex}, got {actual_hex}" - ))); - } + fn validate_paid_quote_peer_binding( + encoded_peer_id: &evmlib::EncodedPeerId, + quote: &PaymentQuote, + ) -> Result { + let expected_peer_id = peer_id_from_public_key_bytes("e.pub_key) + .map_err(|e| Error::Payment(format!("Invalid ML-DSA public key in quote: {e}")))?; + + if expected_peer_id.as_bytes() != encoded_peer_id.as_bytes() { + let expected_hex = expected_peer_id.to_hex(); + let actual_hex = hex::encode(encoded_peer_id.as_bytes()); + return Err(Error::Payment(format!( + "Paid quote pub_key does not belong to claimed peer {encoded_peer_id:?}: \ + BLAKE3(pub_key) = {expected_hex}, peer_id = {actual_hex}" + ))); } - Ok(()) + + Ok(expected_peer_id) } - /// Verify quote freshness by price staleness, not wall-clock time and not a - /// symmetric record-count delta. - /// - /// The quote price encodes the quoting node's record count via the quadratic - /// pricing formula. We compute the price the node would charge *now* for its - /// current fullness and reject the quote only if the client under-paid that - /// current price by more than [`QUOTE_PRICE_STALENESS_PCT_TOLERANCE`]. This: - /// - /// - removes the platform clock dependency that caused Windows/UTC false - /// rejections (timestamps are deliberately unused); - /// - never rejects an over-payment (the previous symmetric `abs_diff` check - /// rejected quotes where the node had *fewer* records than when it quoted, - /// i.e. 
the client paid for a fuller, pricier node — nonsensical to - /// reject); and - /// - self-scales with the pricing curve, so benign in-flight churn (a node - /// storing a few replicated records between quoting and verifying) — a - /// negligible price move where the curve is flat — no longer rejects an - /// otherwise-valid payment. On a fresh, rapidly-filling testnet that churn - /// routinely exceeded the old fixed 5-record tolerance and rejected ~100% - /// of uploads via the multiplicative per-chunk effect. - /// - /// The current record count comes from the attached [`LmdbStorage`] via - /// `current_chunks()` — an O(1) B-tree page-header read, authoritative - /// regardless of which path stored the record (client PUT, replication - /// store, repair fetch) or removed it (prune delete). If no storage source - /// is available (mis-configured production startup, or a unit test that - /// didn't set a test override), the gate is skipped entirely rather than - /// rejecting every quote — see [`Self::current_records_stored`]. - /// - /// **Only this node's own quote is gated.** A bundle contains one quote - /// per close-group peer, and fullness across a close group is wildly - /// heterogeneous on a real network (a freshly joined node holds tens of - /// records while an established neighbour holds thousands). Comparing a - /// *neighbour's* quote price against *this node's* record count therefore - /// rejects honest payments whenever the group spans more than the - /// tolerance — on ant-prod-01 a close group spanning 47..=1788 records - /// made the three fullest nodes reject every bundle containing the - /// emptiest node's (perfectly fresh, 10-second-old) quote, failing the - /// PUT after the client had already paid on-chain. The node can only - /// re-derive *its own* price from its own record count, so its own quote - /// is the only one it can legitimately call stale. 
Replay of another - /// node's old cheap quote is that node's gate to enforce when the PUT - /// reaches it; the on-chain median payment binding is unaffected either - /// way. - /// - /// A bundle holds at most one quote per peer — [`Self::validate_quote_structure`] - /// rejects duplicate peer IDs and runs before this gate on every path — - /// so the loop below matches at most one own quote. - fn validate_quote_freshness(&self, payment: &ProofOfPayment) -> Result<()> { + fn validate_paid_quote_price_floor(&self, quote: &PaymentQuote) -> Result<()> { let Some(current_records) = self.current_records_stored() else { - debug!( - "PaymentVerifier: no record-count source attached; skipping \ - quote price-staleness check" - ); - return Ok(()); - }; - - let Some(self_peer_id) = self.self_peer_id_bytes() else { - debug!( - "PaymentVerifier: no self peer-id source attached; skipping \ - quote price-staleness check" - ); - return Ok(()); + return Err(Error::Payment( + "PaymentVerifier: no record-count source attached; cannot verify \ + paid-quote local price floor" + .to_string(), + )); }; - // The price the node would charge right now for its current fullness, - // and the floor a quote may not drop below (one-directional: paying at - // or above `current_price` is always accepted). let current_price = calculate_price(usize::try_from(current_records).unwrap_or(usize::MAX)); - let min_acceptable_price = current_price.saturating_mul(Amount::from( - 100u64.saturating_sub(QUOTE_PRICE_STALENESS_PCT_TOLERANCE), - )) / Amount::from(100u64); - - let mut own_quote_seen = false; - for (encoded_peer_id, quote) in &payment.peer_quotes { - if encoded_peer_id.as_bytes() != &self_peer_id { - // A neighbour's quote prices the *neighbour's* fullness; this - // node has no basis to judge it against its own record count. 
- continue; - } - own_quote_seen = true; - if quote.price < min_acceptable_price { - let quoted_records = derive_records_stored_from_price(quote.price); - return Err(Error::Payment(format!( - "Own quote {encoded_peer_id:?} stale: quoted price encodes \ - {quoted_records} records but node currently holds {current_records} \ - (quoted {}, minimum acceptable {min_acceptable_price} at \ - {QUOTE_PRICE_STALENESS_PCT_TOLERANCE}% under-payment tolerance)", - quote.price - ))); - } + let min_acceptable_price = price_floor(current_price, PAID_QUOTE_PRICE_FLOOR_TOLERANCE_PCT); + + if quote.price < min_acceptable_price { + let quoted_records = derive_records_stored_from_price(quote.price); + return Err(Error::Payment(format!( + "Paid quote price below local floor: quoted price encodes \ + {quoted_records} records but node currently holds {current_records} \ + (quoted {}, minimum acceptable {min_acceptable_price} at \ + {PAID_QUOTE_PRICE_FLOOR_TOLERANCE_PCT}% under-payment tolerance)", + quote.price + ))); } - // Two self-identity notions coexist in this verifier and are expected - // to refer to the same node: `validate_local_recipient` matches "us" - // by rewards address, this gate by peer ID. They legitimately diverge - // when a PUT reaches a node whose own quote isn't in the bundle but - // whose rewards address is shared with a quoted sibling (common in - // fleet deployments). The gate fail-opens in that case — leave a - // breadcrumb, because a silent no-op is exactly what makes a - // production incident hard to reconstruct from node logs. 
- if !own_quote_seen { - let our_rewards_address_quoted = payment - .peer_quotes + Ok(()) + } + + async fn validate_paid_quote_known_peer( + &self, + xorname: &XorName, + issuer_peer_id: &PeerId, + ) -> Result<()> { + #[cfg(any(test, feature = "test-utils"))] + if let Some(known_peer_ids) = self.test_paid_quote_known_peers_override.read().as_ref() { + if known_peer_ids .iter() - .any(|(_, quote)| quote.rewards_address == self.config.local_rewards_address); - if our_rewards_address_quoted { - debug!( - "PaymentVerifier: bundle contains our rewards address but no quote \ - under our peer ID; skipping quote price-staleness check" + .any(|peer_id| peer_id == issuer_peer_id.as_bytes()) + { + return Ok(()); + } + return Err(Error::Payment(format!( + "Paid quote issuer {} is not among this node's local {K_BUCKET_SIZE} closest peers for {}", + issuer_peer_id.to_hex(), + hex::encode(xorname) + ))); + } + + let attached = self.p2p_node.read().as_ref().map(Arc::clone); + let Some(p2p_node) = attached else { + #[cfg(any(test, feature = "test-utils"))] + { + crate::logging::warn!( + "PaymentVerifier: no P2PNode attached; paid-quote known-peer \ + check SKIPPED (test build). Production startup MUST call \ + PaymentVerifier::attach_p2p_node." + ); + return Ok(()); + } + #[cfg(not(any(test, feature = "test-utils")))] + { + crate::logging::error!( + "PaymentVerifier: no P2PNode attached; rejecting paid-quote \ + payment. This is a node-startup bug — \ + PaymentVerifier::attach_p2p_node must be called before \ + any PUT handler runs." ); + return Err(Error::Payment( + "Paid quote rejected: verifier is not wired to the P2P \ + layer; cannot verify issuer closeness." 
+ .into(), + )); } + }; + + let closest = p2p_node + .dht_manager() + .find_closest_nodes_local_with_self(xorname, K_BUCKET_SIZE) + .await; + if closest.iter().any(|node| node.peer_id == *issuer_peer_id) { + return Ok(()); } - Ok(()) + + Err(Error::Payment(format!( + "Paid quote issuer {} is not among this node's local {K_BUCKET_SIZE} closest peers for {}", + issuer_peer_id.to_hex(), + hex::encode(xorname) + ))) } - /// Verify each quote's `pub_key` matches the claimed peer ID via BLAKE3. - fn validate_peer_bindings(payment: &ProofOfPayment) -> Result<()> { - for (encoded_peer_id, quote) in &payment.peer_quotes { - let expected_peer_id = peer_id_from_public_key_bytes("e.pub_key) - .map_err(|e| Error::Payment(format!("Invalid ML-DSA public key in quote: {e}")))?; + /// Validate quote count, uniqueness, and basic structure. + fn validate_quote_structure(payment: &ProofOfPayment) -> Result<()> { + if payment.peer_quotes.is_empty() { + return Err(Error::Payment("Payment has no quotes".to_string())); + } + + let quote_count = payment.peer_quotes.len(); + if quote_count > CLOSE_GROUP_SIZE { + return Err(Error::Payment(format!( + "Payment must have at most {CLOSE_GROUP_SIZE} quotes, got {quote_count}" + ))); + } - if expected_peer_id.as_bytes() != encoded_peer_id.as_bytes() { - let expected_hex = expected_peer_id.to_hex(); - let actual_hex = hex::encode(encoded_peer_id.as_bytes()); + let mut seen: Vec<&evmlib::EncodedPeerId> = Vec::with_capacity(quote_count); + for (encoded_peer_id, _) in &payment.peer_quotes { + if seen.contains(&encoded_peer_id) { return Err(Error::Payment(format!( - "Quote pub_key does not belong to claimed peer {encoded_peer_id:?}: \ - BLAKE3(pub_key) = {expected_hex}, peer_id = {actual_hex}" + "Duplicate peer ID in payment quotes: {encoded_peer_id:?}" ))); } + seen.push(encoded_peer_id); } + Ok(()) } @@ -1425,16 +1565,8 @@ impl PaymentVerifier { // single-flight keyed on pool_hash collapse the Kademlia lookup cost // within a batch and across 
concurrent PUTs for the same pool. // - // ClientPut only: the check interrogates the *live* DHT, but a - // replication receipt's winner pool was sampled from the DHT of the - // original sale. Churn guarantees old pools eventually stop matching - // the current top-K, which would make old records unreplicatable — - // the same failure mode the single-node freshness gate caused on - // DEV-01. See `VerificationContext` for the trade-off discussion. - if context == VerificationContext::ClientPut { - self.verify_merkle_candidate_closeness(&merkle_proof.winner_pool, pool_hash) - .await?; - } + self.verify_merkle_candidate_closeness(&merkle_proof.winner_pool, pool_hash) + .await?; // Check pool cache first let cached_info = { @@ -1604,21 +1736,6 @@ impl PaymentVerifier { Ok(()) } - - /// Verify this node is among the paid recipients. - fn validate_local_recipient(&self, payment: &ProofOfPayment) -> Result<()> { - let local_addr = &self.config.local_rewards_address; - let is_recipient = payment - .peer_quotes - .iter() - .any(|(_, quote)| quote.rewards_address == *local_addr); - if !is_recipient { - return Err(Error::Payment( - "Payment proof does not include this node as a recipient".to_string(), - )); - } - Ok(()) - } } #[cfg(test)] @@ -1626,6 +1743,10 @@ impl PaymentVerifier { mod tests { use super::*; use evmlib::merkle_payments::MerklePaymentCandidatePool; + use evmlib::PaymentQuote; + use saorsa_core::MlDsa65; + use saorsa_pqc::pqc::types::MlDsaSecretKey; + use saorsa_pqc::pqc::MlDsaOperations; use std::time::SystemTime; /// Create a verifier for unit tests. 
EVM is always on, but tests can @@ -1636,22 +1757,144 @@ mod tests { cache_capacity: 100, local_rewards_address: RewardsAddress::new([1u8; 20]), }; - PaymentVerifier::new(config) + let verifier = PaymentVerifier::new(config); + verifier.set_receiver_membership_for_tests(true); + verifier } - #[test] - fn test_payment_required_for_new_data() { - let verifier = create_test_verifier(); - let xorname = [1u8; 32]; + fn make_signed_quote( + xorname: XorName, + price: Amount, + rewards_seed: u8, + ) -> (evmlib::EncodedPeerId, PaymentQuote) { + let ml_dsa = MlDsa65::new(); + let (public_key, secret_key) = ml_dsa.generate_keypair().expect("keygen"); + let pub_key_bytes = public_key.as_bytes().to_vec(); + let peer_id = encoded_peer_id_for_pub_key(&pub_key_bytes); + let mut quote = PaymentQuote { + content: xor_name::XorName(xorname), + timestamp: SystemTime::now(), + price, + rewards_address: RewardsAddress::new([rewards_seed; 20]), + pub_key: pub_key_bytes, + signature: Vec::new(), + }; + let secret_key = MlDsaSecretKey::from_bytes(secret_key.as_bytes()).expect("secret key"); + quote.signature = ml_dsa + .sign(&secret_key, "e.bytes_for_sig()) + .expect("sign quote") + .as_bytes() + .to_vec(); + (peer_id, quote) + } - // All uncached data requires payment - let status = verifier.check_payment_required(&xorname, VerificationContext::ClientPut); - assert_eq!(status, PaymentStatus::PaymentRequired); + fn make_signed_legacy_bundle( + xorname: XorName, + prices: [Amount; CLOSE_GROUP_SIZE], + ) -> Vec<(evmlib::EncodedPeerId, PaymentQuote)> { + prices + .into_iter() + .enumerate() + .map(|(index, price)| { + let rewards_seed = u8::try_from(index + 1).expect("small test index"); + make_signed_quote(xorname, price, rewards_seed) + }) + .collect() } - #[test] - fn test_cache_hit() { - let verifier = create_test_verifier(); + fn price_at_records(records: usize) -> Amount { + crate::payment::pricing::calculate_price(records) + } + + fn unique_test_prices() -> [Amount; CLOSE_GROUP_SIZE] 
{ + [ + price_at_records(0), + price_at_records(1), + price_at_records(2), + price_at_records(3), + price_at_records(4), + price_at_records(5), + price_at_records(6), + ] + } + + fn tied_median_test_prices() -> [Amount; CLOSE_GROUP_SIZE] { + [ + price_at_records(0), + price_at_records(1), + price_at_records(2), + price_at_records(3), + price_at_records(3), + price_at_records(4), + price_at_records(5), + ] + } + + fn median_test_candidates( + peer_quotes: &[(evmlib::EncodedPeerId, PaymentQuote)], + ) -> Vec<(evmlib::EncodedPeerId, PaymentQuote)> { + let mut sorted_quotes: Vec<_> = peer_quotes.iter().collect(); + sorted_quotes.sort_by_key(|(_, quote)| quote.price); + let median_index = median_quote_index(sorted_quotes.len()); + let median_price = sorted_quotes + .get(median_index) + .expect("median quote") + .1 + .price; + + sorted_quotes + .into_iter() + .filter(|(_, quote)| quote.price == median_price) + .map(|(peer_id, quote)| (peer_id.clone(), quote.clone())) + .collect() + } + + fn expected_median_payment(peer_quotes: &[(evmlib::EncodedPeerId, PaymentQuote)]) -> Amount { + let median_price = median_test_candidates(peer_quotes) + .first() + .expect("median candidate") + .1 + .price; + median_price * Amount::from(PAID_QUOTE_PAYMENT_MULTIPLIER) + } + + fn mark_known_paid_candidates( + verifier: &PaymentVerifier, + peer_quotes: &[(evmlib::EncodedPeerId, PaymentQuote)], + ) { + let known_peers = median_test_candidates(peer_quotes) + .iter() + .map(|(peer_id, _)| *peer_id.as_bytes()) + .collect(); + verifier.set_paid_quote_known_peers_for_tests(known_peers); + } + + fn mark_candidate_paid(verifier: &PaymentVerifier, quote: &PaymentQuote, amount: Amount) { + verifier.set_completed_payment_for_tests(quote.hash(), amount); + } + + fn mark_all_median_candidates_unpaid( + verifier: &PaymentVerifier, + peer_quotes: &[(evmlib::EncodedPeerId, PaymentQuote)], + ) { + for (_, quote) in median_test_candidates(peer_quotes) { + mark_candidate_paid(verifier, "e, Amount::ZERO); + } 
+ } + + #[test] + fn test_payment_required_for_new_data() { + let verifier = create_test_verifier(); + let xorname = [1u8; 32]; + + // All uncached data requires payment + let status = verifier.check_payment_required(&xorname, VerificationContext::ClientPut); + assert_eq!(status, PaymentStatus::PaymentRequired); + } + + #[test] + fn test_cache_hit() { + let verifier = create_test_verifier(); let xorname = [1u8; 32]; // Manually add to cache @@ -1693,6 +1936,69 @@ mod tests { assert_eq!(result.expect("cached"), PaymentStatus::CachedAsVerified); } + #[tokio::test] + async fn test_client_put_receiver_membership_runs_before_cache() { + let verifier = create_test_verifier(); + let xorname = [0xB4u8; 32]; + verifier.cache.insert(xorname); + verifier.set_receiver_membership_for_tests(false); + + let err = verifier + .verify_payment(&xorname, None, VerificationContext::ClientPut) + .await + .expect_err("cached client PUT must still reject non-responsible receivers"); + + assert!( + format!("{err}").contains("not in the required local peer set"), + "Error should mention receiver membership: {err}" + ); + } + + #[tokio::test] + async fn test_paid_list_receiver_membership_runs_before_cache() { + let verifier = create_test_verifier(); + let xorname = [0xB7u8; 32]; + verifier.cache.insert_paid_list_verified(xorname); + verifier.set_receiver_membership_for_tests(false); + + let err = verifier + .verify_payment(&xorname, None, VerificationContext::PaidListAdmission) + .await + .expect_err("cached paid-list admission must still reject non-members"); + + assert!( + format!("{err}").contains("not in the required local peer set"), + "Error should mention receiver membership: {err}" + ); + } + + #[tokio::test] + async fn test_paid_list_cache_entry_does_not_satisfy_client_put() { + let verifier = create_test_verifier(); + let xorname = [0xB8u8; 32]; + verifier.cache.insert_paid_list_verified(xorname); + + assert_eq!( + verifier.check_payment_required(&xorname, 
VerificationContext::PaidListAdmission), + PaymentStatus::CachedAsVerified, + "paid-list lookups must hit a paid-list-verified entry" + ); + assert_eq!( + verifier.check_payment_required(&xorname, VerificationContext::ClientPut), + PaymentStatus::PaymentRequired, + "client PUT must not fast-path on a paid-list-verified entry" + ); + + let err = verifier + .verify_payment(&xorname, None, VerificationContext::ClientPut) + .await + .expect_err("proof-less client PUT must not ride the paid-list entry"); + assert!( + format!("{err}").contains("Payment required"), + "client PUT must still demand payment: {err}" + ); + } + #[test] fn test_payment_status_can_store() { assert!(PaymentStatus::CachedAsVerified.can_store()); @@ -1808,22 +2114,513 @@ mod tests { ); } - #[tokio::test] - async fn test_malformed_single_node_proof() { - let verifier = create_test_verifier(); - let xorname = [5u8; 32]; + #[tokio::test] + async fn test_malformed_single_node_proof() { + let verifier = create_test_verifier(); + let xorname = [5u8; 32]; + + // Valid tag (0x01) but garbage payload — should fail deserialization + let mut garbage = vec![crate::ant_protocol::PROOF_TAG_SINGLE_NODE]; + garbage.extend_from_slice(&[0xAB; 63]); + let result = verifier + .verify_payment(&xorname, Some(&garbage), VerificationContext::ClientPut) + .await; + assert!(result.is_err()); + let err_msg = format!("{}", result.expect_err("should fail")); + assert!( + err_msg.contains("deserialize") || err_msg.contains("Failed"), + "Error should mention deserialization failure: {err_msg}" + ); + } + + #[tokio::test] + async fn test_legacy_paid_median_full_path_accepted() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xA1u8; 32]; + let peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); + mark_known_paid_candidates(&verifier, &peer_quotes); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = 
median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let result = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await; + + assert_eq!( + result.expect("paid median should verify"), + PaymentStatus::PaymentVerified + ); + } + + #[tokio::test] + async fn test_legacy_single_quote_proof_accepted() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xB1u8; 32]; + let (peer_id, quote) = make_signed_quote(xorname, price_at_records(0), 1); + let peer_quotes = vec![(peer_id, quote.clone())]; + mark_known_paid_candidates(&verifier, &peer_quotes); + mark_candidate_paid(&verifier, "e, expected_median_payment(&peer_quotes)); + + let proof_bytes = serialize_proof(peer_quotes); + let result = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await; + + assert_eq!( + result.expect("single paid quote should verify"), + PaymentStatus::PaymentVerified + ); + } + + #[tokio::test] + async fn test_legacy_single_quote_proof_requires_three_x_payment() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xB2u8; 32]; + let (peer_id, quote) = make_signed_quote(xorname, price_at_records(0), 1); + let peer_quotes = vec![(peer_id, quote.clone())]; + mark_known_paid_candidates(&verifier, &peer_quotes); + mark_candidate_paid(&verifier, "e, quote.price); + + let proof_bytes = serialize_proof(peer_quotes); + let err = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await + .expect_err("single quote paid less than 3x should be rejected"); + + assert!( + format!("{err}").contains("not paid enough"), + "Error should mention underpayment: {err}" + ); + } + + #[tokio::test] + async fn 
test_legacy_too_many_quotes_rejected() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xB3u8; 32]; + let mut peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); + peer_quotes.push(make_signed_quote(xorname, price_at_records(7), 8)); + + let proof_bytes = serialize_proof(peer_quotes); + let err = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await + .expect_err("proof with more than close-group quotes should be rejected"); + + assert!( + format!("{err}").contains("at most"), + "Error should mention max quote count: {err}" + ); + } + + #[tokio::test] + async fn test_legacy_structural_majority_price_at_median_accepted() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(1000); + let xorname = [0xA2u8; 32]; + let peer_quotes = make_signed_legacy_bundle( + xorname, + [ + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(100), + crate::payment::pricing::calculate_price(500), + crate::payment::pricing::calculate_price(1000), + crate::payment::pricing::calculate_price(2000), + crate::payment::pricing::calculate_price(4000), + crate::payment::pricing::calculate_price(6000), + ], + ); + mark_known_paid_candidates(&verifier, &peer_quotes); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let result = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await; + + assert_eq!( + result.expect("median-priced verifier should accept"), + PaymentStatus::PaymentVerified + ); + } + + #[tokio::test] + async fn test_legacy_above_median_verifier_rejected_by_floor() { + let verifier = create_test_verifier(); + 
verifier.set_records_stored_for_tests(2000); + let xorname = [0xA3u8; 32]; + let peer_quotes = make_signed_legacy_bundle( + xorname, + [ + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(100), + crate::payment::pricing::calculate_price(500), + crate::payment::pricing::calculate_price(1000), + crate::payment::pricing::calculate_price(2000), + crate::payment::pricing::calculate_price(4000), + crate::payment::pricing::calculate_price(6000), + ], + ); + mark_known_paid_candidates(&verifier, &peer_quotes); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let err = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await + .expect_err("above-median verifier should reject the client PUT"); + + assert!( + format!("{err}").contains("below local floor"), + "Error should mention paid-quote floor: {err}" + ); + } + + #[tokio::test] + async fn test_legacy_paid_median_known_peer_rejection() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + verifier.set_paid_quote_known_peers_for_tests(vec![rand::random()]); + let xorname = [0xA4u8; 32]; + let peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let err = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await + .expect_err("unknown paid issuer should be rejected"); + + assert!( + format!("{err}").contains("not among this 
node's local"), + "Error should mention local closest peers: {err}" + ); + } + + #[tokio::test] + async fn test_legacy_paid_median_floor_rejection() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(6000); + let xorname = [0xA5u8; 32]; + let peer_quotes = make_signed_legacy_bundle( + xorname, + [ + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + ], + ); + mark_known_paid_candidates(&verifier, &peer_quotes); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let err = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await + .expect_err("cheap paid median should be rejected"); + + assert!( + format!("{err}").contains("below local floor"), + "Error should mention local floor: {err}" + ); + } + + #[tokio::test] + async fn test_legacy_zero_price_median_rejected() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xA6u8; 32]; + let peer_quotes = make_signed_legacy_bundle( + xorname, + [ + Amount::ZERO, + Amount::ZERO, + Amount::ZERO, + Amount::ZERO, + Amount::from(1u64), + Amount::from(2u64), + Amount::from(3u64), + ], + ); + + let proof_bytes = serialize_proof(peer_quotes); + let err = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await + .expect_err("zero median must be rejected"); + + assert!( + format!("{err}").contains("zero price"), + "Error should mention zero price: {err}" + 
); + } + + #[tokio::test] + async fn test_legacy_paid_quote_content_mismatch_rejected() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xA7u8; 32]; + let mut peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); + let median_index = median_quote_index(peer_quotes.len()); + peer_quotes[median_index].1.content = xor_name::XorName([0xE7u8; 32]); + mark_known_paid_candidates(&verifier, &peer_quotes); + + let proof_bytes = serialize_proof(peer_quotes); + let err = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await + .expect_err("paid quote content mismatch should be rejected"); + + assert!( + format!("{err}").contains("content address mismatch"), + "Error should mention content mismatch: {err}" + ); + } + + #[tokio::test] + async fn test_legacy_unpaid_quote_content_mismatch_accepted() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xA8u8; 32]; + let mut peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); + peer_quotes[0].1.content = xor_name::XorName([0xE8u8; 32]); + mark_known_paid_candidates(&verifier, &peer_quotes); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let result = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await; + + assert_eq!( + result.expect("unpaid content mismatch should be ignored"), + PaymentStatus::PaymentVerified + ); + } + + #[tokio::test] + async fn test_legacy_paid_quote_bad_signature_rejected() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xA9u8; 32]; + let mut peer_quotes = 
make_signed_legacy_bundle(xorname, unique_test_prices()); + let median_index = median_quote_index(peer_quotes.len()); + peer_quotes[median_index].1.signature.push(0xFF); + mark_known_paid_candidates(&verifier, &peer_quotes); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let err = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await + .expect_err("paid bad signature should be rejected"); + + assert!( + format!("{err}").contains("signature verification failed"), + "Error should mention signature failure: {err}" + ); + } + + #[tokio::test] + async fn test_legacy_unpaid_quote_bad_signature_accepted() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xAAu8; 32]; + let mut peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); + peer_quotes[0].1.signature.push(0xFF); + mark_known_paid_candidates(&verifier, &peer_quotes); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let result = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await; + + assert_eq!( + result.expect("unpaid bad signature should be ignored"), + PaymentStatus::PaymentVerified + ); + } + + #[tokio::test] + async fn test_legacy_unpaid_peer_binding_mismatch_accepted() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xABu8; 32]; + let mut peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); + 
peer_quotes[0].0 = evmlib::EncodedPeerId::new(rand::random()); + mark_known_paid_candidates(&verifier, &peer_quotes); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let result = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await; + + assert_eq!( + result.expect("unpaid peer binding mismatch should be ignored"), + PaymentStatus::PaymentVerified + ); + } + + #[tokio::test] + async fn test_legacy_median_tie_accepts_paid_candidate() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + let xorname = [0xACu8; 32]; + let peer_quotes = make_signed_legacy_bundle(xorname, tied_median_test_prices()); + mark_known_paid_candidates(&verifier, &peer_quotes); + mark_all_median_candidates_unpaid(&verifier, &peer_quotes); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = median_test_candidates(&peer_quotes) + .get(1) + .expect("second tied median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let result = verifier + .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) + .await; + + assert_eq!( + result.expect("one paid tied median candidate should verify"), + PaymentStatus::PaymentVerified + ); + } + + #[tokio::test] + async fn test_legacy_paid_list_admission_enforces_known_peer() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(0); + verifier.set_paid_quote_known_peers_for_tests(Vec::new()); + let xorname = [0xB5u8; 32]; + let peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); + let expected_amount = expected_median_payment(&peer_quotes); + let 
paid_quote = median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let err = verifier + .verify_payment( + &xorname, + Some(&proof_bytes), + VerificationContext::PaidListAdmission, + ) + .await + .expect_err("paid-list admission must enforce the paid issuer known-peer check"); + + assert!( + format!("{err}").contains("not among this node's local"), + "Error should mention local closest peers: {err}" + ); + } + + #[tokio::test] + async fn test_legacy_paid_list_admission_enforces_full_bundle_floor() { + let verifier = create_test_verifier(); + verifier.set_records_stored_for_tests(6000); + let xorname = [0xB6u8; 32]; + let peer_quotes = make_signed_legacy_bundle( + xorname, + [ + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + crate::payment::pricing::calculate_price(0), + ], + ); + mark_known_paid_candidates(&verifier, &peer_quotes); + let expected_amount = expected_median_payment(&peer_quotes); + let paid_quote = median_test_candidates(&peer_quotes) + .first() + .expect("median candidate") + .1 + .clone(); + mark_candidate_paid(&verifier, &paid_quote, expected_amount); + + let proof_bytes = serialize_proof(peer_quotes); + let err = verifier + .verify_payment( + &xorname, + Some(&proof_bytes), + VerificationContext::PaidListAdmission, + ) + .await + .expect_err("paid-list admission must enforce the floor for full bundles"); - // Valid tag (0x01) but garbage payload — should fail deserialization - let mut garbage = vec![crate::ant_protocol::PROOF_TAG_SINGLE_NODE]; - garbage.extend_from_slice(&[0xAB; 63]); - let result = verifier - .verify_payment(&xorname, Some(&garbage), 
VerificationContext::ClientPut) - .await; - assert!(result.is_err()); - let err_msg = format!("{}", result.expect_err("should fail")); assert!( - err_msg.contains("deserialize") || err_msg.contains("Failed"), - "Error should mention deserialization failure: {err_msg}" + format!("{err}").contains("below local floor"), + "Error should mention the local price floor: {err}" ); } @@ -2020,195 +2817,6 @@ mod tests { } } - /// Helper: create a fake quote whose price encodes the supplied record count. - fn make_fake_quote_at_records( - xorname: [u8; 32], - timestamp: SystemTime, - rewards_address: RewardsAddress, - records: usize, - ) -> evmlib::PaymentQuote { - let mut quote = make_fake_quote(xorname, timestamp, rewards_address); - quote.price = crate::payment::pricing::calculate_price(records); - quote - } - - /// A small upward record drift between quoting and verifying — the normal - /// in-flight churn on a busy network — must pass. The old fixed 5-record - /// tolerance rejected a drift of 10 as "stale by 10 records"; the - /// price-based gate sees a negligible price move on the near-flat curve and - /// accepts it. - #[test] - fn test_small_record_drift_accepted() { - use evmlib::{EncodedPeerId, RewardsAddress}; - - let verifier = create_test_verifier(); - // Node gained 10 records since quoting (100 -> 110). - verifier.set_records_stored_for_tests(110); - let self_id: [u8; 32] = rand::random(); - verifier.set_peer_id_for_tests(self_id); - let quote = make_fake_quote_at_records( - [0xE0u8; 32], - SystemTime::now(), - RewardsAddress::new([1u8; 20]), - 100, - ); - let payment = ProofOfPayment { - peer_quotes: vec![(EncodedPeerId::new(self_id), quote)], - }; - - verifier - .validate_quote_freshness(&payment) - .expect("benign in-flight drift should pass"); - } - - /// Over-payment must always be accepted: the node had MORE records when it - /// quoted than it does now (e.g. it pruned), so the client paid for a - /// fuller, pricier node. 
The old symmetric `abs_diff` gate wrongly rejected - /// this; ~36% of STG-01 rejections were exactly this case. - #[test] - fn test_overpayment_accepted() { - use evmlib::{EncodedPeerId, RewardsAddress}; - - let verifier = create_test_verifier(); - // Quote priced at 6000 records, but node now holds only 100. - verifier.set_records_stored_for_tests(100); - let self_id: [u8; 32] = rand::random(); - verifier.set_peer_id_for_tests(self_id); - let quote = make_fake_quote_at_records( - [0xE2u8; 32], - SystemTime::now(), - RewardsAddress::new([1u8; 20]), - 6000, - ); - let payment = ProofOfPayment { - peer_quotes: vec![(EncodedPeerId::new(self_id), quote)], - }; - - verifier - .validate_quote_freshness(&payment) - .expect("over-payment must never be rejected"); - } - - /// Genuine staleness — a quote that under-prices the node's current fullness - /// by far more than the tolerance — is still rejected. Quote encodes 100 - /// records but the node now holds 6000, so the quadratic curve makes the - /// paid price a small fraction of the current price. - #[test] - fn test_underpriced_quote_rejected() { - use evmlib::{EncodedPeerId, RewardsAddress}; - - let verifier = create_test_verifier(); - verifier.set_records_stored_for_tests(6000); - let self_id: [u8; 32] = rand::random(); - verifier.set_peer_id_for_tests(self_id); - let quote = make_fake_quote_at_records( - [0xE1u8; 32], - SystemTime::now(), - RewardsAddress::new([1u8; 20]), - 100, - ); - let payment = ProofOfPayment { - peer_quotes: vec![(EncodedPeerId::new(self_id), quote)], - }; - - let err = verifier - .validate_quote_freshness(&payment) - .expect_err("a quote underpricing by >25% should fail"); - assert!(format!("{err}").contains("stale")); - } - - /// Regression test for the PROD-UL-01 `DataMap` failure (2026-06-04): a - /// close group whose fullness spans 47..=1788 records produces a bundle - /// where the emptiest node's honest quote prices far below a full node's - /// 75% floor. 
The verifying node must gate only its OWN quote — a - /// neighbour's cheap-but-honest quote is not evidence of staleness. - #[test] - fn test_neighbour_cheap_quote_not_rejected() { - use evmlib::{EncodedPeerId, RewardsAddress}; - - let verifier = create_test_verifier(); - // This node holds 1788 records (the fullest rejector in the incident). - verifier.set_records_stored_for_tests(1788); - let self_id: [u8; 32] = rand::random(); - verifier.set_peer_id_for_tests(self_id); - - let xorname = [0xE3u8; 32]; - let rewards = RewardsAddress::new([1u8; 20]); - // Own quote is fresh: priced at our own current fullness. - let own_quote = make_fake_quote_at_records(xorname, SystemTime::now(), rewards, 1788); - // Neighbour quotes from a heterogeneous close group, including a - // nearly-empty node at 47 records (price far below our 75% floor). - let neighbour_47 = make_fake_quote_at_records(xorname, SystemTime::now(), rewards, 47); - let neighbour_978 = make_fake_quote_at_records(xorname, SystemTime::now(), rewards, 978); - - let payment = ProofOfPayment { - peer_quotes: vec![ - (EncodedPeerId::new(rand::random()), neighbour_47), - (EncodedPeerId::new(self_id), own_quote), - (EncodedPeerId::new(rand::random()), neighbour_978), - ], - }; - - verifier - .validate_quote_freshness(&payment) - .expect("neighbours' cheaper quotes must not trip this node's own staleness gate"); - } - - /// The own-quote gate still bites: if THIS node's own quote in the bundle - /// underprices its current fullness beyond tolerance, the payment is - /// rejected even when every neighbour quote looks expensive. 
- #[test] - fn test_own_stale_quote_still_rejected_among_neighbours() { - use evmlib::{EncodedPeerId, RewardsAddress}; - - let verifier = create_test_verifier(); - verifier.set_records_stored_for_tests(6000); - let self_id: [u8; 32] = rand::random(); - verifier.set_peer_id_for_tests(self_id); - - let xorname = [0xE4u8; 32]; - let rewards = RewardsAddress::new([1u8; 20]); - let own_stale = make_fake_quote_at_records(xorname, SystemTime::now(), rewards, 100); - let neighbour = make_fake_quote_at_records(xorname, SystemTime::now(), rewards, 7000); - - let payment = ProofOfPayment { - peer_quotes: vec![ - (EncodedPeerId::new(rand::random()), neighbour), - (EncodedPeerId::new(self_id), own_stale), - ], - }; - - let err = verifier - .validate_quote_freshness(&payment) - .expect_err("own underpriced quote must still be rejected"); - assert!(format!("{err}").contains("stale")); - } - - /// Without a self peer-id source (no `P2PNode` attached, no test override) - /// the gate skips rather than rejecting — mirroring the missing - /// record-count-source behaviour. - #[test] - fn test_freshness_skipped_without_self_peer_id() { - use evmlib::{EncodedPeerId, RewardsAddress}; - - let verifier = create_test_verifier(); - verifier.set_records_stored_for_tests(6000); - // NOTE: no set_peer_id_for_tests call. - let quote = make_fake_quote_at_records( - [0xE5u8; 32], - SystemTime::now(), - RewardsAddress::new([1u8; 20]), - 100, - ); - let payment = ProofOfPayment { - peer_quotes: vec![(EncodedPeerId::new(rand::random()), quote)], - }; - - verifier - .validate_quote_freshness(&payment) - .expect("gate must fail open when self identity is unknown"); - } - /// Helper: wrap quotes into a tagged serialized `PaymentProof`. 
fn serialize_proof(peer_quotes: Vec<(evmlib::EncodedPeerId, evmlib::PaymentQuote)>) -> Vec { use crate::payment::proof::{serialize_single_node_proof, PaymentProof}; @@ -2378,54 +2986,6 @@ mod tests { evmlib::EncodedPeerId::new(*ant_peer_id.as_bytes()) } - #[tokio::test] - async fn test_local_not_in_paid_set_rejected() { - use evmlib::RewardsAddress; - use saorsa_core::MlDsa65; - use saorsa_pqc::pqc::MlDsaOperations; - - // Verifier with a local rewards address set - let local_addr = RewardsAddress::new([0xAAu8; 20]); - let config = PaymentVerifierConfig { - evm: EvmVerifierConfig { - network: EvmNetwork::ArbitrumOne, - }, - cache_capacity: 100, - local_rewards_address: local_addr, - }; - let verifier = PaymentVerifier::new(config); - - let xorname = [0xEEu8; 32]; - // Quotes pay a DIFFERENT rewards address - let other_addr = RewardsAddress::new([0xBBu8; 20]); - - // Use real ML-DSA keys so the pub_key→peer_id binding check passes - let ml_dsa = MlDsa65::new(); - let mut peer_quotes = Vec::new(); - for _ in 0..CLOSE_GROUP_SIZE { - let (public_key, _secret_key) = ml_dsa.generate_keypair().expect("keygen"); - let pub_key_bytes = public_key.as_bytes().to_vec(); - let encoded = encoded_peer_id_for_pub_key(&pub_key_bytes); - - let mut quote = make_fake_quote(xorname, SystemTime::now(), other_addr); - quote.pub_key = pub_key_bytes; - - peer_quotes.push((encoded, quote)); - } - - let proof_bytes = serialize_proof(peer_quotes); - let result = verifier - .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) - .await; - - assert!(result.is_err(), "Should reject payment not addressed to us"); - let err_msg = format!("{}", result.expect_err("should fail")); - assert!( - err_msg.contains("does not include this node as a recipient"), - "Error should mention recipient rejection: {err_msg}" - ); - } - #[tokio::test] async fn test_wrong_peer_binding_rejected() { use evmlib::{EncodedPeerId, RewardsAddress}; @@ -2466,200 +3026,16 @@ mod tests { } // 
========================================================================= - // VerificationContext tests — Replication must skip the - // storer-being-paid-now checks (own-quote freshness, local recipient, - // merkle candidate closeness) while keeping every receipt-authenticity - // check. Each test runs the same proof under both contexts and asserts - // the context-gated check fires only under ClientPut. Where a proof - // can't reach Ok(()) without on-chain access, "skipped" is proven by the - // error moving PAST the gated check to a later stage. + // VerificationContext tests — both contexts verify fresh proof admissions. + // Later neighbour-sync repair has no proof-of-payment and is authorized by + // closest-7 storage quorum or closest-K paid-list quorum instead. // ========================================================================= - /// A bundle whose own quote is stale (quoted 100 records, node now holds - /// 6000) is rejected by the freshness gate under `ClientPut`, but under - /// `Replication` the gate is skipped: verification proceeds to the next - /// stage (peer bindings, which fail on the fake `pub_keys`). 
- #[tokio::test] - async fn test_replication_context_skips_own_quote_freshness() { - use evmlib::{EncodedPeerId, RewardsAddress}; - - let verifier = create_test_verifier(); - verifier.set_records_stored_for_tests(6000); - let self_id: [u8; 32] = rand::random(); - verifier.set_peer_id_for_tests(self_id); - - let xorname = [0xD0u8; 32]; - let rewards = RewardsAddress::new([1u8; 20]); - let own_stale = make_fake_quote_at_records(xorname, SystemTime::now(), rewards, 100); - let mut peer_quotes = vec![(EncodedPeerId::new(self_id), own_stale)]; - for _ in 1..CLOSE_GROUP_SIZE { - let neighbour = make_fake_quote_at_records(xorname, SystemTime::now(), rewards, 6000); - peer_quotes.push((EncodedPeerId::new(rand::random()), neighbour)); - } - let proof_bytes = serialize_proof(peer_quotes); - - let err = verifier - .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) - .await - .expect_err("own stale quote must be rejected on a client PUT"); - assert!( - format!("{err}").contains("stale"), - "ClientPut must fail at the freshness gate: {err}" - ); - - let err = verifier - .verify_payment( - &xorname, - Some(&proof_bytes), - VerificationContext::Replication, - ) - .await - .expect_err("fake pub_keys still fail peer bindings"); - let msg = format!("{err}"); - assert!( - !msg.contains("stale"), - "Replication must skip the freshness gate: {msg}" - ); - assert!( - msg.contains("Invalid ML-DSA public key"), - "Replication should fail at the LATER peer-binding stage: {msg}" - ); - } - - /// A receipt that pays a different node's rewards address is rejected by - /// the local-recipient check under `ClientPut`, but under `Replication` - /// (a post-churn close-group member was never a payee) the check is - /// skipped: verification proceeds to quote-signature verification. 
- #[tokio::test] - async fn test_replication_context_skips_local_recipient() { - use evmlib::RewardsAddress; - use saorsa_core::MlDsa65; - use saorsa_pqc::pqc::MlDsaOperations; - - let local_addr = RewardsAddress::new([0xAAu8; 20]); - let config = PaymentVerifierConfig { - evm: EvmVerifierConfig { - network: EvmNetwork::ArbitrumOne, - }, - cache_capacity: 100, - local_rewards_address: local_addr, - }; - let verifier = PaymentVerifier::new(config); - - let xorname = [0xD1u8; 32]; - // Quotes pay a DIFFERENT rewards address. - let other_addr = RewardsAddress::new([0xBBu8; 20]); - - // Real ML-DSA keys so the pub_key→peer_id binding check passes and - // the first divergence between contexts is the recipient check. - let ml_dsa = MlDsa65::new(); - let mut peer_quotes = Vec::new(); - for _ in 0..CLOSE_GROUP_SIZE { - let (public_key, _secret_key) = ml_dsa.generate_keypair().expect("keygen"); - let pub_key_bytes = public_key.as_bytes().to_vec(); - let encoded = encoded_peer_id_for_pub_key(&pub_key_bytes); - let mut quote = make_fake_quote(xorname, SystemTime::now(), other_addr); - quote.pub_key = pub_key_bytes; - peer_quotes.push((encoded, quote)); - } - let proof_bytes = serialize_proof(peer_quotes); - - let err = verifier - .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) - .await - .expect_err("payment not addressed to us must fail on a client PUT"); - assert!( - format!("{err}").contains("does not include this node as a recipient"), - "ClientPut must fail at the recipient check: {err}" - ); - - let err = verifier - .verify_payment( - &xorname, - Some(&proof_bytes), - VerificationContext::Replication, - ) - .await - .expect_err("fake quote signatures still fail signature verification"); - let msg = format!("{err}"); - assert!( - !msg.contains("recipient"), - "Replication must skip the recipient check: {msg}" - ); - assert!( - msg.contains("signature verification failed"), - "Replication should fail at the LATER signature stage: {msg}" - ); 
- } - - /// A `Replication`-verified cache entry must not satisfy a later - /// `ClientPut` fast-path: the context-gated checks were never run for it, - /// so letting it short-circuit a client PUT would bypass them via the - /// cache. It must still satisfy later `Replication` lookups (re-offers of - /// the same key are routine), and a subsequent full `ClientPut` - /// verification upgrades the entry without ever being downgraded back. - #[tokio::test] - async fn test_replication_verified_cache_entry_does_not_satisfy_client_put() { - let verifier = create_test_verifier(); - let xorname = [0xD4u8; 32]; - - // Simulate a successful Replication-context verification. - verifier.cache.insert_replication_verified(xorname); - - assert_eq!( - verifier.check_payment_required(&xorname, VerificationContext::Replication), - PaymentStatus::CachedAsVerified, - "replication lookups must hit a replication-verified entry" - ); - assert_eq!( - verifier.check_payment_required(&xorname, VerificationContext::ClientPut), - PaymentStatus::PaymentRequired, - "a client PUT must not fast-path on a replication-verified entry" - ); - - // End-to-end: a proof-less client PUT is still rejected, while a - // proof-less replication re-check passes via the cache. - let result = verifier - .verify_payment(&xorname, None, VerificationContext::Replication) - .await; - assert_eq!( - result.expect("replication re-check should hit the cache"), - PaymentStatus::CachedAsVerified - ); - let err = verifier - .verify_payment(&xorname, None, VerificationContext::ClientPut) - .await - .expect_err("proof-less client PUT must not ride the replication entry"); - assert!( - format!("{err}").contains("Payment required"), - "client PUT must still demand payment: {err}" - ); - - // A full ClientPut verification upgrades the entry... 
- verifier.cache.insert(xorname); - assert_eq!( - verifier.check_payment_required(&xorname, VerificationContext::ClientPut), - PaymentStatus::CachedAsVerified, - "a full client-PUT verification must upgrade the entry" - ); - - // ...and a later replication re-verification never downgrades it. - verifier.cache.insert_replication_verified(xorname); - assert_eq!( - verifier.check_payment_required(&xorname, VerificationContext::ClientPut), - PaymentStatus::CachedAsVerified, - "replication re-verification must not downgrade a client-PUT entry" - ); - } - - /// Receipt authenticity is NOT relaxed under `Replication`: a bundle whose - /// quotes are bound to a different content address is rejected in both - /// contexts. A neighbour cannot replay a receipt for chunk A to get - /// chunk B admitted. + /// Content binding is required for every fresh proof context. A receipt for + /// chunk A cannot admit chunk B as either a direct/fresh store or a fresh + /// paid-list update. #[tokio::test] - async fn test_replication_context_still_rejects_content_mismatch() { - use evmlib::{EncodedPeerId, RewardsAddress}; - + async fn test_fresh_contexts_reject_content_mismatch() { let verifier = create_test_verifier(); let stored_xorname = [0xD2u8; 32]; let quoted_xorname = [0xD3u8; 32]; @@ -2668,13 +3044,13 @@ mod tests { let mut peer_quotes = Vec::new(); for _ in 0..CLOSE_GROUP_SIZE { let quote = make_fake_quote(quoted_xorname, SystemTime::now(), rewards); - peer_quotes.push((EncodedPeerId::new(rand::random()), quote)); + peer_quotes.push((evmlib::EncodedPeerId::new(rand::random()), quote)); } let proof_bytes = serialize_proof(peer_quotes); for context in [ VerificationContext::ClientPut, - VerificationContext::Replication, + VerificationContext::PaidListAdmission, ] { let err = verifier .verify_payment(&stored_xorname, Some(&proof_bytes), context) @@ -2688,15 +3064,13 @@ mod tests { } /// The merkle pay-yourself closeness defence (including its duplicate- - /// candidate pre-check, 
which runs without a `P2PNode`) applies to client - /// PUTs only. Under `Replication` the pool was sampled from the DHT of - /// the original sale, so the live-DHT check is skipped and verification - /// proceeds to the on-chain stages. + /// candidate pre-check, which runs without a `P2PNode`) applies to every + /// proof verification context because every context is a fresh admission. #[tokio::test] - async fn test_replication_context_skips_merkle_closeness() { + async fn test_fresh_contexts_enforce_merkle_closeness() { let verifier = create_test_verifier(); - let (mut merkle_proof, _pool_hash, xorname, timestamp) = make_valid_merkle_proof(); + let (mut merkle_proof, _pool_hash, xorname, _timestamp) = make_valid_merkle_proof(); // 16 copies of one real candidate: every self-signature is valid, but // the candidate PeerIds are duplicates — the closeness pre-check @@ -2710,45 +3084,22 @@ mod tests { for c in &mut merkle_proof.winner_pool.candidate_nodes { *c = shared.clone(); } - let pool_hash = merkle_proof.winner_pool_hash(); - - // Seed the pool cache with a deliberately mismatched timestamp so the - // Replication path fails deterministically AFTER the (skipped) - // closeness check, without needing on-chain access. 
- { - let info = evmlib::merkle_payments::OnChainPaymentInfo { - depth: 4, - merkle_payment_timestamp: timestamp + 1, - paid_node_addresses: vec![], - }; - verifier.pool_cache.lock().put(pool_hash, info); - } - let tagged = crate::payment::proof::serialize_merkle_proof(&merkle_proof).expect("serialize"); - let err = verifier - .verify_payment(&xorname, Some(&tagged), VerificationContext::ClientPut) - .await - .expect_err("duplicate candidate PeerIds must fail the client-PUT closeness check"); - assert!( - format!("{err}").contains("duplicate candidate PeerId"), - "ClientPut must fail at the closeness pre-check: {err}" - ); - - let err = verifier - .verify_payment(&xorname, Some(&tagged), VerificationContext::Replication) - .await - .expect_err("seeded timestamp mismatch still fails after the skipped check"); - let msg = format!("{err}"); - assert!( - !msg.contains("duplicate candidate PeerId"), - "Replication must skip the closeness check: {msg}" - ); - assert!( - msg.contains("timestamp mismatch"), - "Replication should fail at the LATER timestamp stage: {msg}" - ); + for context in [ + VerificationContext::ClientPut, + VerificationContext::PaidListAdmission, + ] { + let err = verifier + .verify_payment(&xorname, Some(&tagged), context) + .await + .expect_err("duplicate candidate PeerIds must fail fresh admission closeness"); + assert!( + format!("{err}").contains("duplicate candidate PeerId"), + "{context:?} must fail at the closeness pre-check: {err}" + ); + } } // ========================================================================= diff --git a/src/replication/mod.rs b/src/replication/mod.rs index 0e0995c..2c64ecb 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -72,6 +72,14 @@ use saorsa_core::{DhtNetworkEvent, P2PEvent, P2PNode, TrustEvent}; /// Prefix used by saorsa-core's request-response mechanism. 
const RR_PREFIX: &str = "/rr/"; +fn fresh_offer_payment_context() -> VerificationContext { + VerificationContext::ClientPut +} + +fn paid_notify_payment_context() -> VerificationContext { + VerificationContext::PaidListAdmission +} + /// Boxed future type for in-flight fetch tasks. type FetchFuture = Pin)> + Send>>; @@ -1135,6 +1143,39 @@ async fn handle_fresh_offer( return Ok(()); } + // Mirror the normal PUT path: the advertised key must be the content + // address of the supplied bytes before any expensive payment verification. + let computed_key = crate::client::compute_address(&offer.data); + if computed_key != offer.key { + warn!( + "Rejecting fresh offer for key {}: content address mismatch, computed {}", + hex::encode(offer.key), + hex::encode(computed_key), + ); + p2p_node + .report_trust_event( + source, + TrustEvent::ApplicationFailure(REPLICATION_TRUST_WEIGHT), + ) + .await; + send_replication_response( + source, + p2p_node, + request_id, + ReplicationMessageBody::FreshReplicationResponse(FreshReplicationResponse::Rejected { + key: offer.key, + reason: format!( + "Content address mismatch: expected {}, computed {}", + hex::encode(offer.key), + hex::encode(computed_key), + ), + }), + rr_message_id, + ) + .await; + return Ok(()); + } + // Rule 7: check responsibility. if !admission::is_responsible(&self_id, &offer.key, p2p_node, config.close_group_size).await { send_replication_response( @@ -1177,17 +1218,17 @@ async fn handle_fresh_offer( return Ok(()); } - // Gap 1: Validate PoP via PaymentVerifier. This is an already-settled - // receipt handed over by a neighbour, not a live sale — Replication - // context skips the storer-being-paid-now checks (own-quote price - // freshness, local recipient, merkle candidate closeness) that would - // otherwise reject every honest hand-over once counts grow, the close - // group churns, or the live DHT drifts from the pool's original sample. + // Gap 1: Validate PoP via PaymentVerifier. 
Fresh replication is still + // part of the immediate write fan-out: this receiver is about to store the + // record as if the client had PUT it here directly, so it must run the same + // ClientPut checks as the storage handler (receiver membership, + // paid-quote known-peer and local price floor for single-node proofs, + // merkle candidate closeness for merkle proofs). match payment_verifier .verify_payment( &offer.key, Some(&offer.proof_of_payment), - VerificationContext::Replication, + fresh_offer_payment_context(), ) .await { @@ -1301,13 +1342,16 @@ async fn handle_paid_notify( return Ok(()); } - // Gap 1: Validate PoP via PaymentVerifier. Same as the fresh-offer path: - // a settled receipt, so Replication context (see VerificationContext). + // Gap 1: Validate PoP via PaymentVerifier. PaidNotify admits fresh + // paid-list metadata, so it runs the same payment checks as ClientPut. + // The receiver-membership difference is intentional: paid-list metadata + // uses the local K closest peers, while direct/fresh chunk stores use the + // close group. 
match payment_verifier .verify_payment( &notify.key, Some(&notify.proof_of_payment), - VerificationContext::Replication, + paid_notify_payment_context(), ) .await { @@ -2695,9 +2739,29 @@ fn audit_failure_clears_bootstrap_claim(reason: &AuditFailureReason) -> bool { #[cfg(test)] #[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] mod tests { - use super::{audit_failure_clears_bootstrap_claim, first_failed_key_label}; + use super::{ + audit_failure_clears_bootstrap_claim, first_failed_key_label, fresh_offer_payment_context, + paid_notify_payment_context, + }; + use crate::payment::VerificationContext; use crate::replication::types::AuditFailureReason; + #[test] + fn fresh_offer_runs_client_put_payment_checks() { + assert_eq!( + fresh_offer_payment_context(), + VerificationContext::ClientPut + ); + } + + #[test] + fn paid_notify_uses_paid_list_admission_payment_checks() { + assert_eq!( + paid_notify_payment_context(), + VerificationContext::PaidListAdmission + ); + } + #[test] fn audit_timeout_preserves_active_bootstrap_claim() { assert!(!audit_failure_clears_bootstrap_claim( diff --git a/src/storage/handler.rs b/src/storage/handler.rs index 26d20d7..dc767de 100644 --- a/src/storage/handler.rs +++ b/src/storage/handler.rs @@ -74,14 +74,13 @@ impl AntProtocol { payment_verifier: Arc, quote_generator: Arc, ) -> Self { - // Keep the PaymentVerifier's freshness gate AND the QuoteGenerator's - // pricing wired to the same authoritative store used by this protocol - // handler. Pricing and the freshness gate MUST read the same record - // count: the generator prices a quote from current_chunks() and the - // verifier later checks the quote against current_chunks(), so the only - // difference they see is genuine in-flight growth. Attaching both here - // makes the invariant automatic for every AntProtocol construction - // path, including tests and future startup variants.
+ // Keep the PaymentVerifier's paid-quote price floor and the + // QuoteGenerator's pricing wired to the same authoritative store used + // by this protocol handler. Both must read the same record count: the + // generator prices quotes from current_chunks(), and the verifier later + // checks the paid median quote against current_chunks(). Attaching both + // here makes the invariant automatic for every AntProtocol + // construction path, including tests and future startup variants. payment_verifier.attach_storage(Arc::clone(&storage)); quote_generator.attach_storage(Arc::clone(&storage)); @@ -263,8 +262,9 @@ impl AntProtocol { } // 5. Verify payment. This node is the storer being paid right now, so - // the full ClientPut check set applies (own-quote price freshness, - // local recipient, merkle candidate closeness). + // the full ClientPut check set applies (receiver membership, + // paid-quote known-peer and local price floor for single-node proofs, + // merkle candidate closeness). let payment_result = self .payment_verifier .verify_payment( @@ -294,7 +294,7 @@ impl AntProtocol { let content_len = request.content.len(); info!("Stored chunk {addr_hex} ({content_len} bytes)"); // Bump the in-memory fallback counter. Both pricing and the - // freshness gate now read LmdbStorage::current_chunks() directly, + // paid-quote floor now read LmdbStorage::current_chunks() directly, // so this counter only matters when no storage is attached // (unit tests / mis-configured startup). Kept warm so that // fallback path stays roughly accurate. 
diff --git a/tests/e2e/merkle_payment.rs b/tests/e2e/merkle_payment.rs index c6ceb37..960fc98 100644 --- a/tests/e2e/merkle_payment.rs +++ b/tests/e2e/merkle_payment.rs @@ -23,6 +23,7 @@ use ant_node::compute_address; use ant_node::payment::{ serialize_merkle_proof, MAX_PAYMENT_PROOF_SIZE_BYTES, MIN_PAYMENT_PROOF_SIZE_BYTES, }; +use ant_node::CLOSE_GROUP_SIZE; use bytes::Bytes; use evmlib::common::Amount; use evmlib::merkle_payments::{ @@ -87,6 +88,35 @@ async fn send_put_to_node( ChunkMessage::decode(&response_bytes).map_err(|e| format!("Decode failed: {e}")) } +async fn responsible_receiver_index( + harness: &TestHarness, + address: &[u8; 32], +) -> Result> { + for node in harness.network().nodes() { + let Some(p2p_node) = node.p2p_node.as_ref() else { + continue; + }; + + let self_peer_id = *p2p_node.peer_id(); + let closest = p2p_node + .dht_manager() + .find_closest_nodes_local_with_self(address, CLOSE_GROUP_SIZE) + .await; + if closest + .iter() + .any(|closest_node| closest_node.peer_id == self_peer_id) + { + return Ok(node.index); + } + } + + Err(format!( + "no running node's local view included itself in the closest {CLOSE_GROUP_SIZE} peers for {}", + hex::encode(address) + ) + .into()) +} + /// Create a lightweight test harness with payment enforcement and Anvil wiring. 
async fn setup_enforcement_env() -> Result<(TestHarness, Testnet), Box> { let testnet = Testnet::new().await?; @@ -555,7 +585,14 @@ async fn test_attack_merkle_pay_yourself_fabricated_pool() -> Result<(), Box Result<(), Box Result<(), Box { + assert!( + reason.contains("Content address mismatch"), + "Should mention content address mismatch, got: {reason}" + ); + } + other => panic!("Expected Rejected, got: {other:?}"), + } + + let protocol_a = node_a.ant_protocol.as_ref().expect("protocol"); + assert!( + !protocol_a.storage().exists(&wrong_address).unwrap_or(false), + "Chunk should not be stored under the wrong address" + ); + assert!( + !protocol_a + .storage() + .exists(&actual_address) + .unwrap_or(false), + "Chunk should not be stored under the actual address after rejected offer" + ); + + harness.teardown().await.expect("teardown"); +} + /// Neighbor sync request returns a sync response (Section 18 #5/#37). /// /// Send a `NeighborSyncRequest` from one node to another and verify we From d74a7e34d5b997a0d15d13cc638cf30b24a28ca1 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Fri, 12 Jun 2026 12:41:36 +0200 Subject: [PATCH 15/18] fix(payment): use configured client put membership checks --- src/devnet.rs | 10 +++--- src/node.rs | 19 ++++++----- src/payment/verifier.rs | 57 ++++++++++++++++++--------------- src/replication/mod.rs | 9 +++--- src/storage/handler.rs | 60 +++++++++++++++++++++++++++++++---- tests/e2e/data_types/chunk.rs | 2 ++ tests/e2e/testnet.rs | 9 +++--- 7 files changed, 114 insertions(+), 52 deletions(-) diff --git a/src/devnet.rs b/src/devnet.rs index 3dede92..d5062fe 100644 --- a/src/devnet.rs +++ b/src/devnet.rs @@ -10,6 +10,7 @@ use crate::payment::{ EvmVerifierConfig, PaymentVerifier, PaymentVerifierConfig, QuoteGenerator, QuotingMetricsTracker, }; +use crate::replication::config::ReplicationConfig; use crate::storage::{AntProtocol, LmdbStorage, LmdbStorageConfig}; use evmlib::Network as EvmNetwork; use evmlib::RewardsAddress; @@ -550,9 
+551,11 @@ impl Devnet { }; let rewards_address = RewardsAddress::new(DEVNET_REWARDS_ADDRESS); + let replication_config = ReplicationConfig::default(); let payment_config = PaymentVerifierConfig { evm: evm_config, cache_capacity: DEVNET_PAYMENT_CACHE_CAPACITY, + close_group_size: replication_config.close_group_size, local_rewards_address: rewards_address, }; let payment_verifier = PaymentVerifier::new(payment_config); @@ -611,10 +614,9 @@ impl Devnet { *node.state.write().await = NodeState::Running; if let (Some(ref p2p), Some(ref protocol)) = (&node.p2p_node, &node.ant_protocol) { - // Wire the P2PNode into the payment verifier for merkle-closeness checks. - protocol - .payment_verifier_arc() - .attach_p2p_node(Arc::clone(p2p)); + // Wire P2P into AntProtocol for payment receiver-membership and + // payment-proof closeness checks. + protocol.attach_p2p_node(Arc::clone(p2p)); let mut events = p2p.subscribe_events(); let p2p_clone = Arc::clone(p2p); diff --git a/src/node.rs b/src/node.rs index 0926df2..151a369 100644 --- a/src/node.rs +++ b/src/node.rs @@ -107,11 +107,15 @@ impl NodeBuilder { Some(Self::build_upgrade_monitor(&self.config, node_id_seed)) }; + let repl_config = ReplicationConfig::default(); + // Initialize ANT protocol handler for chunk storage and // wire the fresh-write channel so PUTs trigger replication. 
let (ant_protocol, fresh_write_rx) = if self.config.storage.enabled { let (fresh_write_tx, fresh_write_rx) = tokio::sync::mpsc::unbounded_channel(); - let mut protocol = Self::build_ant_protocol(&self.config, &identity).await?; + let mut protocol = + Self::build_ant_protocol(&self.config, &identity, repl_config.close_group_size) + .await?; protocol.set_fresh_write_sender(fresh_write_tx); (Some(Arc::new(protocol)), Some(fresh_write_rx)) } else { @@ -121,19 +125,16 @@ impl NodeBuilder { let p2p_arc = Arc::new(p2p_node); - // Wire the P2PNode handle into the payment verifier so merkle-payment - // checks can query the live DHT for peers actually closest to a pool - // midpoint (pay-yourself defence). + // Wire the P2PNode handle into AntProtocol so direct PUTs can verify + // close-group responsibility and payment proofs can query live-DHT + // closeness. if let Some(ref protocol) = ant_protocol { - protocol - .payment_verifier_arc() - .attach_p2p_node(Arc::clone(&p2p_arc)); + protocol.attach_p2p_node(Arc::clone(&p2p_arc)); } // Initialize replication engine (if storage is enabled) let replication_engine = if let (Some(ref protocol), Some(fresh_rx)) = (&ant_protocol, fresh_write_rx) { - let repl_config = ReplicationConfig::default(); let storage_arc = protocol.storage(); let payment_verifier_arc = protocol.payment_verifier_arc(); match ReplicationEngine::new( @@ -349,6 +350,7 @@ impl NodeBuilder { async fn build_ant_protocol( config: &NodeConfig, identity: &NodeIdentity, + close_group_size: usize, ) -> Result { // Create LMDB storage let storage_config = LmdbStorageConfig { @@ -378,6 +380,7 @@ impl NodeBuilder { network: evm_network, }, cache_capacity: config.payment.cache_capacity, + close_group_size, local_rewards_address: rewards_address, }; let payment_verifier = PaymentVerifier::new(payment_config); diff --git a/src/payment/verifier.rs b/src/payment/verifier.rs index 633481b..d71947b 100644 --- a/src/payment/verifier.rs +++ b/src/payment/verifier.rs @@ -106,6 
+106,8 @@ pub struct PaymentVerifierConfig { pub evm: EvmVerifierConfig, /// Cache capacity (number of `XorName` values to cache). pub cache_capacity: usize, + /// Close-group width used to check direct client PUT receiver membership. + pub close_group_size: usize, /// Local node's rewards address. /// /// Kept in the verifier config for payment policies that bind receipts to @@ -116,8 +118,9 @@ pub struct PaymentVerifierConfig { /// The fresh admission path a payment proof is being verified for. /// /// - **`ClientPut`** — the node is the storer being paid *right now*. The -/// node must be in the local close group (`CLOSE_GROUP_SIZE`) for the -/// address, and every live payment check applies. +/// verifier checks receiver responsibility using the configured close-group +/// width, then applies store-strength cache semantics and live payment +/// checks. /// - **`PaidListAdmission`** — the node is admitting fresh paid-list metadata. /// It runs the same live payment checks as `ClientPut`, but the receiver /// membership check uses the local K closest peers because paid-list @@ -129,12 +132,12 @@ pub struct PaymentVerifierConfig { /// /// Later neighbour-sync repair does not include proof-of-payment bytes and /// does not call this verifier. It authorizes repair from network evidence: -/// majority storage among the closest 7, or majority paid-list membership -/// among the closest K. +/// majority storage among the configured close group, or majority paid-list +/// membership among the closest K. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum VerificationContext { - /// The node is the storer being paid right now: all checks apply, - /// including receiver membership in the local close group. + /// The node is the storer being paid right now: receiver membership uses + /// the configured close-group width. ClientPut, /// The node is admitting fresh paid-list metadata: same payment checks as /// `ClientPut`, but receiver membership is local K closest peers. 
@@ -142,9 +145,9 @@ pub enum VerificationContext { } impl VerificationContext { - fn receiver_membership_width(self) -> usize { + fn receiver_membership_width(self, close_group_size: usize) -> usize { match self { - Self::ClientPut => CLOSE_GROUP_SIZE, + Self::ClientPut => close_group_size, Self::PaidListAdmission => K_BUCKET_SIZE, } } @@ -199,12 +202,12 @@ pub struct PaymentVerifier { /// amplification to one lookup per unique `pool_hash` regardless of /// concurrency. inflight_closeness: Mutex>>, - /// P2P node handle, attached post-construction so client PUT verification - /// can check receiver membership and paid-quote issuer closeness, and - /// merkle verification can check that candidate `pub_keys` map to peers - /// actually close to the pool midpoint in the live DHT. `None` in unit - /// tests that don't exercise live-DHT checks; production startup MUST call - /// [`attach_p2p_node`]. + /// P2P node handle, attached post-construction so client PUT and paid-list + /// admission can check receiver membership, paid-quote verification can + /// check issuer closeness, and merkle verification can check that candidate + /// `pub_keys` map to peers actually close to the pool midpoint in the live + /// DHT. `None` in unit tests that don't exercise live-DHT checks; + /// production startup MUST call [`attach_p2p_node`]. p2p_node: RwLock>>, /// LMDB storage handle, attached post-construction so the paid-quote /// price-floor check can read the authoritative on-disk record count without @@ -363,15 +366,16 @@ impl PaymentVerifier { } } - /// Attach the node's [`P2PNode`] handle so client-PUT verification can - /// check receiver membership and paid-quote issuer closeness, and - /// merkle-payment verification can check candidate `pub_keys` against the - /// DHT's actual closest peers to the pool midpoint. 
+ /// Attach the node's [`P2PNode`] handle so client PUT and paid-list + /// admission can check receiver membership, paid-quote verification can + /// check issuer closeness, and merkle-payment verification can check + /// candidate `pub_keys` against the DHT's actual closest peers to the pool + /// midpoint. /// /// Production startup MUST call this once the `P2PNode` exists. Without - /// it, live-DHT payment checks fail CLOSED in release builds (reject the - /// PUT with a visible error) and fail open in test builds. Idempotent: - /// calling twice replaces the handle. + /// it, live-DHT payment checks fail CLOSED in release builds with a visible + /// error and fail open in test builds. Idempotent: calling twice replaces + /// the handle. pub fn attach_p2p_node(&self, node: Arc) { *self.p2p_node.write() = Some(node); debug!("PaymentVerifier: P2PNode attached for payment live-DHT checks"); @@ -605,7 +609,7 @@ impl PaymentVerifier { xorname: &XorName, context: VerificationContext, ) -> Result<()> { - let width = context.receiver_membership_width(); + let width = context.receiver_membership_width(self.config.close_group_size); #[cfg(any(test, feature = "test-utils"))] { @@ -637,7 +641,7 @@ impl PaymentVerifier { crate::logging::error!( "PaymentVerifier: no P2PNode attached; rejecting {context:?}. \ This is a node-startup bug — PaymentVerifier::attach_p2p_node \ - must be called before any PUT handler runs." + must be called before payment verification runs." ); return Err(Error::Payment(format!( "{context:?} rejected: verifier is not wired to the P2P \ @@ -712,8 +716,7 @@ impl PaymentVerifier { /// check, and on-chain settlement are the authority. A one-quote proof is /// valid when that single quote passes these checks and was paid 3x. /// - /// See [`VerificationContext`] for the receiver-membership difference - /// between fresh chunk stores and fresh paid-list admission. + /// See [`VerificationContext`] for receiver-membership widths. 
async fn verify_evm_payment( &self, xorname: &XorName, @@ -1755,6 +1758,7 @@ mod tests { let config = PaymentVerifierConfig { evm: EvmVerifierConfig::default(), cache_capacity: 100, + close_group_size: CLOSE_GROUP_SIZE, local_rewards_address: RewardsAddress::new([1u8; 20]), }; let verifier = PaymentVerifier::new(config); @@ -1946,7 +1950,7 @@ mod tests { let err = verifier .verify_payment(&xorname, None, VerificationContext::ClientPut) .await - .expect_err("cached client PUT must still reject non-responsible receivers"); + .expect_err("cached client PUT must still reject non-members"); assert!( format!("{err}").contains("not in the required local peer set"), @@ -3597,6 +3601,7 @@ mod tests { let config = PaymentVerifierConfig { evm: EvmVerifierConfig::default(), cache_capacity: 100, + close_group_size: CLOSE_GROUP_SIZE, local_rewards_address: RewardsAddress::new([1u8; 20]), }; let verifier = PaymentVerifier::new(config); diff --git a/src/replication/mod.rs b/src/replication/mod.rs index 2c64ecb..874b510 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -1220,10 +1220,11 @@ async fn handle_fresh_offer( // Gap 1: Validate PoP via PaymentVerifier. Fresh replication is still // part of the immediate write fan-out: this receiver is about to store the - // record as if the client had PUT it here directly, so it must run the same - // ClientPut checks as the storage handler (receiver membership, - // paid-quote known-peer and local price floor for single-node proofs, - // merkle candidate closeness for merkle proofs). + // record as if the client had PUT it here directly. Receiver responsibility + // was checked above, and ClientPut verification repeats the configured + // close-group membership check before applying store-strength cache + // semantics, paid-quote known-peer and local price floor for single-node + // proofs, and merkle candidate closeness for merkle proofs. 
match payment_verifier .verify_payment( &offer.key, diff --git a/src/storage/handler.rs b/src/storage/handler.rs index dc767de..87c700b 100644 --- a/src/storage/handler.rs +++ b/src/storage/handler.rs @@ -41,6 +41,7 @@ use crate::payment::{PaymentVerifier, QuoteGenerator, VerificationContext}; use crate::replication::fresh::FreshWriteEvent; use crate::storage::lmdb::LmdbStorage; use bytes::Bytes; +use saorsa_core::P2PNode; use std::sync::Arc; use tokio::sync::mpsc; @@ -92,6 +93,16 @@ impl AntProtocol { } } + /// Attach the node's P2P handle for payment live-DHT checks. + /// + /// Also wires the same handle into the payment verifier so payment-proof + /// closeness checks and storage-endpoint responsibility checks can use the + /// live routing view. Idempotent: calling twice replaces the verifier handle. + pub fn attach_p2p_node(&self, node: Arc) { + self.payment_verifier.attach_p2p_node(node); + debug!("AntProtocol: P2PNode attached for payment live-DHT checks"); + } + /// Set the channel sender for fresh-write replication events. /// /// When set, successful chunk PUTs will notify the replication engine @@ -261,10 +272,8 @@ impl AntProtocol { return ChunkPutResponse::Error(ProtocolError::StorageFailed(e.to_string())); } - // 5. Verify payment. This node is the storer being paid right now, so - // the full ClientPut check set applies (receiver membership, - // paid-quote known-peer and local price floor for single-node proofs, - // merkle candidate closeness). + // 5. Verify payment. The ClientPut context checks receiver membership, + // applies the store-strength payment cache, and verifies live proofs. 
let payment_result = self .payment_verifier .verify_payment( @@ -575,6 +584,7 @@ mod tests { let payment_config = PaymentVerifierConfig { evm: EvmVerifierConfig::default(), cache_capacity: 100_000, + close_group_size: crate::ant_protocol::CLOSE_GROUP_SIZE, local_rewards_address: rewards_address, }; let payment_verifier = Arc::new(PaymentVerifier::new(payment_config)); @@ -913,6 +923,44 @@ mod tests { } } + #[tokio::test] + async fn test_put_rejects_out_of_range_receiver_before_payment_cache() { + const REQUEST_ID: u64 = 105; + + let (protocol, _temp) = create_test_protocol().await; + + let content = b"out of range receiver cache test"; + let address = LmdbStorage::compute_address(content); + protocol.payment_verifier().cache_insert(address); + protocol + .payment_verifier() + .set_receiver_membership_for_tests(false); + + let put_request = ChunkPutRequest::new(address, Bytes::copy_from_slice(content)); + let put_msg = ChunkMessage { + request_id: REQUEST_ID, + body: ChunkMessageBody::PutRequest(put_request), + }; + let put_bytes = put_msg.encode().expect("encode put"); + let response_bytes = protocol + .try_handle_request(&put_bytes) + .await + .expect("handle put") + .expect("expected response"); + let response = ChunkMessage::decode(&response_bytes).expect("decode"); + + assert_eq!(response.request_id, REQUEST_ID); + if let ChunkMessageBody::PutResponse(ChunkPutResponse::Error( + ProtocolError::PaymentFailed(message), + )) = response.body + { + assert!(message.contains("required local peer set")); + } else { + panic!("expected receiver responsibility rejection, got: {response:?}"); + } + assert!(!protocol.exists(&address).expect("exists check")); + } + #[tokio::test] async fn test_put_same_chunk_twice_hits_cache() { let (protocol, _temp) = create_test_protocol().await; @@ -935,7 +983,7 @@ mod tests { .await .expect("handle put 1"); - // Second PUT — should return AlreadyExists (checked in storage before payment) + // Second PUT should return AlreadyExists from 
the storage idempotency check. let response_bytes = protocol .try_handle_request(&put_bytes) .await @@ -945,7 +993,7 @@ mod tests { if let ChunkMessageBody::PutResponse(ChunkPutResponse::AlreadyExists { .. }) = response.body { - // expected — storage check comes before payment check + // expected } else { panic!("expected AlreadyExists, got: {response:?}"); } diff --git a/tests/e2e/data_types/chunk.rs b/tests/e2e/data_types/chunk.rs index c208c1d..d66a427 100644 --- a/tests/e2e/data_types/chunk.rs +++ b/tests/e2e/data_types/chunk.rs @@ -68,6 +68,7 @@ mod tests { QuotingMetricsTracker, }; use ant_node::storage::{AntProtocol, LmdbStorage, LmdbStorageConfig}; + use ant_node::ReplicationConfig; use evmlib::testnet::Testnet; use evmlib::RewardsAddress; use rand::seq::SliceRandom; @@ -442,6 +443,7 @@ mod tests { let payment_verifier = PaymentVerifier::new(PaymentVerifierConfig { evm: EvmVerifierConfig { network }, cache_capacity: 100, + close_group_size: ReplicationConfig::default().close_group_size, local_rewards_address: rewards_address, }); let metrics_tracker = QuotingMetricsTracker::new(100); diff --git a/tests/e2e/testnet.rs b/tests/e2e/testnet.rs index 14216be..fa8b771 100644 --- a/tests/e2e/testnet.rs +++ b/tests/e2e/testnet.rs @@ -1094,11 +1094,13 @@ impl TestNetwork { // When an EVM network is provided (e.g. Anvil), use it for on-chain verification. // Otherwise default to ArbitrumSepoliaTest for test nodes. 
let rewards_address = RewardsAddress::new(TEST_REWARDS_ADDRESS); + let replication_config = ReplicationConfig::default(); let payment_config = PaymentVerifierConfig { evm: EvmVerifierConfig { network: evm_network.unwrap_or(EvmNetwork::ArbitrumSepoliaTest), }, cache_capacity: TEST_PAYMENT_CACHE_CAPACITY, + close_group_size: replication_config.close_group_size, local_rewards_address: rewards_address, }; let payment_verifier = PaymentVerifier::new(payment_config); @@ -1182,10 +1184,9 @@ impl TestNetwork { // Start protocol handler that routes incoming P2P messages to AntProtocol if let (Some(ref p2p), Some(ref protocol)) = (&node.p2p_node, &node.ant_protocol) { - // Wire the P2PNode into the payment verifier so merkle-payment - // verification can run the pay-yourself closeness check against - // the live DHT. - protocol.payment_verifier().attach_p2p_node(Arc::clone(p2p)); + // Wire P2P into AntProtocol for payment receiver-membership and + // payment-proof closeness checks. + protocol.attach_p2p_node(Arc::clone(p2p)); let mut events = p2p.subscribe_events(); let p2p_clone = Arc::clone(p2p); From 5e54ef6598512ccfc8d6084f737c1ce6206a712f Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Fri, 12 Jun 2026 15:18:47 +0200 Subject: [PATCH 16/18] fix(payment): enforce local admission before proof verification Move receiver placement checks out of PaymentVerifier so proof verification only validates payment contents and paid-quote issuer locality. Direct client PUTs now check local close-group storage responsibility before payment verification, while fresh replication and paid-list notification paths keep their pre-verification admission gates. Paid quote issuers are now checked against the configured close group instead of K closest peers. 
--- src/devnet.rs | 2 +- src/payment/cache.rs | 10 +- src/payment/verifier.rs | 271 ++++++++++++---------------------------- src/replication/mod.rs | 15 ++- src/storage/handler.rs | 109 ++++++++++++++-- tests/e2e/testnet.rs | 2 +- 6 files changed, 193 insertions(+), 216 deletions(-) diff --git a/src/devnet.rs b/src/devnet.rs index d5062fe..4662e28 100644 --- a/src/devnet.rs +++ b/src/devnet.rs @@ -614,7 +614,7 @@ impl Devnet { *node.state.write().await = NodeState::Running; if let (Some(ref p2p), Some(ref protocol)) = (&node.p2p_node, &node.ant_protocol) { - // Wire P2P into AntProtocol for payment receiver-membership and + // Wire P2P into AntProtocol for direct PUT responsibility and // payment-proof closeness checks. protocol.attach_p2p_node(Arc::clone(p2p)); diff --git a/src/payment/cache.rs b/src/payment/cache.rs index c351889..174c45b 100644 --- a/src/payment/cache.rs +++ b/src/payment/cache.rs @@ -21,8 +21,8 @@ const DEFAULT_CACHE_CAPACITY: usize = 100_000; /// /// Each entry records which fresh proof verification level inserted it. A /// paid-list entry must not satisfy a later client-PUT fast-path because -/// paid-list membership checks K closest peers while client PUTs require the -/// close group. Stronger entries satisfy weaker lookups. +/// paid-list admission does not authorize storing the actual chunk. Stronger +/// entries satisfy weaker lookups. #[derive(Clone)] pub struct VerifiedCache { inner: Arc>>, @@ -117,7 +117,7 @@ impl VerifiedCache { /// paid-list admission check set. /// /// A client-PUT entry returns `true` here because it passed the stricter - /// closest-7 receiver membership check. + /// store-admission path at the caller. #[must_use] pub fn contains_paid_list_verified(&self, xorname: &XorName) -> bool { let found = self @@ -137,10 +137,10 @@ impl VerifiedCache { } /// Check if a `XorName` is cached AND its verification ran the full - /// client-PUT close-group check set. + /// client-PUT store-admission check set. 
/// /// Paid-list entries return `false` here because they did not pass the - /// client-PUT close-group membership check. + /// client-PUT store-admission path. #[must_use] pub fn contains_client_put_verified(&self, xorname: &XorName) -> bool { let found = self diff --git a/src/payment/verifier.rs b/src/payment/verifier.rs index d71947b..4844d09 100644 --- a/src/payment/verifier.rs +++ b/src/payment/verifier.rs @@ -11,7 +11,6 @@ use crate::payment::pricing::{calculate_price, derive_records_stored_from_price} use crate::payment::proof::{ deserialize_merkle_proof, deserialize_proof, detect_proof_type, ProofType, }; -use crate::replication::config::K_BUCKET_SIZE; use crate::storage::lmdb::LmdbStorage; use ant_protocol::payment::verify::{verify_quote_content, verify_quote_signature}; use evmlib::common::{Amount, QuoteHash}; @@ -106,7 +105,7 @@ pub struct PaymentVerifierConfig { pub evm: EvmVerifierConfig, /// Cache capacity (number of `XorName` values to cache). pub cache_capacity: usize, - /// Close-group width used to check direct client PUT receiver membership. + /// Close-group width used to check paid-quote issuer locality. pub close_group_size: usize, /// Local node's rewards address. /// @@ -117,14 +116,18 @@ pub struct PaymentVerifierConfig { /// The fresh admission path a payment proof is being verified for. /// -/// - **`ClientPut`** — the node is the storer being paid *right now*. The -/// verifier checks receiver responsibility using the configured close-group -/// width, then applies store-strength cache semantics and live payment -/// checks. +/// - **`ClientPut`** — the node is admitting a chunk store. The verifier +/// applies store-strength cache semantics and live payment checks. /// - **`PaidListAdmission`** — the node is admitting fresh paid-list metadata. -/// It runs the same live payment checks as `ClientPut`, but the receiver -/// membership check uses the local K closest peers because paid-list -/// tracking spans a wider group than storage. 
+/// It runs the same live payment checks as `ClientPut`, but writes a weaker +/// cache entry that does not authorize future chunk stores. +/// +/// The caller must check local receiver/admission membership before invoking +/// the verifier: direct client PUTs and fresh chunk replication require local +/// close-group responsibility; fresh paid-list replication requires local +/// paid-list close-group membership. The verifier itself only checks payment +/// proof validity and that the paid quote's issuer is in the configured close +/// group for the quoted chunk address. /// /// Immediate fresh chunk replication is different: the receiver is about to /// store the newly written chunk as if the client PUT it there directly, so @@ -136,23 +139,13 @@ pub struct PaymentVerifierConfig { /// membership among the closest K. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum VerificationContext { - /// The node is the storer being paid right now: receiver membership uses - /// the configured close-group width. + /// The node is admitting a chunk store with store-strength cache semantics. ClientPut, - /// The node is admitting fresh paid-list metadata: same payment checks as - /// `ClientPut`, but receiver membership is local K closest peers. + /// The node is admitting fresh paid-list metadata with paid-list-strength + /// cache semantics. PaidListAdmission, } -impl VerificationContext { - fn receiver_membership_width(self, close_group_size: usize) -> usize { - match self { - Self::ClientPut => close_group_size, - Self::PaidListAdmission => K_BUCKET_SIZE, - } - } -} - /// Status returned by payment verification. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum PaymentStatus { @@ -202,12 +195,11 @@ pub struct PaymentVerifier { /// amplification to one lookup per unique `pool_hash` regardless of /// concurrency. 
inflight_closeness: Mutex>>, - /// P2P node handle, attached post-construction so client PUT and paid-list - /// admission can check receiver membership, paid-quote verification can - /// check issuer closeness, and merkle verification can check that candidate - /// `pub_keys` map to peers actually close to the pool midpoint in the live - /// DHT. `None` in unit tests that don't exercise live-DHT checks; - /// production startup MUST call [`attach_p2p_node`]. + /// P2P node handle, attached post-construction so paid-quote verification + /// can check issuer closeness, and merkle verification can check that + /// candidate `pub_keys` map to peers actually close to the pool midpoint + /// in the live DHT. `None` in unit tests that don't exercise live-DHT + /// checks; production startup MUST call [`attach_p2p_node`]. p2p_node: RwLock>>, /// LMDB storage handle, attached post-construction so the paid-quote /// price-floor check can read the authoritative on-disk record count without @@ -222,22 +214,17 @@ pub struct PaymentVerifier { /// [`Self::set_records_stored_for_tests`] so unit tests that don't wire a /// real `LmdbStorage` can still drive the price-floor logic. test_records_override: RwLock>, - /// Test-only override for the paid-quote known-peer check. + /// Test-only override for the paid-quote issuer close-group check. /// /// Production code derives closest peers from the attached [`P2PNode`]. #[cfg(any(test, feature = "test-utils"))] - test_paid_quote_known_peers_override: RwLock>>, + test_paid_quote_close_group_override: RwLock>>, /// Test-only override for `completedPayments(quote_hash)`. /// /// Production always queries the payment vault; unit tests use this to /// exercise the full verifier path without starting an EVM chain. #[cfg(any(test, feature = "test-utils"))] test_completed_payments_override: RwLock>, - /// Test-only override for receiver-membership checks. - /// - /// Production derives membership from the attached [`P2PNode`]. 
- #[cfg(any(test, feature = "test-utils"))] - test_receiver_membership_override: RwLock>, /// Configuration. config: PaymentVerifierConfig, } @@ -357,17 +344,14 @@ impl PaymentVerifier { storage: RwLock::new(None), test_records_override: RwLock::new(None), #[cfg(any(test, feature = "test-utils"))] - test_paid_quote_known_peers_override: RwLock::new(None), + test_paid_quote_close_group_override: RwLock::new(None), #[cfg(any(test, feature = "test-utils"))] test_completed_payments_override: RwLock::new(HashMap::new()), - #[cfg(any(test, feature = "test-utils"))] - test_receiver_membership_override: RwLock::new(None), config, } } - /// Attach the node's [`P2PNode`] handle so client PUT and paid-list - /// admission can check receiver membership, paid-quote verification can + /// Attach the node's [`P2PNode`] handle so paid-quote verification can /// check issuer closeness, and merkle-payment verification can check /// candidate `pub_keys` against the DHT's actual closest peers to the pool /// midpoint. @@ -381,6 +365,12 @@ impl PaymentVerifier { debug!("PaymentVerifier: P2PNode attached for payment live-DHT checks"); } + /// Configured close-group width used by payment proof admission callers. + #[must_use] + pub fn close_group_size(&self) -> usize { + self.config.close_group_size + } + /// Attach the node's [`LmdbStorage`] handle so paid-quote price-floor /// checks can query the authoritative on-disk record count. /// @@ -403,10 +393,18 @@ impl PaymentVerifier { } /// Test-only setter for local closest peers used by the paid-quote - /// known-peer check. + /// issuer close-group check. + #[cfg(any(test, feature = "test-utils"))] + pub fn set_paid_quote_close_group_for_tests(&self, peer_ids: Vec<[u8; 32]>) { + *self.test_paid_quote_close_group_override.write() = Some(peer_ids); + } + + /// Compatibility alias for older tests that called this the known-peer + /// set. The check is now specifically the configured close group for the + /// quoted chunk address. 
#[cfg(any(test, feature = "test-utils"))] pub fn set_paid_quote_known_peers_for_tests(&self, peer_ids: Vec<[u8; 32]>) { - *self.test_paid_quote_known_peers_override.write() = Some(peer_ids); + self.set_paid_quote_close_group_for_tests(peer_ids); } /// Test-only setter for an on-chain completed payment amount. @@ -417,12 +415,6 @@ impl PaymentVerifier { .insert(quote_hash, amount); } - /// Test-only setter for receiver-membership checks. - #[cfg(any(test, feature = "test-utils"))] - pub fn set_receiver_membership_for_tests(&self, is_member: bool) { - *self.test_receiver_membership_override.write() = Some(is_member); - } - /// Snapshot the current record count for paid-quote price-floor checks. /// /// Prefers the attached `LmdbStorage` (authoritative — covers client PUTs, @@ -505,7 +497,7 @@ impl PaymentVerifier { /// * `xorname` - The content-addressed name of the data /// * `payment_proof` - Optional payment proof (required if not in cache) /// * `context` - Which fresh admission path is verifying the proof — see - /// [`VerificationContext`] for receiver-membership width + /// [`VerificationContext`] for cache-strength semantics /// /// # Returns /// @@ -521,8 +513,6 @@ impl PaymentVerifier { payment_proof: Option<&[u8]>, context: VerificationContext, ) -> Result { - self.validate_receiver_membership(xorname, context).await?; - // First check if payment is required let status = self.check_payment_required(xorname, context); @@ -604,68 +594,6 @@ impl PaymentVerifier { } } - async fn validate_receiver_membership( - &self, - xorname: &XorName, - context: VerificationContext, - ) -> Result<()> { - let width = context.receiver_membership_width(self.config.close_group_size); - - #[cfg(any(test, feature = "test-utils"))] - { - let membership_override = *self.test_receiver_membership_override.read(); - if let Some(is_member) = membership_override { - if is_member { - return Ok(()); - } - return Err(Error::Payment(format!( - "{context:?} receiver is not in the required 
local peer set for key {}", - hex::encode(xorname) - ))); - } - } - - let attached = self.p2p_node.read().as_ref().map(Arc::clone); - let Some(p2p_node) = attached else { - #[cfg(any(test, feature = "test-utils"))] - { - crate::logging::warn!( - "PaymentVerifier: no P2PNode attached; {context:?} receiver \ - membership check SKIPPED (test build). Production startup \ - MUST call PaymentVerifier::attach_p2p_node." - ); - return Ok(()); - } - #[cfg(not(any(test, feature = "test-utils")))] - { - crate::logging::error!( - "PaymentVerifier: no P2PNode attached; rejecting {context:?}. \ - This is a node-startup bug — PaymentVerifier::attach_p2p_node \ - must be called before payment verification runs." - ); - return Err(Error::Payment(format!( - "{context:?} rejected: verifier is not wired to the P2P \ - layer; cannot verify receiver membership." - ))); - } - }; - - let self_id = *p2p_node.peer_id(); - let closest = p2p_node - .dht_manager() - .find_closest_nodes_local_with_self(xorname, width) - .await; - if closest.iter().any(|node| node.peer_id == self_id) { - return Ok(()); - } - - Err(Error::Payment(format!( - "{context:?} receiver {} is not among this node's local {width} closest peers for {}", - self_id.to_hex(), - hex::encode(xorname) - ))) - } - /// Get cache statistics. #[must_use] pub fn cache_stats(&self) -> CacheStats { @@ -705,18 +633,17 @@ impl PaymentVerifier { /// 2. Median-priced candidate quotes are derived from the supplied bundle /// 3. Each candidate is checked for content binding, peer binding, and a /// valid ML-DSA-65 signature - /// 4. Each candidate must also come from a locally known close peer and + /// 4. Each candidate must also come from a local close-group peer and /// satisfy the paid-quote price floor /// 5. A candidate is accepted only if `completedPayments(quoteHash)` is at /// least 3x the median price /// /// Non-median quotes are parsed only to locate the median. 
Their content, /// peer bindings, and signatures are deliberately ignored: the paid - /// quote's content hash, quote hash, signature, local floor, known-peer + /// quote's content hash, quote hash, signature, local floor, issuer + /// close-group /// check, and on-chain settlement are the authority. A one-quote proof is /// valid when that single quote passes these checks and was paid 3x. - /// - /// See [`VerificationContext`] for receiver-membership widths. async fn verify_evm_payment( &self, xorname: &XorName, @@ -820,7 +747,7 @@ impl PaymentVerifier { let issuer_peer_id = Self::validate_paid_quote_peer_binding(candidate.encoded_peer_id, candidate.quote)?; - self.validate_paid_quote_known_peer(xorname, &issuer_peer_id) + self.validate_paid_quote_issuer_close_group(xorname, &issuer_peer_id) .await?; self.validate_paid_quote_price_floor(candidate.quote)?; @@ -942,21 +869,24 @@ impl PaymentVerifier { Ok(()) } - async fn validate_paid_quote_known_peer( + async fn validate_paid_quote_issuer_close_group( &self, xorname: &XorName, issuer_peer_id: &PeerId, ) -> Result<()> { #[cfg(any(test, feature = "test-utils"))] - if let Some(known_peer_ids) = self.test_paid_quote_known_peers_override.read().as_ref() { - if known_peer_ids + if let Some(close_group_peer_ids) = + self.test_paid_quote_close_group_override.read().as_ref() + { + if close_group_peer_ids .iter() .any(|peer_id| peer_id == issuer_peer_id.as_bytes()) { return Ok(()); } + let close_group_size = self.config.close_group_size; return Err(Error::Payment(format!( - "Paid quote issuer {} is not among this node's local {K_BUCKET_SIZE} closest peers for {}", + "Paid quote issuer {} is not among this node's local {close_group_size} closest peers for {}", issuer_peer_id.to_hex(), hex::encode(xorname) ))); @@ -967,8 +897,8 @@ impl PaymentVerifier { #[cfg(any(test, feature = "test-utils"))] { crate::logging::warn!( - "PaymentVerifier: no P2PNode attached; paid-quote known-peer \ - check SKIPPED (test build). 
Production startup MUST call \ + "PaymentVerifier: no P2PNode attached; paid-quote issuer \ + close-group check SKIPPED (test build). Production startup MUST call \ PaymentVerifier::attach_p2p_node." ); return Ok(()); @@ -989,16 +919,17 @@ impl PaymentVerifier { } }; + let close_group_size = self.config.close_group_size; let closest = p2p_node .dht_manager() - .find_closest_nodes_local_with_self(xorname, K_BUCKET_SIZE) + .find_closest_nodes_local_with_self(xorname, close_group_size) .await; if closest.iter().any(|node| node.peer_id == *issuer_peer_id) { return Ok(()); } Err(Error::Payment(format!( - "Paid quote issuer {} is not among this node's local {K_BUCKET_SIZE} closest peers for {}", + "Paid quote issuer {} is not among this node's local {close_group_size} closest peers for {}", issuer_peer_id.to_hex(), hex::encode(xorname) ))) @@ -1761,9 +1692,7 @@ mod tests { close_group_size: CLOSE_GROUP_SIZE, local_rewards_address: RewardsAddress::new([1u8; 20]), }; - let verifier = PaymentVerifier::new(config); - verifier.set_receiver_membership_for_tests(true); - verifier + PaymentVerifier::new(config) } fn make_signed_quote( @@ -1862,15 +1791,15 @@ mod tests { median_price * Amount::from(PAID_QUOTE_PAYMENT_MULTIPLIER) } - fn mark_known_paid_candidates( + fn mark_close_group_paid_candidates( verifier: &PaymentVerifier, peer_quotes: &[(evmlib::EncodedPeerId, PaymentQuote)], ) { - let known_peers = median_test_candidates(peer_quotes) + let close_group_peers = median_test_candidates(peer_quotes) .iter() .map(|(peer_id, _)| *peer_id.as_bytes()) .collect(); - verifier.set_paid_quote_known_peers_for_tests(known_peers); + verifier.set_paid_quote_close_group_for_tests(close_group_peers); } fn mark_candidate_paid(verifier: &PaymentVerifier, quote: &PaymentQuote, amount: Amount) { @@ -1940,42 +1869,6 @@ mod tests { assert_eq!(result.expect("cached"), PaymentStatus::CachedAsVerified); } - #[tokio::test] - async fn test_client_put_receiver_membership_runs_before_cache() { - let 
verifier = create_test_verifier(); - let xorname = [0xB4u8; 32]; - verifier.cache.insert(xorname); - verifier.set_receiver_membership_for_tests(false); - - let err = verifier - .verify_payment(&xorname, None, VerificationContext::ClientPut) - .await - .expect_err("cached client PUT must still reject non-members"); - - assert!( - format!("{err}").contains("not in the required local peer set"), - "Error should mention receiver membership: {err}" - ); - } - - #[tokio::test] - async fn test_paid_list_receiver_membership_runs_before_cache() { - let verifier = create_test_verifier(); - let xorname = [0xB7u8; 32]; - verifier.cache.insert_paid_list_verified(xorname); - verifier.set_receiver_membership_for_tests(false); - - let err = verifier - .verify_payment(&xorname, None, VerificationContext::PaidListAdmission) - .await - .expect_err("cached paid-list admission must still reject non-members"); - - assert!( - format!("{err}").contains("not in the required local peer set"), - "Error should mention receiver membership: {err}" - ); - } - #[tokio::test] async fn test_paid_list_cache_entry_does_not_satisfy_client_put() { let verifier = create_test_verifier(); @@ -2143,7 +2036,7 @@ mod tests { verifier.set_records_stored_for_tests(0); let xorname = [0xA1u8; 32]; let peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); let expected_amount = expected_median_payment(&peer_quotes); let paid_quote = median_test_candidates(&peer_quotes) .first() @@ -2170,7 +2063,7 @@ mod tests { let xorname = [0xB1u8; 32]; let (peer_id, quote) = make_signed_quote(xorname, price_at_records(0), 1); let peer_quotes = vec![(peer_id, quote.clone())]; - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); mark_candidate_paid(&verifier, &quote, expected_median_payment(&peer_quotes)); let proof_bytes = 
serialize_proof(peer_quotes); @@ -2191,7 +2084,7 @@ mod tests { let xorname = [0xB2u8; 32]; let (peer_id, quote) = make_signed_quote(xorname, price_at_records(0), 1); let peer_quotes = vec![(peer_id, quote.clone())]; - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); mark_candidate_paid(&verifier, &quote, quote.price); let proof_bytes = serialize_proof(peer_quotes); @@ -2243,7 +2136,7 @@ mod tests { crate::payment::pricing::calculate_price(6000), ], ); - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); let expected_amount = expected_median_payment(&peer_quotes); let paid_quote = median_test_candidates(&peer_quotes) .first() @@ -2280,7 +2173,7 @@ mod tests { crate::payment::pricing::calculate_price(6000), ], ); - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); let expected_amount = expected_median_payment(&peer_quotes); let paid_quote = median_test_candidates(&peer_quotes) .first() @@ -2302,10 +2195,10 @@ mod tests { } #[tokio::test] - async fn test_legacy_paid_median_known_peer_rejection() { + async fn test_legacy_paid_median_issuer_close_group_rejection() { let verifier = create_test_verifier(); verifier.set_records_stored_for_tests(0); - verifier.set_paid_quote_known_peers_for_tests(vec![rand::random()]); + verifier.set_paid_quote_close_group_for_tests(vec![rand::random()]); let xorname = [0xA4u8; 32]; let peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); let expected_amount = expected_median_payment(&peer_quotes); @@ -2320,11 +2213,11 @@ mod tests { let err = verifier .verify_payment(&xorname, Some(&proof_bytes), VerificationContext::ClientPut) .await - .expect_err("unknown paid issuer should be rejected"); + .expect_err("out-of-close-group paid issuer should be rejected"); assert!( format!("{err}").contains("not among this node's local"), - "Error 
should mention local closest peers: {err}" + "Error should mention local close-group peers: {err}" ); } @@ -2345,7 +2238,7 @@ mod tests { crate::payment::pricing::calculate_price(0), ], ); - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); let expected_amount = expected_median_payment(&peer_quotes); let paid_quote = median_test_candidates(&peer_quotes) .first() @@ -2404,7 +2297,7 @@ mod tests { let mut peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); let median_index = median_quote_index(peer_quotes.len()); peer_quotes[median_index].1.content = xor_name::XorName([0xE7u8; 32]); - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); let proof_bytes = serialize_proof(peer_quotes); let err = verifier @@ -2425,7 +2318,7 @@ mod tests { let xorname = [0xA8u8; 32]; let mut peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); peer_quotes[0].1.content = xor_name::XorName([0xE8u8; 32]); - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); let expected_amount = expected_median_payment(&peer_quotes); let paid_quote = median_test_candidates(&peer_quotes) .first() @@ -2453,7 +2346,7 @@ mod tests { let mut peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); let median_index = median_quote_index(peer_quotes.len()); peer_quotes[median_index].1.signature.push(0xFF); - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); let expected_amount = expected_median_payment(&peer_quotes); let paid_quote = median_test_candidates(&peer_quotes) .first() @@ -2481,7 +2374,7 @@ mod tests { let xorname = [0xAAu8; 32]; let mut peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); peer_quotes[0].1.signature.push(0xFF); - mark_known_paid_candidates(&verifier, &peer_quotes); + 
mark_close_group_paid_candidates(&verifier, &peer_quotes); let expected_amount = expected_median_payment(&peer_quotes); let paid_quote = median_test_candidates(&peer_quotes) .first() @@ -2508,7 +2401,7 @@ mod tests { let xorname = [0xABu8; 32]; let mut peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); peer_quotes[0].0 = evmlib::EncodedPeerId::new(rand::random()); - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); let expected_amount = expected_median_payment(&peer_quotes); let paid_quote = median_test_candidates(&peer_quotes) .first() @@ -2534,7 +2427,7 @@ mod tests { verifier.set_records_stored_for_tests(0); let xorname = [0xACu8; 32]; let peer_quotes = make_signed_legacy_bundle(xorname, tied_median_test_prices()); - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); mark_all_median_candidates_unpaid(&verifier, &peer_quotes); let expected_amount = expected_median_payment(&peer_quotes); let paid_quote = median_test_candidates(&peer_quotes) @@ -2556,10 +2449,10 @@ mod tests { } #[tokio::test] - async fn test_legacy_paid_list_admission_enforces_known_peer() { + async fn test_legacy_paid_list_admission_enforces_issuer_close_group() { let verifier = create_test_verifier(); verifier.set_records_stored_for_tests(0); - verifier.set_paid_quote_known_peers_for_tests(Vec::new()); + verifier.set_paid_quote_close_group_for_tests(Vec::new()); let xorname = [0xB5u8; 32]; let peer_quotes = make_signed_legacy_bundle(xorname, unique_test_prices()); let expected_amount = expected_median_payment(&peer_quotes); @@ -2578,11 +2471,11 @@ mod tests { VerificationContext::PaidListAdmission, ) .await - .expect_err("paid-list admission must enforce the paid issuer known-peer check"); + .expect_err("paid-list admission must enforce the paid issuer close-group check"); assert!( format!("{err}").contains("not among this node's local"), - "Error 
should mention local closest peers: {err}" + "Error should mention local close-group peers: {err}" ); } @@ -2603,7 +2496,7 @@ mod tests { crate::payment::pricing::calculate_price(0), ], ); - mark_known_paid_candidates(&verifier, &peer_quotes); + mark_close_group_paid_candidates(&verifier, &peer_quotes); let expected_amount = expected_median_payment(&peer_quotes); let paid_quote = median_test_candidates(&peer_quotes) .first() diff --git a/src/replication/mod.rs b/src/replication/mod.rs index 874b510..c2b89f7 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -1221,10 +1221,10 @@ async fn handle_fresh_offer( // Gap 1: Validate PoP via PaymentVerifier. Fresh replication is still // part of the immediate write fan-out: this receiver is about to store the // record as if the client had PUT it here directly. Receiver responsibility - // was checked above, and ClientPut verification repeats the configured - // close-group membership check before applying store-strength cache - // semantics, paid-quote known-peer and local price floor for single-node - // proofs, and merkle candidate closeness for merkle proofs. + // was checked above before proof work. ClientPut verification applies + // store-strength cache semantics, paid-quote issuer close-group and local + // price floor checks for single-node proofs, and merkle candidate + // closeness for merkle proofs. match payment_verifier .verify_payment( &offer.key, @@ -1344,10 +1344,9 @@ async fn handle_paid_notify( } // Gap 1: Validate PoP via PaymentVerifier. PaidNotify admits fresh - // paid-list metadata, so it runs the same payment checks as ClientPut. - // The receiver-membership difference is intentional: paid-list metadata - // uses the local K closest peers, while direct/fresh chunk stores use the - // close group. + // paid-list metadata, so local paid-list close-group membership was checked + // above before proof work. 
The verifier then runs the same payment proof + // checks as ClientPut while writing a paid-list-strength cache entry. match payment_verifier .verify_payment( &notify.key, diff --git a/src/storage/handler.rs b/src/storage/handler.rs index 87c700b..d5421d0 100644 --- a/src/storage/handler.rs +++ b/src/storage/handler.rs @@ -41,6 +41,7 @@ use crate::payment::{PaymentVerifier, QuoteGenerator, VerificationContext}; use crate::replication::fresh::FreshWriteEvent; use crate::storage::lmdb::LmdbStorage; use bytes::Bytes; +use parking_lot::RwLock; use saorsa_core::P2PNode; use std::sync::Arc; use tokio::sync::mpsc; @@ -57,8 +58,13 @@ pub struct AntProtocol { /// Quote generator for creating storage quotes. /// Also handles merkle candidate quote signing via ML-DSA-65. quote_generator: Arc, + /// P2P node handle used for direct PUT receiver responsibility checks. + p2p_node: RwLock<Option<Arc<P2PNode>>>, /// Channel for notifying the replication engine about newly-stored chunks. fresh_write_tx: Option>, + /// Test-only override for direct PUT storage responsibility checks. + #[cfg(any(test, feature = "test-utils"))] + test_store_membership_override: RwLock<Option<bool>>, } impl AntProtocol { @@ -89,20 +95,31 @@ impl AntProtocol { storage, payment_verifier, quote_generator, + p2p_node: RwLock::new(None), fresh_write_tx: None, + #[cfg(any(test, feature = "test-utils"))] + test_store_membership_override: RwLock::new(None), } } - /// Attach the node's P2P handle for payment live-DHT checks. + /// Attach the node's P2P handle for direct PUT responsibility and payment + /// live-DHT checks. /// /// Also wires the same handle into the payment verifier so payment-proof - /// closeness checks and storage-endpoint responsibility checks can use the - /// live routing view. Idempotent: calling twice replaces the verifier handle. + /// closeness checks can use the live routing view. Idempotent: calling + /// twice replaces both handles. 
pub fn attach_p2p_node(&self, node: Arc) { + *self.p2p_node.write() = Some(Arc::clone(&node)); self.payment_verifier.attach_p2p_node(node); debug!("AntProtocol: P2PNode attached for payment live-DHT checks"); } + /// Test-only setter for direct PUT storage responsibility checks. + #[cfg(any(test, feature = "test-utils"))] + pub fn set_store_membership_for_tests(&self, is_member: bool) { + *self.test_store_membership_override.write() = Some(is_member); + } + /// Set the channel sender for fresh-write replication events. /// /// When set, successful chunk PUTs will notify the replication engine @@ -253,7 +270,14 @@ impl AntProtocol { Ok(false) => {} } - // 4. Cheap disk-space pre-check — runs BEFORE the expensive payment + // 4. Check storage responsibility before payment verification. A node + // should only accept the actual chunk when its local routing table + // places it in the configured close group for the chunk address. + if let Err(e) = self.validate_store_membership(&address).await { + return ChunkPutResponse::Error(e); + } + + // 5. Cheap disk-space pre-check — runs BEFORE the expensive payment // verification path (ML-DSA pool checks, a Kademlia closeness // lookup, and an on-chain Arbitrum RPC). A disk-full node can never // satisfy this PUT, so reject it here rather than burning that work @@ -272,8 +296,9 @@ impl AntProtocol { return ChunkPutResponse::Error(ProtocolError::StorageFailed(e.to_string())); } - // 5. Verify payment. The ClientPut context checks receiver membership, - // applies the store-strength payment cache, and verifies live proofs. + // 6. Verify payment. The ClientPut context applies the store-strength + // payment cache and verifies live proofs. Storage responsibility was + // checked above before any proof work. let payment_result = self .payment_verifier .verify_payment( @@ -297,7 +322,7 @@ impl AntProtocol { } } - // 6. Store chunk + // 7. 
Store chunk match self.storage.put(&address, &request.content).await { Ok(_) => { let content_len = request.content.len(); @@ -309,7 +334,7 @@ impl AntProtocol { // fallback path stays roughly accurate. self.quote_generator.record_store(); - // 7. Notify replication engine for fresh fan-out. + // 8. Notify replication engine for fresh fan-out. // Only emit when a real proof is present — cached-as-verified // PUTs have no proof to forward, and the chunk would have // already replicated on the original write that carried one. @@ -338,6 +363,68 @@ impl AntProtocol { } } + async fn validate_store_membership( + &self, + address: &[u8; 32], + ) -> std::result::Result<(), ProtocolError> { + #[cfg(any(test, feature = "test-utils"))] + { + let membership_override = *self.test_store_membership_override.read(); + if let Some(is_member) = membership_override { + if is_member { + return Ok(()); + } + return Err(ProtocolError::PaymentFailed(format!( + "ClientPut receiver is not among this node's local {} closest peers for {}", + self.payment_verifier.close_group_size(), + hex::encode(address) + ))); + } + } + + let attached = self.p2p_node.read().as_ref().map(Arc::clone); + let Some(p2p_node) = attached else { + #[cfg(any(test, feature = "test-utils"))] + { + crate::logging::warn!( + "AntProtocol: no P2PNode attached; ClientPut storage \ + responsibility check SKIPPED (test build). Production startup \ + MUST call AntProtocol::attach_p2p_node." + ); + return Ok(()); + } + #[cfg(not(any(test, feature = "test-utils")))] + { + crate::logging::error!( + "AntProtocol: no P2PNode attached; rejecting ClientPut. \ + This is a node-startup bug — AntProtocol::attach_p2p_node \ + must be called before PUT handling runs." + ); + return Err(ProtocolError::PaymentFailed( + "ClientPut rejected: protocol handler is not wired to the \ + P2P layer; cannot verify storage responsibility." 
+ .to_string(), + )); + } + }; + + let self_id = *p2p_node.peer_id(); + let close_group_size = self.payment_verifier.close_group_size(); + let closest = p2p_node + .dht_manager() + .find_closest_nodes_local_with_self(address, close_group_size) + .await; + if closest.iter().any(|node| node.peer_id == self_id) { + return Ok(()); + } + + Err(ProtocolError::PaymentFailed(format!( + "ClientPut receiver {} is not among this node's local {close_group_size} closest peers for {}", + self_id.to_hex(), + hex::encode(address) + ))) + } + /// Handle a GET request. /// /// Wraps `handle_get_inner` to emit a single structured tracing event per @@ -932,9 +1019,7 @@ mod tests { let content = b"out of range receiver cache test"; let address = LmdbStorage::compute_address(content); protocol.payment_verifier().cache_insert(address); - protocol - .payment_verifier() - .set_receiver_membership_for_tests(false); + protocol.set_store_membership_for_tests(false); let put_request = ChunkPutRequest::new(address, Bytes::copy_from_slice(content)); let put_msg = ChunkMessage { @@ -954,7 +1039,7 @@ mod tests { ProtocolError::PaymentFailed(message), )) = response.body { - assert!(message.contains("required local peer set")); + assert!(message.contains("not among this node's local")); } else { panic!("expected receiver responsibility rejection, got: {response:?}"); } diff --git a/tests/e2e/testnet.rs b/tests/e2e/testnet.rs index fa8b771..90c73b5 100644 --- a/tests/e2e/testnet.rs +++ b/tests/e2e/testnet.rs @@ -1184,7 +1184,7 @@ impl TestNetwork { // Start protocol handler that routes incoming P2P messages to AntProtocol if let (Some(ref p2p), Some(ref protocol)) = (&node.p2p_node, &node.ant_protocol) { - // Wire P2P into AntProtocol for payment receiver-membership and + // Wire P2P into AntProtocol for direct PUT responsibility and // payment-proof closeness checks. 
protocol.attach_p2p_node(Arc::clone(p2p)); From 8cb2c9ffaefbfff655440f03536de468ab251f15 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Fri, 12 Jun 2026 16:24:14 +0200 Subject: [PATCH 17/18] fix(replication): widen local storage admission range --- src/replication/admission.rs | 44 ++++++++++++++--------- src/replication/config.rs | 26 ++++++++++++++ src/replication/mod.rs | 51 +++++++++++++++++--------- src/replication/pruning.rs | 69 +++++++++++++++++++++++++----------- src/storage/handler.rs | 20 ++++++----- tests/e2e/replication.rs | 20 +++++++++-- 6 files changed, 166 insertions(+), 64 deletions(-) diff --git a/src/replication/admission.rs b/src/replication/admission.rs index b996eaa..3b99a80 100644 --- a/src/replication/admission.rs +++ b/src/replication/admission.rs @@ -15,7 +15,7 @@ use saorsa_core::identity::PeerId; use saorsa_core::P2PNode; use crate::ant_protocol::XorName; -use crate::replication::config::ReplicationConfig; +use crate::replication::config::{storage_admission_width, ReplicationConfig}; use crate::replication::paid_list::PaidList; use crate::storage::LmdbStorage; @@ -31,19 +31,20 @@ pub struct AdmissionResult { pub rejected_keys: Vec, } -/// Check if this node is responsible for key `K`. +/// Check if this node is within a caller-supplied closest-peer width for key +/// `K`. /// -/// Returns `true` if `self_id` is among the `close_group_size` nearest peers -/// to `K` in `SelfInclusiveRT`. +/// Returns `true` if `self_id` is among the `responsibility_width` nearest +/// peers to `K` in `SelfInclusiveRT`. 
pub async fn is_responsible( self_id: &PeerId, key: &XorName, p2p_node: &Arc, - close_group_size: usize, + responsibility_width: usize, ) -> bool { let closest = p2p_node .dht_manager() - .find_closest_nodes_local_with_self(key, close_group_size) + .find_closest_nodes_local_with_self(key, responsibility_width) .await; closest.iter().any(|n| n.peer_id == *self_id) } @@ -70,8 +71,9 @@ pub async fn is_in_paid_close_group( /// For each key in `replica_hints` and `paid_hints`: /// - **Cross-set precedence**: if a key appears in both sets, keep only the /// replica-hint entry. -/// - **Replica hints**: admitted if `IsResponsible(self, K)` or key already -/// exists in local store / pending set. +/// - **Replica hints**: admitted if `self` is in the storage-admission group +/// (`close_group_size + STORAGE_ADMISSION_MARGIN`) or key already exists in +/// local store / pending set. /// - **Paid hints**: admitted if `self` is in `PaidCloseGroup(K)` or key is /// already in `PaidForList`. /// @@ -111,7 +113,14 @@ pub async fn admit_hints( continue; } - if is_responsible(self_id, &key, p2p_node, config.close_group_size).await { + if is_responsible( + self_id, + &key, + p2p_node, + storage_admission_width(config.close_group_size), + ) + .await + { result.replica_keys.push(key); } else { result.rejected_keys.push(key); @@ -323,8 +332,9 @@ mod tests { /// gate tested at the e2e level (scenario 17 tests the positive /// case). /// (b) Even if a sender IS in `LocalRT`, the per-key relevance check - /// (`is_responsible` / `is_in_paid_close_group`) in `admit_hints` - /// still applies. Sender identity does not grant key admission. + /// (`is_responsible` with storage-admission width / + /// `is_in_paid_close_group`) in `admit_hints` still applies. Sender + /// identity does not grant key admission. 
/// /// This test exercises layer (b): the admission pipeline's dedup, /// cross-set precedence, and relevance filtering using the same logic @@ -358,8 +368,8 @@ mod tests { admitted_replica.push(key); continue; } - // key_not_pending: not pending, not local -> needs is_responsible. - // Simulate is_responsible returning false (out of range). + // key_not_pending: not pending, not local -> needs the + // storage-admission check. Simulate it returning false. let is_responsible = false; if is_responsible { admitted_replica.push(key); @@ -416,7 +426,7 @@ mod tests { /// Scenario 7: Out-of-range key hint rejected regardless of quorum. /// /// A key whose XOR distance from self is much larger than the distance - /// of the close-group members fails the `is_responsible` check in + /// of the storage-admission members fails the `is_responsible` check in /// `admit_hints`. The key never enters the verification pipeline, so /// quorum is irrelevant. /// @@ -441,7 +451,7 @@ mod tests { // -- Simulate admit_hints for these keys -- // - // When `close_group_size` peers are all closer to far_key than + // When the storage-admission peers are all closer to far_key than // self, `is_responsible(self, far_key)` returns false. The key is // rejected without entering verification or quorum. @@ -460,8 +470,8 @@ mod tests { admitted.push(key); continue; } - // Simulate is_responsible: self (0x00) has close_group_size - // peers closer to far_key (0xFF) than itself -> not responsible. + // Simulate is_responsible: self (0x00) has the full + // storage-admission group closer to far_key (0xFF) than itself. // For close_key (0x01), self is very close -> responsible. 
let distance = xor_distance(&self_xor, &key); let simulated_responsible = distance[0] < 0x80; diff --git a/src/replication/config.rs b/src/replication/config.rs index 3337cf2..1f5b00a 100644 --- a/src/replication/config.rs +++ b/src/replication/config.rs @@ -23,6 +23,14 @@ use crate::ant_protocol::CLOSE_GROUP_SIZE; /// Maximum number of peers per k-bucket in the Kademlia routing table. pub const K_BUCKET_SIZE: usize = 20; +/// Extra local-routing-table positions accepted for local chunk storage +/// admission and stored-record pruning. +/// +/// This margin absorbs small local RT disagreement between peers. It does not +/// widen audit, quorum, or paid-list target sets; those remain strict +/// `close_group_size` / paid-list group checks. +pub const STORAGE_ADMISSION_MARGIN: usize = 2; + /// Full-network target for required positive presence votes. /// /// Effective per-key threshold is @@ -39,6 +47,13 @@ pub const NEIGHBOR_SYNC_SCOPE: usize = 20; /// round. pub const NEIGHBOR_SYNC_PEER_COUNT: usize = 4; +/// Width used when deciding whether this node may locally store or retain a +/// chunk. +#[must_use] +pub const fn storage_admission_width(close_group_size: usize) -> usize { + close_group_size.saturating_add(STORAGE_ADMISSION_MARGIN) +} + /// Minimum neighbor-sync cadence. Actual interval is randomized within /// `[min, max]`. 
const NEIGHBOR_SYNC_INTERVAL_MIN_SECS: u64 = 10 * 60; @@ -411,6 +426,17 @@ mod tests { ); } + #[test] + fn storage_admission_width_adds_margin() { + const TEST_CLOSE_GROUP_SIZE: usize = 7; + + assert_eq!( + storage_admission_width(TEST_CLOSE_GROUP_SIZE), + TEST_CLOSE_GROUP_SIZE + STORAGE_ADMISSION_MARGIN + ); + assert_eq!(storage_admission_width(usize::MAX), usize::MAX); + } + #[test] fn audit_failure_weight_is_five() { assert!((AUDIT_FAILURE_TRUST_WEIGHT - 5.0).abs() <= f64::EPSILON); diff --git a/src/replication/mod.rs b/src/replication/mod.rs index 083a87e..b8ef722 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -47,8 +47,8 @@ use crate::error::{Error, Result}; use crate::payment::{PaymentVerifier, VerificationContext}; use crate::replication::audit::AuditTickResult; use crate::replication::config::{ - max_parallel_fetch, ReplicationConfig, MAX_CONCURRENT_REPLICATION_SENDS, - REPLICATION_PROTOCOL_ID, + max_parallel_fetch, storage_admission_width, ReplicationConfig, + MAX_CONCURRENT_REPLICATION_SENDS, REPLICATION_PROTOCOL_ID, }; use crate::replication::paid_list::PaidList; use crate::replication::protocol::{ @@ -1176,15 +1176,23 @@ async fn handle_fresh_offer( return Ok(()); } - // Rule 7: check responsibility. - if !admission::is_responsible(&self_id, &offer.key, p2p_node, config.close_group_size).await { + // Rule 7: check storage admission. Fresh chunk receivers accept the close + // group plus a small margin to absorb local routing-table disagreement. 
+ if !admission::is_responsible( + &self_id, + &offer.key, + p2p_node, + storage_admission_width(config.close_group_size), + ) + .await + { send_replication_response( source, p2p_node, request_id, ReplicationMessageBody::FreshReplicationResponse(FreshReplicationResponse::Rejected { key: offer.key, - reason: "Not responsible for this key".to_string(), + reason: "Not in storage-admission range for this key".to_string(), }), rr_message_id, ) @@ -1220,7 +1228,7 @@ async fn handle_fresh_offer( // Gap 1: Validate PoP via PaymentVerifier. Fresh replication is still // part of the immediate write fan-out: this receiver is about to store the - // record as if the client had PUT it here directly. Receiver responsibility + // record as if the client had PUT it here directly. Storage admission // was checked above before proof work. ClientPut verification applies // store-strength cache semantics, paid-quote issuer close-group and local // price floor checks for single-node proofs, and merkle candidate @@ -2141,9 +2149,9 @@ async fn run_verification_cycle(ctx: VerificationCycleContext<'_>) { match pipeline { HintPipeline::PaidOnly => { // Paid-only + local paid state needs one more - // responsibility check outside this lock: if we - // are also in the storage close group, the hint - // can repair a missing replica. + // storage-admission check outside this lock: if we + // are also in the close group plus storage margin, + // the hint can repair a missing replica. 
local_paid_paid_only_keys.push(*key); } HintPipeline::Replica => { @@ -2166,8 +2174,13 @@ async fn run_verification_cycle(ctx: VerificationCycleContext<'_>) { for key in local_paid_paid_only_keys { if storage.exists(&key).unwrap_or(false) { terminal_paid_only.push(key); - } else if admission::is_responsible(&self_id, &key, p2p_node, config.close_group_size) - .await + } else if admission::is_responsible( + &self_id, + &key, + p2p_node, + storage_admission_width(config.close_group_size), + ) + .await { local_paid_presence_probe_keys.push(key); } else { @@ -2276,9 +2289,9 @@ async fn run_verification_cycle(ctx: VerificationCycleContext<'_>) { } // Paid-only hints normally update PaidForList only. If this node is - // also storage-responsible for the key, a verified paid-only hint can - // safely repair a missing replica using sources from the same - // verification round. + // also within the storage-admission group for the key, a verified + // paid-only hint can safely repair a missing replica using sources + // from the same verification round. let mut paid_only_fetch_keys: HashSet = HashSet::new(); for (key, outcome, pipeline) in &evaluated { if *pipeline == HintPipeline::PaidOnly @@ -2288,7 +2301,13 @@ async fn run_verification_cycle(ctx: VerificationCycleContext<'_>) { | KeyVerificationOutcome::PaidListVerified { .. } ) && !storage.exists(key).unwrap_or(false) - && admission::is_responsible(&self_id, key, p2p_node, config.close_group_size).await + && admission::is_responsible( + &self_id, + key, + p2p_node, + storage_admission_width(config.close_group_size), + ) + .await { paid_only_fetch_keys.insert(*key); } @@ -2313,7 +2332,7 @@ async fn run_verification_cycle(ctx: VerificationCycleContext<'_>) { // retained as pending until queue drains. 
} else if fetch_eligible && sources.is_empty() { warn!( - "Verified responsible key {} has no holders (possible data loss)", + "Verified storage-admitted key {} has no holders (possible data loss)", hex::encode(key) ); q.remove_pending(&key); diff --git a/src/replication/pruning.rs b/src/replication/pruning.rs index 7b8c718..68ebb8c 100644 --- a/src/replication/pruning.rs +++ b/src/replication/pruning.rs @@ -18,8 +18,8 @@ use tokio::sync::RwLock; use crate::ant_protocol::XorName; use crate::replication::config::{ - ReplicationConfig, AUDIT_FAILURE_TRUST_WEIGHT, MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS, - REPLICATION_PROTOCOL_ID, + storage_admission_width, ReplicationConfig, AUDIT_FAILURE_TRUST_WEIGHT, + MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS, REPLICATION_PROTOCOL_ID, }; use crate::replication::paid_list::PaidList; use crate::replication::protocol::{ @@ -195,10 +195,12 @@ struct PruneAuditReportState { /// Execute post-cycle responsibility pruning. /// /// For each stored record K: -/// - If `IsResponsible(self, K)`: clear `RecordOutOfRangeFirstSeen`. -/// - If not responsible: set timestamp if not already set; delete if the +/// - If `self` is within the storage-admission group +/// (`close_group_size + STORAGE_ADMISSION_MARGIN`): clear +/// `RecordOutOfRangeFirstSeen`. +/// - If not in that group: set timestamp if not already set; delete if the /// timestamp is at least `PRUNE_HYSTERESIS_DURATION` old and all but one -/// of the current close group prove they store the record. +/// of the strict current close group prove they store the record. /// /// For each `PaidForList` entry K: /// - If self is in `PaidCloseGroup(K)`: clear `PaidOutOfRangeFirstSeen`. 
@@ -280,7 +282,6 @@ async fn prune_stored_records(ctx: &PrunePassContext<'_>) -> (usize, RecordPrune }; let now = Instant::now(); - let dht = ctx.p2p_node.dht_manager(); let mut stats = RecordPruneStats::default(); let mut candidates = Vec::new(); let mut audit_challenge_budget = MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS; @@ -291,12 +292,18 @@ async fn prune_stored_records(ctx: &PrunePassContext<'_>) -> (usize, RecordPrune for offset in 0..stored_keys.len() { let key = &stored_keys[(scan_start + offset) % stored_keys.len()]; - let closest: Vec = dht - .find_closest_nodes_local_with_self(key, ctx.config.close_group_size) - .await; + let (storage_admission_group, strict_close_group) = + record_prune_lookup_groups(key, ctx.p2p_node, ctx.config).await; - let outcome = - evaluate_record_prune_key(ctx, key, &closest, now, &mut audit_challenge_budget).await; + let outcome = evaluate_record_prune_key( + ctx, + key, + &storage_admission_group, + &strict_close_group, + now, + &mut audit_challenge_budget, + ) + .await; if outcome.marked { stats.marked += 1; } @@ -367,15 +374,33 @@ async fn prune_stored_records(ctx: &PrunePassContext<'_>) -> (usize, RecordPrune (stored_keys.len(), stats) } +async fn record_prune_lookup_groups( + key: &XorName, + p2p_node: &Arc, + config: &ReplicationConfig, +) -> (Vec, Vec) { + let dht = p2p_node.dht_manager(); + let storage_admission_group = dht + .find_closest_nodes_local_with_self(key, storage_admission_width(config.close_group_size)) + .await; + let strict_close_group = dht + .find_closest_nodes_local_with_self(key, config.close_group_size) + .await; + (storage_admission_group, strict_close_group) +} + async fn evaluate_record_prune_key( ctx: &PrunePassContext<'_>, key: &XorName, - closest: &[DHTNode], + storage_admission_group: &[DHTNode], + strict_close_group: &[DHTNode], now: Instant, audit_challenge_budget: &mut usize, ) -> RecordPruneKeyOutcome { let mut outcome = RecordPruneKeyOutcome::default(); - let is_responsible = 
closest.iter().any(|node| node.peer_id == *ctx.self_id); + let is_responsible = storage_admission_group + .iter() + .any(|node| node.peer_id == *ctx.self_id); if is_responsible { if ctx.paid_list.record_out_of_range_since(key).is_some() { @@ -405,7 +430,7 @@ async fn evaluate_record_prune_key( return outcome; } - let target_peers = remote_close_group_peers(closest, ctx.self_id); + let target_peers = remote_close_group_peers(strict_close_group, ctx.self_id); if target_peers.is_empty() { warn!( "Cannot prune {}: current close group has no remote peers", @@ -417,7 +442,8 @@ async fn evaluate_record_prune_key( // Only peers we have hinted (mature repair proof) may be audited; the // proof threshold must be reachable among them. A never-synced peer in // the close group reduces the audit pool instead of vetoing the prune. - let current_close_peers: HashSet = closest.iter().map(|node| node.peer_id).collect(); + let current_close_peers: HashSet = + strict_close_group.iter().map(|node| node.peer_id).collect(); #[cfg(any(test, feature = "test-utils"))] let repair_proof_now = ctx.repair_proof_now.unwrap_or(now); #[cfg(not(any(test, feature = "test-utils")))] @@ -918,17 +944,18 @@ async fn revalidated_record_prune_keys( p2p_node: &Arc, config: &ReplicationConfig, ) -> (Vec, usize) { - let dht = p2p_node.dht_manager(); let mut keys_to_delete = Vec::new(); let mut cleared = 0; let now = Instant::now(); for candidate in candidates { - let closest: Vec = dht - .find_closest_nodes_local_with_self(&candidate.key, config.close_group_size) - .await; + let (storage_admission_group, strict_close_group) = + record_prune_lookup_groups(&candidate.key, p2p_node, config).await; - if closest.iter().any(|n| n.peer_id == *self_id) { + if storage_admission_group + .iter() + .any(|n| n.peer_id == *self_id) + { if paid_list .record_out_of_range_since(&candidate.key) .is_some() @@ -949,7 +976,7 @@ async fn revalidated_record_prune_keys( continue; } - let current_target_peers = 
remote_close_group_peers(&closest, self_id); + let current_target_peers = remote_close_group_peers(&strict_close_group, self_id); if current_target_peers.is_empty() { warn!( "Cannot prune {}: current close group has no remote peers", diff --git a/src/storage/handler.rs b/src/storage/handler.rs index d5421d0..d51d717 100644 --- a/src/storage/handler.rs +++ b/src/storage/handler.rs @@ -38,6 +38,7 @@ use crate::client::compute_address; use crate::error::{Error, Result}; use crate::logging::{debug, info, warn}; use crate::payment::{PaymentVerifier, QuoteGenerator, VerificationContext}; +use crate::replication::config::storage_admission_width; use crate::replication::fresh::FreshWriteEvent; use crate::storage::lmdb::LmdbStorage; use bytes::Bytes; @@ -58,7 +59,7 @@ pub struct AntProtocol { /// Quote generator for creating storage quotes. /// Also handles merkle candidate quote signing via ML-DSA-65. quote_generator: Arc, - /// P2P node handle used for direct PUT receiver responsibility checks. + /// P2P node handle used for direct PUT storage-admission checks. p2p_node: RwLock>>, /// Channel for notifying the replication engine about newly-stored chunks. fresh_write_tx: Option>, @@ -270,9 +271,10 @@ impl AntProtocol { Ok(false) => {} } - // 4. Check storage responsibility before payment verification. A node + // 4. Check storage admission before payment verification. A node // should only accept the actual chunk when its local routing table - // places it in the configured close group for the chunk address. + // places it in the configured close group plus storage margin for + // the chunk address. 
if let Err(e) = self.validate_store_membership(&address).await { return ChunkPutResponse::Error(e); } @@ -375,8 +377,9 @@ impl AntProtocol { return Ok(()); } return Err(ProtocolError::PaymentFailed(format!( - "ClientPut receiver is not among this node's local {} closest peers for {}", - self.payment_verifier.close_group_size(), + "ClientPut receiver is not among this node's local {} closest peers for {} \ + (close group plus storage margin)", + storage_admission_width(self.payment_verifier.close_group_size()), hex::encode(address) ))); } @@ -409,17 +412,18 @@ impl AntProtocol { }; let self_id = *p2p_node.peer_id(); - let close_group_size = self.payment_verifier.close_group_size(); + let admission_width = storage_admission_width(self.payment_verifier.close_group_size()); let closest = p2p_node .dht_manager() - .find_closest_nodes_local_with_self(address, close_group_size) + .find_closest_nodes_local_with_self(address, admission_width) .await; if closest.iter().any(|node| node.peer_id == self_id) { return Ok(()); } Err(ProtocolError::PaymentFailed(format!( - "ClientPut receiver {} is not among this node's local {close_group_size} closest peers for {}", + "ClientPut receiver {} is not among this node's local {admission_width} closest peers for {} \ + (close group plus storage margin)", self_id.to_hex(), hex::encode(address) ))) diff --git a/tests/e2e/replication.rs b/tests/e2e/replication.rs index 4d0dcae..cf5e9e0 100644 --- a/tests/e2e/replication.rs +++ b/tests/e2e/replication.rs @@ -7,7 +7,9 @@ use super::TestHarness; use ant_node::client::compute_address; -use ant_node::replication::config::{REPAIR_HINT_MIN_AGE, REPLICATION_PROTOCOL_ID}; +use ant_node::replication::config::{ + storage_admission_width, REPAIR_HINT_MIN_AGE, REPLICATION_PROTOCOL_ID, +}; use ant_node::replication::protocol::{ compute_audit_digest, AuditChallenge, AuditResponse, FetchRequest, FetchResponse, FreshReplicationOffer, FreshReplicationResponse, NeighborSyncRequest, ReplicationMessage, @@ 
-78,6 +80,7 @@ async fn find_remote_prune_candidate( let pruner = harness.test_node(pruner_idx).expect("pruner"); let pruner_p2p = pruner.p2p_node.as_ref().expect("pruner p2p"); let pruner_peer = *pruner_p2p.peer_id(); + let admission_width = storage_admission_width(close_group_size); for attempt in 0..10_000usize { let content = format!("prune-confirmation-{label}-{attempt}").into_bytes(); @@ -94,6 +97,16 @@ async fn find_remote_prune_candidate( if target_peers.contains(&pruner_peer) { continue; } + let storage_admission_group = pruner_p2p + .dht_manager() + .find_closest_nodes_local_with_self(&address, admission_width) + .await; + if storage_admission_group + .iter() + .any(|node| node.peer_id == pruner_peer) + { + continue; + } if target_peers .iter() .all(|peer| node_index_for_peer(harness, peer).is_some()) @@ -102,7 +115,10 @@ async fn find_remote_prune_candidate( } } - panic!("failed to find a {close_group_size}-peer prune candidate outside pruner {pruner_idx}"); + panic!( + "failed to find a {close_group_size}-peer prune candidate outside pruner {pruner_idx}'s \ + storage-admission range" + ); } async fn store_record_on_peer( From dbce0993a8a6cfcc81e7db9c6d2aa3839e42b335 Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Fri, 12 Jun 2026 18:59:29 +0100 Subject: [PATCH 18/18] chore(release): promote rc-2026.6.2 to 0.12.1 --- Cargo.lock | 17 ++++++++++------- Cargo.toml | 6 +++--- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6287d74..a57347f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -809,7 +809,7 @@ dependencies = [ [[package]] name = "ant-node" -version = "0.12.1-rc.1" +version = "0.12.1" dependencies = [ "alloy", "ant-protocol", @@ -861,8 +861,9 @@ dependencies = [ [[package]] name = "ant-protocol" -version = "2.1.3-rc.1" -source = "git+https://github.com/WithAutonomi/ant-protocol?branch=rc-2026.6.2#053be60ced998c6a4c0833b4560f1ddb89a86514" +version = "2.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab11ef1ecd2d37433b010cfb711125bdee0c76d6df65cb1d4eee661a9559c37" dependencies = [ "blake3", "bytes", @@ -4865,8 +4866,9 @@ dependencies = [ [[package]] name = "saorsa-core" -version = "0.25.0-rc.1" -source = "git+https://github.com/saorsa-labs/saorsa-core?branch=rc-2026.6.2#088c3552bf7257ee10b5a670cd4990d1c0e7b8af" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b52b60284a36bd0e0f1311dd6f22465e3fd29b3a2f720fd563025cba7851293d" dependencies = [ "anyhow", "async-trait", @@ -4979,8 +4981,9 @@ dependencies = [ [[package]] name = "saorsa-transport" -version = "0.35.0-rc.1" -source = "git+https://github.com/saorsa-labs/saorsa-transport?branch=rc-2026.6.2#48b0874adaefcf26650634a5f93a9618715518ca" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621d0a207914a8fd6453f25e4bcc369914cbfaf59a2857e898c079b95f52f5bb" dependencies = [ "anyhow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 26810de..f0a3be8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ant-node" -version = "0.12.1-rc.1" +version = "0.12.1" edition = "2021" authors = ["David Irvine "] description = "Pure quantum-proof network node for the Autonomi decentralized network" @@ -39,10 +39,10 @@ mimalloc = "0.1" # Until then, the git pin tracks the matching saorsa-core lineage # (the rc-2026.4.2 branch) so Cargo can unify the wire types here # with ant-protocol's re-exports. -ant-protocol = { git = "https://github.com/WithAutonomi/ant-protocol", branch = "rc-2026.6.2" } +ant-protocol = "2.1.3" # Core (provides EVERYTHING: networking, DHT, security, trust, storage) -saorsa-core = { git = "https://github.com/saorsa-labs/saorsa-core", branch = "rc-2026.6.2" } +saorsa-core = "0.25.0" saorsa-pqc = "0.5" # Payment verification - autonomi network lookup + EVM payment