diff --git a/rust/cuvs/src/lib.rs b/rust/cuvs/src/lib.rs
index 519519440b..5535136b53 100644
--- a/rust/cuvs/src/lib.rs
+++ b/rust/cuvs/src/lib.rs
@@ -19,6 +19,7 @@ mod error;
 pub mod ivf_flat;
 pub mod ivf_pq;
 mod resources;
+pub mod tiered_index;
 pub mod vamana;
 
 pub use dlpack::ManagedTensor;
diff --git a/rust/cuvs/src/tiered_index/index.rs b/rust/cuvs/src/tiered_index/index.rs
new file mode 100644
index 0000000000..0403d1f53c
--- /dev/null
+++ b/rust/cuvs/src/tiered_index/index.rs
@@ -0,0 +1,481 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use std::io::{Write, stderr};
+
+use crate::dlpack::ManagedTensor;
+use crate::error::{Error, Result, check_cuvs};
+use crate::resources::Resources;
+use crate::tiered_index::{AnnAlgo, IndexParams, SearchParams};
+
+/// Tiered ANN Index.
+///
+/// A tiered index couples a brute-force tier that absorbs incremental inserts
+/// with an ANN tier (CAGRA by default). Vectors added via [`Index::extend`]
+/// land in the brute-force tier and are immediately searchable, even before the
+/// ANN tier has been (re)built — this is the defining behavior of the tiered
+/// index.
+///
+/// The C API offers no serialize/deserialize for the tiered index, so this
+/// wrapper does not expose persistence (and therefore takes no filesystem
+/// paths).
+#[derive(Debug)]
+pub struct Index {
+    handle: ffi::cuvsTieredIndex_t,
+    /// The ANN backend the index was built with, used to validate that search
+    /// params match the backend (the C search API reinterprets an opaque
+    /// `void*` per this algo).
+    algo: AnnAlgo,
+}
+
+impl Index {
+    /// Builds a new tiered Index from the dataset for efficient search.
+    ///
+    /// # Arguments
+    ///
+    /// * `res` - Resources to use
+    /// * `params` - Parameters for building the index (backend + ANN params)
+    /// * `dataset` - A row-major matrix on either the host or device to index
+    pub fn build<T: Into<ManagedTensor>>(
+        res: &Resources,
+        params: &IndexParams,
+        dataset: T,
+    ) -> Result<Index> {
+        let dataset: ManagedTensor = dataset.into();
+        // Own the handle up front so `Drop` frees it if the build below fails.
+        let index = Index { handle: Index::create_handle()?, algo: params.algo() };
+        unsafe {
+            check_cuvs(ffi::cuvsTieredIndexBuild(
+                res.0,
+                params.as_ptr(),
+                dataset.as_ptr(),
+                index.handle,
+            ))?;
+        }
+        Ok(index)
+    }
+
+    /// Creates a new empty index handle.
+    ///
+    /// Private: [`Index::build`] is the only public constructor so that every
+    /// `Index` carries the backend algo captured from its build params.
+    fn create_handle() -> Result<ffi::cuvsTieredIndex_t> {
+        unsafe {
+            let mut index = std::mem::MaybeUninit::<ffi::cuvsTieredIndex_t>::uninit();
+            check_cuvs(ffi::cuvsTieredIndexCreate(index.as_mut_ptr()))?;
+            Ok(index.assume_init())
+        }
+    }
+
+    /// Extends the index with new vectors.
+    ///
+    /// The new vectors are added to the brute-force tier and become immediately
+    /// searchable. If `create_ann_index_on_extend` was set and the incremental
+    /// tier now exceeds `min_ann_rows`, the ANN tier is (re)built.
+    ///
+    /// # Arguments
+    ///
+    /// * `res` - Resources to use
+    /// * `new_vectors` - A row-major matrix on either the host or device to add
+    pub fn extend<T: Into<ManagedTensor>>(&self, res: &Resources, new_vectors: T) -> Result<()> {
+        let new_vectors: ManagedTensor = new_vectors.into();
+        unsafe { check_cuvs(ffi::cuvsTieredIndexExtend(res.0, new_vectors.as_ptr(), self.handle)) }
+    }
+
+    /// Performs an Approximate Nearest Neighbors search on the Index.
+    ///
+    /// `params` must match the ANN backend the index was built with (e.g.
+    /// [`SearchParams::Cagra`] for a CAGRA-backed index); a mismatch returns
+    /// [`crate::error::Error::InvalidArgument`] rather than risking the
+    /// undefined behavior of the C API reinterpreting the wrong struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `res` - Resources to use
+    /// * `params` - Search parameters for the ANN backend
+    /// * `queries` - A matrix in device memory to query for
+    /// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors
+    /// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors
+    pub fn search(
+        &self,
+        res: &Resources,
+        params: &SearchParams,
+        queries: &ManagedTensor,
+        neighbors: &ManagedTensor,
+        distances: &ManagedTensor,
+    ) -> Result<()> {
+        let no_filter = ffi::cuvsFilter { addr: 0, type_: ffi::cuvsFilterType::NO_FILTER };
+        self.search_impl(res, params, queries, neighbors, distances, no_filter)
+    }
+
+    /// Performs an Approximate Nearest Neighbors search with a bitset prefilter.
+    ///
+    /// Like [`search`](Self::search), but accepts a bitset filter to exclude
+    /// vectors from the result set. `params` must match the index's backend;
+    /// a mismatch returns [`crate::error::Error::InvalidArgument`].
+    ///
+    /// # Arguments
+    ///
+    /// * `res` - Resources to use
+    /// * `params` - Search parameters for the ANN backend
+    /// * `queries` - A matrix in device memory to query for
+    /// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors
+    /// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors
+    /// * `bitset` - A 1-D `uint32` device tensor with `ceil(n_rows / 32)` elements.
+    ///   Each bit corresponds to a dataset row: bit 1 = include, bit 0 = exclude.
+    pub fn search_with_filter(
+        &self,
+        res: &Resources,
+        params: &SearchParams,
+        queries: &ManagedTensor,
+        neighbors: &ManagedTensor,
+        distances: &ManagedTensor,
+        bitset: &ManagedTensor,
+    ) -> Result<()> {
+        let prefilter =
+            ffi::cuvsFilter { addr: bitset.as_ptr() as usize, type_: ffi::cuvsFilterType::BITSET };
+        self.search_impl(res, params, queries, neighbors, distances, prefilter)
+    }
+
+    /// Shared search path for the filtered and unfiltered variants.
+    ///
+    /// Validates that `params` matches the index's build-time backend before
+    /// handing the opaque pointer to the C API (which reinterprets it per the
+    /// index's algo — a mismatch would be undefined behavior).
+    fn search_impl(
+        &self,
+        res: &Resources,
+        params: &SearchParams,
+        queries: &ManagedTensor,
+        neighbors: &ManagedTensor,
+        distances: &ManagedTensor,
+        prefilter: ffi::cuvsFilter,
+    ) -> Result<()> {
+        if params.algo() != self.algo {
+            return Err(Error::InvalidArgument(format!(
+                "searched with {:?} params but index was built with {:?}",
+                params.algo(),
+                self.algo,
+            )));
+        }
+        unsafe {
+            check_cuvs(ffi::cuvsTieredIndexSearch(
+                res.0,
+                // The C API takes the backend's search params as an opaque
+                // void*; the variant is validated above to match the index's
+                // build-time backend.
+                params.as_void_ptr(),
+                self.handle,
+                queries.as_ptr(),
+                neighbors.as_ptr(),
+                distances.as_ptr(),
+                prefilter,
+            ))
+        }
+    }
+}
+
+impl Drop for Index {
+    fn drop(&mut self) {
+        if let Err(e) = check_cuvs(unsafe { ffi::cuvsTieredIndexDestroy(self.handle) }) {
+            // Best effort: a failed stderr write must not panic inside Drop.
+            let _ = write!(stderr(), "failed to call cuvsTieredIndexDestroy {:?}", e);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::tiered_index::AnnAlgo;
+    use crate::{cagra, ivf_flat};
+    use ndarray::s;
+    use ndarray_rand::RandomExt;
+    use ndarray_rand::rand_distr::Uniform;
+
+    fn default_params() -> IndexParams {
+        // Keep min_ann_rows low so the ANN tier (CAGRA) is exercised by the
+        // build, while leaving room for extend to land in the brute-force tier.
+        IndexParams::new()
+            .unwrap()
+            .set_algo(AnnAlgo::CUVS_TIERED_INDEX_ALGO_CAGRA)
+            .set_min_ann_rows(128)
+            .set_create_ann_index_on_extend(true)
+    }
+
+    /// (a) Build with an initial dataset and confirm each query finds itself.
+    #[test]
+    fn test_tiered_build_and_search() {
+        let res = Resources::new().unwrap();
+
+        let n_datapoints = 1024;
+        let n_features = 16;
+        let dataset =
+            ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));
+        let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap();
+
+        let index = Index::build(&res, &default_params(), dataset_device)
+            .expect("failed to build tiered index");
+
+        let n_queries = 4;
+        let queries = dataset.slice(s![0..n_queries, ..]);
+        let queries = ManagedTensor::from(&queries).to_device(&res).unwrap();
+
+        let k = 10;
+        let mut neighbors_host = ndarray::Array::<i64, _>::zeros((n_queries, k));
+        let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res).unwrap();
+        let distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
+        let distances = ManagedTensor::from(&distances_host).to_device(&res).unwrap();
+
+        let search_params = SearchParams::Cagra(cagra::SearchParams::new().unwrap());
+        index.search(&res, &search_params, &queries, &neighbors, &distances).unwrap();
+
+        neighbors.to_host(&res, &mut neighbors_host).unwrap();
+        for i in 0..n_queries {
+            assert_eq!(neighbors_host[[i, 0]], i as i64, "query {i} should find itself");
+        }
+    }
+
+    /// A non-CAGRA backend must work end-to-end, proving the
+    /// `SearchParams::algo()` / `as_void_ptr()` mapping for IVF-Flat is
+    /// correct (the mismatch test alone only proves rejection).
+    #[test]
+    fn test_tiered_ivf_flat_backend_search() {
+        let res = Resources::new().unwrap();
+
+        let n_datapoints = 1024;
+        let n_features = 16;
+        let dataset =
+            ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));
+        let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap();
+
+        let params = IndexParams::new()
+            .unwrap()
+            .set_algo(AnnAlgo::CUVS_TIERED_INDEX_ALGO_IVF_FLAT)
+            .set_min_ann_rows(128)
+            .set_ivf_flat_params(crate::ivf_flat::IndexParams::new().unwrap());
+        let index = Index::build(&res, &params, dataset_device)
+            .expect("failed to build IVF-Flat-backed tiered index");
+
+        let n_queries = 4;
+        let queries = dataset.slice(s![0..n_queries, ..]);
+        let queries = ManagedTensor::from(&queries).to_device(&res).unwrap();
+
+        let k = 10;
+        let mut neighbors_host = ndarray::Array::<i64, _>::zeros((n_queries, k));
+        let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res).unwrap();
+        let distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
+        let distances = ManagedTensor::from(&distances_host).to_device(&res).unwrap();
+
+        let search_params = SearchParams::IvfFlat(crate::ivf_flat::SearchParams::new().unwrap());
+        index.search(&res, &search_params, &queries, &neighbors, &distances).unwrap();
+
+        neighbors.to_host(&res, &mut neighbors_host).unwrap();
+        for i in 0..n_queries {
+            assert_eq!(neighbors_host[[i, 0]], i as i64, "query {i} should find itself");
+        }
+    }
+
+    /// (b) THE KEY TEST: vectors added via extend after build must be
+    /// immediately findable — the entire point of the tiered index.
+    #[test]
+    fn test_tiered_extend_visibility() {
+        let res = Resources::new().unwrap();
+
+        let n_features = 16;
+        let n_initial = 512;
+        let dataset =
+            ndarray::Array::<f32, _>::random((n_initial, n_features), Uniform::new(0., 1.0));
+        let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap();
+
+        let index = Index::build(&res, &default_params(), dataset_device)
+            .expect("failed to build tiered index");
+
+        // New vectors NOT in the original dataset.
+        let n_new = 8;
+        let new_vectors =
+            ndarray::Array::<f32, _>::random((n_new, n_features), Uniform::new(10., 11.0));
+        let new_device = ManagedTensor::from(&new_vectors).to_device(&res).unwrap();
+        index.extend(&res, new_device).expect("extend failed");
+
+        // Query with the new vectors themselves. Their ids are appended after
+        // the initial dataset, so neighbor[i][0] should be n_initial + i.
+        let queries = ManagedTensor::from(&new_vectors).to_device(&res).unwrap();
+        let k = 5;
+        let mut neighbors_host = ndarray::Array::<i64, _>::zeros((n_new, k));
+        let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res).unwrap();
+        let mut distances_host = ndarray::Array::<f32, _>::zeros((n_new, k));
+        let distances = ManagedTensor::from(&distances_host).to_device(&res).unwrap();
+
+        let search_params = SearchParams::Cagra(cagra::SearchParams::new().unwrap());
+        index.search(&res, &search_params, &queries, &neighbors, &distances).unwrap();
+
+        neighbors.to_host(&res, &mut neighbors_host).unwrap();
+        distances.to_host(&res, &mut distances_host).unwrap();
+        for i in 0..n_new {
+            assert_eq!(
+                neighbors_host[[i, 0]],
+                (n_initial + i) as i64,
+                "extended vector {i} must be immediately findable as its own nearest neighbor"
+            );
+            // Self-distance should be ~zero up to float32 rounding.
+            assert!(
+                distances_host[[i, 0]] < 1e-2,
+                "extended vector {i} self-distance {} should be ~0",
+                distances_host[[i, 0]]
+            );
+        }
+    }
+
+    /// (c) Repeated extends each remain searchable.
+    #[test]
+    fn test_tiered_repeated_extends() {
+        let res = Resources::new().unwrap();
+
+        let n_features = 16;
+        let n_initial = 512;
+        let dataset =
+            ndarray::Array::<f32, _>::random((n_initial, n_features), Uniform::new(0., 1.0));
+        let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap();
+
+        let index = Index::build(&res, &default_params(), dataset_device)
+            .expect("failed to build tiered index");
+
+        let search_params = SearchParams::Cagra(cagra::SearchParams::new().unwrap());
+        let k = 8;
+        let n_batch = 4;
+        let mut total = n_initial;
+
+        for round in 0..3 {
+            // Use a distinct value range per round so each batch is far from the
+            // [0,1] base cluster and from the other rounds.
+            let lo = 20.0 + round as f32 * 10.0;
+            let new_vectors =
+                ndarray::Array::<f32, _>::random((n_batch, n_features), Uniform::new(lo, lo + 1.0));
+            let new_device = ManagedTensor::from(&new_vectors).to_device(&res).unwrap();
+            index.extend(&res, new_device).expect("extend failed");
+
+            let queries = ManagedTensor::from(&new_vectors).to_device(&res).unwrap();
+            let mut neighbors_host = ndarray::Array::<i64, _>::zeros((n_batch, k));
+            let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res).unwrap();
+            let mut distances_host = ndarray::Array::<f32, _>::zeros((n_batch, k));
+            let distances = ManagedTensor::from(&distances_host).to_device(&res).unwrap();
+
+            index.search(&res, &search_params, &queries, &neighbors, &distances).unwrap();
+            neighbors.to_host(&res, &mut neighbors_host).unwrap();
+            distances.to_host(&res, &mut distances_host).unwrap();
+            // Each just-extended vector must be immediately findable: its own id
+            // appears in the top-k with a near-zero self-distance. We assert
+            // top-k membership rather than exact rank-0 to stay robust against
+            // the ANN tier's approximate recall after an on-extend rebuild.
+            for i in 0..n_batch {
+                let want = (total + i) as i64;
+                let pos = (0..k).find(|&j| neighbors_host[[i, j]] == want);
+                let pos = pos.unwrap_or_else(|| {
+                    panic!(
+                        "round {round}: extended vector {i} (id {want}) not found in top-{k}: {:?}",
+                        neighbors_host.row(i)
+                    )
+                });
+                // Self-distance is ~0 up to float32 rounding (a handful of
+                // ULPs across the feature dimension); real neighbors in other
+                // value ranges are orders of magnitude farther.
+                assert!(
+                    distances_host[[i, pos]] < 1e-2,
+                    "round {round}: extended vector {i} self-distance {} should be ~0",
+                    distances_host[[i, pos]]
+                );
+            }
+            total += n_batch;
+        }
+    }
+
+    /// (d) Filtered search: a bitset prefilter that excludes a query's own id
+    /// must keep that id out of the result set.
+    #[test]
+    fn test_tiered_filtered_search() {
+        let res = Resources::new().unwrap();
+
+        let n_features = 16;
+        let n_datapoints = 1024;
+        let dataset =
+            ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));
+        let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap();
+
+        let index = Index::build(&res, &default_params(), dataset_device)
+            .expect("failed to build tiered index");
+
+        // Build a bitset over n_datapoints with all bits set (1 = keep), then
+        // clear bit 0 so query 0 cannot return itself. cuvs bitsets are u32
+        // words, LSB-first.
+        let n_words = n_datapoints.div_ceil(32);
+        let mut bitset_host = ndarray::Array::<u32, _>::from_elem(n_words, u32::MAX);
+        bitset_host[0] &= !1u32; // clear bit 0 -> exclude id 0
+        let bitset = ManagedTensor::from(&bitset_host).to_device(&res).unwrap();
+
+        let n_queries = 1;
+        let queries = dataset.slice(s![0..n_queries, ..]);
+        let queries = ManagedTensor::from(&queries).to_device(&res).unwrap();
+        let k = 5;
+        let mut neighbors_host = ndarray::Array::<i64, _>::zeros((n_queries, k));
+        let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res).unwrap();
+        let distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
+        let distances = ManagedTensor::from(&distances_host).to_device(&res).unwrap();
+
+        let search_params = SearchParams::Cagra(cagra::SearchParams::new().unwrap());
+        index
+            .search_with_filter(&res, &search_params, &queries, &neighbors, &distances, &bitset)
+            .unwrap();
+
+        neighbors.to_host(&res, &mut neighbors_host).unwrap();
+        // id 0 was filtered out, so it must not appear among the neighbors.
+        for j in 0..k {
+            assert_ne!(neighbors_host[[0, j]], 0, "filtered id 0 must not be returned");
+        }
+    }
+
+    /// (e) Backend mismatch: searching a CAGRA-backed index with IVF-Flat
+    /// search params must be rejected before reaching the C API (which would
+    /// otherwise reinterpret the wrong struct — undefined behavior).
+    #[test]
+    fn test_search_params_backend_mismatch() {
+        let res = Resources::new().unwrap();
+
+        let n_features = 16;
+        let n_datapoints = 1024;
+        let dataset =
+            ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));
+        let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap();
+
+        // CAGRA-backed index (default_params sets CUVS_TIERED_INDEX_ALGO_CAGRA).
+        let index = Index::build(&res, &default_params(), dataset_device)
+            .expect("failed to build tiered index");
+
+        let n_queries = 1;
+        let queries = dataset.slice(s![0..n_queries, ..]);
+        let queries = ManagedTensor::from(&queries).to_device(&res).unwrap();
+        let k = 5;
+        let neighbors_host = ndarray::Array::<i64, _>::zeros((n_queries, k));
+        let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res).unwrap();
+        let distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
+        let distances = ManagedTensor::from(&distances_host).to_device(&res).unwrap();
+
+        // Wrong-backend params: IVF-Flat against a CAGRA index.
+        let search_params = SearchParams::IvfFlat(ivf_flat::SearchParams::new().unwrap());
+        let err = index
+            .search(&res, &search_params, &queries, &neighbors, &distances)
+            .expect_err("mismatched search params must be rejected");
+
+        match err {
+            Error::InvalidArgument(msg) => {
+                assert!(
+                    msg.contains("IVF_FLAT") && msg.contains("CAGRA"),
+                    "error should mention both the searched and built backends: {msg}"
+                );
+            }
+            other => panic!("expected InvalidArgument, got {other:?}"),
+        }
+    }
+}
diff --git a/rust/cuvs/src/tiered_index/index_params.rs b/rust/cuvs/src/tiered_index/index_params.rs
new file mode 100644
index 0000000000..1f8eccf377
--- /dev/null
+++ b/rust/cuvs/src/tiered_index/index_params.rs
@@ -0,0 +1,180 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use crate::cagra::IndexParams as CagraIndexParams;
+use crate::distance_type::DistanceType;
+use crate::error::{Result, check_cuvs};
+use crate::ivf_flat::IndexParams as IvfFlatIndexParams;
+use crate::ivf_pq::IndexParams as IvfPqIndexParams;
+use std::fmt;
+use std::io::{Write, stderr};
+
+/// Which ANN algorithm backs the tiered index's ANN tier.
+pub type AnnAlgo = ffi::cuvsTieredIndexANNAlgo;
+
+/// Supplemental parameters to build a [`crate::tiered_index::Index`].
+///
+/// A tiered index couples a brute-force tier (which absorbs incremental
+/// inserts via [`crate::tiered_index::Index::extend`]) with an ANN tier. The
+/// ANN tier is built once the incremental tier accumulates at least
+/// `min_ann_rows` rows. Use [`IndexParams::set_algo`] to select which ANN
+/// algorithm backs that tier (CAGRA by default), and the matching
+/// `set_*_params` setter to supply per-algorithm build parameters.
+///
+/// The embedded ANN parameter wrappers are retained inside `IndexParams` so
+/// their underlying C structs outlive the borrow taken by the tiered params
+/// (which only stores raw pointers to them).
+pub struct IndexParams {
+    inner: ffi::cuvsTieredIndexParams_t,
+    // Retain ownership of any embedded ANN params so they are not dropped while
+    // the tiered params struct still points at them.
+    cagra_params: Option<CagraIndexParams>,
+    ivf_flat_params: Option<IvfFlatIndexParams>,
+    ivf_pq_params: Option<IvfPqIndexParams>,
+}
+
+impl IndexParams {
+    /// Returns a new IndexParams populated with cuVS defaults.
+    pub fn new() -> Result<IndexParams> {
+        unsafe {
+            let mut params = std::mem::MaybeUninit::<ffi::cuvsTieredIndexParams_t>::uninit();
+            check_cuvs(ffi::cuvsTieredIndexParamsCreate(params.as_mut_ptr()))?;
+            Ok(IndexParams {
+                inner: params.assume_init(),
+                cagra_params: None,
+                ivf_flat_params: None,
+                ivf_pq_params: None,
+            })
+        }
+    }
+
+    /// Raw pointer to the underlying C params struct.
+    pub(crate) fn as_ptr(&self) -> ffi::cuvsTieredIndexParams_t {
+        self.inner
+    }
+
+    /// Which ANN algorithm backs the ANN tier, captured at build time so the
+    /// index can validate that search params match the backend.
+    pub(crate) fn algo(&self) -> AnnAlgo {
+        unsafe { (*self.inner).algo }
+    }
+
+    /// DistanceType to use for building the index.
+    pub fn set_metric(self, metric: DistanceType) -> IndexParams {
+        unsafe {
+            (*self.inner).metric = metric;
+        }
+        self
+    }
+
+    /// Which ANN algorithm backs the ANN tier (CAGRA, IVF-Flat, or IVF-PQ).
+    pub fn set_algo(self, algo: AnnAlgo) -> IndexParams {
+        unsafe {
+            (*self.inner).algo = algo;
+        }
+        self
+    }
+
+    /// The minimum number of rows necessary in the index before an ANN index
+    /// is created. Below this threshold, all rows live in the brute-force tier.
+    pub fn set_min_ann_rows(self, min_ann_rows: i64) -> IndexParams {
+        unsafe {
+            (*self.inner).min_ann_rows = min_ann_rows;
+        }
+        self
+    }
+
+    /// Whether to (re)build the ANN tier on [`crate::tiered_index::Index::extend`]
+    /// once the incremental (brute-force) tier exceeds `min_ann_rows`.
+    pub fn set_create_ann_index_on_extend(self, create: bool) -> IndexParams {
+        unsafe {
+            (*self.inner).create_ann_index_on_extend = create;
+        }
+        self
+    }
+
+    /// Supply CAGRA build parameters for the ANN tier.
+    ///
+    /// Ownership of `params` is moved into `self` so the underlying C struct
+    /// outlives the raw pointer stored in the tiered params.
+    pub fn set_cagra_params(mut self, params: CagraIndexParams) -> IndexParams {
+        unsafe {
+            (*self.inner).cagra_params = params.0;
+        }
+        self.cagra_params = Some(params);
+        self
+    }
+
+    /// Supply IVF-Flat build parameters for the ANN tier.
+    pub fn set_ivf_flat_params(mut self, params: IvfFlatIndexParams) -> IndexParams {
+        unsafe {
+            (*self.inner).ivf_flat_params = params.0;
+        }
+        self.ivf_flat_params = Some(params);
+        self
+    }
+
+    /// Supply IVF-PQ build parameters for the ANN tier.
+    pub fn set_ivf_pq_params(mut self, params: IvfPqIndexParams) -> IndexParams {
+        unsafe {
+            (*self.inner).ivf_pq_params = params.0;
+        }
+        self.ivf_pq_params = Some(params);
+        self
+    }
+}
+
+impl fmt::Debug for IndexParams {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // custom debug trait here, default value will show the pointer address
+        // for the inner params object which isn't that useful.
+        write!(f, "IndexParams({:?})", unsafe { *self.inner })
+    }
+}
+
+impl Drop for IndexParams {
+    fn drop(&mut self) {
+        if let Err(e) = check_cuvs(unsafe { ffi::cuvsTieredIndexParamsDestroy(self.inner) }) {
+            // Best effort: a failed stderr write must not panic inside Drop.
+            let _ = write!(stderr(), "failed to call cuvsTieredIndexParamsDestroy {:?}", e);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_index_params() {
+        let params = IndexParams::new()
+            .unwrap()
+            .set_algo(AnnAlgo::CUVS_TIERED_INDEX_ALGO_CAGRA)
+            .set_min_ann_rows(256)
+            .set_create_ann_index_on_extend(true);
+
+        unsafe {
+            assert_eq!((*params.inner).algo, AnnAlgo::CUVS_TIERED_INDEX_ALGO_CAGRA);
+            assert_eq!((*params.inner).min_ann_rows, 256);
+            assert!((*params.inner).create_ann_index_on_extend);
+        }
+    }
+
+    #[test]
+    fn test_embedded_cagra_params_retained() {
+        let cagra = CagraIndexParams::new().unwrap().set_graph_degree(32);
+        let params = IndexParams::new()
+            .unwrap()
+            .set_algo(AnnAlgo::CUVS_TIERED_INDEX_ALGO_CAGRA)
+            .set_cagra_params(cagra);
+
+        // The embedded cagra params pointer must be live (not dangling) because
+        // IndexParams retains ownership.
+        unsafe {
+            assert!(!(*params.inner).cagra_params.is_null());
+            assert_eq!((*(*params.inner).cagra_params).graph_degree, 32);
+        }
+    }
+}
diff --git a/rust/cuvs/src/tiered_index/mod.rs b/rust/cuvs/src/tiered_index/mod.rs
new file mode 100644
index 0000000000..bf43747bd4
--- /dev/null
+++ b/rust/cuvs/src/tiered_index/mod.rs
@@ -0,0 +1,79 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+//! The tiered index couples a brute-force tier that absorbs incremental inserts
+//! with an ANN tier (CAGRA by default; IVF-Flat and IVF-PQ are also available).
+//!
+//! Vectors added with [`Index::extend`] land in the brute-force tier and are
+//! immediately searchable — even before the ANN tier has been (re)built. Once
+//! the incremental tier exceeds `min_ann_rows`, the ANN tier is built (or
+//! rebuilt on extend when `create_ann_index_on_extend` is set).
+//!
+//! The C API does not provide serialize/deserialize for the tiered index, so
+//! this module does not expose persistence.
+//!
+//! Example:
+//! ```
+//!
+//! use cuvs::tiered_index::{AnnAlgo, Index, IndexParams, SearchParams};
+//! use cuvs::{ManagedTensor, Resources, Result};
+//!
+//! use ndarray::s;
+//! use ndarray_rand::rand_distr::Uniform;
+//! use ndarray_rand::RandomExt;
+//!
+//! fn tiered_index_example() -> Result<()> {
+//!     let res = Resources::new()?;
+//!
+//!     // Create a new random dataset to index
+//!     let n_datapoints = 1024;
+//!     let n_features = 16;
+//!     let dataset =
+//!         ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));
+//!
+//!     // Build the tiered index, backed by CAGRA for its ANN tier
+//!     let build_params = IndexParams::new()?
+//!         .set_algo(AnnAlgo::CUVS_TIERED_INDEX_ALGO_CAGRA)
+//!         .set_min_ann_rows(128)
+//!         .set_create_ann_index_on_extend(true);
+//!     let dataset_device = ManagedTensor::from(&dataset).to_device(&res)?;
+//!     let index = Index::build(&res, &build_params, dataset_device)?;
+//!
+//!     // Add new vectors after build: they are immediately searchable.
+//!     let new_vectors =
+//!         ndarray::Array::<f32, _>::random((8, n_features), Uniform::new(0., 1.0));
+//!     let new_device = ManagedTensor::from(&new_vectors).to_device(&res)?;
+//!     index.extend(&res, new_device)?;
+//!
+//!     // Search using the first 4 points from the dataset as queries
+//!     let n_queries = 4;
+//!     let queries = dataset.slice(s![0..n_queries, ..]);
+//!     let queries = ManagedTensor::from(&queries).to_device(&res)?;
+//!
+//!     let k = 10;
+//!     let mut neighbors_host = ndarray::Array::<i64, _>::zeros((n_queries, k));
+//!     let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res)?;
+//!     let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
+//!     let distances = ManagedTensor::from(&distances_host).to_device(&res)?;
+//!
+//!     // The search params variant must match the index's ANN backend.
+//!     let search_params = SearchParams::Cagra(cuvs::cagra::SearchParams::new()?);
+//!     index.search(&res, &search_params, &queries, &neighbors, &distances)?;
+//!
+//!     neighbors.to_host(&res, &mut neighbors_host)?;
+//!     distances.to_host(&res, &mut distances_host)?;
+//!     println!("Neighbors {:?}", neighbors_host);
+//!     println!("Distances {:?}", distances_host);
+//!     Ok(())
+//! }
+//! ```
+
+mod index;
+mod index_params;
+mod search_params;
+
+pub use index::Index;
+pub use index_params::{AnnAlgo, IndexParams};
+pub use search_params::SearchParams;
diff --git a/rust/cuvs/src/tiered_index/search_params.rs b/rust/cuvs/src/tiered_index/search_params.rs
new file mode 100644
index 0000000000..635a9d07eb
--- /dev/null
+++ b/rust/cuvs/src/tiered_index/search_params.rs
@@ -0,0 +1,48 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use crate::tiered_index::AnnAlgo;
+
+/// Search parameters for a [`crate::tiered_index::Index`].
+///
+/// The variant must match the ANN backend the index was built with. The C API
+/// (`cuvsTieredIndexSearch`) takes the backend's search params as an opaque
+/// `void*` and reinterprets it according to the index's build-time algorithm —
+/// passing the wrong variant would reinterpret the pointer as the wrong struct
+/// (undefined behavior). [`crate::tiered_index::Index::search`] guards against
+/// this by validating the variant against the index's algo and returning
+/// [`crate::error::Error::InvalidArgument`] on mismatch.
+#[derive(Debug)]
+pub enum SearchParams {
+    /// Search params for a CAGRA-backed tiered index.
+    Cagra(crate::cagra::SearchParams),
+    /// Search params for an IVF-Flat-backed tiered index.
+    IvfFlat(crate::ivf_flat::SearchParams),
+    /// Search params for an IVF-PQ-backed tiered index.
+    IvfPq(crate::ivf_pq::SearchParams),
+}
+
+impl SearchParams {
+    /// Maps this variant to the [`AnnAlgo`] it targets so the index can compare
+    /// it with the algorithm recorded at build time.
+    pub(crate) fn algo(&self) -> AnnAlgo {
+        match self {
+            Self::Cagra(_) => AnnAlgo::CUVS_TIERED_INDEX_ALGO_CAGRA,
+            Self::IvfFlat(_) => AnnAlgo::CUVS_TIERED_INDEX_ALGO_IVF_FLAT,
+            Self::IvfPq(_) => AnnAlgo::CUVS_TIERED_INDEX_ALGO_IVF_PQ,
+        }
+    }
+
+    /// Type-erases the wrapped backend search params pointer to the opaque
+    /// `void*` passed to the C API. Callers are responsible for checking that
+    /// the variant matches the index's build-time algorithm.
+    pub(crate) fn as_void_ptr(&self) -> *mut std::os::raw::c_void {
+        match self {
+            Self::Cagra(inner) => inner.0 as *mut _,
+            Self::IvfFlat(inner) => inner.0 as *mut _,
+            Self::IvfPq(inner) => inner.0 as *mut _,
+        }
+    }
+}