diff --git a/c/include/cuvs/neighbors/cagra.h b/c/include/cuvs/neighbors/cagra.h index 22809da37e..7d31553165 100644 --- a/c/include/cuvs/neighbors/cagra.h +++ b/c/include/cuvs/neighbors/cagra.h @@ -211,12 +211,6 @@ struct cuvsCagraIndexParams { enum cuvsCagraGraphBuildAlgo build_algo; /** Number of Iterations to run if building with NN_DESCENT */ size_t nn_descent_niter; - /** - * Optional: specify compression parameters if compression is desired. - * - * NOTE: this is experimental new API, consider it unsafe. - */ - cuvsCagraCompressionParams_t compression; /** * Optional: specify graph build params based on build_algo * - IVF_PQ: cuvsIvfPqParams_t @@ -466,13 +460,15 @@ CUVS_EXPORT cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t par */ /** - * @brief Struct to hold address of cuvs::neighbors::cagra::index and its active trained dtype + * @brief Struct holding the CAGRA index storage address and vector element dtype (DLPack-style) * + * Matches the usual cuVS C index pattern (`addr` + `dtype`). \p addr points at implementation-owned + * storage (not always a bare `cagra::index*`); free only via \ref cuvsCagraIndexDestroy. \p dtype + * describes index vector elements for queries and template dispatch. */ typedef struct { uintptr_t addr; DLDataType dtype; - } cuvsCagraIndex; typedef cuvsCagraIndex* cuvsCagraIndex_t; diff --git a/c/src/neighbors/cagra.cpp b/c/src/neighbors/cagra.cpp index 004b810c78..7cc41568f7 100644 --- a/c/src/neighbors/cagra.cpp +++ b/c/src/neighbors/cagra.cpp @@ -6,9 +6,14 @@ #include #include #include +#include +#include +#include +#include #include #include +#include #include #include #include @@ -19,6 +24,7 @@ #include #include #include +#include #include "../core/exceptions.hpp" #include "../core/interop.hpp" @@ -28,6 +34,79 @@ namespace { +/** + * Heap-allocated bundle for the C API: owns `cagra::index` and any co-owned device storage + * when the index is not standalone. 
Lives behind `cuvsCagraIndex::addr` via `cagra_c_api_index_box`. + */ +template +struct cuvs_cagra_c_api_lifetime_holder { + /** Owns padded device dataset bytes when `DatasetViewT` is padded and the index is non-owning. */ + std::unique_ptr> padded_dataset_owner{ + nullptr}; + raft::device_matrix dataset; + cuvs::neighbors::cagra::index idx; + /** Physical merge: owns merge buffers viewed by `idx` after `cagra::merge`. */ + std::optional> merge_storage{}; +}; + +/** Owns how to delete co-located index storage; `cuvsCagraIndex::addr` points here. */ +struct cagra_c_api_index_box { + void* index_ptr; + void* owner; + void (*destroy_owner)(void*); + void* (*try_lifetime_holder_for_extend)(void* owner); +}; + +template +static void destroy_standalone_cagra_index(void* owner) +{ + delete reinterpret_cast*>(owner); +} + +template +static void destroy_c_api_holder(void* owner) +{ + delete reinterpret_cast*>(owner); +} + +static void* extend_holder_none(void*) { return nullptr; } + +static void* extend_holder_self(void* owner) { return owner; } + +template +static void assign_standalone_index(cuvsCagraIndex_t out, + DLDataType dtype, + cuvs::neighbors::cagra::index* raw) +{ + auto* box = new cagra_c_api_index_box{raw, + raw, + &destroy_standalone_cagra_index, + &extend_holder_none}; + out->addr = reinterpret_cast(box); + out->dtype = dtype; +} + +template +static void assign_lifetime_holder(cuvsCagraIndex_t out, + DLDataType dtype, + cuvs_cagra_c_api_lifetime_holder* holder) +{ + auto* box = new cagra_c_api_index_box{&holder->idx, + holder, + &destroy_c_api_holder, + &extend_holder_self}; + out->addr = reinterpret_cast(box); + out->dtype = dtype; +} + +static void destroy_cagra_c_api_box(uintptr_t addr) +{ + if (addr == 0) { return; } + auto* box = reinterpret_cast(addr); + box->destroy_owner(box->owner); + delete box; +} + static void _set_graph_build_params( std::variant -void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* dataset_tensor) 
+void _build(cuvsResources_t res, + cuvsCagraIndexParams params, + DLManagedTensor* dataset_tensor, + cuvsCagraIndex_t output_index) { auto dataset = dataset_tensor->dl_tensor; - auto res_ptr = reinterpret_cast(res); - auto index = new cuvs::neighbors::cagra::index(*res_ptr); auto index_params = cuvs::neighbors::cagra::index_params(); convert_c_index_params(params, dataset.shape[0], dataset.shape[1], &index_params); @@ -120,53 +200,152 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* if (cuvs::core::is_dlpack_device_compatible(dataset)) { using mdspan_type = raft::device_matrix_view; auto mds = cuvs::core::from_dlpack(dataset_tensor); - *index = cuvs::neighbors::cagra::build(*res_ptr, index_params, mds); + // Device `cagra::build` requires a row stride compatible with 16-byte alignment; bare DLPack + // buffers (e.g. small dim) are often tightly packed and must be copied via `make_device_padded_dataset`. + if (cuvs::neighbors::matrix_row_width_matches_cagra_required(mds)) { + auto view = cuvs::neighbors::make_device_padded_dataset_view(*res_ptr, mds); + auto index = cuvs::neighbors::cagra::build(*res_ptr, index_params, view); + index.update_dataset(*res_ptr, view); + auto* raw = new cuvs::neighbors::cagra::device_padded_index(std::move(index)); + assign_standalone_index>(output_index, output_index->dtype, raw); + } else { + auto padded = cuvs::neighbors::make_device_padded_dataset(*res_ptr, mds); + auto view = padded->as_dataset_view(); + auto index = cuvs::neighbors::cagra::build(*res_ptr, index_params, view); + index.update_dataset(*res_ptr, view); + auto* holder = new cuvs_cagra_c_api_lifetime_holder>{ + std::move(padded), + raft::device_matrix(*res_ptr), + std::move(index)}; + assign_lifetime_holder>(output_index, output_index->dtype, holder); + } } else if (cuvs::core::is_dlpack_host_compatible(dataset)) { using mdspan_type = raft::host_matrix_view; auto mds = cuvs::core::from_dlpack(dataset_tensor); - *index = 
cuvs::neighbors::cagra::build(*res_ptr, index_params, mds); + if (std::holds_alternative( + index_params.graph_build_params)) { + // build returns host_padded_index; convert graph to device device_padded_index for the holder. + // Construct the host padded view directly from the tight DLPack mdspan: ACE graph build is + // host-side CPU work and does not require CUDA row-alignment. + cuvs::neighbors::host_padded_dataset_view host_view( + mds, static_cast(mds.extent(1))); + auto host_idx = cuvs::neighbors::cagra::build(*res_ptr, index_params, host_view); + auto device_idx = cuvs::neighbors::cagra::convert_host_to_device_index(*res_ptr, host_idx); + // convert_host_to_device_index now makes an owned copy of the graph (D→H→D), so + // device_idx.graph_ is self-contained and does not borrow from host_idx. + std::unique_ptr> padded_owner = nullptr; + if (host_idx.dataset_fd().has_value()) { + // Disk-mode ACE: transfer all file descriptors from host index to device index so that + // hnsw::from_cagra can detect the disk-backed index and call serialize_to_hnswlib_from_disk. + device_idx.update_dataset(*res_ptr, std::move(*host_idx.steal_dataset_fd())); + if (host_idx.graph_fd().has_value()) { + device_idx.update_graph(*res_ptr, std::move(*host_idx.steal_graph_fd())); + } + if (host_idx.mapping_fd().has_value()) { + device_idx.update_mapping(*res_ptr, std::move(*host_idx.steal_mapping_fd())); + } + } else { + // In-memory ACE: graph-only, attach device dataset. 
+ auto padded = cuvs::neighbors::make_device_padded_dataset(*res_ptr, mds); + device_idx.update_dataset(*res_ptr, padded->as_dataset_view()); + padded_owner = std::move(padded); + } + auto* holder = new cuvs_cagra_c_api_lifetime_holder>{ + std::move(padded_owner), + raft::device_matrix(*res_ptr), + std::move(device_idx)}; + assign_lifetime_holder>(output_index, output_index->dtype, holder); + } else { + auto padded = cuvs::neighbors::make_device_padded_dataset(*res_ptr, mds); + auto view = padded->as_dataset_view(); + auto index = cuvs::neighbors::cagra::build(*res_ptr, index_params, view); + index.update_dataset(*res_ptr, view); + auto* holder = new cuvs_cagra_c_api_lifetime_holder>{ + std::move(padded), + raft::device_matrix(*res_ptr), + std::move(index)}; + assign_lifetime_holder>(output_index, output_index->dtype, holder); + } } - return index; } template -void* _from_args(cuvsResources_t res, - cuvsDistanceType _metric, - DLManagedTensor* graph_tensor, - DLManagedTensor* dataset_tensor) +void _from_args(cuvsResources_t res, + cuvsDistanceType _metric, + DLManagedTensor* graph_tensor, + DLManagedTensor* dataset_tensor, + cuvsCagraIndex_t output_index) { auto metric = static_cast((int)_metric); auto dataset = dataset_tensor->dl_tensor; auto graph = graph_tensor->dl_tensor; auto res_ptr = reinterpret_cast(res); - void* index = NULL; if (cuvs::core::is_dlpack_device_compatible(dataset)) { using mdspan_type = raft::device_matrix_view; auto mds = cuvs::core::from_dlpack(dataset_tensor); - if (cuvs::core::is_dlpack_device_compatible(graph)) { - using graph_mdspan_type = raft::device_matrix_view; - auto graph_mds = cuvs::core::from_dlpack(graph_tensor); - index = new cuvs::neighbors::cagra::index(*res_ptr, metric, mds, graph_mds); + if (cuvs::neighbors::matrix_row_width_matches_cagra_required(mds)) { + auto dataset_view = cuvs::neighbors::make_device_padded_dataset_view(*res_ptr, mds); + void* raw = nullptr; + if (cuvs::core::is_dlpack_device_compatible(graph)) { + 
using graph_mdspan_type = raft::device_matrix_view; + auto graph_mds = cuvs::core::from_dlpack(graph_tensor); + raw = new cuvs::neighbors::cagra::device_padded_index( + *res_ptr, metric, dataset_view, graph_mds); + } else { + using graph_mdspan_type = raft::host_matrix_view; + auto graph_mds = cuvs::core::from_dlpack(graph_tensor); + raw = new cuvs::neighbors::cagra::device_padded_index( + *res_ptr, metric, dataset_view, graph_mds); + } + assign_standalone_index>(output_index, + output_index->dtype, + reinterpret_cast*>(raw)); } else { - using graph_mdspan_type = raft::host_matrix_view; - auto graph_mds = cuvs::core::from_dlpack(graph_tensor); - index = new cuvs::neighbors::cagra::index(*res_ptr, metric, mds, graph_mds); + // Same as host path and cagra::_build: row pitch must be CAGRA-aligned; copy into a holder. + auto padded = cuvs::neighbors::make_device_padded_dataset(*res_ptr, mds); + auto idx = new cuvs::neighbors::cagra::device_padded_index(*res_ptr, metric); + idx->update_dataset(*res_ptr, padded->as_dataset_view()); + if (cuvs::core::is_dlpack_device_compatible(graph)) { + using graph_mdspan_type = raft::device_matrix_view; + auto graph_mds = cuvs::core::from_dlpack(graph_tensor); + idx->update_graph(*res_ptr, graph_mds); + } else { + using graph_mdspan_type = raft::host_matrix_view; + auto graph_mds = cuvs::core::from_dlpack(graph_tensor); + idx->update_graph(*res_ptr, graph_mds); + } + auto* holder = new cuvs_cagra_c_api_lifetime_holder>{ + std::move(padded), + raft::device_matrix(*res_ptr), + std::move(*idx)}; + delete idx; + assign_lifetime_holder>(output_index, output_index->dtype, holder); } } else if (cuvs::core::is_dlpack_host_compatible(dataset)) { using mdspan_type = raft::host_matrix_view; auto mds = cuvs::core::from_dlpack(dataset_tensor); + // Match build(): rows must be padded to CAGRA's alignment (see make_device_padded_dataset); a tight + // row-major copy (dim * sizeof(T) not a multiple of 16) misaligns vectorized distance loads. 
+ auto padded = cuvs::neighbors::make_device_padded_dataset(*res_ptr, mds); + auto idx = new cuvs::neighbors::cagra::device_padded_index(*res_ptr, metric); + idx->update_dataset(*res_ptr, padded->as_dataset_view()); if (cuvs::core::is_dlpack_device_compatible(graph)) { using graph_mdspan_type = raft::device_matrix_view; auto graph_mds = cuvs::core::from_dlpack(graph_tensor); - index = new cuvs::neighbors::cagra::index(*res_ptr, metric, mds, graph_mds); + idx->update_graph(*res_ptr, graph_mds); } else { using graph_mdspan_type = raft::host_matrix_view; auto graph_mds = cuvs::core::from_dlpack(graph_tensor); - index = new cuvs::neighbors::cagra::index(*res_ptr, metric, mds, graph_mds); + idx->update_graph(*res_ptr, graph_mds); } + auto* holder = new cuvs_cagra_c_api_lifetime_holder>{ + std::move(padded), + raft::device_matrix(*res_ptr), + std::move(*idx)}; + delete idx; + assign_lifetime_holder>(output_index, output_index->dtype, holder); } - return index; } template @@ -176,26 +355,63 @@ void _extend(cuvsResources_t res, DLManagedTensor* additional_dataset_tensor) { auto dataset = additional_dataset_tensor->dl_tensor; - auto index_ptr = reinterpret_cast*>(index.addr); + auto index_ptr = reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(&index)); auto res_ptr = reinterpret_cast(res); // TODO: use C struct here (see issue #487) auto extend_params = cuvs::neighbors::cagra::extend_params(); extend_params.max_chunk_size = params.max_chunk_size; + auto cur_ds = index_ptr->dataset(); + const auto stride_elems = + cur_ds.stride(0) > 0 ? 
static_cast(cur_ds.stride(0)) : static_cast(cur_ds.extent(1)); + const auto dim = static_cast(index_ptr->dim()); + const auto initial_rows = static_cast(index_ptr->size()); + + int64_t add_n = 0; if (cuvs::core::is_dlpack_device_compatible(dataset)) { using mdspan_type = raft::device_matrix_view; auto mds = cuvs::core::from_dlpack(additional_dataset_tensor); - cuvs::neighbors::cagra::extend(*res_ptr, extend_params, mds, *index_ptr); + add_n = static_cast(mds.extent(0)); } else if (cuvs::core::is_dlpack_host_compatible(dataset)) { using mdspan_type = raft::host_matrix_view; auto mds = cuvs::core::from_dlpack(additional_dataset_tensor); - cuvs::neighbors::cagra::extend(*res_ptr, extend_params, mds, *index_ptr); + add_n = static_cast(mds.extent(0)); } else { RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", dataset.dtype.code, dataset.dtype.bits); } + + auto extended_storage = + raft::make_device_matrix(*res_ptr, initial_rows + add_n, stride_elems); + auto ndv_buf = std::optional>( + raft::make_device_strided_matrix_view( + extended_storage.data_handle(), initial_rows + add_n, dim, stride_elems)); + + if (cuvs::core::is_dlpack_device_compatible(dataset)) { + using mdspan_type = raft::device_matrix_view; + auto mds = cuvs::core::from_dlpack(additional_dataset_tensor); + cuvs::neighbors::cagra::extend(*res_ptr, extend_params, mds, *index_ptr, ndv_buf); + } else { + using mdspan_type = raft::host_matrix_view; + auto mds = cuvs::core::from_dlpack(additional_dataset_tensor); + cuvs::neighbors::cagra::extend(*res_ptr, extend_params, mds, *index_ptr, ndv_buf); + } + + auto* box = reinterpret_cast(index.addr); + RAFT_EXPECTS(box != nullptr, + "cuvsCagraExtend: index handle has no storage (build the index first)."); + void* holder_void = box->try_lifetime_holder_for_extend(box->owner); + RAFT_EXPECTS(holder_void != nullptr, + "cuvsCagraExtend: extended dataset storage must be kept alive via the lifetime-holder " + "build path (e.g. 
host dataset or device dataset copied to a padded buffer)."); + + auto* holder = reinterpret_cast>*>(holder_void); + auto extended_owning = std::make_unique>( + std::move(extended_storage), index_ptr->dim()); + holder->padded_dataset_owner = std::move(extended_owning); } template @@ -208,7 +424,8 @@ void _search(cuvsResources_t res, cuvsFilter filter) { auto res_ptr = reinterpret_cast(res); - auto index_ptr = reinterpret_cast*>(index.addr); + auto index_ptr = reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(&index)); auto search_params = cuvs::neighbors::cagra::search_params(); convert_c_search_params(params, &search_params); @@ -270,7 +487,8 @@ void _serialize(cuvsResources_t res, bool include_dataset) { auto res_ptr = reinterpret_cast(res); - auto index_ptr = reinterpret_cast*>(index->addr); + auto index_ptr = reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(index)); cuvs::neighbors::cagra::serialize(*res_ptr, std::string(filename), *index_ptr, include_dataset); } @@ -278,25 +496,43 @@ template void _serialize_to_hnswlib(cuvsResources_t res, const char* filename, cuvsCagraIndex_t index) { auto res_ptr = reinterpret_cast(res); - auto index_ptr = reinterpret_cast*>(index->addr); + auto index_ptr = reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(index)); cuvs::neighbors::cagra::serialize_to_hnswlib(*res_ptr, std::string(filename), *index_ptr); } template -void* _deserialize(cuvsResources_t res, const char* filename) +void _deserialize(cuvsResources_t res, const char* filename, cuvsCagraIndex_t output_index) { auto res_ptr = reinterpret_cast(res); - auto index = new cuvs::neighbors::cagra::index(*res_ptr); - cuvs::neighbors::cagra::deserialize(*res_ptr, std::string(filename), index); - return index; + auto* holder = new cuvs_cagra_c_api_lifetime_holder>{ + nullptr, + raft::device_matrix(*res_ptr), + cuvs::neighbors::cagra::device_padded_index(*res_ptr)}; + std::unique_ptr> out_dataset; + 
cuvs::neighbors::cagra::deserialize(*res_ptr, std::string(filename), &holder->idx, &out_dataset); + holder->padded_dataset_owner = std::move(out_dataset); + + // Deserialized strided layout often matches logical dim (tight rows). CAGRA search requires the + // same row width as device builds (see `matrix_row_width_matches_cagra_required` / `update_dataset`). + auto ds = holder->idx.dataset(); + if (ds.extent(0) > 0 && !cuvs::neighbors::matrix_row_width_matches_cagra_required(ds)) { + auto padded = + cuvs::neighbors::make_device_padded_dataset(*res_ptr, ds); + holder->idx.update_dataset(*res_ptr, padded->as_dataset_view()); + holder->padded_dataset_owner = std::move(padded); + } + + assign_lifetime_holder>(output_index, output_index->dtype, holder); } template -void* _merge(cuvsResources_t res, - cuvsCagraIndexParams params, - cuvsCagraIndex_t* indices, - size_t num_indices, - cuvsFilter filter) +void _merge(cuvsResources_t res, + cuvsCagraIndexParams params, + cuvsCagraIndex_t* indices, + size_t num_indices, + cuvsFilter filter, + cuvsCagraIndex_t output_index) { auto res_ptr = reinterpret_cast(res); cuvs::neighbors::cagra::index_params params_cpp; @@ -311,11 +547,13 @@ void* _merge(cuvsResources_t res, int64_t dim = 0; if (params.build_algo == cuvsCagraGraphBuildAlgo::IVF_PQ) { auto first_idx_ptr = - reinterpret_cast*>(indices[0]->addr); + reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(indices[0])); dim = first_idx_ptr->dim(); for (size_t i = 0; i < num_indices; ++i) { auto idx_ptr = - reinterpret_cast*>(indices[i]->addr); + reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(indices[i])); total_size += idx_ptr->size(); } } @@ -326,25 +564,44 @@ void* _merge(cuvsResources_t res, total_size, dim); - std::vector*> index_ptrs; + std::vector*> index_ptrs; index_ptrs.reserve(num_indices); for (size_t i = 0; i < num_indices; ++i) { - auto idx_ptr = reinterpret_cast*>(indices[i]->addr); + auto idx_ptr = reinterpret_cast*>( + 
cuvs::neighbors::cagra::cagra_c_api_index_ptr(indices[i])); index_ptrs.push_back(idx_ptr); } if (filter.type == NO_FILTER) { - return new cuvs::neighbors::cagra::index( - cuvs::neighbors::cagra::merge(*res_ptr, params_cpp, index_ptrs)); + auto merge_storage = + cuvs::neighbors::cagra::make_merged_dataset(*res_ptr, index_ptrs); + auto merged_idx = + cuvs::neighbors::cagra::merge(*res_ptr, params_cpp, index_ptrs, merge_storage); + auto* holder = new cuvs_cagra_c_api_lifetime_holder>{ + nullptr, raft::device_matrix(*res_ptr), std::move(merged_idx)}; + holder->merge_storage = std::move(merge_storage); + assign_lifetime_holder>(output_index, output_index->dtype, holder); } else if (filter.type == BITSET) { - using filter_mdspan_type = raft::device_vector_view; + int64_t merged_row_count = 0; + for (auto* idx_ptr : index_ptrs) { + merged_row_count += static_cast(idx_ptr->size()); + } + using filter_mdspan_type = + raft::device_vector_view; auto removed_indices_tensor = reinterpret_cast(filter.addr); auto removed_indices = cuvs::core::from_dlpack(removed_indices_tensor); cuvs::core::bitset_view removed_indices_bitset( - removed_indices, total_size); - auto bitset_filter_obj = cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset); - return new cuvs::neighbors::cagra::index( - cuvs::neighbors::cagra::merge(*res_ptr, params_cpp, index_ptrs, bitset_filter_obj)); + removed_indices, merged_row_count); + auto bitset_filter_obj = + cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset); + auto merge_storage = + cuvs::neighbors::cagra::make_merged_dataset(*res_ptr, index_ptrs, bitset_filter_obj); + auto merged_idx = cuvs::neighbors::cagra::merge( + *res_ptr, params_cpp, index_ptrs, merge_storage, bitset_filter_obj); + auto* holder = new cuvs_cagra_c_api_lifetime_holder>{ + nullptr, raft::device_matrix(*res_ptr), std::move(merged_idx)}; + holder->merge_storage = std::move(merge_storage); + assign_lifetime_holder>(output_index, output_index->dtype, holder); } 
else { RAFT_FAIL("Unsupported filter type: BITMAP"); } @@ -353,14 +610,16 @@ void* _merge(cuvsResources_t res, template void get_dataset_view(cuvsCagraIndex_t index, DLManagedTensor* dataset) { - auto index_ptr = reinterpret_cast*>(index->addr); + auto index_ptr = reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(index)); cuvs::core::to_dlpack(index_ptr->dataset(), dataset); } template void get_graph_view(cuvsCagraIndex_t index, DLManagedTensor* graph) { - auto index_ptr = reinterpret_cast*>(index->addr); + auto index_ptr = reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(index)); cuvs::core::to_dlpack(index_ptr->graph(), graph); } @@ -445,16 +704,6 @@ void convert_c_index_params(cuvsCagraIndexParams params, out->graph_degree = params.graph_degree; _set_graph_build_params(out->graph_build_params, params, params.build_algo, n_rows, dim); - if (auto* cparams = params.compression; cparams != nullptr) { - auto compression_params = cuvs::neighbors::vpq_params(); - compression_params.pq_bits = cparams->pq_bits; - compression_params.pq_dim = cparams->pq_dim; - compression_params.vq_n_centers = cparams->vq_n_centers; - compression_params.kmeans_n_iters = cparams->kmeans_n_iters; - compression_params.vq_kmeans_trainset_fraction = cparams->vq_kmeans_trainset_fraction; - compression_params.pq_kmeans_trainset_fraction = cparams->pq_kmeans_trainset_fraction; - out->compression.emplace(compression_params); - } } void convert_c_search_params(cuvsCagraSearchParams params, cuvs::neighbors::cagra::search_params* out) @@ -476,33 +725,25 @@ void convert_c_search_params(cuvsCagraSearchParams params, out->persistent_lifetime = params.persistent_lifetime; out->persistent_device_usage = params.persistent_device_usage; } + +void* cagra_c_api_index_ptr(cuvsCagraIndex const* idx) +{ + // Matches `cagra_c_api_index_box::index_ptr` (first member); keep in sync with that layout. 
+ if (idx == nullptr || idx->addr == 0) { return nullptr; } + return *reinterpret_cast(idx->addr); +} } // namespace cuvs::neighbors::cagra extern "C" cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index) { - return cuvs::core::translate_exceptions([=] { *index = new cuvsCagraIndex{}; }); + return cuvs::core::translate_exceptions([=] { + *index = new cuvsCagraIndex{0, {}}; + }); } extern "C" cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index_c_ptr) { return cuvs::core::translate_exceptions([=] { - auto index = *index_c_ptr; - - if (index.dtype.code == kDLFloat && index.dtype.bits == 32) { - auto index_ptr = - reinterpret_cast*>(index.addr); - delete index_ptr; - } else if (index.dtype.code == kDLFloat && index.dtype.bits == 16) { - auto index_ptr = reinterpret_cast*>(index.addr); - delete index_ptr; - } else if (index.dtype.code == kDLInt && index.dtype.bits == 8) { - auto index_ptr = - reinterpret_cast*>(index.addr); - delete index_ptr; - } else if (index.dtype.code == kDLUInt && index.dtype.bits == 8) { - auto index_ptr = - reinterpret_cast*>(index.addr); - delete index_ptr; - } + destroy_cagra_c_api_box(index_c_ptr->addr); delete index_c_ptr; }); } @@ -510,7 +751,8 @@ extern "C" cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index_c_ptr) extern "C" cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int64_t* dim) { return cuvs::core::translate_exceptions([=] { - auto index_ptr = reinterpret_cast*>(index->addr); + auto index_ptr = reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(index)); *dim = index_ptr->dim(); }); } @@ -518,7 +760,8 @@ extern "C" cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int64_t* di extern "C" cuvsError_t cuvsCagraIndexGetSize(cuvsCagraIndex_t index, int64_t* size) { return cuvs::core::translate_exceptions([=] { - auto index_ptr = reinterpret_cast*>(index->addr); + auto index_ptr = reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(index)); *size = index_ptr->size(); }); } 
@@ -526,7 +769,8 @@ extern "C" cuvsError_t cuvsCagraIndexGetSize(cuvsCagraIndex_t index, int64_t* si extern "C" cuvsError_t cuvsCagraIndexGetGraphDegree(cuvsCagraIndex_t index, int64_t* graph_degree) { return cuvs::core::translate_exceptions([=] { - auto index_ptr = reinterpret_cast*>(index->addr); + auto index_ptr = reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(index)); *graph_degree = index_ptr->graph_degree(); }); } @@ -572,15 +816,17 @@ extern "C" cuvsError_t cuvsCagraBuild(cuvsResources_t res, { return cuvs::core::translate_exceptions([=] { auto dataset = dataset_tensor->dl_tensor; - index->dtype = dataset.dtype; + destroy_cagra_c_api_box(index->addr); + index->addr = 0; + index->dtype = dataset.dtype; if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { - index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); + _build(res, *params, dataset_tensor, index); } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 16) { - index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); + _build(res, *params, dataset_tensor, index); } else if (dataset.dtype.code == kDLInt && dataset.dtype.bits == 8) { - index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); + _build(res, *params, dataset_tensor, index); } else if (dataset.dtype.code == kDLUInt && dataset.dtype.bits == 8) { - index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); + _build(res, *params, dataset_tensor, index); } else { RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", dataset.dtype.code, @@ -597,19 +843,17 @@ extern "C" cuvsError_t cuvsCagraIndexFromArgs(cuvsResources_t res, { return cuvs::core::translate_exceptions([=] { auto dataset = dataset_tensor->dl_tensor; - index->dtype = dataset.dtype; + destroy_cagra_c_api_box(index->addr); + index->addr = 0; + index->dtype = dataset.dtype; if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { - index->addr = - 
reinterpret_cast(_from_args(res, metric, graph_tensor, dataset_tensor)); + _from_args(res, metric, graph_tensor, dataset_tensor, index); } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 16) { - index->addr = - reinterpret_cast(_from_args(res, metric, graph_tensor, dataset_tensor)); + _from_args(res, metric, graph_tensor, dataset_tensor, index); } else if (dataset.dtype.code == kDLInt && dataset.dtype.bits == 8) { - index->addr = - reinterpret_cast(_from_args(res, metric, graph_tensor, dataset_tensor)); + _from_args(res, metric, graph_tensor, dataset_tensor, index); } else if (dataset.dtype.code == kDLUInt && dataset.dtype.bits == 8) { - index->addr = - reinterpret_cast(_from_args(res, metric, graph_tensor, dataset_tensor)); + _from_args(res, metric, graph_tensor, dataset_tensor, index); } else { RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", dataset.dtype.code, @@ -711,19 +955,17 @@ extern "C" cuvsError_t cuvsCagraMerge(cuvsResources_t res, } RAFT_EXPECTS(output_index != nullptr, "Output index pointer must not be null"); output_index->dtype = dtype; // output index type matches inputs + destroy_cagra_c_api_box(output_index->addr); + output_index->addr = 0; // Dispatch based on data type if (dtype.code == kDLFloat && dtype.bits == 32) { - output_index->addr = - reinterpret_cast(_merge(res, *params, indices, num_indices, filter)); + _merge(res, *params, indices, num_indices, filter, output_index); } else if (dtype.code == kDLFloat && dtype.bits == 16) { - output_index->addr = - reinterpret_cast(_merge(res, *params, indices, num_indices, filter)); + _merge(res, *params, indices, num_indices, filter, output_index); } else if (dtype.code == kDLInt && dtype.bits == 8) { - output_index->addr = - reinterpret_cast(_merge(res, *params, indices, num_indices, filter)); + _merge(res, *params, indices, num_indices, filter, output_index); } else if (dtype.code == kDLUInt && dtype.bits == 8) { - output_index->addr = - 
reinterpret_cast(_merge(res, *params, indices, num_indices, filter)); + _merge(res, *params, indices, num_indices, filter, output_index); } else { RAFT_FAIL("Unsupported index data type: code=%d, bits=%d", dtype.code, dtype.bits); } @@ -871,6 +1113,9 @@ extern "C" cuvsError_t cuvsCagraDeserialize(cuvsResources_t res, cuvsCagraIndex_t index) { return cuvs::core::translate_exceptions([=] { + destroy_cagra_c_api_box(index->addr); + index->addr = 0; + // read the numpy dtype from the beginning of the file std::ifstream is(filename, std::ios::in | std::ios::binary); if (!is) { RAFT_FAIL("Cannot open file %s", filename); } @@ -884,16 +1129,16 @@ extern "C" cuvsError_t cuvsCagraDeserialize(cuvsResources_t res, index->dtype.bits = dtype.itemsize * 8; if (dtype.kind == 'f' && dtype.itemsize == 4) { - index->addr = reinterpret_cast(_deserialize(res, filename)); + _deserialize(res, filename, index); index->dtype.code = kDLFloat; } else if (dtype.kind == 'e' && dtype.itemsize == 2) { - index->addr = reinterpret_cast(_deserialize(res, filename)); + _deserialize(res, filename, index); index->dtype.code = kDLFloat; } else if (dtype.kind == 'i' && dtype.itemsize == 1) { - index->addr = reinterpret_cast(_deserialize(res, filename)); + _deserialize(res, filename, index); index->dtype.code = kDLInt; } else if (dtype.kind == 'u' && dtype.itemsize == 1) { - index->addr = reinterpret_cast(_deserialize(res, filename)); + _deserialize(res, filename, index); index->dtype.code = kDLUInt; } else { RAFT_FAIL("Unsupported dtype in file %s", filename); diff --git a/c/src/neighbors/cagra.hpp b/c/src/neighbors/cagra.hpp index 689bc0fb7a..eb7ce70b70 100644 --- a/c/src/neighbors/cagra.hpp +++ b/c/src/neighbors/cagra.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #include @@ -15,4 +15,7 @@ void convert_c_index_params(cuvsCagraIndexParams params, /// Converts C search params to C++ void convert_c_search_params(cuvsCagraSearchParams params, cuvs::neighbors::cagra::search_params* out); + +/** Resolves `cuvsCagraIndex::addr` to `cagra::index*`; nullptr if the handle is empty. */ +void* cagra_c_api_index_ptr(cuvsCagraIndex const* idx); } // namespace cuvs::neighbors::cagra diff --git a/c/src/neighbors/hnsw.cpp b/c/src/neighbors/hnsw.cpp index c69eda0ca0..e6b4503e1a 100644 --- a/c/src/neighbors/hnsw.cpp +++ b/c/src/neighbors/hnsw.cpp @@ -20,6 +20,7 @@ #include "../core/exceptions.hpp" #include "../core/interop.hpp" +#include "cagra.hpp" namespace { @@ -63,7 +64,8 @@ void _from_cagra(cuvsResources_t res, std::optional dataset_tensor) { auto res_ptr = reinterpret_cast(res); - auto index = reinterpret_cast*>(cagra_index->addr); + auto index = reinterpret_cast*>( + cuvs::neighbors::cagra::cagra_c_api_index_ptr(cagra_index)); auto cpp_params = cuvs::neighbors::hnsw::index_params(); cpp_params.hierarchy = static_cast(params->hierarchy); cpp_params.ef_construction = params->ef_construction; diff --git a/c/src/neighbors/mg_cagra.cpp b/c/src/neighbors/mg_cagra.cpp index 495eff8a34..1ae9fe2c61 100644 --- a/c/src/neighbors/mg_cagra.cpp +++ b/c/src/neighbors/mg_cagra.cpp @@ -84,26 +84,26 @@ extern "C" cuvsError_t cuvsMultiGpuCagraIndexDestroy(cuvsMultiGpuCagraIndex_t in // Properly clean up the templated inner object based on dtype, like single GPU API if (index->dtype.code == kDLFloat && index->dtype.bits == 32) { auto mg_index_ptr = - reinterpret_cast, + reinterpret_cast, float, uint32_t>*>(index->addr); delete mg_index_ptr; } else if (index->dtype.code == kDLFloat && index->dtype.bits == 16) { auto mg_index_ptr = - reinterpret_cast, + reinterpret_cast, half, uint32_t>*>(index->addr); delete mg_index_ptr; } else if (index->dtype.code == kDLInt && index->dtype.bits == 8) { auto mg_index_ptr = 
reinterpret_cast< cuvs::neighbors:: - mg_index, int8_t, uint32_t>*>( + mg_index, int8_t, uint32_t>*>( index->addr); delete mg_index_ptr; } else if (index->dtype.code == kDLUInt && index->dtype.bits == 8) { auto mg_index_ptr = reinterpret_cast< cuvs::neighbors:: - mg_index, uint8_t, uint32_t>*>( + mg_index, uint8_t, uint32_t>*>( index->addr); delete mg_index_ptr; } @@ -159,7 +159,7 @@ void* _mg_build(cuvsResources_t res, auto mds = cuvs::core::from_dlpack(dataset_tensor); auto mg_index = - new cuvs::neighbors::mg_index, T, uint32_t>( + new cuvs::neighbors::mg_index, T, uint32_t>( cuvs::neighbors::cagra::build(*res_ptr, mg_params, mds)); return mg_index; @@ -175,7 +175,7 @@ void _mg_search(cuvsResources_t res, { auto res_ptr = reinterpret_cast(res); auto mg_index_ptr = reinterpret_cast< - cuvs::neighbors::mg_index, T, uint32_t>*>( + cuvs::neighbors::mg_index, T, uint32_t>*>( index.addr); auto mg_search_params = @@ -202,7 +202,7 @@ void _mg_extend(cuvsResources_t res, { auto res_ptr = reinterpret_cast(res); auto mg_index_ptr = reinterpret_cast< - cuvs::neighbors::mg_index, T, uint32_t>*>( + cuvs::neighbors::mg_index, T, uint32_t>*>( index.addr); using vectors_mdspan_type = raft::host_matrix_view; @@ -222,7 +222,7 @@ void _mg_serialize(cuvsResources_t res, cuvsMultiGpuCagraIndex index, const char { auto res_ptr = reinterpret_cast(res); auto mg_index_ptr = reinterpret_cast< - cuvs::neighbors::mg_index, T, uint32_t>*>( + cuvs::neighbors::mg_index, T, uint32_t>*>( index.addr); cuvs::neighbors::cagra::serialize(*res_ptr, *mg_index_ptr, std::string(filename)); @@ -233,7 +233,7 @@ void* _mg_deserialize(cuvsResources_t res, const char* filename) { auto res_ptr = reinterpret_cast(res); auto mg_index = - new cuvs::neighbors::mg_index, T, uint32_t>( + new cuvs::neighbors::mg_index, T, uint32_t>( cuvs::neighbors::cagra::deserialize(*res_ptr, std::string(filename))); return mg_index; @@ -244,7 +244,7 @@ void* _mg_distribute(cuvsResources_t res, const char* filename) { auto res_ptr 
= reinterpret_cast(res); auto mg_index = - new cuvs::neighbors::mg_index, T, uint32_t>( + new cuvs::neighbors::mg_index, T, uint32_t>( cuvs::neighbors::cagra::distribute(*res_ptr, std::string(filename))); return mg_index; diff --git a/c/src/neighbors/tiered_index.cpp b/c/src/neighbors/tiered_index.cpp index 2a7d54b16d..1d5b7a80bc 100644 --- a/c/src/neighbors/tiered_index.cpp +++ b/c/src/neighbors/tiered_index.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -71,7 +71,7 @@ void* _build(cuvsResources_t res, cuvsTieredIndexParams params, DLManagedTensor* case CUVS_TIERED_INDEX_ALGO_CAGRA: { auto build_params = tiered_index::index_params(); convert_c_index_params(params, dataset.shape[0], dataset.shape[1], &build_params); - return new tiered_index::index>( + return new tiered_index::index>( tiered_index::build(*res_ptr, build_params, mds)); } case CUVS_TIERED_INDEX_ALGO_IVF_FLAT: { @@ -219,7 +219,7 @@ extern "C" cuvsError_t cuvsTieredIndexDestroy(cuvsTieredIndex_t index_c_ptr) switch (index.algo) { case CUVS_TIERED_INDEX_ALGO_CAGRA: { auto index_ptr = - reinterpret_cast>*>(index.addr); + reinterpret_cast>*>(index.addr); delete index_ptr; break; } @@ -292,7 +292,7 @@ extern "C" cuvsError_t cuvsTieredIndexSearch(cuvsResources_t res, switch (index.algo) { case CUVS_TIERED_INDEX_ALGO_CAGRA: { - _search>( + _search>( res, search_params, index, queries_tensor, neighbors_tensor, distances_tensor, filter); break; } @@ -336,7 +336,7 @@ extern "C" cuvsError_t cuvsTieredIndexExtend(cuvsResources_t res, auto index = *index_c_ptr; switch (index.algo) { case CUVS_TIERED_INDEX_ALGO_CAGRA: { - _extend>(res, new_vectors, index); + _extend>(res, new_vectors, index); break; } case CUVS_TIERED_INDEX_ALGO_IVF_FLAT: { @@ -363,7 +363,7 @@ extern "C" cuvsError_t cuvsTieredIndexMerge(cuvsResources_t res, switch (indices[0]->algo) { case 
CUVS_TIERED_INDEX_ALGO_CAGRA: { - _merge>(res, *params, indices, num_indices, output_index); + _merge>(res, *params, indices, num_indices, output_index); break; } case CUVS_TIERED_INDEX_ALGO_IVF_FLAT: { diff --git a/c/tests/neighbors/ann_cagra_c.cu b/c/tests/neighbors/ann_cagra_c.cu index 9c14bbea7d..5f9a060800 100644 --- a/c/tests/neighbors/ann_cagra_c.cu +++ b/c/tests/neighbors/ann_cagra_c.cu @@ -165,12 +165,23 @@ TEST(CagraC, BuildExtendSearch) (main_data_size + additional_data_size + num_queries) * dimensions, stream); rmm::device_uvector random_labels_d( (main_data_size + additional_data_size + num_queries) * dimensions, stream); - raft::random::make_blobs(random_data_d.data(), - random_labels_d.data(), - main_data_size + additional_data_size + num_queries, - dimensions, - 10, - stream); + + raft::random::make_blobs( + random_data_d.data(), + random_labels_d.data(), + main_data_size + additional_data_size + num_queries, + dimensions, + static_cast(10), + stream, + true, + nullptr, + nullptr, + 1.0f, + true, + -10.0f, + 10.0f, + 42ULL, + raft::random::GenPC); // create dataset DLTensor rmm::device_uvector main_d(main_data_size * dimensions, stream); @@ -321,7 +332,6 @@ TEST(CagraC, BuildExtendSearch) cuvsCagraSearch( res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor, filter); - // check neighbors ASSERT_TRUE( cuvs::devArrMatch(min_cols.data_handle(), neighbors_d.data(), 4, cuvs::Compare())); diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 677f3a63f1..50bc0de903 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -20,12 +20,13 @@ PYDISTCHECK_ARGS=( # PyPI hard limit is 1GiB, but try to keep these as small as possible if [[ "${package_dir}" == "python/libcuvs" ]]; then if [[ "${RAPIDS_CUDA_MAJOR}" == "12" ]]; then + # Cap is below PyPI’s 1 GiB limit; raise when the shipped libcuvs.so grows. 
PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '350Mi' + --max-allowed-size-compressed '450Mi' ) else PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '220Mi' + --max-allowed-size-compressed '270Mi' ) fi elif [[ "${package_dir}" != "python/cuvs" ]]; then diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h index 57b47d97db..beb6245a6a 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h @@ -330,13 +330,6 @@ void parse_build_param(const nlohmann::json& conf, cuvs::neighbors::cagra::index std::max(params.graph_degree, params.intermediate_graph_degree); } - nlohmann::json comp_search_conf = collect_conf_with_prefix(conf, "compression_"); - if (!comp_search_conf.empty()) { - auto vpq_pams = params.compression.value_or(cuvs::neighbors::vpq_params{}); - parse_build_param(comp_search_conf, vpq_pams); - params.compression.emplace(vpq_pams); - } - if (conf.contains("guarantee_connectivity")) { params.guarantee_connectivity = conf.at("guarantee_connectivity"); } diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann_wrapper.h index 24246feda3..7e38d39ee7 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann_wrapper.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -10,10 +10,13 @@ #include #include +#include +#include #include "../common/ann_types.hpp" #include "../diskann/diskann_wrapper.h" #include "cuvs_ann_bench_utils.h" +#include #include #include @@ -165,18 +168,28 @@ void cuvs_cagra_diskann::save(const std::string& file) const // try allocating a buffer for the dataset on host try { - const cuvs::neighbors::strided_dataset* strided_dataset = - dynamic_cast*>( - const_cast*>(&cagra_build_.get_index()->data())); - if (strided_dataset == nullptr) { - RAFT_LOG_DEBUG("dynamic_cast to strided_dataset failed"); + auto const* idx_ptr = cagra_build_.get_index(); + std::optional> h_dataset = std::nullopt; + auto const& data_view = idx_ptr->data(); + if constexpr (cuvs::neighbors::is_padded_dataset_view_v>) { + auto const& v = data_view; + auto n_rows = v.n_rows(); + auto dim = v.dim(); + auto stride = v.stride(); + h_dataset.emplace(raft::make_host_matrix(n_rows, dim)); + raft::copy_matrix(h_dataset->data_handle(), + dim, + v.view().data_handle(), + stride, + dim, + n_rows, + raft::resource::get_cuda_stream(handle_)); } else { - auto h_dataset = - raft::make_host_matrix(strided_dataset->n_rows(), strided_dataset->dim()); - raft::copy(h_dataset.data_handle(), - strided_dataset->view().data_handle(), - strided_dataset->n_rows() * strided_dataset->dim(), - raft::resource::get_cuda_stream(handle_)); + RAFT_LOG_DEBUG("dataset serialization: index dataset is not device_padded_dataset_view"); + } + + if (h_dataset.has_value()) { + raft::resource::sync_stream(handle_); std::string dataset_base_file = file + ".data"; std::ofstream dataset_of(dataset_base_file, std::ios::out | std::ios::binary); if (!dataset_of) { RAFT_FAIL("Cannot open file %s", dataset_base_file.c_str()); } @@ -187,7 +200,7 @@ void cuvs_cagra_diskann::save(const std::string& file) const dataset_of.write((char*)&size, sizeof(int)); dataset_of.write((char*)&dim, sizeof(int)); for (int i = 0; i < size; i++) { - 
dataset_of.write((char*)(h_dataset.data_handle() + i * h_dataset.extent(1)), + dataset_of.write((char*)(h_dataset->data_handle() + i * h_dataset->extent(1)), dim * sizeof(T)); } dataset_of.close(); diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h index db618f6559..dd69e07557 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h @@ -82,11 +82,7 @@ class cuvs_cagra_hnswlib : public algo, public algo_gpu { template void cuvs_cagra_hnswlib::build(const T* dataset, size_t nrow) { - // when the data set is on host, we can pass it directly to HNSW - bool dataset_is_on_host = raft::get_device_for_address(dataset) == -1; - auto dataset_view = raft::make_host_matrix_view(dataset, nrow, this->dim_); - // convert the index to HNSW format hnsw_index_ = cuvs::neighbors::hnsw::build(handle_, build_param_.hnsw_index_params, dataset_view); } diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h index 87111e4761..7f4c39be57 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h @@ -74,6 +74,7 @@ enum class CagraMergeType { kPhysical, kLogical }; template class cuvs_cagra : public algo, public algo_gpu { public: + using index_type = cuvs::neighbors::cagra::device_padded_index; using search_param_base = typename algo::search_param; using algo::dim_; using algo::metric_; @@ -162,7 +163,7 @@ class cuvs_cagra : public algo, public algo_gpu { void save_to_hnswlib(const std::string& file) const; std::unique_ptr> copy() override; - auto get_index() const -> const cuvs::neighbors::cagra::index* { return index_.get(); } + auto get_index() const -> const index_type* { return index_.get(); } private: // handle_ must go first to make sure it dies last and all memory allocated in pool @@ -175,7 +176,7 @@ class cuvs_cagra : public algo, public algo_gpu { 
build_param index_params_; bool need_dataset_update_{true}; cuvs::neighbors::cagra::search_params search_params_; - std::shared_ptr> index_; + std::shared_ptr index_; std::shared_ptr> graph_; std::shared_ptr> dataset_; std::shared_ptr> input_dataset_v_; @@ -188,7 +189,13 @@ class cuvs_cagra : public algo, public algo_gpu { bool dynamic_batching_conservative_dispatch_; std::shared_ptr filter_; - std::vector>> sub_indices_; + std::vector> sub_indices_; + std::shared_ptr>> + sub_dataset_buffers_ = + std::make_shared>>(); + std::shared_ptr> deserialized_dataset_; + std::vector>> + sub_deserialized_datasets_; inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type) { @@ -206,15 +213,87 @@ void cuvs_cagra::build(const T* dataset, size_t nrow) auto dataset_extents = raft::make_extents(nrow, dim_); auto params = index_params_.cagra_params(dataset_extents, parse_metric_type(metric_)); + // Use int64_t throughout so that device copies are compatible with dataset_ (device_matrix) and so that host padded dataset views carry the correct index type. + auto dataset_extents_i64 = + raft::make_extents(static_cast(nrow), static_cast(dim_)); auto dataset_view_host = - raft::make_mdspan(dataset, dataset_extents); - auto dataset_view_device = - raft::make_mdspan(dataset, dataset_extents); + raft::make_mdspan(dataset, dataset_extents_i64); bool dataset_is_on_host = raft::get_device_for_address(dataset) == -1; + // Host mdspan + ace_params: `cagra::build` dispatches to ACE. Non-ACE from host uses padded + // uses `cagra::build(res, params, dataset_view)` with a padded device dataset (or upload + // host data first). Used for both single-split and logical multi-split build paths. + bool const use_ace_host = + dataset_is_on_host && std::holds_alternative( + params.graph_build_params); if (index_params_.num_dataset_splits <= 1) { - index_ = std::make_shared>(std::move( - dataset_is_on_host ? 
cuvs::neighbors::cagra::build(handle_, params, dataset_view_host) - : cuvs::neighbors::cagra::build(handle_, params, dataset_view_device))); + if (use_ace_host) { + // ACE build is always graph-only; build the graph from a host_padded_dataset_view (required + // by the new build() API), then upload and attach a device padded copy for search. + // The input data may not satisfy CAGRA's per-row alignment; create an owning host-padded + // copy when needed, or a zero-copy view when the stride already matches. + const uint32_t req_stride = + cuvs::neighbors::cagra_required_row_width(static_cast(dim_), 16); + std::unique_ptr> host_padded_own; + std::optional> host_pdv; + if (static_cast(dim_) == req_stride) { + host_pdv = cuvs::neighbors::make_host_padded_dataset_view(dataset_view_host); + } else { + host_padded_own = cuvs::neighbors::make_host_padded_dataset(handle_, dataset_view_host); + host_pdv = host_padded_own->as_dataset_view(); + } + auto ace_host_index = cuvs::neighbors::cagra::build(handle_, params, *host_pdv); + auto padded = cuvs::neighbors::make_device_padded_dataset(handle_, dataset_view_host); + auto ace_index = cuvs::neighbors::cagra::attach_device_dataset_on_host_index( + handle_, ace_host_index, padded->as_dataset_view()); + *dataset_ = std::move(padded->data_); + index_ = std::make_shared(std::move(ace_index)); + } else { + // Non-ACE CAGRA build must use cagra::build(res, params, dataset_view) from + // make_device_padded_dataset / make_device_padded_dataset_view; the host mdspan and raw + // device mdspan entry points are not valid for these graph types. + // Host + non-ACE: copy to a device buffer first, then use the same path + // as a native device pointer. 
+ raft::device_matrix_view mds; + if (dataset_is_on_host) { + *dataset_ = std::move(raft::make_device_matrix( + handle_, static_cast(nrow), static_cast(dim_))); + raft::copy(dataset_->data_handle(), + dataset, + static_cast(nrow) * dim_, + raft::resource::get_cuda_stream(handle_)); + mds = raft::make_device_matrix_view( + dataset_->data_handle(), static_cast(nrow), static_cast(dim_)); + } else { + mds = raft::make_device_matrix_view( + dataset, static_cast(nrow), static_cast(dim_)); + } + const uint32_t required_stride = + cuvs::neighbors::cagra_required_row_width(static_cast(mds.extent(1)), 16); + const uint32_t src_stride = mds.stride(0) > 0 ? static_cast(mds.stride(0)) + : static_cast(mds.extent(1)); + cudaPointerAttributes ptr_attrs{}; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&ptr_attrs, mds.data_handle())); + const bool device_src = (reinterpret_cast(ptr_attrs.devicePointer) != nullptr); + // `cagra::index` is move-only; use a non-const `index` per branch so + // `std::move(index)` moves (a const `index` would try to copy the deleted + // cagra::index copy ctor). 
+ if (device_src && src_stride == required_stride) { + auto const pdv = cuvs::neighbors::make_device_padded_dataset_view(handle_, mds); + *input_dataset_v_ = raft::make_device_matrix_view( + mds.data_handle(), static_cast(nrow), static_cast(dim_)); + auto index = cuvs::neighbors::cagra::build(handle_, params, pdv); + index.update_dataset(handle_, pdv); + index_ = std::make_shared(std::move(index)); + } else { + auto padded = cuvs::neighbors::make_device_padded_dataset(handle_, mds); + auto view = padded->as_dataset_view(); + auto index = cuvs::neighbors::cagra::build(handle_, params, view); + index.update_dataset(handle_, view); + *dataset_ = std::move(padded->data_); + index_ = std::make_shared(std::move(index)); + } + } } else { IdxT rows_per_split = raft::ceildiv(nrow, static_cast(index_params_.num_dataset_splits)); @@ -225,37 +304,120 @@ void cuvs_cagra::build(const T* dataset, size_t nrow) const T* sub_ptr = dataset + static_cast(start) * dim_; auto sub_host = raft::make_host_matrix_view(sub_ptr, rows, dim_); - auto sub_dev = - raft::make_device_matrix_view(sub_ptr, rows, dim_); + auto sub_dev = raft::make_device_matrix_view( + sub_ptr, static_cast(rows), static_cast(dim_)); - auto sub_index = cuvs::neighbors::cagra::index(handle_, params.metric); + auto sub_index = index_type(handle_, params.metric); if (index_params_.merge_type == CagraMergeType::kPhysical) { if (dataset_is_on_host) { - sub_index.update_dataset(handle_, sub_host); + sub_dataset_buffers_->emplace_back( + raft::make_device_matrix(handle_, rows, dim_)); + raft::copy(sub_dataset_buffers_->back().data_handle(), + sub_ptr, + static_cast(rows) * dim_, + raft::resource::get_cuda_stream(handle_)); + cuvs::neighbors::device_padded_dataset_view dv( + raft::make_const_mdspan(sub_dataset_buffers_->back().view()), dim_); + sub_index.update_dataset(handle_, dv); } else { - sub_index.update_dataset(handle_, sub_dev); + if (cuvs::neighbors::matrix_row_width_matches_cagra_required(sub_dev)) { + auto pdv = 
cuvs::neighbors::make_device_padded_dataset_view(handle_, sub_dev); + sub_index.update_dataset(handle_, pdv); + } else { + auto padded = cuvs::neighbors::make_device_padded_dataset(handle_, sub_dev); + sub_dataset_buffers_->push_back(std::move(padded->data_)); + cuvs::neighbors::device_padded_dataset_view pdv( + raft::make_const_mdspan(sub_dataset_buffers_->back().view()), dim_); + sub_index.update_dataset(handle_, pdv); + } } } if (index_params_.merge_type == CagraMergeType::kLogical) { - if (dataset_is_on_host) { - sub_index = cuvs::neighbors::cagra::build(handle_, params, sub_host); + if (use_ace_host) { + // ACE build is always graph-only; build the graph from a host_padded_dataset_view + // (required by the new build() API), then upload and attach a device padded copy. + const uint32_t req_stride_sub = + cuvs::neighbors::cagra_required_row_width(static_cast(dim_), 16); + std::unique_ptr> host_padded_sub_own; + std::optional> host_pdv_sub; + if (static_cast(dim_) == req_stride_sub) { + host_pdv_sub = cuvs::neighbors::make_host_padded_dataset_view(sub_host); + } else { + host_padded_sub_own = cuvs::neighbors::make_host_padded_dataset(handle_, sub_host); + host_pdv_sub = host_padded_sub_own->as_dataset_view(); + } + auto ace_host_index = cuvs::neighbors::cagra::build(handle_, params, *host_pdv_sub); + auto padded_sub = cuvs::neighbors::make_device_padded_dataset(handle_, sub_host); + sub_index = cuvs::neighbors::cagra::attach_device_dataset_on_host_index( + handle_, ace_host_index, padded_sub->as_dataset_view()); + sub_dataset_buffers_->push_back(std::move(padded_sub->data_)); + } else if (dataset_is_on_host) { + sub_dataset_buffers_->emplace_back(raft::make_device_matrix( + handle_, static_cast(rows), static_cast(dim_))); + raft::copy(sub_dataset_buffers_->back().data_handle(), + sub_ptr, + static_cast(rows) * dim_, + raft::resource::get_cuda_stream(handle_)); + auto mds_sub = raft::make_device_matrix_view( + sub_dataset_buffers_->back().data_handle(), 
static_cast(rows), dim_); + const uint32_t req_sub = cuvs::neighbors::cagra_required_row_width( + static_cast(mds_sub.extent(1)), 16); + const uint32_t src_sub = mds_sub.stride(0) > 0 ? static_cast(mds_sub.stride(0)) + : static_cast(mds_sub.extent(1)); + cudaPointerAttributes sub_attrs{}; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&sub_attrs, mds_sub.data_handle())); + const bool sub_device = (reinterpret_cast(sub_attrs.devicePointer) != nullptr); + if (sub_device && src_sub == req_sub) { + auto pdv_sub = cuvs::neighbors::make_device_padded_dataset_view(handle_, mds_sub); + sub_index = cuvs::neighbors::cagra::build(handle_, params, pdv_sub); + sub_index.update_dataset(handle_, pdv_sub); + } else { + auto padded_sub = cuvs::neighbors::make_device_padded_dataset(handle_, mds_sub); + auto view = padded_sub->as_dataset_view(); + auto index = cuvs::neighbors::cagra::build(handle_, params, view); + index.update_dataset(handle_, view); + sub_dataset_buffers_->push_back(std::move(padded_sub->data_)); + sub_index = std::move(index); + } } else { - sub_index = cuvs::neighbors::cagra::build(handle_, params, sub_dev); + auto mds_sub = sub_dev; + const uint32_t req_sub = cuvs::neighbors::cagra_required_row_width( + static_cast(mds_sub.extent(1)), 16); + const uint32_t src_sub = mds_sub.stride(0) > 0 ? 
static_cast(mds_sub.stride(0)) + : static_cast(mds_sub.extent(1)); + cudaPointerAttributes sub_attrs{}; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&sub_attrs, mds_sub.data_handle())); + const bool sub_device = (reinterpret_cast(sub_attrs.devicePointer) != nullptr); + if (sub_device && src_sub == req_sub) { + auto pdv_sub = cuvs::neighbors::make_device_padded_dataset_view(handle_, mds_sub); + sub_index = cuvs::neighbors::cagra::build(handle_, params, pdv_sub); + sub_index.update_dataset(handle_, pdv_sub); + } else { + auto padded_sub = cuvs::neighbors::make_device_padded_dataset(handle_, mds_sub); + auto view = padded_sub->as_dataset_view(); + auto index = cuvs::neighbors::cagra::build(handle_, params, view); + index.update_dataset(handle_, view); + sub_dataset_buffers_->push_back(std::move(padded_sub->data_)); + sub_index = std::move(index); + } } } - auto sub_index_shared = - std::make_shared>(std::move(sub_index)); + auto sub_index_shared = std::make_shared(std::move(sub_index)); sub_indices_.push_back(std::move(sub_index_shared)); } if (index_params_.merge_type == CagraMergeType::kPhysical) { - std::vector*> indices; + std::vector indices; indices.reserve(sub_indices_.size()); for (auto& ptr : sub_indices_) { indices.push_back(ptr.get()); } - index_ = std::make_shared>( - std::move(cuvs::neighbors::cagra::merge(handle_, params, indices))); + cuvs::neighbors::filtering::none_sample_filter merge_row_filter; + auto merge_storage = + cuvs::neighbors::cagra::make_merged_dataset(handle_, indices, merge_row_filter); + index_ = std::make_shared( + cuvs::neighbors::cagra::merge(handle_, params, indices, merge_storage, merge_row_filter)); + *dataset_ = std::move(merge_storage.merged_storage); } } } @@ -315,7 +477,9 @@ void cuvs_cagra::set_search_param(const search_param_base& param, // First free up existing memory *dataset_ = raft::make_device_matrix(handle_, 0, 0); - index_->update_dataset(handle_, make_const_mdspan(dataset_->view())); + 
cuvs::neighbors::device_padded_dataset_view empty_dv( + raft::make_device_matrix_view(static_cast(nullptr), 0, this->dim_), this->dim_); + index_->update_dataset(handle_, empty_dv); // Allocate space using the correct memory resource. RAFT_LOG_DEBUG("moving dataset to new memory space: %s", @@ -324,9 +488,11 @@ void cuvs_cagra::set_search_param(const search_param_base& param, auto mr = get_mr(dataset_mem_); cuvs::neighbors::cagra::detail::copy_with_padding(handle_, *dataset_, *input_dataset_v_, mr); - auto dataset_view = raft::make_device_strided_matrix_view( - dataset_->data_handle(), dataset_->extent(0), this->dim_, dataset_->extent(1)); - index_->update_dataset(handle_, dataset_view); + cuvs::neighbors::device_padded_dataset_view dv( + raft::make_device_matrix_view( + dataset_->data_handle(), dataset_->extent(0), dataset_->extent(1)), + this->dim_); + index_->update_dataset(handle_, dv); need_dataset_update_ = false; needs_dynamic_batcher_update = true; @@ -362,6 +528,7 @@ void cuvs_cagra::set_search_dataset(const T* dataset, size_t nrow) if (index_params_.num_dataset_splits > 1 && index_params_.merge_type == CagraMergeType::kLogical) { bool dataset_is_on_host = raft::get_device_for_address(dataset) == -1; + if (dataset_is_on_host) { sub_dataset_buffers_->clear(); } IdxT rows_per_split = raft::ceildiv(nrow, static_cast(index_params_.num_dataset_splits)); for (size_t i = 0; i < sub_indices_.size(); ++i) { @@ -369,32 +536,43 @@ void cuvs_cagra::set_search_dataset(const T* dataset, size_t nrow) if (start >= nrow) break; IdxT rows = std::min(rows_per_split, static_cast(nrow) - start); const T* sub_ptr = dataset + static_cast(start) * dim_; - auto sub_host = - raft::make_host_matrix_view(sub_ptr, rows, dim_); - auto sub_dev = - raft::make_device_matrix_view(sub_ptr, rows, dim_); + auto sub_dev = raft::make_device_matrix_view( + sub_ptr, static_cast(rows), static_cast(dim_)); auto sub_index = sub_indices_[i].get(); if (index_params_.merge_type == 
CagraMergeType::kLogical) { if (dataset_is_on_host) { - sub_index->update_dataset(handle_, sub_host); + sub_dataset_buffers_->emplace_back( + raft::make_device_matrix(handle_, rows, dim_)); + raft::copy(sub_dataset_buffers_->back().data_handle(), + sub_ptr, + static_cast(rows) * dim_, + raft::resource::get_cuda_stream(handle_)); + cuvs::neighbors::device_padded_dataset_view dv( + raft::make_const_mdspan(sub_dataset_buffers_->back().view()), dim_); + sub_index->update_dataset(handle_, dv); } else { - sub_index->update_dataset(handle_, sub_dev); + if (cuvs::neighbors::matrix_row_width_matches_cagra_required(sub_dev)) { + auto pdv = cuvs::neighbors::make_device_padded_dataset_view(handle_, sub_dev); + sub_index->update_dataset(handle_, pdv); + } else { + auto padded = cuvs::neighbors::make_device_padded_dataset(handle_, sub_dev); + sub_dataset_buffers_->push_back(std::move(padded->data_)); + cuvs::neighbors::device_padded_dataset_view pdv( + raft::make_const_mdspan(sub_dataset_buffers_->back().view()), dim_); + sub_index->update_dataset(handle_, pdv); + } } } } need_dataset_update_ = false; } else { - using ds_idx_type = decltype(index_->data().n_rows()); - bool is_vpq = - dynamic_cast*>(&index_->data()) || - dynamic_cast*>(&index_->data()); // It can happen that we are re-using a previous algo object which already has // the dataset set. Check if we need update. if (static_cast(input_dataset_v_->extent(0)) != nrow || input_dataset_v_->data_handle() != dataset) { *input_dataset_v_ = raft::make_device_matrix_view(dataset, nrow, this->dim_); - need_dataset_update_ = !is_vpq; // ignore update if this is a VPQ dataset. 
+ need_dataset_update_ = true; } } } @@ -412,11 +590,7 @@ void cuvs_cagra::save(const std::string& file) const f << sub_indices_.size(); f.close(); } else { - using ds_idx_type = decltype(index_->data().n_rows()); - bool is_vpq = - dynamic_cast*>(&index_->data()) || - dynamic_cast*>(&index_->data()); - cuvs::neighbors::cagra::serialize(handle_, file, *index_, is_vpq); + cuvs::neighbors::cagra::serialize(handle_, file, *index_, true); } } @@ -437,22 +611,52 @@ void cuvs_cagra::load(const std::string& file) meta >> count; meta.close(); sub_indices_.clear(); + sub_deserialized_datasets_.resize(count); for (size_t i = 0; i < count; ++i) { std::string subfile = file + (i == 0 ? "" : ".subidx." + std::to_string(i)); - auto sub_index = std::make_shared>(handle_); - cuvs::neighbors::cagra::deserialize(handle_, subfile, sub_index.get()); + auto sub_index = std::make_shared(handle_); + std::unique_ptr> tmp_ds; + cuvs::neighbors::cagra::deserialize(handle_, subfile, sub_index.get(), &tmp_ds); + sub_deserialized_datasets_[i] = + std::shared_ptr>(std::move(tmp_ds)); sub_indices_.push_back(std::move(sub_index)); } } else { - index_ = std::make_shared>(handle_); - cuvs::neighbors::cagra::deserialize(handle_, file, index_.get()); + index_ = std::make_shared(handle_); + deserialized_dataset_.reset(); + std::unique_ptr> tmp_ds; + cuvs::neighbors::cagra::deserialize(handle_, file, index_.get(), &tmp_ds); + deserialized_dataset_ = + std::shared_ptr>(std::move(tmp_ds)); } } template std::unique_ptr> cuvs_cagra::copy() { - return std::make_unique>(std::cref(*this)); // use copy constructor + auto out = std::make_unique>(metric_, dim_, index_params_); + out->refine_ratio_ = refine_ratio_; + out->graph_mem_ = graph_mem_; + out->dataset_mem_ = dataset_mem_; + out->need_dataset_update_ = need_dataset_update_; + out->search_params_ = search_params_; + out->index_ = index_; + out->graph_ = graph_; + out->dataset_ = dataset_; + out->input_dataset_v_ = + std::make_shared>( + *input_dataset_v_); 
+ out->dynamic_batcher_ = dynamic_batcher_; + out->dynamic_batcher_sp_ = dynamic_batcher_sp_; + out->dynamic_batching_max_batch_size_ = dynamic_batching_max_batch_size_; + out->dynamic_batching_n_queues_ = dynamic_batching_n_queues_; + out->dynamic_batching_conservative_dispatch_ = dynamic_batching_conservative_dispatch_; + out->filter_ = filter_; + out->sub_indices_ = sub_indices_; + out->sub_dataset_buffers_ = sub_dataset_buffers_; + out->deserialized_dataset_ = deserialized_dataset_; + out->sub_deserialized_datasets_ = sub_deserialized_datasets_; + return out; } template @@ -482,7 +686,7 @@ void cuvs_cagra::search_base( } else { if (index_params_.merge_type == CagraMergeType::kLogical) { // TODO: index merge must happen outside of search, otherwise what are we benchmarking? - std::vector*> cagra_indices; + std::vector cagra_indices; cagra_indices.reserve(sub_indices_.size()); for (auto& ptr : sub_indices_) { cagra_indices.push_back(ptr.get()); diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h index 1c254c4e7e..078ff8f5fc 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -77,7 +77,8 @@ class cuvs_mg_cagra : public algo, public algo_gpu { float refine_ratio_; build_param index_params_; cuvs::neighbors::mg_search_params search_params_; - std::shared_ptr, T, IdxT>> + std::shared_ptr< + cuvs::neighbors::mg_index, T, IdxT>> index_; }; @@ -93,9 +94,9 @@ void cuvs_mg_cagra::build(const T* dataset, size_t nrow) auto dataset_view = raft::make_host_matrix_view(dataset, nrow, dim_); auto idx = cuvs::neighbors::cagra::build(clique_, build_params, dataset_view); - index_ = - std::make_shared, T, IdxT>>( - std::move(idx)); + index_ = std::make_shared< + cuvs::neighbors::mg_index, T, IdxT>>( + std::move(idx)); } inline auto allocator_to_string(AllocatorType mem_type) -> std::string; @@ -126,9 +127,9 @@ void cuvs_mg_cagra::save(const std::string& file) const template void cuvs_mg_cagra::load(const std::string& file) { - index_ = - std::make_shared, T, IdxT>>( - std::move(cuvs::neighbors::cagra::deserialize(clique_, file))); + index_ = std::make_shared< + cuvs::neighbors::mg_index, T, IdxT>>( + std::move(cuvs::neighbors::cagra::deserialize(clique_, file))); } template diff --git a/cpp/cmake/patches/faiss-1.14-cuvs-26.08.diff b/cpp/cmake/patches/faiss-1.14-cuvs-26.08.diff new file mode 100644 index 0000000000..eee2746a81 --- /dev/null +++ b/cpp/cmake/patches/faiss-1.14-cuvs-26.08.diff @@ -0,0 +1,262 @@ +diff --git a/faiss/gpu/impl/BinaryCuvsCagra.cu b/faiss/gpu/impl/BinaryCuvsCagra.cu +index b331fdc..c7b5733 100644 +--- a/faiss/gpu/impl/BinaryCuvsCagra.cu ++++ b/faiss/gpu/impl/BinaryCuvsCagra.cu +@@ -58,7 +58,6 @@ BinaryCuvsCagra::BinaryCuvsCagra( + + index_params_.intermediate_graph_degree = intermediate_graph_degree; + index_params_.graph_degree = graph_degree; +- index_params_.attach_dataset_on_build = store_dataset; + + index_params_.metric = cuvs::distance::DistanceType::BitwiseHamming; + +@@ -110,12 +109,14 @@ BinaryCuvsCagra::BinaryCuvsCagra( + auto dataset_mds = + 
raft::make_device_matrix_view( + train_dataset, n, dim / 8); ++ auto dataset_view = ++ cuvs::neighbors::make_device_padded_dataset_view(raft_handle, dataset_mds); + + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + raft_handle, + cuvs::distance::DistanceType::BitwiseHamming, +- dataset_mds, ++ dataset_view, + raft::make_const_mdspan(knn_graph_copy.view())); + } else if (!distances_on_gpu && !knn_graph_on_gpu) { + // copy idx_t (int64_t) host knn_graph to uint32_t host knn_graph +@@ -128,12 +129,14 @@ BinaryCuvsCagra::BinaryCuvsCagra( + + auto dataset_mds = raft::make_host_matrix_view( + train_dataset, n, dim / 8); ++ host_to_device_dataset_ = ++ cuvs::neighbors::make_device_padded_dataset(raft_handle, dataset_mds); + + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + raft_handle, + cuvs::distance::DistanceType::BitwiseHamming, +- dataset_mds, ++ host_to_device_dataset_->as_dataset_view(), + raft::make_const_mdspan(knn_graph_copy.view())); + } else { + FAISS_THROW_MSG( +@@ -166,17 +169,23 @@ void BinaryCuvsCagra::train(idx_t n, const uint8_t* x) { + if (getDeviceForAddress(x) >= 0) { + auto dataset = raft::make_device_matrix_view( + x, n, dim_ / 8); ++ auto dataset_view = ++ cuvs::neighbors::make_device_padded_dataset_view(raft_handle, dataset); + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + cuvs::neighbors::cagra::build( +- raft_handle, index_params_, dataset)); ++ raft_handle, index_params_, dataset_view)); ++ store_dataset_ = true; + } else { + auto dataset = raft::make_host_matrix_view( + x, n, dim_ / 8); ++ host_to_device_dataset_ = ++ cuvs::neighbors::make_device_padded_dataset(raft_handle, dataset); + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + cuvs::neighbors::cagra::build( +- raft_handle, index_params_, dataset)); ++ raft_handle, index_params_, ++ host_to_device_dataset_->as_dataset_view())); + } + } + +@@ -212,14 +221,20 @@ void BinaryCuvsCagra::search( + + if 
(!store_dataset_) { + if (getDeviceForAddress(storage_) >= 0) { ++ host_to_device_dataset_.reset(); + auto dataset = + raft::make_device_matrix_view( + storage_, n_, dim_ / 8); +- cuvs_index->update_dataset(raft_handle, dataset); ++ auto dataset_view = ++ cuvs::neighbors::make_device_padded_dataset_view(raft_handle, dataset); ++ cuvs_index->update_dataset(raft_handle, dataset_view); + } else { +- auto dataset = raft::make_host_matrix_view( ++ auto host_dataset = raft::make_host_matrix_view( + storage_, n_, dim_ / 8); +- cuvs_index->update_dataset(raft_handle, dataset); ++ host_to_device_dataset_ = ++ cuvs::neighbors::make_device_padded_dataset(raft_handle, host_dataset); ++ cuvs_index->update_dataset(raft_handle, ++ host_to_device_dataset_->as_dataset_view()); + } + store_dataset_ = true; + } +@@ -280,6 +295,7 @@ void BinaryCuvsCagra::search( + + void BinaryCuvsCagra::reset() { + cuvs_index.reset(); ++ host_to_device_dataset_.reset(); + } + + idx_t BinaryCuvsCagra::get_knngraph_degree() const { +diff --git a/faiss/gpu/impl/BinaryCuvsCagra.cuh b/faiss/gpu/impl/BinaryCuvsCagra.cuh +index a14480b..7cbfe39 100644 +--- a/faiss/gpu/impl/BinaryCuvsCagra.cuh ++++ b/faiss/gpu/impl/BinaryCuvsCagra.cuh +@@ -28,11 +28,13 @@ + #include + #include + #include ++#include + #include + + #include + + #include ++#include + + namespace faiss { + +@@ -115,6 +117,10 @@ class BinaryCuvsCagra { + /// Parameters to build CAGRA graph using NN Descent + size_t nn_descent_niter_ = 20; + ++ /// Device padded copy when `storage_` is host memory (KNN-graph ctor path). 
++ std::unique_ptr> ++ host_to_device_dataset_; ++ + /// Instance of trained cuVS CAGRA index + std::shared_ptr> + cuvs_index{nullptr}; +diff --git a/faiss/gpu/impl/CuvsCagra.cu b/faiss/gpu/impl/CuvsCagra.cu +index 755817f..0eb03ae 100644 +--- a/faiss/gpu/impl/CuvsCagra.cu ++++ b/faiss/gpu/impl/CuvsCagra.cu +@@ -75,7 +75,6 @@ CuvsCagra::CuvsCagra( + + index_params_.intermediate_graph_degree = intermediate_graph_degree; + index_params_.graph_degree = graph_degree; +- index_params_.attach_dataset_on_build = store_dataset; + index_params_.guarantee_connectivity = guarantee_connectivity; + + if (!ivf_pq_search_params_) { +@@ -133,12 +132,14 @@ CuvsCagra::CuvsCagra( + + auto dataset_mds = raft::make_device_matrix_view( + dataset, n, dim); ++ auto dataset_view = ++ cuvs::neighbors::make_device_padded_dataset_view(raft_handle, dataset_mds); + + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + raft_handle, + metricFaissToCuvs(metric_, false), +- dataset_mds, ++ dataset_view, + raft::make_const_mdspan(knn_graph_copy.view())); + } else if (!dataset_on_gpu && !knn_graph_on_gpu) { + // copy idx_t (int64_t) host knn_graph to uint32_t host knn_graph +@@ -151,12 +152,14 @@ CuvsCagra::CuvsCagra( + + auto dataset_mds = raft::make_host_matrix_view( + dataset, n, dim); ++ host_to_device_dataset_ = ++ cuvs::neighbors::make_device_padded_dataset(raft_handle, dataset_mds); + + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + raft_handle, + metricFaissToCuvs(metric_, false), +- dataset_mds, ++ host_to_device_dataset_->as_dataset_view(), + raft::make_const_mdspan(knn_graph_copy.view())); + } else { + FAISS_THROW_MSG( +@@ -203,17 +206,23 @@ void CuvsCagra::train(idx_t n, const data_t* x) { + if (getDeviceForAddress(x) >= 0) { + auto dataset = raft::make_device_matrix_view( + x, n, dim_); ++ auto dataset_view = ++ cuvs::neighbors::make_device_padded_dataset_view(raft_handle, dataset); + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + 
cuvs::neighbors::cagra::build( +- raft_handle, index_params_, dataset)); ++ raft_handle, index_params_, dataset_view)); ++ store_dataset_ = true; + } else { + auto dataset = + raft::make_host_matrix_view(x, n, dim_); ++ host_to_device_dataset_ = ++ cuvs::neighbors::make_device_padded_dataset(raft_handle, dataset); + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + cuvs::neighbors::cagra::build( +- raft_handle, index_params_, dataset)); ++ raft_handle, index_params_, ++ host_to_device_dataset_->as_dataset_view())); + } + } + +@@ -248,13 +257,19 @@ void CuvsCagra::search( + + if (!store_dataset_) { + if (getDeviceForAddress(storage_) >= 0) { ++ host_to_device_dataset_.reset(); + auto dataset = raft::make_device_matrix_view( + storage_, n_, dim_); +- cuvs_index->update_dataset(raft_handle, dataset); ++ auto dataset_view = ++ cuvs::neighbors::make_device_padded_dataset_view(raft_handle, dataset); ++ cuvs_index->update_dataset(raft_handle, dataset_view); + } else { +- auto dataset = raft::make_host_matrix_view( ++ auto host_dataset = raft::make_host_matrix_view( + storage_, n_, dim_); +- cuvs_index->update_dataset(raft_handle, dataset); ++ host_to_device_dataset_ = ++ cuvs::neighbors::make_device_padded_dataset(raft_handle, host_dataset); ++ cuvs_index->update_dataset(raft_handle, ++ host_to_device_dataset_->as_dataset_view()); + } + store_dataset_ = true; + } +@@ -303,6 +318,7 @@ void CuvsCagra::search( + template + void CuvsCagra::reset() { + cuvs_index.reset(); ++ host_to_device_dataset_.reset(); + } + + template +diff --git a/faiss/gpu/impl/CuvsCagra.cuh b/faiss/gpu/impl/CuvsCagra.cuh +index a10e9fb..b5c2bcd 100644 +--- a/faiss/gpu/impl/CuvsCagra.cuh ++++ b/faiss/gpu/impl/CuvsCagra.cuh +@@ -27,12 +27,14 @@ + #include + #include + #include ++#include + #include + + #include + + #include + #include ++#include + + namespace faiss { + +@@ -147,6 +149,10 @@ class CuvsCagra { + /// Parameter to use MST optimization to guarantee graph connectivity + bool 
guarantee_connectivity_ = false; + ++ /// Device padded copy when `storage_` is host memory (KNN-graph ctor path). ++ std::unique_ptr> ++ host_to_device_dataset_; ++ + /// Instance of trained cuVS CAGRA index + std::shared_ptr> cuvs_index{ + nullptr}; diff --git a/cpp/cmake/patches/faiss_override.json b/cpp/cmake/patches/faiss_override.json index fa106bdf89..3d0f79e128 100644 --- a/cpp/cmake/patches/faiss_override.json +++ b/cpp/cmake/patches/faiss_override.json @@ -24,6 +24,11 @@ "file" : "${current_json_dir}/faiss-1.14-cuvs-26.06.diff", "issue" : "Migrate from removed rmm::mr::device_memory_resource to CCCL memory resources", "fixed_in" : "" + }, + { + "file" : "${current_json_dir}/faiss-1.14-cuvs-26.08.diff", + "issue" : "Update Faiss cuVS to be compatible with new Dataset API: update_dataset now takes dataset_view and make_padded_dataset_view must be called beforehand. Loading an index built from a user-provided KNN graph passes dataset_view into cagra::index, not raw mdspan.", + "fixed_in" : "" } ] } diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index 637e40c340..e9b1b40810 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -5,12 +5,14 @@ #pragma once -#include "common.hpp" #include +#include #include +#include #include #include #include +#include #include #include #include @@ -28,6 +30,8 @@ #include #include +#include +#include #include #include #include @@ -151,12 +155,6 @@ struct index_params : cuvs::neighbors::index_params { size_t intermediate_graph_degree = 128; /** Degree of output graph. */ size_t graph_degree = 64; - /** - * Specify compression parameters if compression is desired. If set, overrides the - * attach_dataset_on_build (and the compressed dataset is always added to the index). - */ - std::optional compression = std::nullopt; - /** Parameters for graph building. 
* * Set ivf_pq_params, nn_descent_params, ace_params, or iterative_search_params to select the @@ -193,31 +191,32 @@ struct index_params : cuvs::neighbors::index_params { bool guarantee_connectivity = false; /** - * Whether to add the dataset content to the index, i.e.: + * Whether to attach the dataset to the index after graph construction, i.e.: + * + * - `true` (default) means `build` attaches the input dataset as a **non-owning view** to the + * index, so the index is ready to search immediately after `build` returns. The caller is + * responsible for keeping the underlying dataset storage alive for as long as the index is used. + * - `false` means `build` only builds the graph and the caller is expected to attach the dataset + * separately via `cuvs::neighbors::cagra::update_dataset` before searching. + * + * Unlike the legacy behavior, no copy of the dataset is made: the index always stores a view. + * Setting `attach_dataset_on_build = false` is useful when the caller needs to apply specific + * memory placement or transformation (e.g. moving to managed memory) before attaching. * - * - `true` means the index is filled with the dataset vectors and ready to search after calling - * `build` provided there is enough memory available. - * - `false` means `build` only builds the graph and the user is expected to - * update the dataset using cuvs::neighbors::cagra::update_dataset. + * **Note:** this flag is only effective when building from a device dataset view + * (e.g. `device_padded_dataset_view`). For host builds (`host_padded_dataset_view`), it is + * ignored — the returned `host_padded_index` cannot be searched regardless, and the caller must + * always call `attach_device_dataset_on_host_index` to obtain a search-ready device index. * - * Regardless of the value of `attach_dataset_on_build`, the search graph is created using all - * the vectors in the dataset. 
Setting `attach_dataset_on_build = false` can be useful if - * the user needs to build only the search graph but does not intend to search it using CAGRA - * (e.g. search using another graph search algorithm), or if specific memory placement options - * need to be applied on the dataset before it is attached to the index using `update_dataset`. - * API. * @code{.cpp} - * auto dataset = raft::make_device_matrix(res, n_rows, n_cols); - * // use default index_parameters + * auto dataset = cuvs::neighbors::make_device_padded_dataset(res, host_matrix.view()); * cagra::index_params index_params; - * // update index_params to only build the CAGRA graph + * // Build graph only — caller attaches dataset later. * index_params.attach_dataset_on_build = false; - * auto index = cagra::build(res, index_params, dataset.view()); - * // assert that the dataset is not attached to the index - * ASSERT(index.dataset().extent(0) == 0); - * // update dataset - * index.update_dataset(res, dataset.view()); - * // The index is now ready for search + * auto index = cagra::build(res, index_params, dataset->as_dataset_view()); + * // ASSERT(index.size() == 0); // no dataset yet + * // Attach with a view (storage owned by `dataset`). + * index.update_dataset(res, dataset->as_dataset_view()); * cagra::search(res, search_params, index, queries, neighbors, distances); * @endcode */ @@ -379,6 +378,12 @@ struct extend_params { static_assert(std::is_aggregate_v); static_assert(std::is_aggregate_v); +template > +struct index; + /** * @defgroup cagra_cpp_index CAGRA index type * @{ @@ -392,9 +397,10 @@ static_assert(std::is_aggregate_v); * @tparam T data element type * @tparam IdxT the data type used to store the neighbor indices in the search graph. * It must be large enough to represent values up to dataset.extent(0). 
+ * @tparam DatasetViewT concrete non-owning dataset view type stored by the index * */ -template +template struct CUVS_EXPORT index : cuvs::neighbors::index { using index_params_type = cagra::index_params; using search_params_type = cagra::search_params; @@ -416,7 +422,7 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { /** Total length of the index (number of vectors). */ [[nodiscard]] constexpr inline auto size() const noexcept -> IdxT { - auto data_rows = dataset_->n_rows(); + auto data_rows = dataset_.n_rows(); if (dataset_fd_.has_value()) { return n_rows_; } return data_rows > 0 ? data_rows : graph_view_.extent(0); } @@ -424,7 +430,7 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { /** Dimensionality of the data. */ [[nodiscard]] constexpr inline auto dim() const noexcept -> uint32_t { - return dataset_fd_.has_value() ? dim_ : dataset_->dim(); + return dataset_fd_.has_value() ? dim_ : dataset_.dim(); } /** Graph degree */ [[nodiscard]] constexpr inline auto graph_degree() const noexcept -> uint32_t @@ -432,20 +438,14 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { return dataset_fd_.has_value() ? graph_degree_ : graph_view_.extent(1); } - [[nodiscard]] inline auto dataset() const noexcept + [[nodiscard]] inline auto dataset() const -> raft::device_matrix_view { - auto p = dynamic_cast*>(dataset_.get()); - if (p != nullptr) { return p->view(); } - auto d = dataset_->dim(); - return raft::make_device_strided_matrix_view(nullptr, 0, d, d); + return cuvs::neighbors::cagra::dataset_view_to_strided_device_matrix(dataset_); } - /** Dataset [size, dim] */ - [[nodiscard]] inline auto data() const noexcept -> const cuvs::neighbors::dataset& - { - return *dataset_; - } + /** Non-owning dataset binding stored by the index. 
*/ + [[nodiscard]] inline auto data() const noexcept -> DatasetViewT const& { return dataset_; } /** neighborhood graph [size, graph-degree] */ [[nodiscard]] inline auto graph() const noexcept @@ -471,6 +471,19 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { return dataset_fd_; } + /** + * Move out the dataset file descriptor (for disk-backed index). + * + * Intended for host-to-device index conversion: steal the fd from a host_padded_index and + * then call `update_dataset(res, std::move(*stolen_fd))` on the target device index. + * Clears the stored fd (and leaves n_rows_/dim_ in place for the remaining graph). + */ + [[nodiscard]] inline auto steal_dataset_fd() noexcept + -> std::optional + { + return std::exchange(dataset_fd_, std::nullopt); + } + /** Get the graph file descriptor (for disk-backed index) */ [[nodiscard]] inline auto graph_fd() const noexcept -> const std::optional& @@ -478,6 +491,15 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { return graph_fd_; } + /** + * Move the graph file descriptor out of this index (for transferring ownership to another + * index). Leaves graph_fd_ as nullopt; graph_degree_ remains intact for metadata. + */ + [[nodiscard]] inline auto steal_graph_fd() noexcept -> std::optional + { + return std::exchange(graph_fd_, std::nullopt); + } + /** Get the mapping file descriptor (for disk-backed index) */ [[nodiscard]] inline auto mapping_fd() const noexcept -> const std::optional& @@ -485,6 +507,16 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { return mapping_fd_; } + /** + * Move the mapping file descriptor out of this index (for transferring ownership to another + * index). Leaves mapping_fd_ as nullopt. 
+ */ + [[nodiscard]] inline auto steal_mapping_fd() noexcept + -> std::optional + { + return std::exchange(mapping_fd_, std::nullopt); + } + /** Dataset norms for cosine distance [size] */ [[nodiscard]] inline auto dataset_norms() const noexcept -> std::optional> @@ -502,74 +534,65 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { ~index() = default; /** \endcond */ - /** Construct an empty index. */ + /** Construct a graph-only index with a zero-row dataset view placeholder. */ index(raft::resources const& res, cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) + requires(cuvs::neighbors::cagra_dataset_view) : cuvs::neighbors::index(), metric_(metric), graph_(raft::make_device_matrix(res, 0, 0)), - dataset_(new cuvs::neighbors::empty_dataset(0)), + dataset_([] { + if constexpr (cuvs::neighbors::is_empty_dataset_view_v) { + return DatasetViewT{0}; + } else if constexpr (cuvs::neighbors::is_device_padded_dataset_view_v) { + auto v = raft::make_device_matrix_view( + static_cast(nullptr), int64_t{0}, uint32_t{0}); + return DatasetViewT(v, uint32_t{0}); + } else if constexpr (cuvs::neighbors::is_host_padded_dataset_view_v) { + auto v = raft::make_host_matrix_view( + static_cast(nullptr), int64_t{0}, uint32_t{0}); + return DatasetViewT(v, uint32_t{0}); + } else if constexpr (cuvs::neighbors::is_vpq_dataset_view_v) { + return DatasetViewT{}; + } else { + static_assert(sizeof(DatasetViewT) == 0, "index: unsupported dataset view type"); + } + }()), dataset_norms_(std::nullopt) { } - /** Construct an index from dataset and knn_graph arrays - * - * If the dataset and graph is already in GPU memory, then the index is just a thin wrapper around - * these that stores a non-owning a reference to the arrays. - * - * The constructor also accepts host arrays. In that case they are copied to the device, and the - * device arrays will be owned by the index. + /** Construct an index from a `dataset_view` and knn_graph. 
* - * In case the dasates rows are not 16 bytes aligned, then we create a padded copy in device - * memory to ensure alignment for vectorized load. + * Stores a shallow copy of the dataset view. The index stores a **non-owning** view; the caller + * must keep underlying device storage alive for the index lifetime. * - * Usage examples: - * - * - Cagra index is normally created by the cagra::build + * Example — **non-owning** `make_device_padded_dataset_view` (wraps an existing device matrix; + * that matrix must outlive the index): * @code{.cpp} - * using namespace cuvs::neighbors; - * auto dataset = raft::make_host_matrix(n_rows, n_cols); - * load_dataset(dataset.view()); - * // use default index parameters - * cagra::index_params index_params; - * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); - * // use default search parameters - * cagra::search_params search_params; - * // search K nearest neighbours - * auto neighbors = raft::make_device_matrix(res, n_queries, k); - * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); + * raft::device_matrix_view dataset = ...; + * auto view = cuvs::neighbors::make_device_padded_dataset_view(res, dataset); + * auto graph = raft::make_device_matrix_view(...); + * cuvs::neighbors::cagra::device_padded_index idx(res, metric, view, + * raft::make_const_mdspan(graph)); * @endcode - * In the above example, we have passed a host dataset to build. The returned index will own a - * device copy of the dataset and the knn_graph. In contrast, if we pass the dataset as a - * device_mdspan to build, then it will only store a reference to it. * - * - Constructing index using existing knn-graph + * Example — **owning** `make_device_padded_dataset` returns owning storage (`std::unique_ptr`). + * You must + * **keep that object alive** (e.g. 
hold the `unique_ptr` in a variable or member) for as long as + * the index uses the dataset; the index does not take ownership of the buffer. * @code{.cpp} - * using namespace cuvs::neighbors; - * - * auto dataset = raft::make_device_matrix(res, n_rows, n_cols); - * auto knn_graph = raft::make_device_matrix(res, n_rows, graph_degree); - * - * // custom loading and graph creation - * // load_dataset(dataset.view()); - * // create_knn_graph(knn_graph.view()); - * - * // Wrap the existing device arrays into an index structure - * cagra::index index(res, metric, raft::make_const_mdspan(dataset.view()), - * raft::make_const_mdspan(knn_graph.view())); - * - * // Both knn_graph and dataset objects have to be in scope while the index is used because - * // the index only stores a reference to these. - * cagra::search(res, search_params, index, queries, neighbors, distances); + * auto padded_owner = cuvs::neighbors::make_device_padded_dataset(res, dataset_mdspan); + * auto view = padded_owner->as_dataset_view(); + * cuvs::neighbors::cagra::device_padded_index idx(res, metric, view, + * raft::make_const_mdspan(graph)); + * // `padded_owner` must outlive `idx` (do not let it go out of scope while `idx` is used). 
* @endcode */ - template + template index(raft::resources const& res, cuvs::distance::DistanceType metric, - raft::mdspan, raft::row_major, data_accessor> dataset, + DatasetViewT const& dataset, raft::mdspan, raft::row_major, @@ -577,104 +600,33 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { : cuvs::neighbors::index(), metric_(metric), graph_(raft::make_device_matrix(res, 0, 0)), - dataset_(make_aligned_dataset(res, dataset, 16)), + dataset_(dataset), dataset_norms_(std::nullopt) { - RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0), + RAFT_EXPECTS(dataset.n_rows() == static_cast(knn_graph.extent(0)), "Dataset and knn_graph must have equal number of rows"); update_graph(res, knn_graph); if (metric_ == cuvs::distance::DistanceType::CosineExpanded) { - auto p = dynamic_cast*>(dataset_.get()); - if (p) { - auto dataset_view = p->view(); - if (dataset_view.extent(0) > 0) { compute_dataset_norms_(res); } - } + if (dataset.n_rows() > 0) { compute_dataset_norms_(res); } } raft::resource::sync_stream(res); } /** - * Replace the dataset with a new dataset. - * - * If the new dataset rows are aligned on 16 bytes, then only a reference is stored to the - * dataset. It is the caller's responsibility to ensure that dataset stays alive as long as the - * index. It is expected that the same set of vectors are used for update_dataset and index build. + * Replace the dataset with a new `dataset_view`. * - * Note: This will clear any precomputed dataset norms. + * The index stores a copy of the view handle only (not the vector storage). The caller must + * keep the underlying device data alive. Clears precomputed norms. 
*/ - void update_dataset(raft::resources const& res, - raft::device_matrix_view dataset) - { - dataset_ = make_aligned_dataset(res, dataset, 16); - dataset_norms_.reset(); - - if (metric() == cuvs::distance::DistanceType::CosineExpanded) { - if (dataset.extent(0) > 0) { compute_dataset_norms_(res); } - } - } - - /** Set the dataset reference explicitly to a device matrix view with padding. */ - void update_dataset(raft::resources const& res, - raft::device_matrix_view dataset) + void update_dataset(raft::resources const& res, DatasetViewT const& dataset) + requires cuvs::neighbors::is_device_dataset_view_v { - dataset_ = make_aligned_dataset(res, dataset, 16); + dataset_ = dataset; dataset_norms_.reset(); - if (metric() == cuvs::distance::DistanceType::CosineExpanded) { - if (dataset.extent(0) > 0) { compute_dataset_norms_(res); } - } - } - - /** - * Replace the dataset with a new dataset. - * - * We create a copy of the dataset on the device. The index manages the lifetime of this copy. It - * is expected that the same set of vectors are used for update_dataset and index build. - * - * Note: This will clear any precomputed dataset norms. - */ - void update_dataset(raft::resources const& res, - raft::host_matrix_view dataset) - { - dataset_ = make_aligned_dataset(res, dataset, 16); - dataset_norms_.reset(); - if (metric() == cuvs::distance::DistanceType::CosineExpanded) { - if (dataset.extent(0) > 0) { compute_dataset_norms_(res); } - } - } - - /** - * Replace the dataset with a new dataset. It is expected that the same set of vectors are used - * for update_dataset and index build. - * - * Note: This will clear any precomputed dataset norms. 
- */ - template - auto update_dataset(raft::resources const& res, DatasetT&& dataset) - -> std::enable_if_t, DatasetT>> - { - dataset_ = std::make_unique(std::move(dataset)); - dataset_norms_.reset(); - if (metric() == cuvs::distance::DistanceType::CosineExpanded) { - auto p = dynamic_cast*>(dataset_.get()); - if (p) { - auto dataset_view = p->view(); - if (dataset_view.extent(0) > 0) { compute_dataset_norms_(res); } - } - } - } - - template - auto update_dataset(raft::resources const& res, std::unique_ptr&& dataset) - -> std::enable_if_t, DatasetT>> - { - dataset_ = std::move(dataset); - dataset_norms_.reset(); - if (metric() == cuvs::distance::DistanceType::CosineExpanded) { - auto dataset_view = this->dataset(); - if (dataset_view.extent(0) > 0) { compute_dataset_norms_(res); } + if (dataset_.n_rows() > 0) { compute_dataset_norms_(res); } } } @@ -787,7 +739,19 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { // Re-open the file descriptor in read-only mode for subsequent operations dataset_fd_.emplace(std::move(fd)); - dataset_ = std::make_unique>(0); + if constexpr (cuvs::neighbors::is_device_padded_dataset_view_v) { + auto v = raft::make_device_matrix_view( + static_cast(nullptr), int64_t{0}, dim_); + dataset_ = DatasetViewT(v, dim_); + } else if constexpr (cuvs::neighbors::is_host_padded_dataset_view_v) { + auto v = raft::make_host_matrix_view( + static_cast(nullptr), int64_t{0}, dim_); + dataset_ = DatasetViewT(v, dim_); + } else if constexpr (cuvs::neighbors::is_empty_dataset_view_v) { + dataset_ = DatasetViewT{dim_}; + } else { + RAFT_FAIL("update_dataset(fd): unsupported DatasetViewT for disk-backed dataset"); + } dataset_norms_.reset(); } @@ -871,12 +835,11 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { cuvs::distance::DistanceType metric_; raft::device_matrix graph_; raft::device_matrix_view graph_view_; - std::unique_ptr> dataset_; + DatasetViewT dataset_; // Mapping from internal graph node indices to the original user-provided indices. 
std::optional> source_indices_; // only float distances supported at the moment std::optional> dataset_norms_; - // File descriptors for disk-backed index components (ACE disk mode) std::optional dataset_fd_; std::optional graph_fd_; @@ -888,332 +851,92 @@ struct CUVS_EXPORT index : cuvs::neighbors::index { size_t graph_degree_ = 0; }; -/** - * @} - */ +/** CAGRA index with the usual padded device dataset view (graph build output type). */ +template +using device_padded_index = index>; -/** - * @defgroup cagra_cpp_index_build CAGRA index build functions - * @{ - */ +/** CAGRA index with a host-resident padded dataset view (returned by host build path). */ +template +using host_padded_index = index>; -/** - * @brief Build the index from the dataset for efficient search. - * - * The build consist of two steps: build an intermediate knn-graph, and optimize it to - * create the final graph. The index_params struct controls the node degree of these - * graphs. - * - * The following distance metrics are supported: - * - L2 - * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) - * - CosineExpanded - * - L1 (currently only supported with NN-Descent and Iterative Search as the build algorithm) - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * // use default index parameters - * cagra::index_params index_params; - * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); - * // use default search parameters - * cagra::search_params search_params; - * // search K nearest neighbours - * auto neighbors = raft::make_device_matrix(res, n_queries, k); - * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); - * @endcode - * - * @param[in] res - * @param[in] params parameters for building the index - * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, 
dim] - * - * @return the constructed cagra index - */ -auto build(raft::resources const& res, - const cuvs::neighbors::cagra::index_params& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::cagra::index; +/** CAGRA index with a device-resident VPQ dataset (f16 codebook vectors). */ +template +using vpq_f16_index = index>; -/** - * @brief Build the index from the dataset for efficient search. - * - * The build consist of two steps: build an intermediate knn-graph, and optimize it to - * create the final graph. The index_params struct controls the node degree of these - * graphs. - * - * The following distance metrics are supported: - * - L2 - * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) - * - CosineExpanded - * - L1 (currently only supported with NN-Descent and Iterative Search as the build algorithm) - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * // use default index parameters - * cagra::index_params index_params; - * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); - * // use default search parameters - * cagra::search_params search_params; - * // search K nearest neighbours - * auto neighbors = raft::make_device_matrix(res, n_queries, k); - * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); - * @endcode - * - * @param[in] res - * @param[in] params parameters for building the index - * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim] - * - * @return the constructed cagra index - */ -auto build(raft::resources const& res, - const cuvs::neighbors::cagra::index_params& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::cagra::index; +/** CAGRA index with a device-resident VPQ dataset (f32 codebook vectors). 
*/ +template +using vpq_f32_index = index>; -/** - * @brief Build the index from the dataset for efficient search. - * - * The build consist of two steps: build an intermediate knn-graph, and optimize it to - * create the final graph. The index_params struct controls the node degree of these - * graphs. - * - * The following distance metrics are supported: - * - L2 - * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) - * - CosineExpanded (dataset norms are computed as float regardless of input data type) - * - L1 (currently only supported with NN-Descent and Iterative Search as the build algorithm) - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * // use default index parameters - * cagra::index_params index_params; - * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); - * // use default search parameters - * cagra::search_params search_params; - * // search K nearest neighbours - * auto neighbors = raft::make_device_matrix(res, n_queries, k); - * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); - * @endcode - * - * @param[in] res - * @param[in] params parameters for building the index - * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim] - * - * @return the constructed cagra index - */ -auto build(raft::resources const& res, - const cuvs::neighbors::cagra::index_params& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::cagra::index; +/** Index type returned by `cagra::build(res, params, dataset_view)`. */ +template +using cagra_index_t = index, + uint32_t, + cuvs::neighbors::dataset_view_type_t>; /** - * @brief Build the index from the dataset for efficient search. - * - * The build consist of two steps: build an intermediate knn-graph, and optimize it to - * create the final graph. 
The index_params struct controls the node degree of these - * graphs. - * - * The following distance metrics are supported: - * - L2 - * - CosineExpanded (dataset norms are computed as float regardless of input data type) - * - L1 (currently only supported with NN-Descent and Iterative Search as the build algorithm) - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * // use default index parameters - * cagra::index_params index_params; - * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); - * // use default search parameters - * cagra::search_params search_params; - * // search K nearest neighbours - * auto neighbors = raft::make_device_matrix(res, n_queries, k); - * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); - * @endcode - * - * @param[in] res - * @param[in] params parameters for building the index - * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim] - * - * @return the constructed cagra index + * @} */ -auto build(raft::resources const& res, - const cuvs::neighbors::cagra::index_params& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::cagra::index; /** - * @brief Build the index from the dataset for efficient search. - * - * The build consist of two steps: build an intermediate knn-graph, and optimize it to - * create the final graph. The index_params struct controls the node degree of these - * graphs. 
- * - * The following distance metrics are supported: - * - L2 - * - CosineExpanded (dataset norms are computed as float regardless of input data type) - * - L1 (currently only supported with NN-Descent and Iterative Search as the build algorithm) - * - BitwiseHamming (currently only supported with NN-Descent and Iterative Search as the build - * algorithm, and only for int8_t and uint8_t data types) + * @brief Row counts and strides for a CAGRA merge (metadata only; no GPU storage). * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * // use default index parameters - * cagra::index_params index_params; - * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); - * // use default search parameters - * cagra::search_params search_params; - * // search K nearest neighbours - * auto neighbors = raft::make_device_matrix(res, n_queries, k); - * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); - * @endcode - * - * @param[in] res - * @param[in] params parameters for building the index - * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim] - * - * @return the constructed cagra index + * A populated instance is carried inside `merged_dataset_storage` together with the owning + * device matrices allocated by `make_merged_dataset`. */ -auto build(raft::resources const& res, - const cuvs::neighbors::cagra::index_params& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::cagra::index; +struct merged_dataset { + int64_t merged_rows{}; ///< Full concatenation row count (staging for merge + filter). + int64_t filtered_rows{}; ///< Dataset rows the merged index will reference (filtered or full). + int64_t stride_elements{}; ///< Row pitch in elements (>= dim, matches input index rows). 
+ uint32_t dim{}; + bool bitset_filtered{}; ///< If true, `merged_dataset_storage` holds a second matrix for rows + ///< after the bitset filter. +}; /** - * @brief Build the index from the dataset for efficient search. - * - * The build consist of two steps: build an intermediate knn-graph, and optimize it to - * create the final graph. The index_params struct controls the node degree of these - * graphs. - * - * The following distance metrics are supported: - * - L2 - * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) - * - CosineExpanded (dataset norms are computed as float regardless of input data type) - * - L1 (currently only supported with NN-Descent and Iterative Search as the build algorithm) - * - BitwiseHamming (currently only supported with NN-Descent and Iterative Search as the build - * algorithm, and only for int8_t and uint8_t data types) - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * // use default index parameters - * cagra::index_params index_params; - * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); - * // use default search parameters - * cagra::search_params search_params; - * // search K nearest neighbours - * auto neighbors = raft::make_device_matrix(res, n_queries, k); - * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); - * @endcode - * - * @param[in] res - * @param[in] params parameters for building the index - * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim] + * @brief Device storage for a physical CAGRA merge, allocated by `make_merged_dataset`. * - * @return the constructed cagra index + * Owns the full-merge staging matrix (`merged_storage`) and, when `layout.bitset_filtered` is + * true, the filtered output matrix (`filtered_storage`). 
`merge` writes into these buffers and + * returns an index that views them; keep this object alive while using that index. */ -auto build(raft::resources const& res, - const cuvs::neighbors::cagra::index_params& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::cagra::index; +template +struct merged_dataset_storage { + merged_dataset layout{}; + raft::device_matrix merged_storage; + std::optional> filtered_storage{}; +}; /** - * @brief Build the index from the dataset for efficient search. - * - * The build consist of two steps: build an intermediate knn-graph, and optimize it to - * create the final graph. The index_params struct controls the node degree of these - * graphs. - * - * The following distance metrics are supported: - * - L2 - * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) - * - CosineExpanded (dataset norms are computed as float regardless of input data type) - * - L1 (currently only supported with NN-Descent and Iterative Search as the build algorithm) - * - BitwiseHamming (currently only supported with NN-Descent and Iterative Search as the build - * algorithm, and only for int8_t and uint8_t data types) - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * // use default index parameters - * cagra::index_params index_params; - * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); - * // use default search parameters - * cagra::search_params search_params; - * // search K nearest neighbours - * auto neighbors = raft::make_device_matrix(res, n_queries, k); - * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); - * @endcode - * - * @param[in] res - * @param[in] params parameters for building the index - * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim] - * - * @return the constructed cagra 
index + * @defgroup cagra_cpp_index_build CAGRA index build functions + * @{ */ -auto build(raft::resources const& res, - const cuvs::neighbors::cagra::index_params& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::cagra::index; /** - * @brief Build the index from the dataset for efficient search. + * @brief Build the index from a `dataset_view` (device padded, device VPQ, or host padded). * - * The build consist of two steps: build an intermediate knn-graph, and optimize it to - * create the final graph. The index_params struct controls the node degree of these - * graphs. + * When `index_params.attach_dataset_on_build = true` (the default) **and the input is a device + * view**, the `dataset` view is stored in the returned index as a **non-owning view** — no copy is + * made. The caller must keep the underlying storage alive for the lifetime of the index. The + * returned index is then ready to search immediately. * - * The following distance metrics are supported: - * - L2 - * - InnerProduct (currently only supported with IVF-PQ as the build algorithm) - * - CosineExpanded (dataset norms are computed as float regardless of input data type) - * - L1 (currently only supported with NN-Descent and Iterative Search as the build algorithm) - * - BitwiseHamming (currently only supported with NN-Descent and Iterative Search as the build - * algorithm, and only for int8_t and uint8_t data types) + * When `index_params.attach_dataset_on_build = false`, or when building from a **host view**, only + * the search graph is built and the returned index holds no dataset. 
* - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * // use default index parameters - * cagra::index_params index_params; - * // create and fill the index from a [N, D] dataset - * auto index = cagra::build(res, index_params, dataset); - * // use default search parameters - * cagra::search_params search_params; - * // search K nearest neighbours - * auto neighbors = raft::make_device_matrix(res, n_queries, k); - * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); - * @endcode - * - * @param[in] res - * @param[in] params parameters for building the index - * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim] + * For host views, the returned `host_padded_index` cannot be searched regardless of + * `attach_dataset_on_build` (the flag is ignored). Call `attach_device_dataset_on_host_index` to + * convert it to a device-backed index before search. * - * @return the constructed cagra index + * Note: disk-based ACE builds (`ace_params::use_disk = true`) always set a file-descriptor + * dataset internally (also host-typed); `attach_dataset_on_build` is ignored there too. 
*/ +template + requires(!cuvs::neighbors::is_empty_dataset_view_v && + (cuvs::neighbors::is_device_dataset_view_v || + cuvs::neighbors::is_host_dataset_view_v)) auto build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::cagra::index; + DatasetViewT const& dataset) -> cuvs::neighbors::cagra::cagra_index_t; + /** * @} */ @@ -1223,309 +946,98 @@ auto build(raft::resources const& res, * @{ */ -/** @brief Add new vectors to a CAGRA index - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * auto additional_dataset = raft::make_device_matrix(handle,add_size,dim); - * // set_additional_dataset(additional_dataset.view()); +// Concrete non-template overloads for all supported index types. +// Previously a single template covered all index types; it has been +// replaced with explicit overloads to maintain a stable non-template ABI. When a new index +// type is added (e.g. a future host_padded_index extend), add a corresponding overload here. +// Index types for which extend is not meaningful (e.g. VPQ — read-only compressed codes) +// are intentionally omitted. + +/** @brief Add new vectors to a CAGRA index. * - * cagra::extend_params params; - * cagra::extend(res, params, raft::make_const_mdspan(additional_dataset.view()), index); - * @endcode + * Only `device_padded_index` supports extend (VPQ and other compressed index types are + * read-only once built and have no extend overload). * * @param[in] handle raft resources * @param[in] params extend params * @param[in] additional_dataset additional dataset on device memory * @param[in,out] idx CAGRA index - * @param[out] new_dataset_buffer_view memory buffer view for the dataset including the additional - * part. The data will be copied from the current index in this function. 
The num rows must be the - * sum of the original and additional datasets, cols must be the dimension of the dataset, and the - * stride must be the same as the original index dataset. This view will be stored in the output - * index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. - * This option is useful when users want to manage the memory space for the dataset themselves. - * @param[out] new_graph_buffer_view memory buffer view for the graph including the additional part. - * The data will be copied from the current index in this function. The num rows must be the sum of - * the original and additional datasets and cols must be the graph degree. This view will be stored - * in the output index. It is the caller's responsibility to ensure that dataset stays alive as long - * as the index. This option is useful when users want to manage the memory space for the graph - * themselves. + * @param[out] new_dataset_buffer_view optional caller-managed buffer for the extended dataset + * @param[out] new_graph_buffer_view optional caller-managed buffer for the extended graph */ void extend( raft::resources const& handle, const cagra::extend_params& params, raft::device_matrix_view additional_dataset, - cuvs::neighbors::cagra::index& idx, + cuvs::neighbors::cagra::device_padded_index& idx, std::optional> new_dataset_buffer_view = std::nullopt, std::optional> new_graph_buffer_view = std::nullopt); -/** @brief Add new vectors to a CAGRA index - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * auto additional_dataset = raft::make_host_matrix(handle,add_size,dim); - * // set_additional_dataset(additional_dataset.view()); - * - * cagra::extend_params params; - * cagra::extend(res, params, raft::make_const_mdspan(additional_dataset.view()), index); - * @endcode - * - * @param[in] handle raft resources - * @param[in] params extend params - * @param[in] additional_dataset additional dataset on host memory - * 
@param[in,out] idx CAGRA index - * @param[out] new_dataset_buffer_view memory buffer view for the dataset including the additional - * part. The data will be copied from the current index in this function. The num rows must be the - * sum of the original and additional datasets, cols must be the dimension of the dataset, and the - * stride must be the same as the original index dataset. This view will be stored in the output - * index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. - * This option is useful when users want to manage the memory space for the dataset themselves. - * @param[out] new_graph_buffer_view memory buffer view for the graph including the additional part. - * The data will be copied from the current index in this function. The num rows must be the sum of - * the original and additional datasets and cols must be the graph degree. This view will be stored - * in the output index. It is the caller's responsibility to ensure that dataset stays alive as long - * as the index. This option is useful when users want to manage the memory space for the graph - * themselves. 
- */ void extend( raft::resources const& handle, const cagra::extend_params& params, - raft::host_matrix_view additional_dataset, - cuvs::neighbors::cagra::index& idx, - std::optional> + raft::device_matrix_view additional_dataset, + cuvs::neighbors::cagra::device_padded_index& idx, + std::optional> new_dataset_buffer_view = std::nullopt, std::optional> new_graph_buffer_view = std::nullopt); -/** @brief Add new vectors to a CAGRA index - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * auto additional_dataset = raft::make_device_matrix(handle,add_size,dim); - * // set_additional_dataset(additional_dataset.view()); - * - * cagra::extend_params params; - * cagra::extend(res, params, raft::make_const_mdspan(additional_dataset.view()), index); - * @endcode - * - * @param[in] handle raft resources - * @param[in] params extend params - * @param[in] additional_dataset additional dataset on device memory - * @param[in,out] idx CAGRA index - * @param[out] new_dataset_buffer_view memory buffer view for the dataset including the additional - * part. The data will be copied from the current index in this function. The num rows must be the - * sum of the original and additional datasets, cols must be the dimension of the dataset, and the - * stride must be the same as the original index dataset. This view will be stored in the output - * index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. - * This option is useful when users want to manage the memory space for the dataset themselves. - * @param[out] new_graph_buffer_view memory buffer view for the graph including the additional part. - * The data will be copied from the current index in this function. The num rows must be the sum of - * the original and additional datasets and cols must be the graph degree. This view will be stored - * in the output index. It is the caller's responsibility to ensure that dataset stays alive as long - * as the index. 
This option is useful when users want to manage the memory space for the graph - * themselves. - */ void extend( raft::resources const& handle, const cagra::extend_params& params, - raft::device_matrix_view additional_dataset, - cuvs::neighbors::cagra::index& idx, - std::optional> + raft::device_matrix_view additional_dataset, + cuvs::neighbors::cagra::device_padded_index& idx, + std::optional> new_dataset_buffer_view = std::nullopt, std::optional> new_graph_buffer_view = std::nullopt); -/** @brief Add new vectors to a CAGRA index - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * auto additional_dataset = raft::make_host_matrix(handle,add_size,dim); - * // set_additional_dataset(additional_dataset.view()); - * - * cagra::extend_params params; - * cagra::extend(res, params, raft::make_const_mdspan(additional_dataset.view()), index); - * @endcode - * - * @param[in] handle raft resources - * @param[in] params extend params - * @param[in] additional_dataset additional dataset on host memory - * @param[in,out] idx CAGRA index - * @param[out] new_dataset_buffer_view memory buffer view for the dataset including the additional - * part. The data will be copied from the current index in this function. The num rows must be the - * sum of the original and additional datasets, cols must be the dimension of the dataset, and the - * stride must be the same as the original index dataset. This view will be stored in the output - * index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. - * This option is useful when users want to manage the memory space for the dataset themselves. - * @param[out] new_graph_buffer_view memory buffer view for the graph including the additional part. - * The data will be copied from the current index in this function. The num rows must be the sum of - * the original and additional datasets and cols must be the graph degree. This view will be stored - * in the output index. 
It is the caller's responsibility to ensure that dataset stays alive as long - * as the index. This option is useful when users want to manage the memory space for the graph - * themselves. - */ void extend( raft::resources const& handle, const cagra::extend_params& params, - raft::host_matrix_view additional_dataset, - cuvs::neighbors::cagra::index& idx, - std::optional> + raft::device_matrix_view additional_dataset, + cuvs::neighbors::cagra::device_padded_index& idx, + std::optional> new_dataset_buffer_view = std::nullopt, std::optional> new_graph_buffer_view = std::nullopt); -/** @brief Add new vectors to a CAGRA index - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * auto additional_dataset = raft::make_device_matrix(handle,add_size,dim); - * // set_additional_dataset(additional_dataset.view()); - * - * cagra::extend_params params; - * cagra::extend(res, params, raft::make_const_mdspan(additional_dataset.view()), index); - * @endcode - * - * @param[in] handle raft resources - * @param[in] params extend params - * @param[in] additional_dataset additional dataset on device memory - * @param[in,out] idx CAGRA index - * @param[out] new_dataset_buffer_view memory buffer view for the dataset including the additional - * part. The data will be copied from the current index in this function. The num rows must be the - * sum of the original and additional datasets, cols must be the dimension of the dataset, and the - * stride must be the same as the original index dataset. This view will be stored in the output - * index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. - * This option is useful when users want to manage the memory space for the dataset themselves. - * @param[out] new_graph_buffer_view memory buffer view for the graph including the additional part. - * The data will be copied from the current index in this function. 
The num rows must be the sum of - * the original and additional datasets and cols must be the graph degree. This view will be stored - * in the output index. It is the caller's responsibility to ensure that dataset stays alive as long - * as the index. This option is useful when users want to manage the memory space for the graph - * themselves. - */ +/** @brief Add new vectors to a CAGRA index (host additional dataset). */ void extend( raft::resources const& handle, const cagra::extend_params& params, - raft::device_matrix_view additional_dataset, - cuvs::neighbors::cagra::index& idx, - std::optional> + raft::host_matrix_view additional_dataset, + cuvs::neighbors::cagra::device_padded_index& idx, + std::optional> new_dataset_buffer_view = std::nullopt, std::optional> new_graph_buffer_view = std::nullopt); -/** @brief Add new vectors to a CAGRA index - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * auto additional_dataset = raft::make_host_matrix(handle,add_size,dim); - * // set_additional_dataset(additional_dataset.view()); - * - * cagra::extend_params params; - * cagra::extend(res, params, raft::make_const_mdspan(additional_dataset.view()), index); - * @endcode - * - * @param[in] handle raft resources - * @param[in] params extend params - * @param[in] additional_dataset additional dataset on host memory - * @param[in,out] idx CAGRA index - * @param[out] new_dataset_buffer_view memory buffer view for the dataset including the additional - * part. The data will be copied from the current index in this function. The num rows must be the - * sum of the original and additional datasets, cols must be the dimension of the dataset, and the - * stride must be the same as the original index dataset. This view will be stored in the output - * index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. - * This option is useful when users want to manage the memory space for the dataset themselves. 
- * @param[out] new_graph_buffer_view memory buffer view for the graph including the additional part. - * The data will be copied from the current index in this function. The num rows must be the sum of - * the original and additional datasets and cols must be the graph degree. This view will be stored - * in the output index. It is the caller's responsibility to ensure that dataset stays alive as long - * as the index. This option is useful when users want to manage the memory space for the graph - * themselves. - */ void extend( raft::resources const& handle, const cagra::extend_params& params, - raft::host_matrix_view additional_dataset, - cuvs::neighbors::cagra::index& idx, - std::optional> + raft::host_matrix_view additional_dataset, + cuvs::neighbors::cagra::device_padded_index& idx, + std::optional> new_dataset_buffer_view = std::nullopt, std::optional> new_graph_buffer_view = std::nullopt); -/** @brief Add new vectors to a CAGRA index - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * auto additional_dataset = raft::make_host_matrix(handle,add_size,dim); - * // set_additional_dataset(additional_dataset.view()); - * - * cagra::extend_params params; - * cagra::extend(res, params, raft::make_const_mdspan(additional_dataset.view()), index); - * @endcode - * - * @param[in] handle raft resources - * @param[in] params extend params - * @param[in] additional_dataset additional dataset on host memory - * @param[in,out] idx CAGRA index - * @param[out] new_dataset_buffer_view memory buffer view for the dataset including the additional - * part. The data will be copied from the current index in this function. The num rows must be the - * sum of the original and additional datasets, cols must be the dimension of the dataset, and the - * stride must be the same as the original index dataset. This view will be stored in the output - * index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. 
- * This option is useful when users want to manage the memory space for the dataset themselves. - * @param[out] new_graph_buffer_view memory buffer view for the graph including the additional part. - * The data will be copied from the current index in this function. The num rows must be the sum of - * the original and additional datasets and cols must be the graph degree. This view will be stored - * in the output index. It is the caller's responsibility to ensure that dataset stays alive as long - * as the index. This option is useful when users want to manage the memory space for the graph - * themselves. - */ void extend( raft::resources const& handle, const cagra::extend_params& params, - raft::device_matrix_view additional_dataset, - cuvs::neighbors::cagra::index& idx, - std::optional> + raft::host_matrix_view additional_dataset, + cuvs::neighbors::cagra::device_padded_index& idx, + std::optional> new_dataset_buffer_view = std::nullopt, std::optional> new_graph_buffer_view = std::nullopt); -/** @brief Add new vectors to a CAGRA index - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * auto additional_dataset = raft::make_host_matrix(handle,add_size,dim); - * // set_additional_dataset(additional_dataset.view()); - * - * cagra::extend_params params; - * cagra::extend(res, params, raft::make_const_mdspan(additional_dataset.view()), index); - * @endcode - * - * @param[in] handle raft resources - * @param[in] params extend params - * @param[in] additional_dataset additional dataset on host memory - * @param[in,out] idx CAGRA index - * @param[out] new_dataset_buffer_view memory buffer view for the dataset including the additional - * part. The data will be copied from the current index in this function. The num rows must be the - * sum of the original and additional datasets, cols must be the dimension of the dataset, and the - * stride must be the same as the original index dataset. This view will be stored in the output - * index. 
It is the caller's responsibility to ensure that dataset stays alive as long as the index. - * This option is useful when users want to manage the memory space for the dataset themselves. - * @param[out] new_graph_buffer_view memory buffer view for the graph including the additional part. - * The data will be copied from the current index in this function. The num rows must be the sum of - * the original and additional datasets and cols must be the graph degree. This view will be stored - * in the output index. It is the caller's responsibility to ensure that dataset stays alive as long - * as the index. This option is useful when users want to manage the memory space for the graph - * themselves. - */ void extend( raft::resources const& handle, const cagra::extend_params& params, raft::host_matrix_view additional_dataset, - cuvs::neighbors::cagra::index& idx, + cuvs::neighbors::cagra::device_padded_index& idx, std::optional> new_dataset_buffer_view = std::nullopt, std::optional> new_graph_buffer_view = std::nullopt); + /** * @} */ @@ -1533,201 +1045,228 @@ void extend( /** * @defgroup cagra_cpp_index_search CAGRA search functions * @{ - * @brief Search ANN using the constructed index. - * - * See the [cagra::build](#cagra::build) documentation for a usage example. - * - * @param[in] res raft resources - * @param[in] params configure the search - * @param[in] index cagra index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - * @param[in] sample_filter an optional device filter function object that greenlights samples - * for a given query. (none_sample_filter for no filtering) */ +// Concrete non-template overloads for all supported index types. 
+// Previously a single template covered all index types; it +// has been replaced with explicit overloads to maintain a stable non-template ABI. When a new +// index type is added, add corresponding overloads here. Index types whose search is not yet +// implemented (e.g. vpq_f32_index) are still declared so the symbols exist when the +// implementation lands. void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); -/** - * @brief Search ANN using the constructed index. - * - * See the [cagra::build](#cagra::build) documentation for a usage example. - * - * @param[in] res raft resources - * @param[in] params configure the search - * @param[in] index cagra index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - * @param[in] sample_filter an optional device filter function object that greenlights samples - * for a given query. (none_sample_filter for no filtering) - */ void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); -/** - * @brief Search ANN using the constructed index. 
- * - * See the [cagra::build](#cagra::build) documentation for a usage example. - * - * @param[in] res raft resources - * @param[in] params configure the search - * @param[in] index cagra index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - * @param[in] sample_filter an optional device filter function object that greenlights samples - * for a given query. (none_sample_filter for no filtering) - */ void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); -/** - * @brief Search ANN using the constructed index. - * - * See the [cagra::build](#cagra::build) documentation for a usage example. - * - * @param[in] res raft resources - * @param[in] params configure the search - * @param[in] index cagra index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - * @param[in] sample_filter an optional device filter function object that greenlights samples - * for a given query. 
(none_sample_filter for no filtering) - */ void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); -/** - * @brief Search ANN using the constructed index. - * - * See the [cagra::build](#cagra::build) documentation for a usage example. - * - * @param[in] res raft resources - * @param[in] params configure the search - * @param[in] index cagra index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - * @param[in] sample_filter an optional device filter function object that greenlights samples - * for a given query. 
(none_sample_filter for no filtering) - */ +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::device_padded_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::device_padded_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::device_padded_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::device_padded_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +// VPQ f16 index overloads (OutputIdxT = uint32_t) +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f16_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + 
cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f16_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f16_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f16_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); +// VPQ f16 index overloads (OutputIdxT = int64_t) void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::vpq_f16_index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); -/** - * @brief Search ANN using the constructed index. - * - * See the [cagra::build](#cagra::build) documentation for a usage example. 
- * - * @param[in] res raft resources - * @param[in] params configure the search - * @param[in] index cagra index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - * @param[in] sample_filter an optional device filter function object that greenlights samples - * for a given query. (none_sample_filter for no filtering) - */ void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::vpq_f16_index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); -/** - * @brief Search ANN using the constructed index. - * - * See the [cagra::build](#cagra::build) documentation for a usage example. - * - * @param[in] res raft resources - * @param[in] params configure the search - * @param[in] index cagra index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - * @param[in] sample_filter an optional device filter function object that greenlights samples - * for a given query. 
(none_sample_filter for no filtering) - */ void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::vpq_f16_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f16_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +// VPQ f32 index overloads (OutputIdxT = uint32_t) +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f32_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f32_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f32_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + 
cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f32_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +// VPQ f32 index overloads (OutputIdxT = int64_t) +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f32_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f32_index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& res, + cuvs::neighbors::cagra::search_params const& params, + const cuvs::neighbors::cagra::vpq_f32_index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); -/** - * @brief Search ANN using the constructed index. - * - * See the [cagra::build](#cagra::build) documentation for a usage example. 
- * - * @param[in] res raft resources - * @param[in] params configure the search - * @param[in] index cagra index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - * @param[in] sample_filter an optional device filter function object that greenlights samples - * for a given query. (none_sample_filter for no filtering) - */ void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::vpq_f32_index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1743,6 +1282,11 @@ void search(raft::resources const& res, * @{ */ +// Serialize and deserialize are currently overloaded only for device_padded_index (the common +// dense-dataset case). To support a new dataset kind (e.g. vpq_f16_index) in the future, simply +// add a matching pair of overloads here and a corresponding serialize_cagra__dataset / +// deserialize_ implementation in detail/dataset_serialize.hpp. + /** * Save the index to file. 
* @@ -1768,7 +1312,7 @@ void search(raft::resources const& res, */ void serialize(raft::resources const& handle, const std::string& filename, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, bool include_dataset = true); /** @@ -1785,17 +1329,22 @@ void serialize(raft::resources const& handle, * // create a string with a filepath * std::string filename("/path/to/index"); - * cuvs::neighbors::cagra::index index; + * cuvs::neighbors::cagra::device_padded_index index; * cuvs::neighbors::cagra::deserialize(handle, filename, &index); * @endcode * * @param[in] handle the raft handle * @param[in] filename the name of the file that stores the index * @param[out] index the cagra index + * @param[out] out_dataset if non-null, on success may be set to an owned deserialized dataset + * when the file includes dataset data; may be left unchanged otherwise. Optional; pass + * nullptr to ignore. */ -void deserialize(raft::resources const& handle, - const std::string& filename, - cuvs::neighbors::cagra::index* index); +void deserialize( + raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::cagra::device_padded_index* index, + std::unique_ptr>* out_dataset = nullptr); /** * Write the index to an output stream @@ -1821,7 +1370,7 @@ void deserialize(raft::resources const& handle, */ void serialize(raft::resources const& handle, std::ostream& os, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, bool include_dataset = true); /** @@ -1837,17 +1386,22 @@ void serialize(raft::resources const& handle, * * // create an input stream * std::istream is(std::cin.rdbuf()); - * cuvs::neighbors::cagra::index index; + * cuvs::neighbors::cagra::device_padded_index index; * cuvs::neighbors::cagra::deserialize(handle, is, &index); * @endcode * * @param[in] handle the raft handle * @param[in] is input stream * @param[out] index the cagra index + * @param[out] 
out_dataset if non-null, on success may be set to an owned deserialized dataset + * when the stream includes dataset data; may be left unchanged otherwise. Optional; pass + * nullptr to ignore. */ -void deserialize(raft::resources const& handle, - std::istream& is, - cuvs::neighbors::cagra::index* index); +void deserialize( + raft::resources const& handle, + std::istream& is, + cuvs::neighbors::cagra::device_padded_index* index, + std::unique_ptr>* out_dataset = nullptr); /** * Save the index to file. * @@ -1873,7 +1427,7 @@ void deserialize(raft::resources const& handle, */ void serialize(raft::resources const& handle, const std::string& filename, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, bool include_dataset = true); /** @@ -1890,17 +1444,22 @@ void serialize(raft::resources const& handle, * // create a string with a filepath * std::string filename("/path/to/index"); - * cuvs::neighbors::cagra::index index; + * cuvs::neighbors::cagra::device_padded_index index; * cuvs::neighbors::cagra::deserialize(handle, filename, &index); * @endcode * * @param[in] handle the raft handle * @param[in] filename the name of the file that stores the index * @param[out] index the cagra index + * @param[out] out_dataset if non-null, on success may be set to an owned deserialized dataset + * when the file includes dataset data; may be left unchanged otherwise. Optional; pass + * nullptr to ignore. 
*/ -void deserialize(raft::resources const& handle, - const std::string& filename, - cuvs::neighbors::cagra::index* index); +void deserialize( + raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::cagra::device_padded_index* index, + std::unique_ptr>* out_dataset = nullptr); /** * Write the index to an output stream @@ -1926,7 +1485,7 @@ void deserialize(raft::resources const& handle, */ void serialize(raft::resources const& handle, std::ostream& os, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, bool include_dataset = true); /** @@ -1942,17 +1501,22 @@ void serialize(raft::resources const& handle, * * // create an input stream * std::istream is(std::cin.rdbuf()); - * cuvs::neighbors::cagra::index index; + * cuvs::neighbors::cagra::device_padded_index index; * cuvs::neighbors::cagra::deserialize(handle, is, &index); * @endcode * * @param[in] handle the raft handle * @param[in] is input stream * @param[out] index the cagra index + * @param[out] out_dataset if non-null, on success may be set to an owned deserialized dataset + * when the stream includes dataset data; may be left unchanged otherwise. Optional; pass + * nullptr to ignore. */ -void deserialize(raft::resources const& handle, - std::istream& is, - cuvs::neighbors::cagra::index* index); +void deserialize( + raft::resources const& handle, + std::istream& is, + cuvs::neighbors::cagra::device_padded_index* index, + std::unique_ptr>* out_dataset = nullptr); /** * Save the index to file. 
@@ -1978,7 +1542,7 @@ void deserialize(raft::resources const& handle, */ void serialize(raft::resources const& handle, const std::string& filename, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, bool include_dataset = true); /** @@ -1995,17 +1559,22 @@ void serialize(raft::resources const& handle, * // create a string with a filepath * std::string filename("/path/to/index"); - * cuvs::neighbors::cagra::index index; + * cuvs::neighbors::cagra::device_padded_index index; * cuvs::neighbors::cagra::deserialize(handle, filename, &index); * @endcode * * @param[in] handle the raft handle * @param[in] filename the name of the file that stores the index * @param[out] index the cagra index + * @param[out] out_dataset if non-null, on success may be set to an owned deserialized dataset + * when the file includes dataset data; may be left unchanged otherwise. Optional; pass + * nullptr to ignore. */ -void deserialize(raft::resources const& handle, - const std::string& filename, - cuvs::neighbors::cagra::index* index); +void deserialize( + raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::cagra::device_padded_index* index, + std::unique_ptr>* out_dataset = nullptr); /** * Write the index to an output stream @@ -2031,7 +1600,7 @@ void deserialize(raft::resources const& handle, */ void serialize(raft::resources const& handle, std::ostream& os, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, bool include_dataset = true); /** @@ -2047,17 +1616,22 @@ void serialize(raft::resources const& handle, * * // create an input stream * std::istream is(std::cin.rdbuf()); - * cuvs::neighbors::cagra::index index; + * cuvs::neighbors::cagra::device_padded_index index; * cuvs::neighbors::cagra::deserialize(handle, is, &index); * @endcode * * @param[in] handle the raft handle * @param[in] is input stream * @param[out] index the cagra index + * 
@param[out] out_dataset if non-null, on success may be set to an owned deserialized dataset + * when the stream includes dataset data; may be left unchanged otherwise. Optional; pass + * nullptr to ignore. */ -void deserialize(raft::resources const& handle, - std::istream& is, - cuvs::neighbors::cagra::index* index); +void deserialize( + raft::resources const& handle, + std::istream& is, + cuvs::neighbors::cagra::device_padded_index* index, + std::unique_ptr>* out_dataset = nullptr); /** * Save the index to file. @@ -2083,7 +1657,7 @@ void deserialize(raft::resources const& handle, */ void serialize(raft::resources const& handle, const std::string& filename, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, bool include_dataset = true); /** @@ -2100,17 +1674,22 @@ void serialize(raft::resources const& handle, * // create a string with a filepath * std::string filename("/path/to/index"); - * cuvs::neighbors::cagra::index index; + * cuvs::neighbors::cagra::device_padded_index index; * cuvs::neighbors::cagra::deserialize(handle, filename, &index); * @endcode * * @param[in] handle the raft handle * @param[in] filename the name of the file that stores the index * @param[out] index the cagra index + * @param[out] out_dataset if non-null, on success may be set to an owned deserialized dataset + * when the file includes dataset data; may be left unchanged otherwise. Optional; pass + * nullptr to ignore. 
*/ -void deserialize(raft::resources const& handle, - const std::string& filename, - cuvs::neighbors::cagra::index* index); +void deserialize( + raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::cagra::device_padded_index* index, + std::unique_ptr>* out_dataset = nullptr); /** * Write the index to an output stream @@ -2136,7 +1715,7 @@ void deserialize(raft::resources const& handle, */ void serialize(raft::resources const& handle, std::ostream& os, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, bool include_dataset = true); /** @@ -2152,17 +1731,22 @@ void serialize(raft::resources const& handle, * * // create an input stream * std::istream is(std::cin.rdbuf()); - * cuvs::neighbors::cagra::index index; + * cuvs::neighbors::cagra::device_padded_index index; * cuvs::neighbors::cagra::deserialize(handle, is, &index); * @endcode * * @param[in] handle the raft handle * @param[in] is input stream * @param[out] index the cagra index + * @param[out] out_dataset if non-null, on success may be set to an owned deserialized dataset + * when the stream includes dataset data; may be left unchanged otherwise. Optional; pass + * nullptr to ignore. 
*/ -void deserialize(raft::resources const& handle, - std::istream& is, - cuvs::neighbors::cagra::index* index); +void deserialize( + raft::resources const& handle, + std::istream& is, + cuvs::neighbors::cagra::device_padded_index* index, + std::unique_ptr>* out_dataset = nullptr); /** * Write the CAGRA built index as a base layer HNSW index to an output stream @@ -2193,7 +1777,7 @@ void deserialize(raft::resources const& handle, void serialize_to_hnswlib( raft::resources const& handle, std::ostream& os, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, std::optional> dataset = std::nullopt); @@ -2227,7 +1811,7 @@ void serialize_to_hnswlib( void serialize_to_hnswlib( raft::resources const& handle, const std::string& filename, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, std::optional> dataset = std::nullopt); @@ -2260,7 +1844,7 @@ void serialize_to_hnswlib( void serialize_to_hnswlib( raft::resources const& handle, std::ostream& os, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, std::optional> dataset = std::nullopt); @@ -2294,7 +1878,7 @@ void serialize_to_hnswlib( void serialize_to_hnswlib( raft::resources const& handle, const std::string& filename, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, std::optional> dataset = std::nullopt); @@ -2327,7 +1911,7 @@ void serialize_to_hnswlib( void serialize_to_hnswlib( raft::resources const& handle, std::ostream& os, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, std::optional> dataset = std::nullopt); @@ -2361,7 +1945,7 @@ void serialize_to_hnswlib( void serialize_to_hnswlib( raft::resources const& handle, const std::string& filename, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& 
index, std::optional> dataset = std::nullopt); @@ -2394,7 +1978,7 @@ void serialize_to_hnswlib( void serialize_to_hnswlib( raft::resources const& handle, std::ostream& os, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, std::optional> dataset = std::nullopt); @@ -2428,7 +2012,7 @@ void serialize_to_hnswlib( void serialize_to_hnswlib( raft::resources const& handle, const std::string& filename, - const cuvs::neighbors::cagra::index& index, + const cuvs::neighbors::cagra::device_padded_index& index, std::optional> dataset = std::nullopt); @@ -2441,69 +2025,34 @@ void serialize_to_hnswlib( * @{ */ -/** @brief Merge multiple CAGRA indices into a single index. - * - * This function merges multiple CAGRA indices into one, combining both the datasets and graph - * structures. - * - * @note: When device memory is sufficient, the dataset attached to the returned index is allocated - * in device memory by default; otherwise, host memory is used automatically. - * - * @note: This API only supports physical merge (`merge_strategy = MERGE_STRATEGY_PHYSICAL`), and - * attempting a logical merge here will throw an error. - * - * Usage example: - * @code{.cpp} - * using namespace cuvs::neighbors; - * auto dataset0 = raft::make_host_matrix(handle, size0, dim); - * auto dataset1 = raft::make_host_matrix(handle, size1, dim); +/** @brief Allocate device merge buffers for the given indices and row filter. * - * auto index0 = cagra::build(res, index_params, dataset0); - * auto index1 = cagra::build(res, index_params, dataset1); - * - * std::vector*> indices{&index0, &index1}; - * - * auto merged_index = cagra::merge(res, index_params, indices); - * @endcode - * - * @param[in] res RAFT resources used for the merge operation. - * @param[in] params Parameters that control the merging process. - * @param[in] indices A vector of pointers to the CAGRA indices to merge. 
All indices must: - * - Have attached datasets with the same dimension. - * @param[in] row_filter an optional device filter function object that greenlights rows - * to include in the merged index (none_sample_filter for no filtering) - * @return A new CAGRA index containing the merged indices, graph, and dataset. + * Computes row counts and stride (see `merged_dataset`), allocates `merged_storage` with shape + * `[merged_rows, stride_elements]`, and when using a bitset row filter also allocates + * `filtered_storage` with shape `[filtered_rows, stride_elements]`. Pass the result to `merge` with + * the same `indices` and `row_filter`. */ -auto merge(raft::resources const& res, - const cuvs::neighbors::cagra::index_params& params, - std::vector*>& indices, - const cuvs::neighbors::filtering::base_filter& row_filter = - cuvs::neighbors::filtering::none_sample_filter{}) - -> cuvs::neighbors::cagra::index; +template +merged_dataset_storage make_merged_dataset( + raft::resources const& res, + std::vector*> const& indices, + const cuvs::neighbors::filtering::base_filter& row_filter = + cuvs::neighbors::filtering::none_sample_filter{}); -/** @copydoc merge */ -auto merge(raft::resources const& res, - const cuvs::neighbors::cagra::index_params& params, - std::vector*>& indices, - const cuvs::neighbors::filtering::base_filter& row_filter = - cuvs::neighbors::filtering::none_sample_filter{}) - -> cuvs::neighbors::cagra::index; - -/** @copydoc merge */ +/** @brief Merge multiple CAGRA indices into a single index. + * + * @note This API only supports physical merge (`merge_strategy = MERGE_STRATEGY_PHYSICAL`). + * All input indices must use the same `DatasetViewT` (padded dataset views today). 
+ */ +template auto merge(raft::resources const& res, const cuvs::neighbors::cagra::index_params& params, - std::vector*>& indices, + std::vector*>& indices, + merged_dataset_storage& storage, const cuvs::neighbors::filtering::base_filter& row_filter = cuvs::neighbors::filtering::none_sample_filter{}) - -> cuvs::neighbors::cagra::index; + -> cuvs::neighbors::cagra::index; -/** @copydoc merge */ -auto merge(raft::resources const& res, - const cuvs::neighbors::cagra::index_params& params, - std::vector*>& indices, - const cuvs::neighbors::filtering::base_filter& row_filter = - cuvs::neighbors::filtering::none_sample_filter{}) - -> cuvs::neighbors::cagra::index; /** * @} */ @@ -2530,7 +2079,7 @@ auto merge(raft::resources const& res, auto build(const raft::resources& clique, const cuvs::neighbors::mg_index_params& index_params, raft::host_matrix_view index_dataset) - -> cuvs::neighbors::mg_index, float, uint32_t>; + -> cuvs::neighbors::mg_index, float, uint32_t>; /// \ingroup mg_cpp_index_build /** @@ -2552,7 +2101,7 @@ auto build(const raft::resources& clique, auto build(const raft::resources& clique, const cuvs::neighbors::mg_index_params& index_params, raft::host_matrix_view index_dataset) - -> cuvs::neighbors::mg_index, half, uint32_t>; + -> cuvs::neighbors::mg_index, half, uint32_t>; /// \ingroup mg_cpp_index_build /** @@ -2574,7 +2123,7 @@ auto build(const raft::resources& clique, auto build(const raft::resources& clique, const cuvs::neighbors::mg_index_params& index_params, raft::host_matrix_view index_dataset) - -> cuvs::neighbors::mg_index, int8_t, uint32_t>; + -> cuvs::neighbors::mg_index, int8_t, uint32_t>; /// \ingroup mg_cpp_index_build /** @@ -2596,7 +2145,7 @@ auto build(const raft::resources& clique, auto build(const raft::resources& clique, const cuvs::neighbors::mg_index_params& index_params, raft::host_matrix_view index_dataset) - -> cuvs::neighbors::mg_index, uint8_t, uint32_t>; + -> cuvs::neighbors::mg_index, uint8_t, uint32_t>; /// \defgroup 
mg_cpp_index_extend ANN MG index extend @@ -2619,10 +2168,11 @@ auto build(const raft::resources& clique, * `std::nullopt` means default continuous range `[0...n_rows)` * */ -void extend(const raft::resources& clique, - cuvs::neighbors::mg_index, float, uint32_t>& index, - raft::host_matrix_view new_vectors, - std::optional> new_indices); +void extend( + const raft::resources& clique, + cuvs::neighbors::mg_index, float, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); /// \ingroup mg_cpp_index_extend /** @@ -2643,10 +2193,11 @@ void extend(const raft::resources& clique, * `std::nullopt` means default continuous range `[0...n_rows)` * */ -void extend(const raft::resources& clique, - cuvs::neighbors::mg_index, half, uint32_t>& index, - raft::host_matrix_view new_vectors, - std::optional> new_indices); +void extend( + const raft::resources& clique, + cuvs::neighbors::mg_index, half, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); /// \ingroup mg_cpp_index_extend /** @@ -2667,10 +2218,11 @@ void extend(const raft::resources& clique, * `std::nullopt` means default continuous range `[0...n_rows)` * */ -void extend(const raft::resources& clique, - cuvs::neighbors::mg_index, int8_t, uint32_t>& index, - raft::host_matrix_view new_vectors, - std::optional> new_indices); +void extend( + const raft::resources& clique, + cuvs::neighbors::mg_index, int8_t, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); /// \ingroup mg_cpp_index_extend /** @@ -2691,10 +2243,12 @@ void extend(const raft::resources& clique, * `std::nullopt` means default continuous range `[0...n_rows)` * */ -void extend(const raft::resources& clique, - cuvs::neighbors::mg_index, uint8_t, uint32_t>& index, - raft::host_matrix_view new_vectors, - std::optional> new_indices); +void extend( + const raft::resources& clique, + cuvs::neighbors::mg_index, uint8_t, uint32_t>& + index, + raft::host_matrix_view 
new_vectors, + std::optional> new_indices); /// \defgroup mg_cpp_index_search ANN MG index search @@ -2720,12 +2274,14 @@ void extend(const raft::resources& clique, * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] * */ -void search(const raft::resources& clique, - const cuvs::neighbors::mg_index, float, uint32_t>& index, - const cuvs::neighbors::mg_search_params& search_params, - raft::host_matrix_view queries, - raft::host_matrix_view neighbors, - raft::host_matrix_view distances); +void search( + const raft::resources& clique, + const cuvs::neighbors::mg_index, float, uint32_t>& + index, + const cuvs::neighbors::mg_search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances); /// \ingroup mg_cpp_index_search /** @@ -2749,12 +2305,14 @@ void search(const raft::resources& clique, * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] * */ -void search(const raft::resources& clique, - const cuvs::neighbors::mg_index, half, uint32_t>& index, - const cuvs::neighbors::mg_search_params& search_params, - raft::host_matrix_view queries, - raft::host_matrix_view neighbors, - raft::host_matrix_view distances); +void search( + const raft::resources& clique, + const cuvs::neighbors::mg_index, half, uint32_t>& + index, + const cuvs::neighbors::mg_search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances); /// \ingroup mg_cpp_index_search /** @@ -2780,7 +2338,8 @@ void search(const raft::resources& clique, */ void search( const raft::resources& clique, - const cuvs::neighbors::mg_index, int8_t, uint32_t>& index, + const cuvs::neighbors::mg_index, int8_t, uint32_t>& + index, const cuvs::neighbors::mg_search_params& search_params, raft::host_matrix_view queries, raft::host_matrix_view neighbors, @@ -2810,7 +2369,8 @@ void search( */ void search( const raft::resources& clique, - const 
cuvs::neighbors::mg_index, uint8_t, uint32_t>& index, + const cuvs::neighbors::mg_index, uint8_t, uint32_t>& + index, const cuvs::neighbors::mg_search_params& search_params, raft::host_matrix_view queries, raft::host_matrix_view neighbors, @@ -2838,12 +2398,14 @@ void search( * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] * */ -void search(const raft::resources& clique, - const cuvs::neighbors::mg_index, float, uint32_t>& index, - const cuvs::neighbors::mg_search_params& search_params, - raft::host_matrix_view queries, - raft::host_matrix_view neighbors, - raft::host_matrix_view distances); +void search( + const raft::resources& clique, + const cuvs::neighbors::mg_index, float, uint32_t>& + index, + const cuvs::neighbors::mg_search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances); /// \ingroup mg_cpp_index_search /** @@ -2867,12 +2429,14 @@ void search(const raft::resources& clique, * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] * */ -void search(const raft::resources& clique, - const cuvs::neighbors::mg_index, half, uint32_t>& index, - const cuvs::neighbors::mg_search_params& search_params, - raft::host_matrix_view queries, - raft::host_matrix_view neighbors, - raft::host_matrix_view distances); +void search( + const raft::resources& clique, + const cuvs::neighbors::mg_index, half, uint32_t>& + index, + const cuvs::neighbors::mg_search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances); /// \ingroup mg_cpp_index_search /** @@ -2898,7 +2462,8 @@ void search(const raft::resources& clique, */ void search( const raft::resources& clique, - const cuvs::neighbors::mg_index, int8_t, uint32_t>& index, + const cuvs::neighbors::mg_index, int8_t, uint32_t>& + index, const cuvs::neighbors::mg_search_params& search_params, raft::host_matrix_view queries, 
raft::host_matrix_view neighbors, @@ -2928,7 +2493,8 @@ void search( */ void search( const raft::resources& clique, - const cuvs::neighbors::mg_index, uint8_t, uint32_t>& index, + const cuvs::neighbors::mg_index, uint8_t, uint32_t>& + index, const cuvs::neighbors::mg_search_params& search_params, raft::host_matrix_view queries, raft::host_matrix_view neighbors, @@ -2956,7 +2522,8 @@ void search( */ void serialize( const raft::resources& clique, - const cuvs::neighbors::mg_index, float, uint32_t>& index, + const cuvs::neighbors::mg_index, float, uint32_t>& + index, const std::string& filename); /// \ingroup mg_cpp_serialize @@ -2977,9 +2544,11 @@ void serialize( * @param[in] filename path to the file to be serialized * */ -void serialize(const raft::resources& clique, - const cuvs::neighbors::mg_index, half, uint32_t>& index, - const std::string& filename); +void serialize( + const raft::resources& clique, + const cuvs::neighbors::mg_index, half, uint32_t>& + index, + const std::string& filename); /// \ingroup mg_cpp_serialize /** @@ -3001,7 +2570,8 @@ void serialize(const raft::resources& clique, */ void serialize( const raft::resources& clique, - const cuvs::neighbors::mg_index, int8_t, uint32_t>& index, + const cuvs::neighbors::mg_index, int8_t, uint32_t>& + index, const std::string& filename); /// \ingroup mg_cpp_serialize @@ -3024,7 +2594,8 @@ void serialize( */ void serialize( const raft::resources& clique, - const cuvs::neighbors::mg_index, uint8_t, uint32_t>& index, + const cuvs::neighbors::mg_index, uint8_t, uint32_t>& + index, const std::string& filename); /// \defgroup mg_cpp_deserialize ANN MG index deserialization @@ -3050,7 +2621,7 @@ void serialize( */ template auto deserialize(const raft::resources& clique, const std::string& filename) - -> cuvs::neighbors::mg_index, T, IdxT>; + -> cuvs::neighbors::mg_index, T, IdxT>; /// \defgroup mg_cpp_distribute ANN MG local index distribution @@ -3076,7 +2647,7 @@ auto deserialize(const raft::resources& clique, 
const std::string& filename) */ template auto distribute(const raft::resources& clique, const std::string& filename) - -> cuvs::neighbors::mg_index, T, IdxT>; + -> cuvs::neighbors::mg_index, T, IdxT>; /** * @brief Build a kNN graph using IVF-PQ. @@ -3104,7 +2675,7 @@ auto distribute(const raft::resources& clique, const std::string& filename) * auto optimized_gaph = raft::make_host_matrix(dataset.extent(0), 64); * cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph - * auto index = cagra::index(res, build_params.metric(), dataset, + * auto index = cagra::device_padded_index(res, build_params.metric(), dataset, * optimized_graph.view()); * @endcode * @@ -3144,7 +2715,7 @@ void build_knn_graph(raft::resources const& res, * auto optimized_gaph = raft::make_host_matrix(dataset.extent(0), 64); * cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph - * auto index = cagra::index(res, build_params.metric(), dataset, + * auto index = cagra::device_padded_index(res, build_params.metric(), dataset, * optimized_graph.view()); * @endcode * @@ -3184,7 +2755,7 @@ void build_knn_graph(raft::resources const& res, * auto optimized_gaph = raft::make_host_matrix(dataset.extent(0), 64); * cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph - * auto index = cagra::index(res, build_params.metric(), dataset, + * auto index = cagra::device_padded_index(res, build_params.metric(), dataset, * optimized_graph.view()); * @endcode * @@ -3224,7 +2795,7 @@ void build_knn_graph(raft::resources const& res, * auto optimized_gaph = raft::make_host_matrix(dataset.extent(0), 64); * cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph - * auto index = cagra::index(res, 
build_params.metric(), dataset, + * auto index = cagra::device_padded_index(res, build_params.metric(), dataset, * optimized_graph.view()); * @endcode * @@ -3238,6 +2809,71 @@ void build_knn_graph(raft::resources const& res, raft::host_matrix_view knn_graph, cuvs::neighbors::cagra::graph_build_params::ivf_pq_params build_params); +/** + * @brief Convert a host-resident CAGRA index to a device-resident index (graph only). + * + * Copies the graph host → device. The returned device index has no dataset attached; + * call `index::update_dataset(res, device_view)` or `attach_device_dataset_on_host_index` + * before search. + * + * @tparam T element type + * @tparam IdxT index type + * @tparam HostViewT any host-resident dataset view type + * @param[in] res RAFT resources + * @param[in] src host index (graph only, no dataset needed) + * @return device index with graph copied from src + */ +template + requires cuvs::neighbors::is_host_dataset_view_v +auto convert_host_to_device_index(raft::resources const& res, index const& src) + -> index> +{ + using DeviceViewT = cuvs::neighbors::device_counterpart_t; + using GraphIndexType = typename index::graph_index_type; + index out(res, src.metric()); + if (src.graph().size() > 0) { + // The graph lives in device memory owned by `src`. `update_graph(device_view)` would only + // store a view (no ownership transfer), leaving `out` with a dangling pointer once `src` + // is destroyed. Copy device→host→device so that `out` owns its graph memory. + auto graph_host = + raft::make_host_matrix(src.graph().extent(0), src.graph().extent(1)); + raft::copy(graph_host.data_handle(), + src.graph().data_handle(), + src.graph().size(), + raft::resource::get_cuda_stream(res)); + raft::resource::sync_stream(res); + out.update_graph(res, raft::make_const_mdspan(graph_host.view())); // host overload: copies H→D + } + return out; +} + +/** + * @brief Convert a host index to device and attach a device dataset in one step. 
+ * + * Equivalent to `convert_host_to_device_index(res, host_idx)` followed by + * `device_idx.update_dataset(res, device_dataset)`. + * + * @tparam T element type + * @tparam IdxT index type + * @tparam HostViewT host-resident dataset view type + * @tparam DeviceViewT device-resident dataset view of the same kind + * @param[in] res RAFT resources + * @param[in] host_idx host index returned by `build(res, params, host_view)` + * @param[in] device_dataset device dataset view to attach (caller owns underlying memory) + * @return device index with graph and dataset ready for search + */ +template + requires cuvs::neighbors::compatible_host_device_dataset_views_v +auto attach_device_dataset_on_host_index(raft::resources const& res, + index const& host_idx, + DeviceViewT const& device_dataset) + -> index +{ + auto device_idx = convert_host_to_device_index(res, host_idx); + device_idx.update_dataset(res, device_dataset); + return device_idx; +} + } // namespace cagra } // namespace neighbors } // namespace CUVS_EXPORT cuvs diff --git a/cpp/include/cuvs/neighbors/cagra_dataset_view_dispatch.hpp b/cpp/include/cuvs/neighbors/cagra_dataset_view_dispatch.hpp new file mode 100644 index 0000000000..727075618f --- /dev/null +++ b/cpp/include/cuvs/neighbors/cagra_dataset_view_dispatch.hpp @@ -0,0 +1,97 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +/** + * @file cagra_dataset_view_dispatch.hpp + * @brief Template helpers for concrete CAGRA dataset views (no variant dispatch). 
+ */ + +#include +#include +#include +#include + +namespace cuvs::neighbors::cagra { + +template +void expect_cagra_row_width_for_graph(uint32_t logical_dim, int64_t pitch) +{ + static constexpr uint32_t k_default_row_align_bytes = 16; + const uint32_t need = + cuvs::neighbors::cagra_required_row_width(logical_dim, k_default_row_align_bytes); + RAFT_EXPECTS( + pitch == static_cast(need), + "convert_dataset_view_to_padded_for_graph_build: row width in elements (pitch) must match " + "CAGRA's required width for this element type and logical dimension (expected %u, got %ld; " + "logical dim %u). Use make_device_padded_dataset_view() or make_device_padded_dataset() with " + "the same " + "default alignment as CAGRA graph build.", + static_cast(need), + static_cast(pitch), + static_cast(logical_dim)); +} + +template + requires is_padded_dataset_view_v> +auto convert_dataset_view_to_padded_for_graph_build(device_padded_dataset_view const& view) + -> device_padded_dataset_view +{ + expect_cagra_row_width_for_graph(view.dim(), static_cast(view.stride())); + return view; +} + +template + requires is_empty_dataset_view_v> +auto convert_dataset_view_to_padded_for_graph_build(device_empty_dataset_view const&) + -> device_padded_dataset_view +{ + RAFT_FAIL("cagra::build: empty dataset."); +} + +template + requires is_vpq_dataset_view_v> +auto convert_dataset_view_to_padded_for_graph_build(device_vpq_dataset_view const&) + -> device_padded_dataset_view +{ + RAFT_FAIL( + "cagra::build: VPQ-compressed dataset cannot be converted to padded dense rows for graph " + "construction."); +} + +template +auto dataset_view_to_strided_device_matrix(device_padded_dataset_view const& view) + -> raft::device_matrix_view +{ + return raft::make_device_strided_matrix_view( + view.view().data_handle(), view.n_rows(), view.dim(), view.stride()); +} + +template +auto dataset_view_to_strided_device_matrix(device_vpq_dataset_view const& view) + -> raft::device_matrix_view +{ + auto d = view.dim(); + 
return raft::make_device_strided_matrix_view(nullptr, 0, d, d); +} + +template +auto dataset_view_to_strided_device_matrix(device_vpq_dataset_view const& view) + -> raft::device_matrix_view +{ + auto d = view.dim(); + return raft::make_device_strided_matrix_view(nullptr, 0, d, d); +} + +template +auto dataset_view_to_strided_device_matrix(device_empty_dataset_view const& view) + -> raft::device_matrix_view +{ + auto d = view.dim(); + return raft::make_device_strided_matrix_view(nullptr, 0, d, d); +} + +} // namespace cuvs::neighbors::cagra diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 2fd804f115..6da9b2de6b 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -11,10 +11,11 @@ #include #include #include +#include #include #include #include -#include // get_device_for_address +#include // get_device_for_address, copy_matrix #include // rounding up #include @@ -22,12 +23,14 @@ #include #include +#include + +#include #include #include #include #include #include - #ifdef __cpp_lib_bitops #include #endif @@ -135,323 +138,348 @@ enum class MergeStrategy { /** @} */ // end group neighbors_index -/** Two-dimensional dataset; maybe owning, maybe compressed, maybe strided. */ -template +/** + * @brief Tags selecting dataset representation for `dataset` / `dataset_view`. + * + * The first template parameter `containertype` on `dataset` / `dataset_view` is one of these types. 
+ */ +struct host_empty_dataset_container {}; +struct device_empty_dataset_container {}; +struct host_padded_dataset_container {}; +struct device_padded_dataset_container {}; +struct host_vpq_dataset_container {}; +struct device_vpq_dataset_container {}; + +template struct dataset { + static_assert(!std::is_same_v, + "dataset: unsupported containertype / type-parameter combination"); +}; + +template +struct dataset_view { + static_assert(!std::is_same_v, + "dataset_view: unsupported containertype / type-parameter combination"); +}; + +// ----------------------------------------------------------------------------- +// empty +// ----------------------------------------------------------------------------- + +template +struct dataset { using index_type = IdxT; - /** Size of the dataset. */ - [[nodiscard]] virtual auto n_rows() const noexcept -> index_type = 0; - /** Dimensionality of the dataset. */ - [[nodiscard]] virtual auto dim() const noexcept -> uint32_t = 0; - /** Whether the object owns the data. 
*/ - [[nodiscard]] virtual auto is_owning() const noexcept -> bool = 0; - virtual ~dataset() noexcept = default; + uint32_t suggested_dim{}; + explicit dataset(uint32_t dim) noexcept : suggested_dim(dim) {} + [[nodiscard]] auto n_rows() const noexcept -> index_type { return 0; } + [[nodiscard]] auto dim() const noexcept -> uint32_t { return suggested_dim; } + [[nodiscard]] auto as_dataset_view() const noexcept + -> dataset_view + { + return dataset_view{suggested_dim}; + } }; template -struct empty_dataset : public dataset { +struct dataset_view { using index_type = IdxT; - uint32_t suggested_dim; - explicit empty_dataset(uint32_t dim) noexcept : suggested_dim(dim) {} - [[nodiscard]] auto n_rows() const noexcept -> index_type final { return 0; } - [[nodiscard]] auto dim() const noexcept -> uint32_t final { return suggested_dim; } - [[nodiscard]] auto is_owning() const noexcept -> bool final { return true; } + uint32_t suggested_dim_{}; + explicit dataset_view(uint32_t dim) noexcept : suggested_dim_(dim) {} + [[nodiscard]] auto n_rows() const noexcept -> index_type { return 0; } + [[nodiscard]] auto dim() const noexcept -> uint32_t { return suggested_dim_; } }; -template -struct strided_dataset : public dataset { +template +struct dataset { using index_type = IdxT; - using value_type = DataT; - using view_type = raft::device_matrix_view; - [[nodiscard]] auto n_rows() const noexcept -> index_type final { return view().extent(0); } - [[nodiscard]] auto dim() const noexcept -> uint32_t final + uint32_t suggested_dim{}; + explicit dataset(uint32_t dim) noexcept : suggested_dim(dim) {} + [[nodiscard]] auto n_rows() const noexcept -> index_type { return 0; } + [[nodiscard]] auto dim() const noexcept -> uint32_t { return suggested_dim; } + [[nodiscard]] auto as_dataset_view() const noexcept + -> dataset_view + { + return dataset_view{suggested_dim}; + } +}; + +template +struct dataset_view { + using index_type = IdxT; + uint32_t suggested_dim_{}; + explicit 
dataset_view(uint32_t dim) noexcept : suggested_dim_(dim) {} + [[nodiscard]] auto n_rows() const noexcept -> index_type { return 0; } + [[nodiscard]] auto dim() const noexcept -> uint32_t { return suggested_dim_; } +}; + +// ----------------------------------------------------------------------------- +// padded (device row-major with logical dim vs stride) +// ----------------------------------------------------------------------------- + +template +struct dataset { + using index_type = IdxT; + using value_type = DataT; + using storage_type = raft::device_matrix; + using view_type = raft::device_matrix_view; + + storage_type data_; + uint32_t dim_; + + dataset(storage_type&& data, uint32_t logical_dim) noexcept + : data_{std::move(data)}, dim_{logical_dim} + { + } + + [[nodiscard]] auto n_rows() const noexcept -> index_type { return data_.extent(0); } + [[nodiscard]] auto dim() const noexcept -> uint32_t { return dim_; } + [[nodiscard]] auto stride() const noexcept -> uint32_t { - return static_cast(view().extent(1)); + return static_cast(data_.extent(1)); } - /** Leading dimension of the dataset. */ - [[nodiscard]] constexpr auto stride() const noexcept -> uint32_t + [[nodiscard]] auto view() const noexcept -> view_type { return data_.view(); } + [[nodiscard]] auto as_dataset_view() const noexcept + -> dataset_view { - auto v = view(); - return static_cast(v.stride(0) > 0 ? v.stride(0) : v.extent(1)); + return dataset_view(data_.view(), dim_); + } + [[nodiscard]] auto data_handle() noexcept -> value_type* { return data_.data_handle(); } + [[nodiscard]] auto data_handle() const noexcept -> const value_type* + { + return data_.data_handle(); } - /** Get the view of the data. 
*/ - [[nodiscard]] virtual auto view() const noexcept -> view_type = 0; }; template -struct non_owning_dataset : public strided_dataset { +struct dataset_view { using index_type = IdxT; using value_type = DataT; - using typename strided_dataset::view_type; - view_type data; - explicit non_owning_dataset(view_type v) noexcept : data(v) {} - [[nodiscard]] auto is_owning() const noexcept -> bool final { return false; } - [[nodiscard]] auto view() const noexcept -> view_type final { return data; }; -}; + using view_type = raft::device_matrix_view; -template -struct owning_dataset : public strided_dataset { - using index_type = IdxT; - using value_type = DataT; - using typename strided_dataset::view_type; - using storage_type = - raft::mdarray, LayoutPolicy, ContainerPolicy>; - using mapping_type = typename view_type::mapping_type; - storage_type data; - mapping_type view_mapping; - owning_dataset(storage_type&& store, mapping_type view_mapping) noexcept - : data{std::move(store)}, view_mapping{view_mapping} + view_type data_; + uint32_t logical_dim_; + + explicit dataset_view(view_type v) noexcept + : data_(v), logical_dim_(static_cast(v.extent(1))) { } - [[nodiscard]] auto is_owning() const noexcept -> bool final { return true; } - [[nodiscard]] auto view() const noexcept -> view_type final + dataset_view(view_type v, uint32_t logical_dim) noexcept : data_(v), logical_dim_(logical_dim) {} + + dataset_view(dataset_view const& other) noexcept + : data_(other.data_), logical_dim_(other.logical_dim_) { - return view_type{data.data_handle(), view_mapping}; - }; -}; + } -template -struct is_strided_dataset : std::false_type {}; + [[nodiscard]] auto n_rows() const noexcept -> index_type { return data_.extent(0); } + [[nodiscard]] auto dim() const noexcept -> uint32_t { return logical_dim_; } + [[nodiscard]] auto stride() const noexcept -> uint32_t + { + return static_cast(data_.stride(0) > 0 ? 
data_.stride(0) : data_.extent(1)); + } + [[nodiscard]] auto view() const noexcept -> view_type { return data_; } +}; -template -struct is_strided_dataset> : std::true_type {}; +// ----------------------------------------------------------------------------- +// padded (host row-major with logical dim vs stride) +// ----------------------------------------------------------------------------- template -struct is_strided_dataset> : std::true_type {}; +struct dataset { + using index_type = IdxT; + using value_type = DataT; + using storage_type = raft::host_matrix; + using view_type = raft::host_matrix_view; -template -struct is_strided_dataset> - : std::true_type {}; + storage_type data_; + uint32_t dim_; -template -inline constexpr bool is_strided_dataset_v = is_strided_dataset::value; + dataset(storage_type&& data, uint32_t logical_dim) noexcept + : data_{std::move(data)}, dim_{logical_dim} + { + } -/** - * @brief Construct a strided matrix from any mdarray or mdspan. - * - * This function constructs a non-owning view if the input satisfied two conditions: - * - * 1) The data is accessible from the current device - * 2) The memory layout is the same as expected (row-major matrix with the required stride) - * - * Otherwise, this function constructs an owning device matrix and copies the data. - * When the data is copied, padding elements are filled with zeroes. 
- * - * @tparam SrcT the source mdarray or mdspan - * - * @param[in] res raft resources handle - * @param[in] src the source mdarray or mdspan - * @param[in] required_stride the leading dimension (in elements) - * @return maybe owning current-device-accessible strided matrix - */ -template -auto make_strided_dataset(const raft::resources& res, const SrcT& src, uint32_t required_stride) - -> std::unique_ptr> -{ - using extents_type = typename SrcT::extents_type; - using value_type = typename SrcT::value_type; - using index_type = typename SrcT::index_type; - using layout_type = typename SrcT::layout_type; - static_assert(extents_type::rank() == 2, "The input must be a matrix."); - static_assert(std::is_same_v || - std::is_same_v> || - std::is_same_v, - "The input must be row-major"); - RAFT_EXPECTS(src.extent(1) <= required_stride, - "The input row length must be not larger than the desired stride."); - cudaPointerAttributes ptr_attrs; - RAFT_CUDA_TRY(cudaPointerGetAttributes(&ptr_attrs, src.data_handle())); - auto* device_ptr = reinterpret_cast(ptr_attrs.devicePointer); - const uint32_t src_stride = src.stride(0) > 0 ? 
src.stride(0) : src.extent(1); - const bool device_accessible = device_ptr != nullptr; - const bool row_major = src.stride(1) <= 1; - const bool stride_matches = required_stride == src_stride; - - if (device_accessible && row_major && stride_matches) { - // Everything matches: make a non-owning dataset - return std::make_unique>( - raft::make_device_strided_matrix_view( - device_ptr, src.extent(0), src.extent(1), required_stride)); + [[nodiscard]] auto n_rows() const noexcept -> index_type { return data_.extent(0); } + [[nodiscard]] auto dim() const noexcept -> uint32_t { return dim_; } + [[nodiscard]] auto stride() const noexcept -> uint32_t + { + return static_cast(data_.extent(1)); } - // Something is wrong: have to make a copy and produce an owning dataset - auto out_layout = - raft::make_strided_layout(src.extents(), cuda::std::array{required_stride, 1}); - auto out_array = - raft::make_device_matrix(res, src.extent(0), required_stride); + [[nodiscard]] auto view() const noexcept -> view_type { return data_.view(); } + [[nodiscard]] auto as_dataset_view() const noexcept + -> dataset_view + { + return dataset_view(data_.view(), dim_); + } + [[nodiscard]] auto data_handle() noexcept -> value_type* { return data_.data_handle(); } + [[nodiscard]] auto data_handle() const noexcept -> const value_type* + { + return data_.data_handle(); + } +}; - using out_mdarray_type = decltype(out_array); - using out_layout_type = typename out_mdarray_type::layout_type; - using out_container_policy_type = typename out_mdarray_type::container_policy_type; - using out_owning_type = - owning_dataset; +template +struct dataset_view { + using index_type = IdxT; + using value_type = DataT; + using view_type = raft::host_matrix_view; - RAFT_CUDA_TRY(cudaMemsetAsync(out_array.data_handle(), - 0, - out_array.size() * sizeof(value_type), - raft::resource::get_cuda_stream(res))); - raft::copy_matrix(out_array.data_handle(), - required_stride, - src.data_handle(), - src_stride, - 
src.extent(1), - src.extent(0), - raft::resource::get_cuda_stream(res)); + view_type data_; + uint32_t logical_dim_; - return std::make_unique(std::move(out_array), out_layout); -} + explicit dataset_view(view_type v) noexcept + : data_(v), logical_dim_(static_cast(v.extent(1))) + { + } -/** - * @brief Construct a strided matrix from any mdarray. - * - * This function constructs an owning device matrix and copies the data. - * When the data is copied, padding elements are filled with zeroes. - * - * @tparam DataT - * @tparam IdxT - * @tparam LayoutPolicy - * @tparam ContainerPolicy - * - * @param[in] res raft resources handle - * @param[in] src the source mdarray or mdspan - * @param[in] required_stride the leading dimension (in elements) - * @return owning current-device-accessible strided matrix - */ -template -auto make_strided_dataset( - const raft::resources& res, - raft::mdarray, LayoutPolicy, ContainerPolicy>&& src, - uint32_t required_stride) -> std::unique_ptr> -{ - using value_type = DataT; - using index_type = IdxT; - using layout_type = LayoutPolicy; - using container_policy_type = ContainerPolicy; - static_assert(std::is_same_v || - std::is_same_v> || - std::is_same_v, - "The input must be row-major"); - RAFT_EXPECTS(src.extent(1) <= required_stride, - "The input row length must be not larger than the desired stride."); - const uint32_t src_stride = src.stride(0) > 0 ? 
src.stride(0) : src.extent(1); - const bool stride_matches = required_stride == src_stride; - - auto out_layout = - raft::make_strided_layout(src.extents(), cuda::std::array{required_stride, 1}); - - using out_mdarray_type = raft::device_matrix; - using out_layout_type = typename out_mdarray_type::layout_type; - using out_container_policy_type = typename out_mdarray_type::container_policy_type; - using out_owning_type = - owning_dataset; - - if constexpr (std::is_same_v && - std::is_same_v) { - if (stride_matches) { - // Everything matches, we can own the mdarray - return std::make_unique(std::move(src), out_layout); - } + dataset_view(view_type v, uint32_t logical_dim) noexcept : data_(v), logical_dim_(logical_dim) {} + + dataset_view(dataset_view const& other) noexcept + : data_(other.data_), logical_dim_(other.logical_dim_) + { } - // Something is wrong: have to make a copy and produce an owning dataset - auto out_array = - raft::make_device_matrix(res, src.extent(0), required_stride); - RAFT_CUDA_TRY(cudaMemsetAsync(out_array.data_handle(), - 0, - out_array.size() * sizeof(value_type), - raft::resource::get_cuda_stream(res))); - raft::copy_matrix(out_array.data_handle(), - required_stride, - src.data_handle(), - src_stride, - src.extent(1), - src.extent(0), - raft::resource::get_cuda_stream(res)); + [[nodiscard]] auto n_rows() const noexcept -> index_type { return data_.extent(0); } + [[nodiscard]] auto dim() const noexcept -> uint32_t { return logical_dim_; } + [[nodiscard]] auto stride() const noexcept -> uint32_t + { + return static_cast(data_.stride(0) > 0 ? 
data_.stride(0) : data_.extent(1)); + } + [[nodiscard]] auto view() const noexcept -> view_type { return data_; } +}; - return std::make_unique(std::move(out_array), out_layout); -} +// ----------------------------------------------------------------------------- +// VPQ compressed owning dataset (+ non-owning view below) +// ----------------------------------------------------------------------------- -/** - * @brief Construct a strided matrix from any mdarray or mdspan. - * - * A variant `make_strided_dataset` that allows specifying the byte alignment instead of the - * explicit stride length. - * - * @tparam SrcT the source mdarray or mdspan - * - * @param[in] res raft resources handle - * @param[in] src the source mdarray or mdspan - * @param[in] align_bytes the required byte alignment for the dataset rows. - * @return maybe owning current-device-accessible strided matrix - */ -template -auto make_aligned_dataset(const raft::resources& res, SrcT src, uint32_t align_bytes = 16) - -> std::unique_ptr> -{ - using source_type = std::remove_cv_t>; - using value_type = typename source_type::value_type; - constexpr size_t kSize = sizeof(value_type); - uint32_t required_stride = - raft::round_up_safe(src.extent(1) * kSize, std::lcm(align_bytes, kSize)) / kSize; - return make_strided_dataset(res, std::forward(src), required_stride); -} -/** - * @brief VPQ compressed dataset. - * - * The dataset is compressed using two level quantization - * - * 1. Vector Quantization - * 2. 
Product Quantization of residuals - * - * @tparam MathT the type of elements in the codebooks - * @tparam IdxT type of the vector indices (represent dataset.extent(0)) - * - */ -template -struct vpq_dataset : public dataset { +template +struct dataset_view; + +template +struct dataset_view; + +template +[[nodiscard]] auto vpq_dataset_as_view_impl(dataset const* self) + -> dataset_view; + +template +struct dataset { using index_type = IdxT; - using math_type = MathT; - /** Vector Quantization codebook - "coarse cluster centers". */ + /** Same as `DataT`: floating-point type used for VQ/PQ codebooks (rows are still uint8 codes). */ + using math_type = DataT; raft::device_matrix vq_code_book; - /** Product Quantization codebook - "fine cluster centers". */ raft::device_matrix pq_code_book; - /** Compressed dataset. */ raft::device_matrix data; - vpq_dataset(raft::device_matrix&& vq_code_book, - raft::device_matrix&& pq_code_book, - raft::device_matrix&& data) + dataset(raft::device_matrix&& vq_code_book, + raft::device_matrix&& pq_code_book, + raft::device_matrix&& data) + : vq_code_book{std::move(vq_code_book)}, + pq_code_book{std::move(pq_code_book)}, + data{std::move(data)} + { + } + + [[nodiscard]] auto n_rows() const noexcept -> index_type { return data.extent(0); } + [[nodiscard]] auto dim() const noexcept -> uint32_t { return vq_code_book.extent(1); } + + [[nodiscard]] constexpr inline auto encoded_row_length() const noexcept -> uint32_t + { + return data.extent(1); + } + [[nodiscard]] constexpr inline auto vq_n_centers() const noexcept -> uint32_t + { + return vq_code_book.extent(0); + } + [[nodiscard]] constexpr inline auto pq_bits() const noexcept -> uint32_t + { + auto pq_width = pq_n_centers(); +#ifdef __cpp_lib_bitops + return std::countr_zero(pq_width); +#else + uint32_t pq_bits = 0; + while (pq_width > 1) { + pq_bits++; + pq_width >>= 1; + } + return pq_bits; +#endif + } + [[nodiscard]] constexpr inline auto pq_dim() const noexcept -> uint32_t + { + 
return raft::div_rounding_up_unsafe(dim(), pq_len()); + } + [[nodiscard]] constexpr inline auto pq_len() const noexcept -> uint32_t + { + return pq_code_book.extent(1); + } + [[nodiscard]] constexpr inline auto pq_n_centers() const noexcept -> uint32_t + { + return pq_code_book.extent(0); + } + + [[nodiscard]] auto as_dataset_view() const + -> dataset_view + { + return vpq_dataset_as_view_impl(this); + } +}; + +template +struct dataset_view { + using index_type = IdxT; + using target_type = dataset; + + target_type const* target_{}; + + dataset_view() = default; + + explicit dataset_view(target_type const* ptr) : target_(ptr) + { + RAFT_EXPECTS(ptr != nullptr, "device_vpq_dataset_view: null target"); + } + + [[nodiscard]] auto n_rows() const noexcept -> index_type + { + return target_ != nullptr ? target_->n_rows() : index_type{0}; + } + [[nodiscard]] auto dim() const noexcept -> uint32_t + { + return target_ != nullptr ? target_->dim() : uint32_t{0}; + } + [[nodiscard]] target_type const& dset() const noexcept { return *target_; } +}; + +template +struct dataset { + using index_type = IdxT; + using math_type = DataT; + raft::host_matrix vq_code_book; + raft::host_matrix pq_code_book; + raft::host_matrix data; + + dataset(raft::host_matrix&& vq_code_book, + raft::host_matrix&& pq_code_book, + raft::host_matrix&& data) : vq_code_book{std::move(vq_code_book)}, pq_code_book{std::move(pq_code_book)}, data{std::move(data)} { } - [[nodiscard]] auto n_rows() const noexcept -> index_type final { return data.extent(0); } - [[nodiscard]] auto dim() const noexcept -> uint32_t final { return vq_code_book.extent(1); } - [[nodiscard]] auto is_owning() const noexcept -> bool final { return true; } + [[nodiscard]] auto n_rows() const noexcept -> index_type { return data.extent(0); } + [[nodiscard]] auto dim() const noexcept -> uint32_t { return vq_code_book.extent(1); } - /** Row length of the encoded data in bytes. 
*/ [[nodiscard]] constexpr inline auto encoded_row_length() const noexcept -> uint32_t { return data.extent(1); } - /** The number of "coarse cluster centers" */ [[nodiscard]] constexpr inline auto vq_n_centers() const noexcept -> uint32_t { return vq_code_book.extent(0); } - /** The bit length of an encoded vector element after compression by PQ. */ [[nodiscard]] constexpr inline auto pq_bits() const noexcept -> uint32_t { - /* - NOTE: pq_bits and the book size - - Normally, we'd store `pq_bits` as a part of the index. - However, we know there's an invariant `pq_n_centers = 1 << pq_bits`, i.e. the codebook size is - the same as the number of possible code values. Hence, we don't store the pq_bits and derive it - from the array dimensions instead. - */ auto pq_width = pq_n_centers(); #ifdef __cpp_lib_bitops return std::countr_zero(pq_width); @@ -464,32 +492,329 @@ struct vpq_dataset : public dataset { return pq_bits; #endif } - /** The dimensionality of an encoded vector after compression by PQ. */ [[nodiscard]] constexpr inline auto pq_dim() const noexcept -> uint32_t { return raft::div_rounding_up_unsafe(dim(), pq_len()); } - /** Dimensionality of a subspaces, i.e. the number of vector components mapped to a subspace */ [[nodiscard]] constexpr inline auto pq_len() const noexcept -> uint32_t { return pq_code_book.extent(1); } - /** The number of vectors in a PQ codebook (`1 << pq_bits`). 
*/ [[nodiscard]] constexpr inline auto pq_n_centers() const noexcept -> uint32_t { return pq_code_book.extent(0); } + + [[nodiscard]] auto as_dataset_view() const + -> dataset_view + { + return vpq_dataset_as_view_impl(this); + } +}; + +template +struct dataset_view { + using index_type = IdxT; + using target_type = dataset; + + target_type const* target_{}; + + dataset_view() = default; + + explicit dataset_view(target_type const* ptr) : target_(ptr) + { + RAFT_EXPECTS(ptr != nullptr, "host_vpq_dataset_view: null target"); + } + + [[nodiscard]] auto n_rows() const noexcept -> index_type + { + return target_ != nullptr ? target_->n_rows() : index_type{0}; + } + [[nodiscard]] auto dim() const noexcept -> uint32_t + { + return target_ != nullptr ? target_->dim() : uint32_t{0}; + } + [[nodiscard]] target_type const& dset() const noexcept { return *target_; } +}; + +template +[[nodiscard]] inline auto vpq_dataset_as_view_impl(dataset const* self) + -> dataset_view +{ + return dataset_view(self); +} + +/** + * @brief Aliases for concrete `dataset` / `dataset_view` layouts. + */ +template +using device_empty_dataset = dataset; + +template +using device_empty_dataset_view = dataset_view; + +template +using host_empty_dataset = dataset; + +template +using host_empty_dataset_view = dataset_view; + +template +using device_padded_dataset = dataset; + +template +using device_padded_dataset_view = dataset_view; + +template +using host_padded_dataset = dataset; + +template +using host_padded_dataset_view = dataset_view; + +template +using device_vpq_dataset = dataset; + +template +using device_vpq_dataset_view = dataset_view; + +template +using host_vpq_dataset = dataset; + +template +using host_vpq_dataset_view = dataset_view; + +// Maps a dataset view type to its owning (allocating) dataset counterpart. +// Used by serialize/deserialize to type the out_dataset output parameter; +// adding a new dataset type only requires adding a new specialization here. 
+template +struct owning_dataset_for_view; + +template +struct owning_dataset_for_view> { + using type = device_padded_dataset; +}; + +template +struct owning_dataset_for_view> { + using type = device_vpq_dataset; }; +template +using owning_dataset_for_view_t = typename owning_dataset_for_view::type; + +template +struct is_padded_dataset : std::false_type {}; + +template +struct is_padded_dataset> : std::true_type {}; + +template +struct is_padded_dataset> : std::true_type {}; + +template +struct is_padded_dataset> : std::true_type {}; + +template +struct is_padded_dataset> : std::true_type {}; + +template +inline constexpr bool is_padded_dataset_v = is_padded_dataset::value; + template struct is_vpq_dataset : std::false_type {}; -template -struct is_vpq_dataset> : std::true_type {}; +template +struct is_vpq_dataset> : std::true_type {}; + +template +struct is_vpq_dataset> : std::true_type {}; template inline constexpr bool is_vpq_dataset_v = is_vpq_dataset::value; +// ----------------------------------------------------------------------------- +// CAGRA row width in elements (same for make_device_padded_dataset* and index layout checks). +// ----------------------------------------------------------------------------- + +/** + * @brief Required row width in elements for CAGRA: minimum leading dimension (LDA) per row for the + * default per-row byte alignment (16 bytes, combined with `sizeof` element type), given + * `logical_columns` feature columns. 
+ */ +[[nodiscard]] inline uint32_t cagra_required_row_width(uint32_t logical_columns, + std::size_t sizeof_value, + uint32_t align_bytes = 16) +{ + return static_cast( + raft::round_up_safe(static_cast(logical_columns) * sizeof_value, + std::lcm(align_bytes, static_cast(sizeof_value))) / + sizeof_value); +} + +template +[[nodiscard]] inline uint32_t cagra_required_row_width(uint32_t logical_columns, + uint32_t align_bytes = 16) +{ + return cagra_required_row_width(logical_columns, sizeof(ValueT), align_bytes); +} + +/** Actual row width in elements (leading dimension) of a 2D row-major matrix view. */ +template +[[nodiscard]] inline uint32_t matrix_actual_row_width(raft::device_matrix_view m) +{ + return m.stride(0) > 0 ? static_cast(m.stride(0)) : static_cast(m.extent(1)); +} + +template +[[nodiscard]] inline uint32_t matrix_actual_row_width(raft::host_matrix_view m) +{ + return m.stride(0) > 0 ? static_cast(m.stride(0)) : static_cast(m.extent(1)); +} + +/** + * @brief True if the matrix's row width in elements matches `cagra_required_row_width` for + * `m.extent(1)` and element type `T` (CAGRA row layout is satisfied for this view). 
+ */ +template +[[nodiscard]] inline bool matrix_row_width_matches_cagra_required( + raft::device_matrix_view m, uint32_t align_bytes = 16) +{ + using value_type = std::remove_const_t; + const uint32_t need = + cagra_required_row_width(static_cast(m.extent(1)), align_bytes); + return matrix_actual_row_width(m) == need; +} + +template +[[nodiscard]] inline bool matrix_row_width_matches_cagra_required(raft::host_matrix_view m, + uint32_t align_bytes = 16) +{ + using value_type = std::remove_const_t; + const uint32_t need = + cagra_required_row_width(static_cast(m.extent(1)), align_bytes); + return matrix_actual_row_width(m) == need; +} + +template +auto make_device_padded_dataset_view(const raft::resources& res, + SrcT const& src, + uint32_t align_bytes = 16) + -> device_padded_dataset_view +{ + using value_type = typename SrcT::value_type; + using index_type = typename SrcT::index_type; + uint32_t required_stride = + cagra_required_row_width(static_cast(src.extent(1)), align_bytes); + uint32_t src_stride = src.stride(0) > 0 ? static_cast(src.stride(0)) : src.extent(1); + cudaPointerAttributes ptr_attrs; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&ptr_attrs, src.data_handle())); + auto* device_ptr = reinterpret_cast(ptr_attrs.devicePointer); + RAFT_EXPECTS(device_ptr != nullptr, + "make_device_padded_dataset_view: source must be device-accessible. " + "Use make_device_padded_dataset() to get an owning copy."); + RAFT_EXPECTS( + src_stride == required_stride, + "make_device_padded_dataset_view: stride is incorrect (required stride for alignment). 
" + "Use make_device_padded_dataset() to get an owning padded copy."); + auto v = + raft::make_device_matrix_view(device_ptr, src.extent(0), static_cast(src_stride)); + return device_padded_dataset_view(v, src.extent(1)); +} + +template +auto make_device_padded_dataset(const raft::resources& res, + SrcT const& src, + uint32_t align_bytes = 16) + -> std::unique_ptr> +{ + using value_type = typename SrcT::value_type; + using index_type = typename SrcT::index_type; + uint32_t required_stride = + cagra_required_row_width(static_cast(src.extent(1)), align_bytes); + uint32_t src_stride = src.stride(0) > 0 ? static_cast(src.stride(0)) : src.extent(1); + cudaPointerAttributes ptr_attrs; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&ptr_attrs, src.data_handle())); + bool const device_src = + (ptr_attrs.type == cudaMemoryTypeDevice) || (ptr_attrs.type == cudaMemoryTypeManaged); + if (device_src && src_stride == required_stride) { + RAFT_EXPECTS(false, + "make_device_padded_dataset: source is device and stride is already correct. 
" + "Use make_device_padded_dataset_view() to get a view instead."); + } + RAFT_EXPECTS(src.extent(1) <= required_stride, + "Source row length must not exceed required stride."); + auto out_array = + raft::make_device_matrix(res, src.extent(0), required_stride); + RAFT_CUDA_TRY(cudaMemsetAsync(out_array.data_handle(), + 0, + out_array.size() * sizeof(value_type), + raft::resource::get_cuda_stream(res))); + raft::copy_matrix(out_array.data_handle(), + required_stride, + src.data_handle(), + src_stride, + src.extent(1), + src.extent(0), + raft::resource::get_cuda_stream(res)); + return std::make_unique>( + std::move(out_array), static_cast(src.extent(1))); +} + +template +auto make_host_padded_dataset_view(SrcT const& src, uint32_t align_bytes = 16) + -> host_padded_dataset_view +{ + using value_type = typename SrcT::value_type; + using index_type = typename SrcT::index_type; + uint32_t required_stride = + cagra_required_row_width(static_cast(src.extent(1)), align_bytes); + uint32_t src_stride = src.stride(0) > 0 ? static_cast(src.stride(0)) : src.extent(1); + RAFT_EXPECTS(raft::get_device_for_address(src.data_handle()) == -1, + "make_host_padded_dataset_view: source must be host-accessible. " + "Use make_host_padded_dataset() to get an owning copy."); + RAFT_EXPECTS( + src_stride == required_stride, + "make_host_padded_dataset_view: stride is incorrect (required stride for alignment). 
" + "Use make_host_padded_dataset() to get an owning padded copy."); + auto v = raft::make_host_matrix_view( + const_cast(src.data_handle()), src.extent(0), static_cast(src_stride)); + return host_padded_dataset_view(v, src.extent(1)); +} + +template +auto make_host_padded_dataset(const raft::resources& res, + SrcT const& src, + uint32_t align_bytes = 16) + -> std::unique_ptr> +{ + using value_type = typename SrcT::value_type; + using index_type = typename SrcT::index_type; + uint32_t required_stride = + cagra_required_row_width(static_cast(src.extent(1)), align_bytes); + uint32_t src_stride = src.stride(0) > 0 ? static_cast(src.stride(0)) : src.extent(1); + RAFT_EXPECTS(raft::get_device_for_address(src.data_handle()) == -1, + "make_host_padded_dataset: source must be host-accessible. " + "Use make_device_padded_dataset() for device sources."); + if (src_stride == required_stride) { + RAFT_EXPECTS(false, + "make_host_padded_dataset: source stride is already correct. " + "Use make_host_padded_dataset_view() to get a view instead."); + } + RAFT_EXPECTS(src.extent(1) <= required_stride, + "Source row length must not exceed required stride."); + auto out_array = raft::make_host_matrix(src.extent(0), required_stride); + std::memset(out_array.data_handle(), 0, out_array.size() * sizeof(value_type)); + raft::copy_matrix(out_array.data_handle(), + required_stride, + src.data_handle(), + src_stride, + src.extent(1), + src.extent(0), + raft::resource::get_cuda_stream(res)); + return std::make_unique>( + std::move(out_array), static_cast(src.extent(1))); +} + namespace filtering { /** @@ -896,11 +1221,16 @@ using namespace raft; template struct iface { - iface() : mutex_(std::make_shared()) {} + iface() : cagra_owned_dataset_(nullptr), mutex_(std::make_shared()) {} const IdxT size() const { return index_.value().size(); } std::optional index_; + /** Used by CAGRA when built from host: holds device copy so index dataset view stays valid. 
*/ + std::optional> cagra_build_dataset_; + /** Used by CAGRA when deserializing an index that contains a dataset; keeps it alive for the + * view. */ + std::unique_ptr> cagra_owned_dataset_; std::shared_ptr mutex_; }; diff --git a/cpp/include/cuvs/neighbors/composite/index.hpp b/cpp/include/cuvs/neighbors/composite/index.hpp index d7970a5cd6..756df25e3a 100644 --- a/cpp/include/cuvs/neighbors/composite/index.hpp +++ b/cpp/include/cuvs/neighbors/composite/index.hpp @@ -48,7 +48,8 @@ class CUVS_EXPORT composite_index { using out_index_type = OutputIdxT; using matrix_index_type = int64_t; - explicit composite_index(std::vector*> children) + explicit composite_index( + std::vector*> children) : children_(std::move(children)) { } @@ -91,7 +92,7 @@ class CUVS_EXPORT composite_index { } private: - std::vector*> children_; + std::vector*> children_; }; } // namespace composite diff --git a/cpp/include/cuvs/neighbors/dataset_view_concepts.hpp b/cpp/include/cuvs/neighbors/dataset_view_concepts.hpp new file mode 100644 index 0000000000..dd0608cc73 --- /dev/null +++ b/cpp/include/cuvs/neighbors/dataset_view_concepts.hpp @@ -0,0 +1,230 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +/** + * @file dataset_view_concepts.hpp + * @brief Compile-time contracts for CAGRA (and shared) dataset view types. + * + * These replace runtime `std::variant` dispatch: each `DatasetViewT` is a concrete + * `dataset_view` specialization known at compile time. + */ + +#include + +#include +#include +#include + +namespace cuvs::neighbors { + +/** Any non-owning dataset view exposing row count and logical dimension. 
*/ +template +concept cagra_dataset_view = requires(V const& v) { + { v.n_rows() } -> std::convertible_to; + { v.dim() } -> std::convertible_to; +}; + +enum class dataset_view_kind { + // TODO(removal): Remove `unknown` once all deprecated host_matrix_view / device_matrix_view / + // mdspan overloads are deleted. It exists solely so that overload resolution on the deprecated + // build(host_matrix_view) / build(device_matrix_view) shims does not cause a hard error when + // the compiler evaluates is_host/device_dataset_view_v for a plain mdspan type. + unknown, + device_empty, + host_empty, + device_padded, + host_padded, + device_vpq_f16, + host_vpq_f16, + device_vpq_f32, + host_vpq_f32, +}; + +/** Primary template returns `unknown` so traits safely return `false` for non-dataset-view types. + */ +template +struct dataset_view_kind_of { + static constexpr dataset_view_kind value = dataset_view_kind::unknown; +}; + +template +struct dataset_view_kind_of> { + static constexpr dataset_view_kind value = dataset_view_kind::device_empty; +}; + +template +struct dataset_view_kind_of> { + static constexpr dataset_view_kind value = dataset_view_kind::host_empty; +}; + +template +struct dataset_view_kind_of> { + static constexpr dataset_view_kind value = dataset_view_kind::device_padded; +}; + +template +struct dataset_view_kind_of> { + static constexpr dataset_view_kind value = dataset_view_kind::host_padded; +}; + +template +struct dataset_view_kind_of> { + static constexpr dataset_view_kind value = dataset_view_kind::device_vpq_f16; +}; + +template +struct dataset_view_kind_of> { + static constexpr dataset_view_kind value = dataset_view_kind::device_vpq_f32; +}; + +template +struct dataset_view_kind_of> { + static constexpr dataset_view_kind value = dataset_view_kind::host_vpq_f16; +}; + +template +struct dataset_view_kind_of> { + static constexpr dataset_view_kind value = dataset_view_kind::host_vpq_f32; +}; + +template +using dataset_view_type_t = std::remove_cvref_t; 
+ +template +inline constexpr dataset_view_kind dataset_view_kind_v = + dataset_view_kind_of>::value; + +template +inline constexpr bool is_device_empty_dataset_view_v = + dataset_view_kind_v == dataset_view_kind::device_empty; + +template +inline constexpr bool is_host_empty_dataset_view_v = + dataset_view_kind_v == dataset_view_kind::host_empty; + +/** True for any empty dataset view (device or host). */ +template +inline constexpr bool is_empty_dataset_view_v = + is_device_empty_dataset_view_v || is_host_empty_dataset_view_v; + +template +inline constexpr bool is_device_padded_dataset_view_v = + dataset_view_kind_v == dataset_view_kind::device_padded; + +template +inline constexpr bool is_host_padded_dataset_view_v = + dataset_view_kind_v == dataset_view_kind::host_padded; + +/** True for either `device_padded_dataset_view` or `host_padded_dataset_view`. */ +template +inline constexpr bool is_padded_dataset_view_v = + is_device_padded_dataset_view_v || is_host_padded_dataset_view_v; + +template +inline constexpr bool is_device_vpq_f16_dataset_view_v = + dataset_view_kind_v == dataset_view_kind::device_vpq_f16; + +template +inline constexpr bool is_host_vpq_f16_dataset_view_v = + dataset_view_kind_v == dataset_view_kind::host_vpq_f16; + +template +inline constexpr bool is_vpq_f16_dataset_view_v = + is_device_vpq_f16_dataset_view_v || is_host_vpq_f16_dataset_view_v; + +template +inline constexpr bool is_device_vpq_f32_dataset_view_v = + dataset_view_kind_v == dataset_view_kind::device_vpq_f32; + +template +inline constexpr bool is_host_vpq_f32_dataset_view_v = + dataset_view_kind_v == dataset_view_kind::host_vpq_f32; + +template +inline constexpr bool is_vpq_f32_dataset_view_v = + is_device_vpq_f32_dataset_view_v || is_host_vpq_f32_dataset_view_v; + +template +inline constexpr bool is_device_vpq_dataset_view_v = + is_device_vpq_f16_dataset_view_v || is_device_vpq_f32_dataset_view_v; + +template +inline constexpr bool is_host_vpq_dataset_view_v = + 
is_host_vpq_f16_dataset_view_v || is_host_vpq_f32_dataset_view_v; + +template +inline constexpr bool is_vpq_dataset_view_v = + is_device_vpq_dataset_view_v || is_host_vpq_dataset_view_v; + +/** True for any device-resident dataset view. */ +template +inline constexpr bool is_device_dataset_view_v = + is_device_empty_dataset_view_v || is_device_padded_dataset_view_v || + is_device_vpq_dataset_view_v; + +/** True for any host-resident dataset view. */ +template +inline constexpr bool is_host_dataset_view_v = + is_host_empty_dataset_view_v || is_host_padded_dataset_view_v || + is_host_vpq_dataset_view_v; + +/** + * True when a host view `H` and device view `D` represent the same storage kind and differ + * only in residency (host vs. device). Used to constrain `attach_device_dataset_on_host_index`. + */ +template +inline constexpr bool compatible_host_device_dataset_views_v = + (is_host_padded_dataset_view_v && is_device_padded_dataset_view_v) || + (is_host_vpq_f16_dataset_view_v && is_device_vpq_f16_dataset_view_v) || + (is_host_vpq_f32_dataset_view_v && is_device_vpq_f32_dataset_view_v) || + (is_host_empty_dataset_view_v && is_device_empty_dataset_view_v); + +/** Maps a host dataset view type to its device-resident counterpart. */ +template +struct device_counterpart; + +template +struct device_counterpart> { + using type = device_padded_dataset_view; +}; + +template +struct device_counterpart> { + using type = device_vpq_dataset_view; +}; + +template +struct device_counterpart> { + using type = device_empty_dataset_view; +}; + +template +using device_counterpart_t = typename device_counterpart>::type; + +/** Element type `T` for `cagra::build(res, params, dataset_view)` (deduced, not a template arg). 
*/ +template +struct cagra_view_element_type; + +template +struct cagra_view_element_type> { + using type = DataT; +}; + +template +struct cagra_view_element_type> { + using type = DataT; +}; + +template +struct cagra_view_element_type> { + using type = MathT; +}; + +template +using cagra_view_element_type_t = typename cagra_view_element_type>::type; + +} // namespace cuvs::neighbors diff --git a/cpp/include/cuvs/neighbors/hnsw.hpp b/cpp/include/cuvs/neighbors/hnsw.hpp index fb726fed71..2b638c5f5e 100644 --- a/cpp/include/cuvs/neighbors/hnsw.hpp +++ b/cpp/include/cuvs/neighbors/hnsw.hpp @@ -7,12 +7,8 @@ #pragma once -#include "common.hpp" - #include #include - -#include "cagra.hpp" #include #include @@ -474,7 +470,7 @@ std::unique_ptr> build( std::unique_ptr> from_cagra( raft::resources const& res, const index_params& params, - const cuvs::neighbors::cagra::index& cagra_index, + const cuvs::neighbors::cagra::device_padded_index& cagra_index, std::optional> dataset = std::nullopt); @@ -510,7 +506,7 @@ std::unique_ptr> from_cagra( std::unique_ptr> from_cagra( raft::resources const& res, const index_params& params, - const cuvs::neighbors::cagra::index& cagra_index, + const cuvs::neighbors::cagra::device_padded_index& cagra_index, std::optional> dataset = std::nullopt); @@ -546,7 +542,7 @@ std::unique_ptr> from_cagra( std::unique_ptr> from_cagra( raft::resources const& res, const index_params& params, - const cuvs::neighbors::cagra::index& cagra_index, + const cuvs::neighbors::cagra::device_padded_index& cagra_index, std::optional> dataset = std::nullopt); @@ -582,7 +578,7 @@ std::unique_ptr> from_cagra( std::unique_ptr> from_cagra( raft::resources const& res, const index_params& params, - const cuvs::neighbors::cagra::index& cagra_index, + const cuvs::neighbors::cagra::device_padded_index& cagra_index, std::optional> dataset = std::nullopt); diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index d2e0015498..8bca8c9d31 
100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -5,7 +5,6 @@ #pragma once -#include "common.hpp" #include #include #include diff --git a/cpp/include/cuvs/neighbors/ivf_sq.hpp b/cpp/include/cuvs/neighbors/ivf_sq.hpp index 6ac765213c..df9aa5a650 100644 --- a/cpp/include/cuvs/neighbors/ivf_sq.hpp +++ b/cpp/include/cuvs/neighbors/ivf_sq.hpp @@ -5,7 +5,6 @@ #pragma once -#include "common.hpp" #include #include #include diff --git a/cpp/include/cuvs/neighbors/tiered_index.hpp b/cpp/include/cuvs/neighbors/tiered_index.hpp index 8d0e18281c..21f30dc7a3 100644 --- a/cpp/include/cuvs/neighbors/tiered_index.hpp +++ b/cpp/include/cuvs/neighbors/tiered_index.hpp @@ -87,7 +87,7 @@ struct index_params : upstream_index_params_type { auto build(raft::resources const& res, const index_params& index_params, raft::device_matrix_view dataset) - -> tiered_index::index>; + -> tiered_index::index>; /** @copydoc build */ auto build(raft::resources const& res, @@ -121,7 +121,7 @@ auto build(raft::resources const& res, */ void extend(raft::resources const& res, raft::device_matrix_view new_vectors, - tiered_index::index>* idx); + tiered_index::index>* idx); /** @copydoc extend */ void extend(raft::resources const& res, @@ -141,7 +141,8 @@ void extend(raft::resources const& res, * @param[in] res * @param[inout] idx */ -void compact(raft::resources const& res, tiered_index::index>* idx); +void compact(raft::resources const& res, + tiered_index::index>* idx); /** @copydoc compact */ void compact(raft::resources const& res, tiered_index::index>* idx); @@ -166,7 +167,7 @@ void compact(raft::resources const& res, */ void search(raft::resources const& res, const cagra::search_params& search_params, - const tiered_index::index>& index, + const tiered_index::index>& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -205,10 +206,11 @@ void search(raft::resources const& res, * * 
@return A new tiered index containing the merged indices */ -auto merge(raft::resources const& res, - const index_params& index_params, - const std::vector>*>& indices) - -> tiered_index::index>; +auto merge( + raft::resources const& res, + const index_params& index_params, + const std::vector>*>& indices) + -> tiered_index::index>; /** @copydoc merge */ auto merge(raft::resources const& res, diff --git a/cpp/include/cuvs/neighbors/vamana.hpp b/cpp/include/cuvs/neighbors/vamana.hpp index 645adc5c5d..c4061256c7 100644 --- a/cpp/include/cuvs/neighbors/vamana.hpp +++ b/cpp/include/cuvs/neighbors/vamana.hpp @@ -5,7 +5,6 @@ #pragma once -#include "common.hpp" #include #include #include @@ -19,7 +18,6 @@ #include #include -#include namespace CUVS_EXPORT cuvs { namespace neighbors { @@ -116,22 +114,27 @@ struct index : cuvs::neighbors::index { /** Total length of the index (number of vectors). */ [[nodiscard]] constexpr inline auto size() const noexcept -> IdxT { - auto data_rows = dataset_->n_rows(); + auto data_rows = dataset_.has_value() ? dataset_->n_rows() : IdxT{0}; return data_rows > 0 ? data_rows : graph_view_.extent(0); } /** Dimensionality of the data. */ - [[nodiscard]] constexpr inline auto dim() const noexcept -> uint32_t { return dataset_->dim(); } + [[nodiscard]] constexpr inline auto dim() const noexcept -> uint32_t + { + return dataset_.has_value() ? dataset_->dim() : 0u; + } /** Graph degree */ [[nodiscard]] constexpr inline auto graph_degree() const noexcept -> uint32_t { return graph_view_.extent(1); } - /** Dataset [size, dim] */ - [[nodiscard]] inline auto data() const noexcept -> const cuvs::neighbors::dataset& + /** Non-owning dataset view stored by the index (full-precision vectors may live in + * `full_precision_storage_`). 
*/ + [[nodiscard]] inline auto data() const noexcept + -> const cuvs::neighbors::device_padded_dataset_view& { - return *dataset_; + return dataset_.value(); } /** Quantized dataset [size, codes_rowlen] */ @@ -166,7 +169,8 @@ struct index : cuvs::neighbors::index { : cuvs::neighbors::index(), metric_(metric), graph_(raft::make_device_matrix(res, 0, 0)), - dataset_(new cuvs::neighbors::empty_dataset(0)), + full_precision_storage_(), + dataset_{std::nullopt}, quantized_dataset_(raft::make_device_matrix(res, 0, 0)) { } @@ -184,12 +188,30 @@ struct index : cuvs::neighbors::index { : cuvs::neighbors::index(), metric_(metric), graph_(raft::make_device_matrix(res, 0, 0)), - dataset_(make_aligned_dataset(res, dataset, 16)), + full_precision_storage_(), + dataset_{}, quantized_dataset_(raft::make_device_matrix(res, 0, 0)), medoid_id_(medoid_id) { RAFT_EXPECTS(dataset.extent(0) == vamana_graph.extent(0), "Dataset and vamana_graph must have equal number of rows"); + + const bool on_device = raft::get_device_for_address(dataset.data_handle()) >= 0; + bool use_padded_view = false; + if (on_device) { + const int64_t row_stride = + dataset.stride(0) > 0 ? 
static_cast(dataset.stride(0)) : dataset.extent(1); + auto d_m = raft::make_device_matrix_view( + dataset.data_handle(), dataset.extent(0), row_stride); + use_padded_view = cuvs::neighbors::matrix_row_width_matches_cagra_required(d_m); + } + + if (use_padded_view) { + dataset_ = cuvs::neighbors::make_device_padded_dataset_view(res, dataset); + } else { + full_precision_storage_ = cuvs::neighbors::make_device_padded_dataset(res, dataset); + dataset_ = full_precision_storage_->as_dataset_view(); + } update_graph(res, vamana_graph); raft::resource::sync_stream(res); @@ -264,7 +286,9 @@ struct index : cuvs::neighbors::index { cuvs::distance::DistanceType metric_; raft::device_matrix graph_; raft::device_matrix_view graph_view_; - std::unique_ptr> dataset_; + /** Owns CAGRA-padded full-precision device storage for the index dataset view. */ + std::unique_ptr> full_precision_storage_; + std::optional> dataset_; raft::device_matrix quantized_dataset_; IdxT medoid_id_; }; diff --git a/cpp/include/cuvs/preprocessing/quantize/pq.hpp b/cpp/include/cuvs/preprocessing/quantize/pq.hpp index bdbe77bac6..d14822a45b 100644 --- a/cpp/include/cuvs/preprocessing/quantize/pq.hpp +++ b/cpp/include/cuvs/preprocessing/quantize/pq.hpp @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -147,7 +148,7 @@ struct quantizer { /** Parameters used to build this quantizer. */ params params_quantizer; /** VPQ codebooks produced during training. 
*/ - cuvs::neighbors::vpq_dataset vpq_codebooks; + cuvs::neighbors::device_vpq_dataset vpq_codebooks; }; /** @@ -243,6 +244,61 @@ void inverse_transform( raft::device_matrix_view out, std::optional> vq_labels = std::nullopt); +namespace detail { + +template +[[nodiscard]] cuvs::neighbors::device_vpq_dataset vpq_train_from_device_rows( + raft::resources const& res, + cuvs::neighbors::vpq_params const& params, + T const* src_ptr, + int64_t n_rows, + int64_t dim, + int64_t stride); + +} // namespace detail + +/** + * @brief Train VPQ storage (codebooks + encoded rows) from a device row-major mdspan/matrix. + * + * Accepts any device-accessible mdspan with `value_type`, `extent`, `stride`, and `data_handle` + * (same pattern as `cuvs::neighbors::make_device_padded_dataset`). Row-major tight storage (logical + * stride equals dimension) is passed through to training without an extra pack copy; wider row + * pitch triggers a contiguous dense copy first. Empty sources are rejected. + * + * Typical **CAGRA** usage: build the graph on dense vectors, then attach VPQ for search (metric + * must remain `L2Expanded` for this path). Train VPQ from the same CAGRA-padded device layout you + * used for graph build, keep the `device_vpq_dataset` alive, and call `index::update_dataset` with + * a non-owning view. + * + * @code{.cpp} + * #include + * #include + * + * // `idx` is a `cagra::index` with graph built on dense rows. + * // `padded` is a `device_padded_dataset_view` view of those same rows. 
+ * cuvs::neighbors::vpq_params vpq_params{}; + * auto vpq = cuvs::preprocessing::quantize::pq::make_vpq_dataset(res, vpq_params, padded.view()); + * idx.update_dataset(res, vpq.as_dataset_view()); + * @endcode + */ +template +[[nodiscard]] auto make_vpq_dataset(raft::resources const& res, + cuvs::neighbors::vpq_params const& params, + SrcT const& src) + -> cuvs::neighbors::device_vpq_dataset +{ + using T = typename SrcT::value_type; + RAFT_EXPECTS(src.extent(0) > 0, "make_vpq_dataset: dataset is empty"); + cudaPointerAttributes ptr_attrs; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&ptr_attrs, src.data_handle())); + auto const* device_ptr = reinterpret_cast(ptr_attrs.devicePointer); + RAFT_EXPECTS(device_ptr != nullptr, "make_vpq_dataset: source must be device-accessible."); + const int64_t n_rows = src.extent(0); + const int64_t dim = src.extent(1); + const int64_t stride = src.stride(0) > 0 ? src.stride(0) : dim; + return detail::vpq_train_from_device_rows(res, params, device_ptr, n_rows, dim, stride); +} + /** @} */ // end of group product } // namespace pq diff --git a/cpp/src/neighbors/cagra.cuh b/cpp/src/neighbors/cagra.cuh index ee87c2c0ab..9f040d4fbf 100644 --- a/cpp/src/neighbors/cagra.cuh +++ b/cpp/src/neighbors/cagra.cuh @@ -23,24 +23,47 @@ #include #include +#include #include #include +#include +#include +#include namespace cuvs::neighbors::cagra { // Member function implementations for cagra::index -template -void index::compute_dataset_norms_(raft::resources const& res) +template +void index::compute_dataset_norms_(raft::resources const& res) { - // Get the dataset view - auto dataset_view = this->dataset(); + // raft::linalg::reduce wants row-major with leading dim = row pitch in elements. Prefer padded + // storage's native row-major view; for strided non-owning rows use the mdspan stride, not only + // index::dataset()'s synthetic mdspan when avoidable. 
Skip norm precomputation for VPQ + // (compressed codes); CosineExpanded with VPQ is handled (or rejected) on the search path. + namespace nb = cuvs::neighbors; + bool skip_norms = false; + std::optional> rm_dataset; + + if constexpr (nb::is_padded_dataset_view_v) { + rm_dataset = dataset_.view(); + } else if constexpr (nb::is_vpq_dataset_view_v) { + skip_norms = true; + } + + if (skip_norms) { return; } + + if (!rm_dataset.has_value()) { + auto strided = this->dataset(); + rm_dataset = raft::make_device_matrix_view( + strided.data_handle(), strided.extent(0), strided.stride(0)); + } // Allocate norms vector if not already allocated - if (!dataset_norms_.has_value() || dataset_norms_->extent(0) != dataset_view.extent(0)) { + if (!dataset_norms_.has_value() || dataset_norms_->extent(0) != rm_dataset->extent(0)) { dataset_norms_.reset(); - dataset_norms_ = raft::make_device_vector(res, dataset_view.extent(0)); + dataset_norms_ = raft::make_device_vector(res, rm_dataset->extent(0)); } constexpr float kScale = cuvs::spatial::knn::detail::utils::config::kDivisor / @@ -49,16 +72,14 @@ void index::compute_dataset_norms_(raft::resources const& res) // first scale the dataset and then compute norms auto scaled_sq_op = raft::compose_op( raft::sq_op{}, raft::div_const_op{float(kScale)}, raft::cast_op()); - raft::linalg::reduce( - res, - raft::make_device_matrix_view( - dataset_view.data_handle(), dataset_view.extent(0), dataset_view.stride(0)), - dataset_norms_->view(), - (float)0, - false, - scaled_sq_op, - raft::add_op(), - raft::sqrt_op{}); + raft::linalg::reduce(res, + *rm_dataset, + dataset_norms_->view(), + (float)0, + false, + scaled_sq_op, + raft::add_op(), + raft::sqrt_op{}); } /** @@ -92,8 +113,8 @@ void index::compute_dataset_norms_(raft::resources const& res) * auto optimized_gaph = raft::make_host_matrix(dataset.extent(0), 64); * cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph - 
* auto index = cagra::index(res, build_params.metric(), dataset, - * optimized_graph.view()); + * auto index = cagra::index>( + * res, build_params.metric(), dataset, optimized_graph.view()); * @endcode * * @tparam DataT data element type @@ -150,8 +171,8 @@ void build_knn_graph( * auto optimized_gaph = raft::make_host_matrix(dataset.extent(0), 64); * cagra::optimize(res, dataset, nn_descent_index.graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph - * auto index = cagra::index(res, build_params.metric(), dataset, - * optimized_graph.view()); + * auto index = cagra::device_padded_index(res, build_params.metric(), dataset, + * optimized_graph.view()); * @endcode * * @tparam DataT data element type @@ -196,8 +217,8 @@ void build_knn_graph( * // optimize graph * cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph - * auto index = cagra::index(res, build_params.metric(), dataset, - * optimized_graph.view()); + * auto index = cagra::index>( + * res, build_params.metric(), dataset, optimized_graph.view()); * @endcode * * @tparam DataT type of the data in the source dataset @@ -264,25 +285,42 @@ void optimize( detail::optimize(res, knn_graph, new_graph, guarantee_connectivity); } -template , raft::memory_type::host>> -index build( - raft::resources const& res, - const index_params& params, - raft::mdspan, raft::row_major, Accessor> dataset) +/** + * @brief Build the index from a `dataset_view` (device padded, device VPQ, or host padded). + * + * When `index_params.attach_dataset_on_build = true` (the default) **and the input is a device + * view**, the `dataset` view is stored in the returned index as a non-owning view — no copy is + * made. The caller must keep the underlying storage alive for the lifetime of the index. 
+ * + * For host views, `attach_dataset_on_build` is ignored — the host_padded_index cannot be + * searched; call `attach_device_dataset_on_host_index` to get a search-ready device index. + */ +template + requires(!cuvs::neighbors::is_empty_dataset_view_v && + (cuvs::neighbors::is_device_dataset_view_v || + cuvs::neighbors::is_host_dataset_view_v)) +auto build(raft::resources const& res, const index_params& params, DatasetViewT const& dataset) + -> cuvs::neighbors::cagra::cagra_index_t { - // Check if ACE dispatch is requested via graph_build_params - if (std::holds_alternative(params.graph_build_params)) { - // ACE expects the dataset to be on host due to the large dataset size - RAFT_EXPECTS(raft::get_device_for_address(dataset.data_handle()) == -1, - "ACE: Dataset must be on host for ACE build"); - auto dataset_view = raft::make_host_matrix_view( - dataset.data_handle(), dataset.extent(0), dataset.extent(1)); - return cuvs::neighbors::cagra::detail::build_ace(res, params, dataset_view); + using T = cuvs::neighbors::cagra_view_element_type_t; + using IdxT = uint32_t; + + // Device path: build graph, optionally attach dataset view. + // attach_dataset_on_build is only meaningful for device builds — a host_padded_index cannot + // be searched regardless; the caller must call attach_device_dataset_on_host_index. 
+ if constexpr (cuvs::neighbors::is_device_dataset_view_v) { + auto idx = cuvs::neighbors::cagra::detail::build_from_device_matrix( + res, params, dataset); + if (params.attach_dataset_on_build) { idx.update_dataset(res, dataset); } + return idx; + } else { + if (std::holds_alternative(params.graph_build_params)) { + return cuvs::neighbors::cagra::detail::build_ace( + res, params, dataset.view()); + } + return cuvs::neighbors::cagra::detail::build_from_host_matrix( + res, params, dataset.view()); } - return cuvs::neighbors::cagra::detail::build(res, params, dataset); } /** @@ -324,10 +362,14 @@ index build( * k] * @param[in] sample_filter a device filter function that greenlights samples for a given query */ -template +template void search_with_filtering(raft::resources const& res, const search_params& params, - const index& idx, + const index& idx, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -342,14 +384,17 @@ void search_with_filtering(raft::resources const& res, RAFT_EXPECTS(queries.extent(1) == idx.dim(), "Number of query dimensions should equal number of dimensions in the index."); - return cagra::detail::search_main( + return cagra::detail::search_main( res, params, idx, queries, neighbors, distances, sample_filter); } -template +template void search(raft::resources const& res, const search_params& params, - const index& idx, + const index& idx, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -361,7 +406,7 @@ void search(raft::resources const& res, search_params params_copy = params; if (params.filtering_rate < 0.0) { params_copy.filtering_rate = 0.0; } auto sample_filter_copy = sample_filter; - return search_with_filtering( + return search_with_filtering( res, params_copy, idx, queries, neighbors, distances, sample_filter_copy); } catch (const std::bad_cast&) { } @@ -380,7 +425,7 @@ void search(raft::resources const& res, 
std::min(std::max(filtering_rate, min_filtering_rate), max_filtering_rate); } auto sample_filter_copy = sample_filter; - return search_with_filtering( + return search_with_filtering( res, params_copy, idx, queries, neighbors, distances, sample_filter_copy); } catch (const std::bad_cast&) { } @@ -399,44 +444,60 @@ void search(raft::resources const& res, max_filtering_rate); } auto sample_filter_copy = sample_filter; - return search_with_filtering( + return search_with_filtering( res, params_copy, idx, queries, neighbors, distances, sample_filter_copy); } catch (const std::bad_cast&) { RAFT_FAIL("Unsupported sample filter type"); } } -template -void extend( - raft::resources const& handle, - raft::mdspan, raft::row_major, Accessor> additional_dataset, - cuvs::neighbors::cagra::index& index, - const cagra::extend_params& params, - std::optional> ndv, - std::optional> ngv) +template +void extend(raft::resources const& handle, + const cagra::extend_params& params, + raft::device_matrix_view additional_dataset, + cuvs::neighbors::cagra::index& index, + std::optional> ndv, + std::optional> ngv) { - cagra::extend_core(handle, additional_dataset, index, params, ndv, ngv); + extend_core(handle, additional_dataset, index, params, ndv, ngv); } -template -index merge(raft::resources const& handle, - const cagra::index_params& params, - std::vector*>& indices, - const cuvs::neighbors::filtering::base_filter& row_filter) +template +void extend(raft::resources const& handle, + const cagra::extend_params& params, + raft::host_matrix_view additional_dataset, + cuvs::neighbors::cagra::index& index, + std::optional> ndv, + std::optional> ngv) { - return cagra::detail::merge(handle, params, indices, row_filter); + extend_core(handle, additional_dataset, index, params, ndv, ngv); +} + +template +cuvs::neighbors::cagra::index merge( + raft::resources const& handle, + const cagra::index_params& params, + std::vector*>& indices, + merged_dataset_storage& storage, + const 
cuvs::neighbors::filtering::base_filter& row_filter) +{ + return cagra::detail::merge(handle, params, indices, storage, row_filter); } /** @} */ // end group cagra } // namespace cuvs::neighbors::cagra -#define CUVS_INST_CAGRA_MERGE(T, IdxT) \ - auto merge(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - std::vector*>& indices, \ - const cuvs::neighbors::filtering::base_filter& row_filter) \ - -> cuvs::neighbors::cagra::index \ - { \ - return cuvs::neighbors::cagra::merge(handle, params, indices, row_filter); \ - } +#define CUVS_INST_CAGRA_MERGE(T, IdxT, DatasetViewT) \ + template CUVS_EXPORT cuvs::neighbors::cagra::merged_dataset_storage \ + cuvs::neighbors::cagra::make_merged_dataset( \ + raft::resources const& handle, \ + std::vector*> const& indices, \ + cuvs::neighbors::filtering::base_filter const& row_filter); \ + template CUVS_EXPORT cuvs::neighbors::cagra::index \ + cuvs::neighbors::cagra::merge( \ + raft::resources const& handle, \ + const cuvs::neighbors::cagra::index_params& params, \ + std::vector*>& indices, \ + cuvs::neighbors::cagra::merged_dataset_storage& storage, \ + cuvs::neighbors::filtering::base_filter const& row_filter); diff --git a/cpp/src/neighbors/cagra_build_inst.cu.in b/cpp/src/neighbors/cagra_build_inst.cu.in index 00e0fab327..a45a2c3ee1 100644 --- a/cpp/src/neighbors/cagra_build_inst.cu.in +++ b/cpp/src/neighbors/cagra_build_inst.cu.in @@ -8,10 +8,16 @@ #include #include +#include + namespace { -using data_t = @data_type@; -using index_t = @index_type@; +using data_t = @data_type@; +using index_t = @index_type@; +using inst_device_padded_view_t = cuvs::neighbors::device_padded_dataset_view; +using inst_host_padded_view_t = cuvs::neighbors::host_padded_dataset_view; +using inst_vpq_f16_view_t = cuvs::neighbors::device_vpq_dataset_view; +using inst_vpq_f32_view_t = cuvs::neighbors::device_vpq_dataset_view; } // namespace @@ -25,22 +31,17 @@ void build_knn_graph(raft::resources const& handle, 
cuvs::neighbors::cagra::build_knn_graph(handle, dataset, knn_graph, params); } -auto build(raft::resources const& handle, - const cuvs::neighbors::cagra::index_params& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::cagra::index -{ - return cuvs::neighbors::cagra::build(handle, params, dataset); -} +#define CUVS_INST_CAGRA_BUILD(DatasetViewT) \ + template CUVS_EXPORT auto cuvs::neighbors::cagra::build( \ + raft::resources const& res, \ + const cuvs::neighbors::cagra::index_params& params, \ + DatasetViewT const& dataset) -> cuvs::neighbors::cagra::cagra_index_t -auto build(raft::resources const& handle, - const cuvs::neighbors::cagra::index_params& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::cagra::index -{ - return cuvs::neighbors::cagra::build(handle, params, dataset); -} +CUVS_INST_CAGRA_BUILD(inst_device_padded_view_t); +CUVS_INST_CAGRA_BUILD(inst_host_padded_view_t); +CUVS_INST_CAGRA_BUILD(inst_vpq_f16_view_t); +CUVS_INST_CAGRA_BUILD(inst_vpq_f32_view_t); -template struct index; +#undef CUVS_INST_CAGRA_BUILD } // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/cagra_extend_inst.cu.in b/cpp/src/neighbors/cagra_extend_inst.cu.in index d544789713..38cf2356a7 100644 --- a/cpp/src/neighbors/cagra_extend_inst.cu.in +++ b/cpp/src/neighbors/cagra_extend_inst.cu.in @@ -3,40 +3,47 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include - #include #include namespace { -using data_t = @data_type@; -using index_t = @index_type@; +using data_t = @data_type@; +using index_t = @index_type@; +using inst_device_padded_view_t = cuvs::neighbors::device_padded_dataset_view; } // namespace namespace cuvs::neighbors::cagra { -void extend(raft::resources const& handle, - const cagra::extend_params& params, - raft::device_matrix_view additional_dataset, - cuvs::neighbors::cagra::index& idx, - std::optional> ndv, - std::optional> ngv) -{ - cuvs::neighbors::cagra::extend( - handle, additional_dataset, idx, params, ndv, ngv); -} - -void 
extend(raft::resources const& handle, - const cagra::extend_params& params, - raft::host_matrix_view additional_dataset, - cuvs::neighbors::cagra::index& idx, - std::optional> ndv, - std::optional> ngv) -{ - cuvs::neighbors::cagra::extend( - handle, additional_dataset, idx, params, ndv, ngv); -} +#define CUVS_INST_CAGRA_EXTEND_DEVICE(T, IdxT, DatasetViewT) \ + void extend(raft::resources const& handle, \ + const cuvs::neighbors::cagra::extend_params& params, \ + raft::device_matrix_view additional_dataset, \ + cuvs::neighbors::cagra::index& index, \ + std::optional> ndv, \ + std::optional> ngv) \ + { \ + cuvs::neighbors::cagra::extend( \ + handle, params, additional_dataset, index, ndv, ngv); \ + } + +#define CUVS_INST_CAGRA_EXTEND_HOST(T, IdxT, DatasetViewT) \ + void extend(raft::resources const& handle, \ + const cuvs::neighbors::cagra::extend_params& params, \ + raft::host_matrix_view additional_dataset, \ + cuvs::neighbors::cagra::index& index, \ + std::optional> ndv, \ + std::optional> ngv) \ + { \ + cuvs::neighbors::cagra::extend( \ + handle, params, additional_dataset, index, ndv, ngv); \ + } + +CUVS_INST_CAGRA_EXTEND_DEVICE(data_t, index_t, inst_device_padded_view_t); +CUVS_INST_CAGRA_EXTEND_HOST(data_t, index_t, inst_device_padded_view_t); + +#undef CUVS_INST_CAGRA_EXTEND_HOST +#undef CUVS_INST_CAGRA_EXTEND_DEVICE } // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/cagra_merge_inst.cu.in b/cpp/src/neighbors/cagra_merge_inst.cu.in index 2fafb37ae4..9ff73540c9 100644 --- a/cpp/src/neighbors/cagra_merge_inst.cu.in +++ b/cpp/src/neighbors/cagra_merge_inst.cu.in @@ -10,11 +10,14 @@ namespace { -using data_t = @data_type@; -using index_t = @index_type@; +using data_t = @data_type@; +using index_t = @index_type@; +using inst_device_padded_view_t = cuvs::neighbors::device_padded_dataset_view; } // namespace namespace cuvs::neighbors::cagra { -CUVS_INST_CAGRA_MERGE(data_t, index_t); + +CUVS_INST_CAGRA_MERGE(data_t, index_t, 
inst_device_padded_view_t); + } // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/cagra_search_inst.cu.in b/cpp/src/neighbors/cagra_search_inst.cu.in index dfef630798..f8189b2473 100644 --- a/cpp/src/neighbors/cagra_search_inst.cu.in +++ b/cpp/src/neighbors/cagra_search_inst.cu.in @@ -8,28 +8,39 @@ namespace { -using data_t = @data_type@; +using data_t = @data_type@; +using inst_device_padded_view_t = cuvs::neighbors::device_padded_dataset_view; +using inst_vpq_f16_view_t = cuvs::neighbors::device_vpq_dataset_view; +using inst_vpq_f32_view_t = cuvs::neighbors::device_vpq_dataset_view; +using inst_empty_view_t = cuvs::neighbors::device_empty_dataset_view; -} +} // namespace namespace cuvs::neighbors::cagra { -#define CUVS_INST_CAGRA_SEARCH(T, IdxT, OutputIdxT) \ +#define CUVS_INST_CAGRA_SEARCH(T, IdxT, DatasetViewT, OutputIdxT) \ void search(raft::resources const& handle, \ cuvs::neighbors::cagra::search_params const& params, \ - const cuvs::neighbors::cagra::index& index, \ + const cuvs::neighbors::cagra::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ const cuvs::neighbors::filtering::base_filter& sample_filter) \ { \ - cuvs::neighbors::cagra::search( \ + cuvs::neighbors::cagra::search( \ handle, params, index, queries, neighbors, distances, sample_filter); \ } -CUVS_INST_CAGRA_SEARCH(data_t, uint32_t, uint32_t); -CUVS_INST_CAGRA_SEARCH(data_t, uint32_t, int64_t); +#define CUVS_INST_CAGRA_SEARCH_ALL_VIEWS(T, OutputIdxT) \ + CUVS_INST_CAGRA_SEARCH(T, uint32_t, inst_device_padded_view_t, OutputIdxT); \ + CUVS_INST_CAGRA_SEARCH(T, uint32_t, inst_vpq_f16_view_t, OutputIdxT); \ + CUVS_INST_CAGRA_SEARCH(T, uint32_t, inst_vpq_f32_view_t, OutputIdxT); \ + CUVS_INST_CAGRA_SEARCH(T, uint32_t, inst_empty_view_t, OutputIdxT) +CUVS_INST_CAGRA_SEARCH_ALL_VIEWS(data_t, uint32_t); +CUVS_INST_CAGRA_SEARCH_ALL_VIEWS(data_t, int64_t); + +#undef CUVS_INST_CAGRA_SEARCH_ALL_VIEWS #undef 
CUVS_INST_CAGRA_SEARCH } // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/cagra_serialize.cuh b/cpp/src/neighbors/cagra_serialize.cuh index b18577255a..64df61bd52 100644 --- a/cpp/src/neighbors/cagra_serialize.cuh +++ b/cpp/src/neighbors/cagra_serialize.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -9,56 +9,61 @@ namespace cuvs::neighbors::cagra { -#define CUVS_INST_CAGRA_SERIALIZE(DTYPE) \ - void serialize(raft::resources const& handle, \ - const std::string& filename, \ - const cuvs::neighbors::cagra::index& index, \ - bool include_dataset) \ - { \ - cuvs::neighbors::cagra::detail::serialize( \ - handle, filename, index, include_dataset); \ - }; \ - \ - void deserialize(raft::resources const& handle, \ - const std::string& filename, \ - cuvs::neighbors::cagra::index* index) \ - { \ - cuvs::neighbors::cagra::detail::deserialize(handle, filename, index); \ - }; \ - void serialize(raft::resources const& handle, \ - std::ostream& os, \ - const cuvs::neighbors::cagra::index& index, \ - bool include_dataset) \ - { \ - cuvs::neighbors::cagra::detail::serialize( \ - handle, os, index, include_dataset); \ - } \ - \ - void deserialize(raft::resources const& handle, \ - std::istream& is, \ - cuvs::neighbors::cagra::index* index) \ - { \ - cuvs::neighbors::cagra::detail::deserialize(handle, is, index); \ - } \ - \ - void serialize_to_hnswlib( \ - raft::resources const& handle, \ - std::ostream& os, \ - const cuvs::neighbors::cagra::index& index, \ - std::optional> dataset) \ - { \ - cuvs::neighbors::cagra::detail::serialize_to_hnswlib( \ - handle, os, index, dataset); \ - } \ - \ - void serialize_to_hnswlib( \ - raft::resources const& handle, \ - const std::string& filename, \ - const cuvs::neighbors::cagra::index& index, \ - std::optional> dataset) \ - { \ - 
cuvs::neighbors::cagra::detail::serialize_to_hnswlib( \ - handle, filename, index, dataset); \ +#define CUVS_INST_CAGRA_SERIALIZE(DTYPE) \ + void serialize(raft::resources const& handle, \ + const std::string& filename, \ + const cuvs::neighbors::cagra::device_padded_index& index, \ + bool include_dataset) \ + { \ + cuvs::neighbors::cagra::detail::serialize( \ + handle, filename, index, include_dataset); \ + }; \ + \ + void deserialize( \ + raft::resources const& handle, \ + const std::string& filename, \ + cuvs::neighbors::cagra::device_padded_index* index, \ + std::unique_ptr>* out_dataset) \ + { \ + cuvs::neighbors::cagra::detail::deserialize( \ + handle, filename, index, out_dataset); \ + }; \ + void serialize(raft::resources const& handle, \ + std::ostream& os, \ + const cuvs::neighbors::cagra::device_padded_index& index, \ + bool include_dataset) \ + { \ + cuvs::neighbors::cagra::detail::serialize( \ + handle, os, index, include_dataset); \ + } \ + \ + void deserialize( \ + raft::resources const& handle, \ + std::istream& is, \ + cuvs::neighbors::cagra::device_padded_index* index, \ + std::unique_ptr>* out_dataset) \ + { \ + cuvs::neighbors::cagra::detail::deserialize(handle, is, index, out_dataset); \ + } \ + \ + void serialize_to_hnswlib( \ + raft::resources const& handle, \ + std::ostream& os, \ + const cuvs::neighbors::cagra::device_padded_index& index, \ + std::optional> dataset) \ + { \ + cuvs::neighbors::cagra::detail::serialize_to_hnswlib( \ + handle, os, index, dataset); \ + } \ + \ + void serialize_to_hnswlib( \ + raft::resources const& handle, \ + const std::string& filename, \ + const cuvs::neighbors::cagra::device_padded_index& index, \ + std::optional> dataset) \ + { \ + cuvs::neighbors::cagra::detail::serialize_to_hnswlib( \ + handle, filename, index, dataset); \ } } // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 5d0a6654e9..4d47b64c49 100644 --- 
a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -5,6 +5,7 @@ #include "../../../core/omp_wrapper.hpp" #include "../ann_utils.cuh" #include +#include #include #include #include @@ -20,10 +21,10 @@ namespace cuvs::neighbors::cagra { -template +template void add_node_core( raft::resources const& handle, - const cuvs::neighbors::cagra::index& idx, + const cuvs::neighbors::cagra::index& idx, raft::mdspan, raft::layout_stride, Accessor> additional_dataset_view, raft::host_matrix_view updated_graph, @@ -276,11 +277,11 @@ void add_node_core( } } -template +template void add_graph_nodes( raft::resources const& handle, raft::device_matrix_view input_updated_dataset_view, - const neighbors::cagra::index& index, + const neighbors::cagra::index& index, raft::host_matrix_view updated_graph_view, const cagra::extend_params& params) { @@ -297,15 +298,17 @@ void add_graph_nodes( const std::size_t max_chunk_size_ = params.max_chunk_size == 0 ? new_dataset_size : params.max_chunk_size; - raft::copy(handle, - raft::make_device_vector_view(updated_graph_view.data_handle(), index.graph().size()), - raft::make_device_vector_view(index.graph().data_handle(), index.graph().size())); + auto updated_graph_prefix = raft::make_host_matrix_view( + updated_graph_view.data_handle(), initial_dataset_size, degree); + raft::copy(handle, updated_graph_prefix, raft::make_const_mdspan(index.graph())); - neighbors::cagra::index internal_index( - handle, - index.metric(), - raft::make_device_matrix_view(nullptr, 0, dim), - raft::make_device_matrix_view(nullptr, 0, degree)); + using padded_view_t = cuvs::neighbors::device_padded_dataset_view; + auto zero_row = raft::make_device_matrix_view( + static_cast(nullptr), int64_t{0}, static_cast(dim)); + padded_view_t device_empty_dataset_view(zero_row, static_cast(dim)); + auto empty_graph_view = raft::make_device_matrix_view(nullptr, 0, degree); + neighbors::cagra::index internal_index( + handle, 
index.metric(), device_empty_dataset_view, empty_graph_view); for (std::size_t additional_dataset_offset = 0; additional_dataset_offset < num_new_nodes; additional_dataset_offset += max_chunk_size_) { @@ -320,7 +323,8 @@ void add_graph_nodes( auto graph_view = raft::make_host_matrix_view( updated_graph_view.data_handle(), initial_dataset_size + additional_dataset_offset, degree); - internal_index.update_dataset(handle, dataset_view); + auto pdv = cuvs::neighbors::make_device_padded_dataset_view(handle, dataset_view); + internal_index.update_dataset(handle, pdv); // Note: The graph is copied to the device memory. internal_index.update_graph(handle, graph_view); @@ -337,32 +341,31 @@ void add_graph_nodes( dim, stride); - neighbors::cagra::add_node_core( + neighbors::cagra::add_node_core( handle, internal_index, additional_dataset_view, updated_graph, params); raft::resource::sync_stream(handle); } } -template +template void extend_core( raft::resources const& handle, raft::mdspan, raft::row_major, Accessor> additional_dataset, - cuvs::neighbors::cagra::index& index, + cuvs::neighbors::cagra::index& index, const cagra::extend_params& params, std::optional> new_dataset_buffer_view, std::optional> new_graph_buffer_view) { + static_assert(cuvs::neighbors::is_padded_dataset_view_v, + "cagra::extend requires a padded dataset view index type"); RAFT_EXPECTS(!index.dataset_fd().has_value(), "Cannot extend a disk-backed CAGRA index. Convert it with " "cuvs::neighbors::hnsw::from_cagra() and load it into memory via " "cuvs::neighbors::hnsw::deserialize() before calling extend()."); - if (dynamic_cast*>(&index.data()) != nullptr && - !new_dataset_buffer_view.has_value()) { - RAFT_LOG_WARN( - "New memory space for extended dataset will be allocated while the memory space for the old " - "dataset is allocated by user."); - } + RAFT_EXPECTS(new_dataset_buffer_view.has_value(), + "cagra::extend requires new_dataset_buffer_view. 
" + "Provide a buffer view for the extended dataset (initial + additional vectors)."); const std::size_t num_new_nodes = additional_dataset.extent(0); const std::size_t initial_dataset_size = index.size(); const std::size_t new_dataset_size = initial_dataset_size + num_new_nodes; @@ -391,26 +394,23 @@ void extend_core( num_new_nodes); } - using ds_idx_type = decltype(index.data().n_rows()); - if (auto* strided_dset = dynamic_cast*>(&index.data()); - strided_dset != nullptr) { + auto try_extend = [&](auto const& leaf) { // Allocate memory space for updated graph on host auto updated_graph = raft::make_host_matrix(new_dataset_size, degree); - const auto stride = strided_dset->stride(); - auto updated_dataset = raft::make_device_matrix(handle, 0, stride); - auto updated_dataset_view = - raft::make_device_strided_matrix_view(nullptr, 0, dim, stride); + const std::size_t stride = static_cast(leaf.stride()); + const T* src_rows = leaf.view().data_handle(); + auto updated_dataset_view = new_dataset_buffer_view.value(); - // Update dataset + // Update dataset on host, then copy to device buffer provided by caller auto host_updated_dataset = raft::make_host_matrix(new_dataset_size, stride); - // The padding area must be filled with zeros.!!!!!!!!!!!!!!!!!!! + // The padding area must be filled with zeros. memset(host_updated_dataset.data_handle(), 0, sizeof(T) * host_updated_dataset.size()); raft::copy_matrix(host_updated_dataset.data_handle(), stride, - strided_dset->view().data_handle(), + src_rows, stride, dim, initial_dataset_size, @@ -423,43 +423,23 @@ void extend_core( num_new_nodes, raft::resource::get_cuda_stream(handle)); - if (new_dataset_buffer_view.has_value()) { - updated_dataset_view = new_dataset_buffer_view.value(); - } else { - // Deallocate the current dataset memory space if the dataset is `owning'. 
- index.update_dataset( - handle, raft::make_device_strided_matrix_view(nullptr, 0, dim, stride)); - - // Allocate the new dataset - updated_dataset = raft::make_device_matrix(handle, new_dataset_size, stride); - updated_dataset_view = raft::make_device_strided_matrix_view( - updated_dataset.data_handle(), new_dataset_size, dim, stride); - } - - // Copy updated dataset on host memory to device memory - raft::copy( - handle, - raft::make_device_vector_view(updated_dataset_view.data_handle(), new_dataset_size * stride), - raft::make_host_vector_view(host_updated_dataset.data_handle(), new_dataset_size * stride)); + // Copy updated dataset on host memory to device memory (caller's buffer) + raft::copy(updated_dataset_view.data_handle(), + host_updated_dataset.data_handle(), + new_dataset_size * stride, + raft::resource::get_cuda_stream(handle)); // Add graph nodes cuvs::neighbors::cagra::add_graph_nodes( handle, raft::make_const_mdspan(updated_dataset_view), index, updated_graph.view(), params); - // Update index dataset - if (new_dataset_buffer_view.has_value()) { - index.update_dataset(handle, raft::make_const_mdspan(updated_dataset_view)); - } else { - using out_mdarray_type = decltype(updated_dataset); - using out_layout_type = typename out_mdarray_type::layout_type; - using out_container_policy_type = typename out_mdarray_type::container_policy_type; - using out_owning_type = - owning_dataset; - auto out_layout = raft::make_strided_layout(updated_dataset_view.extents(), - cuda::std::array{stride, 1}); - - index.update_dataset(handle, out_owning_type{std::move(updated_dataset), out_layout}); - } + // Attach view over caller's buffer; index does not take ownership + cuvs::neighbors::device_padded_dataset_view dv( + raft::make_device_matrix_view(updated_dataset_view.data_handle(), + updated_dataset_view.extent(0), + updated_dataset_view.stride(0)), + dim); + index.update_dataset(handle, dv); // Update index graph if (new_graph_buffer_view.has_value()) { @@ -472,12 
+452,15 @@ void extend_core( } else { index.update_graph(handle, raft::make_const_mdspan(updated_graph.view())); } - } else if (dynamic_cast*>(&index.data()) != - nullptr) { + }; + + auto const& leaf = index.data(); + if constexpr (cuvs::neighbors::is_padded_dataset_view_v>) { + try_extend(leaf); + } else if constexpr (cuvs::neighbors::is_empty_dataset_view_v>) { RAFT_FAIL( "cagra::extend only supports an index to which the dataset is attached. Please check if the " - "index was built with index_param.attach_dataset_on_build = true, or if a dataset was " - "attached after the build."); + "index has an empty dataset; attach one with update_dataset before extend."); } else { RAFT_FAIL("cagra::extend only supports an uncompressed dataset index"); } diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index a7c15b4161..575e15feb1 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -5,8 +5,8 @@ #pragma once #include "../../../core/nvtx.hpp" -#include "../../../preprocessing/quantize/vpq_build-ext.cuh" #include "graph_core.cuh" +#include #include #include @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,8 @@ #include #include #include +#include +#include #include #include #include @@ -38,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -1110,6 +1114,13 @@ void ace_validate_disk_mode_partitions(raft::resources const& res, } } +template + requires cuvs::neighbors::is_device_dataset_view_v +auto build_from_device_matrix(raft::resources const& res, + const index_params& params, + DatasetViewT const& device_dataset) + -> cuvs::neighbors::cagra::index; + // Build CAGRA index using ACE (Augmented Core Extraction) partitioning // ACE enables building indexes for datasets too large to fit in GPU memory by: // 1. 
Partitioning the dataset using balanced k-means in core (non-overlapping) and augmented @@ -1119,10 +1130,12 @@ void ace_validate_disk_mode_partitions(raft::resources const& res, // Supports both in-memory and disk-based modes depending on available host memory. // In disk mode, the graph is stored in build_dir and dataset is reordered on disk. // The returned index is not usable for search. Use the created files for search instead. -template -index build_ace(raft::resources const& res, - const index_params& params, - raft::host_matrix_view dataset) +template + requires cuvs::neighbors::is_host_dataset_view_v +auto build_ace(raft::resources const& res, + const index_params& params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::cagra::index { // Extract ACE parameters from graph_build_params RAFT_EXPECTS( @@ -1136,7 +1149,7 @@ index build_ace(raft::resources const& res, bool use_disk = ace_params.use_disk; common::nvtx::range function_scope( - "cagra::build_ace(%zu, %zu, %zu)", + "cagra::detail::build_ace(%zu, %zu, %zu)", params.intermediate_graph_degree, params.graph_degree, npartitions); @@ -1389,11 +1402,14 @@ index build_ace(raft::resources const& res, ef_construction, cuvs::neighbors::cagra::hnsw_heuristic_type::SAME_GRAPH_FOOTPRINT, params.metric); - sub_index_params.attach_dataset_on_build = false; - sub_index_params.guarantee_connectivity = params.guarantee_connectivity; + sub_index_params.guarantee_connectivity = params.guarantee_connectivity; - auto sub_index = cuvs::neighbors::cagra::build( - res, sub_index_params, raft::make_const_mdspan(sub_dataset.view())); + // Copy host partition to device with padding; build_from_device_matrix accepts + // device_padded_dataset_view. 
+ auto sub_dataset_dev = cuvs::neighbors::make_device_padded_dataset( + res, raft::make_const_mdspan(sub_dataset.view())); + auto sub_index = ::cuvs::neighbors::cagra::detail::build_from_device_matrix( + res, sub_index_params, sub_dataset_dev->as_dataset_view()); auto optimize_end = std::chrono::high_resolution_clock::now(); auto optimize_elapsed = @@ -1493,25 +1509,9 @@ index build_ace(raft::resources const& res, } auto index_creation_start = std::chrono::high_resolution_clock::now(); - index idx(res, params.metric); - // Only add graph and dataset if not using disk storage. The returned index is empty if using - // disk storage. Use the files written to disk for search. + cuvs::neighbors::cagra::index idx(res, params.metric); if (!use_disk_mode) { idx.update_graph(res, raft::make_const_mdspan(search_graph.view())); - - if (params.attach_dataset_on_build) { - try { - idx.update_dataset(res, dataset); - } catch (std::bad_alloc& e) { - RAFT_LOG_WARN( - "Insufficient GPU memory to attach dataset to ACE index. Only the graph will be " - "stored."); - } catch (raft::logic_error& e) { - RAFT_LOG_WARN( - "Insufficient GPU memory to attach dataset to ACE index. 
Only the graph will be " - "stored."); - } - } } else { idx.update_dataset(res, std::move(reordered_fd)); idx.update_graph(res, std::move(graph_fd)); @@ -1537,7 +1537,7 @@ index build_ace(raft::resources const& res, std::chrono::duration_cast(total_end - total_start).count(); RAFT_LOG_INFO("ACE: Partitioned CAGRA build completed in %ld ms total", total_elapsed); - return idx; + return std::move(idx); } catch (const std::exception& e) { // Clean up build directory on failure if we created it RAFT_LOG_ERROR("ACE: Build failed with exception: %s", e.what()); @@ -1997,14 +1997,11 @@ struct mmap_owner { size_t size_; }; -template , raft::memory_type::host>> -auto iterative_build_graph( - raft::resources const& res, - const index_params& params, - raft::mdspan, raft::row_major, Accessor> dataset) +template +auto iterative_build_graph(raft::resources const& res, + const index_params& params, + cuvs::neighbors::device_padded_dataset_view const& dataset) + -> raft::host_matrix { size_t intermediate_degree = params.intermediate_graph_degree; size_t graph_degree = params.graph_degree; @@ -2012,32 +2009,18 @@ auto iterative_build_graph( auto cagra_graph = raft::make_host_matrix(0, 0); // Iteratively improve the accuracy of the graph by repeatedly running - // CAGRA's search() and optimize(). As for the size of the graph, instead - // of targeting all nodes from the beginning, the number of nodes is - // initially small, and the number of nodes is doubled with each iteration. + // CAGRA's search() and optimize(). Dataset is already on device with correct + // stride (caller uses make_device_padded_dataset_view or + // make_device_padded_dataset()->as_dataset_view()). As for the size of the graph, instead of + // targeting all nodes from the beginning, the number of nodes is initially small, and the number + // of nodes is doubled with each iteration. 
RAFT_LOG_INFO("Iteratively creating/improving graph index using CAGRA's search() and optimize()"); - // If dataset is a host matrix, change it to a device matrix. Also, if the - // dimensionality of the dataset does not meet the alighnemt restriction, - // add extra dimensions and change it to a strided matrix. - std::unique_ptr> dev_aligned_dataset; - try { - dev_aligned_dataset = make_aligned_dataset(res, dataset); - } catch (raft::logic_error& e) { - RAFT_LOG_ERROR("Iterative CAGRA graph build requires the dataset to fit GPU memory"); - throw e; - } - auto dev_aligned_dataset_view = dev_aligned_dataset.get()->view(); - - // If the matrix stride and extent do no match, the extra dimensions are - // also as extent since it cannot be used as query matrix. - auto dev_dataset = - raft::make_device_matrix_view(dev_aligned_dataset_view.data_handle(), - dev_aligned_dataset_view.extent(0), - dev_aligned_dataset_view.stride(0)); + auto dev_dataset = dataset.view(); + uint32_t logical_dim = dataset.dim(); // Determine initial graph size. - uint64_t final_graph_size = (uint64_t)dataset.extent(0); + uint64_t final_graph_size = (uint64_t)dataset.n_rows(); uint64_t initial_graph_size = (final_graph_size + 1) / 2; while (initial_graph_size > graph_degree * 64) { initial_graph_size = (initial_graph_size + 1) / 2; @@ -2052,6 +2035,12 @@ auto iterative_build_graph( auto dev_neighbors = raft::make_device_matrix(res, max_chunk_size, topk); auto dev_distances = raft::make_device_matrix(res, max_chunk_size, topk); + std::optional> query_contiguous; + if (static_cast(logical_dim) != dev_dataset.extent(1)) { + query_contiguous.emplace( + raft::make_device_matrix(res, max_chunk_size, logical_dim)); + } + // Determine graph degree and number of search results while increasing // graph size. auto small_graph_degree = std::max(graph_degree / 2, std::min(graph_degree, (uint64_t)24)); @@ -2124,9 +2113,11 @@ auto iterative_build_graph( // search results (neighbors). 
auto dev_dataset_view = raft::make_device_matrix_view( dev_dataset.data_handle(), (int64_t)curr_graph_size, dev_dataset.extent(1)); + cuvs::neighbors::device_padded_dataset_view sub_padded(dev_dataset_view, + logical_dim); - auto idx = index( - res, params.metric, dev_dataset_view, raft::make_const_mdspan(cagra_graph.view())); + auto idx = cuvs::neighbors::cagra::device_padded_index( + res, params.metric, sub_padded, raft::make_const_mdspan(cagra_graph.view())); auto dev_query_view = raft::make_device_matrix_view( dev_dataset.data_handle(), (int64_t)curr_query_size, dev_dataset.extent(1)); @@ -2145,8 +2136,21 @@ auto iterative_build_graph( raft::resource::get_cuda_stream(res), raft::resource::get_workspace_resource_ref(res)); for (const auto& batch : query_batch) { - auto batch_dev_query_view = raft::make_device_matrix_view( - batch.data(), batch.size(), dev_query_view.extent(1)); + raft::device_matrix_view batch_dev_query_view; + if (query_contiguous) { + raft::copy_matrix(query_contiguous->data_handle(), + static_cast(logical_dim), + batch.data(), + dev_query_view.extent(1), + static_cast(logical_dim), + batch.size(), + raft::resource::get_cuda_stream(res)); + batch_dev_query_view = raft::make_device_matrix_view( + query_contiguous->data_handle(), batch.size(), static_cast(logical_dim)); + } else { + batch_dev_query_view = raft::make_device_matrix_view( + batch.data(), batch.size(), dev_query_view.extent(1)); + } auto batch_dev_neighbors_view = raft::make_device_matrix_view( dev_neighbors.data_handle(), batch.size(), curr_topk); auto batch_dev_distances_view = raft::make_device_matrix_view( @@ -2183,40 +2187,31 @@ auto iterative_build_graph( return cagra_graph; } -template , raft::memory_type::host>> -index build( +template +[[nodiscard]] inline auto resolve_cagra_default_knn_graph_build_params( raft::resources const& res, - const index_params& params, - raft::mdspan, raft::row_major, Accessor> dataset) + index_params const& params, + raft::matrix_extent 
dataset_extents, + size_t intermediate_degree) { - size_t intermediate_degree = params.intermediate_graph_degree; - size_t graph_degree = params.graph_degree; - common::nvtx::range function_scope( - "cagra::build<%s>(%zu, %zu)", - Accessor::is_managed_type::value ? "managed" - : Accessor::is_host_type::value ? "host" - : "device", - intermediate_degree, - graph_degree); - check_graph_degree(intermediate_degree, graph_degree, dataset.extent(0)); - - // Set default value in case knn_build_params is not defined. auto knn_build_params = params.graph_build_params; if (std::holds_alternative(params.graph_build_params)) { - // Heuristic to decide default build algo and its params. - if (cuvs::neighbors::nn_descent::has_enough_device_memory( - res, dataset.extents(), sizeof(IdxT))) { + if (cuvs::neighbors::nn_descent::has_enough_device_memory(res, dataset_extents, sizeof(IdxT))) { RAFT_LOG_DEBUG("NN descent solver"); knn_build_params = cagra::graph_build_params::nn_descent_params(intermediate_degree, params.metric); } else { RAFT_LOG_DEBUG("Selecting IVF-PQ solver"); - knn_build_params = cagra::graph_build_params::ivf_pq_params(dataset.extents(), params.metric); + knn_build_params = cagra::graph_build_params::ivf_pq_params(dataset_extents, params.metric); } } + return knn_build_params; +} + +template +inline void validate_cagra_knn_graph_build_constraints(index_params const& params, + KnnParamsVariant const& knn_build_params) +{ RAFT_EXPECTS( params.metric != cuvs::distance::DistanceType::BitwiseHamming || std::holds_alternative( @@ -2230,104 +2225,192 @@ index build( std::holds_alternative(knn_build_params), "CosineExpanded distance is not supported for iterative CAGRA graph build."); - // Validate data type for BitwiseHamming metric RAFT_EXPECTS(params.metric != cuvs::distance::DistanceType::BitwiseHamming || (std::is_same_v || std::is_same_v), "BitwiseHamming distance is only supported for int8_t and uint8_t data types. 
" "Current data type is not supported."); +} - auto cagra_graph = raft::make_host_matrix(0, 0); - - // Dispatch based on graph_build_params +/** + * Iterative / IVF-PQ / NN-descent KNN graph construction and `optimize` → final host CAGRA graph. + * + * @param ensure_padded_for_iterative_and_nn Host path: lazy `make_device_padded_dataset`; device + * path: return existing padded view (cheap). Used for iterative and NN-descent only. + * @param ivf_pq_graph_dataset IVF-PQ `build_knn_graph` dataset (host mdspan or device padded + * view). + */ +template +auto build_cagra_host_graph_from_knn_params(raft::resources const& res, + index_params const& params, + KnnParamsVariant const& knn_build_params, + int64_t n_rows, + size_t intermediate_degree, + size_t graph_degree, + EnsurePaddedFn&& ensure_padded_for_iterative_and_nn, + IvfPqDatasetMdspan&& ivf_pq_graph_dataset) + -> raft::host_matrix +{ if (std::holds_alternative( knn_build_params)) { - cagra_graph = iterative_build_graph(res, params, dataset); + auto padded = ensure_padded_for_iterative_and_nn(); + return iterative_build_graph(res, params, padded); + } + + std::optional> knn_graph( + raft::make_host_matrix(n_rows, intermediate_degree)); + + if (std::holds_alternative(knn_build_params)) { + auto ivf_pq_params = + std::get(knn_build_params); + if (ivf_pq_params.build_params.metric != params.metric) { + RAFT_LOG_WARN( + "Metric (%lu) for IVF-PQ needs to match cagra metric (%lu), " + "aligning IVF-PQ metric.", + ivf_pq_params.build_params.metric, + params.metric); + ivf_pq_params.build_params.metric = params.metric; + } + build_knn_graph(res, ivf_pq_graph_dataset, knn_graph->view(), ivf_pq_params); } else { - std::optional> knn_graph( - raft::make_host_matrix(dataset.extent(0), intermediate_degree)); - - if (std::holds_alternative(knn_build_params)) { - auto ivf_pq_params = - std::get(knn_build_params); - if (ivf_pq_params.build_params.metric != params.metric) { - RAFT_LOG_WARN( - "Metric (%lu) for IVF-PQ needs to 
match cagra metric (%lu), " - "aligning IVF-PQ metric.", - ivf_pq_params.build_params.metric, - params.metric); - ivf_pq_params.build_params.metric = params.metric; - } - build_knn_graph(res, dataset, knn_graph->view(), ivf_pq_params); - } else { - auto nn_descent_params = - std::get(knn_build_params); - - if (nn_descent_params.metric != params.metric) { - RAFT_LOG_WARN( - "Metric (%lu) for nn-descent needs to match cagra metric (%lu), " - "aligning nn-descent metric.", - nn_descent_params.metric, - params.metric); - nn_descent_params.metric = params.metric; - } - if (nn_descent_params.graph_degree != intermediate_degree) { - RAFT_LOG_WARN( - "Graph degree (%lu) for nn-descent needs to match cagra intermediate graph degree (%lu), " - "aligning " - "nn-descent graph_degree.", - nn_descent_params.graph_degree, - intermediate_degree); - nn_descent_params = - cagra::graph_build_params::nn_descent_params(intermediate_degree, params.metric); - } + auto nn_descent_params = + std::get(knn_build_params); - // Use nn-descent to build CAGRA knn graph - nn_descent_params.return_distances = false; - build_knn_graph(res, dataset, knn_graph->view(), nn_descent_params); + if (nn_descent_params.metric != params.metric) { + RAFT_LOG_WARN( + "Metric (%lu) for nn-descent needs to match cagra metric (%lu), " + "aligning nn-descent metric.", + nn_descent_params.metric, + params.metric); + nn_descent_params.metric = params.metric; + } + if (nn_descent_params.graph_degree != intermediate_degree) { + RAFT_LOG_WARN( + "Graph degree (%lu) for nn-descent needs to match cagra intermediate graph degree (%lu), " + "aligning " + "nn-descent graph_degree.", + nn_descent_params.graph_degree, + intermediate_degree); + nn_descent_params = + cagra::graph_build_params::nn_descent_params(intermediate_degree, params.metric); } - cagra_graph = raft::make_host_matrix(dataset.extent(0), graph_degree); + nn_descent_params.return_distances = false; + auto padded = ensure_padded_for_iterative_and_nn(); + 
build_knn_graph(res, padded.view(), knn_graph->view(), nn_descent_params); + } - RAFT_LOG_TRACE("optimizing graph"); - optimize(res, knn_graph->view(), cagra_graph.view(), params.guarantee_connectivity); + auto cagra_graph = raft::make_host_matrix(n_rows, graph_degree); - // free intermediate graph before trying to create the index - knn_graph.reset(); - } + RAFT_LOG_TRACE("optimizing graph"); + optimize(res, knn_graph->view(), cagra_graph.view(), params.guarantee_connectivity); - RAFT_LOG_TRACE("Graph optimized, creating index"); + knn_graph.reset(); + return cagra_graph; +} - // Construct an index from dataset and optimized knn graph. - if (params.compression.has_value()) { - RAFT_EXPECTS(params.metric == cuvs::distance::DistanceType::L2Expanded, - "VPQ compression is only supported with L2Expanded distance mertric"); - index idx(res, params.metric); - idx.update_graph(res, raft::make_const_mdspan(cagra_graph.view())); - idx.update_dataset( - res, - // TODO: hardcoding codebook math to `half`, we can do runtime dispatching later - cuvs::preprocessing::quantize::pq::vpq_build(res, *params.compression, dataset)); +/** + * Build from a host row-major matrix without uploading the full dataset early when IVF-PQ graph + * construction can consume host batches directly. NN-descent / iterative paths still materialize a + * padded device copy for graph build. The returned index contains only the optimized graph; call + * `index::update_dataset` with a device dataset view before search. 
+ */ +template + requires cuvs::neighbors::is_host_dataset_view_v +auto build_from_host_matrix(raft::resources const& res, + const index_params& params, + raft::host_matrix_view host_dataset) + -> cuvs::neighbors::cagra::index +{ + std::unique_ptr> padded_own{}; - return idx; - } - if (params.attach_dataset_on_build) { - try { - return index( - res, params.metric, dataset, raft::make_const_mdspan(cagra_graph.view())); - } catch (std::bad_alloc& e) { - RAFT_LOG_WARN( - "Insufficient GPU memory to construct CAGRA index with dataset on GPU. Only the graph will " - "be added to the index"); - // We just add the graph. User is expected to update dataset separately (e.g allocating in - // managed memory). - } catch (raft::logic_error& e) { - // The memory error can also manifest as logic_error. - RAFT_LOG_WARN( - "Insufficient GPU memory to construct CAGRA index with dataset on GPU. Only the graph will " - "be added to the index"); + auto ensure_padded = [&]() -> cuvs::neighbors::device_padded_dataset_view { + if (!padded_own) { + padded_own = cuvs::neighbors::make_device_padded_dataset(res, host_dataset); } - } - index idx(res, params.metric); + return padded_own->as_dataset_view(); + }; + + size_t const n_rows = static_cast(host_dataset.extent(0)); + size_t const dim = static_cast(host_dataset.extent(1)); + + size_t intermediate_degree = params.intermediate_graph_degree; + size_t graph_degree = params.graph_degree; + common::nvtx::range function_scope( + "cagra::detail::build_from_host_matrix(%zu, %zu)", intermediate_degree, graph_degree); + check_graph_degree(intermediate_degree, graph_degree, n_rows); + + auto dataset_extents = + raft::matrix_extent(static_cast(n_rows), static_cast(dim)); + + auto knn_build_params = resolve_cagra_default_knn_graph_build_params( + res, params, dataset_extents, intermediate_degree); + validate_cagra_knn_graph_build_constraints(params, knn_build_params); + + auto cagra_graph = build_cagra_host_graph_from_knn_params(res, + params, + 
knn_build_params, + static_cast(n_rows), + intermediate_degree, + graph_degree, + ensure_padded, + host_dataset); + + RAFT_LOG_TRACE("Graph optimized, creating index"); + + cuvs::neighbors::cagra::index out(res, params.metric); + out.update_graph(res, raft::make_const_mdspan(cagra_graph.view())); + padded_own.reset(); + return out; +} + +/** + * Build from a concrete `dataset_view` after resolving graph vectors to **device** padded storage + * via `convert_dataset_view_to_padded_for_graph_build`. + * + * Supported inputs include `device_padded_dataset_view` and VPQ views (graph build rejects VPQ). + * This entry point does **not** accept host-backed bases for graph construction (see + * `build_from_host_matrix`). Also used from ACE sub-builds and merge. The returned index + * contains only the optimized graph; call `index::update_dataset` before search. + */ +template + requires cuvs::neighbors::is_device_dataset_view_v +auto build_from_device_matrix(raft::resources const& res, + const index_params& params, + DatasetViewT const& device_dataset) + -> cuvs::neighbors::cagra::index +{ + const auto padded = convert_dataset_view_to_padded_for_graph_build(device_dataset); + + size_t intermediate_degree = params.intermediate_graph_degree; + size_t graph_degree = params.graph_degree; + common::nvtx::range function_scope( + "cagra::detail::build_from_device_matrix(%zu, %zu)", intermediate_degree, graph_degree); + check_graph_degree( + intermediate_degree, graph_degree, static_cast(padded.n_rows())); + + auto dataset_extents = raft::matrix_extent(padded.n_rows(), padded.dim()); + + auto knn_build_params = resolve_cagra_default_knn_graph_build_params( + res, params, dataset_extents, intermediate_degree); + validate_cagra_knn_graph_build_constraints(params, knn_build_params); + + auto cagra_graph = build_cagra_host_graph_from_knn_params( + res, + params, + knn_build_params, + padded.n_rows(), + intermediate_degree, + graph_degree, + [&padded]() -> 
cuvs::neighbors::device_padded_dataset_view { return padded; }, + padded.view()); + + RAFT_LOG_TRACE("Graph optimized, creating index"); + + cuvs::neighbors::cagra::index idx(res, params.metric); idx.update_graph(res, raft::make_const_mdspan(cagra_graph.view())); return idx; } diff --git a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh index 1dd4cbe075..c681ad4ae4 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh @@ -6,6 +6,8 @@ #include +#include "cagra_build.cuh" + #include #include #include @@ -18,25 +20,24 @@ #include #include +#include +#include #include #include #include -#include -#include #include namespace cuvs::neighbors::cagra::detail { -template -index merge(raft::resources const& handle, - const cagra::index_params& params, - std::vector*>& indices, - const cuvs::neighbors::filtering::base_filter& row_filter) +template +merged_dataset compute_merged_dataset_layout( + raft::resources const& handle, + std::vector*> const& indices, + cuvs::neighbors::filtering::base_filter const& row_filter) { - using cagra_index_t = cuvs::neighbors::cagra::index; - using ds_idx_type = typename cagra_index_t::dataset_index_type; + using cagra_index_t = cuvs::neighbors::cagra::index; std::size_t dim = 0; std::size_t new_dataset_size = 0; @@ -48,131 +49,183 @@ index merge(raft::resources const& handle, for (cagra_index_t* index : indices) { RAFT_EXPECTS(index != nullptr, "Null pointer detected in 'indices'. Ensure all elements are valid before usage."); - if (auto* strided_dset = dynamic_cast*>(&index->data()); - strided_dset != nullptr) { + auto const& v = index->data(); + if constexpr (cuvs::neighbors::is_padded_dataset_view_v) { + if (v.n_rows() == 0) { + RAFT_FAIL( + "cagra::merge only supports an index to which the dataset is attached. 
Please check if " + "the index has an empty dataset; attach one with update_dataset before merge."); + } if (dim == 0) { dim = index->dim(); - stride = strided_dset->stride(); + stride = static_cast(v.stride()); } else { RAFT_EXPECTS(dim == index->dim(), "Dimension of datasets in indices must be equal."); + RAFT_EXPECTS(stride == static_cast(v.stride()), + "Row stride of datasets in indices must be equal."); } new_dataset_size += index->size(); - } else if (dynamic_cast*>(&index->data()) != - nullptr) { - RAFT_FAIL( - "cagra::merge only supports an index to which the dataset is attached. Please check if the " - "index was built with index_param.attach_dataset_on_build = true, or if a dataset was " - "attached after the build."); } else { - RAFT_FAIL("cagra::merge only supports an uncompressed dataset index"); + RAFT_FAIL("cagra::merge only supports an uncompressed padded dataset index"); } } - IdxT offset = 0; + merged_dataset layout{}; + layout.merged_rows = static_cast(new_dataset_size); + layout.stride_elements = stride; + layout.dim = static_cast(dim); + layout.bitset_filtered = + (row_filter.get_filter_type() == cuvs::neighbors::filtering::FilterType::Bitset); + if (layout.bitset_filtered) { + auto const& actual_filter = + dynamic_cast&>(row_filter); + layout.filtered_rows = actual_filter.view().count(handle); + } else { + layout.filtered_rows = layout.merged_rows; + } + return layout; +} + +template +cuvs::neighbors::cagra::index merge( + raft::resources const& handle, + const cagra::index_params& params, + std::vector*>& indices, + merged_dataset_storage& storage, + const cuvs::neighbors::filtering::base_filter& row_filter) +{ + using cagra_index_t = cuvs::neighbors::cagra::index; + + auto const expected = + compute_merged_dataset_layout(handle, indices, row_filter); + RAFT_EXPECTS(expected.merged_rows == storage.layout.merged_rows && + expected.filtered_rows == storage.layout.filtered_rows && + expected.stride_elements == storage.layout.stride_elements && + 
expected.dim == storage.layout.dim && + expected.bitset_filtered == storage.layout.bitset_filtered, + "merged_dataset_storage.layout does not match indices and row_filter (use the same " + "arguments as " + "make_merged_dataset)."); + + auto merged_storage = storage.merged_storage.view(); + RAFT_EXPECTS(merged_storage.extent(0) == storage.layout.merged_rows, + "merged_storage rows (%ld) must equal layout.merged_rows (%ld)", + long(merged_storage.extent(0)), + long(storage.layout.merged_rows)); + RAFT_EXPECTS(merged_storage.extent(1) == storage.layout.stride_elements, + "merged_storage stride (%ld) must equal layout.stride_elements (%ld)", + long(merged_storage.extent(1)), + long(storage.layout.stride_elements)); + + std::optional> filtered_view{}; + if (storage.layout.bitset_filtered) { + RAFT_EXPECTS(storage.filtered_storage.has_value(), + "Bitset-filtered merge requires merged_dataset_storage.filtered_storage."); + filtered_view = storage.filtered_storage->view(); + RAFT_EXPECTS(filtered_view->extent(0) == storage.layout.filtered_rows, + "filtered_storage rows (%ld) must equal layout.filtered_rows (%ld)", + long(filtered_view->extent(0)), + long(storage.layout.filtered_rows)); + RAFT_EXPECTS(filtered_view->extent(1) == storage.layout.stride_elements, + "filtered_storage stride (%ld) must equal layout.stride_elements (%ld)", + long(filtered_view->extent(1)), + long(storage.layout.stride_elements)); + } else { + RAFT_EXPECTS(!storage.filtered_storage.has_value(), + "Non-bitset merge requires merged_dataset_storage.filtered_storage be unset."); + } - auto merge_dataset = [&](T* dst) { + auto merge_dataset = [&](T* dst, std::size_t dst_ld) { + IdxT row_offset = 0; for (cagra_index_t* index : indices) { - auto* strided_dset = dynamic_cast*>(&index->data()); - raft::copy_matrix(dst + offset * dim, - dim, - strided_dset->view().data_handle(), - static_cast(stride), - dim, - static_cast(strided_dset->n_rows()), + const T* src_ptr = nullptr; + std::size_t n_rows = 0; + 
auto const& v = index->data(); + if constexpr (cuvs::neighbors::is_padded_dataset_view_v) { + src_ptr = v.view().data_handle(); + n_rows = static_cast(v.n_rows()); + } else { + RAFT_FAIL("cagra::merge: unexpected dataset type while copying rows"); + } + raft::copy_matrix(dst + static_cast(row_offset) * dst_ld, + dst_ld, + src_ptr, + static_cast(storage.layout.stride_elements), + static_cast(storage.layout.dim), + n_rows, raft::resource::get_cuda_stream(handle)); - offset += IdxT(index->data().n_rows()); + row_offset += IdxT(index->data().n_rows()); } }; - try { - auto updated_dataset = - raft::make_device_matrix(handle, int64_t(new_dataset_size), int64_t(dim)); - - merge_dataset(updated_dataset.data_handle()); - - if (row_filter.get_filter_type() == cuvs::neighbors::filtering::FilterType::Bitset) { - auto actual_filter = - dynamic_cast&>( - row_filter); - auto filtered_row_count = actual_filter.view().count(handle); - - // Convert the filter to a CSR matrix (so that we can pass indices to raft::copy_rows) - auto indices_csr = raft::make_device_csr_matrix( - handle, 1, new_dataset_size); - indices_csr.initialize_sparsity(filtered_row_count); - - actual_filter.view().to_csr(handle, indices_csr); - - // Get the indices array from the csr matrix. 
Note that this returns a raft::span object - // and we need to pass as device_vector_view, which is a 1D mdspan (instead of a span) - // so we need to translate here (and adjust to be const) - auto indices = indices_csr.structure_view().get_indices(); - auto indices_view = raft::make_device_vector_view( - indices.data(), static_cast(indices.size())); - - auto filtered_dataset = raft::make_device_matrix(handle, filtered_row_count, dim); - raft::matrix::copy_rows(handle, - raft::make_const_mdspan(updated_dataset.view()), - filtered_dataset.view(), - indices_view); - - auto merged_index = - cagra::build(handle, params, raft::make_const_mdspan(filtered_dataset.view())); - if (!merged_index.data().is_owning() && params.attach_dataset_on_build) { - using matrix_t = decltype(updated_dataset); - using layout_t = typename matrix_t::layout_type; - using container_policy_t = typename matrix_t::container_policy_type; - using owning_t = owning_dataset; - auto out_layout = raft::make_strided_layout(filtered_dataset.view().extents(), - cuda::std::array{stride, 1}); - - merged_index.update_dataset(handle, owning_t{std::move(filtered_dataset), out_layout}); - } - RAFT_LOG_DEBUG("cagra merge: using device memory for merged dataset"); - return merged_index; - } else { - auto merged_index = - cagra::build(handle, params, raft::make_const_mdspan(updated_dataset.view())); - if (!merged_index.data().is_owning() && params.attach_dataset_on_build) { - using matrix_t = decltype(updated_dataset); - using layout_t = typename matrix_t::layout_type; - using container_policy_t = typename matrix_t::container_policy_type; - using owning_t = owning_dataset; - auto out_layout = raft::make_strided_layout(updated_dataset.view().extents(), - cuda::std::array{stride, 1}); - - merged_index.update_dataset(handle, owning_t{std::move(updated_dataset), out_layout}); - } - RAFT_LOG_DEBUG("cagra merge: using device memory for merged dataset"); - return merged_index; - } - } catch (std::bad_alloc& e) { - // We 
don't currently support the cpu memory fallback with filtered merge, since the - // 'raft::matrix::copy_rows' only supports gpu memory - RAFT_EXPECTS(row_filter.get_filter_type() == cuvs::neighbors::filtering::FilterType::None, - "Filtered merge isn't available on cpu memory"); - - RAFT_LOG_DEBUG("cagra::merge: using host memory for merged dataset"); - - auto updated_dataset = - raft::make_host_matrix(std::int64_t(new_dataset_size), std::int64_t(dim)); - - merge_dataset(updated_dataset.data_handle()); - - auto merged_index = - cagra::build(handle, params, raft::make_const_mdspan(updated_dataset.view())); - if (!merged_index.data().is_owning() && params.attach_dataset_on_build) { - using matrix_t = decltype(updated_dataset); - using layout_t = typename matrix_t::layout_type; - using container_policy_t = typename matrix_t::container_policy_type; - using owning_t = owning_dataset; - auto out_layout = raft::make_strided_layout(updated_dataset.view().extents(), - cuda::std::array{stride, 1}); - merged_index.update_dataset(handle, owning_t{std::move(updated_dataset), out_layout}); - } - return merged_index; + cudaStream_t stream = raft::resource::get_cuda_stream(handle); + const auto merged_bytes = static_cast(merged_storage.size()) * sizeof(T); + RAFT_CUDA_TRY(cudaMemsetAsync(merged_storage.data_handle(), 0, merged_bytes, stream)); + + merge_dataset(merged_storage.data_handle(), + static_cast(storage.layout.stride_elements)); + + if (storage.layout.bitset_filtered) { + auto actual_filter = + dynamic_cast&>(row_filter); + + auto indices_csr = raft::make_device_csr_matrix( + handle, 1, static_cast(storage.layout.merged_rows)); + indices_csr.initialize_sparsity(storage.layout.filtered_rows); + + actual_filter.view().to_csr(handle, indices_csr); + + auto csr_indices = indices_csr.structure_view().get_indices(); + auto indices_view = raft::make_device_vector_view( + csr_indices.data(), static_cast(csr_indices.size())); + + auto& filtered_storage = *filtered_view; + 
RAFT_CUDA_TRY(cudaMemsetAsync(filtered_storage.data_handle(), + 0, + static_cast(filtered_storage.size()) * sizeof(T), + stream)); + + raft::matrix::copy_rows( + handle, raft::make_const_mdspan(merged_storage), filtered_storage, indices_view); + + cuvs::neighbors::device_padded_dataset_view dv( + raft::make_const_mdspan(filtered_storage), storage.layout.dim); + auto index = + ::cuvs::neighbors::cagra::detail::build_from_device_matrix(handle, params, dv); + index.update_dataset(handle, dv); + RAFT_LOG_DEBUG("cagra merge: using device memory for merged dataset"); + return index; } + + cuvs::neighbors::device_padded_dataset_view dv( + raft::make_const_mdspan(merged_storage), storage.layout.dim); + auto index = + ::cuvs::neighbors::cagra::detail::build_from_device_matrix(handle, params, dv); + index.update_dataset(handle, dv); + RAFT_LOG_DEBUG("cagra merge: using device memory for merged dataset"); + return index; } } // namespace cuvs::neighbors::cagra::detail + +namespace cuvs::neighbors::cagra { + +template +merged_dataset_storage make_merged_dataset( + raft::resources const& res, + std::vector*> const& indices, + cuvs::neighbors::filtering::base_filter const& row_filter) +{ + merged_dataset layout = detail::compute_merged_dataset_layout(res, indices, row_filter); + auto merged_storage = + raft::make_device_matrix(res, layout.merged_rows, layout.stride_elements); + std::optional> filtered_storage; + if (layout.bitset_filtered) { + filtered_storage.emplace( + raft::make_device_matrix(res, layout.filtered_rows, layout.stride_elements)); + } + return {layout, std::move(merged_storage), std::move(filtered_storage)}; +} + +} // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/detail/cagra/cagra_search.cuh b/cpp/src/neighbors/detail/cagra/cagra_search.cuh index bca8d3314d..588f699f57 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_search.cuh @@ -19,6 +19,7 @@ #include #include +#include // TODO: Fix 
these when ivf methods are moved over #include "../../ivf_common.cuh" @@ -83,31 +84,76 @@ void search_main_core( RAFT_LOG_DEBUG("Cagra search"); const uint32_t max_queries = plan->max_queries; - const uint32_t query_dim = queries.extent(1); + const uint32_t query_dim = static_cast(queries.extent(1)); + // Same 16B row-pitch rule as make_device_padded_dataset. Tight [n,dim] rows can be misaligned + // between rows (e.g. float, dim=1) and trigger misaligned access in CAGRA search. If + // query_row_stride>dim, device code still advances with "+= dim*query_id" in setup_workspace; in + // that case run one query per plan call so every kernel sees query_id==0 and the base pointer + // selects the row (keeps batched path when stride==dim). + const DataT* queries_buf{}; + uint32_t query_row_stride{}; + std::unique_ptr> queries_padded_own; + if (cuvs::neighbors::matrix_row_width_matches_cagra_required(queries)) { + auto v = cuvs::neighbors::make_device_padded_dataset_view(res, queries); + queries_buf = v.view().data_handle(); + query_row_stride = v.stride(); + } else { + queries_padded_own = cuvs::neighbors::make_device_padded_dataset(res, queries); + auto v = queries_padded_own->as_dataset_view(); + queries_buf = v.view().data_handle(); + query_row_stride = v.stride(); + } + const bool can_batch_n_queries = (query_row_stride == query_dim); for (unsigned qid = 0; qid < queries.extent(0); qid += max_queries) { const uint32_t n_queries = std::min(max_queries, queries.extent(0) - qid); - auto _topk_indices_ptr = neighbors.data_handle() + (topk * qid); - auto _topk_distances_ptr = distances.data_handle() + (topk * qid); - // todo(tfeher): one could keep distances optional and pass nullptr - const auto* _query_ptr = queries.data_handle() + (query_dim * qid); - const auto* _seed_ptr = - plan->num_seeds > 0 - ? 
reinterpret_cast(plan->dev_seed.data()) + (plan->num_seeds * qid) - : nullptr; - uint32_t* _num_executed_iterations = nullptr; - - (*plan)(res, - graph, - source_indices, - _topk_indices_ptr, - _topk_distances_ptr, - _query_ptr, - n_queries, - _seed_ptr, - _num_executed_iterations, - topk, - set_offset(sample_filter, qid)); + if (can_batch_n_queries) { + auto _topk_indices_ptr = neighbors.data_handle() + (topk * qid); + auto _topk_distances_ptr = distances.data_handle() + (topk * qid); + const auto* _query_ptr = + queries_buf + (static_cast(query_row_stride) * static_cast(qid)); + const auto* _seed_ptr = + plan->num_seeds > 0 + ? reinterpret_cast(plan->dev_seed.data()) + (plan->num_seeds * qid) + : nullptr; + uint32_t* _num_executed_iterations = nullptr; + + (*plan)(res, + graph, + source_indices, + _topk_indices_ptr, + _topk_distances_ptr, + _query_ptr, + n_queries, + _seed_ptr, + _num_executed_iterations, + topk, + set_offset(sample_filter, qid)); + } else { + for (uint32_t qi = 0; qi < n_queries; ++qi) { + const size_t g = static_cast(qid) + static_cast(qi); + auto _topk_indices_ptr = neighbors.data_handle() + (topk * g); + auto _topk_distances_ptr = distances.data_handle() + (topk * g); + const auto* _query_ptr = queries_buf + (query_row_stride * g); + const auto* _seed_ptr = + plan->num_seeds > 0 + ? 
reinterpret_cast(plan->dev_seed.data()) + (plan->num_seeds * g) + : nullptr; + uint32_t* _num_executed_iterations = nullptr; + + (*plan)(res, + graph, + source_indices, + _topk_indices_ptr, + _topk_distances_ptr, + _query_ptr, + 1u, + _seed_ptr, + _num_executed_iterations, + topk, + set_offset(sample_filter, g)); + } + } } } @@ -133,10 +179,11 @@ template + typename DistanceT = float, + cuvs::neighbors::cagra_dataset_view DatasetViewT> void search_main(raft::resources const& res, search_params params, - const index& index, + const index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -147,13 +194,9 @@ void search_main(raft::resources const& res, "Use cuvs::neighbors::hnsw::from_cagra() to convert the index and " "cuvs::neighbors::hnsw::deserialize() to load it into memory before searching."); - // n_rows has the same type as the dataset index (the array extents type) - using ds_idx_type = decltype(index.data().n_rows()); using graph_idx_type = uint32_t; - // Dispatch search parameters based on the dataset kind. 
- if (auto* strided_dset = dynamic_cast*>(&index.data()); - strided_dset != nullptr) { - // Search using a plain (strided) row-major dataset + + auto run_strided_like = [&](auto const& row_dataset) { RAFT_EXPECTS(index.metric() != cuvs::distance::DistanceType::CosineExpanded || index.dataset_norms().has_value(), "Dataset norms must be provided for CosineExpanded metric"); @@ -163,7 +206,7 @@ void search_main(raft::resources const& res, dataset_norms_ptr = index.dataset_norms().value().data_handle(); } auto desc = dataset_descriptor_init_with_cache( - res, params, *strided_dset, index.metric(), dataset_norms_ptr); + res, params, row_dataset, index.metric(), dataset_norms_ptr); search_main_core( res, params, @@ -174,14 +217,17 @@ void search_main(raft::resources const& res, neighbors, distances, sample_filter); - } else if (auto* vpq_dset = dynamic_cast*>(&index.data()); - vpq_dset != nullptr) { - // Search using a compressed dataset + }; + + if constexpr (cuvs::neighbors::is_empty_dataset_view_v) { + RAFT_FAIL( + "Attempted to search without a dataset. Please call index.update_dataset(...) first."); + } else if constexpr (cuvs::neighbors::is_device_vpq_f32_dataset_view_v) { RAFT_FAIL("FP32 VPQ dataset support is coming soon"); - } else if (auto* vpq_dset = dynamic_cast*>(&index.data()); - vpq_dset != nullptr) { - auto desc = dataset_descriptor_init_with_cache( - res, params, *vpq_dset, index.metric(), nullptr); + } else if constexpr (cuvs::neighbors::is_device_vpq_f16_dataset_view_v) { + auto const& vv = index.data(); + auto desc = dataset_descriptor_init_with_cache( + res, params, vv.dset(), index.metric(), nullptr); search_main_core( res, params, @@ -192,14 +238,15 @@ void search_main(raft::resources const& res, neighbors, distances, sample_filter); - } else if (auto* empty_dset = dynamic_cast*>(&index.data()); - empty_dset != nullptr) { - // Forgot to add a dataset. - RAFT_FAIL( - "Attempted to search without a dataset. Please call index.update_dataset(...) 
first."); + } else if constexpr (cuvs::neighbors::is_device_padded_dataset_view_v) { + run_strided_like(index.data()); + } else if constexpr (cuvs::neighbors::is_host_dataset_view_v) { + static_assert(sizeof(DatasetViewT) == 0, + "search requires a device-resident dataset. " + "Call cagra::attach_device_dataset_on_host_index(res, host_idx, device_view) " + "to convert the host index and attach a device dataset before searching."); } else { - // This is a logic error. - RAFT_FAIL("Unrecognized dataset format"); + static_assert(sizeof(DatasetViewT) == 0, "search: unsupported dataset view type"); } static_assert(std::is_same_v, diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh index f106b82500..fa669e4e02 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh @@ -40,10 +40,10 @@ constexpr int serialization_version = 5; * @param[in] index_ CAGRA index * */ -template +template void serialize(raft::resources const& res, std::ostream& os, - const index& index_, + const cuvs::neighbors::cagra::index& index_, bool include_dataset) { raft::common::nvtx::range fun_scope("cagra::serialize"); @@ -75,7 +75,15 @@ void serialize(raft::resources const& res, raft::serialize_scalar(res, os, content_map); if (include_dataset) { RAFT_LOG_DEBUG("Saving CAGRA index with dataset"); - neighbors::detail::serialize(res, os, index_.data()); + if constexpr (cuvs::neighbors::is_device_padded_dataset_view_v) { + neighbors::detail::serialize_cagra_padded_dataset(res, os, index_.data()); + } else { + // Future dataset types (e.g. VPQ) require a new branch here and a corresponding + // deserialize overload. Use static_assert to catch unsupported types at compile time. 
+ static_assert( + sizeof(DatasetViewT) == 0, + "serialize: dataset serialization is not yet implemented for this DatasetViewT"); + } } else { RAFT_LOG_DEBUG("Saving CAGRA index WITHOUT dataset"); } @@ -83,10 +91,10 @@ void serialize(raft::resources const& res, if (has_source_indices) { raft::serialize_mdspan(res, os, index_.source_indices().value()); } } -template +template void serialize(raft::resources const& res, const std::string& filename, - const index& index_, + const cuvs::neighbors::cagra::index& index_, bool include_dataset) { RAFT_EXPECTS(!index_.dataset_fd().has_value(), @@ -106,7 +114,7 @@ template void serialize_to_hnswlib( raft::resources const& res, std::ostream& os, - const cuvs::neighbors::cagra::index& index_, + const cuvs::neighbors::cagra::device_padded_index& index_, std::optional> dataset) { // static_assert(std::is_same_v or std::is_same_v, @@ -243,7 +251,7 @@ template void serialize_to_hnswlib( raft::resources const& res, const std::string& filename, - const cuvs::neighbors::cagra::index& index_, + const cuvs::neighbors::cagra::device_padded_index& index_, std::optional> dataset) { std::ofstream of(filename, std::ios::out | std::ios::binary); @@ -264,8 +272,12 @@ void serialize_to_hnswlib( * @param[in] index_ CAGRA index * */ -template -void deserialize(raft::resources const& res, std::istream& is, index* index_) +template +void deserialize( + raft::resources const& res, + std::istream& is, + cuvs::neighbors::cagra::index* index_, + std::unique_ptr>* out_dataset = nullptr) { raft::common::nvtx::range fun_scope("cagra::deserialize"); @@ -302,13 +314,22 @@ void deserialize(raft::resources const& res, std::istream& is, index* i auto graph = raft::make_host_matrix(n_rows, graph_degree); deserialize_mdspan(res, is, graph.view()); - *index_ = index(res, metric); + *index_ = cuvs::neighbors::cagra::index(res, metric); index_->update_graph(res, raft::make_const_mdspan(graph.view())); auto content_map = raft::deserialize_scalar(res, is); bool 
has_dataset = content_map & 0x1u; if (has_dataset) { - index_->update_dataset(res, cuvs::neighbors::detail::deserialize_dataset(res, is)); + RAFT_EXPECTS(out_dataset != nullptr, + "deserialize: index contains a dataset; pass a non-null out_dataset to own it."); + if constexpr (cuvs::neighbors::is_device_padded_dataset_view_v) { + *out_dataset = cuvs::neighbors::detail::deserialize_dataset(res, is); + index_->update_dataset(res, (*out_dataset)->as_dataset_view()); + } else { + static_assert(sizeof(DatasetViewT) == 0, + "deserialize: dataset deserialization is not yet implemented for this " + "DatasetViewT"); + } } bool has_source_indices = content_map & 0x2u; @@ -321,14 +342,18 @@ void deserialize(raft::resources const& res, std::istream& is, index* i } } -template -void deserialize(raft::resources const& res, const std::string& filename, index* index_) +template +void deserialize( + raft::resources const& res, + const std::string& filename, + cuvs::neighbors::cagra::index* index_, + std::unique_ptr>* out_dataset = nullptr) { std::ifstream is(filename, std::ios::in | std::ios::binary); if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } - detail::deserialize(res, is, index_); + detail::deserialize(res, is, index_, out_dataset); is.close(); } diff --git a/cpp/src/neighbors/detail/cagra/compute_distance_standard.hpp b/cpp/src/neighbors/detail/cagra/compute_distance_standard.hpp index ef82b1760e..11941d0082 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance_standard.hpp +++ b/cpp/src/neighbors/detail/cagra/compute_distance_standard.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -28,7 +28,7 @@ struct standard_descriptor_spec : public instance_spec template constexpr static inline bool accepts_dataset() { - return is_strided_dataset_v; + return is_padded_dataset_v; } template diff --git a/cpp/src/neighbors/detail/cagra/factory.cuh b/cpp/src/neighbors/detail/cagra/factory.cuh index 26cd13bab8..6ffba819d5 100644 --- a/cpp/src/neighbors/detail/cagra/factory.cuh +++ b/cpp/src/neighbors/detail/cagra/factory.cuh @@ -93,7 +93,7 @@ template auto make_key(const cagra::search_params& params, const DatasetT& dataset, cuvs::distance::DistanceType metric) - -> std::enable_if_t, key> + -> std::enable_if_t, key> { return key{reinterpret_cast(dataset.view().data_handle()), uint64_t(dataset.n_rows()), diff --git a/cpp/src/neighbors/detail/dataset_serialize.hpp b/cpp/src/neighbors/detail/dataset_serialize.hpp index 00032ae9d2..217ef9105e 100644 --- a/cpp/src/neighbors/detail/dataset_serialize.hpp +++ b/cpp/src/neighbors/detail/dataset_serialize.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -25,16 +26,11 @@ constexpr dataset_instance_tag kSerializeEmptyDataset = 1; constexpr dataset_instance_tag kSerializeStridedDataset = 2; constexpr dataset_instance_tag kSerializeVPQDataset = 3; -template -void serialize(const raft::resources& res, std::ostream& os, const empty_dataset& dataset) -{ - raft::serialize_scalar(res, os, dataset.suggested_dim); -} - +// Padded: `device_padded_dataset_view` writes the payload. 
template void serialize(const raft::resources& res, std::ostream& os, - const strided_dataset& dataset) + const device_padded_dataset_view& dataset) { auto n_rows = dataset.n_rows(); auto dim = dataset.dim(); @@ -42,7 +38,6 @@ void serialize(const raft::resources& res, raft::serialize_scalar(res, os, n_rows); raft::serialize_scalar(res, os, dim); raft::serialize_scalar(res, os, stride); - // Remove padding before saving the dataset auto src = dataset.view(); auto dst = raft::make_host_matrix(n_rows, dim); raft::copy_matrix(dst.data_handle(), @@ -56,85 +51,54 @@ void serialize(const raft::resources& res, raft::serialize_mdspan(res, os, dst.view()); } -template -void serialize(const raft::resources& res, - std::ostream& os, - const vpq_dataset& dataset) -{ - raft::serialize_scalar(res, os, dataset.n_rows()); - raft::serialize_scalar(res, os, dataset.dim()); - raft::serialize_scalar(res, os, dataset.vq_n_centers()); - raft::serialize_scalar(res, os, dataset.pq_n_centers()); - raft::serialize_scalar(res, os, dataset.pq_len()); - raft::serialize_scalar(res, os, dataset.encoded_row_length()); - raft::serialize_mdspan(res, os, make_const_mdspan(dataset.vq_code_book.view())); - raft::serialize_mdspan(res, os, make_const_mdspan(dataset.pq_code_book.view())); - raft::serialize_mdspan(res, os, make_const_mdspan(dataset.data.view())); -} - -template -void serialize(const raft::resources& res, std::ostream& os, const dataset& dataset) +/** Write CAGRA index dataset blob (tag + element dtype + padded payload). 
*/ +template +void serialize_cagra_padded_dataset(const raft::resources& res, + std::ostream& os, + const device_padded_dataset_view& dataset) { - if (auto x = dynamic_cast*>(&dataset); x != nullptr) { - raft::serialize_scalar(res, os, kSerializeEmptyDataset); - return serialize(res, os, *x); - } - if (auto x = dynamic_cast*>(&dataset); x != nullptr) { - raft::serialize_scalar(res, os, kSerializeStridedDataset); + raft::serialize_scalar(res, os, kSerializeStridedDataset); + if constexpr (std::is_same_v) { raft::serialize_scalar(res, os, CUDA_R_32F); - return serialize(res, os, *x); - } - if (auto x = dynamic_cast*>(&dataset); x != nullptr) { - raft::serialize_scalar(res, os, kSerializeStridedDataset); + } else if constexpr (std::is_same_v) { raft::serialize_scalar(res, os, CUDA_R_16F); - return serialize(res, os, *x); - } - if (auto x = dynamic_cast*>(&dataset); x != nullptr) { - raft::serialize_scalar(res, os, kSerializeStridedDataset); + } else if constexpr (std::is_same_v) { raft::serialize_scalar(res, os, CUDA_R_8I); - return serialize(res, os, *x); - } - if (auto x = dynamic_cast*>(&dataset); x != nullptr) { - raft::serialize_scalar(res, os, kSerializeStridedDataset); + } else if constexpr (std::is_same_v) { raft::serialize_scalar(res, os, CUDA_R_8U); - return serialize(res, os, *x); + } else { + static_assert(!std::is_same_v, "unsupported element type for CAGRA serialize"); } - if (auto x = dynamic_cast*>(&dataset); x != nullptr) { - raft::serialize_scalar(res, os, kSerializeVPQDataset); - raft::serialize_scalar(res, os, CUDA_R_32F); - return serialize(res, os, *x); - } - if (auto x = dynamic_cast*>(&dataset); x != nullptr) { - raft::serialize_scalar(res, os, kSerializeVPQDataset); - raft::serialize_scalar(res, os, CUDA_R_16F); - return serialize(res, os, *x); - } - RAFT_FAIL("unsupported dataset type."); + serialize(res, os, dataset); } template auto deserialize_empty(raft::resources const& res, std::istream& is) - -> std::unique_ptr> + -> std::unique_ptr> { 
auto suggested_dim = raft::deserialize_scalar(res, is); - return std::make_unique>(suggested_dim); + return std::make_unique>(suggested_dim); } template -auto deserialize_strided(raft::resources const& res, std::istream& is) - -> std::unique_ptr> +auto deserialize_padded(raft::resources const& res, std::istream& is) + -> std::unique_ptr> { - auto n_rows = raft::deserialize_scalar(res, is); - auto dim = raft::deserialize_scalar(res, is); - auto stride = raft::deserialize_scalar(res, is); + auto n_rows = raft::deserialize_scalar(res, is); + auto dim = raft::deserialize_scalar(res, is); + auto stride = raft::deserialize_scalar(res, is); + RAFT_EXPECTS(dim <= stride, + "deserialize_padded: logical dim (%u) must not exceed row stride (%u).", + static_cast(dim), + static_cast(stride)); auto host_array = raft::make_host_matrix(n_rows, dim); raft::deserialize_mdspan(res, is, host_array.view()); - return make_strided_dataset(res, std::move(host_array), stride); + return cuvs::neighbors::make_device_padded_dataset(res, host_array.view()); } -template +template auto deserialize_vpq(raft::resources const& res, std::istream& is) - -> std::unique_ptr> + -> std::unique_ptr> { auto n_rows = raft::deserialize_scalar(res, is); auto dim = raft::deserialize_scalar(res, is); @@ -144,9 +108,9 @@ auto deserialize_vpq(raft::resources const& res, std::istream& is) auto encoded_row_length = raft::deserialize_scalar(res, is); auto vq_code_book = - raft::make_device_matrix(res, vq_n_centers, dim); + raft::make_device_matrix(res, vq_n_centers, dim); auto pq_code_book = - raft::make_device_matrix(res, pq_n_centers, pq_len); + raft::make_device_matrix(res, pq_n_centers, pq_len); auto data = raft::make_device_matrix(res, n_rows, encoded_row_length); @@ -154,43 +118,33 @@ auto deserialize_vpq(raft::resources const& res, std::istream& is) raft::deserialize_mdspan(res, is, pq_code_book.view()); raft::deserialize_mdspan(res, is, data.view()); - return std::make_unique>( + return std::make_unique>( 
std::move(vq_code_book), std::move(pq_code_book), std::move(data)); } -template +// Reads tag + dtype prefix, validates they match DataT, and returns a concrete +// device_padded_dataset. This is the only currently-supported dataset kind for CAGRA +// serialize/deserialize. When a new dataset kind is supported, add a matching overload of +// deserialize_dataset here rather than extending this one — overload dispatch replaces the old +// type-erased variant routing. +template auto deserialize_dataset(raft::resources const& res, std::istream& is) - -> std::unique_ptr> + -> std::unique_ptr> { const auto tag = raft::deserialize_scalar(res, is); - switch (tag) { - case kSerializeEmptyDataset: return deserialize_empty(res, is); - case kSerializeStridedDataset: { - const auto dtype = raft::deserialize_scalar(res, is); - switch (dtype) { - case CUDA_R_32F: return deserialize_strided(res, is); - case CUDA_R_16F: return deserialize_strided(res, is); - case CUDA_R_8I: return deserialize_strided(res, is); - case CUDA_R_8U: return deserialize_strided(res, is); - default: - RAFT_FAIL("Failed to deserialize dataset: unsupported strided dataset element type %d.", - static_cast(dtype)); - } - } - case kSerializeVPQDataset: { - const auto dtype = raft::deserialize_scalar(res, is); - switch (dtype) { - case CUDA_R_32F: return deserialize_vpq(res, is); - case CUDA_R_16F: return deserialize_vpq(res, is); - default: - RAFT_FAIL("Failed to deserialize dataset: unsupported VPQ dtype %d.", - static_cast(dtype)); - } - } - default: - RAFT_FAIL("Failed to deserialize dataset: unknown instance tag %u.", - static_cast(tag)); - } + RAFT_EXPECTS(tag == kSerializeStridedDataset, + "deserialize_dataset: expected padded (strided) tag, got %u", + static_cast(tag)); + const auto dtype = raft::deserialize_scalar(res, is); + constexpr cudaDataType_t expected_dtype = std::is_same_v ? CUDA_R_32F + : std::is_same_v ? CUDA_R_16F + : std::is_same_v ? 
CUDA_R_8I + : CUDA_R_8U; // uint8_t + RAFT_EXPECTS(dtype == expected_dtype, + "deserialize_dataset: serialized dtype (%d) does not match expected (%d)", + static_cast(dtype), + static_cast(expected_dtype)); + return deserialize_padded(res, is); } } // namespace cuvs::neighbors::detail diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp index 88580de929..827a976d3f 100644 --- a/cpp/src/neighbors/detail/hnsw.hpp +++ b/cpp/src/neighbors/detail/hnsw.hpp @@ -219,7 +219,7 @@ template std::enable_if_t>> from_cagra( raft::resources const& res, const index_params& params, - const cuvs::neighbors::cagra::index& cagra_index, + const cuvs::neighbors::cagra::device_padded_index& cagra_index, std::optional> dataset) { common::nvtx::range fun_scope("hnsw::from_cagra"); @@ -247,7 +247,7 @@ template std::enable_if_t>> from_cagra( raft::resources const& res, const index_params& params, - const cuvs::neighbors::cagra::index& cagra_index, + const cuvs::neighbors::cagra::device_padded_index& cagra_index, std::optional> dataset) { common::nvtx::range fun_scope("hnsw::from_cagra"); @@ -689,10 +689,11 @@ void serialize_to_hnswlib_batched(raft::resources const& res, // Serialize a disk-backed CAGRA index into hnswlib format by reading graph/dataset/label // rows directly from the backing files via pread. 
template -void serialize_to_hnswlib_from_disk(raft::resources const& res, - std::ostream& os_raw, - const cuvs::neighbors::hnsw::index_params& params, - const cuvs::neighbors::cagra::index& index_) +void serialize_to_hnswlib_from_disk( + raft::resources const& res, + std::ostream& os_raw, + const cuvs::neighbors::hnsw::index_params& params, + const cuvs::neighbors::cagra::device_padded_index& index_) { RAFT_EXPECTS(index_.dataset_fd().has_value() && index_.graph_fd().has_value(), "Function only implements serialization from disk."); @@ -865,7 +866,7 @@ void serialize_to_hnswlib_from_inmem( raft::resources const& res, std::ostream& os_raw, const cuvs::neighbors::hnsw::index_params& params, - const cuvs::neighbors::cagra::index& index_, + const cuvs::neighbors::cagra::device_padded_index& index_, std::optional> dataset) { auto stream = raft::resource::get_cuda_stream(res); @@ -971,7 +972,7 @@ template std::enable_if_t>> from_cagra( raft::resources const& res, const index_params& params, - const cuvs::neighbors::cagra::index& cagra_index, + const cuvs::neighbors::cagra::device_padded_index& cagra_index, std::optional> dataset) { common::nvtx::range fun_scope("hnsw::from_cagra"); @@ -1237,7 +1238,7 @@ template std::unique_ptr> from_cagra( raft::resources const& res, const index_params& params, - const cuvs::neighbors::cagra::index& cagra_index, + const cuvs::neighbors::cagra::device_padded_index& cagra_index, std::optional> dataset) { // special treatment for index on disk @@ -1594,13 +1595,37 @@ std::unique_ptr> build(raft::resources const& res, cagra_ace_params.max_gpu_memory_gb = ace_params.max_gpu_memory_gb; cagra_params.graph_build_params = cagra_ace_params; } - // Build CAGRA index optionally using ACE - auto cagra_index = cuvs::neighbors::cagra::build(res, cagra_params, dataset); + + // Public HNSW API uses host_matrix_view; CAGRA build expects a padded dataset view. + // ACE graph build is CPU-side and does not require CUDA row-alignment. 
The device + // dataset is padded separately below. + cuvs::neighbors::host_padded_dataset_view host_padded_view( + dataset, static_cast(dataset.extent(1))); + auto ace_host_index = cuvs::neighbors::cagra::build(res, cagra_params, host_padded_view); RAFT_LOG_INFO("hnsw::build - Converting CAGRA index to HNSW format"); - // Convert CAGRA index to HNSW index - return from_cagra(res, params, cagra_index, dataset); + if (ace_host_index.dataset_fd().has_value()) { + // Disk-mode ACE: transfer all FDs to a device index so that from_cagra detects the + // disk-backed index and calls serialize_to_hnswlib_from_disk, writing hnsw_index.bin. + cuvs::neighbors::cagra::device_padded_index ace_device_idx( + res, ace_host_index.metric()); + ace_device_idx.update_dataset(res, std::move(*ace_host_index.steal_dataset_fd())); + if (ace_host_index.graph_fd().has_value()) { + ace_device_idx.update_graph(res, std::move(*ace_host_index.steal_graph_fd())); + } + if (ace_host_index.mapping_fd().has_value()) { + ace_device_idx.update_mapping(res, std::move(*ace_host_index.steal_mapping_fd())); + } + return from_cagra(res, params, ace_device_idx, std::nullopt); + } else { + // In-memory ACE: attach the original (un-reordered) dataset as a device-padded view. + // from_cagra receives the host dataset directly to avoid an extra device-to-host copy. 
+ auto ace_device_padded = cuvs::neighbors::make_device_padded_dataset(res, dataset); + auto ace_index = cuvs::neighbors::cagra::attach_device_dataset_on_host_index( + res, ace_host_index, ace_device_padded->as_dataset_view()); + return from_cagra(res, params, ace_index, std::make_optional(dataset)); + } } } // namespace cuvs::neighbors::hnsw::detail diff --git a/cpp/src/neighbors/detail/tiered_index.cuh b/cpp/src/neighbors/detail/tiered_index.cuh index 9cad64549b..4e2c13ed68 100644 --- a/cpp/src/neighbors/detail/tiered_index.cuh +++ b/cpp/src/neighbors/detail/tiered_index.cuh @@ -6,6 +6,7 @@ #pragma once #include +#include #include #include @@ -22,7 +23,10 @@ #include #include +#include + namespace cuvs::neighbors::tiered_index::detail { + /** Storage for brute force based incremental indices @@ -109,9 +113,45 @@ template struct index_state { using value_type = typename UpstreamT::value_type; + /** + * When row pitch is not CAGRA-aligned, `cagra::build(res, params, device_matrix_view)` calls + * `make_device_padded_dataset_view` and throws. For `cagra::index` we keep an + * owning padded copy in \p ann_build_pad and call `cagra::build` on `device_padded_dataset_view`. 
+ */ + template + [[nodiscard]] static auto build_upstream_ann( + raft::resources const& res, + index_params const& tiered_params, + BuildFn&& build_fn, + DatasetView dataset, + std::shared_ptr>& ann_build_pad) + -> std::shared_ptr + { + if (!cuvs::neighbors::matrix_row_width_matches_cagra_required(dataset)) { + if constexpr (std::is_same_v>) { + auto own = cuvs::neighbors::make_device_padded_dataset(res, dataset); + ann_build_pad = + std::shared_ptr>( + std::move(own)); + auto index = + cuvs::neighbors::cagra::build(res, tiered_params, ann_build_pad->as_dataset_view()); + index.update_dataset(res, ann_build_pad->as_dataset_view()); + return std::make_shared(std::move(index)); + } + } + + ann_build_pad.reset(); + auto index = std::forward(build_fn)(res, tiered_params, dataset); + if constexpr (std::is_same_v>) { + index.update_dataset(res, cuvs::neighbors::make_device_padded_dataset_view(res, dataset)); + } + return std::make_shared(std::move(index)); + } + index_state(const index_state& other) : storage(other.storage), ann_index(other.ann_index), + ann_build_pad_(other.ann_build_pad_), build_params(other.build_params), build_fn(other.build_fn) { @@ -129,7 +169,7 @@ struct index_state { // Create an ANN index if we have sufficient rows in initial dataset if (dataset.extent(0) > index_params.min_ann_rows) { - ann_index = std::make_shared(std::move(build_fn(res, index_params, dataset))); + ann_index = build_upstream_ann(res, index_params, build_fn, dataset, ann_build_pad_); } // allocate bfknn storage for growing the index incrementally @@ -261,6 +301,9 @@ struct index_state { // ANN index data std::shared_ptr ann_index; + /** Owns a padded device copy of the ANN build matrix when row stride is not CAGRA-aligned. 
*/ + std::shared_ptr> ann_build_pad_; + // stores a copy of the build params - used during compact index_params build_params; @@ -268,6 +311,31 @@ struct index_state { std::function> build_fn; }; +/** + * After BF storage grows, repoint CAGRA at the first \p ann_rows rows. Tight row-major storage + * often fails CAGRA stride checks; when it does, refresh \p ann_build_pad and attach the padded + * view (same contract as `build_upstream_ann`). + */ +inline void update_cagra_ann_dataset_for_stride( + raft::resources const& res, + cuvs::neighbors::cagra::device_padded_index& ann_index, + raft::device_matrix_view dataset, + std::shared_ptr>& ann_build_pad) +{ + if (!cuvs::neighbors::matrix_row_width_matches_cagra_required(dataset)) { + // Keep the new buffer alive locally, repoint the index first, then replace ann_build_pad. + // Otherwise assigning to ann_build_pad can destroy the dataset the index still views. + auto new_pad = cuvs::neighbors::make_device_padded_dataset(res, dataset); + ann_index.update_dataset(res, new_pad->as_dataset_view()); + ann_build_pad = + std::shared_ptr>(std::move(new_pad)); + } else { + // Repoint to the strided view before dropping the padded owner the index may reference. + ann_index.update_dataset(res, cuvs::neighbors::make_device_padded_dataset_view(res, dataset)); + ann_build_pad.reset(); + } +} + /** * @brief Build the tiered index from the dataset for efficient search. 
* @@ -435,8 +503,8 @@ auto compact(raft::resources const& res, const index_state& current) auto dataset = raft::make_device_matrix_view( storage->dataset.data(), storage->num_rows_used, storage->dim); - next_state->ann_index = std::make_shared( - std::move(next_state->build_fn(res, next_state->build_params, dataset))); + next_state->ann_index = index_state::build_upstream_ann( + res, next_state->build_params, next_state->build_fn, dataset, next_state->ann_build_pad_); return next_state; } } // namespace cuvs::neighbors::tiered_index::detail diff --git a/cpp/src/neighbors/detail/vamana/vamana_build.cuh b/cpp/src/neighbors/detail/vamana/vamana_build.cuh index 336d81215b..e1dae2bada 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_build.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_build.cuh @@ -646,7 +646,7 @@ index build( auto quantizer = cuvs::preprocessing::quantize::pq::quantizer( pq_params, - cuvs::neighbors::vpq_dataset{ + cuvs::neighbors::device_vpq_dataset{ raft::make_device_matrix(res, 0, 0), std::move(pq_codebook), raft::make_device_matrix(res, 0, 0)}); diff --git a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh index 4bf32e8a64..9fc48f338e 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh @@ -24,7 +24,6 @@ #include #include #include - namespace cuvs::neighbors::vamana::detail { // write matrix containing dataset to file @@ -58,31 +57,26 @@ void to_file(const std::string& dataset_base_file, raft::host_matrix */ template void serialize_dataset(raft::resources const& res, - const cuvs::neighbors::dataset* dataset, + const cuvs::neighbors::device_padded_dataset_view* dataset, const std::string& dataset_base_file) { + if (dataset == nullptr) { return; } // try allocating a buffer for the dataset on host try { - const auto* strided_dataset = - dynamic_cast*>(dataset); - if (strided_dataset) { - auto nrows = 
strided_dataset->n_rows(); - auto dim = strided_dataset->dim(); - auto stride = strided_dataset->stride(); - auto d_data = strided_dataset->view(); - auto h_dataset = raft::make_host_matrix(nrows, dim); - raft::copy_matrix(h_dataset.data_handle(), - dim, - d_data.data_handle(), - stride, - dim, - nrows, - raft::resource::get_cuda_stream(res)); - raft::resource::sync_stream(res); - to_file(dataset_base_file, h_dataset); - } else { - RAFT_LOG_DEBUG("dynamic_cast to strided_dataset failed"); - } + auto nrows = dataset->n_rows(); + auto dim = dataset->dim(); + auto stride = dataset->stride(); + auto d_data = dataset->view(); + auto h_dataset = raft::make_host_matrix(nrows, dim); + raft::copy_matrix(h_dataset.data_handle(), + dim, + d_data.data_handle(), + stride, + dim, + nrows, + raft::resource::get_cuda_stream(res)); + raft::resource::sync_stream(res); + to_file(dataset_base_file, h_dataset); } catch (std::bad_alloc& e) { RAFT_LOG_INFO("Failed to serialize dataset"); } catch (raft::logic_error& e) { @@ -120,11 +114,12 @@ void serialize_dataset(raft::resources const& res, * */ template -void serialize_sector_aligned(raft::resources const& res, - const HostMatT& h_graph, - const cuvs::neighbors::dataset& dataset, - const uint64_t medoid, - std::ofstream& output_writer) +void serialize_sector_aligned( + raft::resources const& res, + const HostMatT& h_graph, + const cuvs::neighbors::device_padded_dataset_view& dataset, + const uint64_t medoid, + std::ofstream& output_writer) { if constexpr (!std::is_same_v) { RAFT_FAIL("serialization is only implemented for uint32_t graph"); @@ -159,15 +154,11 @@ void serialize_sector_aligned(raft::resources const& res, const uint64_t nnodes_per_sector = sector_len / max_node_len; // 0 if max_node_len > sector_len // copy dataset to host - auto dataset_strided = - dynamic_cast*>(&dataset); - if (!dataset_strided) { RAFT_FAIL("Invalid dataset"); } - auto d_data = dataset_strided->view(); auto h_data = raft::make_host_matrix(npts, ndims); 
raft::copy_matrix(h_data.data_handle(), ndims, - d_data.data_handle(), - dataset_strided->stride(), + dataset.view().data_handle(), + dataset.stride(), ndims, npts, raft::resource::get_cuda_stream(res)); diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh index ec4a684274..6bd77d5ae9 100644 --- a/cpp/src/neighbors/detail/vpq_dataset.cuh +++ b/cpp/src/neighbors/detail/vpq_dataset.cuh @@ -415,7 +415,7 @@ void process_and_fill_codes( bool inline_vq_labels = false) { using data_t = typename DatasetT::value_type; - using cdataset_t = vpq_dataset; + using cdataset_t = device_vpq_dataset; using label_t = uint32_t; const ix_t n_rows = dataset.extent(0); @@ -807,7 +807,7 @@ void process_and_fill_codes_subspaces( raft::device_matrix_view codes) { using data_t = typename DatasetT::value_type; - using cdataset_t = vpq_dataset; + using cdataset_t = device_vpq_dataset; using label_t = uint32_t; const ix_t n_rows = dataset.extent(0); diff --git a/cpp/src/neighbors/dynamic_batching.cu b/cpp/src/neighbors/dynamic_batching.cu index cfbef44409..d2e8b89b24 100644 --- a/cpp/src/neighbors/dynamic_batching.cu +++ b/cpp/src/neighbors/dynamic_batching.cu @@ -13,9 +13,19 @@ #include #include +namespace cuvs::neighbors::cagra { + +// Single-token names for CUVS_INST_DYNAMIC_BATCHING_INDEX (macro expands Namespace ::__VA_ARGS__). +using cagra_f32_u32_index = device_padded_index; +using cagra_f16_u32_index = device_padded_index; +using cagra_i8_u32_index = device_padded_index; +using cagra_u8_u32_index = device_padded_index; + +} // namespace cuvs::neighbors::cagra + namespace cuvs::neighbors::dynamic_batching { -// NB: the (template) index parameter should be the last; it may contain the spaces and so split +// NB: the (template) index parameter should be the last; it must be a single preprocessor token // into multiple preprocessor token. Then it is consumed as __VA_ARGS__ // #define CUVS_INST_DYNAMIC_BATCHING_INDEX(T, IdxT, Namespace, ...) 
\ @@ -47,22 +57,16 @@ namespace cuvs::neighbors::dynamic_batching { CUVS_INST_DYNAMIC_BATCHING_INDEX(float, int64_t, cuvs::neighbors::brute_force, index); // CAGRA build and search with 32-bit indices -CUVS_INST_DYNAMIC_BATCHING_INDEX(float, uint32_t, cuvs::neighbors::cagra, index); -CUVS_INST_DYNAMIC_BATCHING_INDEX(half, uint32_t, cuvs::neighbors::cagra, index); -CUVS_INST_DYNAMIC_BATCHING_INDEX(int8_t, uint32_t, cuvs::neighbors::cagra, index); -CUVS_INST_DYNAMIC_BATCHING_INDEX(uint8_t, - uint32_t, - cuvs::neighbors::cagra, - index); +CUVS_INST_DYNAMIC_BATCHING_INDEX(float, uint32_t, cuvs::neighbors::cagra, cagra_f32_u32_index); +CUVS_INST_DYNAMIC_BATCHING_INDEX(half, uint32_t, cuvs::neighbors::cagra, cagra_f16_u32_index); +CUVS_INST_DYNAMIC_BATCHING_INDEX(int8_t, uint32_t, cuvs::neighbors::cagra, cagra_i8_u32_index); +CUVS_INST_DYNAMIC_BATCHING_INDEX(uint8_t, uint32_t, cuvs::neighbors::cagra, cagra_u8_u32_index); // CAGRA build with 32-bit indices, search with 64-bit indices -CUVS_INST_DYNAMIC_BATCHING_INDEX(float, int64_t, cuvs::neighbors::cagra, index); -CUVS_INST_DYNAMIC_BATCHING_INDEX(half, int64_t, cuvs::neighbors::cagra, index); -CUVS_INST_DYNAMIC_BATCHING_INDEX(int8_t, int64_t, cuvs::neighbors::cagra, index); -CUVS_INST_DYNAMIC_BATCHING_INDEX(uint8_t, - int64_t, - cuvs::neighbors::cagra, - index); +CUVS_INST_DYNAMIC_BATCHING_INDEX(float, int64_t, cuvs::neighbors::cagra, cagra_f32_u32_index); +CUVS_INST_DYNAMIC_BATCHING_INDEX(half, int64_t, cuvs::neighbors::cagra, cagra_f16_u32_index); +CUVS_INST_DYNAMIC_BATCHING_INDEX(int8_t, int64_t, cuvs::neighbors::cagra, cagra_i8_u32_index); +CUVS_INST_DYNAMIC_BATCHING_INDEX(uint8_t, int64_t, cuvs::neighbors::cagra, cagra_u8_u32_index); // IVF-PQ with 64-bit indices CUVS_INST_DYNAMIC_BATCHING_INDEX(float, int64_t, cuvs::neighbors::ivf_pq, index); diff --git a/cpp/src/neighbors/hnsw.cpp b/cpp/src/neighbors/hnsw.cpp index 54e9dcf12a..0a53c0d1be 100644 --- a/cpp/src/neighbors/hnsw.cpp +++ b/cpp/src/neighbors/hnsw.cpp @@ 
-46,7 +46,7 @@ CUVS_INST_HNSW_BUILD(int8_t); std::unique_ptr> from_cagra( \ raft::resources const& res, \ const index_params& params, \ - const cuvs::neighbors::cagra::index& cagra_index, \ + const cuvs::neighbors::cagra::device_padded_index& cagra_index, \ std::optional> dataset) \ { \ return detail::from_cagra(res, params, cagra_index, dataset); \ diff --git a/cpp/src/neighbors/iface/iface.hpp b/cpp/src/neighbors/iface/iface.hpp index e76a3673af..9cab537320 100644 --- a/cpp/src/neighbors/iface/iface.hpp +++ b/cpp/src/neighbors/iface/iface.hpp @@ -12,14 +12,59 @@ #include #include #include +#include #include #include +#include + namespace cuvs::neighbors { using namespace raft; +namespace iface_detail { +/** + * @brief True when \p mds is already CAGRA row-padded on device (device or managed memory). + */ +template +bool dataset_mdspan_uses_padded_device_view( + raft::mdspan, row_major, Accessor> mds) +{ + using value_type = T; + uint32_t const required_stride = + cagra_required_row_width(static_cast(mds.extent(1))); + uint32_t const src_stride = + mds.stride(0) > 0 ? static_cast(mds.stride(0)) : static_cast(mds.extent(1)); + cudaPointerAttributes a{}; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&a, mds.data_handle())); + bool const device_src = (a.type == cudaMemoryTypeDevice) || (a.type == cudaMemoryTypeManaged); + return device_src && (src_stride == required_stride); +} + +/** Graph build via padded device view, not mdspan host build. */ +template +void cagra_build_from_device_dataset( + raft::resources const& h, + cagra::index_params const& cagra_params, + raft::mdspan, row_major, Accessor> m, + cuvs::neighbors::iface, T, IdxT>& interface) +{ + uint32_t const stride = + m.stride(0) > 0 ? 
static_cast(m.stride(0)) : static_cast(m.extent(1)); + auto dview = raft::make_device_strided_matrix_view( + m.data_handle(), m.extent(0), m.extent(1), stride); + auto padded = cuvs::neighbors::make_device_padded_dataset_view(h, dview); + auto index = cuvs::neighbors::cagra::build(h, cagra_params, padded); + index.update_dataset(h, padded); + interface.cagra_owned_dataset_.reset(); + interface.index_.emplace(std::move(index)); +} +} // namespace iface_detail + +// TODO: Refactor this function signature to use the Dataset API instead of raft::mdspan. +// Currently takes a raw mdspan; should accept a dataset_view<...> so callers pass typed +// views. template void build(const raft::resources& handle, cuvs::neighbors::iface& interface, @@ -36,10 +81,27 @@ void build(const raft::resources& handle, auto idx = cuvs::neighbors::ivf_pq::build( handle, *static_cast(index_params), index_dataset); interface.index_.emplace(std::move(idx)); - } else if constexpr (std::is_same>::value) { - auto idx = cuvs::neighbors::cagra::build( - handle, *static_cast(index_params), index_dataset); - interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + const auto& cagra_params = *static_cast(index_params); + if (raft::get_device_for_address(index_dataset.data_handle()) != -1) { + iface_detail::cagra_build_from_device_dataset(handle, cagra_params, index_dataset, interface); + } else { + // Explicitly form a host_matrix_view so the call always resolves to the host build + // regardless of the mdspan Accessor type (both branches compile for all Accessors; + // at runtime this else branch is only reached when data_handle() is host memory). + // Wrap in host_padded_dataset_view directly (graph build is CPU-side; CUDA alignment + // is not required here — the device dataset is padded separately below). 
+ auto host_view = raft::make_host_matrix_view( + index_dataset.data_handle(), index_dataset.extent(0), index_dataset.extent(1)); + cuvs::neighbors::host_padded_dataset_view host_padded( + host_view, static_cast(host_view.extent(1))); + auto host_idx = cuvs::neighbors::cagra::build(handle, cagra_params, host_padded); + auto padded_r = cuvs::neighbors::make_device_padded_dataset(handle, index_dataset); + auto device_idx = cuvs::neighbors::cagra::attach_device_dataset_on_host_index( + handle, host_idx, padded_r->as_dataset_view()); + interface.cagra_owned_dataset_ = std::move(padded_r); + interface.index_.emplace(std::move(device_idx)); + } } resource::sync_stream(handle); } @@ -62,7 +124,7 @@ void extend( auto idx = cuvs::neighbors::ivf_pq::extend(handle, new_vectors, new_indices, interface.index_.value()); interface.index_.emplace(std::move(idx)); - } else if constexpr (std::is_same>::value) { + } else if constexpr (std::is_same>::value) { RAFT_FAIL("CAGRA does not implement the extend method"); } resource::sync_stream(handle); @@ -92,7 +154,7 @@ void search(const raft::resources& handle, queries, neighbors, distances); - } else if constexpr (std::is_same>::value) { + } else if constexpr (std::is_same>::value) { cuvs::neighbors::cagra::search(handle, *reinterpret_cast(search_params), interface.index_.value(), @@ -134,7 +196,7 @@ void serialize(const raft::resources& handle, ivf_flat::serialize(handle, os, interface.index_.value()); } else if constexpr (std::is_same>::value) { ivf_pq::serialize(handle, os, interface.index_.value()); - } else if constexpr (std::is_same>::value) { + } else if constexpr (std::is_same>::value) { cagra::serialize(handle, os, interface.index_.value(), true); } @@ -158,9 +220,11 @@ void deserialize(const raft::resources& handle, ivf_pq::deserialize(handle, is, &idx); resource::sync_stream(handle); interface.index_.emplace(std::move(idx)); - } else if constexpr (std::is_same>::value) { - cagra::index idx(handle); - 
cagra::deserialize(handle, is, &idx); + } else if constexpr (std::is_same>::value) { + cagra::device_padded_index idx(handle); + std::unique_ptr> out_dataset; + cagra::deserialize(handle, is, &idx, &out_dataset); + if (out_dataset) { interface.cagra_owned_dataset_ = std::move(out_dataset); } resource::sync_stream(handle); interface.index_.emplace(std::move(idx)); } @@ -186,9 +250,11 @@ void deserialize(const raft::resources& handle, ivf_pq::deserialize(handle, is, &idx); resource::sync_stream(handle); interface.index_.emplace(std::move(idx)); - } else if constexpr (std::is_same>::value) { - cagra::index idx(handle); - cagra::deserialize(handle, is, &idx); + } else if constexpr (std::is_same>::value) { + cagra::device_padded_index idx(handle); + std::unique_ptr> out_dataset; + cagra::deserialize(handle, is, &idx, &out_dataset); + if (out_dataset) { interface.cagra_owned_dataset_ = std::move(out_dataset); } resource::sync_stream(handle); interface.index_.emplace(std::move(idx)); } diff --git a/cpp/src/neighbors/iface/iface_cagra_inst.cu.in b/cpp/src/neighbors/iface/iface_cagra_inst.cu.in index c2456390d5..0cfdd818ab 100644 --- a/cpp/src/neighbors/iface/iface_cagra_inst.cu.in +++ b/cpp/src/neighbors/iface/iface_cagra_inst.cu.in @@ -25,33 +25,34 @@ using IdxT_da = template void build( const raft::resources& handle, - cuvs::neighbors::iface, data_t, index_t>& interface, + cuvs::neighbors::iface, data_t, index_t>& interface, const cuvs::neighbors::index_params* index_params, raft::mdspan, row_major, T_ha> index_dataset); template void build( const raft::resources& handle, - cuvs::neighbors::iface, data_t, index_t>& interface, + cuvs::neighbors::iface, data_t, index_t>& interface, const cuvs::neighbors::index_params* index_params, raft::mdspan, row_major, T_da> index_dataset); template void extend( const raft::resources& handle, - cuvs::neighbors::iface, data_t, index_t>& interface, + cuvs::neighbors::iface, data_t, index_t>& interface, raft::mdspan, row_major, T_ha> 
new_vectors, std::optional, layout_c_contiguous, IdxT_ha>> new_indices); template void extend( const raft::resources& handle, - cuvs::neighbors::iface, data_t, index_t>& interface, + cuvs::neighbors::iface, data_t, index_t>& interface, raft::mdspan, row_major, T_da> new_vectors, std::optional, layout_c_contiguous, IdxT_da>> new_indices); template void search( const raft::resources& handle, - const cuvs::neighbors::iface, data_t, index_t>& interface, + const cuvs::neighbors::iface, data_t, index_t>& + interface, const cuvs::neighbors::search_params* search_params, raft::device_matrix_view queries, raft::device_matrix_view neighbors, @@ -59,7 +60,8 @@ template void search( template void search( const raft::resources& handle, - const cuvs::neighbors::iface, data_t, index_t>& interface, + const cuvs::neighbors::iface, data_t, index_t>& + interface, const cuvs::neighbors::search_params* search_params, raft::host_matrix_view h_queries, raft::device_matrix_view d_neighbors, @@ -67,7 +69,8 @@ template void search( template void search( const raft::resources& handle, - const cuvs::neighbors::iface, data_t, index_t>& interface, + const cuvs::neighbors::iface, data_t, index_t>& + interface, const cuvs::neighbors::search_params* search_params, raft::device_matrix_view queries, raft::device_matrix_view neighbors, @@ -75,7 +78,8 @@ template void search( template void search( const raft::resources& handle, - const cuvs::neighbors::iface, data_t, index_t>& interface, + const cuvs::neighbors::iface, data_t, index_t>& + interface, const cuvs::neighbors::search_params* search_params, raft::host_matrix_view h_queries, raft::device_matrix_view d_neighbors, @@ -83,17 +87,18 @@ template void search( template void serialize( const raft::resources& handle, - const cuvs::neighbors::iface, data_t, index_t>& interface, + const cuvs::neighbors::iface, data_t, index_t>& + interface, std::ostream& os); template void deserialize( const raft::resources& handle, - cuvs::neighbors::iface, data_t, 
index_t>& interface, + cuvs::neighbors::iface, data_t, index_t>& interface, std::istream& is); template void deserialize( const raft::resources& handle, - cuvs::neighbors::iface, data_t, index_t>& interface, + cuvs::neighbors::iface, data_t, index_t>& interface, const std::string& filename); } // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/mg/mg_cagra_inst.cu.in b/cpp/src/neighbors/mg/mg_cagra_inst.cu.in index 6e57c3f598..ee78042b1c 100644 --- a/cpp/src/neighbors/mg/mg_cagra_inst.cu.in +++ b/cpp/src/neighbors/mg/mg_cagra_inst.cu.in @@ -5,87 +5,93 @@ #include -#define CUVS_INST_MG_CAGRA(T, IdxT) \ - namespace cuvs::neighbors::cagra { \ - using namespace cuvs::neighbors; \ - \ - cuvs::neighbors::mg_index, T, IdxT> build( \ - const raft::resources& res, \ - const mg_index_params& index_params, \ - raft::host_matrix_view index_dataset) \ - { \ - cuvs::neighbors::mg_index, T, IdxT> index(res, index_params.mode); \ - cuvs::neighbors::snmg::detail::build( \ - res, \ - index, \ - static_cast(&index_params), \ - index_dataset); \ - return index; \ - } \ - \ - void extend(const raft::resources& res, \ - cuvs::neighbors::mg_index, T, IdxT>& index, \ - raft::host_matrix_view new_vectors, \ - std::optional> new_indices) \ - { \ - cuvs::neighbors::snmg::detail::extend(res, index, new_vectors, new_indices); \ - } \ - \ - void search(const raft::resources& res, \ - const cuvs::neighbors::mg_index, T, IdxT>& index, \ - const mg_search_params& search_params, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbors, \ - raft::host_matrix_view distances) \ - { \ - cuvs::neighbors::snmg::detail::search( \ - res, \ - index, \ - static_cast(&search_params), \ - queries, \ - neighbors, \ - distances); \ - } \ - \ - void search(const raft::resources& res, \ - const cuvs::neighbors::mg_index, T, IdxT>& index, \ - const mg_search_params& search_params, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbors, \ - raft::host_matrix_view distances) 
\ - { \ - cuvs::neighbors::snmg::detail::search( \ - res, \ - index, \ - static_cast(&search_params), \ - queries, \ - neighbors, \ - distances); \ - } \ - \ - void serialize(const raft::resources& res, \ - const cuvs::neighbors::mg_index, T, IdxT>& index, \ - const std::string& filename) \ - { \ - cuvs::neighbors::snmg::detail::serialize(res, index, filename); \ - } \ - \ - template <> \ - CUVS_EXPORT cuvs::neighbors::mg_index, T, IdxT> deserialize( \ - const raft::resources& res, const std::string& filename) \ - { \ - auto idx = cuvs::neighbors::mg_index, T, IdxT>(res, filename); \ - return idx; \ - } \ - \ - template <> \ - CUVS_EXPORT cuvs::neighbors::mg_index, T, IdxT> distribute( \ - const raft::resources& res, const std::string& filename) \ - { \ - auto idx = cuvs::neighbors::mg_index, T, IdxT>(res, REPLICATED); \ - cuvs::neighbors::snmg::detail::deserialize_and_distribute(res, idx, filename); \ - return idx; \ - } \ +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + namespace cuvs::neighbors::cagra { \ + using namespace cuvs::neighbors; \ + \ + cuvs::neighbors::mg_index, T, IdxT> build( \ + const raft::resources& res, \ + const mg_index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + cuvs::neighbors::mg_index, T, IdxT> index( \ + res, index_params.mode); \ + cuvs::neighbors::snmg::detail::build( \ + res, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::resources& res, \ + cuvs::neighbors::mg_index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::snmg::detail::extend(res, index, new_vectors, new_indices); \ + } \ + \ + void search( \ + const raft::resources& res, \ + const cuvs::neighbors::mg_index, T, IdxT>& index, \ + const mg_search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances) \ + { \ + 
cuvs::neighbors::snmg::detail::search( \ + res, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances); \ + } \ + \ + void search( \ + const raft::resources& res, \ + const cuvs::neighbors::mg_index, T, IdxT>& index, \ + const mg_search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances) \ + { \ + cuvs::neighbors::snmg::detail::search( \ + res, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances); \ + } \ + \ + void serialize( \ + const raft::resources& res, \ + const cuvs::neighbors::mg_index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::snmg::detail::serialize(res, index, filename); \ + } \ + \ + template <> \ + CUVS_EXPORT cuvs::neighbors::mg_index, T, IdxT> \ + deserialize(const raft::resources& res, const std::string& filename) \ + { \ + auto idx = \ + cuvs::neighbors::mg_index, T, IdxT>(res, filename); \ + return idx; \ + } \ + \ + template <> \ + CUVS_EXPORT cuvs::neighbors::mg_index, T, IdxT> \ + distribute(const raft::resources& res, const std::string& filename) \ + { \ + auto idx = \ + cuvs::neighbors::mg_index, T, IdxT>(res, REPLICATED); \ + cuvs::neighbors::snmg::detail::deserialize_and_distribute(res, idx, filename); \ + return idx; \ + } \ } // namespace cuvs::neighbors::cagra CUVS_INST_MG_CAGRA(@data_type@, uint32_t); diff --git a/cpp/src/neighbors/mg/snmg.cuh b/cpp/src/neighbors/mg/snmg.cuh index 43e4aa4471..a21c3a5f91 100644 --- a/cpp/src/neighbors/mg/snmg.cuh +++ b/cpp/src/neighbors/mg/snmg.cuh @@ -587,7 +587,7 @@ void search(const raft::resources& clique, static_cast*>(search_params); search_mode = mg_search_params->search_mode; n_rows_per_batch = mg_search_params->n_rows_per_batch; - } else if constexpr (std::is_same>::value) { + } else if constexpr (std::is_same>::value) { const cuvs::neighbors::mg_search_params* mg_search_params = static_cast*>(search_params); 
search_mode = mg_search_params->search_mode; @@ -666,7 +666,7 @@ void search(const raft::resources& clique, static_cast*>(search_params); merge_mode = mg_search_params->merge_mode; n_rows_per_batch = mg_search_params->n_rows_per_batch; - } else if constexpr (std::is_same>::value) { + } else if constexpr (std::is_same>::value) { const cuvs::neighbors::mg_search_params* mg_search_params = static_cast*>(search_params); merge_mode = mg_search_params->merge_mode; diff --git a/cpp/src/neighbors/tiered_index.cu b/cpp/src/neighbors/tiered_index.cu index 076c0c4a7c..084d0d24a3 100644 --- a/cpp/src/neighbors/tiered_index.cu +++ b/cpp/src/neighbors/tiered_index.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -28,14 +28,30 @@ void typed_search(raft::resources const& res, } } // namespace cuvs::neighbors::ivf_pq +namespace { +// Wrapper with the exact signature expected by upstream_build_function_type. +// cagra::build is now a template (no concrete device_matrix_view overload), so it cannot be +// passed as a plain function pointer; this wrapper bridges the gap. 
+cuvs::neighbors::cagra::device_padded_index cagra_build_for_tiered( + raft::resources const& res, + cuvs::neighbors::cagra::index_params const& params, + raft::device_matrix_view dataset) +{ + cuvs::neighbors::device_padded_dataset_view view( + dataset, static_cast(dataset.extent(1))); + return cuvs::neighbors::cagra::build(res, params, view); +} +} // namespace + namespace cuvs::neighbors::tiered_index { auto build(raft::resources const& res, const index_params& params, raft::device_matrix_view dataset) - -> tiered_index::index> + -> tiered_index::index> { - auto state = detail::build>(res, params, cagra::build, dataset); - return cuvs::neighbors::tiered_index::index>(state); + auto state = detail::build>( + res, params, cagra_build_for_tiered, dataset); + return cuvs::neighbors::tiered_index::index>(state); } auto build(raft::resources const& res, @@ -60,7 +76,7 @@ auto build(raft::resources const& res, void extend(raft::resources const& res, raft::device_matrix_view new_vectors, - tiered_index::index>* idx) + tiered_index::index>* idx) { std::scoped_lock lock(idx->write_mutex); auto next_state = detail::extend(res, *idx->state, new_vectors); @@ -78,7 +94,8 @@ void extend(raft::resources const& res, // Block 'search' calls during the update_dataset call to ensure that this // doesn't cause issues in a multithreaded environment std::unique_lock lock(idx->ann_mutex); - next_state->ann_index->update_dataset(res, dataset); + detail::update_cagra_ann_dataset_for_stride( + res, *next_state->ann_index, dataset, next_state->ann_build_pad_); } } @@ -103,7 +120,8 @@ void extend(raft::resources const& res, idx->state = next_state; } -void compact(raft::resources const& res, tiered_index::index>* idx) +void compact(raft::resources const& res, + tiered_index::index>* idx) { std::scoped_lock lock(idx->write_mutex); auto next_state = detail::compact(res, *idx->state); @@ -127,7 +145,7 @@ void compact(raft::resources const& res, void search(raft::resources const& res, const 
cagra::search_params& search_params, - const tiered_index::index>& index, + const tiered_index::index>& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -164,13 +182,14 @@ void search(raft::resources const& res, res, search_params, ivf_pq::typed_search, queries, neighbors, distances, sample_filter); } -auto merge(raft::resources const& res, - const index_params& index_params, - const std::vector>*>& indices) - -> tiered_index::index> +auto merge( + raft::resources const& res, + const index_params& index_params, + const std::vector>*>& indices) + -> tiered_index::index> { auto state = detail::merge(res, index_params, indices); - return cuvs::neighbors::tiered_index::index>(state); + return cuvs::neighbors::tiered_index::index>(state); } auto merge(raft::resources const& res, @@ -203,7 +222,7 @@ int64_t index::dim() const noexcept return state->dim(); } -template struct index>; +template struct index>; template struct index>; template struct index>; diff --git a/cpp/src/preprocessing/quantize/detail/pq.cuh b/cpp/src/preprocessing/quantize/detail/pq.cuh index 5d77e2dd44..480a53dacb 100644 --- a/cpp/src/preprocessing/quantize/detail/pq.cuh +++ b/cpp/src/preprocessing/quantize/detail/pq.cuh @@ -193,7 +193,7 @@ quantizer build( res, filled_params, dataset, raft::make_const_mdspan(vq_code_book.view())); } return {filled_params, - cuvs::neighbors::vpq_dataset{ + cuvs::neighbors::device_vpq_dataset{ std::move(vq_code_book), std::move(pq_code_book), std::move(empty_codes)}}; } @@ -369,8 +369,8 @@ void inverse_transform( template void vpq_convert_math_type(const raft::resources& res, - const cuvs::neighbors::vpq_dataset& src, - cuvs::neighbors::vpq_dataset& dst) + const cuvs::neighbors::device_vpq_dataset& src, + cuvs::neighbors::device_vpq_dataset& dst) { raft::linalg::map(res, dst.vq_code_book.view(), @@ -409,7 +409,7 @@ inline auto make_pq_params_from_vpq(const cuvs::neighbors::vpq_params& in_params 
template auto vpq_build(const raft::resources& res, const cuvs::neighbors::vpq_params& params, - const DatasetT& dataset) -> cuvs::neighbors::vpq_dataset + const DatasetT& dataset) -> cuvs::neighbors::device_vpq_dataset { using label_t = uint32_t; // Use a heuristic to impute missing parameters. @@ -437,17 +437,17 @@ auto vpq_build(const raft::resources& res, codes.view(), true); - return cuvs::neighbors::vpq_dataset{ + return cuvs::neighbors::device_vpq_dataset{ std::move(vq_code_book), std::move(pq_code_book), std::move(codes)}; } template auto vpq_build_half(const raft::resources& res, const cuvs::neighbors::vpq_params& params, - const DatasetT& dataset) -> cuvs::neighbors::vpq_dataset + const DatasetT& dataset) -> cuvs::neighbors::device_vpq_dataset { auto old_type = vpq_build(res, params, dataset); - auto new_type = cuvs::neighbors::vpq_dataset{ + auto new_type = cuvs::neighbors::device_vpq_dataset{ raft::make_device_mdarray(res, old_type.vq_code_book.extents()), raft::make_device_mdarray(res, old_type.pq_code_book.extents()), std::move(old_type.data)}; diff --git a/cpp/src/preprocessing/quantize/pq.cu b/cpp/src/preprocessing/quantize/pq.cu index 761474bdf8..110acf33b4 100644 --- a/cpp/src/preprocessing/quantize/pq.cu +++ b/cpp/src/preprocessing/quantize/pq.cu @@ -7,6 +7,9 @@ #include +#include +#include + namespace cuvs::preprocessing::quantize::pq { #define CUVS_INST_QUANTIZATION(T, QuantI) \ @@ -73,4 +76,58 @@ CUVS_INST_VPQ_BUILD(uint8_t); #undef CUVS_INST_VPQ_BUILD +namespace detail { + +template +auto vpq_train_from_device_rows(raft::resources const& res, + cuvs::neighbors::vpq_params const& params, + T const* src_ptr, + int64_t n_rows, + int64_t dim, + int64_t stride) + -> cuvs::neighbors::device_vpq_dataset +{ + auto stream = raft::resource::get_cuda_stream(res); + if (stride != dim) { + auto dense = raft::make_device_matrix(res, n_rows, dim); + raft::copy_matrix(dense.data_handle(), dim, src_ptr, stride, dim, n_rows, stream); + auto dense_view = + 
raft::make_device_matrix_view(dense.data_handle(), n_rows, dim); + return detail::vpq_build_half(res, params, dense_view); + } + auto row_view = raft::make_device_matrix_view(src_ptr, n_rows, dim); + return detail::vpq_build_half(res, params, row_view); +} + +} // namespace detail + +template cuvs::neighbors::device_vpq_dataset +detail::vpq_train_from_device_rows(raft::resources const&, + cuvs::neighbors::vpq_params const&, + float const*, + int64_t, + int64_t, + int64_t); +template cuvs::neighbors::device_vpq_dataset +detail::vpq_train_from_device_rows(raft::resources const&, + cuvs::neighbors::vpq_params const&, + half const*, + int64_t, + int64_t, + int64_t); +template cuvs::neighbors::device_vpq_dataset +detail::vpq_train_from_device_rows(raft::resources const&, + cuvs::neighbors::vpq_params const&, + int8_t const*, + int64_t, + int64_t, + int64_t); +template cuvs::neighbors::device_vpq_dataset +detail::vpq_train_from_device_rows(raft::resources const&, + cuvs::neighbors::vpq_params const&, + uint8_t const*, + int64_t, + int64_t, + int64_t); + } // namespace cuvs::preprocessing::quantize::pq diff --git a/cpp/src/preprocessing/quantize/vpq_build-ext.cuh b/cpp/src/preprocessing/quantize/vpq_build-ext.cuh deleted file mode 100644 index 1745e53a33..0000000000 --- a/cpp/src/preprocessing/quantize/vpq_build-ext.cuh +++ /dev/null @@ -1,28 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
- * SPDX-License-Identifier: Apache-2.0 - */ -#pragma once - -#include -#include - -namespace cuvs::preprocessing::quantize::pq { - -#define CUVS_INST_VPQ_BUILD(T) \ - cuvs::neighbors::vpq_dataset vpq_build( \ - const raft::resources& res, \ - const cuvs::neighbors::vpq_params& params, \ - const raft::host_matrix_view& dataset); \ - cuvs::neighbors::vpq_dataset vpq_build( \ - const raft::resources& res, \ - const cuvs::neighbors::vpq_params& params, \ - const raft::device_matrix_view& dataset); - -CUVS_INST_VPQ_BUILD(float); -CUVS_INST_VPQ_BUILD(half); -CUVS_INST_VPQ_BUILD(int8_t); -CUVS_INST_VPQ_BUILD(uint8_t); - -#undef CUVS_INST_VPQ_BUILD -} // namespace cuvs::preprocessing::quantize::pq diff --git a/cpp/tests/neighbors/ann_cagra.cuh b/cpp/tests/neighbors/ann_cagra.cuh index a6704f892a..6898e3d2f9 100644 --- a/cpp/tests/neighbors/ann_cagra.cuh +++ b/cpp/tests/neighbors/ann_cagra.cuh @@ -8,11 +8,14 @@ #include "ann_utils.cuh" #include +#include "cagra_padded_build_helpers.cuh" #include "naive_knn.cuh" #include #include #include +#include +#include #include #include #include @@ -36,13 +39,40 @@ #include #include +#include #include #include +#include #include namespace cuvs::neighbors::cagra { namespace { +/** + * If \p ace_host_dataset is set, builds from that host mdspan via `cagra::build` (ACE is selected + * by `graph_build_params`) and attaches \p padded to the result. Otherwise builds from \p padded + * via `cagra::build`, then calls `update_dataset` on the index with \p padded.
+ */ +template +void cagra_build_into_index( + raft::resources const& res, + cagra::index_params const& params, + std::optional> ace_host_dataset, + cuvs::neighbors::device_padded_dataset_view const& padded, + cagra::device_padded_index& index) +{ + if (ace_host_dataset.has_value()) { + cuvs::neighbors::host_padded_dataset_view host_view( + *ace_host_dataset, static_cast(ace_host_dataset->extent(1))); + auto host_idx = cagra::build(res, params, host_view); + // In-memory ACE returns graph-only; attach device padded storage for search. + index = cagra::attach_device_dataset_on_host_index(res, host_idx, padded); + return; + } + index = cagra::build(res, params, padded); + index.update_dataset(res, padded); +} + struct test_cagra_sample_filter { static constexpr unsigned offset = 300; inline _RAFT_HOST_DEVICE auto operator()( @@ -270,7 +300,6 @@ struct AnnCagraInputs { // std::optional double min_recall; // = std::nullopt; std::optional ivf_pq_search_refine_ratio = std::nullopt; - std::optional compression = std::nullopt; std::optional non_owning_memory_buffer_flag = std::nullopt; cuvs::neighbors::MergeStrategy merge_strategy = @@ -308,11 +337,6 @@ inline ::std::ostream& operator<<(::std::ostream& os, const AnnCagraInputs& p) if ((int)p.build_algo == 0 && p.ivf_pq_search_refine_ratio) { os << "(refine_rate=" << *p.ivf_pq_search_refine_ratio << ')'; } - if (p.compression.has_value()) { - auto vpq = p.compression.value(); - os << ", pq_bits=" << vpq.pq_bits << ", pq_dim=" << vpq.pq_dim - << ", vq_n_centers=" << vpq.vq_n_centers; - } os << '}' << std::endl; return os; } @@ -346,7 +370,6 @@ class AnnCagraTest : public ::testing::TestWithParam { ps.build_algo != graph_build_algo::ITERATIVE_CAGRA_SEARCH) GTEST_SKIP(); if (ps.metric == cuvs::distance::DistanceType::CosineExpanded) { - if (ps.compression.has_value()) { GTEST_SKIP(); } if (ps.build_algo == graph_build_algo::ITERATIVE_CAGRA_SEARCH || ps.dim == 1) { GTEST_SKIP(); } @@ -409,7 +432,6 @@ class AnnCagraTest : public 
::testing::TestWithParam { break; }; - index_params.compression = ps.compression; cagra::search_params search_params; search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; @@ -417,21 +439,26 @@ class AnnCagraTest : public ::testing::TestWithParam { auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); + cuvs::neighbors::test::padded_device_matrix_for_cagra device_padded(handle_, + database_view); tmp_index_file index_file; { std::optional> database_host{std::nullopt}; - cagra::index index(handle_, index_params.metric); + std::optional> ace_host_dataset; + cagra::device_padded_index index(handle_, index_params.metric); if (ps.host_dataset) { - database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + database_host.emplace(raft::make_host_matrix(ps.n_rows, ps.dim)); raft::copy(database_host->data_handle(), database.data(), database.size(), stream_); - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host->data_handle(), ps.n_rows, ps.dim); - - index = cagra::build(handle_, index_params, database_host_view); - } else { - index = cagra::build(handle_, index_params, database_view); - }; + raft::resource::sync_stream(handle_); + if (std::holds_alternative( + index_params.graph_build_params)) { + ace_host_dataset.emplace(raft::make_host_matrix_view( + database_host->data_handle(), ps.n_rows, ps.dim)); + } + } + cagra_build_into_index( + handle_, index_params, ace_host_dataset, device_padded.view, index); if (ps.use_source_indices) { auto source_indices = @@ -443,10 +470,11 @@ class AnnCagraTest : public ::testing::TestWithParam { cagra::serialize(handle_, index_file.filename, index, ps.include_serialized_dataset); } - cagra::index index(handle_); - cagra::deserialize(handle_, index_file.filename, &index); + cagra::device_padded_index index(handle_); + std::unique_ptr> loaded_dataset; + cagra::deserialize(handle_, index_file.filename, &index, &loaded_dataset); - if 
(!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } + if (!ps.include_serialized_dataset) { index.update_dataset(handle_, device_padded.view); } auto search_queries_view = raft::make_device_matrix_view( search_queries.data(), ps.n_queries, ps.dim); @@ -479,20 +507,18 @@ class AnnCagraTest : public ::testing::TestWithParam { ps.k, 0.003, min_recall)); - if (!ps.compression.has_value()) { - // Don't evaluate distances for CAGRA-Q for now as the error can be somewhat large - EXPECT_TRUE(eval_distances(handle_, - database.data(), - search_queries.data(), - indices_dev.data(), - distances_dev.data(), - ps.n_rows, - ps.dim, - ps.n_queries, - ps.k, - ps.metric, - 1.0e-4)); - } + // Compression (CAGRA-Q) was removed from these test params, so distances are always evaluated + EXPECT_TRUE(eval_distances(handle_, + database.data(), + search_queries.data(), + indices_dev.data(), + distances_dev.data(), + ps.n_rows, + ps.dim, + ps.n_queries, + ps.k, + ps.metric, + 1.0e-4)); } } @@ -539,12 +565,10 @@ class AnnCagraAddNodesTest : public ::testing::TestWithParam { ps.build_algo != graph_build_algo::ITERATIVE_CAGRA_SEARCH) GTEST_SKIP(); if (ps.metric == cuvs::distance::DistanceType::CosineExpanded) { - if (ps.compression.has_value()) { GTEST_SKIP(); } if (ps.build_algo == graph_build_algo::ITERATIVE_CAGRA_SEARCH || ps.dim == 1) { GTEST_SKIP(); } } - if (ps.compression != std::nullopt) GTEST_SKIP(); // IVF_PQ graph build does not support BitwiseHamming if (ps.metric == cuvs::distance::DistanceType::BitwiseHamming && ((!std::is_same_v) || (ps.build_algo == graph_build_algo::IVF_PQ))) @@ -625,21 +649,24 @@ class AnnCagraAddNodesTest : public ::testing::TestWithParam { auto initial_database_view = raft::make_device_matrix_view( (const DataT*)database.data(), initial_database_size, ps.dim); + cuvs::neighbors::test::padded_device_matrix_for_cagra initial_padded( + handle_, initial_database_view); std::optional> database_host{std::nullopt}; - cagra::index 
index(handle_); + std::optional> ace_host_dataset; + cagra::device_padded_index index(handle_); if (ps.host_dataset) { - database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + database_host.emplace(raft::make_host_matrix(ps.n_rows, ps.dim)); raft::copy( database_host->data_handle(), database.data(), initial_database_view.size(), stream_); - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host->data_handle(), initial_database_size, ps.dim); - // NB: database_host must live no less than the index, because the index _may_be_ - // non-onwning - index = cagra::build(handle_, index_params, database_host_view); - } else { - index = cagra::build(handle_, index_params, initial_database_view); - }; + raft::resource::sync_stream(handle_); + if (std::holds_alternative( + index_params.graph_build_params)) { + ace_host_dataset.emplace(raft::make_host_matrix_view( + database_host->data_handle(), initial_database_size, ps.dim)); + } + } + cagra_build_into_index(handle_, index_params, ace_host_dataset, initial_padded.view, index); auto additional_dataset = raft::make_host_matrix(ps.n_rows - initial_database_size, index.dim()); @@ -648,24 +675,21 @@ class AnnCagraAddNodesTest : public ::testing::TestWithParam { additional_dataset.size(), stream_); - auto new_dataset_buffer = raft::make_device_matrix(handle_, 0, 0); - auto new_graph_buffer = raft::make_device_matrix(handle_, 0, 0); - std::optional> - new_dataset_buffer_view = std::nullopt; - std::optional> new_graph_buffer_view = std::nullopt; - if (ps.non_owning_memory_buffer_flag.has_value() && - ps.non_owning_memory_buffer_flag.value()) { - const auto stride = - dynamic_cast*>(&index.data()) - ->stride(); - new_dataset_buffer = raft::make_device_matrix(handle_, ps.n_rows, stride); - new_graph_buffer = - raft::make_device_matrix(handle_, ps.n_rows, index.graph_degree()); + std::size_t row_stride = static_cast(ps.dim); + auto const& data_view = index.data(); + if constexpr 
(cuvs::neighbors::is_padded_dataset_view_v) { + row_stride = static_cast(data_view.stride()); + } + auto new_dataset_buffer = + raft::make_device_matrix(handle_, ps.n_rows, row_stride); + auto new_graph_buffer = + raft::make_device_matrix(handle_, ps.n_rows, index.graph_degree()); + std::optional> new_dataset_buffer_view = raft::make_device_strided_matrix_view( - new_dataset_buffer.data_handle(), ps.n_rows, ps.dim, stride); - new_graph_buffer_view = new_graph_buffer.view(); - } + new_dataset_buffer.data_handle(), ps.n_rows, ps.dim, row_stride); + std::optional> new_graph_buffer_view = + new_graph_buffer.view(); cagra::extend_params extend_params; cagra::extend(handle_, @@ -755,7 +779,6 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { ps.build_algo != graph_build_algo::ITERATIVE_CAGRA_SEARCH) GTEST_SKIP(); if (ps.metric == cuvs::distance::DistanceType::CosineExpanded) { - if (ps.compression.has_value()) { GTEST_SKIP(); } if (ps.build_algo == graph_build_algo::ITERATIVE_CAGRA_SEARCH || ps.dim == 1) { GTEST_SKIP(); } @@ -835,7 +858,6 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { break; }; - index_params.compression = ps.compression; cagra::search_params search_params; search_params.algo = ps.algo; search_params.max_queries = ps.max_queries; @@ -844,20 +866,23 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); + cuvs::neighbors::test::padded_device_matrix_for_cagra device_padded(handle_, + database_view); std::optional> database_host{std::nullopt}; - cagra::index index(handle_); + std::optional> ace_host_dataset; + cagra::device_padded_index index(handle_); if (ps.host_dataset) { - database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + database_host.emplace(raft::make_host_matrix(ps.n_rows, ps.dim)); raft::copy(database_host->data_handle(), database.data(), database.size(), stream_); - auto database_host_view = 
raft::make_host_matrix_view( - (const DataT*)database_host->data_handle(), ps.n_rows, ps.dim); - index = cagra::build(handle_, index_params, database_host_view); - } else { - index = cagra::build(handle_, index_params, database_view); + raft::resource::sync_stream(handle_); + if (std::holds_alternative( + index_params.graph_build_params)) { + ace_host_dataset.emplace(raft::make_host_matrix_view( + database_host->data_handle(), ps.n_rows, ps.dim)); + } } - - if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } + cagra_build_into_index(handle_, index_params, ace_host_dataset, device_padded.view, index); if (ps.use_source_indices) { auto source_indices = @@ -916,20 +941,18 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { 0.003, min_recall, false)); - if (!ps.compression.has_value()) { - // Don't evaluate distances for CAGRA-Q for now as the error can be somewhat large - EXPECT_TRUE(eval_distances(handle_, - database.data(), - search_queries.data(), - indices_dev.data(), - distances_dev.data(), - ps.n_rows, - ps.dim, - ps.n_queries, - ps.k, - ps.metric, - 1.0e-4)); - } + // Don't evaluate distances for CAGRA-Q for now as the error can be somewhat large + EXPECT_TRUE(eval_distances(handle_, + database.data(), + search_queries.data(), + indices_dev.data(), + distances_dev.data(), + ps.n_rows, + ps.dim, + ps.n_queries, + ps.k, + ps.metric, + 1.0e-4)); } } @@ -981,7 +1004,6 @@ class AnnCagraIndexFilteredMergeTest : public ::testing::TestWithParam) || (ps.build_algo == graph_build_algo::IVF_PQ))) @@ -1098,35 +1120,38 @@ class AnnCagraIndexFilteredMergeTest : public ::testing::TestWithParam( (const DataT*)database.data() + database0_view.size(), database1_size, ps.dim); - cagra::index index0(handle_, index_params.metric); - cagra::index index1(handle_, index_params.metric); + cuvs::neighbors::test::padded_device_matrix_for_cagra padded0(handle_, + database0_view); + cuvs::neighbors::test::padded_device_matrix_for_cagra 
padded1(handle_, + database1_view); + + cagra::device_padded_index index0(handle_, index_params.metric); + cagra::device_padded_index index1(handle_, index_params.metric); std::optional> database_host{std::nullopt}; + std::optional> ace_host0, ace_host1; if (ps.host_dataset) { - database_host = raft::make_host_matrix(handle_, ps.n_rows, ps.dim); + database_host.emplace(raft::make_host_matrix(handle_, ps.n_rows, ps.dim)); raft::copy(database_host->data_handle(), database.data(), database.size(), stream_); - { - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host->data_handle(), database0_size, ps.dim); - index0 = cagra::build(handle_, index_params, database_host_view); - } - { - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host->data_handle() + database0_size * ps.dim, - database1_size, - ps.dim); - index1 = cagra::build(handle_, index_params, database_host_view); + raft::resource::sync_stream(handle_); + if (std::holds_alternative( + index_params.graph_build_params)) { + ace_host0.emplace(raft::make_host_matrix_view( + database_host->data_handle(), database0_size, ps.dim)); + ace_host1.emplace(raft::make_host_matrix_view( + database_host->data_handle() + database0_size * ps.dim, database1_size, ps.dim)); } - } else { - index0 = cagra::build(handle_, index_params, database0_view); - index1 = cagra::build(handle_, index_params, database1_view); - }; + } + cagra_build_into_index(handle_, index_params, ace_host0, padded0.view, index0); + cagra_build_into_index(handle_, index_params, ace_host1, padded1.view, index1); - std::vector*> indices; + std::vector*> indices; indices.push_back(&index0); indices.push_back(&index1); - auto index = - cuvs::neighbors::cagra::merge(handle_, index_params, indices, bitset_filter_obj); + auto merge_storage = + cuvs::neighbors::cagra::make_merged_dataset(handle_, indices, bitset_filter_obj); + auto merge_idx = cuvs::neighbors::cagra::merge( + handle_, index_params, 
indices, merge_storage, bitset_filter_obj); auto search_queries_view = raft::make_device_matrix_view( search_queries.data(), ps.n_queries, ps.dim); @@ -1142,7 +1167,7 @@ class AnnCagraIndexFilteredMergeTest : public ::testing::TestWithParam { GTEST_SKIP(); } } - if (ps.compression != std::nullopt) GTEST_SKIP(); // IVF_PQ graph build does not support BitwiseHamming if (ps.metric == cuvs::distance::DistanceType::BitwiseHamming && ((!std::is_same_v) || (ps.build_algo == graph_build_algo::IVF_PQ))) @@ -1311,28 +1334,29 @@ class AnnCagraIndexMergeTest : public ::testing::TestWithParam { auto database1_view = raft::make_device_matrix_view( (const DataT*)database.data() + database0_view.size(), database1_size, ps.dim); - cagra::index index0(handle_, index_params.metric); - cagra::index index1(handle_, index_params.metric); + cuvs::neighbors::test::padded_device_matrix_for_cagra merge_padded0(handle_, + database0_view); + cuvs::neighbors::test::padded_device_matrix_for_cagra merge_padded1(handle_, + database1_view); + + cagra::device_padded_index index0(handle_, index_params.metric); + cagra::device_padded_index index1(handle_, index_params.metric); std::optional> database_host{std::nullopt}; + std::optional> ace_host0, ace_host1; if (ps.host_dataset) { - database_host = raft::make_host_matrix(handle_, ps.n_rows, ps.dim); + database_host.emplace(raft::make_host_matrix(handle_, ps.n_rows, ps.dim)); raft::copy(database_host->data_handle(), database.data(), database.size(), stream_); - { - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host->data_handle(), database0_size, ps.dim); - index0 = cagra::build(handle_, index_params, database_host_view); + raft::resource::sync_stream(handle_); + if (std::holds_alternative( + index_params.graph_build_params)) { + ace_host0.emplace(raft::make_host_matrix_view( + database_host->data_handle(), database0_size, ps.dim)); + ace_host1.emplace(raft::make_host_matrix_view( + database_host->data_handle() + 
database0_size * ps.dim, database1_size, ps.dim)); } - { - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host->data_handle() + database0_size * ps.dim, - database1_size, - ps.dim); - index1 = cagra::build(handle_, index_params, database_host_view); - } - } else { - index0 = cagra::build(handle_, index_params, database0_view); - index1 = cagra::build(handle_, index_params, database1_view); - }; + } + cagra_build_into_index(handle_, index_params, ace_host0, merge_padded0.view, index0); + cagra_build_into_index(handle_, index_params, ace_host1, merge_padded1.view, index1); auto search_queries_view = raft::make_device_matrix_view( search_queries.data(), ps.n_queries, ps.dim); @@ -1347,12 +1371,18 @@ class AnnCagraIndexMergeTest : public ::testing::TestWithParam { search_params.team_size = ps.team_size; search_params.itopk_size = ps.itopk_size; - std::vector*> indices_to_merge{&index0, &index1}; + std::vector*> indices_to_merge{&index0, &index1}; if (ps.merge_strategy == cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL) { - auto merged = cagra::merge(handle_, index_params, indices_to_merge); - cagra::search( - handle_, search_params, merged, search_queries_view, indices_out_view, dists_out_view); + auto merge_storage = + cuvs::neighbors::cagra::make_merged_dataset(handle_, indices_to_merge); + auto merged_idx = cagra::merge(handle_, index_params, indices_to_merge, merge_storage); + cagra::search(handle_, + search_params, + merged_idx, + search_queries_view, + indices_out_view, + dists_out_view); } else { cuvs::neighbors::composite::composite_index composite( indices_to_merge); @@ -1438,7 +1468,6 @@ inline std::vector generate_inputs() {true, false}, {0.995}, {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL}); @@ -1463,7 +1492,6 @@ inline std::vector generate_inputs() {false}, {0.995}, {std::optional{std::nullopt}}, - 
{std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_LOGICAL}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); @@ -1489,7 +1517,6 @@ inline std::vector generate_inputs() {false}, {0.995}, {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); @@ -1512,7 +1539,6 @@ inline std::vector generate_inputs() {true}, {0.995}, {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL, cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_LOGICAL}); @@ -1541,7 +1567,6 @@ inline std::vector generate_inputs() {false}, {0.995}, {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL, cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_LOGICAL}); @@ -1571,7 +1596,6 @@ inline std::vector generate_inputs() {false}, {0.995}, {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL, cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_LOGICAL}); @@ -1599,7 +1623,6 @@ inline std::vector generate_inputs() {false}, {0.995}, {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); @@ -1622,48 +1645,11 @@ inline std::vector generate_inputs() {true}, {0.985}, {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL, cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_LOGICAL}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - // A few PQ 
configurations. - // Varying dim, vq_n_centers - inputs2 = raft::util::itertools::product( - {100}, - {10000}, - {64, 128, 192, 256, 512, 1024}, // dim - {16}, // k - {graph_build_algo::IVF_PQ}, - {search_algo::AUTO}, - {10}, - {0}, - {64}, - {1}, - {cuvs::distance::DistanceType::L2Expanded}, - {false}, - {true}, - {false}, - {0.6}, - {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, - {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL, - cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_LOGICAL}); // don't demand high recall - // without refinement - for (uint32_t pq_len : {2}) { // for now, only pq_len = 2 is supported, more options coming soon - for (uint32_t vq_n_centers : {100, 1000}) { - for (auto input : inputs2) { - vpq_params ps{}; - ps.pq_dim = input.dim / pq_len; - ps.vq_n_centers = vq_n_centers; - input.compression.emplace(ps); - inputs.push_back(input); - } - } - } - // Refinement options // Varying host_dataset, ivf_pq_search_refine_ratio inputs2 = raft::util::itertools::product( @@ -1683,7 +1669,6 @@ inline std::vector generate_inputs() {true}, {0.99}, {1.0f, 2.0f, 3.0f}, - {std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL, cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_LOGICAL}); @@ -1707,7 +1692,6 @@ inline std::vector generate_inputs() {false}, {0.995}, {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL, cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_LOGICAL}); @@ -1762,40 +1746,10 @@ inline std::vector generate_addnode_inputs() {false}, {0.985}, {std::optional{std::nullopt}}, - {std::optional{std::nullopt}}, {std::optional{std::nullopt}}, {cuvs::neighbors::MergeStrategy::MERGE_STRATEGY_PHYSICAL}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - // a few PQ configurations - inputs2 = raft::util::itertools::product( - 
{100}, - {10000}, - {192, 1024}, // dim - {16}, // k - {graph_build_algo::IVF_PQ}, - {search_algo::AUTO}, - {10}, - {0}, - {64}, - {1}, - {cuvs::distance::DistanceType::L2Expanded}, - {false}, - {true}, - {true}, - {0.6}); // don't demand high recall without refinement - for (uint32_t pq_len : {2}) { // for now, only pq_len = 2 is supported, more options coming soon - for (uint32_t vq_n_centers : {100}) { - for (auto input : inputs2) { - vpq_params ps{}; - ps.pq_dim = input.dim / pq_len; - ps.vq_n_centers = vq_n_centers; - input.compression.emplace(ps); - inputs.push_back(input); - } - } - } - return inputs; } @@ -1838,35 +1792,6 @@ inline std::vector generate_filtering_inputs() {0.995}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - // a few PQ configurations - inputs2 = raft::util::itertools::product( - {100}, - {10000}, - {256}, // dim - {16}, // k - {graph_build_algo::IVF_PQ}, - {search_algo::AUTO}, - {10}, - {0}, - {64}, - {1}, - {cuvs::distance::DistanceType::L2Expanded}, - {false}, - {true}, - {true}, - {0.6}); // don't demand high recall without refinement - for (uint32_t pq_len : {2}) { // for now, only pq_len = 2 is supported, more options coming soon - for (uint32_t vq_n_centers : {100}) { - for (auto input : inputs2) { - vpq_params ps{}; - ps.pq_dim = input.dim / pq_len; - ps.vq_n_centers = vq_n_centers; - input.compression.emplace(ps); - inputs.push_back(input); - } - } - } - return inputs; } const std::vector inputs = generate_inputs(); diff --git a/cpp/tests/neighbors/ann_cagra/bug_extreme_inputs_oob.cu b/cpp/tests/neighbors/ann_cagra/bug_extreme_inputs_oob.cu index 8468a724a4..26760d1edb 100644 --- a/cpp/tests/neighbors/ann_cagra/bug_extreme_inputs_oob.cu +++ b/cpp/tests/neighbors/ann_cagra/bug_extreme_inputs_oob.cu @@ -1,10 +1,11 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #include +#include "../cagra_padded_build_helpers.cuh" #include #include @@ -30,7 +31,9 @@ class cagra_extreme_inputs_oob_test : public ::testing::Test { ix_ps.intermediate_graph_degree = 128; try { - [[maybe_unused]] auto ix = cagra::build(res, ix_ps, raft::make_const_mdspan(dataset->view())); + cuvs::neighbors::test::padded_device_matrix_for_cagra padded( + res, raft::make_const_mdspan(dataset->view())); + [[maybe_unused]] auto ix = cagra::build(res, ix_ps, padded.view); raft::resource::sync_stream(res); } catch (const std::exception&) { SUCCEED(); diff --git a/cpp/tests/neighbors/ann_cagra/bug_graph_smaller_than_dataset.cu b/cpp/tests/neighbors/ann_cagra/bug_graph_smaller_than_dataset.cu index adeb774a8b..00a4aae566 100644 --- a/cpp/tests/neighbors/ann_cagra/bug_graph_smaller_than_dataset.cu +++ b/cpp/tests/neighbors/ann_cagra/bug_graph_smaller_than_dataset.cu @@ -1,160 +1,169 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. - * SPDX-License-Identifier: Apache-2.0 - */ - -#include - -#include - -#include -#include -#include -#include -#include - -#include - -namespace cuvs::neighbors::cagra { - -/** - * @brief Test verifying graph.extent(0) is used for random seed selection - * - * This test ensures that CAGRA search kernels correctly use graph.extent(0) - * (graph size) rather than dataset.size for random seed node selection. - * - * The bug: random seed selection previously used dataset_desc.size, which - * could cause OOB access if the graph size differed from dataset size - * (e.g., in CAGRA-Q iterative builds with compression). - * - * The fix: kernels now receive graph.extent(0) as graph_size parameter, - * ensuring seeds are always within valid graph node range [0, graph_size). 
- */ -class cagra_graph_smaller_than_dataset_test : public ::testing::Test { - public: - using data_type = float; - using index_type = uint32_t; - - protected: - void run() - { - // Create a dataset with 1000 points - constexpr int64_t n_dataset = 1000; - constexpr int64_t n_dim = 128; - constexpr int64_t n_queries = 100; - constexpr int64_t k = 10; - - // Build index normally - auto dataset = raft::make_device_matrix(res, n_dataset, n_dim); - raft::random::RngState r(1234ULL); - raft::random::uniform( - res, r, dataset.data_handle(), n_dataset * n_dim, data_type(-1), data_type(1)); - - cagra::index_params index_params; - index_params.graph_degree = 32; - index_params.intermediate_graph_degree = 64; - - auto index = cagra::build(res, index_params, raft::make_const_mdspan(dataset.view())); - raft::resource::sync_stream(res); - - // Get the graph from the index - auto original_graph = index.graph(); - ASSERT_EQ(original_graph.extent(0), n_dataset); - - // Recreate the bug scenario: LARGE dataset, SMALL graph - // (like iterative_build_graph does in intermediate iterations) - constexpr int64_t n_graph = n_dataset / 2; // Only 500 nodes in graph - - // Step 1: Build index on SMALL subset (500 points) - auto small_dataset_view = raft::make_device_matrix_view( - dataset.data_handle(), n_graph, n_dim); - - cagra::index_params small_index_params; - small_index_params.graph_degree = 32; - auto small_index = cagra::build(res, small_index_params, small_dataset_view); - raft::resource::sync_stream(res); - - // Step 2: Update to FULL dataset (1000 points) but keep small graph (500 nodes) - // This creates the exact bug scenario: dataset.size=1000, graph.extent(0)=500 - small_index.update_dataset(res, raft::make_const_mdspan(dataset.view())); - - // Verify the mismatch - THIS IS THE BUG SCENARIO! 
- ASSERT_EQ(small_index.graph().extent(0), n_graph); // Graph has 500 nodes - ASSERT_EQ(small_index.size(), n_dataset); // Dataset has 1000 points - ASSERT_NE(small_index.graph().extent(0), small_index.size()); // Mismatch! - - // Create queries - auto queries = raft::make_device_matrix(res, n_queries, n_dim); - raft::random::uniform( - res, r, queries.data_handle(), n_queries * n_dim, data_type(-1), data_type(1)); - - // Allocate output - auto neighbors = raft::make_device_matrix(res, n_queries, k); - auto distances = raft::make_device_matrix(res, n_queries, k); - - // Setup search params - cagra::search_params search_params; - search_params.itopk_size = 64; - search_params.search_width = 1; - search_params.max_iterations = 10; - search_params.algo = cagra::search_algo::SINGLE_CTA; - - // THIS SHOULD NOT CRASH OR CAUSE OOB ACCESS - // Before fix: random seeds use dataset.size (1000) -> tries to access graph[700] -> CRASH! - // After fix: random seeds use graph.extent(0) (500) -> only accesses graph[0-499] -> SAFE! 
- cagra::search(res, - search_params, - small_index, - raft::make_const_mdspan(queries.view()), - neighbors.view(), - distances.view()); - - raft::resource::sync_stream(res); - - // Verify results are valid (neighbors should be < graph size) - auto neighbors_host = raft::make_host_matrix(n_queries, k); - raft::copy(neighbors_host.data_handle(), - neighbors.data_handle(), - n_queries * k, - raft::resource::get_cuda_stream(res)); - raft::resource::sync_stream(res); - - // All neighbor indices should be valid (< n_graph) - for (int64_t i = 0; i < n_queries * k; i++) { - ASSERT_LT(neighbors_host.data_handle()[i], n_graph) - << "Neighbor index " << neighbors_host.data_handle()[i] << " is >= graph size " << n_graph; - } - - // Test with MULTI_CTA algorithm as well (also had the same bug) - search_params.algo = cagra::search_algo::MULTI_CTA; - - cagra::search(res, - search_params, - small_index, - raft::make_const_mdspan(queries.view()), - neighbors.view(), - distances.view()); - - raft::resource::sync_stream(res); - - // Verify again - raft::copy(neighbors_host.data_handle(), - neighbors.data_handle(), - n_queries * k, - raft::resource::get_cuda_stream(res)); - raft::resource::sync_stream(res); - - for (int64_t i = 0; i < n_queries * k; i++) { - ASSERT_LT(neighbors_host.data_handle()[i], n_graph) - << "Neighbor index " << neighbors_host.data_handle()[i] << " is >= graph size " << n_graph - << " (MULTI_CTA)"; - } - } - - private: - raft::resources res; -}; - -TEST_F(cagra_graph_smaller_than_dataset_test, search_with_smaller_graph) { this->run(); } - -} // namespace cuvs::neighbors::cagra +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "../cagra_padded_build_helpers.cuh" +#include + +#include +#include +#include +#include +#include + +#include + +namespace cuvs::neighbors::cagra { + +/** + * @brief Test verifying graph.extent(0) is used for random seed selection + * + * This test ensures that CAGRA search kernels correctly use graph.extent(0) + * (graph size) rather than dataset.size for random seed node selection. + * + * The bug: random seed selection previously used dataset_desc.size, which + * could cause OOB access if the graph size differed from dataset size + * (e.g., in CAGRA-Q iterative builds with compression). + * + * The fix: kernels now receive graph.extent(0) as graph_size parameter, + * ensuring seeds are always within valid graph node range [0, graph_size). + */ +class cagra_graph_smaller_than_dataset_test : public ::testing::Test { + public: + using data_type = float; + using index_type = uint32_t; + + protected: + void run() + { + // Create a dataset with 1000 points + constexpr int64_t n_dataset = 1000; + constexpr int64_t n_dim = 128; + constexpr int64_t n_queries = 100; + constexpr int64_t k = 10; + + // Build index normally + auto dataset = raft::make_device_matrix(res, n_dataset, n_dim); + raft::random::RngState r(1234ULL); + raft::random::uniform( + res, r, dataset.data_handle(), n_dataset * n_dim, data_type(-1), data_type(1)); + + cagra::index_params index_params; + index_params.graph_degree = 32; + index_params.intermediate_graph_degree = 64; + + cuvs::neighbors::test::padded_device_matrix_for_cagra padded_full( + res, raft::make_const_mdspan(dataset.view())); + auto index = cagra::build(res, index_params, padded_full.view); + raft::resource::sync_stream(res); + + // Get the graph from the index + auto original_graph = index.graph(); + ASSERT_EQ(original_graph.extent(0), n_dataset); + + // Recreate the bug scenario: LARGE dataset, SMALL graph + // (like iterative_build_graph does in intermediate 
iterations) + constexpr int64_t n_graph = n_dataset / 2; // Only 500 nodes in graph + + // Step 1: Build index on SMALL subset (500 points) + auto small_dataset_view = raft::make_device_matrix_view( + dataset.data_handle(), n_graph, n_dim); + + cagra::index_params small_index_params; + small_index_params.graph_degree = 32; + cuvs::neighbors::test::padded_device_matrix_for_cagra padded_small( + res, small_dataset_view); + auto small_index = cagra::build(res, small_index_params, padded_small.view); + small_index.update_dataset(res, padded_small.view); + raft::resource::sync_stream(res); + + // Step 2: Update to FULL dataset (1000 points) but keep small graph (500 nodes) + // This creates the exact bug scenario: dataset.size=1000, graph.extent(0)=500 + small_index.update_dataset(res, + cuvs::neighbors::make_device_padded_dataset_view( + res, raft::make_const_mdspan(dataset.view()))); + + // Verify the mismatch - THIS IS THE BUG SCENARIO! + ASSERT_EQ(small_index.graph().extent(0), n_graph); // Graph has 500 nodes + ASSERT_EQ(small_index.size(), n_dataset); // Dataset has 1000 points + ASSERT_NE(small_index.graph().extent(0), + small_index.size()); // Mismatch! + + // Create queries + auto queries = raft::make_device_matrix(res, n_queries, n_dim); + raft::random::uniform( + res, r, queries.data_handle(), n_queries * n_dim, data_type(-1), data_type(1)); + + // Allocate output + auto neighbors = raft::make_device_matrix(res, n_queries, k); + auto distances = raft::make_device_matrix(res, n_queries, k); + + // Setup search params + cagra::search_params search_params; + search_params.itopk_size = 64; + search_params.search_width = 1; + search_params.max_iterations = 10; + search_params.algo = cagra::search_algo::SINGLE_CTA; + + // THIS SHOULD NOT CRASH OR CAUSE OOB ACCESS + // Before fix: random seeds use dataset.size (1000) -> tries to access graph[700] -> CRASH! + // After fix: random seeds use graph.extent(0) (500) -> only accesses graph[0-499] -> SAFE! 
+ cagra::search(res, + search_params, + small_index, + raft::make_const_mdspan(queries.view()), + neighbors.view(), + distances.view()); + + raft::resource::sync_stream(res); + + // Verify results are valid (neighbors should be < graph size) + auto neighbors_host = raft::make_host_matrix(n_queries, k); + raft::copy(neighbors_host.data_handle(), + neighbors.data_handle(), + n_queries * k, + raft::resource::get_cuda_stream(res)); + raft::resource::sync_stream(res); + + // All neighbor indices should be valid (< n_graph) + for (int64_t i = 0; i < n_queries * k; i++) { + ASSERT_LT(neighbors_host.data_handle()[i], n_graph) + << "Neighbor index " << neighbors_host.data_handle()[i] << " is >= graph size " << n_graph; + } + + // Test with MULTI_CTA algorithm as well (also had the same bug) + search_params.algo = cagra::search_algo::MULTI_CTA; + + cagra::search(res, + search_params, + small_index, + raft::make_const_mdspan(queries.view()), + neighbors.view(), + distances.view()); + + raft::resource::sync_stream(res); + + // Verify again + raft::copy(neighbors_host.data_handle(), + neighbors.data_handle(), + n_queries * k, + raft::resource::get_cuda_stream(res)); + raft::resource::sync_stream(res); + + for (int64_t i = 0; i < n_queries * k; i++) { + ASSERT_LT(neighbors_host.data_handle()[i], n_graph) + << "Neighbor index " << neighbors_host.data_handle()[i] << " is >= graph size " << n_graph + << " (MULTI_CTA)"; + } + } + + private: + raft::resources res; +}; + +TEST_F(cagra_graph_smaller_than_dataset_test, search_with_smaller_graph) { this->run(); } + +} // namespace cuvs::neighbors::cagra diff --git a/cpp/tests/neighbors/ann_cagra/bug_issue_93_reproducer.cu b/cpp/tests/neighbors/ann_cagra/bug_issue_93_reproducer.cu index 6b4b037167..f865d5d267 100644 --- a/cpp/tests/neighbors/ann_cagra/bug_issue_93_reproducer.cu +++ b/cpp/tests/neighbors/ann_cagra/bug_issue_93_reproducer.cu @@ -28,6 +28,7 @@ #include +#include "../cagra_padded_build_helpers.cuh" #include #include #include 
@@ -40,6 +41,7 @@ #include #include #include +#include #include namespace cuvs::neighbors::cagra { @@ -56,8 +58,9 @@ TEST(Issue93Reproducer, ConcurrentSearchDifferentGraphDegrees) constexpr int dim = 64; constexpr int top_k = 10; - // Build indices on the main thread. - std::vector> indices; + // Build indices on the main thread (keep padded builders alive for view-based indexes). + std::vector> padded_builders; + std::vector> indices; for (int n_rows : dataset_sizes) { auto database = raft::make_device_matrix(handle, n_rows, dim); raft::random::uniform( @@ -70,7 +73,10 @@ TEST(Issue93Reproducer, ConcurrentSearchDifferentGraphDegrees) ip.graph_build_params = graph_build_params::nn_descent_params(ip.intermediate_graph_degree, ip.metric); - indices.push_back(cagra::build(handle, ip, raft::make_const_mdspan(database.view()))); + padded_builders.emplace_back(handle, raft::make_const_mdspan(database.view())); + auto index = cagra::build(handle, ip, padded_builders.back().view); + index.update_dataset(handle, padded_builders.back().view); + indices.push_back(std::move(index)); } raft::resource::sync_stream(handle); diff --git a/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu b/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu index aaee5a77e5..70d009be15 100644 --- a/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu +++ b/cpp/tests/neighbors/ann_cagra/bug_iterative_cagra_build.cu @@ -1,85 +1,88 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
- * SPDX-License-Identifier: Apache-2.0 - */ - -#include - -#include - -#include -#include -#include - -#include -#include - -namespace cuvs::neighbors::cagra { - -template -class CagraIterativeBuildBugTest : public ::testing::Test { - public: - using data_type = DataT; - - protected: - void run() - { - // Set up iterative CAGRA graph building - cagra::index_params index_params; - // The bug manifests when graph_degree is equal to intermediate_graph_degree - // see issue https://github.com/rapidsai/cuvs/issues/1818 - index_params.graph_degree = 16; - index_params.intermediate_graph_degree = 16; - - // Use iterative CAGRA search for graph building - index_params.graph_build_params = graph_build_params::iterative_search_params(); - - // Build the index - auto cagra_index = cagra::build(res, index_params, raft::make_const_mdspan(dataset->view())); - raft::resource::sync_stream(res); - - // Verify the index was built successfully - ASSERT_GT(cagra_index.size(), 0); - ASSERT_EQ(cagra_index.dim(), n_dim); - } - - void SetUp() override - { - dataset.emplace(raft::make_device_matrix(res, n_samples, n_dim)); - raft::random::RngState r(1234ULL); - - // Generate random data based on type - if constexpr (std::is_same_v) { - raft::random::normal( - res, r, dataset->data_handle(), n_samples * n_dim, data_type(0), data_type(1)); - } else if constexpr (std::is_same_v) { - raft::random::uniformInt( - res, r, dataset->data_handle(), n_samples * n_dim, int8_t(-128), int8_t(127)); - } else if constexpr (std::is_same_v) { - raft::random::uniformInt( - res, r, dataset->data_handle(), n_samples * n_dim, uint8_t(0), uint8_t(255)); - } - raft::resource::sync_stream(res); - } - - void TearDown() override - { - dataset.reset(); - raft::resource::sync_stream(res); - } - - private: - raft::resources res; - std::optional> dataset = std::nullopt; - - constexpr static int64_t n_samples = 10000; - constexpr static int64_t n_dim = 1024; -}; - -// Instantiate test for different data types -using 
TestTypes = ::testing::Types; -TYPED_TEST_SUITE(CagraIterativeBuildBugTest, TestTypes); - -TYPED_TEST(CagraIterativeBuildBugTest, IterativeBuildTest) { this->run(); } - -} // namespace cuvs::neighbors::cagra +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "../cagra_padded_build_helpers.cuh" +#include + +#include +#include +#include + +#include +#include + +namespace cuvs::neighbors::cagra { + +template +class CagraIterativeBuildBugTest : public ::testing::Test { + public: + using data_type = DataT; + + protected: + void run() + { + // Set up iterative CAGRA graph building + cagra::index_params index_params; + // The bug manifests when graph_degree is equal to intermediate_graph_degree + // see issue https://github.com/rapidsai/cuvs/issues/1818 + index_params.graph_degree = 16; + index_params.intermediate_graph_degree = 16; + + // Use iterative CAGRA search for graph building + index_params.graph_build_params = graph_build_params::iterative_search_params(); + + cuvs::neighbors::test::padded_device_matrix_for_cagra padded( + res, raft::make_const_mdspan(dataset->view())); + auto cagra_index = cagra::build(res, index_params, padded.view); + cagra_index.update_dataset(res, padded.view); + raft::resource::sync_stream(res); + + // Verify the index was built successfully + ASSERT_GT(cagra_index.size(), 0); + ASSERT_EQ(cagra_index.dim(), n_dim); + } + + void SetUp() override + { + dataset.emplace(raft::make_device_matrix(res, n_samples, n_dim)); + raft::random::RngState r(1234ULL); + + // Generate random data based on type + if constexpr (std::is_same_v) { + raft::random::normal( + res, r, dataset->data_handle(), n_samples * n_dim, data_type(0), data_type(1)); + } else if constexpr (std::is_same_v) { + raft::random::uniformInt( + res, r, dataset->data_handle(), n_samples * n_dim, int8_t(-128), int8_t(127)); + } else if constexpr (std::is_same_v) { + raft::random::uniformInt( + 
res, r, dataset->data_handle(), n_samples * n_dim, uint8_t(0), uint8_t(255)); + } + raft::resource::sync_stream(res); + } + + void TearDown() override + { + dataset.reset(); + raft::resource::sync_stream(res); + } + + private: + raft::resources res; + std::optional> dataset = std::nullopt; + + constexpr static int64_t n_samples = 10000; + constexpr static int64_t n_dim = 1024; +}; + +// Instantiate test for different data types +using TestTypes = ::testing::Types; +TYPED_TEST_SUITE(CagraIterativeBuildBugTest, TestTypes); + +TYPED_TEST(CagraIterativeBuildBugTest, IterativeBuildTest) { this->run(); } + +} // namespace cuvs::neighbors::cagra diff --git a/cpp/tests/neighbors/ann_cagra/bug_multi_cta_crash.cu b/cpp/tests/neighbors/ann_cagra/bug_multi_cta_crash.cu index 4b418b20cd..cdf5e7e334 100644 --- a/cpp/tests/neighbors/ann_cagra/bug_multi_cta_crash.cu +++ b/cpp/tests/neighbors/ann_cagra/bug_multi_cta_crash.cu @@ -1,11 +1,12 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #include #include "../ann_cagra.cuh" +#include "../cagra_padded_build_helpers.cuh" #include @@ -27,8 +28,9 @@ class AnnCagraBugMultiCTACrash : public ::testing::TestWithParamview())); + build_padded_.emplace(res, raft::make_const_mdspan(dataset->view())); + auto cagra_index = cagra::build(res, cagra_index_params, build_padded_->view); + cagra_index.update_dataset(res, build_padded_->view); raft::resource::sync_stream(res); cagra::search_params cagra_search_params; @@ -67,6 +69,7 @@ class AnnCagraBugMultiCTACrash : public ::testing::TestWithParam> build_padded_{}; std::optional> dataset = std::nullopt; std::optional> queries = std::nullopt; std::optional> neighbors = std::nullopt; diff --git a/cpp/tests/neighbors/ann_cagra/test_filter_udf.cu b/cpp/tests/neighbors/ann_cagra/test_filter_udf.cu index 093727d318..d6d4c6e79a 100644 --- a/cpp/tests/neighbors/ann_cagra/test_filter_udf.cu +++ b/cpp/tests/neighbors/ann_cagra/test_filter_udf.cu @@ -123,7 +123,10 @@ class CagraUdfFilterTest : public ::testing::TestWithParam { index_params.graph_build_params = cagra::graph_build_params::nn_descent_params(index_params.intermediate_graph_degree); - index.emplace(cagra::build(res, index_params, raft::make_const_mdspan(dataset->view()))); + index.emplace(cagra::build(res, + index_params, + cuvs::neighbors::make_device_padded_dataset_view( + res, raft::make_const_mdspan(dataset->view())))); raft::resource::sync_stream(res); } @@ -166,9 +169,9 @@ class CagraUdfFilterTest : public ::testing::TestWithParam { } raft::resources res; - std::optional> dataset = std::nullopt; - std::optional> queries = std::nullopt; - std::optional> index = std::nullopt; + std::optional> dataset = std::nullopt; + std::optional> queries = std::nullopt; + std::optional> index = std::nullopt; }; class CagraUdfFilterHalfTest : public ::testing::TestWithParam { @@ -199,7 +202,10 @@ class CagraUdfFilterHalfTest : public ::testing::TestWithParamview()))); + 
index.emplace(cagra::build(res, + index_params, + cuvs::neighbors::make_device_padded_dataset_view( + res, raft::make_const_mdspan(dataset->view())))); raft::resource::sync_stream(res); } @@ -242,9 +248,9 @@ class CagraUdfFilterHalfTest : public ::testing::TestWithParam> dataset = std::nullopt; - std::optional> queries = std::nullopt; - std::optional> index = std::nullopt; + std::optional> dataset = std::nullopt; + std::optional> queries = std::nullopt; + std::optional> index = std::nullopt; }; TEST_P(CagraUdfFilterTest, AcceptAllMatchesNoFilter) diff --git a/cpp/tests/neighbors/ann_hnsw_ace.cuh b/cpp/tests/neighbors/ann_hnsw_ace.cuh index c75b3555f6..531b8fe0f3 100644 --- a/cpp/tests/neighbors/ann_hnsw_ace.cuh +++ b/cpp/tests/neighbors/ann_hnsw_ace.cuh @@ -301,15 +301,17 @@ class AnnHnswAceTest : public ::testing::TestWithParam { raft::copy(database_host.data_handle(), database_dev.data(), ps.n_rows * ps.dim, stream_); raft::resource::sync_stream(handle_); - auto database_view = + auto database_dev_view = raft::make_device_matrix_view(database_dev.data(), ps.n_rows, ps.dim); + cuvs::neighbors::test::padded_device_matrix_for_cagra device_padded(handle_, + database_dev_view); // Build an in-memory CAGRA index (device graph + device dataset). cuvs::neighbors::cagra::index_params cagra_params; cagra_params.metric = ps.metric; cagra_params.graph_degree = 64; cagra_params.intermediate_graph_degree = 128; - auto cagra_index = cuvs::neighbors::cagra::build(handle_, cagra_params, database_view); + auto cagra_index = cuvs::neighbors::cagra::build(handle_, cagra_params, device_padded.view); raft::resource::sync_stream(handle_); cuvs::neighbors::hnsw::search_params search_params; @@ -318,7 +320,7 @@ class AnnHnswAceTest : public ::testing::TestWithParam { // Runs from_cagra with a tiny host-memory limit to force the disk spill, searches the // returned (disk-backed) index, checks recall, and returns the neighbor indices. 
- auto run_spilled = [&](const cuvs::neighbors::cagra::index& idx, + auto run_spilled = [&](const cuvs::neighbors::cagra::device_padded_index& idx, hnsw::HnswHierarchy hierarchy, bool pass_host_dataset) -> std::vector { static std::atomic counter{0}; @@ -402,8 +404,8 @@ class AnnHnswAceTest : public ::testing::TestWithParam { raft::resource::sync_stream(handle_); auto managed_graph_view = raft::make_device_matrix_view( managed_graph.data(), ps.n_rows, degree); - cuvs::neighbors::cagra::index managed_index( - handle_, ps.metric, database_view, managed_graph_view); + cuvs::neighbors::cagra::device_padded_index managed_index( + handle_, ps.metric, device_padded.view, managed_graph_view); run_spilled(managed_index, hnsw::HnswHierarchy::NONE, /*pass_host_dataset=*/false); } diff --git a/cpp/tests/neighbors/ann_scann.cuh b/cpp/tests/neighbors/ann_scann.cuh index eafddec9d2..66eab4a158 100644 --- a/cpp/tests/neighbors/ann_scann.cuh +++ b/cpp/tests/neighbors/ann_scann.cuh @@ -186,7 +186,7 @@ class scann_test : public ::testing::TestWithParam { cuvs::preprocessing::quantize::pq::quantizer quantizer{ pq_params, - cuvs::neighbors::vpq_dataset{ + cuvs::neighbors::device_vpq_dataset{ std::move(vq_codebook), std::move(pq_codebook_copy), std::move(empty_data)}}; auto quantized_residuals_device = diff --git a/cpp/tests/neighbors/ann_vamana.cuh b/cpp/tests/neighbors/ann_vamana.cuh index 0397c74e1c..bcad083b42 100644 --- a/cpp/tests/neighbors/ann_vamana.cuh +++ b/cpp/tests/neighbors/ann_vamana.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -9,6 +9,7 @@ #include "ann_utils.cuh" #include +#include "cagra_padded_build_helpers.cuh" #include "naive_knn.cuh" #include @@ -207,10 +208,10 @@ class AnnVamanaTest : public ::testing::TestWithParam { handle_, index.graph().extent(0), index.graph().extent(1)); raft::linalg::map(handle_, graph_valid.view(), edge_op{}, index.graph()); - auto cagra_index = cagra::index(handle_, - ps.metric, - raft::make_const_mdspan(database_view), - raft::make_const_mdspan(graph_valid.view())); + cuvs::neighbors::test::padded_device_matrix_for_cagra cagra_base(handle_, + database_view); + auto cagra_index = cagra::device_padded_index( + handle_, ps.metric, cagra_base.view, raft::make_const_mdspan(graph_valid.view())); cagra::search_params search_params; search_params.algo = ps.algo; diff --git a/cpp/tests/neighbors/cagra_padded_build_helpers.cuh b/cpp/tests/neighbors/cagra_padded_build_helpers.cuh new file mode 100644 index 0000000000..a8aae58cd9 --- /dev/null +++ b/cpp/tests/neighbors/cagra_padded_build_helpers.cuh @@ -0,0 +1,59 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include +#include + +#include + +namespace cuvs::neighbors::test { + +/** + * Prepares a device_padded_dataset_view for cagra::build: uses make_device_padded_dataset_view when + * the source row stride already matches alignment, otherwise make_device_padded_dataset and keeps + * the copy in + * \p owned. The caller must keep this object alive for the lifetime of any index that only holds a + * view over the data. 
+ */ +template +struct padded_device_matrix_for_cagra { + std::unique_ptr> owned; + cuvs::neighbors::device_padded_dataset_view view; + + padded_device_matrix_for_cagra( + raft::resources const& res, raft::device_matrix_view src) + : padded_device_matrix_for_cagra{build(res, src)} + { + } + + private: + struct build_result { + std::unique_ptr> owned; + cuvs::neighbors::device_padded_dataset_view view; + }; + + // device_padded_dataset_view has no default constructor; fill both members from one build step. + explicit padded_device_matrix_for_cagra(build_result&& br) + : owned{std::move(br.owned)}, view{std::move(br.view)} + { + } + + static auto build(raft::resources const& res, + raft::device_matrix_view src) + -> build_result + { + using namespace cuvs::neighbors; + if (matrix_row_width_matches_cagra_required(src)) { + return build_result{nullptr, make_device_padded_dataset_view(res, src)}; + } else { + auto own = make_device_padded_dataset(res, src); + auto vw = own->as_dataset_view(); + return build_result{std::move(own), vw}; + } + } +}; + +} // namespace cuvs::neighbors::test diff --git a/cpp/tests/neighbors/dynamic_batching/test_cagra.cu b/cpp/tests/neighbors/dynamic_batching/test_cagra.cu index 4c046367f2..783e76d82a 100644 --- a/cpp/tests/neighbors/dynamic_batching/test_cagra.cu +++ b/cpp/tests/neighbors/dynamic_batching/test_cagra.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -8,19 +8,36 @@ #include "../dynamic_batching.cuh" #include +#include namespace cuvs::neighbors::dynamic_batching { +namespace { + +template +auto build_cagra_with_dataset(raft::resources const& res, + cagra::index_params const& params, + raft::device_matrix_view dataset) + -> cagra::device_padded_index +{ + auto padded = cuvs::neighbors::make_device_padded_dataset_view(res, dataset); + auto index = cagra::build(res, params, padded); + index.update_dataset(res, padded); + return index; +} + +} // namespace + using cagra_F32 = dynamic_batching_test, - cagra::build, + cagra::device_padded_index, + build_cagra_with_dataset, cagra::search>; using cagra_U8 = dynamic_batching_test, - cagra::build, + cagra::device_padded_index, + build_cagra_with_dataset, cagra::search>; template diff --git a/cpp/tests/neighbors/hnsw.cu b/cpp/tests/neighbors/hnsw.cu index 28238c17bf..1e38cc8d99 100644 --- a/cpp/tests/neighbors/hnsw.cu +++ b/cpp/tests/neighbors/hnsw.cu @@ -1,12 +1,14 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #include "../test_utils.cuh" #include "ann_utils.cuh" +#include "cagra_padded_build_helpers.cuh" #include +#include #include #include #include @@ -93,8 +95,10 @@ class AnnHNSWTest : public ::testing::TestWithParam { auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); + cuvs::neighbors::test::padded_device_matrix_for_cagra padded(handle_, database_view); - auto index = cuvs::neighbors::cagra::build(handle_, index_params, database_view); + auto index = cuvs::neighbors::cagra::build(handle_, index_params, padded.view); + index.update_dataset(handle_, padded.view); raft::resource::sync_stream(handle_); cuvs::neighbors::hnsw::search_params search_params; diff --git a/cpp/tests/neighbors/mg.cuh b/cpp/tests/neighbors/mg.cuh index fd5dc8e9dc..5417e8fd99 100644 --- a/cpp/tests/neighbors/mg.cuh +++ b/cpp/tests/neighbors/mg.cuh @@ -1,11 +1,12 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "../test_utils.cuh" #include "ann_utils.cuh" +#include "cagra_padded_build_helpers.cuh" #include "naive_knn.cuh" #include @@ -218,9 +219,9 @@ class AnnMGTest : public ::testing::TestWithParam { d_mode = distribution_mode::SHARDED; mg_index_params index_params; - index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( - raft::matrix_extent(ps.num_db_vecs, ps.dim)); - index_params.mode = d_mode; + // Host dataset uses ACE build path; must set ace_params (not ivf_pq_params). 
+ index_params.graph_build_params = cagra::graph_build_params::ace_params{}; + index_params.mode = d_mode; mg_search_params search_params; @@ -376,7 +377,9 @@ class AnnMGTest : public ::testing::TestWithParam { { auto index_dataset = raft::make_device_matrix_view( d_index_dataset.data(), ps.num_db_vecs, ps.dim); - auto index = cuvs::neighbors::cagra::build(clique_, index_params, index_dataset); + cuvs::neighbors::test::padded_device_matrix_for_cagra padded(clique_, index_dataset); + auto index = cuvs::neighbors::cagra::build(clique_, index_params, padded.view); + index.update_dataset(clique_, padded.view); cuvs::neighbors::cagra::serialize(clique_, index_file.filename, index); } @@ -554,9 +557,9 @@ class AnnMGTest : public ::testing::TestWithParam { ASSERT_TRUE(ps.num_queries <= 4); mg_index_params index_params; - index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( - raft::matrix_extent(ps.num_db_vecs, ps.dim)); - index_params.mode = REPLICATED; + // Host dataset uses ACE build path; must set ace_params (not ivf_pq_params). + index_params.graph_build_params = cagra::graph_build_params::ace_params{}; + index_params.mode = REPLICATED; mg_search_params search_params; search_params.search_mode = ROUND_ROBIN; diff --git a/cpp/tests/neighbors/tiered_index.cu b/cpp/tests/neighbors/tiered_index.cu index 38d0126e03..0effd7fa8b 100644 --- a/cpp/tests/neighbors/tiered_index.cu +++ b/cpp/tests/neighbors/tiered_index.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -217,7 +217,7 @@ const std::vector inputs = {10}, // n_queries {TEST_EXTEND, TEST_MERGE} // test_strategy ); -typedef ANNTieredIndexTest> CAGRA_F; +typedef ANNTieredIndexTest> CAGRA_F; TEST_P(CAGRA_F, AnnTieredIndex) { this->testTieredIndex(); } INSTANTIATE_TEST_CASE_P(ANNTieredIndexTest, CAGRA_F, ::testing::ValuesIn(inputs)); diff --git a/examples/cpp/src/cagra_example.cu b/examples/cpp/src/cagra_example.cu index 856030c520..35e279a198 100644 --- a/examples/cpp/src/cagra_example.cu +++ b/examples/cpp/src/cagra_example.cu @@ -9,6 +9,7 @@ #include #include +#include #include @@ -31,7 +32,9 @@ void cagra_build_search_simple(raft::device_resources const& dev_resources, cagra::index_params index_params; std::cout << "Building CAGRA index (search graph)" << std::endl; - auto index = cagra::build(dev_resources, index_params, dataset); + auto padded = cuvs::neighbors::make_device_padded_dataset_view(dev_resources, dataset); + auto index = cagra::build(dev_resources, index_params, padded); + index.update_dataset(dev_resources, padded); std::cout << "CAGRA index has " << index.size() << " vectors" << std::endl; std::cout << "CAGRA graph has degree " << index.graph_degree() << ", graph size [" diff --git a/examples/cpp/src/cagra_filter_udf_example.cu b/examples/cpp/src/cagra_filter_udf_example.cu index 0ab42dd580..d0a24de046 100644 --- a/examples/cpp/src/cagra_filter_udf_example.cu +++ b/examples/cpp/src/cagra_filter_udf_example.cu @@ -4,6 +4,7 @@ */ #include +#include #include #include @@ -144,8 +145,9 @@ int main() index_params.intermediate_graph_degree); std::cout << "Building CAGRA index" << std::endl; - auto index = - cuvs::neighbors::cagra::build(res, index_params, raft::make_const_mdspan(dataset.view())); + auto padded = cuvs::neighbors::make_device_padded_dataset_view(res, dataset.view()); + auto index = cuvs::neighbors::cagra::build(res, index_params, padded); + index.update_dataset(res, padded); std::vector 
row_tenant_ids(n_rows); std::vector row_timestamps(n_rows); diff --git a/examples/cpp/src/cagra_hnsw_ace_example.cu b/examples/cpp/src/cagra_hnsw_ace_example.cu index d1bde25ad6..8bf76c446d 100644 --- a/examples/cpp/src/cagra_hnsw_ace_example.cu +++ b/examples/cpp/src/cagra_hnsw_ace_example.cu @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -65,9 +66,13 @@ void cagra_build_search_ace(raft::device_resources const& dev_resources, raft::resource::sync_stream(dev_resources); auto dataset_host_view = raft::make_host_matrix_view( dataset_host.data_handle(), dataset_host.extent(0), dataset_host.extent(1)); + // Wrap in a host_padded_dataset_view. ACE graph construction is host-side CPU work and does not + // require CUDA row-alignment; construct the view directly to avoid the alignment check. + cuvs::neighbors::host_padded_dataset_view host_padded_view( + dataset_host_view, static_cast(dataset_host_view.extent(1))); std::cout << "Building CAGRA index (search graph)" << std::endl; - auto index = cagra::build(dev_resources, index_params, dataset_host_view); + auto ace_host_index = cagra::build(dev_resources, index_params, host_padded_view); // In-memory build of ACE provides the index in memory, so we can search it directly using // cagra::search @@ -80,7 +85,30 @@ void cagra_build_search_ace(raft::device_resources const& dev_resources, std::cout << "Converting CAGRA index to HNSW" << std::endl; hnsw::index_params hnsw_params; hnsw_params.hierarchy = hnsw::HnswHierarchy::GPU; // Offload hierarchy construction to GPU - auto hnsw_index = hnsw::from_cagra(dev_resources, hnsw_params, index); + + std::unique_ptr> hnsw_index; + std::unique_ptr> padded_owner; + if (ace_host_index.dataset_fd().has_value()) { + // Disk ACE path: ACE artifacts (dataset, graph, mapping) live on disk. Transfer file + // descriptors to a device index so from_cagra can serialize to hnsw_index.bin on disk. 
+ cagra::device_padded_index device_index(dev_resources, + ace_host_index.metric()); + device_index.update_dataset(dev_resources, std::move(*ace_host_index.steal_dataset_fd())); + if (ace_host_index.graph_fd().has_value()) { + device_index.update_graph(dev_resources, std::move(*ace_host_index.steal_graph_fd())); + } + if (ace_host_index.mapping_fd().has_value()) { + device_index.update_mapping(dev_resources, std::move(*ace_host_index.steal_mapping_fd())); + } + hnsw_index = hnsw::from_cagra(dev_resources, hnsw_params, device_index, std::nullopt); + } else { + // In-memory ACE path: graph is in host memory. Upload the original dataset to device and + // attach it before from_cagra builds the HNSW hierarchy in memory. + padded_owner = cuvs::neighbors::make_device_padded_dataset(dev_resources, dataset_host_view); + auto device_index = cagra::attach_device_dataset_on_host_index( + dev_resources, ace_host_index, padded_owner->as_dataset_view()); + hnsw_index = hnsw::from_cagra(dev_resources, hnsw_params, device_index, dataset_host_view); + } // HNSW search requires host matrices auto queries_host = raft::make_host_matrix(n_queries, queries.extent(1)); @@ -116,8 +144,12 @@ void cagra_build_search_ace(raft::device_resources const& dev_resources, std::cout << "Deserializing HNSW index from disk for search." 
<< std::endl; hnsw::index* hnsw_index_raw = nullptr; - hnsw::deserialize( - dev_resources, hnsw_params, hnsw_index_path, index.dim(), index.metric(), &hnsw_index_raw); + hnsw::deserialize(dev_resources, + hnsw_params, + hnsw_index_path, + ace_host_index.dim(), + ace_host_index.metric(), + &hnsw_index_raw); std::unique_ptr> hnsw_index_deserialized(hnsw_index_raw); diff --git a/examples/cpp/src/cagra_persistent_example.cu b/examples/cpp/src/cagra_persistent_example.cu index ded3a287b2..13db5e3ff9 100644 --- a/examples/cpp/src/cagra_persistent_example.cu +++ b/examples/cpp/src/cagra_persistent_example.cu @@ -6,6 +6,7 @@ #include "common.cuh" #include +#include #include #include #include @@ -68,7 +69,9 @@ void cagra_build_search_variants(raft::device_resources const& res, cagra::index_params index_params; std::cout << "Building CAGRA index (search graph)" << std::endl; - auto index = cagra::build(res, index_params, dataset); + auto padded = cuvs::neighbors::make_device_padded_dataset_view(res, dataset); + auto index = cagra::build(res, index_params, padded); + index.update_dataset(res, padded); std::cout << "CAGRA index has " << index.size() << " vectors" << std::endl; std::cout << "CAGRA graph has degree " << index.graph_degree() << ", graph size [" diff --git a/examples/cpp/src/dynamic_batching_example.cu b/examples/cpp/src/dynamic_batching_example.cu index 317e2c5aff..a44ee6fa07 100644 --- a/examples/cpp/src/dynamic_batching_example.cu +++ b/examples/cpp/src/dynamic_batching_example.cu @@ -6,6 +6,7 @@ #include "common.cuh" #include +#include #include #include @@ -113,7 +114,9 @@ void dynamic_batching_example(raft::resources const& res, cagra::index_params orig_index_params; std::cout << "Building CAGRA index (search graph)" << std::endl; - auto orig_index = cagra::build(res, orig_index_params, dataset); + auto padded = cuvs::neighbors::make_device_padded_dataset_view(res, dataset); + auto orig_index = cagra::build(res, orig_index_params, padded); + 
orig_index.update_dataset(res, padded); std::cout << "CAGRA index has " << orig_index.size() << " vectors" << std::endl; std::cout << "CAGRA graph has degree " << orig_index.graph_degree() << ", graph size [" diff --git a/fern/pages/cpp_api/cpp-api-neighbors-common.md b/fern/pages/cpp_api/cpp-api-neighbors-common.md index 68430cf6fb..12528ad179 100644 --- a/fern/pages/cpp_api/cpp-api-neighbors-common.md +++ b/fern/pages/cpp_api/cpp-api-neighbors-common.md @@ -70,7 +70,7 @@ struct dataset; ``` -### neighbors::vpq_dataset +### neighbors::device_vpq_dataset VPQ compressed dataset. @@ -81,7 +81,7 @@ The dataset is compressed using two level quantization ```cpp template -struct vpq_dataset : public dataset { +struct device_vpq_dataset : public dataset { raft::device_matrix vq_code_book; raft::device_matrix pq_code_book; raft::device_matrix data; diff --git a/fern/pages/cpp_api/cpp-api-preprocessing-quantize-pq.md b/fern/pages/cpp_api/cpp-api-preprocessing-quantize-pq.md index 85bcf86fbf..b70c451b3f 100644 --- a/fern/pages/cpp_api/cpp-api-preprocessing-quantize-pq.md +++ b/fern/pages/cpp_api/cpp-api-preprocessing-quantize-pq.md @@ -94,7 +94,7 @@ Defines and stores VPQ codebooks upon training template struct quantizer { params params_quantizer; - cuvs::neighbors::vpq_dataset vpq_codebooks; + cuvs::neighbors::device_vpq_dataset vpq_codebooks; }; ``` @@ -103,7 +103,7 @@ struct quantizer { | Name | Type | Description | | --- | --- | --- | | `params_quantizer` | [`params`](/api-reference/cpp-api-preprocessing-quantize-pq#preprocessing-quantize-pq-params) | Parameters used to build this quantizer. | -| `vpq_codebooks` | [`cuvs::neighbors::vpq_dataset`](/api-reference/cpp-api-neighbors-common#neighbors-vpq-dataset) | VPQ codebooks produced during training. | +| `vpq_codebooks` | [`cuvs::neighbors::device_vpq_dataset`](/api-reference/cpp-api-neighbors-common#neighbors-vpq-dataset) | VPQ codebooks produced during training. 
| ### preprocessing::quantize::pq::build diff --git a/go/cagra/cagra_test.go b/go/cagra/cagra_test.go index bb4fd0a0a1..9b6b2a4610 100644 --- a/go/cagra/cagra_test.go +++ b/go/cagra/cagra_test.go @@ -8,20 +8,6 @@ import ( ) func TestCagra(t *testing.T) { - testCases := []struct { - name string - compress bool - }{ - { - name: "No compression", - compress: false, - }, - { - name: "Compression", - compress: true, - }, - } - const ( nDataPoints = 1024 nFeatures = 16 @@ -31,129 +17,114 @@ func TestCagra(t *testing.T) { ) r := rand.New(rand.NewPCG(42, 0)) - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - resource, _ := cuvs.NewResource(nil) - defer resource.Close() - - testDataset := make([][]float32, nDataPoints) - for i := range testDataset { - testDataset[i] = make([]float32, nFeatures) - for j := range testDataset[i] { - testDataset[i][j] = r.Float32() - } - } + resource, _ := cuvs.NewResource(nil) + defer resource.Close() - dataset, err := cuvs.NewTensor(testDataset) - if err != nil { - t.Fatalf("error creating dataset tensor: %v", err) - } - defer dataset.Close() + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = r.Float32() + } + } - indexParams, err := CreateIndexParams() - if err != nil { - t.Fatalf("error creating index params: %v", err) - } - defer indexParams.Close() + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() - index, err := CreateIndex() - if err != nil { - t.Fatalf("error creating index: %v", err) - } - defer index.Close() + indexParams, err := CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() - // Use the first 4 points from the dataset as queries : will test that we get them back - // as their own nearest neighbor - queries, err := 
cuvs.NewTensor(testDataset[:nQueries]) - if err != nil { - t.Fatalf("error creating queries tensor: %v", err) - } - defer queries.Close() + index, err := CreateIndex() + if err != nil { + t.Fatalf("error creating index: %v", err) + } + defer index.Close() - neighbors, err := cuvs.NewTensorOnDevice[uint32](&resource, []int64{int64(nQueries), int64(k)}) - if err != nil { - t.Fatalf("error creating neighbors tensor: %v", err) - } - defer neighbors.Close() + // Use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, err := cuvs.NewTensor(testDataset[:nQueries]) + if err != nil { + t.Fatalf("error creating queries tensor: %v", err) + } + defer queries.Close() - distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) - if err != nil { - t.Fatalf("error creating distances tensor: %v", err) - } - defer distances.Close() + neighbors, err := cuvs.NewTensorOnDevice[uint32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() - if _, err := dataset.ToDevice(&resource); err != nil { - t.Fatalf("error moving dataset to device: %v", err) - } + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() - if tc.compress { - compressionParams, err := CreateCompressionParams() - if err != nil { - t.Fatalf("error creating compression params: %v", err) - } - indexParams.SetCompression(compressionParams) - } + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } - if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { - t.Fatalf("error building index: %v", err) - } + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error 
building index: %v", err) + } - if err := resource.Sync(); err != nil { - t.Fatalf("error syncing resource: %v", err) - } + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } - if _, err := queries.ToDevice(&resource); err != nil { - t.Fatalf("error moving queries to device: %v", err) - } + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } - SearchParams, err := CreateSearchParams() - if err != nil { - t.Fatalf("error creating search params: %v", err) - } - defer SearchParams.Close() + SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() - err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances, nil) - if err != nil { - t.Fatalf("error searching index: %v", err) - } + err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances, nil) + if err != nil { + t.Fatalf("error searching index: %v", err) + } - if _, err := neighbors.ToHost(&resource); err != nil { - t.Fatalf("error moving neighbors to host: %v", err) - } + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } - if _, err := distances.ToHost(&resource); err != nil { - t.Fatalf("error moving distances to host: %v", err) - } + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } - if err := resource.Sync(); err != nil { - t.Fatalf("error syncing resource: %v", err) - } + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } - neighborsSlice, err := neighbors.Slice() - if err != nil { - t.Fatalf("error getting neighbors slice: %v", err) - } + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } - for i := range neighborsSlice { - if neighborsSlice[i][0] != 
uint32(i) { - t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) - } - } + for i := range neighborsSlice { + if neighborsSlice[i][0] != uint32(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } - distancesSlice, err := distances.Slice() - if err != nil { - t.Fatalf("error getting distances slice: %v", err) - } + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } - if !tc.compress { - // Compress makes the result nondeterministic - for i := range distancesSlice { - if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { - t.Error("distance should be close to 0, got", distancesSlice[i][0]) - } - } - } - }) + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } } } diff --git a/go/cagra/index_params.go b/go/cagra/index_params.go index 99f4b70b93..c90ea95e46 100644 --- a/go/cagra/index_params.go +++ b/go/cagra/index_params.go @@ -13,11 +13,6 @@ type IndexParams struct { params C.cuvsCagraIndexParams_t } -// Supplemental parameters to build CAGRA Index -type CompressionParams struct { - params C.cuvsCagraCompressionParams_t -} - type BuildAlgo int const ( @@ -32,69 +27,6 @@ var cBuildAlgos = map[BuildAlgo]int{ AutoSelect: C.AUTO_SELECT, } -// Creates a new CompressionParams -func CreateCompressionParams() (*CompressionParams, error) { - var params C.cuvsCagraCompressionParams_t - - err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraCompressionParamsCreate(¶ms))) - if err != nil { - return nil, err - } - - if params == nil { - return nil, errors.New("memory allocation failed") - } - - return &CompressionParams{params: params}, nil -} - -// The bit length of the vector element after compression by PQ. 
-func (p *CompressionParams) SetPQBits(pq_bits uint32) (*CompressionParams, error) { - p.params.pq_bits = C.uint32_t(pq_bits) - - return p, nil -} - -// The dimensionality of the vector after compression by PQ. When zero, -// an optimal value is selected using a heuristic. -func (p *CompressionParams) SetPQDim(pq_dim uint32) (*CompressionParams, error) { - p.params.pq_dim = C.uint32_t(pq_dim) - - return p, nil -} - -// Vector Quantization (VQ) codebook size - number of "coarse cluster -// centers". When zero, an optimal value is selected using a heuristic. -func (p *CompressionParams) SetVQNCenters(vq_n_centers uint32) (*CompressionParams, error) { - p.params.vq_n_centers = C.uint32_t(vq_n_centers) - - return p, nil -} - -// The number of iterations searching for kmeans centers (both VQ & PQ -// phases). -func (p *CompressionParams) SetKMeansNIters(kmeans_n_iters uint32) (*CompressionParams, error) { - p.params.kmeans_n_iters = C.uint32_t(kmeans_n_iters) - - return p, nil -} - -// The fraction of data to use during iterative kmeans building (VQ -// phase). When zero, an optimal value is selected using a heuristic. -func (p *CompressionParams) SetVQKMeansTrainsetFraction(vq_kmeans_trainset_fraction float64) (*CompressionParams, error) { - p.params.vq_kmeans_trainset_fraction = C.double(vq_kmeans_trainset_fraction) - - return p, nil -} - -// The fraction of data to use during iterative kmeans building (PQ -// phase). When zero, an optimal value is selected using a heuristic. 
-func (p *CompressionParams) SetPQKMeansTrainsetFraction(pq_kmeans_trainset_fraction float64) (*CompressionParams, error) { - p.params.pq_kmeans_trainset_fraction = C.double(pq_kmeans_trainset_fraction) - - return p, nil -} - // Creates a new IndexParams func CreateIndexParams() (*IndexParams, error) { var params C.cuvsCagraIndexParams_t @@ -141,13 +73,6 @@ func (p *IndexParams) SetNNDescentNiter(nn_descent_niter uint32) (*IndexParams, return p, nil } -// Compression parameters -func (p *IndexParams) SetCompression(compression *CompressionParams) (*IndexParams, error) { - p.params.compression = C.cuvsCagraCompressionParams_t(compression.params) - - return p, nil -} - // Destroys IndexParams func (p *IndexParams) Close() error { err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexParamsDestroy(p.params))) diff --git a/go/cagra/index_params_test.go b/go/cagra/index_params_test.go index 579419ca92..532cd736b1 100644 --- a/go/cagra/index_params_test.go +++ b/go/cagra/index_params_test.go @@ -4,225 +4,6 @@ import ( "testing" ) -// CompressionParams Tests -func TestCreateCompressionParams(t *testing.T) { - params, err := CreateCompressionParams() - if err != nil { - t.Fatalf("Failed to create CompressionParams: %v", err) - } - if params == nil { - t.Fatal("CreateCompressionParams returned nil params") - } - - if params.params == nil { - t.Fatal("CompressionParams internal params are nil") - } - if params.params.pq_kmeans_trainset_fraction != 0 { - t.Fatalf("Error params.params.pq_kmeans_trainset_fraction != 0, got = %v", params.params.pq_kmeans_trainset_fraction) - } - if params.params.pq_bits != 8 { - t.Fatalf("Error params.params.pq_bits != 8, got = %v", params.params.pq_bits) - } - if params.params.pq_dim != 0 { - t.Fatalf("Error params.params.pq_dim != 0, got = %v", params.params.pq_dim) - } - if params.params.vq_n_centers != 0 { - t.Fatalf("Error params.params.vq_n_centers != 0, got = %v", params.params.vq_n_centers) - } - if params.params.kmeans_n_iters != 25 { - 
t.Fatalf("Error params.params.kmeans_n_iters != 25, got = %v", params.params.kmeans_n_iters) - } -} - -func TestCompressionParamsSetPQBits(t *testing.T) { - params, err := CreateCompressionParams() - if err != nil { - t.Fatalf("Failed to create CompressionParams: %v", err) - } - - testCases := []struct { - name string - value uint32 - }{ - {"4 bits", 4}, - {"8 bits", 8}, - {"16 bits", 16}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - result, err := params.SetPQBits(tc.value) - if err != nil { - t.Errorf("SetPQBits failed: %v", err) - } - if result != params { - t.Error("SetPQBits should return the same params instance") - } - if uint32(params.params.pq_bits) != tc.value { - t.Errorf("Expected pq_bits %d, got %d", tc.value, params.params.pq_bits) - } - }) - } -} - -func TestCompressionParamsSetPQDim(t *testing.T) { - params, err := CreateCompressionParams() - if err != nil { - t.Fatalf("Failed to create CompressionParams: %v", err) - } - - testCases := []struct { - name string - value uint32 - }{ - {"Zero (auto)", 0}, - {"Small dimension", 32}, - {"Large dimension", 128}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - result, err := params.SetPQDim(tc.value) - if err != nil { - t.Errorf("SetPQDim failed: %v", err) - } - if result != params { - t.Error("SetPQDim should return the same params instance") - } - if uint32(params.params.pq_dim) != tc.value { - t.Errorf("Expected pq_dim %d, got %d", tc.value, params.params.pq_dim) - } - }) - } -} - -func TestCompressionParamsSetVQNCenters(t *testing.T) { - params, err := CreateCompressionParams() - if err != nil { - t.Fatalf("Failed to create CompressionParams: %v", err) - } - - testCases := []struct { - name string - value uint32 - }{ - {"Zero (auto)", 0}, - {"Small centers", 256}, - {"Large centers", 2048}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - result, err := params.SetVQNCenters(tc.value) - if err != nil { - 
t.Errorf("SetVQNCenters failed: %v", err) - } - if result != params { - t.Error("SetVQNCenters should return the same params instance") - } - if uint32(params.params.vq_n_centers) != tc.value { - t.Errorf("Expected vq_n_centers %d, got %d", tc.value, params.params.vq_n_centers) - } - }) - } -} - -func TestCompressionParamsSetKMeansNIters(t *testing.T) { - params, err := CreateCompressionParams() - if err != nil { - t.Fatalf("Failed to create CompressionParams: %v", err) - } - - testCases := []struct { - name string - value uint32 - }{ - {"Few iterations", 10}, - {"Default iterations", 25}, - {"Many iterations", 100}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - result, err := params.SetKMeansNIters(tc.value) - if err != nil { - t.Errorf("SetKMeansNIters failed: %v", err) - } - if result != params { - t.Error("SetKMeansNIters should return the same params instance") - } - if uint32(params.params.kmeans_n_iters) != tc.value { - t.Errorf("Expected kmeans_n_iters %d, got %d", tc.value, params.params.kmeans_n_iters) - } - }) - } -} - -func TestCompressionParamsSetVQKMeansTrainsetFraction(t *testing.T) { - params, err := CreateCompressionParams() - if err != nil { - t.Fatalf("Failed to create CompressionParams: %v", err) - } - - testCases := []struct { - name string - value float64 - }{ - {"Zero (auto)", 0.0}, - {"Half dataset", 0.5}, - {"Full dataset", 1.0}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - result, err := params.SetVQKMeansTrainsetFraction(tc.value) - if err != nil { - t.Errorf("SetVQKMeansTrainsetFraction failed: %v", err) - } - if result != params { - t.Error("SetVQKMeansTrainsetFraction should return the same params instance") - } - if float64(params.params.vq_kmeans_trainset_fraction) != tc.value { - t.Errorf("Expected vq_kmeans_trainset_fraction %f, got %f", - tc.value, params.params.vq_kmeans_trainset_fraction) - } - }) - } -} - -func TestCompressionParamsSetPQKMeansTrainsetFraction(t 
*testing.T) { - params, err := CreateCompressionParams() - if err != nil { - t.Fatalf("Failed to create CompressionParams: %v", err) - } - - testCases := []struct { - name string - value float64 - }{ - {"Zero (auto)", 0.0}, - {"Quarter dataset", 0.25}, - {"Half dataset", 0.5}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - result, err := params.SetPQKMeansTrainsetFraction(tc.value) - if err != nil { - t.Errorf("SetPQKMeansTrainsetFraction failed: %v", err) - } - if result != params { - t.Error("SetPQKMeansTrainsetFraction should return the same params instance") - } - if float64(params.params.pq_kmeans_trainset_fraction) != tc.value { - t.Errorf("Expected pq_kmeans_trainset_fraction %f, got %f", - tc.value, params.params.pq_kmeans_trainset_fraction) - } - }) - } -} - -// IndexParams Tests func TestCreateIndexParams(t *testing.T) { params, err := CreateIndexParams() if err != nil { @@ -376,31 +157,6 @@ func TestIndexParamsSetNNDescentNiter(t *testing.T) { } } -func TestIndexParamsSetCompression(t *testing.T) { - params, err := CreateIndexParams() - if err != nil { - t.Fatalf("Failed to create IndexParams: %v", err) - } - defer params.Close() - - compression, err := CreateCompressionParams() - if err != nil { - t.Fatalf("Failed to create CompressionParams: %v", err) - } - - // Configure compression params - compression.SetPQBits(8) - compression.SetPQDim(64) - - result, err := params.SetCompression(compression) - if err != nil { - t.Errorf("SetCompression failed: %v", err) - } - if result != params { - t.Error("SetCompression should return the same params instance") - } -} - func TestIndexParamsClose(t *testing.T) { params, err := CreateIndexParams() if err != nil { @@ -414,7 +170,6 @@ func TestIndexParamsClose(t *testing.T) { } func TestBuildAlgoConstants(t *testing.T) { - // Test that BuildAlgo constants are properly defined algos := []BuildAlgo{IvfPq, NnDescent, AutoSelect} for _, algo := range algos { diff --git 
a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java index e185ed9f26..ba0d0477ab 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ package com.nvidia.cuvs; @@ -25,7 +25,6 @@ public class CagraIndexParams { private final int numWriterThreads; private final CuVSIvfPqParams cuVSIvfPqParams; private final CuVSAceParams cuVSAceParams; - private final CagraCompressionParams cagraCompressionParams; /** * Enum that denotes which ANN algorithm is used to build CAGRA graph. @@ -335,8 +334,7 @@ private CagraIndexParams( int writerThreads, CuvsDistanceType cuvsDistanceType, CuVSIvfPqParams cuVSIvfPqParams, - CuVSAceParams cuVSAceParams, - CagraCompressionParams cagraCompressionParams) { + CuVSAceParams cuVSAceParams) { this.intermediateGraphDegree = intermediateGraphDegree; this.graphDegree = graphDegree; this.cuvsCagraGraphBuildAlgo = CuvsCagraGraphBuildAlgo; @@ -345,7 +343,6 @@ private CagraIndexParams( this.cuvsDistanceType = cuvsDistanceType; this.cuVSIvfPqParams = cuVSIvfPqParams; this.cuVSAceParams = cuVSAceParams; - this.cagraCompressionParams = cagraCompressionParams; } public static CagraIndexParams fromHnswParams( @@ -427,13 +424,6 @@ public CagraGraphBuildAlgo getCuvsCagraGraphBuildAlgo() { return cuvsCagraGraphBuildAlgo; } - /** - * Gets the CAGRA compression parameters. 
- */ - public CagraCompressionParams getCagraCompressionParams() { - return cagraCompressionParams; - } - @Override public String toString() { return "CagraIndexParams [cuvsCagraGraphBuildAlgo=" @@ -452,8 +442,6 @@ public String toString() { + cuVSIvfPqParams + ", cuVSAceParams=" + cuVSAceParams - + ", cagraCompressionParams=" - + cagraCompressionParams + "]"; } @@ -470,7 +458,6 @@ public static class Builder { private int numWriterThreads = 2; private CuVSIvfPqParams cuVSIvfPqParams = new CuVSIvfPqParams.Builder().build(); private CuVSAceParams cuVSAceParams = new CuVSAceParams.Builder().build(); - private CagraCompressionParams cagraCompressionParams; public Builder() {} @@ -564,18 +551,6 @@ public Builder withCuVSAceParams(CuVSAceParams cuVSAceParams) { return this; } - /** - * Registers an instance of configured {@link CagraCompressionParams} with this - * Builder. - * - * @param cagraCompressionParams An instance of CagraCompressionParams. - * @return An instance of this Builder. - */ - public Builder withCompressionParams(CagraCompressionParams cagraCompressionParams) { - this.cagraCompressionParams = cagraCompressionParams; - return this; - } - /** * Builds an instance of {@link CagraIndexParams}. 
* @@ -590,8 +565,7 @@ public CagraIndexParams build() { numWriterThreads, cuvsDistanceType, cuVSIvfPqParams, - cuVSAceParams, - cagraCompressionParams); + cuVSAceParams); } } } diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java index abc53a5945..fd1cf53b53 100644 --- a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java @@ -545,28 +545,6 @@ private static void populateNativeIndexParams( cuvsCagraIndexParams.nn_descent_niter(indexPtr, params.getNNDescentNumIterations()); cuvsCagraIndexParams.metric(indexPtr, params.getCuvsDistanceType().value); - CagraCompressionParams cagraCompressionParams = params.getCagraCompressionParams(); - if (cagraCompressionParams != null) { - var compressionParams = createCagraCompressionParams(); - handles.add(compressionParams); - MemorySegment cuvsCagraCompressionParamsMemorySegment = compressionParams.handle(); - cuvsCagraCompressionParams.pq_bits( - cuvsCagraCompressionParamsMemorySegment, cagraCompressionParams.getPqBits()); - cuvsCagraCompressionParams.pq_dim( - cuvsCagraCompressionParamsMemorySegment, cagraCompressionParams.getPqDim()); - cuvsCagraCompressionParams.vq_n_centers( - cuvsCagraCompressionParamsMemorySegment, cagraCompressionParams.getVqNCenters()); - cuvsCagraCompressionParams.kmeans_n_iters( - cuvsCagraCompressionParamsMemorySegment, cagraCompressionParams.getKmeansNIters()); - cuvsCagraCompressionParams.vq_kmeans_trainset_fraction( - cuvsCagraCompressionParamsMemorySegment, - cagraCompressionParams.getVqKmeansTrainsetFraction()); - cuvsCagraCompressionParams.pq_kmeans_trainset_fraction( - cuvsCagraCompressionParamsMemorySegment, - cagraCompressionParams.getPqKmeansTrainsetFraction()); - cuvsCagraIndexParams.compression(indexPtr, cuvsCagraCompressionParamsMemorySegment); - } - if 
(params.getCagraGraphBuildAlgo().equals(CagraGraphBuildAlgo.IVF_PQ)) { var ivfPqIndexParams = createIvfPqIndexParams(); diff --git a/python/cuvs/cuvs/neighbors/cagra/__init__.py b/python/cuvs/cuvs/neighbors/cagra/__init__.py index ec70305d72..a96ea2d4dc 100644 --- a/python/cuvs/cuvs/neighbors/cagra/__init__.py +++ b/python/cuvs/cuvs/neighbors/cagra/__init__.py @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from .cagra import ( AceParams, - CompressionParams, ExtendParams, Index, IndexParams, @@ -19,7 +18,6 @@ __all__ = [ "AceParams", - "CompressionParams", "ExtendParams", "Index", "IndexParams", diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd index e575ed5360..3a232995df 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -42,16 +42,6 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil: ITERATIVE_CAGRA_SEARCH ACE - ctypedef struct cuvsCagraCompressionParams: - uint32_t pq_bits - uint32_t pq_dim - uint32_t vq_n_centers - uint32_t kmeans_n_iters - double vq_kmeans_trainset_fraction - double pq_kmeans_trainset_fraction - - ctypedef cuvsCagraCompressionParams* cuvsCagraCompressionParams_t - ctypedef struct cuvsIvfPqParams: cuvsIvfPqIndexParams_t ivf_pq_build_params cuvsIvfPqSearchParams_t ivf_pq_search_params @@ -73,7 +63,6 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil: size_t graph_degree cuvsCagraGraphBuildAlgo build_algo size_t nn_descent_niter - cuvsCagraCompressionParams_t compression void* graph_build_params ctypedef cuvsCagraIndexParams* cuvsCagraIndexParams_t @@ -115,12 +104,6 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil: ctypedef cuvsCagraIndex* cuvsCagraIndex_t - cuvsError_t cuvsCagraCompressionParamsCreate( - cuvsCagraCompressionParams_t* params) - - cuvsError_t 
cuvsCagraCompressionParamsDestroy( - cuvsCagraCompressionParams_t index) - cuvsError_t cuvsAceParamsCreate(cuvsAceParams_t* params) cuvsError_t cuvsAceParamsDestroy(cuvsAceParams_t params) @@ -204,7 +187,6 @@ cdef class Index: cdef class IndexParams: cdef cuvsCagraIndexParams* params - cdef public object compression cdef public object ivf_pq_build_params cdef public object ivf_pq_search_params cdef public object ace_params diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index 8e3bca3ab2..c6db6d538f 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -47,83 +47,6 @@ from cuvs.neighbors import ivf_pq from cuvs.neighbors.filters import no_filter -cdef class CompressionParams: - """ - Parameters for VPQ Compression - - Parameters - ---------- - pq_bits: int - The bit length of the vector element after compression by PQ. - Possible values: [4, 5, 6, 7, 8]. The smaller the 'pq_bits', the - smaller the index size and the better the search performance, but - the lower the recall. - pq_dim: int - The dimensionality of the vector after compression by PQ. When zero, - an optimal value is selected using a heuristic. - vq_n_centers: int - Vector Quantization (VQ) codebook size - number of "coarse cluster - centers". When zero, an optimal value is selected using a heuristic. - kmeans_n_iters: int - The number of iterations searching for kmeans centers (both VQ & PQ - phases). - vq_kmeans_trainset_fraction: float - The fraction of data to use during iterative kmeans building (VQ - phase). When zero, an optimal value is selected using a heuristic. - pq_kmeans_trainset_fraction: float - The fraction of data to use during iterative kmeans building (PQ - phase). When zero, an optimal value is selected using a heuristic. 
- """ - cdef cuvsCagraCompressionParams * params - - def __cinit__(self): - check_cuvs(cuvsCagraCompressionParamsCreate(&self.params)) - - def __dealloc__(self): - check_cuvs(cuvsCagraCompressionParamsDestroy(self.params)) - - def __init__(self, *, - pq_bits=8, - pq_dim=0, - vq_n_centers=0, - kmeans_n_iters=25, - vq_kmeans_trainset_fraction=0.0, - pq_kmeans_trainset_fraction=0.0): - self.params.pq_bits = pq_bits - self.params.pq_dim = pq_dim - self.params.vq_n_centers = vq_n_centers - self.params.kmeans_n_iters = kmeans_n_iters - self.params.vq_kmeans_trainset_fraction = vq_kmeans_trainset_fraction - self.params.pq_kmeans_trainset_fraction = pq_kmeans_trainset_fraction - - @property - def pq_bits(self): - return self.params.pq_bits - - @property - def pq_dim(self): - return self.params.pq_dim - - @property - def vq_n_centers(self): - return self.params.vq_n_centers - - @property - def kmeans_n_iters(self): - return self.params.kmeans_n_iters - - @property - def vq_kmeans_trainset_fraction(self): - return self.params.vq_kmeans_trainset_fraction - - @property - def pq_kmeans_trainset_fraction(self): - return self.params.pq_kmeans_trainset_fraction - - def get_handle(self): - return self.params - - cdef class AceParams: """ Parameters for ACE (Augmented Core Extraction) graph building algorithm. @@ -271,9 +194,6 @@ cdef class IndexParams: - ace will use ACE (Augmented Core Extraction) for building indices for datasets too large to fit in GPU memory - compression: CompressionParams, optional - If compression is desired should be a CompressionParams object. If None - compression will be disabled. ivf_pq_build_params: cuvs.neighbors.ivf_pq.IndexParams, optional Parameters for IVF-PQ algorithm. If provided, it will be used for building the graph. 
@@ -289,7 +209,6 @@ cdef class IndexParams: def __cinit__(self): check_cuvs(cuvsCagraIndexParamsCreate(&self.params)) - self.compression = None self.ivf_pq_build_params = None self.ivf_pq_search_params = None self.ace_params = None @@ -304,7 +223,6 @@ cdef class IndexParams: graph_degree=64, build_algo="ivf_pq", nn_descent_niter=20, - compression=None, ivf_pq_build_params: ivf_pq.IndexParams = None, ivf_pq_search_params: ivf_pq.SearchParams = None, ace_params: AceParams = None, @@ -329,10 +247,6 @@ cdef class IndexParams: raise ValueError(f"Unknown build_algo '{build_algo}'") self.params.nn_descent_niter = nn_descent_niter - if compression is not None: - self.compression = compression - self.params.compression = \ - compression.get_handle() # Handle graph build params based on build algorithm if build_algo == "ace": diff --git a/python/cuvs/cuvs/tests/test_cagra.py b/python/cuvs/cuvs/tests/test_cagra.py index c0d436951e..6c902ef78e 100644 --- a/python/cuvs/cuvs/tests/test_cagra.py +++ b/python/cuvs/cuvs/tests/test_cagra.py @@ -34,7 +34,6 @@ def run_cagra_build_search_test( inplace=True, test_extend=False, search_params={}, - compression=None, serialize=False, ): dataset = generate_data((n_rows, n_cols), dtype) @@ -49,7 +48,6 @@ def run_cagra_build_search_test( intermediate_graph_degree=intermediate_graph_degree, graph_degree=graph_degree, build_algo=build_algo, - compression=compression, ) if test_extend: @@ -129,27 +127,26 @@ def run_cagra_build_search_test( cp_graph = cp.array(graph) assert cp_graph.shape == (n_rows, graph_degree) - if compression is None: - # make sure we can get the dataset from the cagra index - dataset_from_index = index.dataset + # make sure we can get the dataset from the cagra index + dataset_from_index = index.dataset - dataset_from_index_host = dataset_from_index.copy_to_host() - assert np.allclose(dataset, dataset_from_index_host) + dataset_from_index_host = dataset_from_index.copy_to_host() + assert np.allclose(dataset, 
dataset_from_index_host) - # make sure we can reconstruct the index from the graph - # Note that we can't actually use the dataset from the index itself - # - since that is a strided matrix (and we expect non-strided inputs - # in the C++ cagra::build api), so we are using the host version - # which will have been copied into a non-strided layout - reloaded_index = cagra.from_graph( - graph, dataset_from_index_host, metric=metric - ) + # make sure we can reconstruct the index from the graph + # Note that we can't actually use the dataset from the index itself + # - since that is a strided matrix (and we expect non-strided inputs + # in the C++ cagra::build api), so we are using the host version + # which will have been copied into a non-strided layout + reloaded_index = cagra.from_graph( + graph, dataset_from_index_host, metric=metric + ) - dist_device, idx_device = cagra.search( - search_params, reloaded_index, queries_device, k - ) - recall = calc_recall(idx_device.copy_to_host(), skl_idx) - assert recall > 0.9 + dist_device, idx_device = cagra.search( + search_params, reloaded_index, queries_device, k + ) + recall = calc_recall(idx_device.copy_to_host(), skl_idx) + assert recall > 0.9 @pytest.mark.parametrize("inplace", [True, False]) @@ -234,14 +231,6 @@ def test_cagra_index_params(params): ) -def test_cagra_vpq_compression(): - dim = 64 - pq_len = 2 - run_cagra_build_search_test( - n_cols=dim, compression=cagra.CompressionParams(pq_dim=dim / pq_len) - ) - - @pytest.mark.parametrize("internal_dtype", [np.float32, np.float16, np.uint8]) def test_cagra_ivf_pq( internal_dtype, diff --git a/python/cuvs/cuvs/tests/test_mg_ivf_flat.py b/python/cuvs/cuvs/tests/test_mg_ivf_flat.py index 99dff4e221..1a94996404 100644 --- a/python/cuvs/cuvs/tests/test_mg_ivf_flat.py +++ b/python/cuvs/cuvs/tests/test_mg_ivf_flat.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # @@ -252,6 +252,10 @@ def test_mg_ivf_flat_metrics(metric): @requires_multiple_gpus +@pytest.mark.skip( + reason="Temporarily disabled: flaky recall on extend-from-empty path vs sklearn " + "(re-enable after stabilizing thresholds, seeding, or extend quality)." +) def test_mg_ivf_flat_extend(): """Test extending multi-GPU IVF-Flat index with new vectors.""" run_mg_ivf_flat_build_search_test( diff --git a/rust/cuvs-sys/src/bindings.rs b/rust/cuvs-sys/src/bindings.rs index 0498b77f3a..e54414063f 100644 --- a/rust/cuvs-sys/src/bindings.rs +++ b/rust/cuvs-sys/src/bindings.rs @@ -1240,14 +1240,12 @@ pub struct cuvsCagraIndexParams { pub build_algo: cuvsCagraGraphBuildAlgo, #[doc = " Number of Iterations to run if building with NN_DESCENT"] pub nn_descent_niter: usize, - #[doc = " Optional: specify compression parameters if compression is desired.\n\n NOTE: this is experimental new API, consider it unsafe."] - pub compression: cuvsCagraCompressionParams_t, #[doc = " Optional: specify graph build params based on build_algo\n - IVF_PQ: cuvsIvfPqParams_t\n - ACE: cuvsAceParams_t\n - Others: nullptr"] pub graph_build_params: *mut ::std::os::raw::c_void, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { - ["Size of cuvsCagraIndexParams"][::std::mem::size_of::() - 56usize]; + ["Size of cuvsCagraIndexParams"][::std::mem::size_of::() - 48usize]; ["Alignment of cuvsCagraIndexParams"][::std::mem::align_of::() - 8usize]; ["Offset of field: cuvsCagraIndexParams::metric"] [::std::mem::offset_of!(cuvsCagraIndexParams, metric) - 0usize]; @@ -1259,10 +1257,8 @@ const _: () = { [::std::mem::offset_of!(cuvsCagraIndexParams, build_algo) - 24usize]; ["Offset of field: cuvsCagraIndexParams::nn_descent_niter"] [::std::mem::offset_of!(cuvsCagraIndexParams, nn_descent_niter) - 32usize]; - ["Offset of field: cuvsCagraIndexParams::compression"] - [::std::mem::offset_of!(cuvsCagraIndexParams, compression) - 40usize]; ["Offset of 
field: cuvsCagraIndexParams::graph_build_params"] - [::std::mem::offset_of!(cuvsCagraIndexParams, graph_build_params) - 48usize]; + [::std::mem::offset_of!(cuvsCagraIndexParams, graph_build_params) - 40usize]; }; pub type cuvsCagraIndexParams_t = *mut cuvsCagraIndexParams; unsafe extern "C" { diff --git a/rust/cuvs/src/cagra/index.rs b/rust/cuvs/src/cagra/index.rs index d69a4d5033..fcdb43a631 100644 --- a/rust/cuvs/src/cagra/index.rs +++ b/rust/cuvs/src/cagra/index.rs @@ -262,14 +262,6 @@ mod tests { test_cagra(build_params); } - #[test] - fn test_cagra_compression() { - use crate::cagra::CompressionParams; - let build_params = - IndexParams::new().unwrap().set_compression(CompressionParams::new().unwrap()); - test_cagra(build_params); - } - /// Test bitset-filtered search: exclude odd-indexed rows, verify they don't appear. #[test] fn test_cagra_search_with_filter() { diff --git a/rust/cuvs/src/cagra/index_params.rs b/rust/cuvs/src/cagra/index_params.rs index 9425ea060a..7246a48d6d 100644 --- a/rust/cuvs/src/cagra/index_params.rs +++ b/rust/cuvs/src/cagra/index_params.rs @@ -9,80 +9,7 @@ use std::io::{Write, stderr}; pub type BuildAlgo = ffi::cuvsCagraGraphBuildAlgo; -/// Supplemental parameters to build CAGRA Index -pub struct CompressionParams(pub ffi::cuvsCagraCompressionParams_t); - -impl CompressionParams { - /// Returns a new CompressionParams - pub fn new() -> Result { - unsafe { - let mut params = std::mem::MaybeUninit::::uninit(); - check_cuvs(ffi::cuvsCagraCompressionParamsCreate(params.as_mut_ptr()))?; - Ok(CompressionParams(params.assume_init())) - } - } - - /// The bit length of the vector element after compression by PQ. - pub fn set_pq_bits(self, pq_bits: u32) -> CompressionParams { - unsafe { - (*self.0).pq_bits = pq_bits; - } - self - } - - /// The dimensionality of the vector after compression by PQ. When zero, - /// an optimal value is selected using a heuristic. 
- pub fn set_pq_dim(self, pq_dim: u32) -> CompressionParams { - unsafe { - (*self.0).pq_dim = pq_dim; - } - self - } - - /// Vector Quantization (VQ) codebook size - number of "coarse cluster - /// centers". When zero, an optimal value is selected using a heuristic. - pub fn set_vq_n_centers(self, vq_n_centers: u32) -> CompressionParams { - unsafe { - (*self.0).vq_n_centers = vq_n_centers; - } - self - } - - /// The number of iterations searching for kmeans centers (both VQ & PQ - /// phases). - pub fn set_kmeans_n_iters(self, kmeans_n_iters: u32) -> CompressionParams { - unsafe { - (*self.0).kmeans_n_iters = kmeans_n_iters; - } - self - } - - /// The fraction of data to use during iterative kmeans building (VQ - /// phase). When zero, an optimal value is selected using a heuristic. - pub fn set_vq_kmeans_trainset_fraction( - self, - vq_kmeans_trainset_fraction: f64, - ) -> CompressionParams { - unsafe { - (*self.0).vq_kmeans_trainset_fraction = vq_kmeans_trainset_fraction; - } - self - } - - /// The fraction of data to use during iterative kmeans building (PQ - /// phase). When zero, an optimal value is selected using a heuristic. 
- pub fn set_pq_kmeans_trainset_fraction( - self, - pq_kmeans_trainset_fraction: f64, - ) -> CompressionParams { - unsafe { - (*self.0).pq_kmeans_trainset_fraction = pq_kmeans_trainset_fraction; - } - self - } -} - -pub struct IndexParams(pub ffi::cuvsCagraIndexParams_t, Option); +pub struct IndexParams(pub ffi::cuvsCagraIndexParams_t); impl IndexParams { /// Returns a new IndexParams @@ -90,7 +17,7 @@ impl IndexParams { unsafe { let mut params = std::mem::MaybeUninit::::uninit(); check_cuvs(ffi::cuvsCagraIndexParamsCreate(params.as_mut_ptr()))?; - Ok(IndexParams(params.assume_init(), None)) + Ok(IndexParams(params.assume_init())) } } @@ -125,16 +52,6 @@ impl IndexParams { } self } - - pub fn set_compression(mut self, compression: CompressionParams) -> IndexParams { - unsafe { - (*self.0).compression = compression.0; - } - // Note: we're moving the ownership of compression here to avoid having it cleaned up - // and leaving a dangling pointer - self.1 = Some(compression); - self - } } impl fmt::Debug for IndexParams { @@ -145,12 +62,6 @@ impl fmt::Debug for IndexParams { } } -impl fmt::Debug for CompressionParams { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "CompressionParams({:?})", unsafe { *self.0 }) - } -} - impl Drop for IndexParams { fn drop(&mut self) { if let Err(e) = check_cuvs(unsafe { ffi::cuvsCagraIndexParamsDestroy(self.0) }) { @@ -160,15 +71,6 @@ impl Drop for IndexParams { } } -impl Drop for CompressionParams { - fn drop(&mut self) { - if let Err(e) = check_cuvs(unsafe { ffi::cuvsCagraCompressionParamsDestroy(self.0) }) { - write!(stderr(), "failed to call cuvsCagraCompressionParamsDestroy {:?}", e) - .expect("failed to write to stderr"); - } - } -} - #[cfg(test)] mod tests { use super::*; @@ -180,8 +82,7 @@ mod tests { .set_intermediate_graph_degree(128) .set_graph_degree(16) .set_build_algo(BuildAlgo::NN_DESCENT) - .set_nn_descent_niter(10) - .set_compression(CompressionParams::new().unwrap().set_pq_bits(4).set_pq_dim(8)); 
+ .set_nn_descent_niter(10); // make sure the setters actually updated internal representation on the c-struct unsafe { @@ -189,8 +90,6 @@ mod tests { assert_eq!((*params.0).intermediate_graph_degree, 128); assert_eq!((*params.0).build_algo, BuildAlgo::NN_DESCENT); assert_eq!((*params.0).nn_descent_niter, 10); - assert_eq!((*(*params.0).compression).pq_dim, 8); - assert_eq!((*(*params.0).compression).pq_bits, 4); } } } diff --git a/rust/cuvs/src/cagra/mod.rs b/rust/cuvs/src/cagra/mod.rs index 9043b17386..209d5201a5 100644 --- a/rust/cuvs/src/cagra/mod.rs +++ b/rust/cuvs/src/cagra/mod.rs @@ -94,5 +94,5 @@ mod index_params; mod search_params; pub use index::Index; -pub use index_params::{BuildAlgo, CompressionParams, IndexParams}; +pub use index_params::{BuildAlgo, IndexParams}; pub use search_params::{HashMode, SearchAlgo, SearchParams};