diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
index a7c15b4161..98fbfc002c 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
@@ -69,6 +69,19 @@ void check_graph_degree(size_t& intermediate_degree, size_t& graph_degree, size_
       intermediate_degree);
     graph_degree = intermediate_degree;
   }
+
+  // Advisory check only: warn when the intermediate degree is below ceil(1.5 * graph_degree);
+  // neither degree is modified here.
+  const size_t recommended_intermediate = recommended_intermediate_graph_degree(graph_degree);
+  if (intermediate_degree < recommended_intermediate) {
+    RAFT_LOG_WARN(
+      "CAGRA: intermediate_graph_degree (%zu) is below the recommended minimum (%zu, i.e. "
+      "1.5 * graph_degree). Pruning to graph_degree (%zu) may result in a disconnected graph"
+      " if the intermediate graph does not retain enough neighborhood information.",
+      intermediate_degree,
+      recommended_intermediate,
+      graph_degree);
+  }
 }
 
 // ACE: Get partition labels for partitioned approach
diff --git a/cpp/src/neighbors/detail/cagra/graph_core.cuh b/cpp/src/neighbors/detail/cagra/graph_core.cuh
index 52b4542798..e7e9842d27 100644
--- a/cpp/src/neighbors/detail/cagra/graph_core.cuh
+++ b/cpp/src/neighbors/detail/cagra/graph_core.cuh
@@ -1676,11 +1676,20 @@ void prune_graph_gpu(
     raft::copy(res, host_stats.view(), raft::make_const_mdspan(dev_stats.view()));
     raft::resource::sync_stream(res);
 
+    // Computed up front so the failure message can suggest a concrete intermediate_graph_degree.
+    const size_t recommended_intermediate =
+      recommended_intermediate_graph_degree(output_graph_degree);
     RAFT_EXPECTS(
       invalid_neighbor_list == 0,
-      "Could not generate an intermediate CAGRA graph because the initial kNN graph contains too "
-      "many invalid or duplicated neighbor nodes. This error can occur, for example, if too many "
-      "overflows occur during the norm computation between the dataset vectors.");
+      "CAGRA graph pruning failed: could not select graph_degree (%zu) distinct neighbors for "
+      "every node from the intermediate kNN graph (intermediate_graph_degree=%zu). This usually "
+      "means the intermediate graph does not encode enough neighborhood information for pruning - "
+      "for example when intermediate_graph_degree is too close to graph_degree. Set "
+      "intermediate_graph_degree >= %zu (1.5 * graph_degree). "
+      "Other causes include invalid or duplicate neighbor indices in the intermediate kNN graph.",
+      static_cast<size_t>(output_graph_degree),
+      static_cast<size_t>(knn_graph_degree),
+      recommended_intermediate);
 
     num_keep = host_stats.data_handle()[0];
     num_full = host_stats.data_handle()[1];
diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp
index 58bf68bb43..7f0c516db2 100644
--- a/cpp/src/neighbors/detail/cagra/utils.hpp
+++ b/cpp/src/neighbors/detail/cagra/utils.hpp
@@ -301,4 +301,17 @@ void copy_with_padding(
   }
 }
 
+/**
+ * @brief Recommended lower bound for `intermediate_graph_degree` given a target `graph_degree`.
+ *
+ * Evaluates to ceil(1.5 * graph_degree) using integer arithmetic only (odd inputs round up).
+ *
+ * @param[in] graph_degree degree requested for the final (pruned) graph
+ * @return graph_degree + ceil(graph_degree / 2)
+ */
+constexpr size_t recommended_intermediate_graph_degree(size_t graph_degree) noexcept
+{
+  return graph_degree + ((graph_degree + 1) / 2);
+}
+
 }  // namespace cuvs::neighbors::cagra::detail