Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion c/include/cuvs/core/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ CUVS_EXPORT cuvsError_t cuvsMatrixCopy(cuvsResources_t res, DLManagedTensor* src
* @param[in] res cuvsResources_t opaque C handle
* @param[in] src Pointer to DLManagedTensor to copy
* @param[in] start First row index to include in the output
* @param[in] end Last row index to include in the output
* @param[in] end One past the last row index to include in the output
* @param[out] dst Pointer to DLManagedTensor to receive slice from matrix
*/
CUVS_EXPORT cuvsError_t cuvsMatrixSliceRows(
Expand Down
7 changes: 4 additions & 3 deletions c/include/cuvs/distance/pairwise_distance.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ extern "C" {
* @endcode
*
* @param[in] res cuvs resources object for managing expensive resources
* @param[in] x first set of points (size n*k)
* @param[in] y second set of points (size m*k)
* @param[out] dist output distance matrix (size n*m)
* @param[in] x first set of points (size n*k). Must have the same floating point dtype as `y`
* @param[in] y second set of points (size m*k). Must have the same floating point dtype as `x`
* @param[out] dist output distance matrix (size n*m). Must be float32 for float16 inputs, and
* match the input dtype otherwise
* @param[in] metric distance to evaluate
* @param[in] metric_arg metric argument (used for Minkowski distance)
*/
Expand Down
46 changes: 32 additions & 14 deletions c/src/core/c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,33 +339,51 @@ extern "C" cuvsError_t cuvsMatrixSliceRows(cuvsResources_t res,
DLManagedTensor* dst_managed)
{
return cuvs::core::translate_exceptions([=] {
RAFT_EXPECTS(end >= start, "end index must be greater than start index");
RAFT_EXPECTS(dst_managed != nullptr, "dst tensor should be initialized");

dst_managed->dl_tensor = DLTensor{};
dst_managed->manager_ctx = nullptr;
dst_managed->deleter = nullptr;

RAFT_EXPECTS(src_managed != nullptr, "src tensor should be initialized");

DLTensor& src = src_managed->dl_tensor;
DLTensor& dst = dst_managed->dl_tensor;
RAFT_EXPECTS(src.ndim <= 2, "src should be a 1 or 2 dimensional tensor");
RAFT_EXPECTS(src.ndim == 1 || src.ndim == 2, "src should be a 1 or 2 dimensional tensor");
RAFT_EXPECTS(src.shape != nullptr, "shape should be initialized in the src tensor");
RAFT_EXPECTS(src.data != nullptr, "data should be initialized in the src tensor");
RAFT_EXPECTS(start >= 0 && end >= start && end <= src.shape[0],
"row slice range must satisfy 0 <= start <= end <= src.shape[0]");

dst.dtype = src.dtype;
dst.device = src.device;
dst.ndim = src.ndim;
dst.shape = new int64_t[dst.ndim];
dst.shape[0] = end - start;
auto shape = std::make_unique<int64_t[]>(src.ndim);
std::unique_ptr<int64_t[]> strides;
shape[0] = end - start;

int64_t row_strides = 1;

if (dst.ndim == 2) {
dst.shape[1] = src.shape[1];
row_strides = dst.shape[1];
if (src.ndim == 1 && src.strides) {
strides = std::make_unique<int64_t[]>(1);
row_strides = strides[0] = src.strides[0];
}

if (src.ndim == 2) {
shape[1] = src.shape[1];
row_strides = shape[1];

if (src.strides) {
dst.strides = new int64_t[2];
row_strides = dst.strides[0] = src.strides[0];
dst.strides[1] = src.strides[1];
strides = std::make_unique<int64_t[]>(2);
row_strides = strides[0] = src.strides[0];
strides[1] = src.strides[1];
}
}

dst.data = static_cast<char*>(src.data) + start * row_strides * (dst.dtype.bits / 8);
dst.dtype = src.dtype;
dst.device = src.device;
dst.ndim = src.ndim;
dst.shape = shape.release();
dst.strides = strides.release();
dst.byte_offset = src.byte_offset;
dst.data = static_cast<char*>(src.data) + start * row_strides * (dst.dtype.bits / 8);
dst_managed->deleter = cuvsMatrixDestroy;
});
}
19 changes: 15 additions & 4 deletions c/src/distance/pairwise_distance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,26 @@ extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res,
{
return cuvs::core::translate_exceptions([=] {
auto x_dt = x_tensor->dl_tensor.dtype;
auto y_dt = x_tensor->dl_tensor.dtype;
auto dist_dt = x_tensor->dl_tensor.dtype;
auto y_dt = y_tensor->dl_tensor.dtype;
auto dist_dt = distances_tensor->dl_tensor.dtype;

if ((x_dt.code != kDLFloat) || (y_dt.code != kDLFloat) || (dist_dt.code != kDLFloat)) {
RAFT_FAIL("Inputs to cuvsPairwiseDistance must all be floating point tensors");
}

if ((x_dt.bits != y_dt.bits) || (x_dt.bits != dist_dt.bits)) {
RAFT_FAIL("Inputs to cuvsPairwiseDistance must all have the same dtype");
if (x_dt.lanes != 1 || y_dt.lanes != 1 || dist_dt.lanes != 1) {
RAFT_FAIL("Inputs to cuvsPairwiseDistance must all have a single dtype lane");
}

if (x_dt.bits != y_dt.bits) {
RAFT_FAIL("X and Y inputs to cuvsPairwiseDistance must have the same dtype");
}

auto expected_dist_bits = x_dt.bits == 16 ? 32 : x_dt.bits;
if (dist_dt.bits != expected_dist_bits) {
RAFT_FAIL(
"distances output to cuvsPairwiseDistance must have dtype float32 for float16 inputs "
"and match the input dtype otherwise");
}

bool x_row_major;
Expand Down
71 changes: 70 additions & 1 deletion c/tests/core/c_api.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -9,6 +9,73 @@
#include <stdio.h>
#include <stdlib.h>

static void expect_matrix_slice_error(cuvsResources_t res,
DLManagedTensor* src,
int64_t start,
int64_t end)
{
int64_t sentinel_stride = 0;
DLManagedTensor dst = {0};
dst.dl_tensor.strides = &sentinel_stride;

if (cuvsMatrixSliceRows(res, src, start, end, &dst) != CUVS_ERROR) { exit(EXIT_FAILURE); }
if (dst.dl_tensor.shape != NULL || dst.dl_tensor.strides != NULL || dst.deleter != NULL) {
exit(EXIT_FAILURE);
}
}

static void test_matrix_slice_rows(cuvsResources_t res)
{
int32_t data[] = {0, 1, 2, 3, 4, 5};
int64_t shape_2d[] = {3, 2};
int64_t sentinel_stride = 0;
DLManagedTensor src_2d = {0};
src_2d.dl_tensor.data = data;
src_2d.dl_tensor.device = (DLDevice){kDLCPU, 0};
src_2d.dl_tensor.ndim = 2;
src_2d.dl_tensor.dtype = (DLDataType){kDLInt, 32, 1};
src_2d.dl_tensor.shape = shape_2d;
src_2d.dl_tensor.byte_offset = 0;

DLManagedTensor dst_2d = {0};
dst_2d.dl_tensor.strides = &sentinel_stride;
if (cuvsMatrixSliceRows(res, &src_2d, 1, 3, &dst_2d) != CUVS_SUCCESS) {
exit(EXIT_FAILURE);
}
if (dst_2d.dl_tensor.ndim != 2 || dst_2d.dl_tensor.shape[0] != 2 ||
dst_2d.dl_tensor.shape[1] != 2 || dst_2d.dl_tensor.data != (void*)(data + 2) ||
dst_2d.dl_tensor.strides != NULL || dst_2d.deleter == NULL) {
exit(EXIT_FAILURE);
}
dst_2d.deleter(&dst_2d);

int64_t shape_1d[] = {6};
DLManagedTensor src_1d = {0};
src_1d.dl_tensor.data = data;
src_1d.dl_tensor.device = (DLDevice){kDLCPU, 0};
src_1d.dl_tensor.ndim = 1;
src_1d.dl_tensor.dtype = (DLDataType){kDLInt, 32, 1};
src_1d.dl_tensor.shape = shape_1d;

DLManagedTensor dst_1d = {0};
if (cuvsMatrixSliceRows(res, &src_1d, 1, 4, &dst_1d) != CUVS_SUCCESS) {
exit(EXIT_FAILURE);
}
if (dst_1d.dl_tensor.ndim != 1 || dst_1d.dl_tensor.shape[0] != 3 ||
dst_1d.dl_tensor.data != (void*)(data + 1) || dst_1d.dl_tensor.strides != NULL ||
dst_1d.deleter == NULL) {
exit(EXIT_FAILURE);
}
dst_1d.deleter(&dst_1d);

expect_matrix_slice_error(res, &src_2d, -1, 1);
expect_matrix_slice_error(res, &src_2d, 0, 4);

DLManagedTensor src_0d = src_2d;
src_0d.dl_tensor.ndim = 0;
expect_matrix_slice_error(res, &src_0d, 0, 0);
}

int main()
{
// Create resources
Expand Down Expand Up @@ -73,6 +140,8 @@ int main()
cuvsError_t free_error_pinned = cuvsRMMHostFree(ptr3, 1024);
if (free_error_pinned == CUVS_ERROR) { exit(EXIT_FAILURE); }

test_matrix_slice_rows(res);

// Destroy resources
error = cuvsResourcesDestroy(res);
if (error == CUVS_ERROR) { exit(EXIT_FAILURE); }
Expand Down
129 changes: 129 additions & 0 deletions c/tests/distance/pairwise_distance_c.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,20 @@
*/

#include <cuda.h>
#include <cuda_fp16.h>

#include <gtest/gtest.h>
#include <raft/core/error.hpp>
#include <raft/core/device_mdarray.hpp>
#include <raft/core/handle.hpp>
#include <raft/random/rng.cuh>

#include <cuvs/core/c_api.h>
#include <cuvs/distance/distance.h>
#include <cuvs/distance/pairwise_distance.h>

#include <string>
#include <string_view>

extern "C" void run_pairwise_distance(int64_t n_rows,
int64_t n_queries,
Expand All @@ -28,6 +35,69 @@ void generate_random_data(T* devPtr, size_t size)
raft::random::uniform(handle, r, devPtr, size, T(0.1), T(2.0));
};

namespace {

struct DeviceMatrixTensor {
DLManagedTensor tensor{};
int64_t shape[2]{};

DeviceMatrixTensor(void* data, int64_t rows, int64_t cols, DLDataType dtype)
{
shape[0] = rows;
shape[1] = cols;
tensor.dl_tensor.data = data;
tensor.dl_tensor.device = DLDevice{kDLCUDA, 0};
tensor.dl_tensor.ndim = 2;
tensor.dl_tensor.dtype = dtype;
tensor.dl_tensor.shape = shape;
tensor.dl_tensor.strides = nullptr;
tensor.dl_tensor.byte_offset = 0;
}
};

DLDataType float_dtype(uint8_t bits) { return DLDataType{kDLFloat, bits, 1}; }

void expect_pairwise_distance_error_contains(DLDataType x_dtype,
DLDataType y_dtype,
DLDataType distances_dtype,
std::string_view expected_error)
{
cuvsResources_t res;
ASSERT_EQ(cuvsResourcesCreate(&res), CUVS_SUCCESS);

void *x_data, *y_data, *distances_data;
RAFT_CUDA_TRY(cudaMalloc(&x_data, 2 * 3 * sizeof(double)));
RAFT_CUDA_TRY(cudaMalloc(&y_data, 4 * 3 * sizeof(double)));
RAFT_CUDA_TRY(cudaMalloc(&distances_data, 2 * 4 * sizeof(double)));

DeviceMatrixTensor x_tensor{x_data, 2, 3, x_dtype};
DeviceMatrixTensor y_tensor{y_data, 4, 3, y_dtype};
DeviceMatrixTensor distances_tensor{distances_data, 2, 4, distances_dtype};

auto status = cuvsPairwiseDistance(res,
&x_tensor.tensor,
&y_tensor.tensor,
&distances_tensor.tensor,
L2Expanded,
2.0f);
EXPECT_EQ(status, CUVS_ERROR);
if (status == CUVS_ERROR) {
const char* error_text = cuvsGetLastErrorText();
if (error_text == nullptr) {
ADD_FAILURE() << "Expected cuvsPairwiseDistance to set an error message";
} else {
EXPECT_NE(std::string{error_text}.find(expected_error), std::string::npos) << error_text;
}
}

RAFT_CUDA_TRY(cudaFree(x_data));
RAFT_CUDA_TRY(cudaFree(y_data));
RAFT_CUDA_TRY(cudaFree(distances_data));
ASSERT_EQ(cuvsResourcesDestroy(res), CUVS_SUCCESS);
}

} // namespace

TEST(PairwiseDistanceC, Distance)
{
int64_t n_rows = 8096;
Expand All @@ -51,3 +121,62 @@ TEST(PairwiseDistanceC, Distance)
cudaFree(query_data);
cudaFree(distances_data);
}

TEST(PairwiseDistanceC, FailsWithMismatchedInputDtypes)
{
expect_pairwise_distance_error_contains(float_dtype(32),
float_dtype(64),
float_dtype(32),
"X and Y inputs to cuvsPairwiseDistance must have the "
"same dtype");
}

TEST(PairwiseDistanceC, FailsWithMismatchedFloatOutputDtype)
{
expect_pairwise_distance_error_contains(
float_dtype(32),
float_dtype(32),
float_dtype(64),
"distances output to cuvsPairwiseDistance must have dtype float32 for float16 inputs");
}

TEST(PairwiseDistanceC, FailsWithFloat16OutputForFloat16Inputs)
{
expect_pairwise_distance_error_contains(
float_dtype(16),
float_dtype(16),
float_dtype(16),
"distances output to cuvsPairwiseDistance must have dtype float32 for float16 inputs");
}

TEST(PairwiseDistanceC, AllowsFloat32OutputForFloat16Inputs)
{
cuvsResources_t res;
ASSERT_EQ(cuvsResourcesCreate(&res), CUVS_SUCCESS);

constexpr int64_t n_rows = 2;
constexpr int64_t n_queries = 3;
constexpr int64_t n_dim = 4;

half *x_data, *y_data;
float* distances_data;
RAFT_CUDA_TRY(cudaMalloc(&x_data, sizeof(half) * n_rows * n_dim));
RAFT_CUDA_TRY(cudaMalloc(&y_data, sizeof(half) * n_queries * n_dim));
RAFT_CUDA_TRY(cudaMalloc(&distances_data, sizeof(float) * n_rows * n_queries));
RAFT_CUDA_TRY(cudaMemset(x_data, 0, sizeof(half) * n_rows * n_dim));
RAFT_CUDA_TRY(cudaMemset(y_data, 0, sizeof(half) * n_queries * n_dim));

DeviceMatrixTensor x_tensor{x_data, n_rows, n_dim, float_dtype(16)};
DeviceMatrixTensor y_tensor{y_data, n_queries, n_dim, float_dtype(16)};
DeviceMatrixTensor distances_tensor{distances_data, n_rows, n_queries, float_dtype(32)};

auto status = cuvsPairwiseDistance(
res, &x_tensor.tensor, &y_tensor.tensor, &distances_tensor.tensor, L2Expanded, 2.0f);
EXPECT_EQ(status, CUVS_SUCCESS) << (cuvsGetLastErrorText() ? cuvsGetLastErrorText() : "");
if (status == CUVS_SUCCESS) { EXPECT_EQ(cuvsStreamSync(res), CUVS_SUCCESS); }

RAFT_CUDA_TRY(cudaFree(x_data));
RAFT_CUDA_TRY(cudaFree(y_data));
RAFT_CUDA_TRY(cudaFree(distances_data));
ASSERT_EQ(cuvsResourcesDestroy(res), CUVS_SUCCESS);
}
9 changes: 5 additions & 4 deletions rust/cuvs/examples/cagra.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ fn cagra_example() -> Result<()> {

// build the cagra index
let build_params = IndexParams::new()?;
let index = Index::build(&res, &build_params, &dataset)?;
let dataset_device = ManagedTensor::from_ndarray(&dataset)?.to_device(&res)?;
let index = Index::build(&res, &build_params, dataset_device)?;
println!("Indexed {}x{} datapoints into cagra index", n_datapoints, n_features);

// use the first 4 points from the dataset as queries : will test that we get them back
Expand All @@ -35,12 +36,12 @@ fn cagra_example() -> Result<()> {
// CAGRA search API requires queries and outputs to be on device memory
// copy query data over, and allocate new device memory for the distances/ neighbors
// outputs
let queries = ManagedTensor::from(&queries).to_device(&res)?;
let queries = ManagedTensor::from_ndarray(&queries)?.to_device(&res)?;
let mut neighbors_host = ndarray::Array::<u32, _>::zeros((n_queries, k));
let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res)?;
let neighbors = ManagedTensor::from_ndarray(&neighbors_host)?.to_device(&res)?;

let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
let distances = ManagedTensor::from(&distances_host).to_device(&res)?;
let distances = ManagedTensor::from_ndarray(&distances_host)?.to_device(&res)?;

let search_params = SearchParams::new()?;

Expand Down
Loading
Loading