Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions python/cuvs/cuvs/common/cydlpack.pyx
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
#
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
#
# cython: language_level=3

import numpy as np

from cuda.bindings.cyruntime cimport (
cudaError,
cudaError_t,
cudaPointerAttributes,
cudaPointerGetAttributes,
)
from libc cimport stdlib
from libc.stdint cimport uintptr_t

Expand Down Expand Up @@ -57,12 +63,36 @@ def dl_data_type_to_numpy(DLDataType dtype):
raise ValueError(f"unknown DLDataTypeCode.code: {dtype.code}")


cdef int _cuda_pointer_device_id(uintptr_t tensor_ptr) except *:
    """Return the device ordinal the CUDA runtime reports for a pointer.

    Parameters
    ----------
    tensor_ptr : uintptr_t
        Address to look up, passed to ``cudaPointerGetAttributes``.

    Returns
    -------
    int
        The ``device`` field of the attributes filled in by the runtime.

    Raises
    ------
    ValueError
        If ``cudaPointerGetAttributes`` returns anything other than
        ``cudaSuccess``.
    """
    cdef cudaPointerAttributes ptr_attrs
    cdef cudaError_t rc

    rc = cudaPointerGetAttributes(&ptr_attrs, <const void*>tensor_ptr)

    # Happy path first: the runtime filled in the attributes.
    if rc == cudaError.cudaSuccess:
        return ptr_attrs.device

    raise ValueError(
        f"Unable to determine CUDA device for array pointer: {rc}"
    )


cdef int _dlpack_device_id_c(ary) except *:
    """Compute the DLPack ``device_id`` to report for a wrapped array.

    Parameters
    ----------
    ary : object
        Wrapper exposing ``ai_`` (a mapping whose ``"data"`` entry holds the
        data pointer as its first element) and a ``from_cai`` flag.
        Presumably ``from_cai`` marks arrays that came through
        ``__cuda_array_interface__`` -- confirm against the wrapper class.

    Returns
    -------
    int
        ``0`` when ``from_cai`` is falsy; otherwise the device ordinal that
        ``_cuda_pointer_device_id`` reports for the data pointer.
    """
    cdef uintptr_t data_ptr

    # The pointer is read unconditionally, before the flag is inspected.
    data_ptr = <uintptr_t>ary.ai_["data"][0]

    if not ary.from_cai:
        return 0

    return _cuda_pointer_device_id(data_ptr)


def _dlpack_device_id(ary):
    """Python-callable entry point for ``_dlpack_device_id_c``.

    Forwards ``ary`` unchanged to the C-level helper and returns its result,
    so Python code can obtain the same device id.
    """
    device_id = _dlpack_device_id_c(ary)
    return device_id


cdef DLManagedTensor* dlpack_c(ary):
# todo(dgd): add checking options/parameters
cdef DLDeviceType dev_type
cdef DLDevice dev
cdef DLDataType dtype
cdef DLTensor tensor
cdef uintptr_t tensor_ptr = <uintptr_t>ary.ai_["data"][0]
cdef DLManagedTensor* dlm = \
<DLManagedTensor*>stdlib.malloc(sizeof(DLManagedTensor))

Expand All @@ -72,7 +102,7 @@ cdef DLManagedTensor* dlpack_c(ary):
dev_type = DLDeviceType.kDLCPU

dev.device_type = dev_type
dev.device_id = 0
dev.device_id = _dlpack_device_id_c(ary)

# todo (dgd): change to nice dict
if ary.dtype == np.float32:
Expand Down Expand Up @@ -117,9 +147,6 @@ cdef DLManagedTensor* dlpack_c(ary):
for i in range(ndim):
shape[i] = ary.shape[i]

cdef uintptr_t tensor_ptr
tensor_ptr = <uintptr_t>ary.ai_["data"][0]

tensor.data = <void*> tensor_ptr
tensor.device = dev
tensor.dtype = dtype
Expand Down
29 changes: 28 additions & 1 deletion python/cuvs/cuvs/tests/test_device_tensor_view.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,42 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
#

import cupy as cp
import numpy as np
import pytest

from pylibraft.common.cai_wrapper import wrap_array

from cuvs.common.cydlpack import _dlpack_device_id
from cuvs.common.device_tensor_view import DeviceTensorView
from cuvs.tests.ann_utils import generate_data


def has_multiple_gpus():
    """Return True when CuPy's runtime reports more than one CUDA device.

    Any exception raised while querying the device count is treated as
    "no multi-GPU setup available" and yields False.
    """
    try:
        device_count = cp.cuda.runtime.getDeviceCount()
    except Exception:
        # Best-effort probe: a failing query must not break test collection.
        return False
    return device_count > 1


# Marker for tests that need a second GPU. The device probe runs once, when
# this module is imported, and decorated tests are skipped if it fails.
requires_multiple_gpus = pytest.mark.skipif(
    not has_multiple_gpus(),
    reason="Multi-GPU tests require multiple GPUs",
)


def test_dlpack_device_id_for_host_array():
    """A wrapped NumPy array must report device id 0."""
    host_array = np.empty((4,), dtype=np.float32)
    wrapped = wrap_array(host_array)
    assert _dlpack_device_id(wrapped) == 0


# Checks the device id for an array that does not live on device 0: a CuPy
# array is allocated while device 1 is selected, and the device id derived
# from its wrapped form is expected to be 1. Skipped via the
# `requires_multiple_gpus` marker when fewer than two GPUs are available.
@requires_multiple_gpus
def test_dlpack_device_id_matches_cuda_array_device():
# Select device 1 so the allocation below lands on the second GPU.
with cp.cuda.Device(1):
ary = cp.empty((4,), dtype=cp.float32)
# The reported id must match the device the array was allocated on.
assert _dlpack_device_id(wrap_array(ary)) == 1


@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.int32])
def test_device_tensor_view(dtype):
n_rows, n_cols = 1000, 64
Expand Down
Loading