libxnvme_cuda.h#
Structs#
xnvme_cuda_queue#
-
struct xnvme_cuda_queue#
GPU NVMe IO helpers.
Host-side queue management and device-side helpers for submitting NVMe commands from CUDA kernels using xNVMe’s NVMe spec types.
Instances live in CUDA device memory. Create one via xnvme_cuda_queue_create() and pass the returned pointer as a kernel argument.
With __CUDACC__ (nvcc compiling CUDA kernels): the full definition is provided. This is required for xnvme_cuda_cmd_io().
Note
This API is experimental and may change without notice.
GPU-resident NVMe queue pair.
Public Members
-
struct nvme_qpair_cuda qpair#
Functions#
xnvme_cuda_cmd_io#
-
static inline __device__ int xnvme_cuda_cmd_io(struct xnvme_cuda_queue *qp, struct xnvme_spec_cmd *cmd, size_t tid, size_t batch_size)#
Submit an NVMe command and reap its completion from a CUDA kernel.
Each active thread (tid < batch_size) submits one command and reaps one completion. Inactive threads participate in the required barriers but do not submit or reap.
- Parameters:
qp – GPU queue pair (from xnvme_cuda_queue_create())
cmd – Fully populated NVMe command
tid – Thread index within the block (threadIdx.x)
batch_size – Number of IOs to submit; threads with tid >= batch_size participate in barriers but do not submit or reap
- Returns:
0 on success. -EAGAIN on completion timeout. Positive NVMe status code on device error.
xnvme_cuda_queue_create#
-
int xnvme_cuda_queue_create(struct xnvme_dev *dev, uint16_t depth, struct xnvme_cuda_queue **queue)#
Create a GPU-resident NVMe IO queue.
Allocates a queue pair in CUDA device memory and registers it with the NVMe controller. The resulting pointer lives in device memory and can be passed directly as a CUDA kernel argument.
- Parameters:
dev – An xnvme_dev opened on an upcie-cuda device
depth – Number of IO slots in the queue
queue – On success, set to a device pointer to the GPU queue
- Returns:
0 on success, negative error code on failure
xnvme_cuda_queue_destroy#
-
void xnvme_cuda_queue_destroy(struct xnvme_dev *dev, struct xnvme_cuda_queue *queue)#
Destroy a GPU-resident NVMe IO queue.
Deletes the queue pair from the NVMe controller and frees the device memory.
- Parameters:
dev – The xnvme_dev used to create the queue
queue – GPU queue pointer returned by xnvme_cuda_queue_create()
Header#
// SPDX-FileCopyrightText: Samsung Electronics Co., Ltd
//
// SPDX-License-Identifier: BSD-3-Clause
/**
* GPU NVMe IO helpers
*
* Host-side queue management and device-side helpers for submitting NVMe
* commands from CUDA kernels using xNVMe's NVMe spec types.
*
* @note This API is experimental and may change without notice.
*
* @headerfile libxnvme_cuda.h
*/
#ifndef __LIBXNVME_CUDA_H
#define __LIBXNVME_CUDA_H
#include <stdint.h>
#ifdef __CUDACC__
#include <errno.h>
#include <upcie/nvme/nvme_command.h>
#include <upcie/nvme/nvme_qpair_cuda.h>
/**
 * GPU-resident NVMe queue pair
 *
 * Instances live in CUDA device memory. Create one via xnvme_cuda_queue_create()
 * and pass the returned pointer as a kernel argument.
 *
 * With __CUDACC__ (nvcc compiling CUDA kernels):
 * the full definition is provided. This is required for xnvme_cuda_cmd_io().
 */
struct xnvme_cuda_queue {
struct nvme_qpair_cuda qpair; /**< Device-side queue pair; passed to nvme_qpair_cuda_io() by xnvme_cuda_cmd_io() */
};
#else
/**
 * Opaque GPU-resident NVMe queue pair
 *
 * Instances live in CUDA device memory. Create one via xnvme_cuda_queue_create()
 * and pass the returned pointer as a kernel argument.
 *
 * Without __CUDACC__ (host C/C++ callers):
 * only a forward declaration is visible; host code handles the pointer as an
 * opaque handle and never dereferences it.
 */
struct xnvme_cuda_queue;
#endif
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Create a GPU-resident NVMe IO queue
 *
 * Allocates a queue pair in CUDA device memory and registers it with the NVMe
 * controller. The resulting pointer lives in device memory and can be passed
 * directly as a CUDA kernel argument.
 *
 * The caller owns the queue and must release it with xnvme_cuda_queue_destroy().
 *
 * @param dev An xnvme_dev opened on an ``upcie-cuda`` device
 * @param depth Number of IO slots in the queue
 * @param queue On success, set to a device pointer to the GPU queue; unmodified
 *              on failure
 *
 * @return 0 on success, negative error code on failure
 */
int
xnvme_cuda_queue_create(struct xnvme_dev *dev, uint16_t depth, struct xnvme_cuda_queue **queue);
/**
 * Destroy a GPU-resident NVMe IO queue
 *
 * Deletes the queue pair from the NVMe controller and frees the device memory.
 * After this call the queue pointer is invalid and must not be used again.
 *
 * @param dev The xnvme_dev used to create the queue
 * @param queue GPU queue pointer returned by xnvme_cuda_queue_create()
 */
void
xnvme_cuda_queue_destroy(struct xnvme_dev *dev, struct xnvme_cuda_queue *queue);
/**
 * Submit an NVMe command and reap its completion from a CUDA kernel
 *
 * Every thread in the block must call this function: active threads
 * (tid < batch_size) each submit one command and reap one completion, while
 * the remaining threads only take part in the required barriers.
 *
 * @param qp GPU queue pair (from xnvme_cuda_queue_create())
 * @param cmd Fully populated NVMe command
 * @param tid Thread index within the block (threadIdx.x)
 * @param batch_size Number of IOs to submit; threads with tid >= batch_size
 *                   participate in barriers but do not submit or reap
 *
 * @return 0 on success. -EAGAIN on completion timeout. Positive NVMe status
 *         code on device error.
 */
#ifdef __CUDACC__
static inline __device__ int
xnvme_cuda_cmd_io(struct xnvme_cuda_queue *qp, struct xnvme_spec_cmd *cmd, size_t tid,
		  size_t batch_size)
{
	/* NOTE(review): the cast assumes xnvme_spec_cmd and nvme_command share
	 * the raw NVMe command layout — confirm against upcie's definition. */
	struct nvme_command *raw_cmd = (struct nvme_command *)cmd;

	return nvme_qpair_cuda_io(&qp->qpair, raw_cmd, tid, batch_size);
}
#endif /* __CUDACC__ */
#ifdef __cplusplus
}
#endif
#endif /* __LIBXNVME_CUDA_H */