From: Elena Agostini <eagost...@nvidia.com> Implement aligned GPU memory allocation in GPU CUDA driver.
Signed-off-by: Elena Agostini <eagost...@nvidia.com> --- drivers/gpu/cuda/cuda.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/cuda/cuda.c b/drivers/gpu/cuda/cuda.c index 882df08e56..4ad3f5fc90 100644 --- a/drivers/gpu/cuda/cuda.c +++ b/drivers/gpu/cuda/cuda.c @@ -139,8 +139,10 @@ typedef uintptr_t cuda_ptr_key; /* Single entry of the memory list */ struct mem_entry { CUdeviceptr ptr_d; + CUdeviceptr ptr_orig_d; void *ptr_h; size_t size; + size_t size_orig; struct rte_gpu *dev; CUcontext ctx; cuda_ptr_key pkey; @@ -569,7 +571,7 @@ cuda_dev_info_get(struct rte_gpu *dev, struct rte_gpu_info *info) */ static int -cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr) +cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr, unsigned int align) { CUresult res; const char *err_string; @@ -610,8 +612,10 @@ cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr) /* Allocate memory */ mem_alloc_list_tail->size = size; - res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_d), - mem_alloc_list_tail->size); + mem_alloc_list_tail->size_orig = size + align; + + res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_orig_d), + mem_alloc_list_tail->size_orig); if (res != 0) { pfn_cuGetErrorString(res, &(err_string)); rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", @@ -620,6 +624,13 @@ cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr) return -rte_errno; } + + /* Align memory address */ + mem_alloc_list_tail->ptr_d = mem_alloc_list_tail->ptr_orig_d; + if (align && ((uintptr_t)mem_alloc_list_tail->ptr_d) % align) + mem_alloc_list_tail->ptr_d += (align - + (((uintptr_t)mem_alloc_list_tail->ptr_d) % align)); + /* GPUDirect RDMA attribute required */ res = pfn_cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, @@ -634,7 +645,6 @@ cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr) mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_d); mem_alloc_list_tail->ptr_h = NULL; - mem_alloc_list_tail->size = size; mem_alloc_list_tail->dev = dev; mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); mem_alloc_list_tail->mtype = GPU_MEM; @@ -761,6 +771,7 @@ cuda_mem_register(struct rte_gpu *dev, size_t size, void *ptr) mem_alloc_list_tail->dev = dev; mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); mem_alloc_list_tail->mtype = CPU_REGISTERED; + mem_alloc_list_tail->ptr_orig_d = mem_alloc_list_tail->ptr_d; /* Restore original ctx as current ctx */ res = pfn_cuCtxSetCurrent(current_ctx); @@ -796,7 +807,7 @@ cuda_mem_free(struct rte_gpu *dev, void *ptr) } if (mem_item->mtype == GPU_MEM) { - res = pfn_cuMemFree(mem_item->ptr_d); + res = pfn_cuMemFree(mem_item->ptr_orig_d); if (res != 0) { pfn_cuGetErrorString(res, &(err_string)); rte_cuda_log(ERR, "cuMemFree current failed with %s", -- 2.17.1