On Tue, 4 Jan 2022 01:47:21 +0000 <eagost...@nvidia.com> wrote: > static int > -cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr) > +cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr, unsigned int > align) > { > CUresult res; > const char *err_string; > @@ -610,8 +612,10 @@ cuda_mem_alloc(struct rte_gpu *dev, size_t size, void > **ptr) > > /* Allocate memory */ > mem_alloc_list_tail->size = size; > - res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_d), > - mem_alloc_list_tail->size); > + mem_alloc_list_tail->size_orig = size + align; > + > + res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_orig_d), > + mem_alloc_list_tail->size_orig); > if (res != 0) { > pfn_cuGetErrorString(res, &(err_string)); > rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", > @@ -620,6 +624,13 @@ cuda_mem_alloc(struct rte_gpu *dev, size_t size, void > **ptr) > return -rte_errno; > } > > + > + /* Align memory address */ > + mem_alloc_list_tail->ptr_d = mem_alloc_list_tail->ptr_orig_d; > + if (align && ((uintptr_t)mem_alloc_list_tail->ptr_d) % align) > + mem_alloc_list_tail->ptr_d += (align - > + (((uintptr_t)mem_alloc_list_tail->ptr_d) % > align));
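POSIX posix_memalign() takes size_t for both size and alignment, so align should be a size_t here rather than an unsigned int. It is better to put the input parameters first and the resulting output parameter last, for consistency; this follows Rusty Russell's API design manifesto. Alignment only makes sense if it is a power of two. The code should check for that and optimize for it, e.g. by rounding the pointer up with a mask on (align - 1) instead of using the modulo operator.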