On Tue, 4 Jan 2022 01:47:21 +0000
<eagost...@nvidia.com> wrote:

>  static int
> -cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr)
> +cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr, unsigned int 
> align)
>  {
>       CUresult res;
>       const char *err_string;
> @@ -610,8 +612,10 @@ cuda_mem_alloc(struct rte_gpu *dev, size_t size, void 
> **ptr)
>  
>       /* Allocate memory */
>       mem_alloc_list_tail->size = size;
> -     res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_d),
> -                     mem_alloc_list_tail->size);
> +     mem_alloc_list_tail->size_orig = size + align;
> +
> +     res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_orig_d),
> +                     mem_alloc_list_tail->size_orig);
>       if (res != 0) {
>               pfn_cuGetErrorString(res, &(err_string));
>               rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s",
> @@ -620,6 +624,13 @@ cuda_mem_alloc(struct rte_gpu *dev, size_t size, void 
> **ptr)
>               return -rte_errno;
>       }
>  
> +
> +     /* Align memory address */
> +     mem_alloc_list_tail->ptr_d = mem_alloc_list_tail->ptr_orig_d;
> +     if (align && ((uintptr_t)mem_alloc_list_tail->ptr_d) % align)
> +             mem_alloc_list_tail->ptr_d += (align -
> +                             (((uintptr_t)mem_alloc_list_tail->ptr_d) % 
> align));


POSIX posix_memalign() takes size_t for both size and alignment.

It is better to put the input parameters first and the resulting output
parameter last, for consistency; this follows the Rusty API design manifesto.

Alignment only makes sense if it is a power of two. The code should check
for that and optimize for it.

Reply via email to