On 4/29/25 17:24, Arvind Yadav wrote:
> Introduce a new `enum dma_fence_unwrap_mode` to support multiple modes
> of fence unwrapping and merging behavior. This lays the groundwork for
> more flexible and efficient fence operations.
> 
> Add a new macro `dma_fence_unwrap_shrink_array()` to allow callers to
> unwrap, deduplicate, and sort dma_fence arrays in-place, avoiding
> additional memory allocations. This mode filters out already signaled
> fences and keeps only the latest fence per context.
> 
> To support this, refactor `__dma_fence_unwrap_merge()` to handle both
> merge and shrink modes through the new `dma_fence_unwrap_mode` parameter.
> The sorting and deduplication logic is extracted into a new helper function
> `dma_fence_unwrap_dedup_sort()`.
> 
> These changes allow clients to efficiently compact arrays of fences with
> minimal overhead, improving memory usage and performance in common
> scenarios like command submission or dependency tracking.
> 
> v2: - Export this code from dma-fence-unwrap.c(by Christian).
> v3: - To split this in a dma_buf patch and amd userq patch(by Sunil).
>     - No need to add a new function just re-use existing(by Christian).
> 
> Cc: Alex Deucher <alexander.deuc...@amd.com>
> Cc: Christian Koenig <christian.koe...@amd.com>
> Cc: Sunil Khatri <sunil.kha...@amd.com>
> Cc: Arunpravin Paneer Selvam <arunpravin.paneersel...@amd.com>
> Signed-off-by: Arvind Yadav <arvind.ya...@amd.com>
> ---
>  drivers/dma-buf/dma-fence-unwrap.c | 54 +++++++++++++++++++++---------
>  include/linux/dma-fence-unwrap.h   | 44 ++++++++++++++++++++++--
>  2 files changed, 81 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-fence-unwrap.c 
> b/drivers/dma-buf/dma-fence-unwrap.c
> index 2a059ac0ed27..f389c37a0eb5 100644
> --- a/drivers/dma-buf/dma-fence-unwrap.c
> +++ b/drivers/dma-buf/dma-fence-unwrap.c
> @@ -79,15 +79,51 @@ static int fence_cmp(const void *_a, const void *_b)
>       return 0;
>  }
>  
> +static void dma_fence_unwrap_dedup_sort(struct dma_fence **array,
> +                                     unsigned int *count)

Just call that dma_fence_dedup_array() (i.e. without the unwrap).

> +{
> +     int i, j;
> +
> +     sort(array, *count, sizeof(*array), fence_cmp, NULL);
> +
> +     /*
> +      * Only keep the most recent fence for each context.
> +      */
> +     j = 0;
> +     for (i = 1; i < *count; i++) {
> +             if (array[i]->context == array[j]->context)
> +                     dma_fence_put(array[i]);
> +             else
> +                     array[++j] = array[i];
> +     }
> +
> +     *count = ++j;
> +}

Instead of making count a pointer just return the resulting count.

> +
>  /* Implementation for the dma_fence_merge() marco, don't use directly */
>  struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
>                                          struct dma_fence **fences,
> -                                        struct dma_fence_unwrap *iter)
> +                                        struct dma_fence_unwrap *iter,
> +                                        enum dma_fence_unwrap_mode mode,
> +                                        unsigned int *out_count)
>  {
>       struct dma_fence *tmp, *unsignaled = NULL, **array;
>       struct dma_fence_array *result;
>       ktime_t timestamp;
> -     int i, j, count;
> +     int i, count;
> +
> +     if (mode == DMA_FENCE_UNWRAP_ARRAY) {
> +             array = fences;
> +             count = num_fences;
> +
> +             if (count > 1)
> +                     dma_fence_unwrap_dedup_sort(array, &count);
> +
> +             if (out_count)
> +                     *out_count = count;
> +
> +             return NULL;
> +     }


Completely drop that. Just export the dma_fence_dedup_array() function and use 
that directly.

Regards,
Christian.

>  
>       count = 0;
>       timestamp = ns_to_ktime(0);
> @@ -141,19 +177,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int 
> num_fences,
>       if (count == 0 || count == 1)
>               goto return_fastpath;
>  
> -     sort(array, count, sizeof(*array), fence_cmp, NULL);
> -
> -     /*
> -      * Only keep the most recent fence for each context.
> -      */
> -     j = 0;
> -     for (i = 1; i < count; i++) {
> -             if (array[i]->context == array[j]->context)
> -                     dma_fence_put(array[i]);
> -             else
> -                     array[++j] = array[i];
> -     }
> -     count = ++j;
> +     dma_fence_unwrap_dedup_sort(array, &count);
>  
>       if (count > 1) {
>               result = dma_fence_array_create(count, array,
> diff --git a/include/linux/dma-fence-unwrap.h 
> b/include/linux/dma-fence-unwrap.h
> index 66b1e56fbb81..b5683b024548 100644
> --- a/include/linux/dma-fence-unwrap.h
> +++ b/include/linux/dma-fence-unwrap.h
> @@ -10,6 +10,18 @@
>  
>  struct dma_fence;
>  
> +/**
> + * enum dma_fence_unwrap_mode - Mode of operation for fence unwrap and merge
> + * @DMA_FENCE_UNWRAP_MERGE: Merge all unsignaled fences into a new dma_fence 
> object.
> + *                          Returns a single merged fence or stub.
> + * @DMA_FENCE_UNWRAP_ARRAY: Deduplicate and sort fences in-place.
> + *                          Returns nothing, but updates the input array and 
> count.
> + */
> +enum dma_fence_unwrap_mode {
> +     DMA_FENCE_UNWRAP_MERGE,  /* Return a single merged dma_fence or NULL on error */
> +     DMA_FENCE_UNWRAP_ARRAY,  /* Return deduplicated, sorted in-place array */
> +};
> +
>  /**
>   * struct dma_fence_unwrap - cursor into the container structure
>   *
> @@ -50,7 +62,9 @@ struct dma_fence *dma_fence_unwrap_next(struct 
> dma_fence_unwrap *cursor);
>  
>  struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
>                                          struct dma_fence **fences,
> -                                        struct dma_fence_unwrap *cursors);
> +                                        struct dma_fence_unwrap *cursors,
> +                                        enum dma_fence_unwrap_mode mode,
> +                                        unsigned int *out_count);
>  
>  /**
>   * dma_fence_unwrap_merge - unwrap and merge fences
> @@ -58,6 +72,9 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int 
> num_fences,
>   * All fences given as parameters are unwrapped and merged back together as 
> flat
>   * dma_fence_array. Useful if multiple containers need to be merged together.
>   *
> + * Internally uses the DMA_FENCE_UNWRAP_MERGE mode to return a single merged
> + * dma_fence (or a stub if all fences are signaled).
> + *
>   * Implemented as a macro to allocate the necessary arrays on the stack and
>   * account the stack frame size to the caller.
>   *
> @@ -69,7 +86,30 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int 
> num_fences,
>               struct dma_fence *__f[] = { __VA_ARGS__ };              \
>               struct dma_fence_unwrap __c[ARRAY_SIZE(__f)];           \
>                                                                       \
> -             __dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c);    \
> +             __dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c,     \
> +                                      DMA_FENCE_UNWRAP_MERGE, NULL); \
> +     })
> +
> +/**
> + * dma_fence_unwrap_shrink_array - Deduplicate and sort an array of fences 
> in-place
> + * @ _num_fences: Number of input fences
> + * @ _fences:     Array of dma_fence pointers to be deduplicated and sorted
> + * @ _out_fences: Output variable updated with the number of fences after 
> deduplication
> + *
> + * This macro unwraps each fence (handling dma_fence_array if needed),
> + * filters out signaled fences, sorts the remaining ones by context and
> + * timestamp, and removes duplicates (keeping only the most recent fence per 
> context).
> + *
> + * Internally uses the DMA_FENCE_UNWRAP_ARRAY mode to perform in-place 
> filtering and
> + * sorting.
> + *
> + * The result is stored back in the input array (_fences) and the final count
> + * is written to @_out_fences. No memory allocation is performed.
> + */
> +#define dma_fence_unwrap_shrink_array(_num_fences, _fences, _out_fences)     \
> +     ({                                                                      \
> +             __dma_fence_unwrap_merge((_num_fences), (_fences), NULL,        \
> +                                      DMA_FENCE_UNWRAP_ARRAY, &_out_fences); \
>       })
>  
>  #endif

Reply via email to