On 4/29/25 17:24, Arvind Yadav wrote: > Introduce a new `enum dma_fence_unwrap_mode` to support multiple modes > of fence unwrapping and merging behavior. This lays the groundwork for > more flexible and efficient fence operations. > > Add a new macro `dma_fence_unwrap_shrink_array()` to allow callers to > unwrap, deduplicate, and sort dma_fence arrays in-place, avoiding > additional memory allocations. This mode filters out already signaled > fences and keeps only the latest fence per context. > > To support this, refactor `__dma_fence_unwrap_merge()` to handle both > merge and shrink modes through the new `dma_fence_unwrap_mode` parameter. > The sorting and deduplication logic is extracted into a new helper function > `dma_fence_unwrap_dedup_sort()`. > > These changes allow clients to efficiently compact arrays of fences with > minimal overhead, improving memory usage and performance in common > scenarios like command submission or dependency tracking. > > v2: - Export this code from dma-fence-unwrap.c(by Christian). > v3: - To split this in a dma_buf patch and amd userq patch(by Sunil). > - No need to add a new function just re-use existing(by Christian). 
> > Cc: Alex Deucher <alexander.deuc...@amd.com> > Cc: Christian Koenig <christian.koe...@amd.com> > Cc: Sunil Khatri <sunil.kha...@amd.com> > Cc: Arunpravin Paneer Selvam <arunpravin.paneersel...@amd.com> > Signed-off-by: Arvind Yadav <arvind.ya...@amd.com> > --- > drivers/dma-buf/dma-fence-unwrap.c | 54 +++++++++++++++++++++--------- > include/linux/dma-fence-unwrap.h | 44 ++++++++++++++++++++++-- > 2 files changed, 81 insertions(+), 17 deletions(-) > > diff --git a/drivers/dma-buf/dma-fence-unwrap.c > b/drivers/dma-buf/dma-fence-unwrap.c > index 2a059ac0ed27..f389c37a0eb5 100644 > --- a/drivers/dma-buf/dma-fence-unwrap.c > +++ b/drivers/dma-buf/dma-fence-unwrap.c > @@ -79,15 +79,51 @@ static int fence_cmp(const void *_a, const void *_b) > return 0; > } > > +static void dma_fence_unwrap_dedup_sort(struct dma_fence **array, > + unsigned int *count)
Just call that dma_fence_dedup_array() (i.e. without the unwrap). > +{ > + int i, j; > + > + sort(array, *count, sizeof(*array), fence_cmp, NULL); > + > + /* > + * Only keep the most recent fence for each context. > + */ > + j = 0; > + for (i = 1; i < *count; i++) { > + if (array[i]->context == array[j]->context) > + dma_fence_put(array[i]); > + else > + array[++j] = array[i]; > + } > + > + *count = ++j; > +} Instead of making count a pointer, just return the resulting count. > + > /* Implementation for the dma_fence_merge() marco, don't use directly */ > struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, > struct dma_fence **fences, > - struct dma_fence_unwrap *iter) > + struct dma_fence_unwrap *iter, > + enum dma_fence_unwrap_mode mode, > + unsigned int *out_count) > { > struct dma_fence *tmp, *unsignaled = NULL, **array; > struct dma_fence_array *result; > ktime_t timestamp; > - int i, j, count; > + int i, count; > + > + if (mode == DMA_FENCE_UNWRAP_ARRAY) { > + array = fences; > + count = num_fences; > + > + if (count > 1) > + dma_fence_unwrap_dedup_sort(array, &count); > + > + if (out_count) > + *out_count = count; > + > + return NULL; > + } Completely drop that. Just export the dma_fence_dedup_array() function and use that directly. Regards, Christian. > > count = 0; > timestamp = ns_to_ktime(0); > @@ -141,19 +177,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int > num_fences, > if (count == 0 || count == 1) > goto return_fastpath; > > - sort(array, count, sizeof(*array), fence_cmp, NULL); > - > - /* > - * Only keep the most recent fence for each context. 
> - */ > - j = 0; > - for (i = 1; i < count; i++) { > - if (array[i]->context == array[j]->context) > - dma_fence_put(array[i]); > - else > - array[++j] = array[i]; > - } > - count = ++j; > + dma_fence_unwrap_dedup_sort(array, &count); > > if (count > 1) { > result = dma_fence_array_create(count, array, > diff --git a/include/linux/dma-fence-unwrap.h > b/include/linux/dma-fence-unwrap.h > index 66b1e56fbb81..b5683b024548 100644 > --- a/include/linux/dma-fence-unwrap.h > +++ b/include/linux/dma-fence-unwrap.h > @@ -10,6 +10,18 @@ > > struct dma_fence; > > +/** > + * enum dma_fence_unwrap_mode - Mode of operation for fence unwrap and merge > + * @DMA_FENCE_UNWRAP_MERGE: Merge all unsignaled fences into a new dma_fence > object. > + * Returns a single merged fence or stub. > + * @DMA_FENCE_UNWRAP_ARRAY: Deduplicate and sort fences in-place. > + * Returns nothing, but updates the input array and > count. > + */ > +enum dma_fence_unwrap_mode { > + DMA_FENCE_UNWRAP_MERGE, /* Return a single merged dma_fence or NULL on > error */ > + DMA_FENCE_UNWRAP_ARRAY, /* Return deduplicated, sorted in-place array > */ > +}; > + > /** > * struct dma_fence_unwrap - cursor into the container structure > * > @@ -50,7 +62,9 @@ struct dma_fence *dma_fence_unwrap_next(struct > dma_fence_unwrap *cursor); > > struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, > struct dma_fence **fences, > - struct dma_fence_unwrap *cursors); > + struct dma_fence_unwrap *cursors, > + enum dma_fence_unwrap_mode mode, > + unsigned int *out_count); > > /** > * dma_fence_unwrap_merge - unwrap and merge fences > @@ -58,6 +72,9 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int > num_fences, > * All fences given as parameters are unwrapped and merged back together as > flat > * dma_fence_array. Useful if multiple containers need to be merged together. > * > + * Internally uses the DMA_FENCE_UNWRAP_MERGE mode to return a single merged > + * dma_fence (or a stub if all fences are signaled). 
> + * > * Implemented as a macro to allocate the necessary arrays on the stack and > * account the stack frame size to the caller. > * > @@ -69,7 +86,30 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int > num_fences, > struct dma_fence *__f[] = { __VA_ARGS__ }; \ > struct dma_fence_unwrap __c[ARRAY_SIZE(__f)]; \ > \ > - __dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c); \ > + __dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c, \ > + DMA_FENCE_UNWRAP_MERGE, NULL); \ > + }) > + > +/** > + * dma_fence_unwrap_shrink_array - Deduplicate and sort an array of fences > in-place > + * @ _num_fences: Number of input fences > + * @ _fences: Array of dma_fence pointers to be deduplicated and sorted > + * @ _out_fences: Output variable updated with the number of fences after > deduplication > + * > + * This macro unwraps each fence (handling dma_fence_array if needed), > + * filters out signaled fences, sorts the remaining ones by context and > + * timestamp, and removes duplicates (keeping only the most recent fence per > context). > + * > + * Internally uses the DMA_FENCE_UNWRAP_ARRAY mode to perform in-place > filtering and > + * sorting. > + * > + * The result is stored back in the input array (_fences) and the final count > + * is written to @_out_fences. No memory allocation is performed. > + */ > +#define dma_fence_unwrap_shrink_array(_num_fences, _fences, _out_fences) > \ > + ({ > \ > + __dma_fence_unwrap_merge((_num_fences), (_fences), NULL, > \ > + DMA_FENCE_UNWRAP_ARRAY, &_out_fences); > \ > }) > > #endif