Introduce a new `enum dma_fence_unwrap_mode` to support multiple modes of fence unwrapping and merging behavior. This lays the groundwork for more flexible and efficient fence operations.
Add a new macro `dma_fence_unwrap_shrink_array()` to allow callers to sort and deduplicate dma_fence arrays in-place, avoiding additional memory allocations. This mode keeps only the latest fence per context; as implemented in this patch, it neither unwraps container fences nor filters out already signaled fences. To support this, refactor `__dma_fence_unwrap_merge()` to handle both merge and shrink modes through the new `dma_fence_unwrap_mode` parameter. The sorting and deduplication logic is extracted into a new helper function `dma_fence_unwrap_dedup_sort()`. These changes allow clients to efficiently compact arrays of fences with minimal overhead, improving memory usage and performance in common scenarios like command submission or dependency tracking. v2: - Export this code from dma-fence-unwrap.c (by Christian). v3: - Split this into a dma_buf patch and an amd userq patch (by Sunil). - No need to add a new function; just re-use the existing one (by Christian). Cc: Alex Deucher <alexander.deuc...@amd.com> Cc: Christian Koenig <christian.koe...@amd.com> Cc: Sunil Khatri <sunil.kha...@amd.com> Cc: Arunpravin Paneer Selvam <arunpravin.paneersel...@amd.com> Signed-off-by: Arvind Yadav <arvind.ya...@amd.com> --- drivers/dma-buf/dma-fence-unwrap.c | 54 +++++++++++++++++++++--------- include/linux/dma-fence-unwrap.h | 44 ++++++++++++++++++++++-- 2 files changed, 81 insertions(+), 17 deletions(-) diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 2a059ac0ed27..f389c37a0eb5 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -79,15 +79,51 @@ static int fence_cmp(const void *_a, const void *_b) return 0; } +static void dma_fence_unwrap_dedup_sort(struct dma_fence **array, + unsigned int *count) +{ + int i, j; + + sort(array, *count, sizeof(*array), fence_cmp, NULL); + + /* + * Only keep the most recent fence for each context. 
+ */ + j = 0; + for (i = 1; i < *count; i++) { + if (array[i]->context == array[j]->context) + dma_fence_put(array[i]); + else + array[++j] = array[i]; + } + + *count = ++j; +} + /* Implementation for the dma_fence_merge() marco, don't use directly */ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, - struct dma_fence_unwrap *iter) + struct dma_fence_unwrap *iter, + enum dma_fence_unwrap_mode mode, + unsigned int *out_count) { struct dma_fence *tmp, *unsignaled = NULL, **array; struct dma_fence_array *result; ktime_t timestamp; - int i, j, count; + int i, count; + + if (mode == DMA_FENCE_UNWRAP_ARRAY) { + array = fences; + count = num_fences; + + if (count > 1) + dma_fence_unwrap_dedup_sort(array, &count); + + if (out_count) + *out_count = count; + + return NULL; + } count = 0; timestamp = ns_to_ktime(0); @@ -141,19 +177,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, if (count == 0 || count == 1) goto return_fastpath; - sort(array, count, sizeof(*array), fence_cmp, NULL); - - /* - * Only keep the most recent fence for each context. - */ - j = 0; - for (i = 1; i < count; i++) { - if (array[i]->context == array[j]->context) - dma_fence_put(array[i]); - else - array[++j] = array[i]; - } - count = ++j; + dma_fence_unwrap_dedup_sort(array, &count); if (count > 1) { result = dma_fence_array_create(count, array, diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h index 66b1e56fbb81..b5683b024548 100644 --- a/include/linux/dma-fence-unwrap.h +++ b/include/linux/dma-fence-unwrap.h @@ -10,6 +10,18 @@ struct dma_fence; +/** + * enum dma_fence_unwrap_mode - Mode of operation for fence unwrap and merge + * @DMA_FENCE_UNWRAP_MERGE: Merge all unsignaled fences into a new dma_fence object. + * Returns a single merged fence or stub. + * @DMA_FENCE_UNWRAP_ARRAY: Deduplicate and sort fences in-place. + * Returns nothing, but updates the input array and count. 
+ */ +enum dma_fence_unwrap_mode { + DMA_FENCE_UNWRAP_MERGE, /* Return a single merged dma_fence or NULL on error */ + DMA_FENCE_UNWRAP_ARRAY, /* Return deduplicated, sorted in-place array */ +}; + /** * struct dma_fence_unwrap - cursor into the container structure * @@ -50,7 +62,9 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor); struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, - struct dma_fence_unwrap *cursors); + struct dma_fence_unwrap *cursors, + enum dma_fence_unwrap_mode mode, + unsigned int *out_count); /** * dma_fence_unwrap_merge - unwrap and merge fences @@ -58,6 +72,9 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, * All fences given as parameters are unwrapped and merged back together as flat * dma_fence_array. Useful if multiple containers need to be merged together. * + * Internally uses the DMA_FENCE_UNWRAP_MERGE mode to return a single merged + * dma_fence (or a stub if all fences are signaled). + * * Implemented as a macro to allocate the necessary arrays on the stack and * account the stack frame size to the caller. 
* @@ -69,7 +86,30 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence *__f[] = { __VA_ARGS__ }; \ struct dma_fence_unwrap __c[ARRAY_SIZE(__f)]; \ \ - __dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c); \ + __dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c, \ + DMA_FENCE_UNWRAP_MERGE, NULL); \ + }) + +/** + * dma_fence_unwrap_shrink_array - Deduplicate and sort an array of fences in-place + * @_num_fences: Number of input fences + * @_fences: Array of dma_fence pointers to be deduplicated and sorted + * @_out_fences: Output variable updated with the number of fences after deduplication + * + * This macro sorts the fences in @_fences and removes duplicates in-place, + * keeping only the most recent fence per context. Container fences are not + * unwrapped and signaled fences are not filtered out; dropped duplicates are released with dma_fence_put(). + * + * Internally uses the DMA_FENCE_UNWRAP_ARRAY mode to perform in-place sorting and + * deduplication. + * + * The result is stored back in the input array (@_fences) and the final count + * is written to @_out_fences. No memory allocation is performed. + */ +#define dma_fence_unwrap_shrink_array(_num_fences, _fences, _out_fences) \ + ({ \ + __dma_fence_unwrap_merge((_num_fences), (_fences), NULL, \ + DMA_FENCE_UNWRAP_ARRAY, &_out_fences); \ }) #endif -- 2.34.1