Introduce a new `enum dma_fence_unwrap_mode` to support multiple modes
of fence unwrapping and merging behavior. This lays the groundwork for
more flexible and efficient fence operations.

Add a new macro `dma_fence_unwrap_shrink_array()` to allow callers to
deduplicate and sort dma_fence arrays in-place, avoiding additional
memory allocations. This mode keeps only the latest fence per context;
it does not unwrap containers or filter out already signaled fences.

To support this, refactor `__dma_fence_unwrap_merge()` to handle both
merge and shrink modes through the new `dma_fence_unwrap_mode` parameter.
The sorting and deduplication logic is extracted into a new helper function
`dma_fence_unwrap_dedup_sort()`.

These changes allow clients to efficiently compact arrays of fences with
minimal overhead, improving memory usage and performance in common
scenarios like command submission or dependency tracking.

v2: - Export this code from dma-fence-unwrap.c (by Christian).
v3: - Split this into a dma-buf patch and an amd userq patch (by Sunil).
    - No need to add a new function; just re-use the existing one (by Christian).

Cc: Alex Deucher <alexander.deuc...@amd.com>
Cc: Christian Koenig <christian.koe...@amd.com>
Cc: Sunil Khatri <sunil.kha...@amd.com>
Cc: Arunpravin Paneer Selvam <arunpravin.paneersel...@amd.com>
Signed-off-by: Arvind Yadav <arvind.ya...@amd.com>
---
 drivers/dma-buf/dma-fence-unwrap.c | 54 +++++++++++++++++++++---------
 include/linux/dma-fence-unwrap.h   | 44 ++++++++++++++++++++++--
 2 files changed, 81 insertions(+), 17 deletions(-)

diff --git a/drivers/dma-buf/dma-fence-unwrap.c 
b/drivers/dma-buf/dma-fence-unwrap.c
index 2a059ac0ed27..f389c37a0eb5 100644
--- a/drivers/dma-buf/dma-fence-unwrap.c
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -79,15 +79,51 @@ static int fence_cmp(const void *_a, const void *_b)
        return 0;
 }
 
+static void dma_fence_unwrap_dedup_sort(struct dma_fence **array,
+                                       unsigned int *count)
+{
+       int i, j;
+
+       sort(array, *count, sizeof(*array), fence_cmp, NULL);
+
+       /*
+        * Only keep the most recent fence for each context.
+        */
+       j = 0;
+       for (i = 1; i < *count; i++) {
+               if (array[i]->context == array[j]->context)
+                       dma_fence_put(array[i]);
+               else
+                       array[++j] = array[i];
+       }
+
+       *count = ++j;
+}
+
 /* Implementation for the dma_fence_merge() marco, don't use directly */
 struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
                                           struct dma_fence **fences,
-                                          struct dma_fence_unwrap *iter)
+                                          struct dma_fence_unwrap *iter,
+                                          enum dma_fence_unwrap_mode mode,
+                                          unsigned int *out_count)
 {
        struct dma_fence *tmp, *unsignaled = NULL, **array;
        struct dma_fence_array *result;
        ktime_t timestamp;
-       int i, j, count;
+       int i, count;
+
+       if (mode == DMA_FENCE_UNWRAP_ARRAY) {
+               array = fences;
+               count = num_fences;
+
+               if (count > 1)
+                       dma_fence_unwrap_dedup_sort(array, &count);
+
+               if (out_count)
+                       *out_count = count;
+
+               return NULL;
+       }
 
        count = 0;
        timestamp = ns_to_ktime(0);
@@ -141,19 +177,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int 
num_fences,
        if (count == 0 || count == 1)
                goto return_fastpath;
 
-       sort(array, count, sizeof(*array), fence_cmp, NULL);
-
-       /*
-        * Only keep the most recent fence for each context.
-        */
-       j = 0;
-       for (i = 1; i < count; i++) {
-               if (array[i]->context == array[j]->context)
-                       dma_fence_put(array[i]);
-               else
-                       array[++j] = array[i];
-       }
-       count = ++j;
+       dma_fence_unwrap_dedup_sort(array, &count);
 
        if (count > 1) {
                result = dma_fence_array_create(count, array,
diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h
index 66b1e56fbb81..b5683b024548 100644
--- a/include/linux/dma-fence-unwrap.h
+++ b/include/linux/dma-fence-unwrap.h
@@ -10,6 +10,18 @@
 
 struct dma_fence;
 
+/**
+ * enum dma_fence_unwrap_mode - Mode of operation for fence unwrap and merge
+ * @DMA_FENCE_UNWRAP_MERGE: Merge all unsignaled fences into a new dma_fence 
object.
+ *                          Returns a single merged fence or stub.
+ * @DMA_FENCE_UNWRAP_ARRAY: Deduplicate and sort fences in-place.
+ *                          Returns nothing, but updates the input array and 
count.
+ */
+enum dma_fence_unwrap_mode {
+       DMA_FENCE_UNWRAP_MERGE,  /* Return a single merged dma_fence or NULL on 
error */
+       DMA_FENCE_UNWRAP_ARRAY,  /* Return deduplicated, sorted in-place array 
*/
+};
+
 /**
  * struct dma_fence_unwrap - cursor into the container structure
  *
@@ -50,7 +62,9 @@ struct dma_fence *dma_fence_unwrap_next(struct 
dma_fence_unwrap *cursor);
 
 struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
                                           struct dma_fence **fences,
-                                          struct dma_fence_unwrap *cursors);
+                                          struct dma_fence_unwrap *cursors,
+                                          enum dma_fence_unwrap_mode mode,
+                                          unsigned int *out_count);
 
 /**
  * dma_fence_unwrap_merge - unwrap and merge fences
@@ -58,6 +72,9 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int 
num_fences,
  * All fences given as parameters are unwrapped and merged back together as 
flat
  * dma_fence_array. Useful if multiple containers need to be merged together.
  *
+ * Internally uses the DMA_FENCE_UNWRAP_MERGE mode to return a single merged
+ * dma_fence (or a stub if all fences are signaled).
+ *
  * Implemented as a macro to allocate the necessary arrays on the stack and
  * account the stack frame size to the caller.
  *
@@ -69,7 +86,30 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int 
num_fences,
                struct dma_fence *__f[] = { __VA_ARGS__ };              \
                struct dma_fence_unwrap __c[ARRAY_SIZE(__f)];           \
                                                                        \
-               __dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c);    \
+               __dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c,     \
+                                        DMA_FENCE_UNWRAP_MERGE, NULL); \
+       })
+
+/**
+ * dma_fence_unwrap_shrink_array - Deduplicate and sort a fence array in-place
+ * @_num_fences: Number of input fences
+ * @_fences:     Array of dma_fence pointers to be deduplicated and sorted
+ * @_out_fences: unsigned int lvalue that receives the remaining fence count
+ *
+ * This macro sorts @_fences and removes duplicates, keeping only the most
+ * recent fence per context and dropping the reference of each removed fence.
+ * Containers are not unwrapped and signaled fences are not filtered out.
+ *
+ * Internally uses the DMA_FENCE_UNWRAP_ARRAY mode to perform in-place sorting
+ * and deduplication.
+ *
+ * The result is stored back in the input array (@_fences) and the final count
+ * is written to @_out_fences. No memory allocation is performed.
+ */
+#define dma_fence_unwrap_shrink_array(_num_fences, _fences, _out_fences)       
\
+       ({                                                                      
\
+               __dma_fence_unwrap_merge((_num_fences), (_fences), NULL,        
\
+                                        DMA_FENCE_UNWRAP_ARRAY, &_out_fences); 
\
        })
 
 #endif
-- 
2.34.1

Reply via email to