On 11/30/21 13:25, Maarten Lankhorst wrote:
On 30-11-2021 13:19, Thomas Hellström wrote:
The locking order for taking two fence locks is implicitly defined in
at least two ways in the code:

1) Fence containers first and other fences next, which is defined by
the enable_signaling() callbacks of dma_fence_chain and
dma_fence_array.
2) Reverse signal order, which is used by __i915_active_fence_set().

Now 1) implies 2), except for the signal_on_any mode of dma_fence_array,
but 2) does not imply 1). In addition, 1) makes the locking order between
different containers confusing.

Establish 2) and fix up the signal_on_any mode by calling
enable_signaling() on such fences unlocked at creation.

Cc: linaro-mm-...@lists.linaro.org
Cc: dri-devel@lists.freedesktop.org
Cc: Christian König <christian.koe...@amd.com>
Signed-off-by: Thomas Hellström <thomas.hellst...@linux.intel.com>
---
  drivers/dma-buf/dma-fence-array.c | 13 +++--
  drivers/dma-buf/dma-fence-chain.c |  3 +-
  drivers/dma-buf/dma-fence.c       | 79 +++++++++++++++++++++----------
  include/linux/dma-fence.h         |  3 ++
  4 files changed, 69 insertions(+), 29 deletions(-)

diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
index 3e07f961e2f3..0322b92909fe 100644
--- a/drivers/dma-buf/dma-fence-array.c
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -84,8 +84,8 @@ static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
                 * insufficient).
                 */
                dma_fence_get(&array->base);
-               if (dma_fence_add_callback(array->fences[i], &cb[i].cb,
-                                          dma_fence_array_cb_func)) {
+               if (dma_fence_add_callback_nested(array->fences[i], &cb[i].cb,
+                                                 dma_fence_array_cb_func)) {
                        int error = array->fences[i]->error;
dma_fence_array_set_pending_error(array, error);
@@ -158,6 +158,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
  {
        struct dma_fence_array *array;
        size_t size = sizeof(*array);
+       struct dma_fence *fence;
/* Allocate the callback structures behind the array. */
        size += num_fences * sizeof(struct dma_fence_array_cb);
@@ -165,8 +166,9 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
        if (!array)
                return NULL;
+ fence = &array->base;
        spin_lock_init(&array->lock);
-       dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock,
+       dma_fence_init(fence, &dma_fence_array_ops, &array->lock,
                       context, seqno);
        init_irq_work(&array->work, irq_dma_fence_array_work);
@@ -174,7 +176,10 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
        atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
        array->fences = fences;
- array->base.error = PENDING_ERROR;
+       fence->error = PENDING_ERROR;
+
+       if (signal_on_any)
+               dma_fence_enable_sw_signaling(fence);
return array;
  }
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
index 1b4cb3e5cec9..0518e53880f6 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -152,7 +152,8 @@ static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
                struct dma_fence *f = chain ? chain->fence : fence;
dma_fence_get(f);
-               if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) {
+               if (!dma_fence_add_callback_nested(f, &head->cb,
+                                                  dma_fence_chain_cb)) {
                        dma_fence_put(fence);
                        return true;
                }
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 066400ed8841..90a3d5121746 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -610,6 +610,37 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence)
  }
  EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
+static int __dma_fence_add_callback(struct dma_fence *fence,
+                                   struct dma_fence_cb *cb,
+                                   dma_fence_func_t func,
+                                   int nest_level)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       if (WARN_ON(!fence || !func))
+               return -EINVAL;
+
+       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+               INIT_LIST_HEAD(&cb->node);
+               return -ENOENT;
+       }
+
+       spin_lock_irqsave_nested(fence->lock, flags, 0);
Forgot to hook up nest_level here?

Ah Yes :)


+
+       if (__dma_fence_enable_signaling(fence)) {
+               cb->func = func;
+               list_add_tail(&cb->node, &fence->cb_list);
+       } else {
+               INIT_LIST_HEAD(&cb->node);
+               ret = -ENOENT;
+       }
+
+       spin_unlock_irqrestore(fence->lock, flags);
+
+       return ret;
+}
+
  /**
   * dma_fence_add_callback - add a callback to be called when the fence
   * is signaled
@@ -635,33 +666,33 @@ EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
  int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
                           dma_fence_func_t func)
  {
-       unsigned long flags;
-       int ret = 0;
-
-       if (WARN_ON(!fence || !func))
-               return -EINVAL;
-
-       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-               INIT_LIST_HEAD(&cb->node);
-               return -ENOENT;
-       }
-
-       spin_lock_irqsave(fence->lock, flags);
-
-       if (__dma_fence_enable_signaling(fence)) {
-               cb->func = func;
-               list_add_tail(&cb->node, &fence->cb_list);
-       } else {
-               INIT_LIST_HEAD(&cb->node);
-               ret = -ENOENT;
-       }
-
-       spin_unlock_irqrestore(fence->lock, flags);
-
-       return ret;
+       return __dma_fence_add_callback(fence, cb, func, 0);
  }
  EXPORT_SYMBOL(dma_fence_add_callback);
Other than that, I didn't investigate the nesting failures enough to say I can accurately review this. :)

Basically the problem is that within enable_signaling(), which is called with the dma_fence lock held, we take the dma_fence lock of another fence. If that other fence is a dma_fence_array, or a dma_fence_chain which in turn tries to lock a dma_fence_array, we hit a splat.

But I'll update the commit message with a typical splat.

/Thomas



~Maarten


Reply via email to