Am 27.03.25 um 09:42 schrieb Philipp Stanner:
> Nouveau currently relies on the assumption that dma_fences will only
> ever get signalled through nouveau_fence_signal(), which takes care of
> removing a signalled fence from the list nouveau_fence_chan.pending.
>
> This self-imposed rule is violated in nouveau_fence_done(), where
> dma_fence_is_signaled() can signal the fence without removing it from
> the list. This enables accesses to already signalled fences through the
> list, which is a bug.
>
> Furthermore, it must always be possible to use standard dma_fence
> methods an a dma_fence and observe valid behavior. The canonical way of
> ensuring that signalling a fence has additional effects is to add those
> effects to a callback and register it on the fence.

Good catch.

>
> Move the code from nouveau_fence_signal() into a dma_fence callback.
> Register that callback when creating the fence.

But that's a really ugly approach.

Either nouveau shouldn't implement the signaled callback or make sure that when 
returning true from the signaled callback the fence is also removed from the 
pending list.

>
> Cc: <sta...@vger.kernel.org> # 4.10+
> Fixes: f54d1867005c ("dma-buf: Rename struct fence to dma_fence")
> Signed-off-by: Philipp Stanner <pha...@kernel.org>
> ---
> I'm not entirely sure what Fixes-Tag is appropriate. The last time the
> line causing the signalled fence in the list was touched is the commit
> listed above.

Yeah, that's most likely not correct. My educated guess is that the bug was 
always there just never discovered.

Regards,
Christian.

> ---
>  drivers/gpu/drm/nouveau/nouveau_fence.c | 41 ++++++++++++++++---------
>  drivers/gpu/drm/nouveau/nouveau_fence.h |  1 +
>  2 files changed, 27 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
> b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index 7cc84472cece..b2c2241a8803 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -50,24 +50,22 @@ nouveau_fctx(struct nouveau_fence *fence)
>       return container_of(fence->base.lock, struct nouveau_fence_chan, lock);
>  }
>  
> -static int
> -nouveau_fence_signal(struct nouveau_fence *fence)
> +static void
> +nouveau_fence_cleanup_cb(struct dma_fence *dfence, struct dma_fence_cb *cb)
>  {
> -     int drop = 0;
> +     struct nouveau_fence_chan *fctx;
> +     struct nouveau_fence *fence;
> +
> +     fence = container_of(dfence, struct nouveau_fence, base);
> +     fctx = nouveau_fctx(fence);
>  
> -     dma_fence_signal_locked(&fence->base);
>       list_del(&fence->head);
>       rcu_assign_pointer(fence->channel, NULL);
>  
> -     if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
> -             struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
> -
> -             if (!--fctx->notify_ref)
> -                     drop = 1;
> -     }
> +     if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags))
> +             --fctx->notify_ref;
>  
>       dma_fence_put(&fence->base);
> -     return drop;
>  }
>  
>  static struct nouveau_fence *
> @@ -93,7 +91,8 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, 
> int error)
>               if (error)
>                       dma_fence_set_error(&fence->base, error);
>  
> -             if (nouveau_fence_signal(fence))
> +             dma_fence_signal_locked(&fence->base);
> +             if (fctx->notify_ref == 0)
>                       nvif_event_block(&fctx->event);
>       }
>       fctx->killed = 1;
> @@ -131,7 +130,6 @@ static int
>  nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan 
> *fctx)
>  {
>       struct nouveau_fence *fence;
> -     int drop = 0;
>       u32 seq = fctx->read(chan);
>  
>       while (!list_empty(&fctx->pending)) {
> @@ -140,10 +138,10 @@ nouveau_fence_update(struct nouveau_channel *chan, 
> struct nouveau_fence_chan *fc
>               if ((int)(seq - fence->base.seqno) < 0)
>                       break;
>  
> -             drop |= nouveau_fence_signal(fence);
> +             dma_fence_signal_locked(&fence->base);
>       }
>  
> -     return drop;
> +     return fctx->notify_ref == 0 ? 1 : 0;
>  }
>  
>  static void
> @@ -235,6 +233,19 @@ nouveau_fence_emit(struct nouveau_fence *fence)
>                              &fctx->lock, fctx->context, ++fctx->sequence);
>       kref_get(&fctx->fence_ref);
>  
> +     fence->cb.func = nouveau_fence_cleanup_cb;
> +     /* Adding a callback runs into __dma_fence_enable_signaling(), which 
> will
> +      * ultimately run into nouveau_fence_no_signaling(), where a WARN_ON
> +      * would fire because the refcount can be dropped there.
> +      *
> +      * Increment the refcount here temporarily to work around that.
> +      */
> +     dma_fence_get(&fence->base);
> +     ret = dma_fence_add_callback(&fence->base, &fence->cb, 
> nouveau_fence_cleanup_cb);
> +     dma_fence_put(&fence->base);
> +     if (ret)
> +             return ret;
> +
>       ret = fctx->emit(fence);
>       if (!ret) {
>               dma_fence_get(&fence->base);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h 
> b/drivers/gpu/drm/nouveau/nouveau_fence.h
> index 8bc065acfe35..e6b2df7fdc42 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
> @@ -10,6 +10,7 @@ struct nouveau_bo;
>  
>  struct nouveau_fence {
>       struct dma_fence base;
> +     struct dma_fence_cb cb;
>  
>       struct list_head head;
>  

Reply via email to