On 2/16/26 16:49, Srinivasan Shanmugam wrote:
> Add a minimal producer for the render-node
> eventfd subscription mechanism by signaling a fixed event_id on GFX11
> MES/userq EOP interrupts.
>
> To reach the correct per-file registry from the IRQ path, plumb the
> originating drm_file's fpriv into the userq fence driver, and in the
> EOP IRQ handler walk:
>
> doorbell_offset -> fence_drv -> fence_drv->fpriv -> eventfd_xa[event_id]
>
> Cc: Harish Kasiviswanathan <[email protected]>
> Cc: Felix Kuehling <[email protected]>
> Cc: Alex Deucher <[email protected]>
> Cc: Christian König <[email protected]>
> Signed-off-by: Srinivasan Shanmugam <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 5 +++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 4 ++++
> drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 14 ++++++++++++++
> 4 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index b700c2b91465..a3a38efdc3aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -820,7 +820,7 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
>
> queue->doorbell_index = index;
> xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
> - r = amdgpu_userq_fence_driver_alloc(adev, queue);
> + r = amdgpu_userq_fence_driver_alloc(adev, fpriv, queue);
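The fence driver can't hold a reference to the fpriv, since the fence driver
lives longer than the fpriv.
Clearing fence_drv->fpriv in amdgpu_userq_fence_driver_destroy() doesn't help
either: that function only runs once the last reference to the fence driver is
dropped, which can be after the fpriv is already gone.
That whole approach here won't work.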
Regards,
Christian.
> if (r) {
> drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
> goto unlock;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 212056d4ddf0..507defcfabd0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -76,6 +76,7 @@ amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver
> *fence_drv,
> }
>
> int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
> + struct amdgpu_fpriv *fpriv,
> struct amdgpu_usermode_queue *userq)
> {
> struct amdgpu_userq_fence_driver *fence_drv;
> @@ -102,6 +103,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device
> *adev,
> fence_drv->context = dma_fence_context_alloc(1);
> get_task_comm(fence_drv->timeline_name, current);
>
> + fence_drv->fpriv = fpriv;
> +
> xa_lock_irqsave(&adev->userq_xa, flags);
> r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
> fence_drv, GFP_KERNEL));
> @@ -192,6 +195,8 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref)
> unsigned long index, flags;
> struct dma_fence *f;
>
> + WRITE_ONCE(fence_drv->fpriv, NULL);
> +
> spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
> list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
> f = &fence->base;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> index d76add2afc77..8fa444a07f77 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> @@ -42,6 +42,8 @@ struct amdgpu_userq_fence {
> struct amdgpu_userq_fence_driver **fence_drv_array;
> };
>
> +struct amdgpu_fpriv;
> +
> struct amdgpu_userq_fence_driver {
> struct kref refcount;
> u64 va;
> @@ -56,6 +58,7 @@ struct amdgpu_userq_fence_driver {
> struct list_head fences;
> struct amdgpu_device *adev;
> char timeline_name[TASK_COMM_LEN];
> + struct amdgpu_fpriv *fpriv;
> };
>
> int amdgpu_userq_fence_slab_init(void);
> @@ -64,6 +67,7 @@ void amdgpu_userq_fence_slab_fini(void);
> void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver
> *fence_drv);
> void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver
> *fence_drv);
> int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
> + struct amdgpu_fpriv *fpriv,
> struct amdgpu_usermode_queue *userq);
> void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq);
> void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver
> *fence_drv);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index b1a1b8a10a08..b06adeeeed2a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -54,6 +54,8 @@
> #define GFX11_NUM_GFX_RINGS 1
> #define GFX11_MEC_HPD_SIZE 2048
>
> +#define AMDGPU_EVENT_ID_USERQ_EOP 1
> +
> #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
> #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388
>
> @@ -6489,6 +6491,7 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
>
> if (adev->enable_mes && doorbell_offset) {
> struct amdgpu_userq_fence_driver *fence_drv = NULL;
> + struct amdgpu_fpriv *fpriv = NULL;
> struct xarray *xa = &adev->userq_xa;
> unsigned long flags;
>
> @@ -6496,7 +6499,18 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device
> *adev,
> fence_drv = xa_load(xa, doorbell_offset);
> if (fence_drv)
> amdgpu_userq_fence_driver_process(fence_drv);
> + /*
> + * Read fpriv while fence_drv is still guaranteed alive under
> xa_lock.
> + * fence_drv->fpriv is cleared during teardown.
> + */
> + fpriv = fence_drv ? READ_ONCE(fence_drv->fpriv) : NULL;
> xa_unlock_irqrestore(xa, flags);
> + /*
> + * RFC: notify render-node eventfd subscribers for this
> drm_file.
> + * Mapping: doorbell_offset -> fence_drv -> fpriv ->
> eventfd_xa[event_id]
> + */
> + if (fpriv)
> + amdgpu_eventfd_signal(fpriv, AMDGPU_EVENT_ID_USERQ_EOP,
> 1);
> } else {
> me_id = (entry->ring_id & 0x0c) >> 2;
> pipe_id = (entry->ring_id & 0x03) >> 0;