On 12/11/25 18:44, Srinivasan Shanmugam wrote:
> This commit simplifies the amdgpu_gem_va_ioctl function, key updates
> include:
>  - Moved the logic for managing the last update fence directly into
>    amdgpu_gem_va_update_vm.
>  - Introduced checks for the timeline point to enable conditional
>    replacement or addition of fences.
> 
> v2: Addressed review comments from Christian.
> v3: Updated comments (Christian).
> v4: The previous version selected the fence too early and did not manage
>     its reference correctly, which could lead to stale or freed fences
>     being used. This resulted in refcount underflows and could crash when
>     updating GPU timelines.
>     The fence is now chosen only after the VA mapping work is completed,
>     and its reference is taken safely. After exporting it to the
>     VM timeline syncobj, the driver always drops its local fence reference,
>     ensuring balanced refcounting and avoiding use-after-free on dma_fence.
> 
>       Crash signature:
>       [  205.828135] refcount_t: underflow; use-after-free.
>       [  205.832963] WARNING: CPU: 30 PID: 7274 at lib/refcount.c:28 
> refcount_warn_saturate+0xbe/0x110
>               ...
>       [  206.074014] Call Trace:
>       [  206.076488]  <TASK>
>       [  206.078608]  amdgpu_gem_va_ioctl+0x6ea/0x740 [amdgpu]
>       [  206.084040]  ? __pfx_amdgpu_gem_va_ioctl+0x10/0x10 [amdgpu]
>       [  206.089994]  drm_ioctl_kernel+0x86/0xe0 [drm]
>       [  206.094415]  drm_ioctl+0x26e/0x520 [drm]
>       [  206.098424]  ? __pfx_amdgpu_gem_va_ioctl+0x10/0x10 [amdgpu]
>       [  206.104402]  amdgpu_drm_ioctl+0x4b/0x80 [amdgpu]
>       [  206.109387]  __x64_sys_ioctl+0x96/0xe0
>       [  206.113156]  do_syscall_64+0x66/0x2d0
>               ...
>       [  206.553351] BUG: unable to handle page fault for address: 
> ffffffffc0dfde90
>               ...
>       [  206.553378] RIP: 0010:dma_fence_signal_timestamp_locked+0x39/0xe0
>               ...
>       [  206.553405] Call Trace:
>       [  206.553409]  <IRQ>
>       [  206.553415]  ? __pfx_drm_sched_fence_free_rcu+0x10/0x10 [gpu_sched]
>       [  206.553424]  dma_fence_signal+0x30/0x60
>       [  206.553427]  drm_sched_job_done.isra.0+0x123/0x150 [gpu_sched]
>       [  206.553434]  dma_fence_signal_timestamp_locked+0x6e/0xe0
>       [  206.553437]  dma_fence_signal+0x30/0x60
>       [  206.553441]  amdgpu_fence_process+0xd8/0x150 [amdgpu]
>       [  206.553854]  sdma_v4_0_process_trap_irq+0x97/0xb0 [amdgpu]
>       [  206.554353]  edac_mce_amd(E) ee1004(E)
>       [  206.554270]  amdgpu_irq_dispatch+0x150/0x230 [amdgpu]
>       [  206.554702]  amdgpu_ih_process+0x6a/0x180 [amdgpu]
>       [  206.555101]  amdgpu_irq_handler+0x23/0x60 [amdgpu]
>       [  206.555500]  __handle_irq_event_percpu+0x4a/0x1c0
>       [  206.555506]  handle_irq_event+0x38/0x80
>       [  206.555509]  handle_edge_irq+0x92/0x1e0
>       [  206.555513]  __common_interrupt+0x3e/0xb0
>       [  206.555519]  common_interrupt+0x80/0xa0
>       [  206.555525]  </IRQ>
>       [  206.555527]  <TASK>
>               ...
>       [  206.555650] RIP: 0010:dma_fence_signal_timestamp_locked+0x39/0xe0
>               ...
>       [  206.555667] Kernel panic - not syncing: Fatal exception in interrupt
> 
> Cc: Alex Deucher <[email protected]>
> Cc: Christian König <[email protected]>
> Suggested-by: Christian König <[email protected]>
> Signed-off-by: Srinivasan Shanmugam <[email protected]>
> Reviewed-by: Christian König <[email protected]>
> Link: https://patchwork.freedesktop.org/patch/654669/
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 135 ++++++++++++++----------
>  1 file changed, 82 insertions(+), 53 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9b81a6677f90..e5fb8f5346b6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -112,47 +112,6 @@ amdgpu_gem_update_timeline_node(struct drm_file *filp,
>       return 0;
>  }
>  
> -static void
> -amdgpu_gem_update_bo_mapping(struct drm_file *filp,
> -                          struct amdgpu_bo_va *bo_va,
> -                          uint32_t operation,
> -                          uint64_t point,
> -                          struct dma_fence *fence,
> -                          struct drm_syncobj *syncobj,
> -                          struct dma_fence_chain *chain)
> -{
> -     struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
> -     struct amdgpu_fpriv *fpriv = filp->driver_priv;
> -     struct amdgpu_vm *vm = &fpriv->vm;
> -     struct dma_fence *last_update;
> -
> -     if (!syncobj)
> -             return;
> -
> -     /* Find the last update fence */
> -     switch (operation) {
> -     case AMDGPU_VA_OP_MAP:
> -     case AMDGPU_VA_OP_REPLACE:
> -             if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
> -                     last_update = vm->last_update;
> -             else
> -                     last_update = bo_va->last_pt_update;
> -             break;
> -     case AMDGPU_VA_OP_UNMAP:
> -     case AMDGPU_VA_OP_CLEAR:
> -             last_update = fence;
> -             break;
> -     default:
> -             return;
> -     }
> -
> -     /* Add fence to timeline */
> -     if (!point)
> -             drm_syncobj_replace_fence(syncobj, last_update);
> -     else
> -             drm_syncobj_add_point(syncobj, chain, last_update, point);
> -}
> -
>  static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
>  {
>       struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
> @@ -773,16 +732,19 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>                       struct amdgpu_bo_va *bo_va,
>                       uint32_t operation)
>  {
> -     struct dma_fence *fence = dma_fence_get_stub();
> +     struct dma_fence *clear_fence = dma_fence_get_stub();
> +     struct dma_fence *last_update = NULL;
>       int r;
>  
>       if (!amdgpu_vm_ready(vm))
> -             return fence;
> +             return clear_fence;
>  
> -     r = amdgpu_vm_clear_freed(adev, vm, &fence);
> +     /* First clear freed BOs and get a fence for that work, if any. */
> +     r = amdgpu_vm_clear_freed(adev, vm, &clear_fence);
>       if (r)
>               goto error;
>  
> +     /* For MAP/REPLACE we also need to update the BO mappings. */
>       if (operation == AMDGPU_VA_OP_MAP ||
>           operation == AMDGPU_VA_OP_REPLACE) {
>               r = amdgpu_vm_bo_update(adev, bo_va, false);
> @@ -790,13 +752,59 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>                       goto error;
>       }
>  
> +     /* Always update PDEs after we touched the mappings. */
>       r = amdgpu_vm_update_pdes(adev, vm, false);
> +     if (r)
> +             goto error;
> +
> +     /*
> +      * Decide which fence represents the "last update" for this VM/BO:
> +      *
> +      * - For MAP/REPLACE we want the PT update fence, which is tracked as
> +      *   either vm->last_update (for always-valid BOs) or 
> bo_va->last_pt_update
> +      *   (for per-BO updates).
> +      *
> +      * - For UNMAP/CLEAR we rely on the fence returned by
> +      *   amdgpu_vm_clear_freed(), which already covers the page table work
> +      *   for the removed mappings.
> +      */
> +     switch (operation) {
> +     case AMDGPU_VA_OP_MAP:
> +     case AMDGPU_VA_OP_REPLACE:
> +             if (bo_va && bo_va->base.bo) {
> +                     if (amdgpu_vm_is_bo_always_valid(vm, bo_va->base.bo)) {

This isn't 100% correct.

You need something like this:

if (bo_va && bo_va->base.bo &&
    amdgpu_vm_is_bo_always_valid(vm, bo_va->base.bo) {....

Otherwise PRT mappings won't work correctly.

> +                             if (vm->last_update)

That check is superflous, vm->last_update is initialized with the stub fence 
and never NULL.

> +                                     last_update = 
> dma_fence_get(vm->last_update);
> +                     } else {
> +                             if (bo_va->last_pt_update)

same here.

> +                                     last_update = 
> dma_fence_get(bo_va->last_pt_update);
> +                     }
> +             }
> +             break;
> +     case AMDGPU_VA_OP_UNMAP:
> +     case AMDGPU_VA_OP_CLEAR:
> +             if (clear_fence)

and here.

> +                     last_update = dma_fence_get(clear_fence);
> +             break;
> +     default:
> +             break;
> +     }
>  
>  error:
>       if (r && r != -ERESTARTSYS)
>               DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
>  
> -     return fence;

> +     /*
> +      * If we managed to pick a more specific last-update fence, prefer it
> +      * over the generic clear_fence and drop the extra reference to the
> +      * latter.
> +      */
> +     if (last_update) {
> +             dma_fence_put(clear_fence);
> +             return last_update;
> +     }

As far as I can see that won't work as intended. last_update should never be 
NULL here.

Regards,
Christian.


> +
> +     return clear_fence;
>  }
>  
>  int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> @@ -822,6 +830,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>       uint64_t vm_size;
>       int r = 0;
>  
> +     /* Validate virtual address range against reserved regions. */
>       if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {
>               dev_dbg(dev->dev,
>                       "va_address 0x%llx is in reserved area 0x%llx\n",
> @@ -855,6 +864,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>               return -EINVAL;
>       }
>  
> +     /* Validate operation type. */
>       switch (args->operation) {
>       case AMDGPU_VA_OP_MAP:
>       case AMDGPU_VA_OP_UNMAP:
> @@ -878,6 +888,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>               abo = NULL;
>       }
>  
> +     /* Add input syncobj fences (if any) for synchronization. */
>       r = amdgpu_gem_add_input_fence(filp,
>                                      args->input_fence_syncobj_handles,
>                                      args->num_syncobj_handles);
> @@ -900,6 +911,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>                       goto error;
>       }
>  
> +     /* Resolve the BO-VA mapping for this VM/BO combination. */
>       if (abo) {
>               bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo);
>               if (!bo_va) {
> @@ -912,6 +924,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>               bo_va = NULL;
>       }
>  
> +     /*
> +      * Prepare the timeline syncobj node if the user requested a VM
> +      * timeline update. This only allocates/looks up the syncobj and
> +      * chain node; the actual fence is attached later.
> +      */
>       r = amdgpu_gem_update_timeline_node(filp,
>                                           args->vm_timeline_syncobj_out,
>                                           args->vm_timeline_point,
> @@ -943,18 +960,30 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>       default:
>               break;
>       }
> +
> +     /*
> +      * Once the VA operation is done, update the VM and obtain the fence
> +      * that represents the last relevant update for this mapping. This
> +      * fence can then be exported to the user-visible VM timeline.
> +      */
>       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
>               fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>                                               args->operation);
>  
> -             if (timeline_syncobj)
> -                     amdgpu_gem_update_bo_mapping(filp, bo_va,
> -                                          args->operation,
> -                                          args->vm_timeline_point,
> -                                          fence, timeline_syncobj,
> -                                          timeline_chain);
> -             else
> -                     dma_fence_put(fence);
> +             if (timeline_syncobj && fence) {
> +                     if (!args->vm_timeline_point) {
> +                             /* Replace the existing fence when no point is 
> given. */
> +                             drm_syncobj_replace_fence(timeline_syncobj,
> +                                                       fence);
> +                     } else {
> +                             /* Attach the last-update fence at a specific 
> point. */
> +                             drm_syncobj_add_point(timeline_syncobj,
> +                                                   timeline_chain,
> +                                                   fence,
> +                                                   args->vm_timeline_point);
> +                     }
> +             }
> +             dma_fence_put(fence);
>  
>       }
>  

Reply via email to