On 2025-05-27 21:55, Lang Yu wrote:
> On simulation C models that don't run CP FW, adev->mes.sched_version
> is not populated correctly. This causes a NULL dereference in
> amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart)
> and a warning on an unpinned BO in amdgpu_bo_gpu_offset(q->properties.wptr_bo).
>
> Compared with adding version checks here and there,
> always mapping the wptr BO to GART simplifies things.
>
> v2: Add NULL check in amdgpu_amdkfd_free_gtt_mem. (Philip)
>
> Signed-off-by: Lang Yu <lang...@amd.com>

Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    |  3 +++
>  .../amd/amdkfd/kfd_process_queue_manager.c    | 23 ++++++++-----------
>  2 files changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 4cec3a873995..d8ac4b1051a8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -368,6 +368,9 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device 
> *adev, void **mem_obj)
>  {
>       struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;
>  
> +     if (!bo || !*bo)
> +             return;
> +
>       (void)amdgpu_bo_reserve(*bo, true);
>       amdgpu_bo_kunmap(*bo);
>       amdgpu_bo_unpin(*bo);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index 6d5fa57d4a23..c643e0ccec52 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -279,20 +279,17 @@ static int init_user_queue(struct process_queue_manager 
> *pqm,
>               /* Starting with GFX11, wptr BOs must be mapped to GART for MES 
> to determine work
>                * on unmapped queues for usermode queue oversubscription (no 
> aggregated doorbell)
>                */
> -             if (((dev->adev->mes.sched_version & 
> AMDGPU_MES_API_VERSION_MASK)
> -                 >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
> -                     if (dev->adev != 
> amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
> -                             pr_err("Queue memory allocated to wrong 
> device\n");
> -                             retval = -EINVAL;
> -                             goto free_gang_ctx_bo;
> -                     }
> +             if (dev->adev != 
> amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
> +                     pr_err("Queue memory allocated to wrong device\n");
> +                     retval = -EINVAL;
> +                     goto free_gang_ctx_bo;
> +             }
>  
> -                     retval = 
> amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
> -                                                               
> &(*q)->wptr_bo_gart);
> -                     if (retval) {
> -                             pr_err("Failed to map wptr bo to GART\n");
> -                             goto free_gang_ctx_bo;
> -                     }
> +             retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
> +                                                       &(*q)->wptr_bo_gart);
> +             if (retval) {
> +                     pr_err("Failed to map wptr bo to GART\n");
> +                     goto free_gang_ctx_bo;
>               }
>       }
>  

Reply via email to