On 12/15/25 17:56, Philip Yang wrote:
> MQD BOs allocated in VRAM are accessed via the FB aperture with mtype UC
> (uncached). Map them into GART with mtype RW (cached) instead, to reduce
> queue switch latency.
>
> Add a GART mm_node to the kfd mem obj so that the GART entries can be
> freed after the MQD mem obj is freed.
>
> Use a resource cursor to handle the VRAM resource, which may span multiple
> blocks, and use cursor_gart to walk the GART entries.
>
> Signed-off-by: Philip Yang <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 94 +++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +-
> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 2 +
> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 ++
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
> 5 files changed, 109 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4f8bc7f35cdc..d7bf96a7b6b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -880,6 +880,67 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
> }
> }
>
> +/*
> + * Same MQD layout handling as amdgpu_ttm_gart_bind_gfx9_mqd, except this
> + * variant operates on a VRAM BO and uses dynamically allocated GART entries.
> + */
> +static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev,
> + struct ttm_buffer_object *tbo,
> + struct drm_mm_node *mm_node,
> + uint64_t flags)
> +{
> + uint64_t total_pages;
> + int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
> + uint64_t page_idx, pages_per_xcc;
> + struct amdgpu_res_cursor cursor_gart;
> + struct amdgpu_res_cursor cursor;
> + uint64_t ctrl_flags = flags;
> + int i;
> +
> + total_pages = tbo->base.size >> PAGE_SHIFT;
> +
> + amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags);
> +
> + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
> + amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags);
> +
> + pages_per_xcc = total_pages;
> + do_div(pages_per_xcc, num_xcc);
> +
> + amdgpu_res_first(NULL, mm_node->start, total_pages, &cursor_gart);
> + amdgpu_res_first(tbo->resource, 0, tbo->resource->size, &cursor);
> +
> + for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
> + u64 start_page;
> + u64 npages, n;
> + u64 pa;
> +
> + start_page = cursor_gart.start;
> + pa = cursor.start + adev->vm_manager.vram_base_offset;
> + n = 1;
> + amdgpu_gart_map_vram_range(adev, pa, start_page, n,
> + flags, NULL);
> +
> + npages = pages_per_xcc - 1;
> + while (npages) {
> + amdgpu_res_next(&cursor_gart, n);
> + amdgpu_res_next(&cursor, n * PAGE_SIZE);
> +
> + start_page = cursor_gart.start;
> + pa = cursor.start + adev->vm_manager.vram_base_offset;
> + n = min3(cursor.size / PAGE_SIZE, cursor_gart.size, npages);
> +
> + amdgpu_gart_map_vram_range(adev, pa, start_page, n,
> + ctrl_flags, NULL);
> +
> + npages -= n;
> + }
> +
> + amdgpu_res_next(&cursor_gart, n);
> + amdgpu_res_next(&cursor, n * PAGE_SIZE);
> + }
> +}
> +
> static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
> struct ttm_buffer_object *tbo,
> uint64_t flags)
> @@ -1017,6 +1078,39 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
> return 0;
> }
>
> +/*
> + * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages of a BO to a GART mapping
> + *
> + * Calls amdgpu_gtt_mgr_alloc_entries to allocate the GART entries dynamically.
> + */
> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
That needs a better name, something noting that it is MQD specific.
Potentially just merge the code into amdgpu_ttm_gart_bind_gfx9_mqd_vram() and
keep that name.
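
Rough sketch of what that merge could look like, keeping your allocation and
error handling as-is (names only illustrative, the per-XCC binding loop stays
as in the hunk above):

static int amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_bo *abo,
					      struct drm_mm_node *mm_node,
					      u64 *gpu_addr)
{
	struct ttm_buffer_object *bo = &abo->tbo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	uint64_t flags;
	int r;

	/* allocate a dynamic GART range covering the whole BO */
	r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, mm_node,
					 amdgpu_bo_ngpu_pages(abo), 0);
	if (r)
		return r;

	/* compute PTE flags for this buffer object */
	flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);

	/* ... per-XCC MQD/ctrl-page binding loop from above ... */

	amdgpu_gart_invalidate_tlb(adev);

	*gpu_addr = mm_node->start << PAGE_SHIFT;
	return 0;
}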
> + struct drm_mm_node *mm_node,
> + u64 *gpu_addr)
> +{
> + struct ttm_buffer_object *bo = &abo->tbo;
> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> + uint64_t flags;
> + int r;
> +
> + /* Only for valid VRAM bo resource */
> + if (amdgpu_mem_type_to_domain(bo->resource->mem_type) !=
> + AMDGPU_GEM_DOMAIN_VRAM)
> + return 0;
Please drop that check.
> +
> + r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, mm_node,
> + amdgpu_bo_ngpu_pages(abo), 0);
> + if (r)
> + return r;
> +
> + /* compute PTE flags for this buffer object */
> + flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
> + amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags);
> + amdgpu_gart_invalidate_tlb(adev);
> +
> + *gpu_addr = mm_node->start << PAGE_SHIFT;
And a helper for this would be better. Maybe we should move all the gtt_mgr
functions into a separate header like we did for the vram_mgr as well.
But that can come later on.
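
Something like this, for example (name purely a suggestion, just wrapping the
calculation from the patch):

static inline u64 amdgpu_gtt_mgr_node_addr(const struct drm_mm_node *node)
{
	/* node->start is in GART pages, matching the shift used above */
	return (u64)node->start << PAGE_SHIFT;
}

so the caller becomes *gpu_addr = amdgpu_gtt_mgr_node_addr(mm_node);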
Regards,
Christian.
> + return 0;
> +}
> +
> /*
> * amdgpu_ttm_recover_gart - Rebind GTT pages
> *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 25640bed7dc9..9f07856433fd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -140,7 +140,6 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
>
> bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
> void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
> -
> int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
> struct drm_mm_node *node,
> u64 num_pages,
> @@ -191,6 +190,9 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
> u64 k_job_id);
>
> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
> + struct drm_mm_node *mm_node,
> + u64 *gpu_addr);
> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
> uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> index f78b249e1a41..edb72f4ef82d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> @@ -225,6 +225,8 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
> struct kfd_mem_obj *mqd_mem_obj)
> {
> if (mqd_mem_obj->mem) {
> + amdgpu_gtt_mgr_free_entries(&mm->dev->adev->mman.gtt_mgr,
> + &mqd_mem_obj->mm_node);
> amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
> kfree(mqd_mem_obj);
> } else {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> index 14123e1a9716..5828220056bd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> @@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
> kfree(mqd_mem_obj);
> return NULL;
> }
> +
> + retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem,
> + &mqd_mem_obj->mm_node,
> + &(mqd_mem_obj->gpu_addr));
> + if (retval) {
> + amdgpu_amdkfd_free_kernel_mem(node->adev, &(mqd_mem_obj->mem));
> + kfree(mqd_mem_obj);
> + return NULL;
> + }
> } else {
> retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
> &mqd_mem_obj);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 06cd675c9e74..55738b30c2ec 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -253,6 +253,7 @@ struct kfd_mem_obj {
> uint64_t gpu_addr;
> uint32_t *cpu_ptr;
> void *mem;
> + struct drm_mm_node mm_node;
> };
>
> struct kfd_vmid_info {