MQD BO access on VRAM through the FB aperture uses mtype UC (uncached);
map the MQD to GART as mtype RW (cached) instead, to reduce queue switch
latency.

Add helpers amdgpu_ttm_alloc_gart_entries/amdgpu_ttm_free_gart_entries.
Add helper amdgpu_ttm_gart_bind_gfx9_mqd_vram to bind VRAM pages
to a GART mapping.

Add a GART drm_mm_node to struct kfd_mem_obj so the GART entries can be
freed after the MQD is freed.

Signed-off-by: Philip Yang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 103 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   8 ++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  |   1 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |   9 ++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   1 +
 5 files changed, 122 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4f8bc7f35cdc..fc6f4daa9b87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -880,6 +880,42 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
        }
 }
 
+static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev,
+                               struct ttm_buffer_object *tbo,
+                               struct drm_mm_node *mm_node,
+                               uint64_t flags)
+{
+       uint64_t total_pages;
+       int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+       uint64_t page_idx, pages_per_xcc;
+       uint64_t ctrl_flags = flags;
+       int i;
+
+       total_pages = tbo->resource->size >> PAGE_SHIFT;
+
+       amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags);
+
+       if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
+               amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags);
+
+       pages_per_xcc = total_pages;
+       do_div(pages_per_xcc, num_xcc);
+
+       for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+               u64 pa = (tbo->resource->start + page_idx) << PAGE_SHIFT;
+               u64 start_page = mm_node->start + page_idx;
+
+               pa += adev->vm_manager.vram_base_offset;
+               amdgpu_gart_map_vram_range(adev, pa, start_page, 1,
+                                          flags, NULL);
+
+               amdgpu_gart_map_vram_range(adev, pa + PAGE_SIZE,
+                                          start_page + 1,
+                                          pages_per_xcc - 1,
+                                          ctrl_flags, NULL);
+       }
+}
+
 static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
                                 struct ttm_buffer_object *tbo,
                                 uint64_t flags)
@@ -1017,6 +1053,73 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
        return 0;
 }
 
+int amdgpu_ttm_alloc_gart_entries(struct amdgpu_device *adev,
+                                 struct drm_mm_node *mm_node,
+                                 u64 num_pages)
+{
+       struct ttm_resource_manager *man;
+       struct amdgpu_gtt_mgr *mgr;
+       int r;
+
+       man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+       mgr = container_of(man, struct amdgpu_gtt_mgr, manager);
+
+       spin_lock(&mgr->lock);
+       r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages,
+                                       0, 0, 0,
+                                       adev->gmc.gart_size >> PAGE_SHIFT,
+                                       DRM_MM_INSERT_BEST);
+       spin_unlock(&mgr->lock);
+       return r;
+}
+
+void amdgpu_ttm_free_gart_entries(struct amdgpu_device *adev,
+                                 struct drm_mm_node *mm_node)
+{
+       struct ttm_resource_manager *man;
+       struct amdgpu_gtt_mgr *mgr;
+
+       man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+       mgr = container_of(man, struct amdgpu_gtt_mgr, manager);
+
+       spin_lock(&mgr->lock);
+       if (drm_mm_node_allocated(mm_node))
+               drm_mm_remove_node(mm_node);
+       spin_unlock(&mgr->lock);
+}
+
+/*
+ * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages to GART mapping
+ *
+ * call amdgpu_ttm_alloc_gart_entries to alloc GART dynamically
+ */
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+                                 struct drm_mm_node *mm_node,
+                                 u64 *gpu_addr)
+{
+       struct ttm_buffer_object *bo = &abo->tbo;
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+       uint64_t flags;
+       int r;
+
+       /* Only for valid VRAM bo resource */
+       if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
+               return 0;
+
+       r = amdgpu_ttm_alloc_gart_entries(adev, mm_node,
+                                         amdgpu_bo_ngpu_pages(abo));
+       if (r)
+               return r;
+
+       /* compute PTE flags for this buffer object */
+       flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
+       amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags);
+       amdgpu_gart_invalidate_tlb(adev);
+
+       *gpu_addr = mm_node->start << PAGE_SHIFT;
+       return 0;
+}
+
 /*
  * amdgpu_ttm_recover_gart - Rebind GTT pages
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 72488124aa59..cb6123358843 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -185,6 +185,14 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
                       u64 k_job_id);
 
 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+                                 struct drm_mm_node *mm_node,
+                                 u64 *gpu_addr);
+int amdgpu_ttm_alloc_gart_entries(struct amdgpu_device *adev,
+                                 struct drm_mm_node *mm_node,
+                                 u64 num_pages);
+void amdgpu_ttm_free_gart_entries(struct amdgpu_device *adev,
+                                 struct drm_mm_node *mm_node);
 void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index f78b249e1a41..00e1e5b30a3a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -225,6 +225,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
              struct kfd_mem_obj *mqd_mem_obj)
 {
        if (mqd_mem_obj->mem) {
+               amdgpu_ttm_free_gart_entries(mm->dev->adev, &mqd_mem_obj->mm_node);
                amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
                kfree(mqd_mem_obj);
        } else {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 14123e1a9716..5828220056bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
                        kfree(mqd_mem_obj);
                        return NULL;
                }
+
+               retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem,
+                                                      &mqd_mem_obj->mm_node,
+                                                      
&(mqd_mem_obj->gpu_addr));
+               if (retval) {
+                       amdgpu_amdkfd_free_kernel_mem(node->adev, &(mqd_mem_obj->mem));
+                       kfree(mqd_mem_obj);
+                       return NULL;
+               }
        } else {
                retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
                                &mqd_mem_obj);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 29419b3249cf..fdde907836fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -252,6 +252,7 @@ struct kfd_mem_obj {
        uint64_t gpu_addr;
        uint32_t *cpu_ptr;
        void *mem;
+       struct drm_mm_node mm_node;
 };
 
 struct kfd_vmid_info {
-- 
2.50.1

Reply via email to