On 12/15/2025 10:56 AM, Philip Yang wrote:
An MQD BO on VRAM accessed via the FB aperture uses mtype UC (uncached); map
it to GART as mtype RW (cached) to reduce queue switch latency.
Add GART mm_node to kfd mem obj to free the GART entries after
MQD mem obj is freed.
Use a resource cursor to handle the VRAM resource, which may be on multiple
blocks, and use cursor_gart to handle the GART entries.
Signed-off-by: Philip Yang <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 94 +++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +-
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 2 +
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 ++
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
5 files changed, 109 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4f8bc7f35cdc..d7bf96a7b6b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -880,6 +880,67 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct
amdgpu_device *adev,
}
}
+/*
+ * Same function and MQD description from amdgpu_ttm_gart_bind_gfx9_mqd,
+ * except this is for MQD on VRAM BO and use dynamic alloc GART entries.
+ */
+static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev,
+ struct ttm_buffer_object *tbo,
+ struct drm_mm_node *mm_node,
+ uint64_t flags)
+{
+ uint64_t total_pages;
+ int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+ uint64_t page_idx, pages_per_xcc;
+ struct amdgpu_res_cursor cursor_gart;
+ struct amdgpu_res_cursor cursor;
+ uint64_t ctrl_flags = flags;
+ int i;
+
+ total_pages = tbo->base.size >> PAGE_SHIFT;
+
+ amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC,
&ctrl_flags);
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
+ amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW,
&flags);
+
+ pages_per_xcc = total_pages;
+ do_div(pages_per_xcc, num_xcc);
+
+ amdgpu_res_first(NULL, mm_node->start, total_pages, &cursor_gart);
No need to use cursor_gart; mm_node->start + n indicates where to update
the GART page table.
+ amdgpu_res_first(tbo->resource, 0, tbo->resource->size, &cursor);
+
+ for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+ u64 start_page;
+ u64 npages, n;
+ u64 pa;
+
+ start_page = cursor_gart.start;
+ pa = cursor.start + adev->vm_manager.vram_base_offset;
+ n = 1;
+ amdgpu_gart_map_vram_range(adev, pa, start_page, n,
+ flags, NULL);
+
+ npages = pages_per_xcc - 1;
+ while (npages) {
+ amdgpu_res_next(&cursor_gart, n);
+ amdgpu_res_next(&cursor, n * PAGE_SIZE);
+
+ start_page = cursor_gart.start;
+ pa = cursor.start + adev->vm_manager.vram_base_offset;
+ n = min3(cursor.size / PAGE_SIZE, cursor_gart.size,
npages);
+
+ amdgpu_gart_map_vram_range(adev, pa, start_page, n,
+ ctrl_flags, NULL);
+
+ npages -= n;
+ }
+
+ amdgpu_res_next(&cursor_gart, n);
+ amdgpu_res_next(&cursor, n * PAGE_SIZE);
+ }
+}
+
static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct ttm_buffer_object *tbo,
uint64_t flags)
@@ -1017,6 +1078,39 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return 0;
}
+/*
+ * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages to GART mapping
+ *
+ * call amdgpu_ttm_alloc_gart_entries to alloc GART dynamically
+ */
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+ struct drm_mm_node *mm_node,
+ u64 *gpu_addr)
+{
+ struct ttm_buffer_object *bo = &abo->tbo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ uint64_t flags;
+ int r;
+
+ /* Only for valid VRAM bo resource */
+ if (amdgpu_mem_type_to_domain(bo->resource->mem_type) !=
+ AMDGPU_GEM_DOMAIN_VRAM)
+ return 0;
+
+ r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, mm_node,
+ amdgpu_bo_ngpu_pages(abo), 0);
+ if (r)
+ return r;
+
+ /* compute PTE flags for this buffer object */
+ flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
+ amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags);
+ amdgpu_gart_invalidate_tlb(adev);
+
+ *gpu_addr = mm_node->start << PAGE_SHIFT;
+ return 0;
+}
+
/*
* amdgpu_ttm_recover_gart - Rebind GTT pages
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 25640bed7dc9..9f07856433fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -140,7 +140,6 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
-
int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
struct drm_mm_node *node,
u64 num_pages,
@@ -191,6 +190,9 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity
*entity,
u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+ struct drm_mm_node *mm_node,
+ u64 *gpu_addr);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index f78b249e1a41..edb72f4ef82d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -225,6 +225,8 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
if (mqd_mem_obj->mem) {
+ amdgpu_gtt_mgr_free_entries(&mm->dev->adev->mman.gtt_mgr,
+ &mqd_mem_obj->mm_node);
amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
kfree(mqd_mem_obj);
} else {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 14123e1a9716..5828220056bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node
*node,
kfree(mqd_mem_obj);
return NULL;
}
+
+ retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem,
+ &mqd_mem_obj->mm_node,
+
&(mqd_mem_obj->gpu_addr));
Here you create a new drm_mm_node for GART page table entries. Before that,
amdgpu_amdkfd_alloc_kernel_mem also creates GART page table entries and a
drm_mm_node. Is there duplication, or how do you handle the
entries/drm_mm_node from amdgpu_amdkfd_alloc_kernel_mem?
Regards
Xiaogang
+ if (retval) {
+ amdgpu_amdkfd_free_kernel_mem(node->adev,
&(mqd_mem_obj->mem));
+ kfree(mqd_mem_obj);
+ return NULL;
+ }
} else {
retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
&mqd_mem_obj);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 06cd675c9e74..55738b30c2ec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -253,6 +253,7 @@ struct kfd_mem_obj {
uint64_t gpu_addr;
uint32_t *cpu_ptr;
void *mem;
+ struct drm_mm_node mm_node;
};
struct kfd_vmid_info {