The MQD BO in VRAM is accessed via the FB aperture with mtype UC (uncached). Map it to GART with mtype RW (cached) to reduce queue switch latency.
Add helper amdgpu_ttm_alloc/free_gart_entries. Add helper amdgpu_ttm_gart_bind_gfx9_mqd_vram to bind VRAM pages to GART mapping. Add GART drm mm_node to kfd mem obj to free the GART entries after MQD is freed. Signed-off-by: Philip Yang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 103 ++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 8 ++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 1 + .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 ++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 5 files changed, 122 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 4f8bc7f35cdc..fc6f4daa9b87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -880,6 +880,42 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, } } +static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev, + struct ttm_buffer_object *tbo, + struct drm_mm_node *mm_node, + uint64_t flags) +{ + uint64_t total_pages; + int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); + uint64_t page_idx, pages_per_xcc; + uint64_t ctrl_flags = flags; + int i; + + total_pages = tbo->resource->size >> PAGE_SHIFT; + + amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags); + + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3)) + amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags); + + pages_per_xcc = total_pages; + do_div(pages_per_xcc, num_xcc); + + for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) { + u64 pa = (tbo->resource->start + page_idx) << PAGE_SHIFT; + u64 start_page = mm_node->start + page_idx; + + pa += adev->vm_manager.vram_base_offset; + amdgpu_gart_map_vram_range(adev, pa, start_page, 1, + flags, NULL); + + amdgpu_gart_map_vram_range(adev, pa + PAGE_SIZE, + start_page + 1, + pages_per_xcc - 1, + ctrl_flags, NULL); + } +} +s static void amdgpu_ttm_gart_bind(struct 
amdgpu_device *adev, struct ttm_buffer_object *tbo, uint64_t flags) @@ -1017,6 +1053,73 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) return 0; } +int amdgpu_ttm_alloc_gart_entries(struct amdgpu_device *adev, + struct drm_mm_node *mm_node, + u64 num_pages) +{ + struct ttm_resource_manager *man; + struct amdgpu_gtt_mgr *mgr; + int r; + + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + mgr = container_of(man, struct amdgpu_gtt_mgr, manager); + + spin_lock(&mgr->lock); + r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages, + 0, 0, 0, + adev->gmc.gart_size >> PAGE_SHIFT, + DRM_MM_INSERT_BEST); + spin_unlock(&mgr->lock); + return r; +} + +void amdgpu_ttm_free_gart_entries(struct amdgpu_device *adev, + struct drm_mm_node *mm_node) +{ + struct ttm_resource_manager *man; + struct amdgpu_gtt_mgr *mgr; + + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + mgr = container_of(man, struct amdgpu_gtt_mgr, manager); + + spin_lock(&mgr->lock); + if (drm_mm_node_allocated(mm_node)) + drm_mm_remove_node(mm_node); + spin_unlock(&mgr->lock); +} + +/* + * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages to GART mapping + * + * call amdgpu_ttm_alloc_gart_entries to alloc GART dynamically + */ +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo, + struct drm_mm_node *mm_node, + u64 *gpu_addr) +{ + struct ttm_buffer_object *bo = &abo->tbo; + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + uint64_t flags; + int r; + + /* Only for valid VRAM bo resource */ + if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET) + return 0; + + r = amdgpu_ttm_alloc_gart_entries(adev, mm_node, + amdgpu_bo_ngpu_pages(abo)); + if (r) + return r; + + /* compute PTE flags for this buffer object */ + flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource); + amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags); + amdgpu_gart_invalidate_tlb(adev); + + *gpu_addr = mm_node->start << PAGE_SHIFT; + return 0; +} + /* * amdgpu_ttm_recover_gart - Rebind GTT pages * 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 72488124aa59..cb6123358843 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -185,6 +185,14 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity, u64 k_job_id); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo, + struct drm_mm_node *mm_node, + u64 *gpu_addr); +int amdgpu_ttm_alloc_gart_entries(struct amdgpu_device *adev, + struct drm_mm_node *mm_node, + u64 num_pages); +void amdgpu_ttm_free_gart_entries(struct amdgpu_device *adev, + struct drm_mm_node *mm_node); void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index f78b249e1a41..00e1e5b30a3a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -225,6 +225,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { if (mqd_mem_obj->mem) { + amdgpu_ttm_free_gart_entries(mm->dev->adev, &mqd_mem_obj->mm_node); amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem); kfree(mqd_mem_obj); } else { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 14123e1a9716..5828220056bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node, kfree(mqd_mem_obj); return NULL; } + + retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem, + &mqd_mem_obj->mm_node, + &(mqd_mem_obj->gpu_addr)); + if (retval) { + amdgpu_amdkfd_free_kernel_mem(node->adev, &(mqd_mem_obj->mem)); + kfree(mqd_mem_obj); + return NULL; + } } 
else { retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd), &mqd_mem_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 29419b3249cf..fdde907836fb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -252,6 +252,7 @@ struct kfd_mem_obj { uint64_t gpu_addr; uint32_t *cpu_ptr; void *mem; + struct drm_mm_node mm_node; }; struct kfd_vmid_info { -- 2.50.1
