An MQD BO in VRAM is accessed through the FB aperture with mtype UC
(uncached). Map it through GART with mtype RW (cached) instead to reduce
queue switch latency.
Add helpers amdgpu_gtt_mgr_alloc_entries/amdgpu_gtt_mgr_free_entries to
reserve and release GART entries. Add helper
amdgpu_ttm_gart_bind_gfx9_mqd_vram to bind VRAM pages to the GART
entries. Add a GART mm_node to the kfd mem obj so the GART entries can
be freed after the MQD mem obj is freed.

Signed-off-by: Philip Yang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c  | 37 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c      | 71 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h      | 10 +++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c |  1 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c  |  9 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h        |  1 +
 6 files changed, 129 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 895c1e4c6747..c83e1cf1f02d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -321,3 +321,40 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
 	ttm_resource_manager_cleanup(man);
 	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
 }
+
+int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_device *adev,
+				 struct drm_mm_node *mm_node,
+				 u64 num_pages, u64 alignment,
+				 unsigned long color, u64 range_start,
+				 u64 range_end, enum drm_mm_insert_mode mode)
+{
+	struct ttm_resource_manager *man;
+	struct amdgpu_gtt_mgr *mgr;
+	int r;
+
+	man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+	mgr = container_of(man, struct amdgpu_gtt_mgr, manager);
+
+	spin_lock(&mgr->lock);
+	r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages,
+					alignment, color, range_start,
+					range_end,
+					mode);
+	spin_unlock(&mgr->lock);
+	return r;
+}
+
+void amdgpu_gtt_mgr_free_entries(struct amdgpu_device *adev,
+				 struct drm_mm_node *mm_node)
+{
+	struct ttm_resource_manager *man;
+	struct amdgpu_gtt_mgr *mgr;
+
+	man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+	mgr = container_of(man, struct amdgpu_gtt_mgr, manager);
+
+	spin_lock(&mgr->lock);
+	if (drm_mm_node_allocated(mm_node))
+		drm_mm_remove_node(mm_node);
+	spin_unlock(&mgr->lock);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4f8bc7f35cdc..43009d3809b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -880,6 +880,42 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
 	}
 }
 
+static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev,
+					       struct ttm_buffer_object *tbo,
+					       struct drm_mm_node *mm_node,
+					       uint64_t flags)
+{
+	uint64_t total_pages;
+	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+	uint64_t page_idx, pages_per_xcc;
+	uint64_t ctrl_flags = flags;
+	int i;
+
+	total_pages = tbo->resource->size >> PAGE_SHIFT;
+
+	amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags);
+
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
+		amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags);
+
+	pages_per_xcc = total_pages;
+	do_div(pages_per_xcc, num_xcc);
+
+	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+		u64 pa = (tbo->resource->start + page_idx) << PAGE_SHIFT;
+		u64 start_page = mm_node->start + page_idx;
+
+		pa += adev->vm_manager.vram_base_offset;
+		amdgpu_gart_map_vram_range(adev, pa, start_page, 1,
+					   flags, NULL);
+
+		amdgpu_gart_map_vram_range(adev, pa + PAGE_SIZE,
+					   start_page + 1,
+					   pages_per_xcc - 1,
+					   ctrl_flags, NULL);
+	}
+}
+
 static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
 				 struct ttm_buffer_object *tbo,
 				 uint64_t flags)
@@ -1017,6 +1053,41 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 	return 0;
 }
 
+/*
+ * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages to GART mapping
+ *
+ * call amdgpu_gtt_mgr_alloc_entries to alloc GART entries dynamically
+ */
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+				  struct drm_mm_node *mm_node,
+				  u64 *gpu_addr)
+{
+	struct ttm_buffer_object *bo = &abo->tbo;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+	uint64_t flags;
+	int r;
+
+	/* Only for valid VRAM bo resource */
+	if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
+		return 0;
+
+	r = amdgpu_gtt_mgr_alloc_entries(adev, mm_node,
+					 amdgpu_bo_ngpu_pages(abo),
+					 0, 0, 0,
+					 adev->gmc.gart_size >> PAGE_SHIFT,
+					 0);
+	if (r)
+		return r;
+
+	/* compute PTE flags for this buffer object */
+	flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
+	amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags);
+	amdgpu_gart_invalidate_tlb(adev);
+
+	*gpu_addr = mm_node->start << PAGE_SHIFT;
+	return 0;
+}
+
 /*
  * amdgpu_ttm_recover_gart - Rebind GTT pages
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 72488124aa59..eff536f12cb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -140,6 +140,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
 
 bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
 void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
+int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_device *adev,
+				 struct drm_mm_node *mm_node,
+				 u64 num_pages, u64 alignment,
+				 unsigned long color, u64 range_start,
+				 u64 range_end, enum drm_mm_insert_mode mode);
+void amdgpu_gtt_mgr_free_entries(struct amdgpu_device *adev,
+				 struct drm_mm_node *mm_node);
 
 uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
 
@@ -185,6 +192,9 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
 			u64 k_job_id);
 
 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+				  struct drm_mm_node *mm_node,
+				  u64 *gpu_addr);
 void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index f78b249e1a41..0bf9c35112fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -225,6 +225,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
 		     struct kfd_mem_obj *mqd_mem_obj)
 {
 	if (mqd_mem_obj->mem) {
+		amdgpu_gtt_mgr_free_entries(mm->dev->adev, &mqd_mem_obj->mm_node);
 		amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
 		kfree(mqd_mem_obj);
 	} else {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 14123e1a9716..5828220056bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
 			kfree(mqd_mem_obj);
 			return NULL;
 		}
+
+		retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem,
+						       &mqd_mem_obj->mm_node,
+						       &(mqd_mem_obj->gpu_addr));
+		if (retval) {
+			amdgpu_amdkfd_free_kernel_mem(node->adev, &(mqd_mem_obj->mem));
+			kfree(mqd_mem_obj);
+			return NULL;
+		}
 	} else {
 		retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
 					     &mqd_mem_obj);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 29419b3249cf..fdde907836fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -252,6 +252,7 @@ struct kfd_mem_obj {
 	uint64_t gpu_addr;
 	uint32_t *cpu_ptr;
 	void *mem;
+	struct drm_mm_node mm_node;
 };
 
 struct kfd_vmid_info {
-- 
2.50.1
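
For review context, a minimal sketch of how a caller is expected to pair
the new helpers. The wrapper names mqd_map_gart()/mqd_unmap_gart() are
hypothetical, not part of the patch; they mirror the allocate_mqd() and
kfd_free_mqd_cp() hunks above, with error handling trimmed:

	/*
	 * Illustration only -- not part of the patch. Assumes an MQD BO
	 * already allocated in VRAM, as in allocate_mqd() above.
	 */
	#include "amdgpu.h"
	#include "amdgpu_ttm.h"

	static int mqd_map_gart(struct amdgpu_bo *mqd_bo,
				struct drm_mm_node *mm_node, u64 *gpu_addr)
	{
		/*
		 * Reserve GART entries and bind the BO's VRAM pages: the
		 * MQD page is mapped mtype RW (cached) on GC 9.4.3+, the
		 * remaining ctrl pages mtype NC. *gpu_addr is the GART
		 * address (mm_node->start << PAGE_SHIFT) that replaces
		 * the FB-aperture (UC) address.
		 */
		return amdgpu_ttm_alloc_gart_vram_bo(mqd_bo, mm_node,
						     gpu_addr);
	}

	static void mqd_unmap_gart(struct amdgpu_device *adev,
				   struct drm_mm_node *mm_node)
	{
		/*
		 * Return the drm_mm range to the GTT manager. Safe even
		 * when the node was never allocated; the helper checks
		 * drm_mm_node_allocated() under the manager lock.
		 */
		amdgpu_gtt_mgr_free_entries(adev, mm_node);
	}

Note the struct drm_mm_node must stay alive for the lifetime of the
mapping (the patch embeds it in struct kfd_mem_obj for this reason),
since drm_mm_insert_node_in_range() links the node into the manager's
drm_mm until it is removed.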
