From: Mukul Joshi <[email protected]>

For GFX 12.1.0, set up the correct MTYPE for a BO depending on its current location relative to the mapping GPU.
Signed-off-by: Mukul Joshi <[email protected]>
Reviewed-by: Alex Sierra <[email protected]>
Reviewed-by: Harish Kasiviswanathan <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c | 62 ++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 15 +++++++
 2 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
index 4ead488261a68..b9ae8469d5538 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
@@ -265,6 +265,62 @@ static void gmc_v12_1_get_vm_pde(struct amdgpu_device *adev, int level,
 	}
 }
 
+#if 0 /* NOTE(review): helper is compiled out; no caller appears in the hunks of this patch */
+static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev,
+					  struct amdgpu_bo *bo,
+					  uint64_t *flags)
+{
+	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	bool is_vram = bo->tbo.resource &&
+		bo->tbo.resource->mem_type == TTM_PL_VRAM;
+	bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+				     AMDGPU_GEM_CREATE_EXT_COHERENT);
+	bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
+	uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
+	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
+	unsigned int mtype, mtype_local;
+	bool snoop = false; /* only the 12.1.0 case below sets this to true */
+	bool is_local; /* assigned and read only inside the 12.1.0 case */
+
+	switch (gc_ip_version) {
+	case IP_VERSION(12, 1, 0):
+		mtype_local = MTYPE_RW; /* default; amdgpu_mtype_local == 1 switches it to MTYPE_NC */
+		if (amdgpu_mtype_local == 1) {
+			DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
+			mtype_local = MTYPE_NC;
+		} else if (amdgpu_mtype_local == 2) { /* value 2 only logs; mtype_local stays MTYPE_RW */
+			DRM_INFO_ONCE("MTYPE_CC not supported, using MTYPE_RW instead for local memory\n");
+		} else {
+			DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+		}
+
+		is_local = (is_vram && adev == bo_adev); /* BO resource is in VRAM and bo_adev is the mapping adev */
+		snoop = true;
+		if (uncached) { /* uncached takes priority over ext_coherent and locality */
+			mtype = MTYPE_UC;
+		} else if (ext_coherent) {
+			mtype = is_local ? mtype_local : MTYPE_UC;
+		} else {
+			if (is_local)
+				mtype = mtype_local;
+			else
+				mtype = MTYPE_NC;
+		}
+		break;
+	default: /* other GC versions: locality is ignored and snoop stays false */
+		if (uncached || coherent)
+			mtype = MTYPE_UC;
+		else
+			mtype = MTYPE_NC;
+	}
+
+	if (mtype != MTYPE_NC) /* for MTYPE_NC the MTYPE field of *flags is left as passed in */
+		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, mtype);
+
+	*flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+}
+#endif
+
 static void gmc_v12_1_get_vm_pte(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm,
 				 struct amdgpu_bo *bo,
@@ -306,11 +362,11 @@ static void gmc_v12_1_get_vm_pte(struct amdgpu_device *adev,
 			 AMDGPU_GEM_CREATE_UNCACHED))
 		*flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
 
-	if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED)
-		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
-
 	if (adev->have_atomics_support)
 		*flags |= AMDGPU_PTE_BUS_ATOMICS;
+
+	if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED) /* same check as removed above, now after the BUS_ATOMICS bit is set */
+		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
 }
 
 static const struct amdgpu_gmc_funcs gmc_v12_1_gmc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 903064610c23c..0210819b4c7f2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1287,6 +1287,21 @@ svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm,
 	case IP_VERSION(12, 0, 1):
 		mapping_flags |= AMDGPU_VM_MTYPE_NC;
 		break;
+	case IP_VERSION(12, 1, 0):
+		snoop = true; /* NOTE(review): the consumer of snoop is outside this hunk */
+		if (domain == SVM_RANGE_VRAM_DOMAIN) {
+			/* local HBM */
+			if (bo_node->adev == node->adev)
+				mapping_flags |= AMDGPU_VM_MTYPE_RW;
+			/* Remote GPU memory */
+			else
+				mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC :
+								AMDGPU_VM_MTYPE_NC;
+		/* system memory accessed by the dGPU */
+		} else {
+			mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+		}
+		break;
 	default:
 		mapping_flags |= coherent ? AMDGPU_VM_MTYPE_UC :
 			AMDGPU_VM_MTYPE_NC;
-- 
2.51.1
