From: Mukul Joshi <[email protected]>

For GFX 12.1.0, set up the correct MTYPE for a BO depending on
its current location relative to the mapping GPU.

Signed-off-by: Mukul Joshi <[email protected]>
Reviewed-by: Alex Sierra <[email protected]>
Reviewed-by: Harish Kasiviswanathan <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c | 62 ++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 15 +++++++
 2 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
index 4ead488261a68..b9ae8469d5538 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
@@ -265,6 +265,62 @@ static void gmc_v12_1_get_vm_pde(struct amdgpu_device *adev, int level,
        }
 }
 
+#if 0
+static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev,
+                                         struct amdgpu_bo *bo,
+                                         uint64_t *flags)
+{
+       struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+       bool is_vram = bo->tbo.resource &&
+                      bo->tbo.resource->mem_type == TTM_PL_VRAM;
+       bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+                                    AMDGPU_GEM_CREATE_EXT_COHERENT);
+       bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
+       uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
+       bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
+       unsigned int mtype, mtype_local;
+       bool snoop = false;
+       bool is_local;
+
+       switch (gc_ip_version) {
+       case IP_VERSION(12, 1, 0):
+               mtype_local = MTYPE_RW;
+               if (amdgpu_mtype_local == 1) {
+                       DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
+                       mtype_local = MTYPE_NC;
+               } else if (amdgpu_mtype_local == 2) {
+                       DRM_INFO_ONCE("MTYPE_CC not supported, using MTYPE_RW instead for local memory\n");
+               } else {
+                       DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+               }
+
+               is_local = (is_vram && adev == bo_adev);
+               snoop = true;
+               if (uncached) {
+                       mtype = MTYPE_UC;
+               } else if (ext_coherent) {
+                       mtype = is_local ? mtype_local : MTYPE_UC;
+               } else {
+                       if (is_local)
+                               mtype = mtype_local;
+                       else
+                               mtype = MTYPE_NC;
+               }
+               break;
+       default:
+               if (uncached || coherent)
+                       mtype = MTYPE_UC;
+               else
+                       mtype = MTYPE_NC;
+       }
+
+       if (mtype != MTYPE_NC)
+               *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, mtype);
+
+       *flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+}
+#endif
+
 static void gmc_v12_1_get_vm_pte(struct amdgpu_device *adev,
                                 struct amdgpu_vm *vm,
                                 struct amdgpu_bo *bo,
@@ -306,11 +362,11 @@ static void gmc_v12_1_get_vm_pte(struct amdgpu_device *adev,
                               AMDGPU_GEM_CREATE_UNCACHED))
                *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
 
-       if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED)
-               *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
-
        if (adev->have_atomics_support)
                *flags |= AMDGPU_PTE_BUS_ATOMICS;
+
+       if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED)
+               *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
 }
 
 static const struct amdgpu_gmc_funcs gmc_v12_1_gmc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 903064610c23c..0210819b4c7f2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1287,6 +1287,21 @@ svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm,
        case IP_VERSION(12, 0, 1):
                mapping_flags |= AMDGPU_VM_MTYPE_NC;
                break;
+       case IP_VERSION(12, 1, 0):
+               snoop = true;
+               if (domain == SVM_RANGE_VRAM_DOMAIN) {
+                       /* local HBM  */
+                       if (bo_node->adev == node->adev)
+                               mapping_flags |= AMDGPU_VM_MTYPE_RW;
+                       /* Remote GPU memory */
+                       else
+                               mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC :
+                                                               AMDGPU_VM_MTYPE_NC;
+               /* system memory accessed by the dGPU */
+               } else {
+                       mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+               }
+               break;
        default:
                mapping_flags |= coherent ?
                        AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
-- 
2.51.1

Reply via email to