We need heavy-weight TLB flushes for more than just SVM. If they are broken, ROCm will be affected either way.

Regards,
  Felix


On 2023-09-07 08:08, Lang Yu wrote:
GC 10.1.3 and GC 10.1.4 have problems with TLB_FLUSH_HEAVYWEIGHT,
which SVM uses in svm_range_unmap_from_gpus().
As a result, SVM causes problems on these GC versions.

Signed-off-by: Lang Yu <lang...@amd.com>
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 22 +++++++++++++++++-----
  1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 7d82c7da223a..dd3db3d88d59 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -992,6 +992,22 @@ static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
  /* Each VRAM page uses sizeof(struct page) on system memory */
  #define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
+static inline bool is_zone_device_needed(struct amdgpu_device *adev)
+{
+       /* Page migration works on gfx9 or newer */
+       if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
+               return false;
+
+       if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) ||
+           adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4))
+               return false;
+
+       if (adev->gmc.is_app_apu)
+               return false;
+
+       return true;
+}
+
  int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
  {
        struct amdgpu_kfd_dev *kfddev = &adev->kfd;
@@ -1000,11 +1016,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
        unsigned long size;
        void *r;
-       /* Page migration works on gfx9 or newer */
-       if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
-               return -EINVAL;
-
-       if (adev->gmc.is_app_apu)
+       if (!is_zone_device_needed(adev))
                return 0;
        pgmap = &kfddev->pgmap;

Reply via email to