On 2024-05-03 18:06, Harish Kasiviswanathan wrote:
gpu_id needs to be unique for user space to identify GPUs via KFD
interface. In the current implementation there is a very small
probability of having non unique gpu_ids.

v2: Add check to confirm if gpu_id is unique. If not unique, find one
     Changed commit header to reflect the above

Signed-off-by: Harish Kasiviswanathan <harish.kasiviswanat...@amd.com>
---
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 26 ++++++++++++++++++++++-
  1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index b93913934b03..01d4c2e10c6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1095,6 +1095,8 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
        uint32_t hashout;
        uint32_t buf[8];
        uint64_t local_mem_size;
+       struct kfd_topology_device *dev;
+       bool is_unique;
        int i;
if (!gpu)
@@ -1115,6 +1117,28 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
        for (i = 0, hashout = 0; i < 8; i++)
                hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
+ /* hash generated could be non-unique. Check if it is unique.
+        * If not unique increment till unique one is found. In case
+        * of overflow, restart from 1
+       */
+       down_read(&topology_lock);
+       do {
+               is_unique = true;
+               list_for_each_entry(dev, &topology_device_list, list) {
+                       if (dev->gpu && dev->gpu_id == hashout) {
+                               is_unique = false;
+                               break;
+                       }
+               }
+               if (unlikely(!is_unique)) {
+                       hashout = (hashout + 1) &
+                                 ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
+                       if (!hashout)
+                               hashout = 1;

This doesn't catch the case that hashout was 0 before incrementing it, and was found to be unique.

Regards,
  Felix


+               }
+       } while (!is_unique);
+       up_read(&topology_lock);
+
        return hashout;
  }
  /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
@@ -1946,7 +1970,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
        struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
        struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
- gpu_id = kfd_generate_gpu_id(gpu);
        if (gpu->xcp && !gpu->xcp->ddev) {
                dev_warn(gpu->adev->dev,
                         "Won't add GPU to topology since it has no drm node 
assigned.");
@@ -1969,6 +1992,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
        if (res)
                return res;
+ gpu_id = kfd_generate_gpu_id(gpu);
        dev->gpu_id = gpu_id;
        gpu->id = gpu_id;

Reply via email to