Hi Alan,

Based on your older patches, I understand that this patch is required because
the host (gim) driver assumes the guest driver is available within 3s. I am not
sure how the 3s timeout was decided. I feel a better approach would be a more
robust handshake between the guest and host drivers. You might be able to
temporarily get away with rearranging the initialization code, but that could
break easily if some other change in the future causes a delay.

Best Regards,
Harish


On 2025-11-17 01:38, chong li wrote:
> [Why]
> function "devm_memremap_pages" in function "kgd2kfd_init_zone_device",
> sometimes cost too much time.
> 
> [How]
> move the function "kgd2kfd_init_zone_device"
> after release full gpu access(amdgpu_virt_release_full_gpu).
> 
> v2:
> improve the coding style.
> 
> Signed-off-by: chong li <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  8 +++++++-
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c  | 23 ++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.h  |  6 ++++++
>  4 files changed, 37 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 40c46e6c8898..6d204ba2c267 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -37,7 +37,7 @@
>  #include "amdgpu_sync.h"
>  #include "amdgpu_vm.h"
>  #include "amdgpu_xcp.h"
> -
> +#include "kfd_topology.h"
>  extern uint64_t amdgpu_amdkfd_total_mem_size;
>  
>  enum TLB_FLUSH_TYPE {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 0b40ddcb8ba1..b4e1f258119c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3333,7 +3333,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
> *adev)
>  
>       /* Don't init kfd if whole hive need to be reset during init */
>       if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
> -             kgd2kfd_init_zone_device(adev);
>               amdgpu_amdkfd_device_init(adev);
>       }
>  
> @@ -4931,6 +4930,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>  
>       if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
>               amdgpu_xgmi_reset_on_init(adev);
> +
> +     /* Don't init kfd if whole hive need to be reset during init */
> +     if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
> +             kgd2kfd_init_zone_device(adev);
> +             kfd_update_svm_support_properties(adev);
> +     }
> +
>       /*
>        * Place those sysfs registering after `late_init`. As some of those
>        * operations performed in `late_init` might affect the sysfs
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 8644039777b8..8511b00a7463 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -2475,3 +2475,26 @@ int kfd_debugfs_rls_by_device(struct seq_file *m, void 
> *data)
>  }
>  
>  #endif
> +
> +void kfd_update_svm_support_properties(struct amdgpu_device *adev)
> +{
> +     struct kfd_topology_device *dev;
> +     int ret;
> +
> +     down_write(&topology_lock);
> +     list_for_each_entry(dev, &topology_device_list, list) {
> +             if (!dev->gpu || dev->gpu->adev != adev)
> +                     continue;
> +
> +             if (KFD_IS_SVM_API_SUPPORTED(adev)) {
> +                     dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
> +                     ret = kfd_topology_update_sysfs();
> +                     if (!ret)
> +                             sys_props.generation_count++;
> +                     else
> +                             dev_err(adev->dev, "Failed to update SVM 
> support properties. ret=%d\n", ret);
> +             } else
> +                     dev->node_props.capability &= ~HSA_CAP_SVMAPI_SUPPORTED;
> +     }
> +     up_write(&topology_lock);
> +}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> index ab7a3bf1bdef..129b447fcf84 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> @@ -202,4 +202,10 @@ struct kfd_topology_device *kfd_create_topology_device(
>               struct list_head *device_list);
>  void kfd_release_topology_device_list(struct list_head *device_list);
>  
> +#if IS_ENABLED(CONFIG_HSA_AMD)
> +void kfd_update_svm_support_properties(struct amdgpu_device *adev);
> +#else
> +static inline void kfd_update_svm_support_properties(struct amdgpu_device 
> *adev) {}
> +#endif
> +
>  #endif /* __KFD_TOPOLOGY_H__ */

Reply via email to