[AMD Official Use Only - Internal Distribution Only]

Hi Andrey,
The existing reset functions (amdgpu_device_gpu_recover or amdgpu_do_asic_reset)
assume the driver already has the correct hive info. But in my case, that's not
true. The GPUs are in a bad state and the XGMI TA might not function properly,
so the driver cannot get the hive and node info when probing the device. It
means the driver doesn't even know which hive the device belongs to on a
system with a multiple-hive configuration (e.g., 8 GPUs in two hives). The only
solution I can think of is to let the driver trigger the reset on all GPUs at
the same time after the driver does the minimum initialization on the HW
(bringing up the SMU IP), no matter whether they belong to the same hive or
not, and call amdgpu_xgmi_add_device for each device after re-init.
The 100 ms delay was added after the BACO reset. I think those delays can be
removed; let me verify it.

Regards
Shaoyun.liu 



-----Original Message-----
From: Grodzovsky, Andrey <andrey.grodzov...@amd.com> 
Sent: Friday, March 5, 2021 2:27 PM
To: Liu, Shaoyun <shaoyun....@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 5/5] drm/amdgpu: Reset the devices in the XGMI hive during 
probe



On 2021-03-05 12:52 p.m., shaoyunl wrote:
> In passthrough configuration, hypervisior will trigger the 
> SBR(Secondary bus reset) to the devices without sync to each other. 
> This could cause device hang since for XGMI configuration, all the 
> devices within the hive need to be reset at a limit time slot. This 
> serial of patches try to solve this issue by co-operate with new SMU 
> which will only do minimum house keeping to response the SBR request 
> but don't do the real reset job and leave it to driver. Driver need to 
> do the whole sw init and minimum HW init to bring up the SMU and 
> trigger the reset(possibly BACO) on all the ASICs at the same time
> 
> Signed-off-by: shaoyunl <shaoyun....@amd.com>
> Change-Id: I34e838e611b7623c7ad824704c7ce350808014fc
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  13 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 102 +++++++++++++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  71 ++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h    |   1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   |   8 +-
>   5 files changed, 165 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d46d3794699e..5602c6edee97 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -125,6 +125,10 @@ struct amdgpu_mgpu_info
>       uint32_t                        num_gpu;
>       uint32_t                        num_dgpu;
>       uint32_t                        num_apu;
> +
> +     /* delayed reset_func for XGMI configuration if necessary */
> +     struct delayed_work             delayed_reset_work;
> +     bool                            pending_reset;
>   };
>   
>   #define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH     256
> @@ -1124,6 +1128,15 @@ void amdgpu_device_indirect_wreg64(struct 
> amdgpu_device *adev,
>   bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
>   bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
>   
> +int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> +                               struct amdgpu_job *job,
> +                               bool *need_full_reset_arg);
> +
> +int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
> +                       struct list_head *device_list_handle,
> +                       bool *need_full_reset_arg,
> +                       bool skip_hw_reset);
> +
>   int emu_soc_asic_init(struct amdgpu_device *adev);
>   
>   /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 3c35b0c1e710..5b520f70e660 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1220,6 +1220,10 @@ bool amdgpu_device_need_post(struct amdgpu_device 
> *adev)
>               }
>       }
>   
> +     /* Don't post if we need to reset whole hive on init */
> +     if (adev->gmc.xgmi.pending_reset)
> +             return false;
> +
>       if (adev->has_hw_reset) {
>               adev->has_hw_reset = false;
>               return true;
> @@ -2149,6 +2153,9 @@ static int amdgpu_device_fw_loading(struct 
> amdgpu_device *adev)
>                       if (adev->ip_blocks[i].version->type != 
> AMD_IP_BLOCK_TYPE_PSP)
>                               continue;
>   
> +                     if (!adev->ip_blocks[i].status.sw)
> +                             continue;
> +
>                       /* no need to do the fw loading again if already done*/
>                       if (adev->ip_blocks[i].status.hw == true)
>                               break;
> @@ -2289,7 +2296,10 @@ static int amdgpu_device_ip_init(struct 
> amdgpu_device *adev)
>   
>       if (adev->gmc.xgmi.num_physical_nodes > 1)
>               amdgpu_xgmi_add_device(adev);
> -     amdgpu_amdkfd_device_init(adev);
> +
> +     /* Don't init kfd if whole hive need to be reset during init */
> +     if (!adev->gmc.xgmi.pending_reset)
> +             amdgpu_amdkfd_device_init(adev);
>   
>       amdgpu_fru_get_product_info(adev);
>   
> @@ -2734,6 +2744,16 @@ static int amdgpu_device_ip_suspend_phase2(struct 
> amdgpu_device *adev)
>                       adev->ip_blocks[i].status.hw = false;
>                       continue;
>               }
> +
> +             /* skip unnecessary suspend if we do not initialize them yet */
> +             if (adev->gmc.xgmi.pending_reset &&
> +                 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC 
> ||
> +                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC 
> ||
> +                   adev->ip_blocks[i].version->type == 
> AMD_IP_BLOCK_TYPE_COMMON ||
> +                   adev->ip_blocks[i].version->type == 
> AMD_IP_BLOCK_TYPE_IH)) {
> +                     adev->ip_blocks[i].status.hw = false;
> +                     continue;
> +             }
>               /* XXX handle errors */
>               r = adev->ip_blocks[i].version->funcs->suspend(adev);
>               /* XXX handle errors */
> @@ -3407,10 +3427,28 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>        *  E.g., driver was not cleanly unloaded previously, etc.
>        */
>       if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
> -             r = amdgpu_asic_reset(adev);
> -             if (r) {
> -                     dev_err(adev->dev, "asic reset on init failed\n");
> -                     goto failed;
> +             if (adev->gmc.xgmi.num_physical_nodes) {
> +                     dev_info(adev->dev, "Pending hive reset.\n");
> +                     adev->gmc.xgmi.pending_reset = true;
> +                     /* Only need to init necessary block for SMU to handle 
> the reset */
> +                     for (i = 0; i < adev->num_ip_blocks; i++) {
> +                             if (!adev->ip_blocks[i].status.valid)
> +                                     continue;
> +                             if (!(adev->ip_blocks[i].version->type == 
> AMD_IP_BLOCK_TYPE_GMC ||
> +                                   adev->ip_blocks[i].version->type == 
> AMD_IP_BLOCK_TYPE_COMMON ||
> +                                   adev->ip_blocks[i].version->type == 
> AMD_IP_BLOCK_TYPE_IH ||
> +                                   adev->ip_blocks[i].version->type == 
> AMD_IP_BLOCK_TYPE_SMC)) {
> +                                     DRM_DEBUG("IP %s disabed for 
> hw_init.\n",
> +                                             
> adev->ip_blocks[i].version->funcs->name);
> +                                     adev->ip_blocks[i].status.hw = true;
> +                             }
> +                     }
> +             } else {
> +                     r = amdgpu_asic_reset(adev);
> +                     if (r) {
> +                             dev_err(adev->dev, "asic reset on init 
> failed\n");
> +                             goto failed;
> +                     }
>               }
>       }
>   
> @@ -3541,19 +3579,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>       /* enable clockgating, etc. after ib tests, etc. since some blocks 
> require
>        * explicit gating rather than handling it automatically.
>        */
> -     r = amdgpu_device_ip_late_init(adev);
> -     if (r) {
> -             dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
> -             amdgpu_vf_error_put(adev, 
> AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
> -             goto failed;
> +     if (!adev->gmc.xgmi.pending_reset) {
> +             r = amdgpu_device_ip_late_init(adev);
> +             if (r) {
> +                     dev_err(adev->dev, "amdgpu_device_ip_late_init 
> failed\n");
> +                     amdgpu_vf_error_put(adev, 
> AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
> +                     goto failed;
> +             }
> +             /* must succeed. */
> +             amdgpu_ras_resume(adev);
> +             queue_delayed_work(system_wq, &adev->delayed_init_work,
> +                                msecs_to_jiffies(AMDGPU_RESUME_MS));
>       }
>   
> -     /* must succeed. */
> -     amdgpu_ras_resume(adev);
> -
> -     queue_delayed_work(system_wq, &adev->delayed_init_work,
> -                        msecs_to_jiffies(AMDGPU_RESUME_MS));
> -
>       if (amdgpu_sriov_vf(adev))
>               flush_delayed_work(&adev->delayed_init_work);
>   
> @@ -3570,6 +3608,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>       if (amdgpu_device_cache_pci_state(adev->pdev))
>               pci_restore_state(pdev);
>   
> +     if (adev->gmc.xgmi.pending_reset)
> +             queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
> +                                msecs_to_jiffies(AMDGPU_RESUME_MS));
> +
>       return 0;
>   
>   failed:
> @@ -4240,14 +4282,16 @@ bool amdgpu_device_should_recover_gpu(struct 
> amdgpu_device *adev)
>   }
>   
>   
> -static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> -                                     struct amdgpu_job *job,
> -                                     bool *need_full_reset_arg)
> +int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> +                               struct amdgpu_job *job,
> +                               bool *need_full_reset_arg)
>   {
>       int i, r = 0;
>       bool need_full_reset  = *need_full_reset_arg;
>   
> -     amdgpu_debugfs_wait_dump(adev);
> +     /* no need to dump if device is not in good state during probe period */
> +     if (!adev->gmc.xgmi.pending_reset)
> +             amdgpu_debugfs_wait_dump(adev);
>   
>       if (amdgpu_sriov_vf(adev)) {
>               /* stop the data exchange thread */ @@ -4293,10 +4337,10 @@ 
> static 
> int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>       return r;
>   }
>   
> -static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
> -                            struct list_head *device_list_handle,
> -                            bool *need_full_reset_arg,
> -                            bool skip_hw_reset)
> +int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
> +                       struct list_head *device_list_handle,
> +                       bool *need_full_reset_arg,
> +                       bool skip_hw_reset)
>   {
>       struct amdgpu_device *tmp_adev = NULL;
>       bool need_full_reset = *need_full_reset_arg, vram_lost = false; @@ 
> -4310,6 +4354,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
> *hive,
>               list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
>                       /* For XGMI run all resets in parallel to speed up the 
> process */
>                       if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
> +                             tmp_adev->gmc.xgmi.pending_reset = false;
>                               if (!queue_work(system_unbound_wq, 
> &tmp_adev->xgmi_reset_work))
>                                       r = -EALREADY;
>                       } else
> @@ -4348,10 +4393,10 @@ static int amdgpu_do_asic_reset(struct 
> amdgpu_hive_info *hive,
>       list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
>               if (need_full_reset) {
>                       /* post card */
> -                     if (amdgpu_device_asic_init(tmp_adev))
> +                     r = amdgpu_device_asic_init(tmp_adev);
> +                     if (r) {
>                               dev_warn(tmp_adev->dev, "asic atom init 
> failed!");
> -
> -                     if (!r) {
> +                     } else {
>                               dev_info(tmp_adev->dev, "GPU reset succeeded, 
> trying to resume\n");
>                               r = amdgpu_device_ip_resume_phase1(tmp_adev);
>                               if (r)
> @@ -4384,6 +4429,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
> *hive,
>                                */
>                               amdgpu_register_gpu_instance(tmp_adev);
>   
> +                             if (!hive && 
> tmp_adev->gmc.xgmi.num_physical_nodes > 1)
> +                                     amdgpu_xgmi_add_device(tmp_adev);
> +
>                               r = amdgpu_device_ip_late_init(tmp_adev);
>                               if (r)
>                                       goto out;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 253c59e0a100..aebe4bc561ee 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -44,6 +44,7 @@
>   #include "amdgpu_amdkfd.h"
>   
>   #include "amdgpu_ras.h"
> +#include "amdgpu_xgmi.h"
>   
>   /*
>    * KMS wrapper.
> @@ -167,8 +168,13 @@ uint amdgpu_freesync_vid_mode;
>   int amdgpu_reset_method = -1; /* auto */
>   int amdgpu_num_kcq = -1;
>   
> +static void amdgpu_drv_delayed_reset_work_handler(struct work_struct 
> +*work);
> +
>   struct amdgpu_mgpu_info mgpu_info = {
>       .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> +     .delayed_reset_work = __DELAYED_WORK_INITIALIZER(
> +                     mgpu_info.delayed_reset_work,
> +                     amdgpu_drv_delayed_reset_work_handler, 0),
>   };
>   int amdgpu_ras_enable = -1;
>   uint amdgpu_ras_mask = 0xffffffff;
> @@ -1297,6 +1303,71 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
>       adev->mp1_state = PP_MP1_STATE_NONE;
>   }
>   
> +/**
> + * amdgpu_drv_delayed_reset_work_handler - work handler for reset
> + *
> + * @work: work_struct.
> + */
> +static void amdgpu_drv_delayed_reset_work_handler(struct work_struct 
> +*work) {
> +     struct list_head device_list;
> +     struct amdgpu_device *adev;
> +     int i, r;
> +     bool need_full_reset = true;
> +
> +     mutex_lock(&mgpu_info.mutex);
> +     if (mgpu_info.pending_reset == true) {
> +             mutex_unlock(&mgpu_info.mutex);
> +             return;
> +     }
> +     mgpu_info.pending_reset = true;
> +     mutex_unlock(&mgpu_info.mutex);
> +
> +     for (i = 0; i < mgpu_info.num_dgpu; i++) {
> +             adev = mgpu_info.gpu_ins[i].adev;
> +             r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);

Why is amdgpu_device_pre_asic_reset needed?

> +             if (r) {
> +                     dev_err(adev->dev, "GPU pre asic reset failed with err, 
> %d for drm dev, %s ",
> +                             r, adev_to_drm(adev)->unique);
> +             }
> +             if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work))
> +                     r = -EALREADY;

amdgpu_do_asic_reset below will already schedule xgmi_reset_work for this
device. What you could do instead is call amdgpu_do_asic_reset for each member
of the hive, and because there is a task barrier in
amdgpu_device_xgmi_reset_func, it will already synchronize all the resets to
the same point in time.

> +     }
> +     msleep(100);

What is the 100 ms waiting for?

> +     for (i = 0; i < mgpu_info.num_dgpu; i++) {
> +             adev = mgpu_info.gpu_ins[i].adev;
> +             adev->gmc.xgmi.pending_reset = false;
> +             flush_work(&adev->xgmi_reset_work);
> +     }
> +
> +     msleep(100);

Same as above

> +     /* reset function will rebuild the xgmi hive info , clear it now */
> +     for (i = 0; i < mgpu_info.num_dgpu; i++)
> +             amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev);
> +
> +     INIT_LIST_HEAD(&device_list);
> +
> +     for (i = 0; i < mgpu_info.num_dgpu; i++)
> +             list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, 
> +&device_list);
> +
> +     /* unregister the GPU first, reset function will add them back */
> +     list_for_each_entry(adev, &device_list, reset_list)
> +             amdgpu_unregister_gpu_instance(adev);
> +
> +     r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
> +     if (r) {
> +             DRM_ERROR("reinit gpus failure");
> +             return;
> +     }
> +     for (i = 0; i < mgpu_info.num_dgpu; i++) {
> +             adev = mgpu_info.gpu_ins[i].adev;
> +             if (!adev->kfd.init_complete)
> +                     amdgpu_amdkfd_device_init(adev);
> +             amdgpu_ttm_set_buffer_funcs_status(adev, true);
> +     }
> +     return;
> +}
> +
>   static int amdgpu_pmops_suspend(struct device *dev)
>   {
>       struct drm_device *drm_dev = dev_get_drvdata(dev); diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index aa0c83776ce0..8c71d84a2fbe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -149,6 +149,7 @@ struct amdgpu_xgmi {
>       struct list_head head;
>       bool supported;
>       struct ras_common_if *ras_if;
> +     bool pending_reset;
>   };
>   
>   struct amdgpu_gmc {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 659b385b27b5..b459ef755ea9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -492,7 +492,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>       if (!adev->gmc.xgmi.supported)
>               return 0;
>   
> -     if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
> +     if (!adev->gmc.xgmi.pending_reset &&
> +         amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
>               ret = psp_xgmi_initialize(&adev->psp);
>               if (ret) {
>                       dev_err(adev->dev,
> @@ -538,7 +539,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device 
> *adev)
>   
>       task_barrier_add_task(&hive->tb);
>   
> -     if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
> +     if (!adev->gmc.xgmi.pending_reset &&
> +         amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
>               list_for_each_entry(tmp_adev, &hive->device_list, 
> gmc.xgmi.head) {
>                       /* update node list for other device in the hive */
>                       if (tmp_adev != adev) {
> @@ -567,7 +569,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>               }
>       }
>   
> -     if (!ret)
> +     if (!ret && !adev->gmc.xgmi.pending_reset)
>               ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
>   
>   exit_unlock:
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to