For a virtual machine with vGPUs in SRIOV single device mode and XGMI enabled, XGMI physical node ids may change when waking up from hibernation with different vGPU devices. So update XGMI physical node ids on resume.
Update GPU memory controller configuration on resume if XGMI physical node ids are changed. Signed-off-by: Jiang Liu <ge...@linux.alibaba.com> Signed-off-by: Samuel Zhang <guoqing.zh...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 25 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 +++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d477a901af84..af2c784a6ccd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5040,6 +5040,28 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) return 0; } +static int amdgpu_device_update_xgmi_nodes(struct amdgpu_device *adev) +{ + int r; + unsigned int prev_physical_node_id; + + /* Get xgmi info again for sriov to detect device changes */ + if (amdgpu_sriov_vf(adev) && + !(adev->flags & AMD_IS_APU) && + adev->gmc.xgmi.supported && + !adev->gmc.xgmi.connected_to_cpu) { + prev_physical_node_id = adev->gmc.xgmi.physical_node_id; + r = adev->gfxhub.funcs->get_xgmi_info(adev); + if (r) + return r; + + dev_info(adev->dev, "xgmi node, old id %d, new id %d\n", + prev_physical_node_id, adev->gmc.xgmi.physical_node_id); + } + return 0; +} + + /** * amdgpu_device_resume - initiate device resume * @@ -5059,6 +5081,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) r = amdgpu_virt_request_full_gpu(adev, true); if (r) return r; + r = amdgpu_device_update_xgmi_nodes(adev); + if (r) + return r; } if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index d1fa5e8e3937..a2abddf3c110 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -1298,8 +1298,7 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, if 
(!mem_ranges || !exp_ranges) return -EINVAL; - refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) && - (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS); + refresh = true; ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges, &range_cnt, refresh); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 59385da80185..3c950c75dea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2533,6 +2533,11 @@ static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; + r = gmc_v9_0_mc_init(adev); + if (r) + return r; + gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); + /* If a reset is done for NPS mode switch, read the memory range * information again. */ -- 2.43.5