[PATCH] drm/amdgpu: skip resetting other devices in the same hive for an SR-IOV VF

Zhigang Luo Fri, 03 Dec 2021 14:21:22 -0800

When an SR-IOV VF hangs, a VF FLR (function-level reset) is triggered, so
resetting the other devices in the same XGMI hive is not needed.

Signed-off-by: Zhigang Luo <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3c5afa45173c..474f8ea58aa5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4746,7 +4746,7 @@ static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgp
 {
        struct amdgpu_device *tmp_adev = NULL;
 
-       if (adev->gmc.xgmi.num_physical_nodes > 1) {
+       if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
                if (!hive) {
                         dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
                        return -ENODEV;
@@ -4958,7 +4958,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
         * We always reset all schedulers for device and all devices for XGMI
         * hive so that should take care of them too.
         */
-       hive = amdgpu_get_xgmi_hive(adev);
+       if (!amdgpu_sriov_vf(adev))
+               hive = amdgpu_get_xgmi_hive(adev);
        if (hive) {
                if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
                         DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
@@ -4999,7 +5000,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
         * to put adev in the 1st position.
         */
        INIT_LIST_HEAD(&device_list);
-       if (adev->gmc.xgmi.num_physical_nodes > 1) {
+       if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
                list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
                        list_add_tail(&tmp_adev->reset_list, &device_list);
                if (!list_is_first(&adev->reset_list, &device_list))
-- 
2.17.1

[PATCH] drm/amdgpu: skip resetting other devices in the same hive for an SR-IOV VF

Reply via email to