On 2/14/2025 11:05 PM, Alex Deucher wrote:
Re-send the MES message on resume to make sure the
MES state is up to date.

Fixes: 8521e3c5f058 ("drm/amd/amdgpu: limit single process inside MES")
Signed-off-by: Alex Deucher<alexander.deuc...@amd.com>
Cc: Shaoyun Liu<shaoyun....@amd.com>
Cc: Srinivasan Shanmugam<srinivasan.shanmu...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 13 ++++---------
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 20 +++++++++++++++++++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  2 +-
  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  |  4 ++++
  drivers/gpu/drm/amd/amdgpu/mes_v12_0.c  |  4 ++++
  5 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index b9bd6654f3172..a194bf3347cbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1665,24 +1665,19 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct 
device *dev,
        }
mutex_lock(&adev->enforce_isolation_mutex);
-
        for (i = 0; i < num_partitions; i++) {
-               if (adev->enforce_isolation[i] && !partition_values[i]) {
+               if (adev->enforce_isolation[i] && !partition_values[i])
                        /* Going from enabled to disabled */
                        amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
-                       if (adev->enable_mes && adev->gfx.enable_cleaner_shader)
-                               amdgpu_mes_set_enforce_isolation(adev, i, false);
-               } else if (!adev->enforce_isolation[i] && partition_values[i]) {
+               else if (!adev->enforce_isolation[i] && partition_values[i])
                        /* Going from disabled to enabled */
                        amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
-                       if (adev->enable_mes && adev->gfx.enable_cleaner_shader)
-                               amdgpu_mes_set_enforce_isolation(adev, i, true);
-               }
                adev->enforce_isolation[i] = partition_values[i];
        }
-
        mutex_unlock(&adev->enforce_isolation_mutex);
+       amdgpu_mes_update_enforce_isolation(adev);
+
        return count;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index cee38bb6cfaf2..ca076306adba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1508,7 +1508,8 @@ bool amdgpu_mes_suspend_resume_all_supported(struct 
amdgpu_device *adev)
  }
/* Fix me -- node_id is used to identify the correct MES instances in the future */
-int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable)
+static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev,
+                                           uint32_t node_id, bool enable)
  {
        struct mes_misc_op_input op_input = {0};
        int r;
@@ -1530,6 +1531,23 @@ int amdgpu_mes_set_enforce_isolation(struct 
amdgpu_device *adev, uint32_t node_i
        return r;
  }
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev)
+{
+       int i, r = 0;
+
+       if (adev->enable_mes && adev->gfx.enable_cleaner_shader) {
+               mutex_lock(&adev->enforce_isolation_mutex);
+               for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+                       if (adev->enforce_isolation[i])
+                               r |= amdgpu_mes_set_enforce_isolation(adev, i, true);
+                       else
+                               r |= amdgpu_mes_set_enforce_isolation(adev, i, false);
+               }
+               mutex_unlock(&adev->enforce_isolation_mutex);
+       }
+       return r;
+}
+
  #if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 6a792ffc81e33..3a65c3788956d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -532,6 +532,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
-int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable);
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev);
#endif /* __AMDGPU_MES_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 530371e6a7aee..fc7b17463cb4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -1660,6 +1660,10 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block 
*ip_block)
                goto failure;
        }
+       r = amdgpu_mes_update_enforce_isolation(adev);
+       if (r)
+               goto failure;
+

Hi Alex,

Should this also be moved to mes_v11_0_hw_init? Please let me know your thoughts.

  out:
        /*
         * Disable KIQ ring usage from the driver once MES is enabled.
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 6db88584dd529..ec91c78468f30 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -1773,6 +1773,10 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block 
*ip_block)
                goto failure;
        }
+       r = amdgpu_mes_update_enforce_isolation(adev);
+       if (r)
+               goto failure;
+

And similarly here?

Thanks!

Srini

  out:
        /*
         * Disable KIQ ring usage from the driver once MES is enabled.

Reply via email to