[AMD Official Use Only - AMD Internal Distribution Only]

Hi Lijo,
    Could you help to review the patch again? Thanks.

Emily Deng
Best Wishes



>-----Original Message-----
>From: Deng, Emily
>Sent: Thursday, June 5, 2025 3:21 PM
>To: Chen, Horace <horace.c...@amd.com>; Lazar, Lijo <lijo.la...@amd.com>
>Cc: Emily Deng <emily.d...@amd.com>; amd-gfx@lists.freedesktop.org; Zhang,
>Owen(SRDC) <owen.zha...@amd.com>
>Subject: RE: [PATCH v3] drm/amdkfd: Move the process suspend and resume out of
>full access
>
>@Lazar, Lijo and @Chen, Horace
> Could you help to review again, thanks?
>
>Emily Deng
>Best Wishes
>
>
>
>>-----Original Message-----
>>From: Emily Deng <emily.d...@amd.com>
>>Sent: Wednesday, June 4, 2025 3:48 PM
>>To: amd-gfx@lists.freedesktop.org
>>Cc: Deng, Emily <emily.d...@amd.com>
>>Subject: [PATCH v3] drm/amdkfd: Move the process suspend and resume out
>>of full access
>>
>>For the suspend and resume process, exclusive access is not required.
>>Therefore, it can be moved out of the full access section to reduce the
>>duration of exclusive access.
>>
>>v3:
>>Move suspend processes before hardware fini.
>>Remove twice call for bare metal.
>>
>>Signed-off-by: Emily Deng <emily.d...@amd.com>
>>---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++
>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++
>>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  9 +++--
>> drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 40 +++++++++++++++++-----
>> 4 files changed, 67 insertions(+), 11 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>index d8ac4b1051a8..0a8e7835d0fc 100644
>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device
>>*adev, bool run_pm)
>>      return r;
>> }
>>
>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>>+run_pm) {
>>+     if (adev->kfd.dev)
>>+             kgd2kfd_suspend_process(adev->kfd.dev, run_pm); }
>>+
>>+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool
>>+run_pm) {
>>+     int r = 0;
>>+
>>+     if (adev->kfd.dev)
>>+             r = kgd2kfd_resume_process(adev->kfd.dev, run_pm);
>>+
>>+     return r;
>>+}
>>+
>> int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
>>                          struct amdgpu_reset_context *reset_context)  { diff 
>> --git
>>a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>index b6ca41859b53..841ae8b75ab1 100644
>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>@@ -156,6 +156,8 @@ void amdgpu_amdkfd_fini(void);
>>
>> void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
>>int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>>+run_pm); int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev,
>>+bool run_pm);
>> void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>>                      const void *ih_ring_entry);
>> void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -413,6
>>+415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  void
>>kgd2kfd_device_exit(struct kfd_dev *kfd);  void kgd2kfd_suspend(struct
>>kfd_dev *kfd, bool run_pm);  int kgd2kfd_resume(struct kfd_dev *kfd,
>>bool run_pm);
>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm); int
>>+kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm);
>> int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>                    struct amdgpu_reset_context *reset_context);  int
>>kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -463,6 +467,15 @@ static
>>inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>>      return 0;
>> }
>>
>>+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool
>>+run_pm) { }
>>+
>>+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool
>>+run_pm) {
>>+     return 0;
>>+}
>>+
>> static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>                                  struct amdgpu_reset_context *reset_context) 
>>  { diff --
>git
>>a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>index 5289400879ec..08ff9917c62f 100644
>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>@@ -5061,6 +5061,8 @@ int amdgpu_device_suspend(struct drm_device *dev,
>>bool
>>notify_clients)
>>      adev->in_suspend = true;
>>
>>      if (amdgpu_sriov_vf(adev)) {
>>+             if (!adev->in_s0ix)
>>+                     amdgpu_amdkfd_suspend_process(adev, adev->in_runpm);
>>              amdgpu_virt_fini_data_exchange(adev);
>>              r = amdgpu_virt_request_full_gpu(adev, false);
>>              if (r)
>>@@ -5080,7 +5082,7 @@ int amdgpu_device_suspend(struct drm_device *dev,
>>bool
>>notify_clients)
>>      amdgpu_device_ip_suspend_phase1(adev);
>>
>>      if (!adev->in_s0ix) {
>>-             amdgpu_amdkfd_suspend(adev, adev->in_runpm);
>>+             amdgpu_amdkfd_suspend(adev, amdgpu_sriov_vf(adev) || adev-
>>>in_runpm);
>>              amdgpu_userq_suspend(adev);
>>      }
>>
>>@@ -5178,7 +5180,7 @@ int amdgpu_device_resume(struct drm_device *dev,
>>bool
>>notify_clients)
>>      }
>>
>>      if (!adev->in_s0ix) {
>>-             r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
>>+             r = amdgpu_amdkfd_resume(adev, amdgpu_sriov_vf(adev) ||
>>+adev->in_runpm);
>>              if (r)
>>                      goto exit;
>>
>>@@ -5197,6 +5199,9 @@ int amdgpu_device_resume(struct drm_device *dev,
>>bool
>>notify_clients)
>>      if (amdgpu_sriov_vf(adev)) {
>>              amdgpu_virt_init_data_exchange(adev);
>>              amdgpu_virt_release_full_gpu(adev, true);
>>+
>>+             if (!adev->in_s0ix && !r)
>>+                     r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm);
>>      }
>>
>>      if (r)
>>diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>index bf0854bd5555..22c6ef7c42b6 100644
>>--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool
>run_pm)
>>      if (!kfd->init_complete)
>>              return;
>>
>>-     /* for runtime suspend, skip locking kfd */
>>-     if (!run_pm) {
>>-             mutex_lock(&kfd_processes_mutex);
>>-             /* For first KFD device suspend all the KFD processes */
>>-             if (++kfd_locked == 1)
>>-                     kfd_suspend_all_processes();
>>-             mutex_unlock(&kfd_processes_mutex);
>>-     }
>>-
>>+     kgd2kfd_suspend_process(kfd, run_pm);
>>      for (i = 0; i < kfd->num_nodes; i++) {
>>              node = kfd->nodes[i];
>>              node->dqm->ops.stop(node->dqm);
>>@@ -1055,6 +1047,36 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>>                      return ret;
>>      }
>>
>>+     ret = kgd2kfd_resume_process(kfd, run_pm);
>>+
>>+     return ret;
>>+}
>>+
>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm) {
>>+     struct kfd_node *node;
>>+     int i;
>>+
>>+     if (!kfd->init_complete)
>>+             return;
>>+
>>+     /* for runtime suspend, skip locking kfd */
>>+     if (!run_pm) {
>>+             mutex_lock(&kfd_processes_mutex);
>>+             /* For first KFD device suspend all the KFD processes */
>>+             if (++kfd_locked == 1)
>>+                     kfd_suspend_all_processes();
>>+             mutex_unlock(&kfd_processes_mutex);
>>+     }
>>+}
>>+
>>+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm) {
>>+     int ret, i;
>>+
>>+     if (!kfd->init_complete)
>>+             return 0;
>>+
>>      /* for runtime resume, skip unlocking kfd */
>>      if (!run_pm) {
>>              mutex_lock(&kfd_processes_mutex);
>>--
>>2.34.1

Reply via email to