[AMD Official Use Only - AMD Internal Distribution Only]

Ping......

Emily Deng
Best Wishes



>-----Original Message-----
>From: Deng, Emily
>Sent: Tuesday, June 3, 2025 5:11 PM
>To: Koenig, Christian <christian.koe...@amd.com>; Chen, Horace
><horace.c...@amd.com>
>Cc: amd-gfx@lists.freedesktop.org; Zhang, Owen(SRDC)
><owen.zha...@amd.com>
>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume out of
>full access
>
>Hi Christian and Horace,
>     Could you please help review this patch?
>
>Emily Deng
>Best Wishes
>
>
>
>>-----Original Message-----
>>From: Zhang, Owen(SRDC) <owen.zha...@amd.com>
>>Sent: Friday, May 30, 2025 5:50 PM
>>To: Deng, Emily <emily.d...@amd.com>; amd-gfx@lists.freedesktop.org
>>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>out of full access
>>
>>[AMD Official Use Only - AMD Internal Distribution Only]
>>
>>Hi Team, @amd-gfx@lists.freedesktop.org
>>
>>Could you please review and provide your input? Thanks for your support!
>>
>>
>>Rgds/Owen
>>
>>-----Original Message-----
>>From: Deng, Emily <emily.d...@amd.com>
>>Sent: Friday, May 30, 2025 9:39 AM
>>To: amd-gfx@lists.freedesktop.org
>>Cc: Zhang, Owen(SRDC) <owen.zha...@amd.com>
>>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>out of full access
>>
>>[AMD Official Use Only - AMD Internal Distribution Only]
>>
>>Ping......
>>
>>Emily Deng
>>Best Wishes
>>
>>
>>
>>>-----Original Message-----
>>>From: Deng, Emily <emily.d...@amd.com>
>>>Sent: Wednesday, May 28, 2025 2:20 PM
>>>To: Deng, Emily <emily.d...@amd.com>; amd-gfx@lists.freedesktop.org
>>>Cc: Zhang, Owen(SRDC) <owen.zha...@amd.com>
>>>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and
>>>resume out of full access
>>>
>>>[AMD Official Use Only - AMD Internal Distribution Only]
>>>
>>>>-----Original Message-----
>>>>From: Emily Deng <emily.d...@amd.com>
>>>>Sent: Tuesday, May 27, 2025 6:50 PM
>>>>To: amd-gfx@lists.freedesktop.org
>>>>Cc: Deng, Emily <emily.d...@amd.com>
>>>>Subject: [PATCH v2] drm/amdkfd: Move the process suspend and resume
>>>>out of full access
>>>>
>>>>Suspending and resuming the KFD processes does not require exclusive
>>>>access. Therefore, these steps can be moved out of the full access
>>>>section to reduce the duration of exclusive access.
>>>>
>>>>Signed-off-by: Emily Deng <emily.d...@amd.com>
>>>>---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++
>>>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++
>>>>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++--
>>>> drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 40 +++++++++++++++++-----
>>>> 4 files changed, 70 insertions(+), 11 deletions(-)
>>>>
>>>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>index 4cec3a873995..ba07e9c6619d 100644
>>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device
>>>>*adev, bool run_pm)
>>>>       return r;
>>>> }
>>>>
>>>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>>>>+run_pm) {
>>>>+      if (adev->kfd.dev)
>>>>+              kgd2kfd_suspend_process(adev->kfd.dev, run_pm); }
>>>>+
>>>>+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool
>>>>+run_pm) {
>>>>+      int r = 0;
>>>>+
>>>>+      if (adev->kfd.dev)
>>>>+              r = kgd2kfd_resume_process(adev->kfd.dev, run_pm);
>>>>+
>>>>+      return r;
>>>>+}
>>>>+
>>>> int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
>>>>                           struct amdgpu_reset_context
>>>>*reset_context) { diff --git
>>>>a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>index b6ca41859b53..841ae8b75ab1 100644
>>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>@@ -156,6 +156,8 @@ void amdgpu_amdkfd_fini(void);
>>>>
>>>> void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
>>>>int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
>>>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>>>>+run_pm); int amdgpu_amdkfd_resume_process(struct amdgpu_device
>>>>+*adev, bool run_pm);
>>>> void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>>>>                       const void *ih_ring_entry); void
>>>> amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -413,6
>>>>+415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  void
>>>>kgd2kfd_device_exit(struct kfd_dev *kfd);  void
>>>>kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);  int
>>>>kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
>>>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm); int
>>>>+kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm);
>>>> int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>>>                     struct amdgpu_reset_context *reset_context);
>>>>int kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -463,6 +467,15 @@
>>>>static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>>>>       return 0;
>>>> }
>>>>
>>>>+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool
>>>>+run_pm) { }
>>>>+
>>>>+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool
>>>>+run_pm) {
>>>>+      return 0;
>>>>+}
>>>>+
>>>> static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>>>>                                   struct amdgpu_reset_context
>>>>*reset_context)  { diff --git
>>>>a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>index 625c416c7d45..6e29f8bd54bb 100644
>>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>@@ -5080,7 +5080,7 @@ int amdgpu_device_suspend(struct drm_device
>>>>*dev, bool
>>>>notify_clients)
>>>>       amdgpu_device_ip_suspend_phase1(adev);
>>>>
>>>>       if (!adev->in_s0ix) {
>>>>-              amdgpu_amdkfd_suspend(adev, adev->in_runpm);
>>>>+              amdgpu_amdkfd_suspend(adev,
>>>>+ !amdgpu_sriov_runtime(adev)
>>>>+ ||
>>>>+adev->in_runpm);
>>>>               amdgpu_userq_suspend(adev);
>>>>       }
>>>>
>>>>@@ -5097,6 +5097,9 @@ int amdgpu_device_suspend(struct drm_device
>>>>*dev, bool
>>>>notify_clients)
>>>>       if (amdgpu_sriov_vf(adev))
>>>>               amdgpu_virt_release_full_gpu(adev, false);
>>>>
>>>>+      if (!adev->in_s0ix)
>>>>+              amdgpu_amdkfd_suspend_process(adev, adev->in_runpm);
>>>>+
>>>>       r = amdgpu_dpm_notify_rlc_state(adev, false);
>>>>       if (r)
>>>>               return r;
>>>>@@ -5178,7 +5181,7 @@ int amdgpu_device_resume(struct drm_device
>>>>*dev, bool
>>>>notify_clients)
>>>>       }
>>>>
>>>>       if (!adev->in_s0ix) {
>>>>-              r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
>>>>+              r = amdgpu_amdkfd_resume(adev,
>>>>+ !amdgpu_sriov_runtime(adev) ||
>>>>+adev->in_runpm);
>>>>               if (r)
>>>>                       goto exit;
>>>>
>>>>@@ -5199,6 +5202,11 @@ int amdgpu_device_resume(struct drm_device
>>>>*dev, bool
>>>>notify_clients)
>>>>               amdgpu_virt_release_full_gpu(adev, true);
>>>>       }
>>>>
>>>>+      if (!adev->in_s0ix) {
>>>>+              r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm);
>>>>+              if (r)
>>>>+                      goto exit;
>>>>+      }
>>>>       if (r)
>>>>               return r;
>>>>
>>>>diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>index bf0854bd5555..22c6ef7c42b6 100644
>>>>--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool
>>>run_pm)
>>>>       if (!kfd->init_complete)
>>>>               return;
>>>>
>>>>-      /* for runtime suspend, skip locking kfd */
>>>>-      if (!run_pm) {
>>>>-              mutex_lock(&kfd_processes_mutex);
>>>>-              /* For first KFD device suspend all the KFD processes */
>>>>-              if (++kfd_locked == 1)
>>>>-                      kfd_suspend_all_processes();
>>>>-              mutex_unlock(&kfd_processes_mutex);
>>>>-      }
>>>>-
>>>>+      kgd2kfd_suspend_process(kfd, run_pm);
>>>>       for (i = 0; i < kfd->num_nodes; i++) {
>>>>               node = kfd->nodes[i];
>>>>               node->dqm->ops.stop(node->dqm); @@ -1055,6 +1047,36 @@
>>>>int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>>>>                       return ret;
>>>>       }
>>>>
>>>>+      ret = kgd2kfd_resume_process(kfd, run_pm);
>>>>+
>>>>+      return ret;
>>>>+}
>>>>+
>>>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm) {
>>>>+      struct kfd_node *node;
>>>>+      int i;
>>>>+
>>>>+      if (!kfd->init_complete)
>>>>+              return;
>>>>+
>>>>+      /* for runtime suspend, skip locking kfd */
>>>>+      if (!run_pm) {
>>>>+              mutex_lock(&kfd_processes_mutex);
>>>>+              /* For first KFD device suspend all the KFD processes */
>>>>+              if (++kfd_locked == 1)
>>>>+                      kfd_suspend_all_processes();
>>>>+              mutex_unlock(&kfd_processes_mutex);
>>>>+      }
>>>>+}
>>>>+
>>>>+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm) {
>>>>+      int ret, i;
>>>>+
>>>>+      if (!kfd->init_complete)
>>>>+              return 0;
>>>>+
>>>>       /* for runtime resume, skip unlocking kfd */
>>>>       if (!run_pm) {
>>>>               mutex_lock(&kfd_processes_mutex);
>>>>--
>>>>2.34.1
>>>
>>
>>

Reply via email to