[AMD Official Use Only - AMD Internal Distribution Only]

@Lazar, Lijo and @Chen, Horace,
Could you please help review this patch again? Thanks.

Emily Deng
Best Wishes



>-----Original Message-----
>From: Emily Deng <emily.d...@amd.com>
>Sent: Wednesday, June 4, 2025 3:48 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily <emily.d...@amd.com>
>Subject: [PATCH v3] drm/amdkfd: Move the process suspend and resume out of full
>access
>
>Suspending and resuming the KFD processes does not require exclusive access.
>Therefore, these steps can be moved out of the full access section to reduce
>the duration of exclusive access.
>
>v3:
>Move the process suspend before hardware fini.
>Remove the duplicate call on bare metal.
>
>Signed-off-by: Emily Deng <emily.d...@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++
>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++
>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  9 +++--
> drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 40 +++++++++++++++++-----
> 4 files changed, 67 insertions(+), 11 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>index d8ac4b1051a8..0a8e7835d0fc 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev,
>bool run_pm)
>       return r;
> }
>
>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool run_pm)
>+{
>+      if (adev->kfd.dev)
>+              kgd2kfd_suspend_process(adev->kfd.dev, run_pm);
>+}
>+
>+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool run_pm)
>+{
>+      int r = 0;
>+
>+      if (adev->kfd.dev)
>+              r = kgd2kfd_resume_process(adev->kfd.dev, run_pm);
>+
>+      return r;
>+}
>+
> int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
>                           struct amdgpu_reset_context *reset_context)  { diff 
> --git
>a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>index b6ca41859b53..841ae8b75ab1 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>@@ -156,6 +156,8 @@ void amdgpu_amdkfd_fini(void);
>
> void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);  int
>amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool
>+run_pm); int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev,
>+bool run_pm);
> void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>                       const void *ih_ring_entry);
> void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -413,6
>+415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  void
>kgd2kfd_device_exit(struct kfd_dev *kfd);  void kgd2kfd_suspend(struct kfd_dev 
>*kfd,
>bool run_pm);  int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm); int
>+kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm);
> int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>                     struct amdgpu_reset_context *reset_context);  int
>kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -463,6 +467,15 @@ static inline int
>kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>       return 0;
> }
>
>+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm)
>+{
>+}
>+
>+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm)
>+{
>+      return 0;
>+}
>+
> static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
>                                   struct amdgpu_reset_context *reset_context) 
>  { diff --
>git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index 5289400879ec..08ff9917c62f 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -5061,6 +5061,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool
>notify_clients)
>       adev->in_suspend = true;
>
>       if (amdgpu_sriov_vf(adev)) {
>+              if (!adev->in_s0ix)
>+                      amdgpu_amdkfd_suspend_process(adev, adev->in_runpm);
>               amdgpu_virt_fini_data_exchange(adev);
>               r = amdgpu_virt_request_full_gpu(adev, false);
>               if (r)
>@@ -5080,7 +5082,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool
>notify_clients)
>       amdgpu_device_ip_suspend_phase1(adev);
>
>       if (!adev->in_s0ix) {
>-              amdgpu_amdkfd_suspend(adev, adev->in_runpm);
>+              amdgpu_amdkfd_suspend(adev, amdgpu_sriov_vf(adev) || adev-
>>in_runpm);
>               amdgpu_userq_suspend(adev);
>       }
>
>@@ -5178,7 +5180,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool
>notify_clients)
>       }
>
>       if (!adev->in_s0ix) {
>-              r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
>+              r = amdgpu_amdkfd_resume(adev, amdgpu_sriov_vf(adev) ||
>+adev->in_runpm);
>               if (r)
>                       goto exit;
>
>@@ -5197,6 +5199,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool
>notify_clients)
>       if (amdgpu_sriov_vf(adev)) {
>               amdgpu_virt_init_data_exchange(adev);
>               amdgpu_virt_release_full_gpu(adev, true);
>+
>+              if (!adev->in_s0ix && !r)
>+                      r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm);
>       }
>
>       if (r)
>diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>index bf0854bd5555..22c6ef7c42b6 100644
>--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
>       if (!kfd->init_complete)
>               return;
>
>-      /* for runtime suspend, skip locking kfd */
>-      if (!run_pm) {
>-              mutex_lock(&kfd_processes_mutex);
>-              /* For first KFD device suspend all the KFD processes */
>-              if (++kfd_locked == 1)
>-                      kfd_suspend_all_processes();
>-              mutex_unlock(&kfd_processes_mutex);
>-      }
>-
>+      kgd2kfd_suspend_process(kfd, run_pm);
>       for (i = 0; i < kfd->num_nodes; i++) {
>               node = kfd->nodes[i];
>               node->dqm->ops.stop(node->dqm);
>@@ -1055,6 +1047,36 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>                       return ret;
>       }
>
>+      ret = kgd2kfd_resume_process(kfd, run_pm);
>+
>+      return ret;
>+}
>+
>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm) {
>+      struct kfd_node *node;
>+      int i;
>+
>+      if (!kfd->init_complete)
>+              return;
>+
>+      /* for runtime suspend, skip locking kfd */
>+      if (!run_pm) {
>+              mutex_lock(&kfd_processes_mutex);
>+              /* For first KFD device suspend all the KFD processes */
>+              if (++kfd_locked == 1)
>+                      kfd_suspend_all_processes();
>+              mutex_unlock(&kfd_processes_mutex);
>+      }
>+}
>+
>+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm) {
>+      int ret, i;
>+
>+      if (!kfd->init_complete)
>+              return 0;
>+
>       /* for runtime resume, skip unlocking kfd */
>       if (!run_pm) {
>               mutex_lock(&kfd_processes_mutex);
>--
>2.34.1

Reply via email to