[AMD Official Use Only - AMD Internal Distribution Only] @Lazar, Lijo and @Chen, Horace, could you please review this patch again? Thanks.
Emily Deng Best Wishes >-----Original Message----- >From: Emily Deng <emily.d...@amd.com> >Sent: Wednesday, June 4, 2025 3:48 PM >To: amd-gfx@lists.freedesktop.org >Cc: Deng, Emily <emily.d...@amd.com> >Subject: [PATCH v3] drm/amdkfd: Move the process suspend and resume out of full >access > >For the suspend and resume process, exclusive access is not required. >Therefore, it can be moved out of the full access section to reduce the >duration of >exclusive access. > >v3: >Move suspend processes before hardware fini. >Remove twice call for bare metal. > >Signed-off-by: Emily Deng <emily.d...@amd.com> >--- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++ >drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++ >drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +++-- > drivers/gpu/drm/amd/amdkfd/kfd_device.c | 40 +++++++++++++++++----- > 4 files changed, 67 insertions(+), 11 deletions(-) > >diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >index d8ac4b1051a8..0a8e7835d0fc 100644 >--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev, >bool run_pm) > return r; > } > >+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool >+run_pm) { >+ if (adev->kfd.dev) >+ kgd2kfd_suspend_process(adev->kfd.dev, run_pm); } >+ >+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool >+run_pm) { >+ int r = 0; >+ >+ if (adev->kfd.dev) >+ r = kgd2kfd_resume_process(adev->kfd.dev, run_pm); >+ >+ return r; >+} >+ > int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev, > struct amdgpu_reset_context *reset_context) { diff > --git >a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >index b6ca41859b53..841ae8b75ab1 100644 >--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >@@ -156,6 +156,8 @@ void 
amdgpu_amdkfd_fini(void); > > void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); int >amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); >+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool >+run_pm); int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, >+bool run_pm); > void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, > const void *ih_ring_entry); > void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -413,6 >+415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, void >kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev >*kfd, >bool run_pm); int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); >+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm); int >+kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm); > int kgd2kfd_pre_reset(struct kfd_dev *kfd, > struct amdgpu_reset_context *reset_context); int >kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -463,6 +467,15 @@ static inline int >kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) > return 0; > } > >+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool >+run_pm) { } >+ >+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool >+run_pm) { >+ return 0; >+} >+ > static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd, > struct amdgpu_reset_context *reset_context) > { diff -- >git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >index 5289400879ec..08ff9917c62f 100644 >--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >@@ -5061,6 +5061,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool >notify_clients) > adev->in_suspend = true; > > if (amdgpu_sriov_vf(adev)) { >+ if (!adev->in_s0ix) >+ amdgpu_amdkfd_suspend_process(adev, adev->in_runpm); > amdgpu_virt_fini_data_exchange(adev); > r = amdgpu_virt_request_full_gpu(adev, false); > if (r) >@@ -5080,7 +5082,7 @@ int 
amdgpu_device_suspend(struct drm_device *dev, bool >notify_clients) > amdgpu_device_ip_suspend_phase1(adev); > > if (!adev->in_s0ix) { >- amdgpu_amdkfd_suspend(adev, adev->in_runpm); >+ amdgpu_amdkfd_suspend(adev, amdgpu_sriov_vf(adev) || adev- >>in_runpm); > amdgpu_userq_suspend(adev); > } > >@@ -5178,7 +5180,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool >notify_clients) > } > > if (!adev->in_s0ix) { >- r = amdgpu_amdkfd_resume(adev, adev->in_runpm); >+ r = amdgpu_amdkfd_resume(adev, amdgpu_sriov_vf(adev) || >+adev->in_runpm); > if (r) > goto exit; > >@@ -5197,6 +5199,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool >notify_clients) > if (amdgpu_sriov_vf(adev)) { > amdgpu_virt_init_data_exchange(adev); > amdgpu_virt_release_full_gpu(adev, true); >+ >+ if (!adev->in_s0ix && !r) >+ r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm); > } > > if (r) >diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c >b/drivers/gpu/drm/amd/amdkfd/kfd_device.c >index bf0854bd5555..22c6ef7c42b6 100644 >--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c >+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c >@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) > if (!kfd->init_complete) > return; > >- /* for runtime suspend, skip locking kfd */ >- if (!run_pm) { >- mutex_lock(&kfd_processes_mutex); >- /* For first KFD device suspend all the KFD processes */ >- if (++kfd_locked == 1) >- kfd_suspend_all_processes(); >- mutex_unlock(&kfd_processes_mutex); >- } >- >+ kgd2kfd_suspend_process(kfd, run_pm); > for (i = 0; i < kfd->num_nodes; i++) { > node = kfd->nodes[i]; > node->dqm->ops.stop(node->dqm); >@@ -1055,6 +1047,36 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) > return ret; > } > >+ ret = kgd2kfd_resume_process(kfd, run_pm); >+ >+ return ret; >+} >+ >+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm) { >+ struct kfd_node *node; >+ int i; >+ >+ if (!kfd->init_complete) >+ return; >+ >+ /* for runtime suspend, skip locking 
kfd */ >+ if (!run_pm) { >+ mutex_lock(&kfd_processes_mutex); >+ /* For first KFD device suspend all the KFD processes */ >+ if (++kfd_locked == 1) >+ kfd_suspend_all_processes(); >+ mutex_unlock(&kfd_processes_mutex); >+ } >+} >+ >+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm) { >+ int ret, i; >+ >+ if (!kfd->init_complete) >+ return 0; >+ > /* for runtime resume, skip unlocking kfd */ > if (!run_pm) { > mutex_lock(&kfd_processes_mutex); >-- >2.34.1