[AMD Official Use Only - AMD Internal Distribution Only] Ping...
Emily Deng Best Wishes >-----Original Message----- >From: Deng, Emily <emily.d...@amd.com> >Sent: Wednesday, May 28, 2025 2:20 PM >To: Deng, Emily <emily.d...@amd.com>; amd-gfx@lists.freedesktop.org >Cc: Zhang, Owen(SRDC) <owen.zha...@amd.com> >Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume out of >full access > >[AMD Official Use Only - AMD Internal Distribution Only] > >>-----Original Message----- >>From: Emily Deng <emily.d...@amd.com> >>Sent: Tuesday, May 27, 2025 6:50 PM >>To: amd-gfx@lists.freedesktop.org >>Cc: Deng, Emily <emily.d...@amd.com> >>Subject: [PATCH v2] drm/amdkfd: Move the process suspend and resume out >>of full access >> >>For the suspend and resume process, exclusive access is not required. >>Therefore, it can be moved out of the full access section to reduce the >>duration of exclusive access. >> >>Signed-off-by: Emily Deng <emily.d...@amd.com> >>--- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++ >>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++ >>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++-- >> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 40 +++++++++++++++++----- >> 4 files changed, 70 insertions(+), 11 deletions(-) >> >>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>index 4cec3a873995..ba07e9c6619d 100644 >>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device >>*adev, bool run_pm) >> return r; >> } >> >>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool >>+run_pm) { >>+ if (adev->kfd.dev) >>+ kgd2kfd_suspend_process(adev->kfd.dev, run_pm); } >>+ >>+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool >>+run_pm) { >>+ int r = 0; >>+ >>+ if (adev->kfd.dev) >>+ r = kgd2kfd_resume_process(adev->kfd.dev, run_pm); >>+ >>+ return r; >>+} >>+ >> int amdgpu_amdkfd_pre_reset(struct amdgpu_device 
*adev, >> struct amdgpu_reset_context *reset_context) >>{ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>index b6ca41859b53..841ae8b75ab1 100644 >>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>@@ -156,6 +156,8 @@ void amdgpu_amdkfd_fini(void); >> >> void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); >>int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); >>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool >>+run_pm); int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, >>+bool run_pm); >> void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, >> const void *ih_ring_entry); void >> amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -413,6 >>+415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, void >>kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct >>kfd_dev *kfd, bool run_pm); int kgd2kfd_resume(struct kfd_dev *kfd, >>bool run_pm); >>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm); int >>+kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm); >> int kgd2kfd_pre_reset(struct kfd_dev *kfd, >> struct amdgpu_reset_context *reset_context); int >>kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -463,6 +467,15 @@ static >>inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) >> return 0; >> } >> >>+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool >>+run_pm) { } >>+ >>+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool >>+run_pm) { >>+ return 0; >>+} >>+ >> static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd, >> struct amdgpu_reset_context >>*reset_context) { diff -- git >>a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>index 625c416c7d45..6e29f8bd54bb 100644 >>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>@@ 
-5080,7 +5080,7 @@ int amdgpu_device_suspend(struct drm_device *dev, >>bool >>notify_clients) >> amdgpu_device_ip_suspend_phase1(adev); >> >> if (!adev->in_s0ix) { >>- amdgpu_amdkfd_suspend(adev, adev->in_runpm); >>+ amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_runtime(adev) >>+ || >>+adev->in_runpm); >> amdgpu_userq_suspend(adev); >> } >> >>@@ -5097,6 +5097,9 @@ int amdgpu_device_suspend(struct drm_device *dev, >>bool >>notify_clients) >> if (amdgpu_sriov_vf(adev)) >> amdgpu_virt_release_full_gpu(adev, false); >> >>+ if (!adev->in_s0ix) >>+ amdgpu_amdkfd_suspend_process(adev, adev->in_runpm); >>+ >> r = amdgpu_dpm_notify_rlc_state(adev, false); >> if (r) >> return r; >>@@ -5178,7 +5181,7 @@ int amdgpu_device_resume(struct drm_device *dev, >>bool >>notify_clients) >> } >> >> if (!adev->in_s0ix) { >>- r = amdgpu_amdkfd_resume(adev, adev->in_runpm); >>+ r = amdgpu_amdkfd_resume(adev, >>+ !amdgpu_sriov_runtime(adev) || >>+adev->in_runpm); >> if (r) >> goto exit; >> >>@@ -5199,6 +5202,11 @@ int amdgpu_device_resume(struct drm_device *dev, >>bool >>notify_clients) >> amdgpu_virt_release_full_gpu(adev, true); >> } >> >>+ if (!adev->in_s0ix) { >>+ r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm); >>+ if (r) >>+ goto exit; >>+ } >> if (r) >> return r; >> >>diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c >>b/drivers/gpu/drm/amd/amdkfd/kfd_device.c >>index bf0854bd5555..22c6ef7c42b6 100644 >>--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c >>+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c >>@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool >run_pm) >> if (!kfd->init_complete) >> return; >> >>- /* for runtime suspend, skip locking kfd */ >>- if (!run_pm) { >>- mutex_lock(&kfd_processes_mutex); >>- /* For first KFD device suspend all the KFD processes */ >>- if (++kfd_locked == 1) >>- kfd_suspend_all_processes(); >>- mutex_unlock(&kfd_processes_mutex); >>- } >>- >>+ kgd2kfd_suspend_process(kfd, run_pm); >> for (i = 0; i < kfd->num_nodes; i++) { >> node = 
kfd->nodes[i]; >> node->dqm->ops.stop(node->dqm); @@ -1055,6 +1047,36 @@ >>int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) >> return ret; >> } >> >>+ ret = kgd2kfd_resume_process(kfd, run_pm); >>+ >>+ return ret; >>+} >>+ >>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm) { >>+ struct kfd_node *node; >>+ int i; >>+ >>+ if (!kfd->init_complete) >>+ return; >>+ >>+ /* for runtime suspend, skip locking kfd */ >>+ if (!run_pm) { >>+ mutex_lock(&kfd_processes_mutex); >>+ /* For first KFD device suspend all the KFD processes */ >>+ if (++kfd_locked == 1) >>+ kfd_suspend_all_processes(); >>+ mutex_unlock(&kfd_processes_mutex); >>+ } >>+} >>+ >>+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm) { >>+ int ret, i; >>+ >>+ if (!kfd->init_complete) >>+ return 0; >>+ >> /* for runtime resume, skip unlocking kfd */ >> if (!run_pm) { >> mutex_lock(&kfd_processes_mutex); >>-- >>2.34.1 >