[AMD Official Use Only - AMD Internal Distribution Only] Ping......
Emily Deng Best Wishes >-----Original Message----- >From: Deng, Emily >Sent: Tuesday, June 3, 2025 5:11 PM >To: Koenig, Christian <christian.koe...@amd.com>; Chen, Horace ><horace.c...@amd.com> >Cc: amd-gfx@lists.freedesktop.org; Zhang, Owen(SRDC) ><owen.zha...@amd.com> >Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume out of >full access > >Hi Christian and Horace, > Could you help to review this? > >Emily Deng >Best Wishes > > > >>-----Original Message----- >>From: Zhang, Owen(SRDC) <owen.zha...@amd.com> >>Sent: Friday, May 30, 2025 5:50 PM >>To: Deng, Emily <emily.d...@amd.com>; amd-gfx@lists.freedesktop.org >>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume >>out of full access >> >>[AMD Official Use Only - AMD Internal Distribution Only] >> >>Hi Team, @amd-gfx@lists.freedesktop.org >> >>Can you pls review and provide your inputs? Thanks for support! >> >> >>Rgds/Owen >> >>-----Original Message----- >>From: Deng, Emily <emily.d...@amd.com> >>Sent: Friday, May 30, 2025 9:39 AM >>To: amd-gfx@lists.freedesktop.org >>Cc: Zhang, Owen(SRDC) <owen.zha...@amd.com> >>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and resume >>out of full access >> >>[AMD Official Use Only - AMD Internal Distribution Only] >> >>Ping...... >> >>Emily Deng >>Best Wishes >> >> >> >>>-----Original Message----- >>>From: Deng, Emily <emily.d...@amd.com> >>>Sent: Wednesday, May 28, 2025 2:20 PM >>>To: Deng, Emily <emily.d...@amd.com>; amd-gfx@lists.freedesktop.org >>>Cc: Zhang, Owen(SRDC) <owen.zha...@amd.com> >>>Subject: RE: [PATCH v2] drm/amdkfd: Move the process suspend and >>>resume out of full access >>> >>>[AMD Official Use Only - AMD Internal Distribution Only] >>> >>>>-----Original Message----- >>>>From: Emily Deng <emily.d...@amd.com> >>>>Sent: Tuesday, May 27, 2025 6:50 PM >>>>To: amd-gfx@lists.freedesktop.org >>>>Cc: Deng, Emily <emily.d...@amd.com> >>>>Subject: [PATCH v2] drm/amdkfd: Move the process suspend and resume >>>>out of full access >>>> >>>>For the suspend and resume process, exclusive access is not required. >>>>Therefore, it can be moved out of the full access section to reduce >>>>the duration of exclusive access. >>>> >>>>Signed-off-by: Emily Deng <emily.d...@amd.com> >>>>--- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 +++++++++ >>>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++ >>>>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++-- >>>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 40 +++++++++++++++++----- >>>> 4 files changed, 70 insertions(+), 11 deletions(-) >>>> >>>>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>>>index 4cec3a873995..ba07e9c6619d 100644 >>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >>>>@@ -264,6 +264,22 @@ int amdgpu_amdkfd_resume(struct amdgpu_device >>>>*adev, bool run_pm) >>>> return r; >>>> } >>>> >>>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool >>>>+run_pm) { >>>>+ if (adev->kfd.dev) >>>>+ kgd2kfd_suspend_process(adev->kfd.dev, run_pm); } >>>>+ >>>>+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev, bool >>>>+run_pm) { >>>>+ int r = 0; >>>>+ >>>>+ if (adev->kfd.dev) >>>>+ r = kgd2kfd_resume_process(adev->kfd.dev, run_pm); >>>>+ >>>>+ return r; >>>>+} >>>>+ >>>> int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev, >>>> struct amdgpu_reset_context >>>>*reset_context) { diff --git >>>>a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>>>index b6ca41859b53..841ae8b75ab1 100644 >>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >>>>@@ -156,6 +156,8 @@ void amdgpu_amdkfd_fini(void); >>>> >>>> void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); >>>>int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); >>>>+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev, bool >>>>+run_pm); int amdgpu_amdkfd_resume_process(struct amdgpu_device >>>>+*adev, bool run_pm); >>>> void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, >>>> const void *ih_ring_entry); void >>>> amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -413,6 >>>>+415,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, void >>>>kgd2kfd_device_exit(struct kfd_dev *kfd); void >>>>kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); int >>>>kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); >>>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm); int >>>>+kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm); >>>> int kgd2kfd_pre_reset(struct kfd_dev *kfd, >>>> struct amdgpu_reset_context *reset_context); >>>>int kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -463,6 +467,15 @@ >>>>static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) >>>> return 0; >>>> } >>>> >>>>+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool >>>>+run_pm) { } >>>>+ >>>>+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd, bool >>>>+run_pm) { >>>>+ return 0; >>>>+} >>>>+ >>>> static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd, >>>> struct amdgpu_reset_context >>>>*reset_context) { diff -- git >>>>a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>index 625c416c7d45..6e29f8bd54bb 100644 >>>>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>@@ -5080,7 +5080,7 @@ int amdgpu_device_suspend(struct drm_device >>>>*dev, bool >>>>notify_clients) >>>> amdgpu_device_ip_suspend_phase1(adev); >>>> >>>> if (!adev->in_s0ix) { >>>>- amdgpu_amdkfd_suspend(adev, adev->in_runpm); >>>>+ amdgpu_amdkfd_suspend(adev, >>>>+ !amdgpu_sriov_runtime(adev) >>>>+ || >>>>+adev->in_runpm); >>>> amdgpu_userq_suspend(adev); >>>> } >>>> >>>>@@ -5097,6 +5097,9 @@ int amdgpu_device_suspend(struct drm_device >>>>*dev, bool >>>>notify_clients) >>>> if (amdgpu_sriov_vf(adev)) >>>> amdgpu_virt_release_full_gpu(adev, false); >>>> >>>>+ if (!adev->in_s0ix) >>>>+ amdgpu_amdkfd_suspend_process(adev, adev->in_runpm); >>>>+ >>>> r = amdgpu_dpm_notify_rlc_state(adev, false); >>>> if (r) >>>> return r; >>>>@@ -5178,7 +5181,7 @@ int amdgpu_device_resume(struct drm_device >>>>*dev, bool >>>>notify_clients) >>>> } >>>> >>>> if (!adev->in_s0ix) { >>>>- r = amdgpu_amdkfd_resume(adev, adev->in_runpm); >>>>+ r = amdgpu_amdkfd_resume(adev, >>>>+ !amdgpu_sriov_runtime(adev) || >>>>+adev->in_runpm); >>>> if (r) >>>> goto exit; >>>> >>>>@@ -5199,6 +5202,11 @@ int amdgpu_device_resume(struct drm_device >>>>*dev, bool >>>>notify_clients) >>>> amdgpu_virt_release_full_gpu(adev, true); >>>> } >>>> >>>>+ if (!adev->in_s0ix) { >>>>+ r = amdgpu_amdkfd_resume_process(adev, adev->in_runpm); >>>>+ if (r) >>>>+ goto exit; >>>>+ } >>>> if (r) >>>> return r; >>>> >>>>diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c >>>>b/drivers/gpu/drm/amd/amdkfd/kfd_device.c >>>>index bf0854bd5555..22c6ef7c42b6 100644 >>>>--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c >>>>+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c >>>>@@ -1027,15 +1027,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool >>>run_pm) >>>> if (!kfd->init_complete) >>>> return; >>>> >>>>- /* for runtime suspend, skip locking kfd */ >>>>- if (!run_pm) { >>>>- mutex_lock(&kfd_processes_mutex); >>>>- /* For first KFD device suspend all the KFD processes */ >>>>- if (++kfd_locked == 1) >>>>- kfd_suspend_all_processes(); >>>>- mutex_unlock(&kfd_processes_mutex); >>>>- } >>>>- >>>>+ kgd2kfd_suspend_process(kfd, run_pm); >>>> for (i = 0; i < kfd->num_nodes; i++) { >>>> node = kfd->nodes[i]; >>>> node->dqm->ops.stop(node->dqm); @@ -1055,6 +1047,36 @@ >>>>int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) >>>> return ret; >>>> } >>>> >>>>+ ret = kgd2kfd_resume_process(kfd, run_pm); >>>>+ >>>>+ return ret; >>>>+} >>>>+ >>>>+void kgd2kfd_suspend_process(struct kfd_dev *kfd, bool run_pm) { >>>>+ struct kfd_node *node; >>>>+ int i; >>>>+ >>>>+ if (!kfd->init_complete) >>>>+ return; >>>>+ >>>>+ /* for runtime suspend, skip locking kfd */ >>>>+ if (!run_pm) { >>>>+ mutex_lock(&kfd_processes_mutex); >>>>+ /* For first KFD device suspend all the KFD processes */ >>>>+ if (++kfd_locked == 1) >>>>+ kfd_suspend_all_processes(); >>>>+ mutex_unlock(&kfd_processes_mutex); >>>>+ } >>>>+} >>>>+ >>>>+int kgd2kfd_resume_process(struct kfd_dev *kfd, bool run_pm) { >>>>+ int ret, i; >>>>+ >>>>+ if (!kfd->init_complete) >>>>+ return 0; >>>>+ >>>> /* for runtime resume, skip unlocking kfd */ >>>> if (!run_pm) { >>>> mutex_lock(&kfd_processes_mutex); >>>>-- >>>>2.34.1 >>> >> >>