[Public]

This change is based on Linux 6.19-rc7


Skip the expensive per-BO hmm_range_fault page table walk if no MMU
invalidation occurred since the last successful submit. This reduces the
fast path to a single seq-counter compare instead of the full HMM walk.

On first submit the slow path runs and caches notifier_seq into
bo->last_valid_notifier_seq. Subsequent submits check against the cached
seq to take the fast path. If an MMU notifier fires, the seq changes
accordingly and the slow path runs again on the affected BOs.

Signed-off-by: Wang, Beyond <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 25 +++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  8 +++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ecdfe6cb36cc..140346e9cb92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -891,6 +891,17 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                bool userpage_invalidated = false;
                struct amdgpu_bo *bo = e->bo;

+               /*
+                * Skip the expensive HMM page table walk if no MMU
+                * invalidation occurred since last successful submit
+                */
+               if (bo->last_valid_notifier_seq &&
+                   !mmu_interval_read_retry(&bo->notifier, bo->last_valid_notifier_seq)) {
+                       e->range = NULL;
+                       e->user_invalidated = false;
+                       continue;
+               }
+
                e->range = amdgpu_hmm_range_alloc(NULL);
                if (unlikely(!e->range))
                        return -ENOMEM;
@@ -1326,7 +1337,19 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
         */
        r = 0;
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-               r |= !amdgpu_hmm_range_valid(e->range);
+               struct amdgpu_bo *bo = e->bo;
+
+               if (!e->range) {
+                       if (mmu_interval_read_retry(&bo->notifier, bo->last_valid_notifier_seq))
+                               r = 1;
+                       continue;
+               }
+
+               if (!amdgpu_hmm_range_valid(e->range))
+                       r = 1;
+               else
+                       bo->last_valid_notifier_seq = e->range->hmm_range.notifier_seq;
+
                amdgpu_hmm_range_free(e->range);
                e->range = NULL;
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 52c2d1731aab..89dc9ee1176e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -115,6 +115,14 @@ struct amdgpu_bo {

 #ifdef CONFIG_MMU_NOTIFIER
        struct mmu_interval_notifier    notifier;
+
+       /*
+        * Cached notifier_seq from last successful CS submit.
+        * Used to skip the expensive HMM page table walk when
+        * no MMU invalidation has occurred since last validation.
+        * Zero means never validated (always takes the slow path).
+        */
+       unsigned long                   last_valid_notifier_seq;
 #endif
        struct kgd_mem                  *kfd_bo;

--
2.43.0



Thanks,
Beyond

Reply via email to