On 4/9/2025 6:45 PM, SRINIVASAN SHANMUGAM wrote:

On 4/9/2025 4:15 PM, Christian König wrote:
This reverts commit c2cc3648ba517a6c270500b5447d5a1efdad5936. Turned out
that this has some negative consequences for some workloads. Instead check
if the cleaner shader should run directly.

While at it remove amdgpu_vm_need_pipeline_sync(); we also check again
if the VMID has seen a GPU reset since last use, and the gds switch
setting can be handled more simply as well.

Also remove some duplicate checks and re-order and document the code.

v2: restructure the whole function
v3: fix logic error pointed out by Srini
v4: fix typo in comment, fix crash caused by incorrect check
v5: once more fix the logic

Signed-off-by: Christian König <christian.koe...@amd.com>
Reviewed-by: Srinivasan Shanmugam <srinivasan.shanmu...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 94 ++++++++++----------------
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 +-
  3 files changed, 39 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 802743efa3b3..30b58772598c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -189,10 +189,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
      }
        need_ctx_switch = ring->current_ctx != fence_ctx;
-    if (ring->funcs->emit_pipeline_sync && job &&
-        ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
-         need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
-
+    if ((job && (tmp = amdgpu_sync_get_fence(&job->explicit_sync))) ||


Direct assignment inside the if condition may not be allowed; maybe we can split this logic up, something like below?

/* Check if job is present and get the fence */
if (job)
    tmp = amdgpu_sync_get_fence(&job->explicit_sync);

/* Check if pipe sync is needed */
if (tmp || (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
    need_pipe_sync = true;

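One more thing to double check if we split it like that (just a note from reading the patch, untested): the second check relies on tmp still being NULL when there is no job, so this assumes the declaration keeps an initializer along the lines of:

    /*
     * Assumed declaration further up in amdgpu_ib_schedule(); it has to
     * stay NULL when no job/explicit fence is present, otherwise the
     * pipe sync check above would look at an indeterminate pointer.
     */
    struct dma_fence *tmp = NULL;
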

+         (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
          need_pipe_sync = true;
            if (tmp)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b5ddfcbbc9fc..8e99dbd70968 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -596,37 +596,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
      }
  }
  -/**
- * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
- *
- * @ring: ring on which the job will be submitted
- * @job: job to submit
- *
- * Returns:
- * True if sync is needed.
- */
-bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
-                  struct amdgpu_job *job)
-{
-    struct amdgpu_device *adev = ring->adev;
-    unsigned vmhub = ring->vm_hub;
-    struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-
-    if (job->vmid == 0)
-        return false;
-
-    if (job->vm_needs_flush || ring->has_compute_vm_bug)
-        return true;
-
-    if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
-        return true;
-
-    if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
-        return true;
-
-    return false;
-}
-
  /**
   * amdgpu_vm_flush - hardware flush the vm
   *
@@ -647,43 +616,49 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
      unsigned vmhub = ring->vm_hub;
      struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
      struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
-    bool spm_update_needed = job->spm_update_needed;
-    bool gds_switch_needed = ring->funcs->emit_gds_switch &&
-        job->gds_switch_needed;
-    bool vm_flush_needed = job->vm_needs_flush;
-    bool cleaner_shader_needed = false;
-    bool pasid_mapping_needed = false;
-    struct dma_fence *fence = NULL;
+    bool gds_switch_needed, vm_flush_needed, spm_update_needed,
+         cleaner_shader_needed, pasid_mapping_needed;
+    struct dma_fence *fence;


I think we should initialize "cleaner_shader_needed" here, i.e. cleaner_shader_needed = false?

Or somehow try to move the assignment below up to here?

"     cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
         ring->funcs->emit_cleaner_shader && job->base.s_fence &&
         &job->base.s_fence->scheduled == isolation->spearhead;"?

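To spell out the concern (just my reading of v5, not tested): in the else path below, need_pipe_sync |= ... || cleaner_shader_needed || ... runs before cleaner_shader_needed gets its value further down, so it reads an indeterminate value there, and a plain "= false" here would in turn keep the cleaner shader from ever feeding into the pipe sync decision. Moving the computation up front looks safer to me, assuming isolation is already set up at that point, roughly:

    /*
     * Compute this before the GPU reset check so that the
     * need_pipe_sync |= ... in the else path sees the real value.
     */
    cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
        ring->funcs->emit_cleaner_shader && job->base.s_fence &&
        &job->base.s_fence->scheduled == isolation->spearhead;
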
      unsigned int patch;
      int r;
  +    /* First of all figure out what needs to be done */
      if (amdgpu_vmid_had_gpu_reset(adev, id)) {
+        need_pipe_sync = true;
          gds_switch_needed = true;
          vm_flush_needed = true;
          pasid_mapping_needed = true;
          spm_update_needed = true;
+    } else {
+        gds_switch_needed = job->gds_switch_needed;
+        vm_flush_needed = job->vm_needs_flush;
+        mutex_lock(&id_mgr->lock);
+        pasid_mapping_needed = id->pasid != job->pasid ||
+            !id->pasid_mapping ||
+            !dma_fence_is_signaled(id->pasid_mapping);
+        mutex_unlock(&id_mgr->lock);
+        spm_update_needed = job->spm_update_needed;
+        need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
+            cleaner_shader_needed || gds_switch_needed;
      }
  -    mutex_lock(&id_mgr->lock);
-    if (id->pasid != job->pasid || !id->pasid_mapping ||
-        !dma_fence_is_signaled(id->pasid_mapping))
-        pasid_mapping_needed = true;
-    mutex_unlock(&id_mgr->lock);
-
+    need_pipe_sync &= !!ring->funcs->emit_pipeline_sync;
      gds_switch_needed &= !!ring->funcs->emit_gds_switch;
      vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
              job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
      pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
          ring->funcs->emit_wreg;
+    spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid;
        cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
          ring->funcs->emit_cleaner_shader && job->base.s_fence &&
          &job->base.s_fence->scheduled == isolation->spearhead;
        if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
-        !cleaner_shader_needed)
+        !cleaner_shader_needed && !spm_update_needed)
          return 0;
  +    /* Then actually prepare the submission frame */
      amdgpu_ring_ib_begin(ring);
      if (ring->funcs->init_cond_exec)
          patch = amdgpu_ring_init_cond_exec(ring,
@@ -703,23 +678,34 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
      if (pasid_mapping_needed)
          amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
  -    if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+    if (spm_update_needed)
          adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
  -    if (ring->funcs->emit_gds_switch &&
-        gds_switch_needed) {
+    if (gds_switch_needed)
          amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
                          job->gds_size, job->gws_base,
                          job->gws_size, job->oa_base,
                          job->oa_size);
-    }
        if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
          r = amdgpu_fence_emit(ring, &fence, NULL, 0);
          if (r)
              return r;
+    } else {
+        fence = NULL;
+    }
+
+    amdgpu_ring_patch_cond_exec(ring, patch);
+
+    /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
+    if (ring->funcs->emit_switch_buffer) {
+        amdgpu_ring_emit_switch_buffer(ring);
+        amdgpu_ring_emit_switch_buffer(ring);
      }
  +    amdgpu_ring_ib_end(ring);
+
+    /* And finally remember what the ring has executed */
      if (vm_flush_needed) {
          mutex_lock(&id_mgr->lock);
          dma_fence_put(id->last_flush);
@@ -749,16 +735,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
          mutex_unlock(&adev->enforce_isolation_mutex);
      }
      dma_fence_put(fence);
-
-    amdgpu_ring_patch_cond_exec(ring, patch);
-
-    /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
-    if (ring->funcs->emit_switch_buffer) {
-        amdgpu_ring_emit_switch_buffer(ring);
-        amdgpu_ring_emit_switch_buffer(ring);
-    }
-
-    amdgpu_ring_ib_end(ring);
      return 0;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index daa2f9b33620..e9ecdb96bafa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -493,7 +493,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                 struct ww_acquire_ctx *ticket,
                 int (*callback)(void *p, struct amdgpu_bo *bo),
                 void *param);
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
+            bool need_pipe_sync);
  int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
                struct amdgpu_vm *vm, bool immediate);
  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
@@ -550,8 +551,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
                 uint32_t fragment_size_default, unsigned max_level,
                 unsigned max_bits);
  int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
-bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
-                  struct amdgpu_job *job);
  void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
    struct amdgpu_task_info *
