On Thu, May 8, 2025 at 8:05 AM Christian König
<ckoenig.leichtzumer...@gmail.com> wrote:
>
> Check if the cleaner shader should run directly.
>
> While at it remove amdgpu_vm_need_pipeline_sync(), we also check again
> if the VMID has seen a GPU reset since last use and the gds switch
> setting can be handled more simply as well.
>
> Also remove some duplicate checks and re-order and document the code.
>
> v2: restructure the whole function
> v3: fix logic error pointed out by Srini
> v4: fix typo in comment, fix crash caused by incorrect check
> v5: once more fix the logic
> v6: separate cleaner shader checks as suggested by Srini
> v7: re-order incorrect check
> v8: separate the revert
>
> Signed-off-by: Christian König <christian.koe...@amd.com>
> Reviewed-by: Srinivasan Shanmugam <srinivasan.shanmu...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |   6 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 106 ++++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   5 +-
>  3 files changed, 46 insertions(+), 71 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 5eab1c1a380c..30b58772598c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -189,10 +189,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>         }
>
>         need_ctx_switch = ring->current_ctx != fence_ctx;
> -       if (ring->funcs->emit_pipeline_sync && job &&
> -           ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
> -            (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
> -            amdgpu_vm_need_pipeline_sync(ring, job))) {
> +       if ((job && (tmp = amdgpu_sync_get_fence(&job->explicit_sync))) ||
> +           (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
>                 need_pipe_sync = true;
>
>                 if (tmp)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0a80c011e678..31c423663b54 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -707,37 +707,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
>         }
>  }
>
> -/**
> - * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
> - *
> - * @ring: ring on which the job will be submitted
> - * @job: job to submit
> - *
> - * Returns:
> - * True if sync is needed.
> - */
> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
> -                                 struct amdgpu_job *job)
> -{
> -       struct amdgpu_device *adev = ring->adev;
> -       unsigned vmhub = ring->vm_hub;
> -       struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
> -
> -       if (job->vmid == 0)
> -               return false;
> -
> -       if (job->vm_needs_flush || ring->has_compute_vm_bug)
> -               return true;
> -
> -       if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
> -               return true;
> -
> -       if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
> -               return true;
> -
> -       return false;
> -}
> -
>  /**
>   * amdgpu_vm_flush - hardware flush the vm
>   *
> @@ -758,44 +727,52 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>         unsigned vmhub = ring->vm_hub;
>         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>         struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
> -       bool spm_update_needed = job->spm_update_needed;
> -       bool gds_switch_needed = ring->funcs->emit_gds_switch &&
> -               job->gds_switch_needed;
> -       bool vm_flush_needed = job->vm_needs_flush;
> -       bool cleaner_shader_needed = false;
> -       bool pasid_mapping_needed = false;
> -       struct dma_fence *fence = NULL;
> +       bool gds_switch_needed, vm_flush_needed, spm_update_needed,
> +            cleaner_shader_needed, pasid_mapping_needed;
Would be good to document what all of these flags are used for, e.g.:

/* need_pipe_sync - if set, we wait for the last fence on the ring to
 * signal before executing more commands
 * cleaner_shader_needed - if set, we emit the cleaner shader to clean up
 * GPRs and LDS before a new command is submitted
 * etc.
 */

> +       struct dma_fence *fence;
>         unsigned int patch;
>         int r;
>
> +       /* First of all figure out what needs to be done */
>         if (amdgpu_vmid_had_gpu_reset(adev, id)) {

Please add a comment here to explain why we set all of these to true if
we had a GPU reset.

> +               need_pipe_sync = true;
>                 gds_switch_needed = true;
>                 vm_flush_needed = true;
>                 pasid_mapping_needed = true;
>                 spm_update_needed = true;
> +               cleaner_shader_needed = true;
> +       } else {

Would be good to document all of these cases as well.

> +               gds_switch_needed = job->gds_switch_needed;
> +               vm_flush_needed = job->vm_needs_flush;
> +               mutex_lock(&id_mgr->lock);
> +               pasid_mapping_needed = id->pasid != job->pasid ||
> +                       !id->pasid_mapping ||
> +                       !dma_fence_is_signaled(id->pasid_mapping);
> +               mutex_unlock(&id_mgr->lock);
> +               spm_update_needed = job->spm_update_needed;

E.g.:

/* The spearhead marks the first submission from a new client. We need to
 * run the cleaner shader if it is requested by the job and we have a new
 * spearhead, so that we clean up before it runs.
 */

> +               cleaner_shader_needed = job->run_cleaner_shader &&
> +                       job->base.s_fence && &job->base.s_fence->scheduled ==
> +                       isolation->spearhead;

E.g.:

/* This will cause the queue to wait for the current fence on the ring to
 * signal before new work executes (wait for idle). This is needed as a
 * workaround for some hardware (ring->has_compute_vm_bug), if we are
 * updating the vmid or page tables (vm_flush_needed), if we need to emit
 * the cleaner shader (which must execute while the queue is idle), or if
 * the job uses gds and we need to update the gds mappings
 * (gds_switch_needed).
 */

> +               need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
> +                       cleaner_shader_needed || gds_switch_needed;
>         }
>
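For the reset branch above, maybe something as simple as this would do
(the wording is just a suggestion):

/* After a GPU reset nothing about the previous state of this VMID can be
 * trusted any more, so assume the worst case: wait for idle, flush the VM,
 * re-emit the PASID mapping, the GDS switch and the SPM update and run the
 * cleaner shader before any new work is executed.
 */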
> -       mutex_lock(&id_mgr->lock);
> -       if (id->pasid != job->pasid || !id->pasid_mapping ||
> -           !dma_fence_is_signaled(id->pasid_mapping))
> -               pasid_mapping_needed = true;
> -       mutex_unlock(&id_mgr->lock);
> -
> +       /* Then check the pre-requisites */
> +       need_pipe_sync &= !!ring->funcs->emit_pipeline_sync;
>         gds_switch_needed &= !!ring->funcs->emit_gds_switch;
>         vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
>                 job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>         pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
>                 ring->funcs->emit_wreg;
> -
> -       cleaner_shader_needed = job->run_cleaner_shader &&
> -               adev->gfx.enable_cleaner_shader &&
> -               ring->funcs->emit_cleaner_shader && job->base.s_fence &&
> -               &job->base.s_fence->scheduled == isolation->spearhead;
> +       spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid;
> +       cleaner_shader_needed &= adev->gfx.enable_cleaner_shader &&
> +               ring->funcs->emit_cleaner_shader;
>
>         if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
> -           !cleaner_shader_needed)
> +           !cleaner_shader_needed && !spm_update_needed)
>                 return 0;
>
> +       /* Then actually prepare the submission frame */
>         amdgpu_ring_ib_begin(ring);
>         if (ring->funcs->init_cond_exec)
>                 patch = amdgpu_ring_init_cond_exec(ring,
> @@ -815,23 +792,34 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>         if (pasid_mapping_needed)
>                 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
>
> -       if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
> +       if (spm_update_needed)
>                 adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
>
> -       if (ring->funcs->emit_gds_switch &&
> -           gds_switch_needed) {
> +       if (gds_switch_needed)
>                 amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
>                                             job->gds_size, job->gws_base,
>                                             job->gws_size, job->oa_base,
>                                             job->oa_size);
> -       }
>
>         if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
>                 r = amdgpu_fence_emit(ring, &fence, NULL, 0);
>                 if (r)
>                         return r;
> +       } else {
> +               fence = NULL;
> +       }
> +
> +       amdgpu_ring_patch_cond_exec(ring, patch);
> +
> +       /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
> +       if (ring->funcs->emit_switch_buffer) {
> +               amdgpu_ring_emit_switch_buffer(ring);
> +               amdgpu_ring_emit_switch_buffer(ring);
>         }
>
> +       amdgpu_ring_ib_end(ring);
> +
> +       /* And finally remember what the ring has executed */
>         if (vm_flush_needed) {
>                 mutex_lock(&id_mgr->lock);
>                 dma_fence_put(id->last_flush);
> @@ -861,16 +849,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>                 mutex_unlock(&adev->enforce_isolation_mutex);
>         }
>         dma_fence_put(fence);
> -
> -       amdgpu_ring_patch_cond_exec(ring, patch);
> -
> -       /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
> -       if (ring->funcs->emit_switch_buffer) {
> -               amdgpu_ring_emit_switch_buffer(ring);
> -               amdgpu_ring_emit_switch_buffer(ring);
> -       }
> -
> -       amdgpu_ring_ib_end(ring);
>         return 0;
>  }
>
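Putting all of the above together, the decision block at the start of the
function could carry the suggested comments roughly like this (untested,
only meant to illustrate the wording, code otherwise as in this patch):

        /* need_pipe_sync - wait for the last fence on the ring to signal
         *      before executing more commands (wait for idle)
         * gds_switch_needed - the job uses GDS and the mapping must be updated
         * vm_flush_needed - the VMID or the page tables changed
         * pasid_mapping_needed - the VMID<->PASID mapping is missing or stale
         * spm_update_needed - the SPM VMID must be updated
         * cleaner_shader_needed - clean up GPRs and LDS before new work runs
         */
        bool gds_switch_needed, vm_flush_needed, spm_update_needed,
             cleaner_shader_needed, pasid_mapping_needed;

        /* First of all figure out what needs to be done */
        if (amdgpu_vmid_had_gpu_reset(adev, id)) {
                /* After a GPU reset the VMID state is unknown, assume the
                 * worst case and redo everything.
                 */
                need_pipe_sync = true;
                gds_switch_needed = true;
                vm_flush_needed = true;
                pasid_mapping_needed = true;
                spm_update_needed = true;
                cleaner_shader_needed = true;
        } else {
                gds_switch_needed = job->gds_switch_needed;
                vm_flush_needed = job->vm_needs_flush;
                mutex_lock(&id_mgr->lock);
                pasid_mapping_needed = id->pasid != job->pasid ||
                        !id->pasid_mapping ||
                        !dma_fence_is_signaled(id->pasid_mapping);
                mutex_unlock(&id_mgr->lock);
                spm_update_needed = job->spm_update_needed;
                /* The spearhead marks the first submission from a new client.
                 * Run the cleaner shader if the job requests it and we have a
                 * new spearhead, so that we clean up before it runs.
                 */
                cleaner_shader_needed = job->run_cleaner_shader &&
                        job->base.s_fence && &job->base.s_fence->scheduled ==
                        isolation->spearhead;
                /* Wait for idle as a workaround for some hardware
                 * (ring->has_compute_vm_bug), when the VMID or page tables
                 * change, when the cleaner shader runs (it must execute while
                 * the queue is idle) or when the GDS mapping is switched.
                 */
                need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
                        cleaner_shader_needed || gds_switch_needed;
        }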
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index f3ad687125ad..c9578b7f670c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -498,7 +498,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>                        struct ww_acquire_ctx *ticket,
>                        int (*callback)(void *p, struct amdgpu_bo *bo),
>                        void *param);
> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
> +                   bool need_pipe_sync);
>  int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>                           struct amdgpu_vm *vm, bool immediate);
>  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
> @@ -559,8 +560,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
>                            uint32_t fragment_size_default, unsigned max_level,
>                            unsigned max_bits);
>  int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
> -                                 struct amdgpu_job *job);
>  void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>
>  struct amdgpu_task_info *
> --
> 2.34.1
>