On 10.04.25 at 05:52, SRINIVASAN SHANMUGAM wrote:
>
> On 4/9/2025 7:16 PM, SRINIVASAN SHANMUGAM wrote:
>>
>> On 4/9/2025 7:11 PM, SRINIVASAN SHANMUGAM wrote:
>>>
>>> On 4/9/2025 6:45 PM, SRINIVASAN SHANMUGAM wrote:
>>>>
>>>> On 4/9/2025 4:15 PM, Christian König wrote:
>>>>> This reverts commit c2cc3648ba517a6c270500b5447d5a1efdad5936. Turned out
>>>>> that this has some negative consequences for some workloads. Instead,
>>>>> directly check if the cleaner shader should run.
>>>>>
>>>>> While at it, remove amdgpu_vm_need_pipeline_sync(); we also check again
>>>>> if the VMID has seen a GPU reset since last use, and the gds switch
>>>>> setting can be handled more simply as well.
>>>>>
>>>>> Also remove some duplicate checks, and re-order and document the code.
>>>>>
>>>>> v2: restructure the whole function
>>>>> v3: fix logic error pointed out by Srini
>>>>> v4: fix typo in comment, fix crash caused by incorrect check
>>>>> v5: once more fix the logic
>>>>>
>>>>> Signed-off-by: Christian König <christian.koe...@amd.com>
>>>>> Reviewed-by: Srinivasan Shanmugam <srinivasan.shanmu...@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  6 +-
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 94 ++++++++++----------
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 +-
>>>>>   3 files changed, 39 insertions(+), 66 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>>> index 802743efa3b3..30b58772598c 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>>> @@ -189,10 +189,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>>>>>   	}
>>>>>   	need_ctx_switch = ring->current_ctx != fence_ctx;
>>>>> -	if (ring->funcs->emit_pipeline_sync && job &&
>>>>> -	    ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
>>>>> -	     need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
>>>>> -
>>>>> +	if ((job && (tmp = amdgpu_sync_get_fence(&job->explicit_sync))) ||
>>>>
>>>>
>>>> Direct assignment in an if condition looks like it may not be allowed; maybe we can split this logic, something like below?
You can do direct assignment in an if condition if you put it into an extra (), but I agree that we should clean that up at some point. Just not in this patch, since that is unrelated.

>>>>
>>>> /* Check if job is present and get the fence */
>>>> if (job) {
>>>>         tmp = amdgpu_sync_get_fence(&job->explicit_sync);
>>>> }
>>>>
>>>> /* Check if pipe sync is needed */
>>>> if (tmp || (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
>>>>         need_pipe_sync = true;
>>>>
>>>>
>>>>> +	    (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
>>>>>   		need_pipe_sync = true;
>>>>>   		if (tmp)
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index b5ddfcbbc9fc..8e99dbd70968 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -596,37 +596,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
>>>>>   	}
>>>>>   }
>>>>> -/**
>>>>> - * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
>>>>> - *
>>>>> - * @ring: ring on which the job will be submitted
>>>>> - * @job: job to submit
>>>>> - *
>>>>> - * Returns:
>>>>> - * True if sync is needed.
>>>>> - */
>>>>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>>>>> -				  struct amdgpu_job *job)
>>>>> -{
>>>>> -	struct amdgpu_device *adev = ring->adev;
>>>>> -	unsigned vmhub = ring->vm_hub;
>>>>> -	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>>>> -
>>>>> -	if (job->vmid == 0)
>>>>> -		return false;
>>>>> -
>>>>> -	if (job->vm_needs_flush || ring->has_compute_vm_bug)
>>>>> -		return true;
>>>>> -
>>>>> -	if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
>>>>> -		return true;
>>>>> -
>>>>> -	if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
>>>>> -		return true;
>>>>> -
>>>>> -	return false;
>>>>> -}
>>>>> -
>>>>>   /**
>>>>>    * amdgpu_vm_flush - hardware flush the vm
>>>>>    *
>>>>> @@ -647,43 +616,49 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>>   	unsigned vmhub = ring->vm_hub;
>>>>>   	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>>>>   	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
>>>>> -	bool spm_update_needed = job->spm_update_needed;
>>>>> -	bool gds_switch_needed = ring->funcs->emit_gds_switch &&
>>>>> -		job->gds_switch_needed;
>>>>> -	bool vm_flush_needed = job->vm_needs_flush;
>>>>> -	bool cleaner_shader_needed = false;
>>>>> -	bool pasid_mapping_needed = false;
>>>>> -	struct dma_fence *fence = NULL;
>>>>> +	bool gds_switch_needed, vm_flush_needed, spm_update_needed,
>>>>> +	     cleaner_shader_needed
>>>>
>>>>
>>>> I think we should initialize "cleaner_shader_needed" here, i.e.
>>>> cleaner_shader_needed = false?
>>>>
>>>
>>> Or somehow try to move the below up to here?
>>>
>>> "	cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>>> 		ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>>> 		&job->base.s_fence->scheduled == isolation->spearhead;"?

Oh yeah, that's a good idea, going to add that.
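[Editor's aside: a minimal standalone sketch of the assignment-in-if idiom mentioned earlier in this mail. The helper name is hypothetical, not the amdgpu code; the point is that the extra pair of parentheses is what tells GCC/Clang's -Wparentheses that the assignment inside the condition is intentional.]

#include <stdio.h>

static int get_fence(void)
{
	return 42; /* pretend a fence was found */
}

int main(void)
{
	int tmp;

	/* "if (tmp = get_fence())" would draw a -Wparentheses warning;
	 * the extra () marks the assignment as deliberate. */
	if ((tmp = get_fence()))
		printf("got %d\n", tmp);

	return 0;
}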
>>>
>>>>
>>>>> , pasid_mapping_needed;
>>>>> +	struct dma_fence *fence;
>>>>>   	unsigned int patch;
>>>>>   	int r;
>>>>> +	/* First of all figure out what needs to be done */
>>>>>   	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
>>>>> +		need_pipe_sync = true;
>>>>>   		gds_switch_needed = true;
>>>>>   		vm_flush_needed = true;
>>>>>   		pasid_mapping_needed = true;
>>>>>   		spm_update_needed = true;
>>>>> +	} else {
>>>>> +		gds_switch_needed = job->gds_switch_needed;
>>>>> +		vm_flush_needed = job->vm_needs_flush;
>>>>> +		mutex_lock(&id_mgr->lock);
>>>>> +		pasid_mapping_needed = id->pasid != job->pasid ||
>>>>> +			!id->pasid_mapping ||
>>>>> +			!dma_fence_is_signaled(id->pasid_mapping);
>>>>> +		mutex_unlock(&id_mgr->lock);
>>>>> +		spm_update_needed = job->spm_update_needed;
>>>>> +		need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
>>>>> +			cleaner_shader_needed
>>
>> Sorry, here please:
>>
>> "	cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>> 		ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>> 		&job->base.s_fence->scheduled == isolation->spearhead;"?
>>
>>
>>>>> 			|| gds_switch_needed;
>>>>>   	}
>>>>> -	mutex_lock(&id_mgr->lock);
>>>>> -	if (id->pasid != job->pasid || !id->pasid_mapping ||
>>>>> -	    !dma_fence_is_signaled(id->pasid_mapping))
>>>>> -		pasid_mapping_needed = true;
>>>>> -	mutex_unlock(&id_mgr->lock);
>>>>> -
>>>>> +	need_pipe_sync &= !!ring->funcs->emit_pipeline_sync;
>>>>>   	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
>>>>>   	vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
>>>>>   		job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>>>>>   	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
>>>>>   		ring->funcs->emit_wreg;
>>>>> +	spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid;
>>>>>   	cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>>>>>   		ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>>>>>   		&job->base.s_fence->scheduled == isolation->spearhead;
>>>>>   	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
>>>>> -	    !cleaner_shader_needed)
>>>>> +	    !cleaner_shader_needed && !spm_update_needed)
>
>
> Here, do we need to explicitly add this "&& !spm_update_needed" check along with the other checks, please? Because pipeline_sync is independent of "spm_update"?

It isn't really necessary I think, but it shouldn't hurt either. Going to add that.

Thanks,
Christian.
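[Editor's aside: to make the two agreed changes concrete — computing cleaner_shader_needed before need_pipe_sync consumes it, and including spm_update_needed in the early-out — here is a standalone sketch of the decision flow. The types and names are simplified stand-ins, not the real amdgpu structs, and this only illustrates one way a v6 might order things.]

#include <stdbool.h>
#include <stdio.h>

struct job {
	bool gds_switch_needed;
	bool vm_needs_flush;
	bool spm_update_needed;
	bool runs_cleaner_shader;	/* stand-in for the spearhead check */
};

static int vm_flush(const struct job *job, bool had_reset)
{
	bool need_pipe_sync = false;
	bool gds_switch_needed, vm_flush_needed, spm_update_needed;
	/* computed up front, before need_pipe_sync reads it below */
	bool cleaner_shader_needed = job->runs_cleaner_shader;

	if (had_reset) {
		need_pipe_sync = true;
		gds_switch_needed = true;
		vm_flush_needed = true;
		spm_update_needed = true;
	} else {
		gds_switch_needed = job->gds_switch_needed;
		vm_flush_needed = job->vm_needs_flush;
		spm_update_needed = job->spm_update_needed;
		need_pipe_sync |= vm_flush_needed || cleaner_shader_needed ||
				  gds_switch_needed;
	}

	/* every "work needed" flag must appear in this guard, or a job
	 * that only needs that one item returns before emitting it */
	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
	    !cleaner_shader_needed && !spm_update_needed)
		return 0;

	/* ... the real function emits the actual packets here ... */
	return 1;
}

int main(void)
{
	struct job j = { .spm_update_needed = true };

	/* only SPM needs updating; without spm_update_needed in the
	 * guard, this would wrongly return 0 and emit nothing */
	printf("work emitted: %d\n", vm_flush(&j, false));
	return 0;
}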
>
>
>>>>>   		return 0;
>>>>> +	/* Then actually prepare the submission frame */
>>>>>   	amdgpu_ring_ib_begin(ring);
>>>>>   	if (ring->funcs->init_cond_exec)
>>>>>   		patch = amdgpu_ring_init_cond_exec(ring,
>>>>> @@ -703,23 +678,34 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>>   	if (pasid_mapping_needed)
>>>>>   		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
>>>>> -	if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
>>>>> +	if (spm_update_needed)
>>>>>   		adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
>>>>> -	if (ring->funcs->emit_gds_switch &&
>>>>> -	    gds_switch_needed) {
>>>>> +	if (gds_switch_needed)
>>>>>   		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
>>>>>   					    job->gds_size, job->gws_base,
>>>>>   					    job->gws_size, job->oa_base,
>>>>>   					    job->oa_size);
>>>>> -	}
>>>>>   	if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
>>>>>   		r = amdgpu_fence_emit(ring, &fence, NULL, 0);
>>>>>   		if (r)
>>>>>   			return r;
>>>>> +	} else {
>>>>> +		fence = NULL;
>>>>> +	}
>>>>> +
>>>>> +	amdgpu_ring_patch_cond_exec(ring, patch);
>>>>> +
>>>>> +	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>>>>> +	if (ring->funcs->emit_switch_buffer) {
>>>>> +		amdgpu_ring_emit_switch_buffer(ring);
>>>>> +		amdgpu_ring_emit_switch_buffer(ring);
>>>>>   	}
>>>>> +	amdgpu_ring_ib_end(ring);
>>>>> +
>>>>> +	/* And finally remember what the ring has executed */
>>>>>   	if (vm_flush_needed) {
>>>>>   		mutex_lock(&id_mgr->lock);
>>>>>   		dma_fence_put(id->last_flush);
>>>>> @@ -749,16 +735,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>>   		mutex_unlock(&adev->enforce_isolation_mutex);
>>>>>   	}
>>>>>   	dma_fence_put(fence);
>>>>> -
>>>>> -	amdgpu_ring_patch_cond_exec(ring, patch);
>>>>> -
>>>>> -	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>>>>> -	if (ring->funcs->emit_switch_buffer) {
>>>>> -		amdgpu_ring_emit_switch_buffer(ring);
>>>>> -		amdgpu_ring_emit_switch_buffer(ring);
>>>>> -	}
>>>>> -
>>>>> -	amdgpu_ring_ib_end(ring);
>>>>>   	return 0;
>>>>>   }
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> index daa2f9b33620..e9ecdb96bafa 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> @@ -493,7 +493,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>   		       struct ww_acquire_ctx *ticket,
>>>>>   		       int (*callback)(void *p, struct amdgpu_bo *bo),
>>>>>   		       void *param);
>>>>> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
>>>>> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>> +		    bool need_pipe_sync);
>>>>>   int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>>>>>   			  struct amdgpu_vm *vm, bool immediate);
>>>>>   int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>>> @@ -550,8 +551,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
>>>>>   			   uint32_t fragment_size_default, unsigned max_level,
>>>>>   			   unsigned max_bits);
>>>>>   int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>>>>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>>>>> -				  struct amdgpu_job *job);
>>>>>   void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>>>>>   struct amdgpu_task_info *