This reverts commit c2cc3648ba517a6c270500b5447d5a1efdad5936. It turned out that this has some negative consequences for some workloads. Instead, directly check whether the cleaner shader should run.
While at it remove amdgpu_vm_need_pipeline_sync(), we also check again if the VMID has seen a GPU reset since last use and the gds switch setiing can be handled more simply as well. Also remove some duplicate checks and re-order and document the code. v2: restructure the while function v3: fix logic error pointed out by Srini v4: fix typo in comment, fix crash caused by incorrect check v5: once more fix the logic v6: separate cleaner shader checks as suggested by Srini Signed-off-by: Christian König <christian.koe...@amd.com> Reviewed-by: Srinivasan Shanmugam <srinivasan.shanmu...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 106 ++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +- 3 files changed, 46 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 802743efa3b3..30b58772598c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -189,10 +189,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, } need_ctx_switch = ring->current_ctx != fence_ctx; - if (ring->funcs->emit_pipeline_sync && job && - ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) || - need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) { - + if ((job && (tmp = amdgpu_sync_get_fence(&job->explicit_sync))) || + (amdgpu_sriov_vf(adev) && need_ctx_switch)) { need_pipe_sync = true; if (tmp) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index dadf6715b98b..a0e9ab1afe96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -596,37 +596,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) } } -/** - * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job. 
- * - * @ring: ring on which the job will be submitted - * @job: job to submit - * - * Returns: - * True if sync is needed. - */ -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, - struct amdgpu_job *job) -{ - struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->vm_hub; - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - - if (job->vmid == 0) - return false; - - if (job->vm_needs_flush || ring->has_compute_vm_bug) - return true; - - if (ring->funcs->emit_gds_switch && job->gds_switch_needed) - return true; - - if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid])) - return true; - - return false; -} - /** * amdgpu_vm_flush - hardware flush the vm * @@ -647,44 +616,52 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; - bool spm_update_needed = job->spm_update_needed; - bool gds_switch_needed = ring->funcs->emit_gds_switch && - job->gds_switch_needed; - bool vm_flush_needed = job->vm_needs_flush; - bool cleaner_shader_needed = false; - bool pasid_mapping_needed = false; - struct dma_fence *fence = NULL; + bool gds_switch_needed, vm_flush_needed, spm_update_needed, + cleaner_shader_needed, pasid_mapping_needed; + struct dma_fence *fence; unsigned int patch; int r; + /* First of all figure out what needs to be done */ if (amdgpu_vmid_had_gpu_reset(adev, id)) { + need_pipe_sync = true; gds_switch_needed = true; vm_flush_needed = true; pasid_mapping_needed = true; spm_update_needed = true; + cleaner_shader_needed = true; + } else { + gds_switch_needed = job->gds_switch_needed; + vm_flush_needed = job->vm_needs_flush; + mutex_lock(&id_mgr->lock); + pasid_mapping_needed = id->pasid != job->pasid || + !id->pasid_mapping || + !dma_fence_is_signaled(id->pasid_mapping); + mutex_unlock(&id_mgr->lock); + spm_update_needed = job->spm_update_needed; + need_pipe_sync 
|= ring->has_compute_vm_bug || vm_flush_needed || + cleaner_shader_needed || gds_switch_needed; + cleaner_shader_needed = job->run_cleaner_shader && + job->base.s_fence && &job->base.s_fence->scheduled == + isolation->spearhead; } - mutex_lock(&id_mgr->lock); - if (id->pasid != job->pasid || !id->pasid_mapping || - !dma_fence_is_signaled(id->pasid_mapping)) - pasid_mapping_needed = true; - mutex_unlock(&id_mgr->lock); - + /* Then check the pre-requisites */ + need_pipe_sync &= !!ring->funcs->emit_pipeline_sync; gds_switch_needed &= !!ring->funcs->emit_gds_switch; vm_flush_needed &= !!ring->funcs->emit_vm_flush && job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - - cleaner_shader_needed = job->run_cleaner_shader && - adev->gfx.enable_cleaner_shader && - ring->funcs->emit_cleaner_shader && job->base.s_fence && - &job->base.s_fence->scheduled == isolation->spearhead; + spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid; + cleaner_shader_needed &= adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader; if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && - !cleaner_shader_needed) + !cleaner_shader_needed && !spm_update_needed) return 0; + /* Then actually prepare the submission frame */ amdgpu_ring_ib_begin(ring); if (ring->funcs->init_cond_exec) patch = amdgpu_ring_init_cond_exec(ring, @@ -704,23 +681,34 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (pasid_mapping_needed) amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); - if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) + if (spm_update_needed) adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); - if (ring->funcs->emit_gds_switch && - gds_switch_needed) { + if (gds_switch_needed) amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, job->gws_size, job->oa_base, job->oa_size); - } if 
(vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) return r; + } else { + fence = NULL; + } + + amdgpu_ring_patch_cond_exec(ring, patch); + + /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ + if (ring->funcs->emit_switch_buffer) { + amdgpu_ring_emit_switch_buffer(ring); + amdgpu_ring_emit_switch_buffer(ring); } + amdgpu_ring_ib_end(ring); + + /* And finally remember what the ring has executed */ if (vm_flush_needed) { mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); @@ -750,16 +738,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, mutex_unlock(&adev->enforce_isolation_mutex); } dma_fence_put(fence); - - amdgpu_ring_patch_cond_exec(ring, patch); - - /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ - if (ring->funcs->emit_switch_buffer) { - amdgpu_ring_emit_switch_buffer(ring); - amdgpu_ring_emit_switch_buffer(ring); - } - - amdgpu_ring_ib_end(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index daa2f9b33620..e9ecdb96bafa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -493,7 +493,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, + bool need_pipe_sync); int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, @@ -550,8 +551,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, uint32_t fragment_size_default, unsigned max_level, unsigned max_bits); int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct 
drm_file *filp); -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, - struct amdgpu_job *job); void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); struct amdgpu_task_info * -- 2.34.1