This reverts commit c2cc3648ba517a6c270500b5447d5a1efdad5936. Turned out that this has some negative consequences for some workloads. Instead check if the cleaner shader should run directly.
While at it remove amdgpu_vm_need_pipeline_sync(), we also check again if the VMID has seen a GPU reset since last use and the gds switch setting can be handled more simply as well. Also remove some duplicate checks and re-order and document the code. v2: restructure the whole function v3: fix logic error pointed out by Srini Signed-off-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 92 +++++++++----------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +- 3 files changed, 36 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 802743efa3b3..ff2ca984279a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -191,8 +191,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) || - need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) { - + (amdgpu_sriov_vf(adev) && need_ctx_switch))) { need_pipe_sync = true; if (tmp) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b5ddfcbbc9fc..e6d7db8d40cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -596,37 +596,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) } } -/** - * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job. - * - * @ring: ring on which the job will be submitted - * @job: job to submit - * - * Returns: - * True if sync is needed. 
- */ -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, - struct amdgpu_job *job) -{ - struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->vm_hub; - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - - if (job->vmid == 0) - return false; - - if (job->vm_needs_flush || ring->has_compute_vm_bug) - return true; - - if (ring->funcs->emit_gds_switch && job->gds_switch_needed) - return true; - - if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid])) - return true; - - return false; -} - /** * amdgpu_vm_flush - hardware flush the vm * @@ -647,29 +616,31 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; - bool spm_update_needed = job->spm_update_needed; - bool gds_switch_needed = ring->funcs->emit_gds_switch && - job->gds_switch_needed; - bool vm_flush_needed = job->vm_needs_flush; - bool cleaner_shader_needed = false; - bool pasid_mapping_needed = false; - struct dma_fence *fence = NULL; + bool gds_switch_needed, vm_flush_needed, spm_update_needed, + cleaner_shader_needed, pasid_mapping_needed; + struct dma_fence *fence; unsigned int patch; int r; + /* First of all figure out what needs to be done */ if (amdgpu_vmid_had_gpu_reset(adev, id)) { + need_pipe_sync = true; gds_switch_needed = true; vm_flush_needed = true; pasid_mapping_needed = true; spm_update_needed = true; + } else { + need_pipe_sync |= ring->has_compute_vm_bug; + gds_switch_needed = job->gds_switch_needed; + vm_flush_needed = job->vm_needs_flush; + mutex_lock(&id_mgr->lock); + if (id->pasid != job->pasid || !id->pasid_mapping || + !dma_fence_is_signaled(id->pasid_mapping)) + pasid_mapping_needed = true; + mutex_unlock(&id_mgr->lock); + spm_update_needed = job->spm_update_needed; } - mutex_lock(&id_mgr->lock); - if (id->pasid != job->pasid || !id->pasid_mapping || - 
!dma_fence_is_signaled(id->pasid_mapping)) - pasid_mapping_needed = true; - mutex_unlock(&id_mgr->lock); - gds_switch_needed &= !!ring->funcs->emit_gds_switch; vm_flush_needed &= !!ring->funcs->emit_vm_flush && job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; @@ -681,15 +652,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, &job->base.s_fence->scheduled == isolation->spearhead; if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && - !cleaner_shader_needed) + !cleaner_shader_needed && !spm_update_needed) return 0; + /* Then actually prepare the submission frame */ amdgpu_ring_ib_begin(ring); if (ring->funcs->init_cond_exec) patch = amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); - if (need_pipe_sync) + if (need_pipe_sync || vm_flush_needed || cleaner_shader_needed || + gds_switch_needed) amdgpu_ring_emit_pipeline_sync(ring); if (cleaner_shader_needed) @@ -706,20 +679,31 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); - if (ring->funcs->emit_gds_switch && - gds_switch_needed) { + if (gds_switch_needed) amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, job->gws_size, job->oa_base, job->oa_size); - } if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) return r; + } else { + fence = NULL; + } + + amdgpu_ring_patch_cond_exec(ring, patch); + + /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ + if (ring->funcs->emit_switch_buffer) { + amdgpu_ring_emit_switch_buffer(ring); + amdgpu_ring_emit_switch_buffer(ring); } + amdgpu_ring_ib_end(ring); + + /* And finally remember what the ring has executed */ if (vm_flush_needed) { mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); @@ -749,16 +733,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job, mutex_unlock(&adev->enforce_isolation_mutex); } dma_fence_put(fence); - - amdgpu_ring_patch_cond_exec(ring, patch); - - /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ - if (ring->funcs->emit_switch_buffer) { - amdgpu_ring_emit_switch_buffer(ring); - amdgpu_ring_emit_switch_buffer(ring); - } - - amdgpu_ring_ib_end(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index daa2f9b33620..e9ecdb96bafa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -493,7 +493,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, + bool need_pipe_sync); int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, @@ -550,8 +551,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, uint32_t fragment_size_default, unsigned max_level, unsigned max_bits); int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, - struct amdgpu_job *job); void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); struct amdgpu_task_info * -- 2.34.1