, pasid_mapping_needed;
+ struct dma_fence *fence;
unsigned int patch;
int r;
+ /* First of all figure out what needs to be done */
if (amdgpu_vmid_had_gpu_reset(adev, id)) {
+ need_pipe_sync = true;
gds_switch_needed = true;
vm_flush_needed = true;
pasid_mapping_needed = true;
spm_update_needed = true;
+ } else {
+ gds_switch_needed = job->gds_switch_needed;
+ vm_flush_needed = job->vm_needs_flush;
+ mutex_lock(&id_mgr->lock);
+ pasid_mapping_needed = id->pasid != job->pasid ||
+ !id->pasid_mapping ||
+ !dma_fence_is_signaled(id->pasid_mapping);
+ mutex_unlock(&id_mgr->lock);
+ spm_update_needed = job->spm_update_needed;
+ need_pipe_sync |= ring->has_compute_vm_bug ||
vm_flush_needed ||
+ cleaner_shader_needed || gds_switch_needed;
}
- mutex_lock(&id_mgr->lock);
- if (id->pasid != job->pasid || !id->pasid_mapping ||
- !dma_fence_is_signaled(id->pasid_mapping))
- pasid_mapping_needed = true;
- mutex_unlock(&id_mgr->lock);
-
+ need_pipe_sync &= !!ring->funcs->emit_pipeline_sync;
gds_switch_needed &= !!ring->funcs->emit_gds_switch;
vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg;
+ spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid;
cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
ring->funcs->emit_cleaner_shader && job->base.s_fence &&
&job->base.s_fence->scheduled == isolation->spearhead;
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
- !cleaner_shader_needed)
+ !cleaner_shader_needed && !spm_update_needed)
return 0;
+ /* Then actually prepare the submission frame */
amdgpu_ring_ib_begin(ring);
if (ring->funcs->init_cond_exec)
patch = amdgpu_ring_init_cond_exec(ring,
@@ -703,23 +678,34 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (pasid_mapping_needed)
amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
- if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+ if (spm_update_needed)
adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
- if (ring->funcs->emit_gds_switch &&
- gds_switch_needed) {
+ if (gds_switch_needed)
amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
job->gds_size, job->gws_base,
job->gws_size, job->oa_base,
job->oa_size);
- }
if (vm_flush_needed || pasid_mapping_needed ||
cleaner_shader_needed) {
r = amdgpu_fence_emit(ring, &fence, NULL, 0);
if (r)
return r;
+ } else {
+ fence = NULL;
+ }
+
+ amdgpu_ring_patch_cond_exec(ring, patch);
+
+ /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
+ if (ring->funcs->emit_switch_buffer) {
+ amdgpu_ring_emit_switch_buffer(ring);
+ amdgpu_ring_emit_switch_buffer(ring);
}
+ amdgpu_ring_ib_end(ring);
+
+ /* And finally remember what the ring has executed */
if (vm_flush_needed) {
mutex_lock(&id_mgr->lock);
dma_fence_put(id->last_flush);
@@ -749,16 +735,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
mutex_unlock(&adev->enforce_isolation_mutex);
}
dma_fence_put(fence);
-
- amdgpu_ring_patch_cond_exec(ring, patch);
-
- /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
- if (ring->funcs->emit_switch_buffer) {
- amdgpu_ring_emit_switch_buffer(ring);
- amdgpu_ring_emit_switch_buffer(ring);
- }
-
- amdgpu_ring_ib_end(ring);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index daa2f9b33620..e9ecdb96bafa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -493,7 +493,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct ww_acquire_ctx *ticket,
int (*callback)(void *p, struct amdgpu_bo *bo),
void *param);
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ bool need_pipe_sync);
int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
@@ -550,8 +551,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
uint32_t fragment_size_default, unsigned max_level,
unsigned max_bits);
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
-bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
- struct amdgpu_job *job);
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
struct amdgpu_task_info *