Re-emit the unprocessed state after resetting the queue. Back up the unprocessed ring contents before resetting the queue, then signal only the guilty job's hardware fence and re-emit the backed-up commands so innocent jobs are not lost; if re-emission fails, fall back to force-completing all fences. The per-ring soft_recovery callback is removed since this path supersedes it. Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 39 +++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 02022c7b4de78..a68e1fe3a7d68 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6278,21 +6278,6 @@ static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask, 0x20); } -static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - WREG32_SOC15(GC, 0, regSQ_CMD, value); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); -} - static void gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -6815,6 +6800,8 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, if (amdgpu_sriov_vf(adev)) return -EINVAL; + amdgpu_ring_backup_unprocessed_commands(ring, &job->hw_fence.base, true); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, job->vmid, false); if (r) { @@ -6839,9 +6826,16 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, r = amdgpu_ring_test_ring(ring); if (r) return r; + dma_fence_set_error(&job->base.s_fence->finished, -ETIME); - amdgpu_fence_driver_force_completion(ring); + /* signal the fence of the bad job */ + amdgpu_fence_driver_guilty_force_completion(&job->hw_fence.base); atomic_inc(&ring->adev->gpu_reset_counter); + r = amdgpu_ring_reemit_unprocessed_commands(ring); + if (r) + /* if we fail to reemit, force complete all fences */ + amdgpu_fence_driver_force_completion(ring); + return 0; } @@ -6984,6 +6978,8 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, if (amdgpu_sriov_vf(adev)) return -EINVAL; + amdgpu_ring_backup_unprocessed_commands(ring, &job->hw_fence.base, true); + r = 
amdgpu_mes_reset_legacy_queue(ring->adev, ring, job->vmid, true); if (r) { dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); @@ -7006,9 +7002,16 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, r = amdgpu_ring_test_ring(ring); if (r) return r; + dma_fence_set_error(&job->base.s_fence->finished, -ETIME); - amdgpu_fence_driver_force_completion(ring); + /* signal the fence of the bad job */ + amdgpu_fence_driver_guilty_force_completion(&job->hw_fence.base); atomic_inc(&ring->adev->gpu_reset_counter); + r = amdgpu_ring_reemit_unprocessed_commands(ring); + if (r) + /* if we fail to reemit, force complete all fences */ + amdgpu_fence_driver_force_completion(ring); + return 0; } @@ -7245,7 +7248,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, .reset = gfx_v11_0_reset_kgq, .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, @@ -7287,7 +7289,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, .reset = gfx_v11_0_reset_kcq, .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, -- 2.49.0