Move the guilty handling into the ring reset callbacks.  This
allows each ring reset callback to better handle fence errors and
force completions in line with the reset behavior of each IP.  It
also allows us to remove the ring is_guilty callback, since that
logic now lives in the reset callbacks.
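
For reference, the common tail that the per-queue reset callbacks gain
in this patch (a sketch distilled from the gfx/jpeg/vcn hunks below,
not additional code):

    /* after the queue reset and ring test succeed */
    dma_fence_set_error(&job->base.s_fence->finished, -ETIME); /* mark the hung job */
    amdgpu_fence_driver_force_completion(ring);                /* signal outstanding fences */
    atomic_inc(&ring->adev->gpu_reset_counter);                /* account for the reset */
    return 0;

For the SDMA engine resets, the fence error and counter bump are gated
on whether the queue was actually guilty, and the force completion
moves into the queue restore path so every affected ring is completed.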

Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 22 +---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  4 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c   |  4 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c   |  4 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    |  2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  |  2 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c   |  2 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c   |  2 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c   |  2 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c   |  2 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c |  2 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c |  2 +
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 56 ++++++++++++------------
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 25 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   | 27 ++++++++++--
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   |  2 +
 drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c   |  2 +
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c    |  2 +
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c  |  2 +
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c  |  2 +
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c  |  2 +
 22 files changed, 116 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 461bd551546de..308d3889e46ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -91,7 +91,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
        struct amdgpu_job *job = to_amdgpu_job(s_job);
        struct amdgpu_task_info *ti;
        struct amdgpu_device *adev = ring->adev;
-       bool set_error = false;
        int idx, r;
 
        if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -134,8 +133,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
        if (unlikely(adev->debug_disable_gpu_ring_reset)) {
                dev_err(adev->dev, "Ring reset disabled by debug mask\n");
        } else if (amdgpu_gpu_recovery && ring->funcs->reset) {
-               bool is_guilty;
-
                dev_err(adev->dev, "Starting %s ring reset\n",
                        s_job->sched->name);
 
@@ -145,24 +142,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
                 */
                drm_sched_wqueue_stop(&ring->sched);
 
-               /* for engine resets, we need to reset the engine,
-                * but individual queues may be unaffected.
-                * check here to make sure the accounting is correct.
-                */
-               if (ring->funcs->is_guilty)
-                       is_guilty = ring->funcs->is_guilty(ring);
-               else
-                       is_guilty = true;
-
-               if (is_guilty) {
-                       dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
-                       set_error = true;
-               }
-
                r = amdgpu_ring_reset(ring, job);
                if (!r) {
-                       if (is_guilty)
-                               atomic_inc(&ring->adev->gpu_reset_counter);
                        drm_sched_wqueue_start(&ring->sched);
                        dev_err(adev->dev, "Ring %s reset succeeded\n",
                                ring->sched.name);
@@ -173,8 +154,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
                dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
        }
 
-       if (!set_error)
-               dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
+       dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
 
        if (amdgpu_device_should_recover_gpu(ring->adev)) {
                struct amdgpu_reset_context reset_context;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index ab5402d7ce9c8..2b3843f5218c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -270,7 +270,6 @@ struct amdgpu_ring_funcs {
        void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
        int (*reset)(struct amdgpu_ring *ring, struct amdgpu_job *job);
        void (*emit_cleaner_shader)(struct amdgpu_ring *ring);
-       bool (*is_guilty)(struct amdgpu_ring *ring);
 };
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7a82c60d923ed..b57a21c0874c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9579,7 +9579,9 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_ring(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
@@ -9655,7 +9657,9 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_ring(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 9ad4f6971f8bf..02022c7b4de78 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6839,7 +6839,9 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_ring(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
@@ -7004,7 +7006,9 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_ring(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 3c628e3de5000..a4e3ce81bc671 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -5339,7 +5339,9 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_ring(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
@@ -5457,7 +5459,9 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_ring(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e64b02bb04e26..f699c8b0f7488 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -7226,7 +7226,9 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_ring(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 0c2e80f73ba49..d9eea11f52fec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3623,7 +3623,9 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_ring(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index cd7c45a77120f..f2058f263cc05 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -774,7 +774,9 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index d936f0063039c..5eb86291ccdd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -653,7 +653,9 @@ static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index 9e1ae935c6663..ff826611b600e 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -565,7 +565,9 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index da27eac1115ee..179dd420edb15 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -733,7 +733,9 @@ static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index f1a6fe7f7b3af..c956f424fbbf9 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -1156,7 +1156,9 @@ static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
index 3d2b9d38c306a..ef9289f78a46a 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
@@ -847,7 +847,9 @@ static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 73328e213c247..fce8cc3ef066c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1648,44 +1648,30 @@ static bool sdma_v4_4_2_is_queue_selected(struct amdgpu_device *adev, uint32_t i
        return (context_status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0;
 }
 
-static bool sdma_v4_4_2_ring_is_guilty(struct amdgpu_ring *ring)
-{
-       struct amdgpu_device *adev = ring->adev;
-       uint32_t instance_id = ring->me;
-
-       return sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
-}
-
-static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring)
-{
-       struct amdgpu_device *adev = ring->adev;
-       uint32_t instance_id = ring->me;
-
-       if (!adev->sdma.has_page_queue)
-               return false;
-
-       return sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
-}
-
 static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring,
                                   struct amdgpu_job *job)
 {
        struct amdgpu_device *adev = ring->adev;
-       bool is_guilty = ring->funcs->is_guilty(ring);
        u32 id = GET_INST(SDMA0, ring->me);
+       bool is_guilty;
        int r;
 
        if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
 
+       is_guilty = sdma_v4_4_2_is_queue_selected(adev, id,
+                                                 &adev->sdma.instance[id].page == ring);
+
        amdgpu_amdkfd_suspend(adev, false);
        r = amdgpu_sdma_reset_engine(adev, id);
        amdgpu_amdkfd_resume(adev, false);
        if (r)
                return r;
 
-       if (is_guilty)
-               amdgpu_fence_driver_force_completion(ring);
+       if (is_guilty) {
+               dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
+               atomic_inc(&ring->adev->gpu_reset_counter);
+       }
 
        return 0;
 }
@@ -1731,8 +1717,8 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
 static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
-       u32 inst_mask;
-       int i;
+       u32 inst_mask, tmp_mask;
+       int i, r;
 
        inst_mask = 1 << ring->me;
        udelay(50);
@@ -1749,7 +1735,25 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
                return -ETIMEDOUT;
        }
 
-       return sdma_v4_4_2_inst_start(adev, inst_mask, true);
+       r = sdma_v4_4_2_inst_start(adev, inst_mask, true);
+       if (r) {
+               return r;
+       }
+
+       tmp_mask = inst_mask;
+       for_each_inst(i, tmp_mask) {
+               ring = &adev->sdma.instance[i].ring;
+
+               amdgpu_fence_driver_force_completion(ring);
+
+               if (adev->sdma.has_page_queue) {
+                       struct amdgpu_ring *page = &adev->sdma.instance[i].page;
+
+                       amdgpu_fence_driver_force_completion(page);
+               }
+       }
+
+       return r;
 }
 
 static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
@@ -2146,7 +2150,6 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
        .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
        .reset = sdma_v4_4_2_reset_queue,
-       .is_guilty = sdma_v4_4_2_ring_is_guilty,
 };
 
 static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
@@ -2179,7 +2182,6 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
        .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
        .reset = sdma_v4_4_2_reset_queue,
-       .is_guilty = sdma_v4_4_2_page_ring_is_guilty,
 };
 
 static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 8d1c43ed39994..4582a11b411dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1538,17 +1538,35 @@ static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block)
        return 0;
 }
 
+static bool sdma_v5_0_is_queue_selected(struct amdgpu_device *adev,
+                                       uint32_t instance_id)
+{
+       u32 context_status = RREG32(sdma_v5_0_get_reg_offset(adev, instance_id,
+                                                            mmSDMA0_GFX_CONTEXT_STATUS));
+
+       /* Check if the SELECTED bit is set */
+       return (context_status & SDMA0_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0;
+}
+
 static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring,
                                 struct amdgpu_job *job)
 {
        struct amdgpu_device *adev = ring->adev;
        u32 inst_id = ring->me;
+       bool is_guilty = sdma_v5_0_is_queue_selected(adev, inst_id);
        int r;
 
+       amdgpu_amdkfd_suspend(adev, false);
        r = amdgpu_sdma_reset_engine(adev, inst_id);
+       amdgpu_amdkfd_resume(adev, false);
        if (r)
                return r;
-       amdgpu_fence_driver_force_completion(ring);
+
+       if (is_guilty) {
+               dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
+               atomic_inc(&ring->adev->gpu_reset_counter);
+       }
+
        return 0;
 }
 
@@ -1616,7 +1634,10 @@ static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring)
 
        r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true);
        amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
-       return r;
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index f700ac64fb616..711064ea22d5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1451,17 +1451,35 @@ static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
        return -ETIMEDOUT;
 }
 
+static bool sdma_v5_2_is_queue_selected(struct amdgpu_device *adev,
+                                       uint32_t instance_id)
+{
+       u32 context_status = RREG32(sdma_v5_2_get_reg_offset(adev, instance_id,
+                                                            mmSDMA0_GFX_CONTEXT_STATUS));
+
+       /* Check if the SELECTED bit is set */
+       return (context_status & SDMA0_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0;
+}
+
 static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring,
                                 struct amdgpu_job *job)
 {
        struct amdgpu_device *adev = ring->adev;
        u32 inst_id = ring->me;
+       bool is_guilty = sdma_v5_2_is_queue_selected(adev, inst_id);
        int r;
 
+       amdgpu_amdkfd_suspend(adev, false);
        r = amdgpu_sdma_reset_engine(adev, inst_id);
+       amdgpu_amdkfd_resume(adev, false);
        if (r)
                return r;
-       amdgpu_fence_driver_force_completion(ring);
+
+       if (is_guilty) {
+               dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
+               atomic_inc(&ring->adev->gpu_reset_counter);
+       }
+
        return 0;
 }
 
@@ -1528,11 +1546,12 @@ static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring)
        freeze = RREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE));
        freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
        WREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze);
-
        r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true);
-
        amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
-       return r;
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 25c01acac2cd9..abb5ad697fbb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1563,7 +1563,9 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
        r = sdma_v6_0_gfx_resume_instance(adev, i, true);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 97ea5392ab85d..76ae1a7849a56 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -828,7 +828,9 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
        r = sdma_v7_0_gfx_resume_instance(adev, i, true);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index f3ff3c6c155fd..d68bd82f8eab0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1983,7 +1983,9 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index e15057333a459..a9d8ae4ab109a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1624,7 +1624,9 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 9fd3127dc8828..93bc55756dcd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -1481,7 +1481,9 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index c5afe2a7f9f5d..d74c1862ac860 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -1208,7 +1208,9 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring,
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
+       dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
        amdgpu_fence_driver_force_completion(ring);
+       atomic_inc(&ring->adev->gpu_reset_counter);
        return 0;
 }
 
-- 
2.49.0
