gfx10: re-emit unprocessed state on kgq reset

Christian König Wed, 28 May 2025 04:40:26 -0700

On 5/28/25 06:19, Alex Deucher wrote:
> Re-emit the unprocessed state after resetting the queue.
> 
> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 19 ++++++++++++-------
>  1 file changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 3193eb88b6889..f6e04cf21abcc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -9537,6 +9537,7 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring 
> *ring, unsigned int vmid)
>       struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
>       struct amdgpu_ring *kiq_ring = &kiq->ring;
>       unsigned long flags;
> +     unsigned int i;
>       u32 tmp;
>       u64 addr;
>       int r;
> @@ -9571,10 +9572,8 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring 
> *ring, unsigned int vmid)
>                                    SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 
> 0, 0xffffffff);
>       kiq->pmf->kiq_map_queues(kiq_ring, ring);
>       amdgpu_ring_commit(kiq_ring);
> -
> -     spin_unlock_irqrestore(&kiq->ring_lock, flags);
> -
>       r = amdgpu_ring_test_ring(kiq_ring);


I don't think we should do a ring test on the KIQ here That basically doesn't 
tells as much and might cause additional problems.

> +     spin_unlock_irqrestore(&kiq->ring_lock, flags);
>       if (r)
>               return r;
>  
> @@ -9584,7 +9583,15 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring 
> *ring, unsigned int vmid)
>               return r;
>       }
>  
> -     return amdgpu_ring_test_ring(ring);
> +     if (amdgpu_ring_alloc(ring, 8 + ring->ring_backup_entries_to_copy))
> +             return -ENOMEM;
> +     amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
> +                            ring->ring_backup_seq, 0);
> +     for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
> +             amdgpu_ring_write(ring, ring->ring_backup[i]);
> +     amdgpu_ring_commit(ring);

I'm not sure if the commands are always relocatable. We should probably just 
instruct the ring to re-start with the original RPTR/WPTR.

That would also avoid the need to save/restore the ring content with the CPU.

Regards,
Christian.

> +
> +     return gfx_v10_0_ring_test_ib(ring, AMDGPU_QUEUE_RESET_TIMEOUT);
>  }
>  
>  static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
> @@ -9612,9 +9619,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
>       kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
>                                  0, 0);
>       amdgpu_ring_commit(kiq_ring);
> -     spin_unlock_irqrestore(&kiq->ring_lock, flags);
> -
>       r = amdgpu_ring_test_ring(kiq_ring);
> +     spin_unlock_irqrestore(&kiq->ring_lock, flags);
>       if (r)
>               return r;
>  
> @@ -9891,7 +9897,6 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_gfx = {
>       .emit_wreg = gfx_v10_0_ring_emit_wreg,
>       .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>       .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> -     .soft_recovery = gfx_v10_0_ring_soft_recovery,
>       .emit_mem_sync = gfx_v10_0_emit_mem_sync,
>       .reset = gfx_v10_0_reset_kgq,
>       .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,

Re: [PATCH 12/19] drm/amdgpu/gfx10: re-emit unprocessed state on kgq reset

Reply via email to