On Monday, December 15, 2025, 10:07:07 Central Standard Time, Alex Deucher
wrote:
> When we back up the ring contents for reemit before a
> ring reset, we skip jobs associated with the bad
> context. However, we still need to make sure their fences
> are reemitted, as unprocessed submissions may depend on
> them.
> 
> v2: clean up fence handling, make helpers static

Nice work!
We definitely need this when amdgpu_sched_hw_submission>2.

This patch is:
Reviewed-by: Timur Kristóf <[email protected]>

> 
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 24 ++++++++++++++++++-----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  5 ++++-
>  2 files changed, 23 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 334ddd6e48c06..3a23cce5f769a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -89,6 +89,16 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
>       return seq;
>  }
> 
> +static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af)
> +{
> +     af->fence_wptr_start = af->ring->wptr;
> +}
> +
> +static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
> +{
> +     af->fence_wptr_end = af->ring->wptr;
> +}
> +
>  /**
>   * amdgpu_fence_emit - emit a fence on the requested ring
>   *
> @@ -116,8 +126,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
>                      &ring->fence_drv.lock,
>                      adev->fence_context + ring->idx, seq);
> 
> +     amdgpu_fence_save_fence_wptr_start(af);
>       amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>                              seq, flags | AMDGPU_FENCE_FLAG_INT);
> +     amdgpu_fence_save_fence_wptr_end(af);
>       amdgpu_fence_save_wptr(af);
>       pm_runtime_get_noresume(adev_to_drm(adev)->dev);
>       ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
> @@ -743,10 +755,6 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
>               /* if we've already reemitted once then just cancel everything */
>               amdgpu_fence_driver_force_completion(af->ring);
>               af->ring->ring_backup_entries_to_copy = 0;
> -     } else {
> -             /* signal the guilty fence */
> -             amdgpu_fence_write(ring, (u32)af->base.seqno);
> -             amdgpu_fence_process(ring);
>       }
>  }
> 
> @@ -796,9 +804,15 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
>                        * just save the content from other contexts.
>                        */
>                       if (!fence->reemitted &&
> -                         (!guilty_fence || (fence->context != guilty_fence->context)))
> +                         (!guilty_fence || (fence->context != guilty_fence->context))) {
>                               amdgpu_ring_backup_unprocessed_command(ring, wptr,
>                                                                      fence->wptr);
> +                     } else if (!fence->reemitted) {
> +                             /* always save the fence */
> +                             amdgpu_ring_backup_unprocessed_command(ring,
> +                                                                    fence->fence_wptr_start,
> +                                                                    fence->fence_wptr_end);
> +                     }
>                       wptr = fence->wptr;
>                       fence->reemitted++;
>               }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index d881829528976..87c9df6c2ecfe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -146,12 +146,15 @@ struct amdgpu_fence {
>       struct amdgpu_ring              *ring;
>       ktime_t                         start_timestamp;
> 
> -     /* wptr for the fence for resets */
> +     /* wptr for the total submission for resets */
>       u64                             wptr;
>       /* fence context for resets */
>       u64                             context;
>       /* has this fence been reemitted */
>       unsigned int                    reemitted;
> +     /* wptr for the fence for the submission */
> +     u64                             fence_wptr_start;
> +     u64                             fence_wptr_end;
>  };
> 
>  extern const struct drm_sched_backend_ops amdgpu_sched_ops;




Reply via email to