On 2025. december 15., hétfő 10:07:07 középső államokbeli zónaidő Alex Deucher wrote: > When we back up the ring contents for reemit before a > ring reset, we skip jobs associated with the bad > context; however, we need to make sure the fences > are reemitted, as unprocessed submissions may depend on > them. > > v2: clean up fence handling, make helpers static
Nice work! We definitely need this when amdgpu_sched_hw_submission>2. This patch is: Reviewed-by: Timur Kristóf <[email protected]> > > Signed-off-by: Alex Deucher <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 24 ++++++++++++++++++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 ++++- > 2 files changed, 23 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index > 334ddd6e48c06..3a23cce5f769a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > @@ -89,6 +89,16 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) > return seq; > } > > +static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af) > +{ > + af->fence_wptr_start = af->ring->wptr; > +} > + > +static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af) > +{ > + af->fence_wptr_end = af->ring->wptr; > +} > + > /** > * amdgpu_fence_emit - emit a fence on the requested ring > * > @@ -116,8 +126,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct > amdgpu_fence *af, &ring->fence_drv.lock, > adev->fence_context + ring->idx, seq); > > + amdgpu_fence_save_fence_wptr_start(af); > amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, > seq, flags | AMDGPU_FENCE_FLAG_INT); > + amdgpu_fence_save_fence_wptr_end(af); > amdgpu_fence_save_wptr(af); > pm_runtime_get_noresume(adev_to_drm(adev)->dev); > ptr = &ring->fence_drv.fences[seq & ring- >fence_drv.num_fences_mask]; > @@ -743,10 +755,6 @@ void amdgpu_fence_driver_guilty_force_completion(struct > amdgpu_fence *af) /* if we've already reemitted once then just cancel > everything */ amdgpu_fence_driver_force_completion(af->ring); > af->ring->ring_backup_entries_to_copy = 0; > - } else { > - /* signal the guilty fence */ > - amdgpu_fence_write(ring, (u32)af->base.seqno); > - amdgpu_fence_process(ring); > } > } > > @@ -796,9 +804,15 @@ void 
amdgpu_ring_backup_unprocessed_commands(struct > amdgpu_ring *ring, * just save the content from other contexts. > */ > if (!fence->reemitted && > - (!guilty_fence || (fence->context != guilty_fence->context))) > + (!guilty_fence || (fence->context != guilty_fence->context))) { > amdgpu_ring_backup_unprocessed_command(ring, wptr, > fence->wptr); > + } else if (!fence->reemitted) { > + /* always save the fence */ > + amdgpu_ring_backup_unprocessed_command(ring, > + fence->fence_wptr_start, > + fence->fence_wptr_end); > + } > wptr = fence->wptr; > fence->reemitted++; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index > d881829528976..87c9df6c2ecfe 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -146,12 +146,15 @@ struct amdgpu_fence { > struct amdgpu_ring *ring; > ktime_t start_timestamp; > > - /* wptr for the fence for resets */ > + /* wptr for the total submission for resets */ > u64 wptr; > /* fence context for resets */ > u64 context; > /* has this fence been reemitted */ > unsigned int reemitted; > + /* wptr for the fence for the submission */ > + u64 fence_wptr_start; > + u64 fence_wptr_end; > }; > > extern const struct drm_sched_backend_ops amdgpu_sched_ops;
