We need to know the wptr and the fence sequence number associated with each job so that we can re-emit the unprocessed ring state after a ring reset. Pre-allocate backing storage for the ring buffer contents and add helpers to save off the unprocessed commands and replay them once the queue has been reset.
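As an illustration of how the pieces fit together, here is a minimal, hypothetical caller sketch (not part of this patch): the function name and the hardware reset step are placeholders, but the helpers are the ones introduced below.

/* Hypothetical caller, for illustration only: back up the unprocessed
 * ring contents, reset the queue, retire the guilty fence, then replay
 * everything that was saved.
 */
static int example_reset_and_replay(struct amdgpu_ring *ring,
				    struct dma_fence *guilty_fence,
				    bool is_guilty)
{
	/* snapshot all dwords emitted after the guilty submission */
	amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence, is_guilty);

	/* ... per-IP queue/ring reset would happen here ... */

	/* signal the bad fence so the hung job is retired */
	if (is_guilty)
		amdgpu_fence_driver_guilty_force_completion(guilty_fence);

	/* re-emit the saved, still-unprocessed commands */
	return amdgpu_ring_reemit_unprocessed_commands(ring);
}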
Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 90 +++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c    |  8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  | 27 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  | 14 ++++
 5 files changed, 139 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 569e0e5373927..da87a5539a90b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -135,12 +135,20 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
 		am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
 		if (am_fence == NULL)
 			return -ENOMEM;
+		am_fence->context = 0;
 	} else {
 		/* take use of job-embedded fence */
 		am_fence = &job->hw_fence;
+		if (job->base.s_fence) {
+			struct dma_fence *finished = &job->base.s_fence->finished;
+			am_fence->context = finished->context;
+		} else {
+			am_fence->context = 0;
+		}
 	}
 	fence = &am_fence->base;
 	am_fence->ring = ring;
+	am_fence->wptr = 0;
 
 	seq = ++ring->fence_drv.sync_seq;
 	if (job && job->job_run_counter) {
@@ -748,6 +756,88 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
 	amdgpu_fence_process(ring);
 }
 
+/**
+ * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
+ *
+ * @fence: fence of the ring to signal
+ *
+ */
+void amdgpu_fence_driver_guilty_force_completion(struct dma_fence *fence)
+{
+	struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base);
+
+	amdgpu_fence_write(am_fence->ring, fence->seqno);
+	amdgpu_fence_process(am_fence->ring);
+}
+
+void amdgpu_fence_save_wptr(struct dma_fence *fence)
+{
+	struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base);
+
+	am_fence->wptr = am_fence->ring->wptr;
+}
+
+static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
+						    unsigned int idx,
+						    u64 start_wptr, u32 end_wptr)
+{
+	unsigned int first_idx = start_wptr & ring->buf_mask;
+	unsigned int last_idx = end_wptr & ring->buf_mask;
+	unsigned int i, j, entries_to_copy;
+
+	if (last_idx < first_idx) {
+		entries_to_copy = ring->buf_mask + 1 - first_idx;
+		for (i = 0; i < entries_to_copy; i++)
+			ring->ring_backup[idx + i] = ring->ring[first_idx + i];
+		ring->ring_backup_entries_to_copy += entries_to_copy;
+		entries_to_copy = last_idx;
+		for (j = 0; j < entries_to_copy; j++)
+			ring->ring_backup[idx + i + j] = ring->ring[j];
+		ring->ring_backup_entries_to_copy += entries_to_copy;
+	} else {
+		entries_to_copy = last_idx - first_idx;
+		for (i = 0; i < entries_to_copy; i++)
+			ring->ring_backup[idx + i] = ring->ring[first_idx + i];
+		ring->ring_backup_entries_to_copy += entries_to_copy;
+	}
+}
+
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+					     struct dma_fence *f,
+					     bool is_guilty)
+{
+	struct amdgpu_fence *bad_fence =
+		container_of(f, struct amdgpu_fence, base);
+	struct amdgpu_fence *fence;
+	struct dma_fence *unprocessed, **ptr;
+	u64 wptr, i;
+
+	wptr = bad_fence->wptr;
+	ring->ring_backup_entries_to_copy = 0;
+	for (i = bad_fence->base.seqno + 1; i <= ring->fence_drv.sync_seq; ++i) {
+		ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask];
+		rcu_read_lock();
+		unprocessed = rcu_dereference(*ptr);
+
+		if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
+			fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+			/* save everything if the ring is not guilty, otherwise
+			 * just save the content from other contexts.
+			 */
+			if (fence->wptr &&
+			    (!is_guilty || (fence->context != bad_fence->context))) {
+				amdgpu_ring_backup_unprocessed_command(ring,
+								       ring->ring_backup_entries_to_copy,
+								       wptr,
+								       fence->wptr);
+				wptr = fence->wptr;
+			}
+		}
+		rcu_read_unlock();
+	}
+}
+
 /*
  * Common fence implementation
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 802743efa3b39..789f9b2af8f99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -138,7 +138,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	int vmid = AMDGPU_JOB_GET_VMID(job);
 	bool need_pipe_sync = false;
 	unsigned int cond_exec;
-	unsigned int i;
 	int r = 0;
 
@@ -306,6 +305,13 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 
 	amdgpu_ring_ib_end(ring);
 	amdgpu_ring_commit(ring);
+
+	/* This must be last for resets to work properly
+	 * as we need to save the wptr associated with this
+	 * fence.
+	 */
+	amdgpu_fence_save_wptr(*f);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 308d3889e46ca..0ac51d7b4d78a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -89,8 +89,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 {
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
-	struct amdgpu_task_info *ti;
 	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_task_info *ti;
 	int idx, r;
 
 	if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 426834806fbf2..736ff5bafd520 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -333,6 +333,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	/* Initialize cached_rptr to 0 */
 	ring->cached_rptr = 0;
 
+	if (!ring->ring_backup) {
+		ring->ring_backup = kvzalloc(ring->ring_size, GFP_KERNEL);
+		if (!ring->ring_backup)
+			return -ENOMEM;
+	}
+
 	/* Allocate ring buffer */
 	if (ring->ring_obj == NULL) {
 		r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
@@ -342,6 +348,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 					    (void **)&ring->ring);
 		if (r) {
 			dev_err(adev->dev, "(%d) ring create failed\n", r);
+			kvfree(ring->ring_backup);
 			return r;
 		}
 		amdgpu_ring_clear_ring(ring);
@@ -385,6 +392,8 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	amdgpu_bo_free_kernel(&ring->ring_obj,
 			      &ring->gpu_addr,
 			      (void **)&ring->ring);
+	kvfree(ring->ring_backup);
+	ring->ring_backup = NULL;
 
 	dma_fence_put(ring->vmid_wait);
 	ring->vmid_wait = NULL;
@@ -753,3 +762,21 @@ bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
 
 	return true;
 }
+
+int amdgpu_ring_reemit_unprocessed_commands(struct amdgpu_ring *ring)
+{
+	unsigned int i;
+	int r;
+
+	/* re-emit the unprocessed ring contents */
+	if (ring->ring_backup_entries_to_copy) {
+		r = amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy);
+		if (r)
+			return r;
+		for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
+			amdgpu_ring_write(ring, ring->ring_backup[i]);
+		amdgpu_ring_commit(ring);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 2b3843f5218c8..b73894254bb8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -141,6 +141,11 @@ struct amdgpu_fence {
 	/* RB, DMA, etc. */
 	struct amdgpu_ring		*ring;
 	ktime_t				start_timestamp;
+
+	/* wptr for the fence for resets */
+	u64				wptr;
+	/* fence context for resets */
+	u64				context;
 };
 
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
@@ -148,6 +153,8 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_guilty_force_completion(struct dma_fence *fence);
+void amdgpu_fence_save_wptr(struct dma_fence *fence);
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -283,6 +290,9 @@ struct amdgpu_ring {
 	struct amdgpu_bo	*ring_obj;
 	uint32_t		*ring;
+	/* backups for resets */
+	uint32_t		*ring_backup;
+	unsigned int		ring_backup_entries_to_copy;
 	unsigned		rptr_offs;
 	u64			rptr_gpu_addr;
 	volatile u32		*rptr_cpu_addr;
@@ -549,4 +559,8 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
 bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+					     struct dma_fence *f,
+					     bool is_guilty);
+int amdgpu_ring_reemit_unprocessed_commands(struct amdgpu_ring *ring);
 #endif
-- 
2.49.0
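A note on the backup copy: amdgpu_ring_backup_unprocessed_command() masks the saved write pointers with the ring's buffer mask and copies in two chunks when the range wraps past the end of the buffer. For reference, here is a standalone userspace sketch of that wrap-around arithmetic; the names and the tiny ring size are hypothetical, and a power-of-two ring size is assumed.

#include <stdint.h>
#include <stdio.h>

#define RING_DWORDS 8u                 /* power-of-two ring size */
#define BUF_MASK    (RING_DWORDS - 1)

/* Copy the dwords between two write pointers, handling wrap-around the
 * same way the in-kernel backup helper does.  Returns the number of
 * dwords stored into 'backup' starting at 'idx'.
 */
static unsigned int backup_range(const uint32_t *ring, uint32_t *backup,
				 unsigned int idx,
				 uint64_t start_wptr, uint32_t end_wptr)
{
	unsigned int first = start_wptr & BUF_MASK;
	unsigned int last = end_wptr & BUF_MASK;
	unsigned int copied = 0, i;

	if (last < first) {
		/* range wraps: copy up to the end, then from the start */
		for (i = first; i < RING_DWORDS; i++)
			backup[idx + copied++] = ring[i];
		for (i = 0; i < last; i++)
			backup[idx + copied++] = ring[i];
	} else {
		for (i = first; i < last; i++)
			backup[idx + copied++] = ring[i];
	}
	return copied;
}

int main(void)
{
	uint32_t ring[RING_DWORDS] = { 10, 11, 12, 13, 14, 15, 16, 17 };
	uint32_t backup[RING_DWORDS] = { 0 };
	unsigned int i, n;

	/* wptr 6 -> 10 wraps: expect the dwords at offsets 6, 7, 0, 1 */
	n = backup_range(ring, backup, 0, 6, 10);
	for (i = 0; i < n; i++)
		printf("%u ", backup[i]);  /* prints: 16 17 10 11 */
	printf("\n");
	return 0;
}

With an 8-dword ring and write pointers 6 and 10, the range wraps and the copy yields the dwords at offsets 6, 7, 0 and 1, mirroring the two-chunk path in the kernel helper.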