Among the scheduler's statuses, the only one that indicates an error is DRM_GPU_SCHED_STAT_ENODEV. Any status other than DRM_GPU_SCHED_STAT_ENODEV signifies that the operation succeeded and the GPU is in a nominal state.
However, a status that only says "OK" does not convey enough information about the GPU's state. Therefore, rename DRM_GPU_SCHED_STAT_NOMINAL to DRM_GPU_SCHED_STAT_RESET, which better communicates the meaning of this status. The status DRM_GPU_SCHED_STAT_RESET indicates that the GPU has hung, but it has been successfully reset and is now in a nominal state again. Signed-off-by: Maíra Canal <mca...@igalia.com> Reviewed-by: Philipp Stanner <pha...@kernel.org> --- To: Min Ma <min...@amd.com> To: Lizhi Hou <lizhi....@amd.com> To: Oded Gabbay <ogab...@kernel.org> To: Frank Binns <frank.bi...@imgtec.com> To: Matt Coster <matt.cos...@imgtec.com> To: Qiang Yu <yuq...@gmail.com> To: Lyude Paul <ly...@redhat.com> To: Alex Deucher <alexander.deuc...@amd.com> To: Christian König <christian.koe...@amd.com> --- drivers/accel/amdxdna/aie2_ctx.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 4 ++-- drivers/gpu/drm/imagination/pvr_queue.c | 4 ++-- drivers/gpu/drm/lima/lima_sched.c | 6 +++--- drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +- drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +- drivers/gpu/drm/panfrost/panfrost_job.c | 6 +++--- drivers/gpu/drm/panthor/panthor_mmu.c | 2 +- drivers/gpu/drm/panthor/panthor_sched.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 2 +- drivers/gpu/drm/scheduler/tests/mock_scheduler.c | 2 +- drivers/gpu/drm/v3d/v3d_sched.c | 6 +++--- drivers/gpu/drm/xe/xe_guc_submit.c | 6 +++--- include/drm/gpu_scheduler.h | 4 ++-- 15 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index f20999f2d66864fd4a6b7069e866727c37befb39..2cff5419bd2facb59ff5df6388aba0512fd45d5c 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -361,7 +361,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job) aie2_hwctx_restart(xdna, hwctx); mutex_unlock(&xdna->dev_lock); - return 
DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static const struct drm_sched_backend_ops sched_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 87f7040d8a2bb1af2aa46a051f10df3074974672..ec73cf3a9024121bcd3110dcfe6ffc0a37943862 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -175,7 +175,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) exit: amdgpu_vm_put_task_info(ti); drm_dev_exit(idx); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 76a3a3e517d8d9f654fb6b9e98e72910795cfc7a..7146069a98492f5fab2a49d96e2054f649e1fe3d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -86,11 +86,11 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job drm_sched_resubmit_jobs(&gpu->sched); drm_sched_start(&gpu->sched, 0); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; out_no_timeout: list_add(&sched_job->list, &sched_job->sched->pending_list); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void etnaviv_sched_free_job(struct drm_sched_job *sched_job) diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index 5a41ee79fed646a86344cd16e78efdb45ff02e43..fc415dd0d7a73631bd4144c9f35b9b294c625a12 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -803,7 +803,7 @@ static void pvr_queue_start(struct pvr_queue *queue) * the scheduler, and re-assign parent fences in the middle. * * Return: - * * DRM_GPU_SCHED_STAT_NOMINAL. + * * DRM_GPU_SCHED_STAT_RESET. 
*/ static enum drm_gpu_sched_stat pvr_queue_timedout_job(struct drm_sched_job *s_job) @@ -854,7 +854,7 @@ pvr_queue_timedout_job(struct drm_sched_job *s_job) drm_sched_start(sched, 0); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /** diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 954f4325b859b2977a2cc608a99a6ebb642f1000..739e8c6c6d909aa4263bad8a12ec07f0c6607bb2 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -412,7 +412,7 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job */ if (dma_fence_is_signaled(task->fence)) { DRM_WARN("%s spurious timeout\n", lima_ip_name(ip)); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /* @@ -429,7 +429,7 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job if (dma_fence_is_signaled(task->fence)) { DRM_WARN("%s unexpectedly high interrupt latency\n", lima_ip_name(ip)); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /* @@ -467,7 +467,7 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job drm_sched_resubmit_jobs(&pipe->base); drm_sched_start(&pipe->base, 0); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void lima_sched_free_job(struct drm_sched_job *job) diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 41b7c608c9054869ddadfe17c96100266e44c254..edbbda78bac90432c4877aa39a9587cf976705c7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -189,7 +189,7 @@ nouveau_exec_job_timeout(struct nouveau_job *job) NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n", chan->chid); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static const struct nouveau_job_ops nouveau_exec_job_ops = { diff --git 
a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 7d9c3418e76b6eb90ccfeea345d60e3d67118733..0cc0bc9f9952b1cfbc4015cb2c292fbf383459c9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -370,7 +370,7 @@ nouveau_sched_timedout_job(struct drm_sched_job *sched_job) { struct drm_gpu_scheduler *sched = sched_job->sched; struct nouveau_job *job = to_nouveau_job(sched_job); - enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL; + enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_RESET; drm_sched_stop(sched, sched_job); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 5657106c2f7d0a0ca6162850767f58f3200cce13..afcffe7f8fe9e11f84e4ab7e8f5a72f7bf583690 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -755,7 +755,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job * spurious. Bail out. 
*/ if (dma_fence_is_signaled(job->done_fence)) - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; /* * Panfrost IRQ handler may take a long time to process an interrupt @@ -770,7 +770,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job if (dma_fence_is_signaled(job->done_fence)) { dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n"); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", @@ -786,7 +786,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job atomic_set(&pfdev->reset.pending, 1); panfrost_reset(pfdev, sched_job); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void panfrost_reset_work(struct work_struct *work) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index ed3712f8d6a9136b99c49d687ab84918959edd95..4140f697ba5af5769492d3bbb378e18aec8ade98 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -2259,7 +2259,7 @@ static enum drm_gpu_sched_stat panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job) { WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!"); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static const struct drm_sched_backend_ops panthor_vm_bind_ops = { diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index a2248f692a030c1c84869b9a1948ad1cb0c0b490..8f17394cc82aad9eaf01e473cd9d3dea46fa3d61 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -3241,7 +3241,7 @@ queue_timedout_job(struct drm_sched_job *sched_job) queue_start(queue); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void queue_free_job(struct drm_sched_job *sched_job) diff 
--git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index d94270cf8bf5c8d7b8ddb17db12118a4c55ca43c..4f2593f0c7d98a20a5d22f05e31c6929ec1cd8f9 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -527,7 +527,7 @@ static void drm_sched_job_timedout(struct work_struct *work) { struct drm_gpu_scheduler *sched; struct drm_sched_job *job; - enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL; + enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_RESET; sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c index 0d1d57213e058b1c0db1fbf308723216ce80bef3..ad96e8db13c414cd773654397b34f83943c15d74 100644 --- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c @@ -231,7 +231,7 @@ mock_sched_timedout_job(struct drm_sched_job *sched_job) drm_sched_job_cleanup(sched_job); /* Mock job itself is freed by the kunit framework. 
*/ - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void mock_sched_free_job(struct drm_sched_job *sched_job) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 35f131a46d0701cc8040d3b9654595a2bc260eab..e2b7f24d528e773968daea0f5b31c869584bb692 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -741,7 +741,7 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) mutex_unlock(&v3d->reset_lock); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void @@ -773,7 +773,7 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, *timedout_ctra = ctra; v3d_sched_skip_reset(sched_job); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } return v3d_gpu_reset_for_timeout(v3d, sched_job); @@ -819,7 +819,7 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) job->timedout_batches = batches; v3d_sched_skip_reset(sched_job); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } return v3d_gpu_reset_for_timeout(v3d, sched_job); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index df7a5a4eec74cc97c9934fb3ad383a2bae1d9a3d..ca2344b538c30b26542742ae9ea145836c940bbf 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1096,7 +1096,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /* Kill the run_job entry point */ @@ -1265,7 +1265,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) /* Start fence signaling */ xe_hw_fence_irq_start(q->fence_irq); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; sched_enable: enable_scheduling(q); @@ -1278,7 +1278,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job 
*drm_job) xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void __guc_exec_queue_fini_async(struct work_struct *w) diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 190844370f48aee6ac9734ddfad65c8718f0a52a..ed300920996a8d830af5a4c011173ae4c25919dc 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -391,12 +391,12 @@ struct drm_sched_job { * enum drm_gpu_sched_stat - the scheduler's status * * @DRM_GPU_SCHED_STAT_NONE: Reserved. Do not use. - * @DRM_GPU_SCHED_STAT_NOMINAL: Operation succeeded. + * @DRM_GPU_SCHED_STAT_RESET: The GPU hung and successfully reset. * @DRM_GPU_SCHED_STAT_ENODEV: Error: Device is not available anymore. */ enum drm_gpu_sched_stat { DRM_GPU_SCHED_STAT_NONE, - DRM_GPU_SCHED_STAT_NOMINAL, + DRM_GPU_SCHED_STAT_RESET, DRM_GPU_SCHED_STAT_ENODEV, }; -- 2.50.0