The V3D scheduler wants a timed-out job to continue running if it has made
progress.  However, the current DRM scheduler removes the timed-out job
from ring_mirror_list, so the timeout timer is not restarted automatically
and the job is never checked for a timeout again.  We need to stop and
restart the DRM scheduler to rearm the timer.

Fixes: 135517d3565b ("drm/scheduler: Avoid accessing freed bad job.")
Signed-off-by: Yukimasa Sugizaki <ys...@idein.jp>
---
 drivers/gpu/drm/v3d/v3d_sched.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 001216f22017..feef0c749e68 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -312,9 +312,24 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
        u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
        u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));

+       /* If we've made progress, skip reset and let the timer get
+        * rearmed.
+        */
        if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
                *timedout_ctca = ctca;
                *timedout_ctra = ctra;
+
+               /* Because the timed-out job has been removed from
+                * ring_mirror_list in drm_sched_job_timedout(), we need to
+                * stop and restart the scheduler to rearm the timer.
+                * Holding the reset_lock is necessary for concurrent
+                * v3d_gpu_reset_for_timeout().
+                */
+               mutex_lock(&v3d->reset_lock);
+               drm_sched_stop(sched_job->sched, sched_job);
+               drm_sched_start(sched_job->sched, sched_job);
+               mutex_unlock(&v3d->reset_lock);
+
                return;
        }

@@ -359,6 +374,18 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job)
         */
        if (job->timedout_batches != batches) {
                job->timedout_batches = batches;
+
+               /* Because the timed-out job has been removed from
+                * ring_mirror_list in drm_sched_job_timedout(), we need to
+                * stop and restart the scheduler to rearm the timer.
+                * Holding the reset_lock is necessary for concurrent
+                * v3d_gpu_reset_for_timeout().
+                */
+               mutex_lock(&v3d->reset_lock);
+               drm_sched_stop(sched_job->sched, sched_job);
+               drm_sched_start(sched_job->sched, sched_job);
+               mutex_unlock(&v3d->reset_lock);
+
                return;
        }

--
2.7.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to