msm: preemption aware hangcheck

Akhil P Oommen Wed, 17 Sep 2025 12:28:18 -0700

On 9/11/2025 10:31 PM, Anna Maniscalco wrote:
> Rework hangcheck code to work well toghether with preemption.
> 
> Track the time a job has spent in a ring by storing timestamps of the
> `CP_ALWAYS_ON_CONTEXT` register at the beginning of a job and when
> switching rings as well as reading it back if the ring is currently
> active.
> 
> Signed-off-by: Anna Maniscalco <anna.maniscalco2...@gmail.com>
> ---
>  drivers/gpu/drm/msm/adreno/a5xx_gpu.c     |  3 +-
>  drivers/gpu/drm/msm/adreno/a6xx_gmu.c     |  3 +-
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 28 +++++++++++++++--
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  1 +
>  drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 25 +++++++++++----
>  drivers/gpu/drm/msm/adreno/adreno_gpu.c   |  3 +-
>  drivers/gpu/drm/msm/msm_gpu.c             | 51 
> +++++++++++++++++++++++++------
>  drivers/gpu/drm/msm/msm_gpu.h             |  3 ++
>  drivers/gpu/drm/msm/msm_ringbuffer.h      |  6 ++++
>  9 files changed, 102 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
> b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> index 
> 4a04dc43a8e6764a113d0ade3dee94bd4c0083af..cb4775a35da0706e571eb27ce617044de84ca118
>  100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> @@ -1255,7 +1255,8 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
>               gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
>  
>       /* Turn off the hangcheck timer to keep it from bothering us */
> -     timer_delete(&gpu->hangcheck_timer);
> +     for (int i = 0; i < gpu->nr_rings; i++)
> +             timer_delete(&gpu->rb[i]->hangcheck_timer);
>  
>       kthread_queue_work(gpu->worker, &gpu->recover_work);
>  }
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
> b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> index 
> fc62fef2fed87f065cb8fa4e997abefe4ff11cd5..103c19fa8669f06a6c1627ced1daf2bcd60415db
>  100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> @@ -28,7 +28,8 @@ static void a6xx_gmu_fault(struct a6xx_gmu *gmu)
>       gmu->hung = true;
>  
>       /* Turn off the hangcheck timer while we are resetting */
> -     timer_delete(&gpu->hangcheck_timer);
> +     for (int i = 0; i < gpu->nr_rings; i++)
> +             timer_delete(&gpu->rb[i]->hangcheck_timer);
>  
>       /* Queue the GPU handler because we need to treat this as a recovery */
>       kthread_queue_work(gpu->worker, &gpu->recover_work);


I think a helper routine makes sense. We have to disable hangcheck
whenever recover_worker is queued.

> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 
> b8f8ae940b55f5578abdbdec6bf1e90a53e721a5..7647e3dfd50db7446589e67949ed08d0a422f543
>  100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -465,6 +465,9 @@ static void a7xx_submit(struct msm_gpu *gpu, struct 
> msm_gem_submit *submit)
>       get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
>               rbmemptr_stats(ring, index, alwayson_start));
>  
> +     get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_CONTEXT,
> +             rbmemptr(ring, last_job_start_ctx));
> +
>       OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
>       OUT_RING(ring, CP_SET_THREAD_BOTH);
>  
> @@ -1816,7 +1819,8 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
>               gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
>  
>       /* Turn off the hangcheck timer to keep it from bothering us */
> -     timer_delete(&gpu->hangcheck_timer);
> +     for (int i = 0; i < gpu->nr_rings; i++)
> +             timer_delete(&gpu->rb[i]->hangcheck_timer);
>  
>       /* Turn off interrupts to avoid triggering recovery again */
>       gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0);
> @@ -1839,7 +1843,8 @@ static void a7xx_sw_fuse_violation_irq(struct msm_gpu 
> *gpu)
>        */
>       if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
>                     A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
> -             timer_delete(&gpu->hangcheck_timer);
> +             for (int i = 0; i < gpu->nr_rings; i++)
> +                     timer_delete(&gpu->rb[i]->hangcheck_timer);
>  
>               kthread_queue_work(gpu->worker, &gpu->recover_work);
>       }
> @@ -2327,6 +2332,22 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, 
> uint64_t *value)
>       return 0;
>  }
>  
> +static int a6xx_get_ctx_timestamp(struct msm_ringbuffer *ring, uint64_t 
> *value)
> +{
> +     struct msm_gpu *gpu = ring->gpu;
> +     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +     struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +
> +     guard(spinlock_irqsave)(&a6xx_gpu->eval_lock);

Is eval_lock initialized anywhere? Also why do we need this?

> +
> +     if (a6xx_in_preempt(a6xx_gpu) || ring != a6xx_gpu->cur_ring)
This will race with preemption. I think we should wrap the preempt state
check and the regread under the preempt lock.

> +             return -EBUSY;
> +
> +     *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_CONTEXT);With IFPC, we 
> cannot access a GX domain register (CP, RBBM etc) unless
we are certain that the GX is powered up. Could you please test this
series along with the IFPC series? If we hit the right timing, there
should be a GMU fence error in the dmesg. Not sure how easy it is to hit
that timing, but I believe there is a problem here conceptually.

> +
> +     return 0;
> +}
> +
>  static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
>  {
>       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> @@ -2555,6 +2576,7 @@ static const struct adreno_gpu_funcs funcs = {
>               .get_rptr = a6xx_get_rptr,
>               .progress = a6xx_progress,
>               .sysprof_setup = a6xx_gmu_sysprof_setup,
> +             .get_ctx_timestamp = a6xx_get_ctx_timestamp,
>       },
>       .get_timestamp = a6xx_gmu_get_timestamp,
>  };
> @@ -2584,6 +2606,7 @@ static const struct adreno_gpu_funcs funcs_gmuwrapper = 
> {
>               .create_private_vm = a6xx_create_private_vm,
>               .get_rptr = a6xx_get_rptr,
>               .progress = a6xx_progress,
> +             .get_ctx_timestamp = a6xx_get_ctx_timestamp,
>       },
>       .get_timestamp = a6xx_get_timestamp,
>  };
> @@ -2616,6 +2639,7 @@ static const struct adreno_gpu_funcs funcs_a7xx = {
>               .get_rptr = a6xx_get_rptr,
>               .progress = a6xx_progress,
>               .sysprof_setup = a6xx_gmu_sysprof_setup,
> +             .get_ctx_timestamp = a6xx_get_ctx_timestamp,
>       },
>       .get_timestamp = a6xx_gmu_get_timestamp,
>  };
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> index 
> 0b17d36c36a9567e6afa4269ae7783ed3578e40e..7248d3d38c6d8a06cb4a536043bf4877179447cc
>  100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> @@ -80,6 +80,7 @@ struct a6xx_gpu {
>       struct drm_gem_object *preempt_postamble_bo;
>       void *preempt_postamble_ptr;
>       uint64_t preempt_postamble_iova;
> +     uint64_t preempt_postamble_cntreset_end;
>       uint64_t preempt_postamble_len;
>       bool postamble_enabled;
>  
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c 
> b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> index 
> afc5f4aa3b17334027f3c20072cc3f059a9733b7..88a65549fa8038d4836eb5aeaea775d679415315
>  100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> @@ -99,11 +99,15 @@ static void a6xx_preempt_timer(struct timer_list *t)
>       kthread_queue_work(gpu->worker, &gpu->recover_work);
>  }
>  
> -static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
> +static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu, struct 
> msm_ringbuffer *ring)
>  {
>       u32 *postamble = a6xx_gpu->preempt_postamble_ptr;
> +     uint64_t last_active_ctxcycles;
>       u32 count = 0;
>  
> +     if (ring)
> +             last_active_ctxcycles = rbmemptr(ring, last_active_ctxcycles);
> +
>       postamble[count++] = PKT7(CP_REG_RMW, 3);
>       postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
>       postamble[count++] = 0;
> @@ -118,6 +122,15 @@ static void preempt_prepare_postamble(struct a6xx_gpu 
> *a6xx_gpu)
>       postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
>       postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);
>  
> +     a6xx_gpu->preempt_postamble_cntreset_end = count;
> +
> +     postamble[count++] = PKT7(ring ? CP_REG_TO_MEM : CP_NOP, 3);
> +     postamble[count++] = CP_REG_TO_MEM_0_REG(REG_A6XX_CP_ALWAYS_ON_CONTEXT) 
> |
> +             CP_REG_TO_MEM_0_CNT(2) |
> +             CP_REG_TO_MEM_0_64B;
> +     postamble[count++] = lower_32_bits(last_active_ctxcycles);
> +     postamble[count++] = upper_32_bits(last_active_ctxcycles);
> +
>       a6xx_gpu->preempt_postamble_len = count;
>  
>       a6xx_gpu->postamble_enabled = true;
> @@ -129,9 +142,9 @@ static void preempt_disable_postamble(struct a6xx_gpu 
> *a6xx_gpu)
>  
>       /*
>        * Disable the postamble by replacing the first packet header with a NOP
> -      * that covers the whole buffer.
> +      * that skips the counters reset part.
>        */
> -     *postamble = PKT7(CP_NOP, (a6xx_gpu->preempt_postamble_len - 1));
> +     *postamble = PKT7(CP_NOP, (a6xx_gpu->preempt_postamble_cntreset_end - 
> 1));
>  
>       a6xx_gpu->postamble_enabled = false;
>  }
> @@ -338,8 +351,8 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
>       /* Enable or disable postamble as needed */
>       sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
>  
> -     if (!sysprof && !a6xx_gpu->postamble_enabled)
> -             preempt_prepare_postamble(a6xx_gpu);
> +     if (!sysprof)
> +             preempt_prepare_postamble(a6xx_gpu, ring);
>  
>       if (sysprof && a6xx_gpu->postamble_enabled)
>               preempt_disable_postamble(a6xx_gpu);
> @@ -454,7 +467,7 @@ void a6xx_preempt_init(struct msm_gpu *gpu)
>                       gpu->vm, &a6xx_gpu->preempt_postamble_bo,
>                       &a6xx_gpu->preempt_postamble_iova);
>  
> -     preempt_prepare_postamble(a6xx_gpu);
> +     preempt_prepare_postamble(a6xx_gpu, NULL);
>  
>       if (IS_ERR(a6xx_gpu->preempt_postamble_ptr))
>               goto fail;
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
> b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index 
> afaa3cfefd357dc0230994c8b5830a14c6d7a352..58f1e2a95bbfb00feb5a3bb91853e6bb533ec631
>  100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -334,7 +334,8 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned 
> long iova, int flags,
>               struct msm_gpu_fault_info fault_info = {};
>  
>               /* Turn off the hangcheck timer to keep it from bothering us */
> -             timer_delete(&gpu->hangcheck_timer);
> +             for (int i = 0; i < gpu->nr_rings; i++)
> +                     timer_delete(&gpu->rb[i]->hangcheck_timer);
>  
>               fault_info.ttbr0 = info->ttbr0;
>               fault_info.iova  = iova;
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index 
> 17759abc46d7d7af4117b1d71f1d5fba6ba0b61c..a3c5073aca1f65e450e0673262e8ca4bc7a5be6f
>  100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -463,7 +463,9 @@ static void recover_worker(struct kthread_work *work)
>       struct drm_device *dev = gpu->dev;
>       struct msm_drm_private *priv = dev->dev_private;
>       struct msm_gem_submit *submit;
> -     struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
> +     struct msm_ringbuffer *cur_ring = gpu->hung_ring ?
> +             gpu->hung_ring : gpu->funcs->active_ring(gpu);
> +     gpu->hung_ring = NULL;
>       char *comm = NULL, *cmd = NULL;
>       struct task_struct *task;
>       int i;
> @@ -613,11 +615,17 @@ void msm_gpu_fault_crashstate_capture(struct msm_gpu 
> *gpu, struct msm_gpu_fault_
>       mutex_unlock(&gpu->lock);
>  }
>  
> -static void hangcheck_timer_reset(struct msm_gpu *gpu)
> +static void hangcheck_ring_timer_reset(struct msm_gpu *gpu, struct 
> msm_ringbuffer *ring)
>  {
>       struct msm_drm_private *priv = gpu->dev->dev_private;
> -     mod_timer(&gpu->hangcheck_timer,
> -                     round_jiffies_up(jiffies + 
> msecs_to_jiffies(priv->hangcheck_period)));
> +     mod_timer(&ring->hangcheck_timer,
> +                       round_jiffies_up(jiffies + 
> msecs_to_jiffies(priv->hangcheck_period)));
> +}
> +
> +static void hangcheck_timer_reset(struct msm_gpu *gpu)
> +{
> +     for (int i = 0; i < gpu->nr_rings; i++)
> +             hangcheck_ring_timer_reset(gpu, gpu->rb[i]);

It triggers my OCD a bit that there are multiple timers flying around
waking up CPU clusters. But this is okay for now I guess. :)

>  }
>  
>  static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> @@ -635,11 +643,33 @@ static bool made_progress(struct msm_gpu *gpu, struct 
> msm_ringbuffer *ring)
>       return true;
>  }
>  
> +static bool check_ring_timeout(struct msm_ringbuffer *ring, unsigned long 
> timeout)
> +{
> +     struct msm_gpu *gpu = ring->gpu;
> +     struct msm_ringbuffer *curr_ring = gpu->funcs->active_ring(gpu);
> +     u64 start, end;
> +     int ret;
> +
> +     if (!gpu->funcs->get_ctx_timestamp)
> +             return !made_progress(gpu, ring);
> +
> +     start = ring->memptrs->last_job_start_ctx;
> +
> +     if (!gpu->funcs->get_ctx_timestamp(ring, &end))
I suppose you want the other way. ie, if get_ctx_timestamp() returns
-EBUSY, which means gpu is either under preemption or in a different
ring, use the memptr data.

> +             end = ring->memptrs->last_active_ctxcycles;

Assuming my above comment is correct, if GPU is under preemption, there
is a chance that the postamble might not have got executed. So with a
stale 'end' value, the below calc may go wrong?

> +
> +     if (end >= start)
> +             return (end - start) < timeout;
> +     else
> +             return false;

In case of an infinite shader blocking preemption, wouldn't we always
return false here?

-Akhil> +}
> +
>  static void hangcheck_handler(struct timer_list *t)
>  {
> -     struct msm_gpu *gpu = timer_container_of(gpu, t, hangcheck_timer);
> +     struct msm_ringbuffer *ring = timer_container_of(ring, t, 
> hangcheck_timer);
> +     struct msm_gpu *gpu = ring->gpu;
> +     struct msm_drm_private *priv = gpu->dev->dev_private;
>       struct drm_device *dev = gpu->dev;
> -     struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
>       uint32_t fence = ring->memptrs->fence;
>  
>       if (fence != ring->hangcheck_fence) {
> @@ -647,7 +677,7 @@ static void hangcheck_handler(struct timer_list *t)
>               ring->hangcheck_fence = fence;
>               ring->hangcheck_progress_retries = 0;
>       } else if (fence_before(fence, ring->fctx->last_fence) &&
> -                     !made_progress(gpu, ring)) {
> +                     check_ring_timeout(ring, priv->hangcheck_period * 
> 192000)) {
>               /* no progress and not done.. hung! */
>               ring->hangcheck_fence = fence;
>               ring->hangcheck_progress_retries = 0;
> @@ -658,6 +688,7 @@ static void hangcheck_handler(struct timer_list *t)
>               DRM_DEV_ERROR(dev->dev, "%s:     submitted fence: %u\n",
>                               gpu->name, ring->fctx->last_fence);
>  
> +             gpu->hung_ring = ring;
>               kthread_queue_work(gpu->worker, &gpu->recover_work);
>       }
>  
> @@ -911,7 +942,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct 
> msm_gem_submit *submit)
>       submit->ring->cur_ctx_seqno = submit->queue->ctx->seqno;
>  
>       pm_runtime_put(&gpu->pdev->dev);
> -     hangcheck_timer_reset(gpu);
> +     hangcheck_ring_timer_reset(gpu, submit->ring);

Should we reset hangcheck whenever there is a submission to a random ring?

-Akhil

>  }
>  
>  /*
> @@ -1011,8 +1042,6 @@ int msm_gpu_init(struct drm_device *drm, struct 
> platform_device *pdev,
>       if (funcs->progress)
>               priv->hangcheck_period /= 2;
>  
> -     timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);
> -
>       spin_lock_init(&gpu->perf_lock);
>  
>  
> @@ -1097,6 +1126,8 @@ int msm_gpu_init(struct drm_device *drm, struct 
> platform_device *pdev,
>                       goto fail;
>               }
>  
> +             timer_setup(&gpu->rb[i]->hangcheck_timer, hangcheck_handler, 0);
> +
>               memptrs += sizeof(struct msm_rbmemptrs);
>               memptrs_iova += sizeof(struct msm_rbmemptrs);
>       }
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index 
> a597f2bee30b6370ecc3639bfe1072c85993e789..7bf1b7f4bc4b61338bfa4c1463eb549f8c22d5c3
>  100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -93,6 +93,7 @@ struct msm_gpu_funcs {
>        */
>       bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
>       void (*sysprof_setup)(struct msm_gpu *gpu);
> +     int (*get_ctx_timestamp)(struct msm_ringbuffer *ring, uint64_t *value);
>  };
>  
>  /* Additional state for iommu faults: */
> @@ -257,6 +258,8 @@ struct msm_gpu {
>       /* work for handling GPU recovery: */
>       struct kthread_work recover_work;
>  
> +     struct msm_ringbuffer *hung_ring;
> +
>       /** retire_event: notified when submits are retired: */
>       wait_queue_head_t retire_event;
>  
> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h 
> b/drivers/gpu/drm/msm/msm_ringbuffer.h
> index 
> d1e49f701c8176e50d2b9a5cca35acee67f75209..316247fb089f26bd657ccf8464a5039e1cd1ac45
>  100644
> --- a/drivers/gpu/drm/msm/msm_ringbuffer.h
> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
> @@ -37,6 +37,8 @@ struct msm_rbmemptrs {
>       volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT];
>       volatile u64 ttbr0;
>       volatile u32 context_idr;
> +     volatile u64 last_job_start_ctx;
> +     volatile u64 last_active_ctxcycles;
>  };
>  
>  struct msm_cp_state {
> @@ -73,6 +75,10 @@ struct msm_ringbuffer {
>       uint64_t memptrs_iova;
>       struct msm_fence_context *fctx;
>  
> +     /* Hang and Inactivity Detection:
> +      */
> +     struct timer_list hangcheck_timer;
> +
>       /**
>        * hangcheck_progress_retries:
>        *
>

Re: [PATCH 2/2] drm/msm: preemption aware hangcheck

Reply via email to