On 1 Nov 2022 15:33:10 -0700 Rob Clark <robdcl...@chromium.org>
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -500,6 +500,21 @@ static void hangcheck_timer_reset(struct msm_gpu *gpu)
>                       round_jiffies_up(jiffies + 
> msecs_to_jiffies(priv->hangcheck_period)));
>  }
>  
> +static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> +{
> +     if (ring->hangcheck_progress_retries >= 
> DRM_MSM_HANGCHECK_PROGRESS_RETRIES)
> +             return false;
> +
> +     if (!gpu->funcs->progress)
> +             return false;

Retrying cannot make a difference when no progress callback is provided.

> +
> +     if (!gpu->funcs->progress(gpu, ring))
> +             return false;
> +
> +     ring->hangcheck_progress_retries++;
> +     return true;
> +}
> +
>  static void hangcheck_handler(struct timer_list *t)
>  {
>       struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
> @@ -511,9 +526,12 @@ static void hangcheck_handler(struct timer_list *t)
>       if (fence != ring->hangcheck_fence) {
>               /* some progress has been made.. ya! */
>               ring->hangcheck_fence = fence;
> -     } else if (fence_before(fence, ring->fctx->last_fence)) {
> +             ring->hangcheck_progress_retries = 0;
> +     } else if (fence_before(fence, ring->fctx->last_fence) &&
> +                     !made_progress(gpu, ring)) {
>               /* no progress and not done.. hung! */
>               ring->hangcheck_fence = fence;
> +             ring->hangcheck_progress_retries = 0;
>               DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb 
> %d!\n",
>                               gpu->name, ring->id);
>               DRM_DEV_ERROR(dev->dev, "%s:     completed fence: %u\n",

Cutting DRM_MSM_HANGCHECK_DEFAULT_PERIOD down to 250 ms leads to false hang
reports when ->progress is not implemented.

> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index 585fd9c8d45a..d8f355e9f0b2 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -78,6 +78,8 @@ struct msm_gpu_funcs {
>       struct msm_gem_address_space *(*create_private_address_space)
>               (struct msm_gpu *gpu);
>       uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
> +
> +     bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
>  };
>  
>  /* Additional state for iommu faults: */
> @@ -236,7 +238,8 @@ struct msm_gpu {
>        */
>  #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
>  
> -#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
> +#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 250 /* in ms */
> +#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3

Reply via email to