panfrost: Don't reset the GPU on job faults unless we really have to

Steven Price Mon, 28 Jun 2021 02:49:15 -0700

On 28/06/2021 08:42, Boris Brezillon wrote:
> If we can recover from a fault without a reset there's no reason to
> issue one.
> 
> v3:
> * Drop the mention of Valhall requiring a reset on JOB_BUS_FAULT
> * Set the fence error to -EINVAL instead of having per-exception
>   error codes
> 
> Signed-off-by: Boris Brezillon <boris.brezil...@collabora.com>


Reviewed-by: Steven Price <steven.pr...@arm.com>

> ---
>  drivers/gpu/drm/panfrost/panfrost_device.c |  9 +++++++++
>  drivers/gpu/drm/panfrost/panfrost_device.h |  2 ++
>  drivers/gpu/drm/panfrost/panfrost_job.c    | 16 ++++++++++++++--
>  3 files changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
> index 736854542b05..f4e42009526d 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
> @@ -379,6 +379,15 @@ const char *panfrost_exception_name(u32 exception_code)
>       return panfrost_exception_infos[exception_code].name;
>  }
>  
> +bool panfrost_exception_needs_reset(const struct panfrost_device *pfdev,
> +                                 u32 exception_code)
> +{
> +     /* Right now, none of the GPU we support need a reset, but this
> +      * might change.
> +      */
> +     return false;
> +}
> +
>  void panfrost_device_reset(struct panfrost_device *pfdev)
>  {
>       panfrost_gpu_soft_reset(pfdev);
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> index 2dc8c0d1d987..d91f71366214 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> @@ -244,6 +244,8 @@ enum drm_panfrost_exception_type {
>  };
>  
>  const char *panfrost_exception_name(u32 exception_code);
> +bool panfrost_exception_needs_reset(const struct panfrost_device *pfdev,
> +                                 u32 exception_code);
>  
>  static inline void
>  panfrost_device_schedule_reset(struct panfrost_device *pfdev)
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 4bd4d11377b7..b0f4857ca084 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -498,14 +498,26 @@ static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status)
>               job_write(pfdev, JOB_INT_CLEAR, mask);
>  
>               if (status & JOB_INT_MASK_ERR(j)) {
> +                     u32 js_status = job_read(pfdev, JS_STATUS(j));
> +
>                       job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
>  
>                       dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
>                               j,
> -                             panfrost_exception_name(job_read(pfdev, JS_STATUS(j))),
> +                             panfrost_exception_name(js_status),
>                               job_read(pfdev, JS_HEAD_LO(j)),
>                               job_read(pfdev, JS_TAIL_LO(j)));
> -                     drm_sched_fault(&pfdev->js->queue[j].sched);
> +
> +                     /* If we need a reset, signal it to the timeout
> +                      * handler, otherwise, update the fence error field and
> +                      * signal the job fence.
> +                      */
> +                     if (panfrost_exception_needs_reset(pfdev, js_status)) {
> +                             drm_sched_fault(&pfdev->js->queue[j].sched);
> +                     } else {
> +                             dma_fence_set_error(pfdev->jobs[j]->done_fence, -EINVAL);
> +                             status |= JOB_INT_MASK_DONE(j);
> +                     }
>               }
>  
>               if (status & JOB_INT_MASK_DONE(j)) {
>

Re: [PATCH v4 12/14] drm/panfrost: Don't reset the GPU on job faults unless we really have to

Reply via email to