panfrost: Handle job HW submit errors

Boris Brezillon Mon, 02 Dec 2024 01:22:40 -0800

On Thu, 28 Nov 2024 21:06:18 +0000
Adrián Larumbe <adrian.laru...@collabora.com> wrote:


> Avoid waiting for the DRM scheduler job timedout handler, and instead, let
> the DRM scheduler core signal the error fence immediately when HW job
> submission fails.
> 
> That means we must also decrement the runtime-PM refcnt for the device,
> because the job will never be enqueued or inflight.
> 
> Signed-off-by: Adrián Larumbe <adrian.laru...@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_job.c | 20 ++++++++++++++++----
>  1 file changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index f640d211cc3a..3f4f0682d69d 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -195,7 +195,7 @@ panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
>       return 1;
>  }
>  
> -static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
> +static int panfrost_job_hw_submit(struct panfrost_job *job, int js)
>  {
>       struct panfrost_device *pfdev = job->pfdev;
>       unsigned int subslot;
> @@ -207,10 +207,11 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>  
>       ret = pm_runtime_get_sync(pfdev->base.dev);
>       if (ret < 0)
> -             return;
> +             goto err_hwsubmit;
>  
>       if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) {
> -             return;
> +             ret = -EINVAL;
> +             goto err_hwsubmit;
>       }
>  
>       cfg = panfrost_mmu_as_get(pfdev, job->mmu);
> @@ -261,6 +262,12 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>                       job, js, subslot, jc_head, cfg & 0xf);
>       }
>       spin_unlock(&pfdev->js->job_lock);
> +
> +     return 0;
> +
> +err_hwsubmit:
> +     pm_runtime_put_autosuspend(pfdev->base.dev);
> +     return ret;
>  }
>  
>  static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
> @@ -382,6 +389,7 @@ static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
>       struct panfrost_device *pfdev = job->pfdev;
>       int slot = panfrost_job_get_slot(job);
>       struct dma_fence *fence = NULL;
> +     int ret;
>  
>       if (unlikely(job->base.s_fence->finished.error))
>               return NULL;
> @@ -400,7 +408,11 @@ static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
>               dma_fence_put(job->done_fence);
>       job->done_fence = dma_fence_get(fence);
>  
> -     panfrost_job_hw_submit(job, slot);
> +     ret = panfrost_job_hw_submit(job, slot);
> +     if (ret) {
> +             dma_fence_put(job->done_fence);

If you call dma_fence_put() here, you also need to set job->done_fence
to NULL; otherwise panfrost_job_cleanup() will call dma_fence_put()
again on an already freed fence. The question is: do we really need to
call dma_fence_put(job->done_fence) here? Can't we let the job
destructor take care of that?

> +             return ERR_PTR(ret);
> +     }
>  
>       return fence;
>  }

Re: [PATCH v2 3/8] drm/panfrost: Handle job HW submit errors

Reply via email to