On Tue, 29 Jun 2021 09:35:03 +0200
Boris Brezillon <boris.brezil...@collabora.com> wrote:

> @@ -379,57 +370,72 @@ void panfrost_job_enable_interrupts(struct 
> panfrost_device *pfdev)
>       job_write(pfdev, JOB_INT_MASK, irq_mask);
>  }
>  
> -static bool panfrost_scheduler_stop(struct panfrost_queue_state *queue,
> -                                 struct drm_sched_job *bad)
> +static void panfrost_reset(struct panfrost_device *pfdev,
> +                        struct drm_sched_job *bad)
>  {
> -     enum panfrost_queue_status old_status;
> -     bool stopped = false;
> +     unsigned int i;
> +     bool cookie;
>  
> -     mutex_lock(&queue->lock);
> -     old_status = atomic_xchg(&queue->status,
> -                              PANFROST_QUEUE_STATUS_STOPPED);
> -     if (old_status == PANFROST_QUEUE_STATUS_STOPPED)
> -             goto out;
> +     if (!atomic_read(&pfdev->reset.pending))
> +             return;
> +
> +     /* Stop the schedulers.
> +      *
> +      * FIXME: We temporarily get out of the dma_fence_signalling section
> +      * because the cleanup path generates lockdep splats when taking locks
> +      * to release job resources. We should rework the code to follow this
> +      * pattern:
> +      *
> +      *      try_lock
> +      *      if (locked)
> +      *              release
> +      *      else
> +      *              schedule_work_to_release_later
> +      */
> +     for (i = 0; i < NUM_JOB_SLOTS; i++)
> +             drm_sched_stop(&pfdev->js->queue[i].sched, bad);
> +
> +     cookie = dma_fence_begin_signalling();
>  
> -     WARN_ON(old_status != PANFROST_QUEUE_STATUS_ACTIVE);
> -     drm_sched_stop(&queue->sched, bad);
>       if (bad)
>               drm_sched_increase_karma(bad);
>  
> -     stopped = true;
> +     spin_lock(&pfdev->js->job_lock);
> +     for (i = 0; i < NUM_JOB_SLOTS; i++) {
> +             if (pfdev->jobs[i]) {
> +                     pm_runtime_put_noidle(pfdev->dev);
> +                     panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
> +                     pfdev->jobs[i] = NULL;
> +             }
> +     }
> +     spin_unlock(&pfdev->js->job_lock);
>  
> -     /*
> -      * Set the timeout to max so the timer doesn't get started
> -      * when we return from the timeout handler (restored in
> -      * panfrost_scheduler_start()).
> +     panfrost_device_reset(pfdev);
> +
> +     /* GPU has been reset, we can cancel timeout/fault work that may have
> +      * been queued in the meantime and clear the reset pending bit.
>        */
> -     queue->sched.timeout = MAX_SCHEDULE_TIMEOUT;
> +     atomic_set(&pfdev->reset.pending, 0);
> +     for (i = 0; i < NUM_JOB_SLOTS; i++)
> +             cancel_delayed_work(&pfdev->js->queue[i].sched.work_tdr);
>  

Those cancel_delayed_work() calls are useless: drm_sched_stop() has
already canceled that work. I'll get rid of them in v6.

Reply via email to