Hi Christian,

Can you please review this patch? It is a continuation of the discussion at [1].
At first I was thinking of using a cancel for suspend instead of a mod(to an
arbitrarily large value), but I couldn't get it to fit in as I have an 
additional
constraint of being able to call these functions from an IRQ context.

These new functions race with other contexts, primarily finish_job(),
timedout_job() and recovery(), but I did go through the possible races between
these(I think). Please let me know what you think of this? I have not tested
this yet and if this is something in the right direction, I will put this
through my testing drill and polish it.

IMO I think I prefer the callback approach as it appears to be simple, less
error prone for both the scheduler and the drivers.

[1]  https://patchwork.freedesktop.org/patch/259914/

Signed-off-by: Sharat Masetty <smase...@codeaurora.org>
---
 drivers/gpu/drm/scheduler/sched_main.c | 81 +++++++++++++++++++++++++++++++++-
 include/drm/gpu_scheduler.h            |  5 +++
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index c993d10..9645789 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -191,11 +191,84 @@ bool drm_sched_dependency_optimized(struct dma_fence* 
fence,
  */
 static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&sched->tdr_suspend_lock, flags);
+
+       sched->timeout_remaining = sched->timeout;
+
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-           !list_empty(&sched->ring_mirror_list))
+           !list_empty(&sched->ring_mirror_list) && !sched->work_tdr_suspended)
                schedule_delayed_work(&sched->work_tdr, sched->timeout);
+
+       spin_unlock_irqrestore(&sched->tdr_suspend_lock, flags);
 }

+/**
+ * drm_sched_suspend_timeout - suspend timeout for reset worker
+ *
+ * @sched: scheduler instance for which to suspend the timeout
+ *
+ * Suspend the delayed work timeout for the scheduler. Note that
+ * this function can be called from an IRQ context.
+ */
+void drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
+{
+       unsigned long flags, timeout;
+
+       spin_lock_irqsave(&sched->tdr_suspend_lock, flags);
+
+       if (sched->work_tdr_suspended ||
+                       sched->timeout == MAX_SCHEDULE_TIMEOUT ||
+                       list_empty(&sched->ring_mirror_list))
+               goto done;
+
+       timeout = sched->work_tdr.timer.expires;
+
+       /*
+        * Reset timeout to an arbitrarily large value
+        */
+       mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout * 10);
+
+       timeout -= jiffies;
+
+       /* FIXME: Can jiffies be after timeout? */
+       sched->timeout_remaining = time_after(jiffies, timeout)? 0: timeout;
+       sched->work_tdr_suspended = true;
+
+done:
+       spin_unlock_irqrestore(&sched->tdr_suspend_lock, flags);
+}
+EXPORT_SYMBOL(drm_sched_suspend_timeout);
+
+/**
+ * drm_sched_resume_timeout - resume timeout for reset worker
+ *
+ * @sched: scheduler instance for which to resume the timeout
+ *
+ * Resume the delayed work timeout for the scheduler. Note that
+ * this function can be called from an IRQ context.
+ */
+void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&sched->tdr_suspend_lock, flags);
+
+       if (!sched->work_tdr_suspended ||
+                       sched->timeout == MAX_SCHEDULE_TIMEOUT) {
+               spin_unlock_irqrestore(&sched->tdr_suspend_lock, flags);
+               return;
+       }
+
+       mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout_remaining);
+
+       sched->work_tdr_suspended = false;
+
+       spin_unlock_irqrestore(&sched->tdr_suspend_lock, flags);
+}
+EXPORT_SYMBOL(drm_sched_resume_timeout);
+
 /* job_finish is called after hw fence signaled
  */
 static void drm_sched_job_finish(struct work_struct *work)
@@ -348,6 +421,11 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler 
*sched)
        struct drm_sched_job *s_job, *tmp;
        bool found_guilty = false;
        int r;
+       unsigned long flags;
+
+       spin_lock_irqsave(&sched->tdr_suspend_lock, flags);
+       sched->work_tdr_suspended = false;
+       spin_unlock_irqrestore(&sched->tdr_suspend_lock, flags);

        spin_lock(&sched->job_list_lock);
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
@@ -607,6 +685,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
        init_waitqueue_head(&sched->job_scheduled);
        INIT_LIST_HEAD(&sched->ring_mirror_list);
        spin_lock_init(&sched->job_list_lock);
+       spin_lock_init(&sched->tdr_suspend_lock);
        atomic_set(&sched->hw_rq_count, 0);
        INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
        atomic_set(&sched->num_jobs, 0);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index d87b268..5d39572 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -278,6 +278,9 @@ struct drm_gpu_scheduler {
        atomic_t                        hw_rq_count;
        atomic64_t                      job_id_count;
        struct delayed_work             work_tdr;
+       unsigned long                   timeout_remaining;
+       bool                            work_tdr_suspended;
+       spinlock_t                      tdr_suspend_lock;
        struct task_struct              *thread;
        struct list_head                ring_mirror_list;
        spinlock_t                      job_list_lock;
@@ -300,6 +303,8 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
 bool drm_sched_dependency_optimized(struct dma_fence* fence,
                                    struct drm_sched_entity *entity);
 void drm_sched_job_kickout(struct drm_sched_job *s_job);
+void drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
+void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched);

 void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
                             struct drm_sched_entity *entity);
--
1.9.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to