The spin_unlock() function enables irqs regardless of their state
before spin_lock() was called. This might result in an interrupt
arriving while a lock is held further down in the execution, as
seen in GitLab issue #13399.

Try to remedy the problem by saving the irq state before acquiring
each spin lock and restoring it when the lock is released.

v2: add irq state save/restore calls to all locks/unlocks in
 the signal_irq_work() execution path (Maciej)

Signed-off-by: Krzysztof Karas <krzysztof.ka...@intel.com>
---
This issue is rarely hit in CI and I was not able to reproduce
it locally. There might be more places where we should save and
restore the irq state, so I am not adding a "Closes" tag for the
issue yet.

 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 21 ++++++++++++-------
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  5 +++--
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index cc866773ba6f..dd5542726b41 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -53,13 +53,15 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 
 static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
+       unsigned long flags;
+
        if (!b->irq_engine)
                return;
 
-       spin_lock(&b->irq_lock);
+       spin_lock_irqsave(&b->irq_lock, flags);
        if (!b->irq_armed)
                __intel_breadcrumbs_arm_irq(b);
-       spin_unlock(&b->irq_lock);
+       spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
 static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
@@ -76,10 +78,12 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 
 static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 {
-       spin_lock(&b->irq_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&b->irq_lock, flags);
        if (b->irq_armed)
                __intel_breadcrumbs_disarm_irq(b);
-       spin_unlock(&b->irq_lock);
+       spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
 static void add_signaling_context(struct intel_breadcrumbs *b,
@@ -173,6 +177,7 @@ static void signal_irq_work(struct irq_work *work)
        const ktime_t timestamp = ktime_get();
        struct llist_node *signal, *sn;
        struct intel_context *ce;
+       unsigned long flags;
 
        signal = NULL;
        if (unlikely(!llist_empty(&b->signaled_requests)))
@@ -226,10 +231,10 @@ static void signal_irq_work(struct irq_work *work)
                         * spinlock as the callback chain may end up adding
                         * more signalers to the same context or engine.
                         */
-                       spin_lock(&ce->signal_lock);
+                       spin_lock_irqsave(&ce->signal_lock, flags);
                        list_del_rcu(&rq->signal_link);
                        release = remove_signaling_context(b, ce);
-                       spin_unlock(&ce->signal_lock);
+                       spin_unlock_irqrestore(&ce->signal_lock, flags);
                        if (release) {
                                if (intel_timeline_is_last(ce->timeline, rq))
                                        add_retire(b, ce->timeline);
@@ -254,11 +259,11 @@ static void signal_irq_work(struct irq_work *work)
                if (rq->engine->sched_engine->retire_inflight_request_prio)
                        rq->engine->sched_engine->retire_inflight_request_prio(rq);
 
-               spin_lock(&rq->lock);
+               spin_lock_irqsave(&rq->lock, flags);
                list_replace(&rq->fence.cb_list, &cb_list);
                __dma_fence_signal__timestamp(&rq->fence, timestamp);
                __dma_fence_signal__notify(&rq->fence, &cb_list);
-               spin_unlock(&rq->lock);
+               spin_unlock_irqrestore(&rq->lock, flags);
 
                i915_request_put(rq);
        }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 12f1ba7ca9c1..e9102f7246f5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4338,10 +4338,11 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
 static void guc_retire_inflight_request_prio(struct i915_request *rq)
 {
        struct intel_context *ce = request_to_scheduling_context(rq);
+       unsigned long flags;
 
-       spin_lock(&ce->guc_state.lock);
+       spin_lock_irqsave(&ce->guc_state.lock, flags);
        guc_prio_fini(rq, ce);
-       spin_unlock(&ce->guc_state.lock);
+       spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 }
 
 static void sanitize_hwsp(struct intel_engine_cs *engine)
-- 
2.34.1

Reply via email to