Now that we're able to unsubmit requests, we can take advantage of it
during reset. Rather than resubmitting the previous workload directly to
GuC/ELSP, we can simply move the requests back to the priority queue and
let the tasklet submit them instead.

v2: Move the tasklet schedule out for legacy ringbuffer submission
v3: Handle allocation error in lookup rather than in caller (Chris)

Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Jeff McGee <jeff.mc...@intel.com>
Cc: Michel Thierry <michel.thie...@intel.com>
Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
Signed-off-by: Michał Winiarski <michal.winiar...@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            |   6 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  15 +--
 drivers/gpu/drm/i915/intel_lrc.c           | 141 +++++++++++++++++------------
 drivers/gpu/drm/i915/intel_lrc.h           |   1 +
 4 files changed, 89 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a637cc0..28e21fd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3019,15 +3019,11 @@ static void engine_set_wedged(struct intel_engine_cs 
*engine)
         */
 
        if (i915.enable_execlists) {
-               struct execlist_port *port = engine->execlist_port;
                unsigned long flags;
-               unsigned int n;
 
                spin_lock_irqsave(&engine->timeline->lock, flags);
 
-               for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
-                       i915_gem_request_put(port_request(&port[n]));
-               memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+               intel_lr_clear_execlist_ports(engine);
                engine->execlist_queue = RB_ROOT;
                engine->execlist_first = NULL;
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index f89718c..f6a1f6e 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -534,21 +534,20 @@ static void i915_guc_submit(struct drm_i915_gem_request 
*rq)
        unsigned int engine_id = engine->id;
        struct intel_guc *guc = &rq->i915->guc;
        struct i915_guc_client *client = guc->execbuf_client;
-       unsigned long flags;
        int b_ret;
 
        /* WA to flush out the pending GMADR writes to ring buffer. */
        if (i915_vma_is_map_and_fenceable(rq->ring->vma))
                POSTING_READ_FW(GUC_STATUS);
 
-       spin_lock_irqsave(&client->wq_lock, flags);
+       spin_lock(&client->wq_lock);
 
        guc_wq_item_append(client, rq);
        b_ret = guc_ring_doorbell(client);
 
        client->submissions[engine_id] += 1;
 
-       spin_unlock_irqrestore(&client->wq_lock, flags);
+       spin_unlock(&client->wq_lock);
 }
 
 static void nested_enable_signaling(struct drm_i915_gem_request *rq)
@@ -1189,9 +1188,6 @@ int i915_guc_submission_enable(struct drm_i915_private 
*dev_priv)
        guc_interrupts_capture(dev_priv);
 
        for_each_engine(engine, dev_priv, id) {
-               struct execlist_port *port = engine->execlist_port;
-               int n;
-
                /* The tasklet was initialised by execlists, and may be in
                 * a state of flux (across a reset) and so we just want to
                 * take over the callback without changing any other state
@@ -1199,13 +1195,6 @@ int i915_guc_submission_enable(struct drm_i915_private 
*dev_priv)
                 */
                engine->irq_tasklet.func = i915_guc_irq_handler;
                clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
-               for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
-                       if (!port_isset(&port[n]))
-                               break;
-
-                       i915_guc_submit(port_request(&port[n]));
-               }
        }
 
        return 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8fc852c..356a6d2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -291,6 +291,26 @@ uint64_t intel_lr_context_descriptor(struct 
i915_gem_context *ctx,
        return ctx->engine[engine->id].lrc_desc;
 }
 
+static inline struct execlist_port *
+execlists_last_port(struct intel_engine_cs *engine)
+{
+       return &engine->execlist_port[ARRAY_SIZE(engine->execlist_port) - 1];
+}
+
+void intel_lr_clear_execlist_ports(struct intel_engine_cs *engine)
+{
+       struct execlist_port *port = engine->execlist_port;
+       struct drm_i915_gem_request *rq;
+
+       while ((rq = port_request(port))) {
+               i915_gem_request_put(rq);
+               if (port == execlists_last_port(engine))
+                       break;
+               port++;
+       }
+       memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+}
+
 static inline void
 execlists_context_status_change(struct drm_i915_gem_request *rq,
                                unsigned long status)
@@ -952,6 +972,36 @@ static int execlists_request_alloc(struct 
drm_i915_gem_request *request)
        return 0;
 }
 
+static void intel_lr_resubmit_requests(struct intel_engine_cs *engine)
+{
+       struct i915_priolist *p = &engine->default_priolist;
+       struct drm_i915_gem_request *rq, *rq_prev;
+       struct i915_priotree *pt;
+       bool first;
+       int last_prio;
+
+       lockdep_assert_held(&engine->timeline->lock);
+
+       last_prio = INT_MIN;
+
+       list_for_each_entry_safe_reverse(rq, rq_prev,
+                                        &engine->timeline->requests, link) {
+               if (i915_gem_request_completed(rq))
+                       break;
+
+               pt = &rq->priotree;
+               if (pt->priority != last_prio)
+                       p = priolist_lookup(engine, pt->priority,
+                                           &first);
+               __i915_gem_request_unsubmit(rq);
+               trace_i915_gem_request_out(rq);
+
+               /* lifo, since we're traversing timeline in reverse */
+               list_add(&pt->link, &p->requests);
+               last_prio = pt->priority;
+       }
+}
+
 /*
  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
@@ -1220,9 +1270,6 @@ static int intel_init_workaround_bb(struct 
intel_engine_cs *engine)
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
-       struct execlist_port *port = engine->execlist_port;
-       unsigned int n;
-       bool submit;
        int ret;
 
        ret = intel_mocs_init_engine(engine);
@@ -1241,26 +1288,6 @@ static int gen8_init_common_ring(struct intel_engine_cs 
*engine)
 
        DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
 
-       /* After a GPU reset, we may have requests to replay */
-       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
-       submit = false;
-       for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
-               if (!port_isset(&port[n]))
-                       break;
-
-               DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
-                                engine->name, n,
-                                port_request(&port[n])->global_seqno);
-
-               /* Discard the current inflight count */
-               port_set(&port[n], port_request(&port[n]));
-               submit = true;
-       }
-
-       if (submit && !i915.enable_guc_submission)
-               execlists_submit_ports(engine);
-
        return 0;
 }
 
@@ -1300,10 +1327,9 @@ static int gen9_init_render_ring(struct intel_engine_cs 
*engine)
 static void reset_common_ring(struct intel_engine_cs *engine,
                              struct drm_i915_gem_request *request)
 {
-       struct execlist_port *port = engine->execlist_port;
        struct intel_context *ce;
 
-       /* If the request was innocent, we leave the request in the ELSP
+       /* If the request was innocent, we leave the request intact
         * and will try to replay it on restarting. The context image may
         * have been corrupted by the reset, in which case we may have
         * to service a new GPU hang, but more likely we can continue on
@@ -1313,42 +1339,45 @@ static void reset_common_ring(struct intel_engine_cs 
*engine,
         * and have to at least restore the RING register in the context
         * image back to the expected values to skip over the guilty request.
         */
-       if (!request || request->fence.error != -EIO)
-               return;
-
-       /* We want a simple context + ring to execute the breadcrumb update.
-        * We cannot rely on the context being intact across the GPU hang,
-        * so clear it and rebuild just what we need for the breadcrumb.
-        * All pending requests for this context will be zapped, and any
-        * future request will be after userspace has had the opportunity
-        * to recreate its own state.
-        */
-       ce = &request->ctx->engine[engine->id];
-       execlists_init_reg_state(ce->lrc_reg_state,
-                                request->ctx, engine, ce->ring);
-
-       /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-       ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
-               i915_ggtt_offset(ce->ring->vma);
-       ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+       if (request && request->fence.error == -EIO) {
+               /* We want a simple context + ring to execute the breadcrumb
+                * update. We cannot rely on the context being intact across
+                * the GPU hang, so clear it and rebuild just what we need for
+                * the breadcrumb. All pending requests for this context will
+                * be zapped, and any future request will be after userspace
+                * has had the opportunity to recreate its own state.
+                */
+               ce = &request->ctx->engine[engine->id];
+               execlists_init_reg_state(ce->lrc_reg_state,
+                                        request->ctx, engine, ce->ring);
 
-       request->ring->head = request->postfix;
-       intel_ring_update_space(request->ring);
 
-       /* Catch up with any missed context-switch interrupts */
-       if (request->ctx != port_request(port)->ctx) {
-               i915_gem_request_put(port_request(port));
-               port[0] = port[1];
-               memset(&port[1], 0, sizeof(port[1]));
+               /* Move the RING_HEAD onto the breadcrumb,
+                * past the hanging batch
+                */
+               ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
+                       i915_ggtt_offset(ce->ring->vma);
+               ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+
+               request->ring->head = request->postfix;
+               intel_ring_update_space(request->ring);
+
+               /* Reset WaIdleLiteRestore:bdw,skl as well */
+               request->tail =
+                       intel_ring_wrap(request->ring,
+                                       request->wa_tail -
+                                       WA_TAIL_DWORDS * sizeof(u32));
+               assert_ring_tail_valid(request->ring, request->tail);
        }
 
-       GEM_BUG_ON(request->ctx != port_request(port)->ctx);
+       spin_lock_irq(&engine->timeline->lock);
+       intel_lr_resubmit_requests(engine);
+       spin_unlock_irq(&engine->timeline->lock);
 
-       /* Reset WaIdleLiteRestore:bdw,skl as well */
-       request->tail =
-               intel_ring_wrap(request->ring,
-                               request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
-       assert_ring_tail_valid(request->ring, request->tail);
+       intel_lr_clear_execlist_ports(engine);
+       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+
+       tasklet_hi_schedule(&engine->irq_tasklet);
 }
 
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 52b3a1f..8e1ef4d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -83,6 +83,7 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context 
*ctx,
                                     struct intel_engine_cs *engine);
 
 /* Execlists */
+void intel_lr_clear_execlist_ports(struct intel_engine_cs *engine);
 int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
                                    int enable_execlists);
 
-- 
2.9.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to