From: Alex Dai <yu....@intel.com>

Add functions to submit work queue item and ring the door bell.
GuC TLB needs to be invalidated if the LRC context changes.

Issue: VIZ-4884
Signed-off-by: Alex Dai <yu....@intel.com>
---
 drivers/gpu/drm/i915/intel_guc.h        |   3 +
 drivers/gpu/drm/i915/intel_guc_client.c | 198 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.c        |  34 +++++-
 drivers/gpu/drm/i915/intel_lrc.h        |   2 +
 4 files changed, 230 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 892f974..f8065cf 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -31,6 +31,7 @@
 #define GUC_WQ_SIZE    (PAGE_SIZE * 2)
 
 struct i915_guc_client {
+       spinlock_t wq_lock;
        struct drm_i915_gem_object *client_obj;
        u32 priority;
        off_t doorbell_offset;
@@ -39,6 +40,8 @@ struct i915_guc_client {
        uint16_t doorbell_id;
        uint32_t ctx_index;
        uint32_t wq_size;
+       uint32_t wq_tail;
+       uint32_t cookie;
 };
 
 #define I915_MAX_DOORBELLS     256
diff --git a/drivers/gpu/drm/i915/intel_guc_client.c 
b/drivers/gpu/drm/i915/intel_guc_client.c
index 7922427..31934a3 100644
--- a/drivers/gpu/drm/i915/intel_guc_client.c
+++ b/drivers/gpu/drm/i915/intel_guc_client.c
@@ -22,6 +22,7 @@
  *
  */
 #include <linux/firmware.h>
+#include <linux/circ_buf.h>
 #include "i915_drv.h"
 #include "intel_guc.h"
 
@@ -52,6 +53,14 @@
  * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
  * mapped into process space.
  *
+ * Work Items:
+ * There are several types of work items that the host may place into a
+ * workqueue, each with its own requirements and limitations. Currently only
+ * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
+ * represents an in-order queue. The kernel driver packs the ring tail pointer and an
+ * ELSP context descriptor dword into Work Item.
+ * See add_workqueue_item()
+ *
  */
 
 /*
@@ -395,6 +404,8 @@ i915_guc_client_alloc(struct drm_device *dev, u32 priority)
                /* XXX: evict a doorbell instead */
                goto err;
 
+       spin_lock_init(&client->wq_lock);
+
        init_ctx_desc(guc, client);
        init_proc_desc(guc, client);
        init_doorbell(guc, client);
@@ -414,6 +425,183 @@ err:
        return NULL;
 }
 
+/* Reserve space for one workqueue item and return its offset via *offset */
+static int get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
+{
+       struct guc_process_desc *desc;
+       void *base;
+       u32 size = sizeof(struct guc_wq_item);
+       int ret = 0, timeout_counter = 200;
+       unsigned long flags;
+
+       base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
+       desc = base + gc->proc_desc_offset;
+
+       while (timeout_counter-- > 0) {
+               spin_lock_irqsave(&gc->wq_lock, flags);
+
+               ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
+                               gc->wq_size) >= size, 1);
+
+               if (!ret) {
+                       *offset = gc->wq_tail;
+
+                       /* advance the tail for next workqueue item */
+                       gc->wq_tail += size;
+                       gc->wq_tail &= gc->wq_size - 1;
+
+                       /* this will break the loop */
+                       timeout_counter = 0;
+               }
+
+               spin_unlock_irqrestore(&gc->wq_lock, flags);
+       };
+
+       kunmap_atomic(base);
+
+       return ret;
+}
+
+static void guc_update_context(struct intel_context *ctx,
+                               struct intel_engine_cs *ring)
+{
+       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+       struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+       struct page *page;
+       uint32_t *reg_state;
+
+       page = i915_gem_object_get_page(ctx_obj, 1);
+       reg_state = kmap_atomic(page);
+
+       reg_state[CTX_RING_BUFFER_START + 1] =
+                       i915_gem_obj_ggtt_offset(ringbuf->obj);
+
+       /* True PPGTT with dynamic page allocation: update PDP registers and
+        * point the unallocated PDPs to the scratch page
+        */
+       if (ctx->ppgtt) {
+               ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 3);
+               ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 2);
+               ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 1);
+               ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 0);
+       }
+
+       kunmap_atomic(reg_state);
+}
+
+static int add_workqueue_item(struct i915_guc_client *gc,
+                             struct intel_context *ctx,
+                             struct intel_engine_cs *ring)
+{
+       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+       struct drm_i915_gem_object *ctx_obj;
+       struct guc_wq_item *wqi;
+       void *base;
+       u32 wq_off = 0, tail = ringbuf->tail, wq_len;
+       int ret;
+
+       ctx_obj = ctx->engine[ring->id].state;
+
+       /* Need this because of the deferred pin ctx and ring */
+       /* Shall we move this right after ring is pinned? */
+       guc_update_context(ctx, ring);
+
+       ret = get_workqueue_space(gc, &wq_off);
+       if (ret)
+               return ret;
+
+       /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
+        * should not have a case where the wqi structure crosses a page boundary
+        * or wraps to the beginning. This simplifies the implementation below.
+        *
+        * XXX: if not the case, we need save data to a temp wqi and copy it to
+        * workqueue buffer dw by dw.
+        */
+       WARN_ON(sizeof(struct guc_wq_item) != 16);
+       WARN_ON(wq_off & 3);
+
+       /* wq starts from the page after doorbell / process_desc */
+       base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
+                       (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
+       wq_off &= PAGE_SIZE - 1;
+       wqi = (struct guc_wq_item *)((char *)base + wq_off);
+
+       /* len does not include the header */
+       wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
+       wqi->header = WQ_TYPE_INORDER |
+                       (wq_len << WQ_LEN_SHIFT) |
+                       (ring->id << WQ_TARGET_SHIFT) |
+                       WQ_NO_WCFLUSH_WAIT;
+
+       wqi->context_desc = (u32)execlists_ctx_descriptor(ring, ctx_obj);
+       /* tail index is in qw */
+       tail >>= 3;
+       wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
+       wqi->fence_id = 0; /*XXX: what fence to be here */
+
+       kunmap_atomic(base);
+
+       return 0;
+}
+
+static int ring_doorbell(struct i915_guc_client *gc)
+{
+       struct guc_process_desc *desc;
+       union guc_doorbell_qw db_cmp, db_exc, db_ret;
+       union guc_doorbell_qw *db;
+       void *base;
+       int attempt = 2, ret = -EAGAIN;
+
+       base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
+       desc = base + gc->proc_desc_offset;
+
+       /* Update the tail so it is visible to GuC */
+       desc->tail = gc->wq_tail;
+
+       /* current cookie */
+       db_cmp.db_status = GUC_DOORBELL_ENABLED;
+       db_cmp.cookie = gc->cookie;
+
+       /* cookie to be updated */
+       db_exc.db_status = GUC_DOORBELL_ENABLED;
+       db_exc.cookie = gc->cookie + 1;
+       if (db_exc.cookie == 0)
+               db_exc.cookie = 1;
+
+       /* pointer of current doorbell cacheline */
+       db = base + gc->doorbell_offset;
+
+       while (attempt--) {
+               /* lets ring the doorbell */
+               db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db,
+                       db_cmp.value_qw, db_exc.value_qw);
+
+               /* if the exchange was successfully executed */
+               if (db_ret.value_qw == db_cmp.value_qw) {
+                       /* db was successfully rung */
+                       gc->cookie = db_exc.cookie;
+                       ret = 0;
+                       break;
+               }
+
+               /* XXX: doorbell was lost and need to acquire it again */
+               if (db_ret.db_status == GUC_DOORBELL_DISABLED)
+                       break;
+
+               DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n",
+                         db_cmp.cookie, db_ret.cookie);
+
+               /* update the cookie to newly read cookie from GuC */
+               db_cmp.cookie = db_ret.cookie;
+               db_exc.cookie = db_ret.cookie + 1;
+               if (db_exc.cookie == 0)
+                       db_exc.cookie = 1;
+       }
+
+       kunmap_atomic(base);
+       return ret;
+}
+
 /**
  * i915_guc_client_submit() - Submit commands through GuC
  * @client:    the guc client where commands will go through
@@ -426,5 +614,13 @@ int i915_guc_client_submit(struct i915_guc_client *client,
                           struct intel_context *ctx,
                           struct intel_engine_cs *ring)
 {
-       return 0;
+       int ret;
+
+       ret = add_workqueue_item(client, ctx, ring);
+       if (ret)
+               return ret;
+
+       ret = ring_doorbell(client);
+
+       return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8685205..15295db 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -187,8 +187,8 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object 
*ctx_obj)
        return lrca >> 12;
 }
 
-static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
-                                        struct drm_i915_gem_object *ctx_obj)
+uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
+                                 struct drm_i915_gem_object *ctx_obj)
 {
        struct drm_device *dev = ring->dev;
        uint64_t desc;
@@ -648,13 +648,17 @@ intel_logical_ring_advance_and_submit(struct 
intel_ringbuffer *ringbuf,
                                      struct drm_i915_gem_request *request)
 {
        struct intel_engine_cs *ring = ringbuf->ring;
+       struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
        intel_logical_ring_advance(ringbuf);
 
        if (intel_ring_stopped(ring))
                return;
 
-       execlists_context_queue(ring, ctx, ringbuf->tail, request);
+       if (dev_priv->guc.execbuf_client)
+               i915_guc_client_submit(dev_priv->guc.execbuf_client, ctx, ring);
+       else
+               execlists_context_queue(ring, ctx, ringbuf->tail, request);
 }
 
 static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf,
@@ -918,18 +922,23 @@ static int intel_lr_context_pin(struct intel_engine_cs 
*ring,
 {
        struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
        struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+       struct drm_i915_private *dev_priv = ring->dev->dev_private;
        int ret = 0;
 
        WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
        if (ctx->engine[ring->id].pin_count++ == 0) {
-               ret = i915_gem_obj_ggtt_pin(ctx_obj,
-                               GEN8_LR_CONTEXT_ALIGN, 0);
+               ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
+                               PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE);
                if (ret)
                        goto reset_pin_count;
 
                ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
                if (ret)
                        goto unpin_ctx_obj;
+
+               /* Invalidate GuC TLB. */
+               if (i915.enable_guc_scheduling)
+                       I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
        }
 
        return ret;
@@ -1283,6 +1292,13 @@ static int intel_lr_context_render_state_init(struct 
intel_engine_cs *ring,
        ret = __i915_add_request(ring, file, so.obj);
        /* intel_logical_ring_add_request moves object to inactive if it
         * fails */
+
+       /* GuC firmware will try to collapse its DPC work queue if the new one
+        * is for the same context. So the following breadcrumb could be appended to
+        * this batch and submitted as one batch. Wait here to make sure the
+        * context state init is finished before any other submission to GuC. */
+       if (!ret && i915.enable_guc_scheduling)
+               ret = i915_wait_request(so.obj->last_read_req);
 out:
        i915_gem_render_state_fini(&so);
        return ret;
@@ -1291,8 +1307,13 @@ out:
 static int gen8_init_rcs_context(struct intel_engine_cs *ring,
                       struct intel_context *ctx)
 {
+       struct drm_i915_private *dev_priv = ring->dev->dev_private;
        int ret;
 
+       /* Invalidate GuC TLB. */
+       if (i915.enable_guc_scheduling)
+               I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
        ret = intel_logical_ring_workarounds_emit(ring, ctx);
        if (ret)
                return ret;
@@ -1819,7 +1840,8 @@ int intel_lr_context_deferred_create(struct intel_context 
*ctx,
        }
 
        if (is_global_default_ctx) {
-               ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
+               ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
+                               PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE);
                if (ret) {
                        DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
                                        ret);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 04d3a6d..19c9a02 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -85,6 +85,8 @@ int intel_execlists_submission(struct drm_device *dev, struct 
drm_file *file,
                               struct drm_i915_gem_object *batch_obj,
                               u64 exec_start, u32 dispatch_flags);
 u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
+uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
+                                 struct drm_i915_gem_object *ctx_obj);
 
 void intel_lrc_irq_handler(struct intel_engine_cs *ring);
 void intel_execlists_retire_requests(struct intel_engine_cs *ring);
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to