From: Alex Dai <yu....@intel.com>

Add functions to submit a work queue item and ring the doorbell.
The GuC TLB needs to be invalidated if the LRC context changes.

Issue: VIZ-4884
Signed-off-by: Alex Dai <yu....@intel.com>
---
 drivers/gpu/drm/i915/intel_guc.h           |   3 +
 drivers/gpu/drm/i915/intel_guc_client.c    | 175 ++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_guc_scheduler.c |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c           |  16 ++-
 4 files changed, 193 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 3228c68..498789d 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -31,6 +31,7 @@
 #define GUC_WQ_SIZE    (PAGE_SIZE * 2)
 
 struct i915_guc_client {
+       spinlock_t wq_lock;
        struct drm_i915_gem_object *client_obj;
        u32 priority;
        off_t doorbell_offset;
@@ -39,6 +40,8 @@ struct i915_guc_client {
        uint16_t doorbell_id;
        uint32_t ctx_index;
        uint32_t wq_size;
+       uint32_t wq_tail;
+       uint32_t cookie;
 };
 
 #define I915_MAX_DOORBELLS     256
diff --git a/drivers/gpu/drm/i915/intel_guc_client.c 
b/drivers/gpu/drm/i915/intel_guc_client.c
index f7672ff..2415bfa 100644
--- a/drivers/gpu/drm/i915/intel_guc_client.c
+++ b/drivers/gpu/drm/i915/intel_guc_client.c
@@ -22,6 +22,7 @@
  *
  */
 #include <linux/firmware.h>
+#include <linux/circ_buf.h>
 #include "i915_drv.h"
 #include "intel_guc.h"
 
@@ -372,6 +373,8 @@ i915_guc_client_alloc(struct drm_device *dev, u32 priority)
                /* XXX: evict a doorbell instead */
                goto err;
 
+       spin_lock_init(&client->wq_lock);
+
        init_ctx_desc(guc, client);
        init_proc_desc(guc, client);
        init_doorbell(guc, client);
@@ -391,9 +394,179 @@ err:
        return NULL;
 }
 
+
+/* Get valid workqueue item and return it back to offset */
+static int get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
+{
+       struct guc_process_desc *desc;
+       void *base;
+       u32 size = sizeof(struct guc_wq_item);
+       int ret = 0, timeout_counter = 200;
+       unsigned long flags;
+
+       base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
+       desc = base + gc->proc_desc_offset;
+
+       while (timeout_counter-- > 0) {
+               spin_lock_irqsave(&gc->wq_lock, flags);
+
+               ret = wait_for(CIRC_SPACE(gc->wq_tail, desc->head,
+                              gc->wq_size) >= size, 1);
+
+               if (!ret) {
+                       *offset = gc->wq_tail;
+
+                       /* advance the tail for next workqueue item */
+                       gc->wq_tail += size;
+                       gc->wq_tail &= gc->wq_size - 1;
+
+                       /* this will break the loop */
+                       timeout_counter = 0;
+               }
+
+               spin_unlock_irqrestore(&gc->wq_lock, flags);
+       };
+
+       kunmap_atomic(base);
+
+       return ret;
+}
+
+
+static int add_workqueue_item(struct i915_guc_client *gc,
+                             struct intel_context *ctx,
+                             struct intel_engine_cs *ring)
+{
+       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+       struct drm_i915_gem_object *ctx_obj;
+       struct guc_wq_item *wqi;
+       void *base;
+       struct page *page;
+       u32 wq_off = 0, tail = ringbuf->tail, wq_len;
+       int ret;
+
+       ctx_obj = ctx->engine[ring->id].state;
+
+       WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
+       WARN_ON(!i915_gem_obj_is_pinned(ringbuf->obj));
+
+       /* Need this because of the deferred pin ctx and ring */
+       /* Shall we move this right after ring is pinned? */
+       page = i915_gem_object_get_page(ctx_obj, 1);
+       base = kmap_atomic(page);
+
+       *((u32 *)base + CTX_RING_BUFFER_START + 1) =
+               i915_gem_obj_ggtt_offset(ringbuf->obj);
+
+       kunmap_atomic(base);
+
+       ret = get_workqueue_space(gc, &wq_off);
+       if (ret)
+               return ret;
+
+       /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
+        * should not have the case where structure wqi is across page, neither
+        * wrapped to the beginning. This simplifies the implementation below.
+        *
+        * XXX: if not the case, we need save data to a temp wqi and copy it to
+        * workqueue buffer dw by dw.
+        */
+       WARN_ON(sizeof(struct guc_wq_item) != 16);
+       WARN_ON(wq_off & 3);
+
+       /* wq starts from the page after doorbell / process_desc */
+       base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
+                       (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
+       wq_off &= PAGE_SIZE - 1;
+       wqi = (struct guc_wq_item *)((char *)base + wq_off);
+
+       /* len does not include the header */
+       wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
+       wqi->header = WQ_TYPE_INORDER |
+                       (wq_len << WQ_LEN_SHIFT) |
+                       (ring->id << WQ_TARGET_SHIFT) |
+                       WQ_NO_WCFLUSH_WAIT;
+
+       wqi->context_desc = (u32)execlists_ctx_descriptor(ring, ctx_obj);
+       /* tail index is in qw */
+       tail >>= 3;
+       wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
+       wqi->fence_id = 0; /*XXX: what fence to be here */
+
+       kunmap_atomic(base);
+
+       return 0;
+}
+
+static int ring_doorbell(struct i915_guc_client *gc)
+{
+       struct guc_process_desc *desc;
+       union guc_doorbell_qw db_cmp, db_exc, db_ret;
+       union guc_doorbell_qw *db;
+       void *base;
+       int attempt = 2, ret = -EAGAIN;
+
+       base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
+       desc = base + gc->proc_desc_offset;
+
+       /* Update the tail so it is visible to GuC */
+       desc->tail = gc->wq_tail;
+
+       /* current cookie */
+       db_cmp.db_status = GUC_DOORBELL_ENABLED;
+       db_cmp.cookie = gc->cookie;
+
+       /* cookie to be updated */
+       db_exc.db_status = GUC_DOORBELL_ENABLED;
+       db_exc.cookie = gc->cookie + 1;
+       if (db_exc.cookie == 0)
+               db_exc.cookie = 1;
+
+       /* pointer of current doorbell cacheline */
+       db = base + gc->doorbell_offset;
+
+       while (attempt--) {
+               /* lets ring the doorbell */
+               db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db,
+                       db_cmp.value_qw, db_exc.value_qw);
+
+               /* if the exchange was successfully executed */
+               if (db_ret.value_qw == db_cmp.value_qw) {
+                       /* db was successfully rung */
+                       gc->cookie = db_exc.cookie;
+                       ret = 0;
+                       break;
+               }
+
+               /* XXX: doorbell was lost and need to acquire it again */
+               if (db_ret.db_status == GUC_DOORBELL_DISABLED)
+                       break;
+
+               DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n",
+                         db_cmp.cookie, db_ret.cookie);
+
+               /* update the cookie to newly read cookie from GuC */
+               db_cmp.cookie = db_ret.cookie;
+               db_exc.cookie = db_ret.cookie + 1;
+               if (db_exc.cookie == 0)
+                       db_exc.cookie = 1;
+       }
+
+       kunmap_atomic(base);
+       return ret;
+}
+
 int i915_guc_client_submit(struct i915_guc_client *client,
                           struct intel_context *ctx,
                           struct intel_engine_cs *ring)
 {
-       return 0;
+       int ret;
+
+       ret = add_workqueue_item(client, ctx, ring);
+       if (ret)
+               return ret;
+
+       ret = ring_doorbell(client);
+
+       return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_scheduler.c 
b/drivers/gpu/drm/i915/intel_guc_scheduler.c
index 008f74c..c0b7231 100644
--- a/drivers/gpu/drm/i915/intel_guc_scheduler.c
+++ b/drivers/gpu/drm/i915/intel_guc_scheduler.c
@@ -160,6 +160,6 @@ bool sanitize_enable_guc_scheduling(struct drm_device *dev)
        if (!HAS_GUC_UCODE(dev) || !HAS_GUC_SCHED(dev))
                return false;
 
-       return i915.enable_guc_scheduling;
+       return i915.enable_execlists && i915.enable_guc_scheduling;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2c18d59..967ebf7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -735,13 +735,17 @@ intel_logical_ring_advance_and_submit(struct 
intel_ringbuffer *ringbuf,
                                      struct drm_i915_gem_request *request)
 {
        struct intel_engine_cs *ring = ringbuf->ring;
+       struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
        intel_logical_ring_advance(ringbuf);
 
        if (intel_ring_stopped(ring))
                return;
 
-       execlists_context_queue(ring, ctx, ringbuf->tail, request);
+       if (dev_priv->guc.execbuf_client)
+               i915_guc_client_submit(dev_priv->guc.execbuf_client, ctx, ring);
+       else
+               execlists_context_queue(ring, ctx, ringbuf->tail, request);
 }
 
 static int intel_lr_context_pin(struct intel_engine_cs *ring,
@@ -749,6 +753,7 @@ static int intel_lr_context_pin(struct intel_engine_cs 
*ring,
 {
        struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
        struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+       struct drm_i915_private *dev_priv = ring->dev->dev_private;
        int ret = 0;
 
        WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
@@ -761,6 +766,10 @@ static int intel_lr_context_pin(struct intel_engine_cs 
*ring,
                ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
                if (ret)
                        goto unpin_ctx_obj;
+
+               /* Invalidate GuC TLB. */
+               if (i915.enable_guc_scheduling)
+                       I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
        }
 
        return ret;
@@ -1305,8 +1314,13 @@ out:
 static int gen8_init_rcs_context(struct intel_engine_cs *ring,
                       struct intel_context *ctx)
 {
+       struct drm_i915_private *dev_priv = ring->dev->dev_private;
        int ret;
 
+       /* Invalidate GuC TLB. */
+       if (i915.enable_guc_scheduling)
+               I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
        ret = intel_logical_ring_workarounds_emit(ring, ctx);
        if (ret)
                return ret;
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to