From: Alex Dai <yu....@intel.com>

Now that we keep the GuC client structure (and therefore the process
descriptor embedded therein) permanently mapped, we don't really need
to keep a local copy of the GuC's work-queue head. So we can simplify
the code a little by reading the head directly from the process
descriptor whenever it is needed. Also, optimise away a few calls by
caching results in local variables.

v2:
    Added local optimisations (Dave Gordon)

Signed-off-by: Alex Dai <yu....@intel.com>
Signed-off-by: Dave Gordon <david.s.gor...@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 54 ++++++++++++------------------
 drivers/gpu/drm/i915/intel_guc.h           |  2 +-
 2 files changed, 23 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 35231fd..5d35570 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -238,9 +238,6 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
                        db_exc.cookie = 1;
        }
 
-       /* Finally, update the cached copy of the GuC's WQ head */
-       gc->wq_head = desc->head;
-
        return ret;
 }
 
@@ -361,12 +358,14 @@ static void guc_init_proc_desc(struct intel_guc *guc,
 static void guc_init_ctx_desc(struct intel_guc *guc,
                              struct i915_guc_client *client)
 {
+       struct drm_i915_gem_object *client_obj = client->client_obj;
        struct drm_i915_private *dev_priv = guc_to_i915(guc);
        struct intel_engine_cs *engine;
        struct intel_context *ctx = client->owner;
        struct guc_context_desc desc;
        struct sg_table *sg;
        enum intel_engine_id id;
+       u32 gfx_addr;
 
        memset(&desc, 0, sizeof(desc));
 
@@ -395,16 +394,17 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
                lrc->context_desc = (u32)ctx_desc;
 
                /* The state page is after PPHWSP */
-               lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
-                               LRC_STATE_PN * PAGE_SIZE;
+               gfx_addr = i915_gem_obj_ggtt_offset(obj);
+               lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
                lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
                                (engine->guc_id << GUC_ELC_ENGINE_OFFSET);
 
                obj = ctx->engine[id].ringbuf->obj;
+               gfx_addr = i915_gem_obj_ggtt_offset(obj);
 
-               lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
-               lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
-               lrc->ring_next_free_location = lrc->ring_begin;
+               lrc->ring_begin = gfx_addr;
+               lrc->ring_end = gfx_addr + obj->base.size - 1;
+               lrc->ring_next_free_location = gfx_addr;
                lrc->ring_current_tail_pointer_value = 0;
 
                desc.engines_used |= (1 << engine->guc_id);
@@ -413,22 +413,17 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
        WARN_ON(desc.engines_used == 0);
 
        /*
-        * The CPU address is only needed at certain points, so kmap_atomic on
-        * demand instead of storing it in the ctx descriptor.
-        * XXX: May make debug easier to have it mapped
+        * The doorbell, process descriptor, and workqueue are all parts
+        * of the client object, which the GuC will reference via the GGTT
         */
-       desc.db_trigger_cpu = 0;
-       desc.db_trigger_uk = client->doorbell_offset +
-               i915_gem_obj_ggtt_offset(client->client_obj);
-       desc.db_trigger_phy = client->doorbell_offset +
-               sg_dma_address(client->client_obj->pages->sgl);
-
-       desc.process_desc = client->proc_desc_offset +
-               i915_gem_obj_ggtt_offset(client->client_obj);
-
-       desc.wq_addr = client->wq_offset +
-               i915_gem_obj_ggtt_offset(client->client_obj);
-
+       gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
+       desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
+                               client->doorbell_offset;
+       desc.db_trigger_cpu = (uintptr_t)client->client_base +
+                               client->doorbell_offset;
+       desc.db_trigger_uk = gfx_addr + client->doorbell_offset;
+       desc.process_desc = gfx_addr + client->proc_desc_offset;
+       desc.wq_addr = gfx_addr + client->wq_offset;
        desc.wq_size = client->wq_size;
 
        /*
@@ -465,17 +460,10 @@ int i915_guc_wq_check_space(struct i915_guc_client *gc)
        if (!gc)
                return 0;
 
-       /* Quickly return if wq space is available since last time we cache the
-        * head position. */
-       if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size)
-               return 0;
-
        desc = gc->client_base + gc->proc_desc_offset;
 
        while (timeout_counter-- > 0) {
-               gc->wq_head = desc->head;
-
-               if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size) {
+               if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
                        ret = 0;
                        break;
                }
@@ -490,10 +478,12 @@ int i915_guc_wq_check_space(struct i915_guc_client *gc)
 static int guc_add_workqueue_item(struct i915_guc_client *gc,
                                  struct drm_i915_gem_request *rq)
 {
+       struct guc_process_desc *desc;
        struct guc_wq_item *wqi;
        u32 tail, wq_len, wq_off, space;
 
-       space = CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size);
+       desc = gc->client_base + gc->proc_desc_offset;
+       space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
        if (WARN_ON(space < sizeof(struct guc_wq_item)))
                return -ENOSPC; /* shouldn't happen */
 
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index f2c051e..cc32a18 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -68,7 +68,7 @@ struct i915_guc_client {
        uint32_t wq_offset;
        uint32_t wq_size;
        uint32_t wq_tail;
-       uint32_t wq_head;
+       uint32_t unused;                /* Was 'wq_head'                */
 
        /* GuC submission statistics & status */
        uint64_t submissions[GUC_MAX_ENGINES_NUM];
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to