On 11/02/16 21:00, Chris Wilson wrote:
> On Thu, Feb 11, 2016 at 06:03:09PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
>>
>> The only caller of get_context_status ensures the read pointer stays in
>> range, so the WARN is impossible. Also, if the WARN were
>> triggered by a hypothetical new caller, stale status would be
>> returned to them.
>>
>> Maybe it is better to wrap the pointer in the function itself
>> instead, to avoid both issues; it even results in smaller code.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_lrc.c | 15 ++++++---------
>>   1 file changed, 6 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 89eb892df4ae..951f1e6af947 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -507,17 +507,16 @@ static bool execlists_check_remove_request(struct 
>> intel_engine_cs *ring,
>>      return false;
>>   }
>>   
>> -static void get_context_status(struct intel_engine_cs *ring,
>> -                           u8 read_pointer,
>> -                           u32 *status, u32 *context_id)
>> +static u32 get_context_status(struct intel_engine_cs *ring, u8 read_pointer,
>> +                          u32 *context_id)
>>   {
>>      struct drm_i915_private *dev_priv = ring->dev->dev_private;
>>   
>> -    if (WARN_ON(read_pointer >= GEN8_CSB_ENTRIES))
>> -            return;
>> +    read_pointer %= GEN8_CSB_ENTRIES;
>>   
>> -    *status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer));
>>      *context_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer));
> 
> Micro-optimising hat says not to even do the uncached, spinlocked mmio
> read when not required.

You mean move the forcewake grab out from the elsp write to cover all mmio
reads? These two patches make the irq handler around 3.7% smaller and
moving the forcewake/uncore lock shrinks that by 1% more. Must be
faster as well, if someone could measure it. :)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1e7ccd0a6573..77a64008f53d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -375,8 +375,6 @@ static void execlists_elsp_write(struct 
drm_i915_gem_request *rq0,
        rq0->elsp_submitted++;
 
        /* You must always write both descriptors in the order below. */
-       spin_lock(&dev_priv->uncore.lock);
-       intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
        I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[1]));
        I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[1]));
 
@@ -386,8 +384,6 @@ static void execlists_elsp_write(struct 
drm_i915_gem_request *rq0,
 
        /* ELSP is a wo register, use another nearby reg for posting */
        POSTING_READ_FW(RING_EXECLIST_STATUS_LO(ring));
-       intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
-       spin_unlock(&dev_priv->uncore.lock);
 }
 
 static int execlists_update_context(struct drm_i915_gem_request *rq)
@@ -513,9 +509,9 @@ static u32 get_context_status(struct intel_engine_cs *ring, 
u8 read_pointer,
 
        read_pointer %= GEN8_CSB_ENTRIES;
 
-       *context_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer));
+       *context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(ring, 
read_pointer));
 
-       return I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer));
+       return I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer));
 }
 
 /**
@@ -535,15 +531,18 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
        u32 status_id;
        u32 submit_contexts = 0;
 
-       status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
+       spin_lock(&ring->execlist_lock);
+
+       spin_lock(&dev_priv->uncore.lock);
+       intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
+
+       status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(ring));
 
        read_pointer = ring->next_context_status_buffer;
        write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
        if (read_pointer > write_pointer)
                write_pointer += GEN8_CSB_ENTRIES;
 
-       spin_lock(&ring->execlist_lock);
-
        while (read_pointer < write_pointer) {
                status = get_context_status(ring, ++read_pointer, &status_id);
 
@@ -569,22 +568,26 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
            GEN8_CTX_STATUS_ACTIVE_IDLE))))
                execlists_context_unqueue(ring);
 
-       spin_unlock(&ring->execlist_lock);
-
-       if (unlikely(submit_contexts > 2))
-               DRM_ERROR("More than two context complete events?\n");
-
        ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
 
        /* Update the read pointer to the old write pointer. Manual ringbuffer
         * management ftw </sarcasm> */
-       I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
-                  _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
-                                ring->next_context_status_buffer << 8));
+       I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(ring),
+                     _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
+                                   ring->next_context_status_buffer << 8));
+
+       intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
+       spin_unlock(&dev_priv->uncore.lock);
+
+       spin_unlock(&ring->execlist_lock);
+
+       if (unlikely(submit_contexts > 2))
+               DRM_ERROR("More than two context complete events?\n");
 }
 
 static int execlists_context_queue(struct drm_i915_gem_request *request)
 {
+       struct drm_i915_private *dev_priv = request->i915;
        struct intel_engine_cs *ring = request->ring;
        struct drm_i915_gem_request *cursor;
        int num_elements = 0;
@@ -616,9 +619,16 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
        }
 
        list_add_tail(&request->execlist_link, &ring->execlist_queue);
-       if (num_elements == 0)
+       if (num_elements == 0) {
+               spin_lock(&dev_priv->uncore.lock);
+               intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
+
                execlists_context_unqueue(ring);
 
+               intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
+               spin_unlock(&dev_priv->uncore.lock);
+       }
+
        spin_unlock_irq(&ring->execlist_lock);
 
        return 0;


Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to