[Intel-gfx] [PATCH] drm/i915: Change context lifecycle

2015-11-24 Thread Nick Hoath
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been saved.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
This is to solve a hang with GuC submission, and a theoretical
issue with execlist submission.

v2: Moved the new pin to cover GuC submission (Alex Dai)
Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/i915_gem.c  |  9 -
 drivers/gpu/drm/i915/intel_lrc.c | 73 ++--
 drivers/gpu/drm/i915/intel_lrc.h |  1 +
 4 files changed, 65 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5cf30b..4d2f44c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -889,6 +889,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool unsaved;
int pin_count;
} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e955499..6fee473 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1354,6 +1354,14 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
 
+   if (i915.enable_execlists) {
+   unsigned long flags;
+
+   spin_lock_irqsave(&request->ring->execlist_lock, flags);
+   intel_lr_context_complete_check(request);
+   spin_unlock_irqrestore(&request->ring->execlist_lock, flags);
+   }
+
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
 * of tail of the request to update the last known position
@@ -1384,7 +1392,6 @@ __i915_gem_request_retire__upto(struct 
drm_i915_gem_request *req)
do {
tmp = list_first_entry(&engine->request_list,
   typeof(*tmp), list);
-
i915_gem_request_retire(tmp);
} while (tmp != req);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 06180dc..a527c21 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -566,9 +566,6 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;
 
-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);
 
spin_lock_irq(&ring->execlist_lock);
@@ -728,10 +725,16 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
intel_logical_ring_advance(request->ringbuf);
 
request->tail = request->ringbuf->tail;
-
if (intel_ring_stopped(ring))
return;
 
+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].unsaved) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].unsaved = true;
+   }
+   }
+
if (dev_priv->guc.execbuf_client)
i915_guc_submit(dev_priv->guc.execbuf_client, request);
else
@@ -958,12 +961,6 @@ void intel_execlists_retire_requests(struct 
intel_engine_cs *ring)
spin_unlock_irq(&ring->execlist_lock);
 
list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-   struct intel_context *ctx = req->ctx;
-   struct drm_i915_gem_object *ctx_obj =
-   ctx->engine[ring->id].state;
-
-   if (ctx_obj && (ctx != ring->default_context))
-   intel_lr_context_unpin(req);
list_del(&req->execlist_link);
i915_gem_request_unreference(req);
}
@@ -1058,21 +1055,41 @@ reset_pin_count:
return ret;
 }
 
-void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
+static void intel_lr_context_unpin_no_req(struct intel_engine_cs *ring,
+   struct intel_context *ctx)
 {
-   struct intel_engine_cs *ring = rq->ring;
-   struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-   struct 

Re: [Intel-gfx] [PATCH] drm/i915: Change context lifecycle

2015-11-25 Thread Nick Hoath

On 25/11/2015 01:11, Dai, Yu wrote:



On 11/24/2015 08:23 AM, Nick Hoath wrote:

Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been saved.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
This is to solve a hang with GuC submission, and a theoretical
issue with execlist submission.

v2: Moved the new pin to cover GuC submission (Alex Dai)
  Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
  context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
---
   drivers/gpu/drm/i915/i915_drv.h  |  1 +
   drivers/gpu/drm/i915/i915_gem.c  |  9 -
   drivers/gpu/drm/i915/intel_lrc.c | 73 
++--
   drivers/gpu/drm/i915/intel_lrc.h |  1 +
   4 files changed, 65 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5cf30b..4d2f44c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -889,6 +889,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool unsaved;
int pin_count;
} engine[I915_NUM_RINGS];

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e955499..6fee473 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1354,6 +1354,14 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
   {
trace_i915_gem_request_retire(request);

+   if (i915.enable_execlists) {
+   unsigned long flags;
+
+   spin_lock_irqsave(&request->ring->execlist_lock, flags);
+   intel_lr_context_complete_check(request);
+   spin_unlock_irqrestore(&request->ring->execlist_lock, flags);
+   }
+
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
 * of tail of the request to update the last known position
@@ -1384,7 +1392,6 @@ __i915_gem_request_retire__upto(struct 
drm_i915_gem_request *req)
do {
tmp = list_first_entry(&engine->request_list,
   typeof(*tmp), list);
-
i915_gem_request_retire(tmp);
} while (tmp != req);

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 06180dc..a527c21 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -566,9 +566,6 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;

-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);

spin_lock_irq(&ring->execlist_lock);
@@ -728,10 +725,16 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
intel_logical_ring_advance(request->ringbuf);

request->tail = request->ringbuf->tail;
-
if (intel_ring_stopped(ring))
return;

+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].unsaved) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].unsaved = true;
+   }
+   }
+
if (dev_priv->guc.execbuf_client)
i915_guc_submit(dev_priv->guc.execbuf_client, request);
else
@@ -958,12 +961,6 @@ void intel_execlists_retire_requests(struct 
intel_engine_cs *ring)
spin_unlock_irq(&ring->execlist_lock);

list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-   struct intel_context *ctx = req->ctx;
-   struct drm_i915_gem_object *ctx_obj =
-   ctx->engine[ring->id].state;
-
-   if (ctx_obj && (ctx != ring->default_context))
-   intel_lr_context_unpin(req);
list_del(&req->execlist_link);
i915_gem_request_unreference(req);
}
@@ -1058,21 +1055,41 @@ reset_pin_count:
return ret;
   }

-void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
+static void intel_lr_context_unpin_no_req(struct intel_engine_cs *ring,
+   struct intel_context *ctx)
   {
-   struct intel_engine_cs *ring = rq->ring;
-   st

[Intel-gfx] [PATCH] drm/i915: Change context lifecycle

2015-11-25 Thread Nick Hoath
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.

v2: Moved the new pin to cover GuC submission (Alex Dai)
Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
already have a request pending on it (Alex Dai)
Rename unsaved to dirty to avoid double negatives (Dave Gordon)
Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
Split out per engine cleanup from context_free as it
was getting unwieldy
Corrected locking (Dave Gordon)

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
---
 drivers/gpu/drm/i915/i915_drv.h  |   1 +
 drivers/gpu/drm/i915/i915_gem.c  |   3 +
 drivers/gpu/drm/i915/intel_lrc.c | 124 +++
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 4 files changed, 105 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5cf30b..e82717a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -889,6 +889,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool dirty;
int pin_count;
} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e955499..3829bc1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
 
+   if (i915.enable_execlists)
+   intel_lr_context_complete_check(request);
+
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
 * of tail of the request to update the last known position
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 06180dc..03d5bca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -566,9 +566,6 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;
 
-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);
 
spin_lock_irq(&ring->execlist_lock);
@@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
if (intel_ring_stopped(ring))
return;
 
+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].dirty) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].dirty = true;
+   }
+   }
+
if (dev_priv->guc.execbuf_client)
i915_guc_submit(dev_priv->guc.execbuf_client, request);
else
@@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct 
intel_engine_cs *ring)
spin_unlock_irq(&ring->execlist_lock);
 
list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-   struct intel_context *ctx = req->ctx;
-   struct drm_i915_gem_object *ctx_obj =
-   ctx->engine[ring->id].state;
-
-   if (ctx_obj && (ctx != ring->default_context))
-   intel_lr_context_unpin(req);
list_del(&req->execlist_link);
i915_gem_request_unreference(req);
}
@@ -1058,21 +1056,39 @@ reset_pin_count:
return ret;
 }
 
-void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
+static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
+   struct intel_context *ctx)
 {
-   struct intel_engine_cs *ring = rq->ring;
-   struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-   struct intel_ringbuffer *ringbuf = rq->ringbuf;
-
+   struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+   struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
if (ctx_obj) {
WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-   if (--rq->ctx->engine[ring->id

Re: [Intel-gfx] [PATCH v2] drm/i915/guc: Clean up locks in GuC

2015-11-26 Thread Nick Hoath

On 25/11/2015 19:29, Dai, Yu wrote:

From: Alex Dai 

When GuC Work Queue is full, driver will wait GuC for avaliable

available

space by delaying 1ms. The wait needs to be out of spinlockirq /
unlock. Otherwise, lockup happens because jiffies won't be updated
dur to irq is disabled. The unnecessary locks has been cleared.

  duebeing  have

dev->struct_mutex is used instead where needed.

Issue is found in igt/gem_close_race.

v2: Clean up wq_lock too
v1: Clean up host2guc lock as well

Signed-off-by: Alex Dai 
---
  drivers/gpu/drm/i915/i915_debugfs.c| 12 +--
  drivers/gpu/drm/i915/i915_guc_submission.c | 32 +++---
  drivers/gpu/drm/i915/intel_guc.h   |  4 
  3 files changed, 13 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index a728ff1..d6b7817 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2473,15 +2473,15 @@ static int i915_guc_info(struct seq_file *m, void *data)
if (!HAS_GUC_SCHED(dev_priv->dev))
return 0;

+   if (mutex_lock_interruptible(&dev->struct_mutex))
+   return 0;
+
/* Take a local copy of the GuC data, so we can dump it at leisure */
-   spin_lock(&dev_priv->guc.host2guc_lock);
guc = dev_priv->guc;
-   if (guc.execbuf_client) {
-   spin_lock(&guc.execbuf_client->wq_lock);
+   if (guc.execbuf_client)
client = *guc.execbuf_client;
-   spin_unlock(&guc.execbuf_client->wq_lock);
-   }
-   spin_unlock(&dev_priv->guc.host2guc_lock);
+
+   mutex_unlock(&dev->struct_mutex);

seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index ed9f100..97996e5 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -86,7 +86,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, 
u32 len)
return -EINVAL;

intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-   spin_lock(&dev_priv->guc.host2guc_lock);

dev_priv->guc.action_count += 1;
dev_priv->guc.action_cmd = data[0];
@@ -119,7 +118,6 @@ static int host2guc_action(struct intel_guc *guc, u32 
*data, u32 len)
}
dev_priv->guc.action_status = status;

-   spin_unlock(&dev_priv->guc.host2guc_lock);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

return ret;
@@ -249,6 +247,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
}

kunmap_atomic(base);
+

Unnecessary whitespace churn

return ret;
  }

@@ -292,16 +291,12 @@ static uint32_t select_doorbell_cacheline(struct 
intel_guc *guc)
const uint32_t cacheline_size = cache_line_size();
uint32_t offset;

-   spin_lock(&guc->host2guc_lock);
-
/* Doorbell uses a single cache line within a page */
offset = offset_in_page(guc->db_cacheline);

/* Moving to next cache line to reduce contention */
guc->db_cacheline += cacheline_size;

-   spin_unlock(&guc->host2guc_lock);
-
DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize 
%u\n",
offset, guc->db_cacheline, cacheline_size);

@@ -322,13 +317,11 @@ static uint16_t assign_doorbell(struct intel_guc *guc, 
uint32_t priority)
const uint16_t end = start + half;
uint16_t id;

-   spin_lock(&guc->host2guc_lock);
id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
if (id == end)
id = GUC_INVALID_DOORBELL_ID;
else
bitmap_set(guc->doorbell_bitmap, id, 1);
-   spin_unlock(&guc->host2guc_lock);

DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
hi_pri ? "high" : "normal", id);
@@ -338,9 +331,7 @@ static uint16_t assign_doorbell(struct intel_guc *guc, 
uint32_t priority)

  static void release_doorbell(struct intel_guc *guc, uint16_t id)
  {
-   spin_lock(&guc->host2guc_lock);
bitmap_clear(guc->doorbell_bitmap, id, 1);
-   spin_unlock(&guc->host2guc_lock);
  }

  /*
@@ -487,16 +478,13 @@ static int guc_get_workqueue_space(struct i915_guc_client 
*gc, u32 *offset)
struct guc_process_desc *desc;
void *base;
u32 size = sizeof(struct guc_wq_item);
-   int ret = 0, timeout_counter = 200;
+   int ret = -ETIMEDOUT, timeout_counter = 200;

base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
desc = base + gc->proc_desc_offset;

while (timeout_counter-- > 0) {
-   ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, de

Re: [Intel-gfx] [PATCH] drm/i915: Change context lifecycle

2015-11-26 Thread Nick Hoath

On 26/11/2015 08:48, Daniel Vetter wrote:

On Wed, Nov 25, 2015 at 05:02:44PM +0200, Mika Kuoppala wrote:

Nick Hoath  writes:


Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.

v2: Moved the new pin to cover GuC submission (Alex Dai)
 Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
 context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
 already have a request pending on it (Alex Dai)
 Rename unsaved to dirty to avoid double negatives (Dave Gordon)
 Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
 Split out per engine cleanup from context_free as it
 was getting unwieldy
 Corrected locking (Dave Gordon)

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
---
  drivers/gpu/drm/i915/i915_drv.h  |   1 +
  drivers/gpu/drm/i915/i915_gem.c  |   3 +
  drivers/gpu/drm/i915/intel_lrc.c | 124 +++
  drivers/gpu/drm/i915/intel_lrc.h |   1 +
  4 files changed, 105 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5cf30b..e82717a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -889,6 +889,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool dirty;
int pin_count;
} engine[I915_NUM_RINGS];

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e955499..3829bc1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
  {
trace_i915_gem_request_retire(request);

+   if (i915.enable_execlists)
+   intel_lr_context_complete_check(request);
+
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
 * of tail of the request to update the last known position
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 06180dc..03d5bca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -566,9 +566,6 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;

-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);

spin_lock_irq(&ring->execlist_lock);
@@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
if (intel_ring_stopped(ring))
return;

+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].dirty) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].dirty = true;
+   }
+   }
+
if (dev_priv->guc.execbuf_client)
i915_guc_submit(dev_priv->guc.execbuf_client, request);
else
@@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct 
intel_engine_cs *ring)
spin_unlock_irq(&ring->execlist_lock);

list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-   struct intel_context *ctx = req->ctx;
-   struct drm_i915_gem_object *ctx_obj =
-   ctx->engine[ring->id].state;
-
-   if (ctx_obj && (ctx != ring->default_context))
-   intel_lr_context_unpin(req);
list_del(&req->execlist_link);
i915_gem_request_unreference(req);
}
@@ -1058,21 +1056,39 @@ reset_pin_count:
return ret;
  }

-void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
+static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
+   struct intel_context *ctx)
  {
-   struct intel_engine_cs *ring = rq->ring;
-   struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-   struct intel_ringbuffer *ringbuf = rq->ringbuf;
-
+   struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+   struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
if (ctx_obj) 

[Intel-gfx] [PATCH v6] drm/i915: Change context lifecycle

2015-11-26 Thread Nick Hoath
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
This fixes an issue with GuC submission where the GPU might not
have finished writing back the context before it is unpinned. This
results in a GPU hang.

v2: Moved the new pin to cover GuC submission (Alex Dai)
Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
already have a request pending on it (Alex Dai)
Rename unsaved to dirty to avoid double negatives (Dave Gordon)
Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
Split out per engine cleanup from context_free as it
was getting unwieldy
Corrected locking (Dave Gordon)
v6: Removed some bikeshedding (Mika Kuoppala)
Added explanation of the GuC hang that this fixes (Daniel Vetter)

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
Cc: Mika Kuoppala 
---
 drivers/gpu/drm/i915/i915_drv.h  |   1 +
 drivers/gpu/drm/i915/i915_gem.c  |   3 +
 drivers/gpu/drm/i915/intel_lrc.c | 122 +++
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 4 files changed, 104 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5cf30b..e82717a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -889,6 +889,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool dirty;
int pin_count;
} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e955499..3829bc1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
 
+   if (i915.enable_execlists)
+   intel_lr_context_complete_check(request);
+
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
 * of tail of the request to update the last known position
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 06180dc..dbe64ff 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -566,9 +566,6 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;
 
-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);
 
spin_lock_irq(&ring->execlist_lock);
@@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
if (intel_ring_stopped(ring))
return;
 
+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].dirty) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].dirty = true;
+   }
+   }
+
if (dev_priv->guc.execbuf_client)
i915_guc_submit(dev_priv->guc.execbuf_client, request);
else
@@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct 
intel_engine_cs *ring)
spin_unlock_irq(&ring->execlist_lock);
 
list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-   struct intel_context *ctx = req->ctx;
-   struct drm_i915_gem_object *ctx_obj =
-   ctx->engine[ring->id].state;
-
-   if (ctx_obj && (ctx != ring->default_context))
-   intel_lr_context_unpin(req);
list_del(&req->execlist_link);
i915_gem_request_unreference(req);
}
@@ -1058,21 +1056,39 @@ reset_pin_count:
return ret;
 }
 
-void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
+static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
+   struct intel_context *ctx)
 {
-   struct intel_engine_cs *ring = rq->ring;
-   struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-   struct intel_ringbuffer *ringbuf = rq->ringbuf;
-
+   struct drm_i915_gem_objec

[Intel-gfx] [PATCH] drm/i915: Extend LRC pinning to cover GPU context writeback

2015-12-01 Thread Nick Hoath
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
This fixes an issue with GuC submission where the GPU might not
have finished writing back the context before it is unpinned. This
results in a GPU hang.

v2: Moved the new pin to cover GuC submission (Alex Dai)
Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
already have a request pending on it (Alex Dai)
Rename unsaved to dirty to avoid double negatives (Dave Gordon)
Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
Split out per engine cleanup from context_free as it
was getting unwieldy
Corrected locking (Dave Gordon)
v6: Removed some bikeshedding (Mika Kuoppala)
Added explanation of the GuC hang that this fixes (Daniel Vetter)
v7: Removed extra per request pinning from ring reset code (Alex Dai)
Added forced ring unpin/clean in error case in context free (Alex Dai)

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
Cc: Mika Kuoppala 
---
 drivers/gpu/drm/i915/i915_drv.h  |   1 +
 drivers/gpu/drm/i915/i915_gem.c  |   7 +-
 drivers/gpu/drm/i915/intel_lrc.c | 136 ---
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 4 files changed, 118 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5cf30b..e82717a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -889,6 +889,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool dirty;
int pin_count;
} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e955499..69e9d96 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
 
+   if (i915.enable_execlists)
+   intel_lr_context_complete_check(request);
+
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
 * of tail of the request to update the last known position
@@ -2765,10 +2768,6 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
struct drm_i915_gem_request,
execlist_link);
list_del(&submit_req->execlist_link);
-
-   if (submit_req->ctx != ring->default_context)
-   intel_lr_context_unpin(submit_req);
-
i915_gem_request_unreference(submit_req);
}
spin_unlock_irq(&ring->execlist_lock);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 06180dc..b4d9c8f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -566,9 +566,6 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;
 
-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);
 
spin_lock_irq(&ring->execlist_lock);
@@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
if (intel_ring_stopped(ring))
return;
 
+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].dirty) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].dirty = true;
+   }
+   }
+
if (dev_priv->guc.execbuf_client)
i915_guc_submit(dev_priv->guc.execbuf_client, request);
else
@@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct 
intel_engine_cs *ring)
spin_unlock_irq(&ring->execlist_lock);
 
list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-   struct intel_context *ctx = req->ctx;
-   struct drm_i915_gem_object *ctx_obj =
-   ctx->e

[Intel-gfx] [PATCH v8] drm/i915: Extend LRC pinning to cover GPU context writeback

2015-12-07 Thread Nick Hoath
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
This fixes an issue with GuC submission where the GPU might not
have finished writing back the context before it is unpinned. This
results in a GPU hang.

v2: Moved the new pin to cover GuC submission (Alex Dai)
Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
already have a request pending on it (Alex Dai)
Rename unsaved to dirty to avoid double negatives (Dave Gordon)
Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
Split out per engine cleanup from context_free as it
was getting unwieldy
Corrected locking (Dave Gordon)
v6: Removed some bikeshedding (Mika Kuoppala)
Added explanation of the GuC hang that this fixes (Daniel Vetter)
v7: Removed extra per request pinning from ring reset code (Alex Dai)
Added forced ring unpin/clean in error case in context free (Alex Dai)
v8: Renamed lrc specific last_context to lrc_last_context as there
were some reset cases where the codepaths leaked (Mika Kuoppala)
NULL'd last_context in reset case - there was a pointer leak
if someone did reset->close context.
Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
Cc: Mika Kuoppala 
---
 drivers/gpu/drm/i915/i915_drv.h |   1 +
 drivers/gpu/drm/i915/i915_gem.c |   7 +-
 drivers/gpu/drm/i915/intel_lrc.c| 138 ++--
 drivers/gpu/drm/i915/intel_lrc.h|   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
 5 files changed, 121 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9ab3e25..a59ca13 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -884,6 +884,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool dirty;
int pin_count;
} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a6997a8..cd27ecc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1362,6 +1362,9 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
 
+   if (i915.enable_execlists)
+   intel_lr_context_complete_check(request);
+
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
 * of tail of the request to update the last known position
@@ -2772,10 +2775,6 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
struct drm_i915_gem_request,
execlist_link);
list_del(&submit_req->execlist_link);
-
-   if (submit_req->ctx != ring->default_context)
-   intel_lr_context_unpin(submit_req);
-
i915_gem_request_unreference(submit_req);
}
spin_unlock_irq(&ring->execlist_lock);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4ebafab..f96fb51 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -571,9 +571,6 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;
 
-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);
 
spin_lock_irq(&ring->execlist_lock);
@@ -737,6 +734,13 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
if (intel_ring_stopped(ring))
return;
 
+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].dirty) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].dirty = true;
+   }
+   }
+
if (dev_priv->guc.execbuf_client)
i915_guc_submit(dev_priv->guc.execbuf_client, request);
else
@@ -963,12 +967,6 @@ void intel_execlists_retire_requests

[Intel-gfx] [PATCH] drm/i915: Fix context/engine cleanup order

2015-12-11 Thread Nick Hoath
Swap the order of context & engine cleanup, so that it is now
contexts, then engines.
This allows the context clean up code to do things like confirm
that ring->dev->struct_mutex is locked without a NULL pointer
dereference.
This came about as a result of the 'intel_ring_initialized() must
be simple and inline' patch now using ring->dev as an initialised
flag.
Rename the cleanup function to reflect what it actually does.
Also clean up some very annoying whitespace issues at the same time.

Signed-off-by: Nick Hoath 
Cc: Mika Kuoppala 
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_dma.c |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 23 ---
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 84e2b20..a2857b0 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -449,7 +449,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 cleanup_gem:
mutex_lock(&dev->struct_mutex);
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
i915_gem_context_fini(dev);
mutex_unlock(&dev->struct_mutex);
 cleanup_irq:
@@ -1188,8 +1188,8 @@ int i915_driver_unload(struct drm_device *dev)
 
intel_guc_ucode_fini(dev);
mutex_lock(&dev->struct_mutex);
-   i915_gem_cleanup_ringbuffer(dev);
i915_gem_context_fini(dev);
+   i915_gem_cleanup_engines(dev);
mutex_unlock(&dev->struct_mutex);
intel_fbc_cleanup_cfb(dev_priv);
i915_gem_cleanup_stolen(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5edd393..e317f88 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3016,7 +3016,7 @@ int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
-void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
+void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void __i915_add_request(struct drm_i915_gem_request *req,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8e2acde..04a22db 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4823,7 +4823,7 @@ i915_gem_init_hw(struct drm_device *dev)
 
ret = i915_gem_request_alloc(ring, ring->default_context, &req);
if (ret) {
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4836,7 +4836,7 @@ i915_gem_init_hw(struct drm_device *dev)
if (ret && ret != -EIO) {
DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
i915_gem_request_cancel(req);
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4844,7 +4844,7 @@ i915_gem_init_hw(struct drm_device *dev)
if (ret && ret != -EIO) {
DRM_ERROR("Context enable ring #%d failed %d\n", i, 
ret);
i915_gem_request_cancel(req);
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4919,7 +4919,7 @@ out_unlock:
 }
 
 void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev)
+i915_gem_cleanup_engines(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
@@ -4928,13 +4928,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
for_each_ring(ring, dev_priv, i)
dev_priv->gt.cleanup_ring(ring);
 
-if (i915.enable_execlists)
-/*
- * Neither the BIOS, ourselves or any other kernel
- * expects the system to be in execlists mode on startup,
- * so we need to reset the GPU back to legacy mode.
- */
-intel_gpu_reset(dev);
+   if (i915.enable_execlists) {
+   /*
+* Neither the BIOS, ourselves or any other kernel
+* expects the system to be in execlists mode on startup,
+* so we need to reset the GPU back to legacy mode.
+*/
+   intel_gpu_reset(dev);
+   }
 }
 
 static void
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2] drm/i915: Fix context/engine cleanup order

2015-12-11 Thread Nick Hoath
Swap the order of context & engine cleanup, so that it is now
contexts, then engines.
This allows the context clean up code to do things like confirm
that ring->dev->struct_mutex is locked without a NULL pointer
dereference.
This came about as a result of the 'intel_ring_initialized() must
be simple and inline' patch now using ring->dev as an initialised
flag.
Rename the cleanup function to reflect what it actually does.
Also clean up some very annoying whitespace issues at the same time.

v2: Also make the fix in i915_load_modeset_init, not just
in i915_driver_unload (Chris Wilson)

Signed-off-by: Nick Hoath 
Reviewed-by: Chris Wilson 

Cc: Mika Kuoppala 
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_dma.c |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 23 ---
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 84e2b20..4dad121 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -449,8 +449,8 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 cleanup_gem:
mutex_lock(&dev->struct_mutex);
-   i915_gem_cleanup_ringbuffer(dev);
i915_gem_context_fini(dev);
+   i915_gem_cleanup_engines(dev);
mutex_unlock(&dev->struct_mutex);
 cleanup_irq:
intel_guc_ucode_fini(dev);
@@ -1188,8 +1188,8 @@ int i915_driver_unload(struct drm_device *dev)
 
intel_guc_ucode_fini(dev);
mutex_lock(&dev->struct_mutex);
-   i915_gem_cleanup_ringbuffer(dev);
i915_gem_context_fini(dev);
+   i915_gem_cleanup_engines(dev);
mutex_unlock(&dev->struct_mutex);
intel_fbc_cleanup_cfb(dev_priv);
i915_gem_cleanup_stolen(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5edd393..e317f88 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3016,7 +3016,7 @@ int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
-void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
+void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void __i915_add_request(struct drm_i915_gem_request *req,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8e2acde..04a22db 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4823,7 +4823,7 @@ i915_gem_init_hw(struct drm_device *dev)
 
ret = i915_gem_request_alloc(ring, ring->default_context, &req);
if (ret) {
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4836,7 +4836,7 @@ i915_gem_init_hw(struct drm_device *dev)
if (ret && ret != -EIO) {
DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
i915_gem_request_cancel(req);
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4844,7 +4844,7 @@ i915_gem_init_hw(struct drm_device *dev)
if (ret && ret != -EIO) {
DRM_ERROR("Context enable ring #%d failed %d\n", i, 
ret);
i915_gem_request_cancel(req);
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4919,7 +4919,7 @@ out_unlock:
 }
 
 void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev)
+i915_gem_cleanup_engines(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
@@ -4928,13 +4928,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
for_each_ring(ring, dev_priv, i)
dev_priv->gt.cleanup_ring(ring);
 
-if (i915.enable_execlists)
-/*
- * Neither the BIOS, ourselves or any other kernel
- * expects the system to be in execlists mode on startup,
- * so we need to reset the GPU back to legacy mode.
- */
-intel_gpu_reset(dev);
+   if (i915.enable_execlists) {
+   /*
+* Neither the BIOS, ourselves or any other kernel
+* expects the system to be in execlists mode on startup,
+* so we need to reset the GPU back to legacy mode.

[Intel-gfx] [PATCH] drm/i915: Fix context/engine cleanup order

2015-12-14 Thread Nick Hoath
Swap the order of context & engine cleanup, so that it is now
contexts, then engines.
This allows the context clean up code to do things like confirm
that ring->dev->struct_mutex is locked without a NULL pointer
dereference.
This came about as a result of the 'intel_ring_initialized() must
be simple and inline' patch now using ring->dev as an initialised
flag.
Rename the cleanup function to reflect what it actually does.
Also clean up some very annoying whitespace issues at the same time.
Previous code did a kunmap() on the wrong page, and didn't account for
the fact that the HWSP and the default context are the different offsets
within the same object.

v2: Also make the fix in i915_load_modeset_init, not just
in i915_driver_unload (Chris Wilson)
v3: Folded in Dave Gordon's fix for HWSP kunmap issues.

Signed-off-by: Nick Hoath 
Reviewed-by: Chris Wilson 

Cc: Mika Kuoppala 
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_dma.c |  4 +--
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 23 ++---
 drivers/gpu/drm/i915/i915_gem_context.c |  9 --
 drivers/gpu/drm/i915/intel_lrc.c| 57 +++--
 5 files changed, 62 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 84e2b20..4dad121 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -449,8 +449,8 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 cleanup_gem:
mutex_lock(&dev->struct_mutex);
-   i915_gem_cleanup_ringbuffer(dev);
i915_gem_context_fini(dev);
+   i915_gem_cleanup_engines(dev);
mutex_unlock(&dev->struct_mutex);
 cleanup_irq:
intel_guc_ucode_fini(dev);
@@ -1188,8 +1188,8 @@ int i915_driver_unload(struct drm_device *dev)
 
intel_guc_ucode_fini(dev);
mutex_lock(&dev->struct_mutex);
-   i915_gem_cleanup_ringbuffer(dev);
i915_gem_context_fini(dev);
+   i915_gem_cleanup_engines(dev);
mutex_unlock(&dev->struct_mutex);
intel_fbc_cleanup_cfb(dev_priv);
i915_gem_cleanup_stolen(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5edd393..e317f88 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3016,7 +3016,7 @@ int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
-void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
+void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void __i915_add_request(struct drm_i915_gem_request *req,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8e2acde..04a22db 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4823,7 +4823,7 @@ i915_gem_init_hw(struct drm_device *dev)
 
ret = i915_gem_request_alloc(ring, ring->default_context, &req);
if (ret) {
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4836,7 +4836,7 @@ i915_gem_init_hw(struct drm_device *dev)
if (ret && ret != -EIO) {
DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
i915_gem_request_cancel(req);
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4844,7 +4844,7 @@ i915_gem_init_hw(struct drm_device *dev)
if (ret && ret != -EIO) {
DRM_ERROR("Context enable ring #%d failed %d\n", i, 
ret);
i915_gem_request_cancel(req);
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4919,7 +4919,7 @@ out_unlock:
 }
 
 void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev)
+i915_gem_cleanup_engines(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
@@ -4928,13 +4928,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
for_each_ring(ring, dev_priv, i)
dev_priv->gt.cleanup_ring(ring);
 
-if (i915.enable_execlists)
-/*
- * Neither the BIOS, ourselves or any other kernel
- * expects the system to be in execlists mode on startup,
-   

Re: [Intel-gfx] [PATCH 1/4] drm/i915: teardown default context in reverse, update comments

2015-12-17 Thread Nick Hoath

Reviewed-by: Nick Hoath 

On 16/12/2015 18:36, Gordon, David S wrote:

We set up engines in forwards order, so some things (notably the
default context) are "owned" by engine 0 (the render engine, aka "RCS").
For symmetry and to make sure such shared objects don't disappear too
early, we should generally run teardown loops in the reverse order,
so that engine 0 is processed last.

This patch changes i915_gem_context_fini() to do that, and clarifies the
comments in i915_gem_context_{init,fini}() about the refcounting of the
default (struct intel_)context: the refcount is just ONE, no matter how
many rings exist or are active, and this refcount is nominally ascribed
to the render ring (RCS), which is set up first and now torn down last.

Signed-off-by: Dave Gordon 
---
  drivers/gpu/drm/i915/i915_gem_context.c | 21 +
  1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 900ffd0..e143ea5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -391,7 +391,13 @@ int i915_gem_context_init(struct drm_device *dev)
for (i = 0; i < I915_NUM_RINGS; i++) {
struct intel_engine_cs *ring = &dev_priv->ring[i];

-   /* NB: RCS will hold a ref for all rings */
+   /*
+* Although each engine has a pointer to the global default
+* context, they don't contribute to the refcount on the
+* context. We consider that RCS (which is set up first and
+* torn down last) holds this reference on behalf of all the
+* other engines
+*/
ring->default_context = ctx;
}

@@ -431,14 +437,21 @@ void i915_gem_context_fini(struct drm_device *dev)
i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
}

-   for (i = 0; i < I915_NUM_RINGS; i++) {
+   for (i = I915_NUM_RINGS; --i >= 0;) {
struct intel_engine_cs *ring = &dev_priv->ring[i];

-   if (ring->last_context)
+   if (ring->last_context) {
i915_gem_context_unreference(ring->last_context);
+   ring->last_context = NULL;
+   }

+   /*
+* These default_context pointers don't contribute to the
+* refcount on the context. We consider that RCS holds its
+* reference on behalf of all the other engines, so there's
+* just a single unreference() call below.
+*/
ring->default_context = NULL;
-   ring->last_context = NULL;
}

i915_gem_context_unreference(dctx);



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/4] drm/i915: mark the global default (intel_)context as such

2015-12-17 Thread Nick Hoath

On 16/12/2015 19:30, Chris Wilson wrote:

On Wed, Dec 16, 2015 at 07:22:52PM +, Dave Gordon wrote:

On 16/12/15 18:57, Chris Wilson wrote:

On Wed, Dec 16, 2015 at 06:36:49PM +, Dave Gordon wrote:

Some of the LRC-specific context-destruction code has to special-case
the global default context, because the HWSP is part of that context. At
present it deduces it indirectly by checking for the backpointer from
the engine to the context, but that's an unsafe assumption if the setup
and teardown code is reorganised. (It could also test !ctx->file_priv,
but again that's a detail that might be subject to change).

So here we explicitly flag the default context at the point of creation,
and then reorganise the code in intel_lr_context_free() not to rely on
the ring->default_pointer (still) being set up; to iterate over engines
in reverse (as this is teardown code); and to reduce the nesting level
so it's easier to read.

Signed-off-by: Dave Gordon 


#define intel_context_is_global(ctx) ((ctx)->file_priv == NULL)


The last sentence of the first paragraph of the commit message above
notes that we *could* use that as a test, but I don't regard it as a
safe test, in either direction. That is, it could give a false
negative if we someday associate some (internal) fd with the default
context, or (more likely) a false positive if the file association
were broken and the pointer nulled in an earlier stage of the
teardown of a non-global (user-created) context.

int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
 struct drm_i915_gem_context_destroy *args = data;
 struct drm_i915_file_private *file_priv = file->driver_priv;
 struct intel_context *ctx;
 int ret;

 if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
 return -ENOENT;

 ret = i915_mutex_lock_interruptible(dev);
 if (ret)
 return ret;

 ctx = i915_gem_context_get(file_priv, args->ctx_id);
 if (IS_ERR(ctx)) {
 mutex_unlock(&dev->struct_mutex);
 return PTR_ERR(ctx);
 }

 idr_remove(&ctx->file_priv->context_idr, ctx->user_handle);
 i915_gem_context_unreference(ctx);
 mutex_unlock(&dev->struct_mutex);

 DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
 return 0;
}

At present, i915_gem_context_destroy_ioctl() above removes the
context from the file's list-of-contexts but DOESN'T clear the
ctx->file_priv, which means there's a somewhat inconsistent (but
transient) state during which a soon-to-be-destroyed context links
to a file, but the file doesn't have a link back. It probably
doesn't matter, because the code holds the mutex across the two
operations ...


And that the ctx was created to belong to the file still holds true.


... unless of course the context's refcount isn't 1 at this point,
in which case I suppose someone else *might* go from the context to
the file and then be mystified as to why the context isn't on the
list ...

... and if we changed the code above, then file_priv would *always*
be NULL by the time the destructor was called!

So it's surely safer to have a flag that explicitly says "I'm the
global default context" than to guess based on some other contingent
property.


No, we have a flag that says this context was created belonging to a
file, with the corollary that only one context doesn't belong to any
file.

Using pointers like this to provide 'magic' secondary state information
just adds to the fragility of the driver.
So:
Reviewed-by: Nick Hoath 
to the original patch.

-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/4] drm/i915: tidy up initialisation failure paths (legacy)

2015-12-17 Thread Nick Hoath

On 16/12/2015 18:36, Gordon, David S wrote:

1. Fix intel_cleanup_ring_buffer() to handle the error cleanup
case where the ringbuffer has been allocated but map-and-pin
failed. Unpin it iff it's previously been mapped-and-pinned.

2. Fix the error path in intel_init_ring_buffer(), which already
called intel_destroy_ringbuffer_obj(), but failed to free the
actual ringbuffer structure. Calling intel_ringbuffer_free()
instead does both in one go.

3. With the above change, intel_destroy_ringbuffer_obj() is only
called in one place (intel_ringbuffer_free()), so flatten it
into that function.

4. move low-level register accesses from intel_cleanup_ring_buffer()
(which calls intel_stop_ring_buffer(ring) which calls stop_ring())
down into stop_ring() itself), which is already doing low-level
register accesses. Then, intel_cleanup_ring_buffer() no longer
needs 'dev_priv'.


Reviewed-by: Nick Hoath 


Signed-off-by: Dave Gordon 
---
  drivers/gpu/drm/i915/intel_ringbuffer.c | 47 +++--
  1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index eefce9a..2853754 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -549,6 +549,8 @@ static bool stop_ring(struct intel_engine_cs *ring)
I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
}

+   WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
+
return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
  }

@@ -2057,12 +2059,6 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_device 
*dev,
return 0;
  }

-static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
-{
-   drm_gem_object_unreference(&ringbuf->obj->base);
-   ringbuf->obj = NULL;
-}
-
  static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
  struct intel_ringbuffer *ringbuf)
  {
@@ -2125,11 +2121,14 @@ intel_engine_create_ringbuffer(struct intel_engine_cs 
*engine, int size)
  }

  void
-intel_ringbuffer_free(struct intel_ringbuffer *ring)
+intel_ringbuffer_free(struct intel_ringbuffer *ringbuf)
  {
-   intel_destroy_ringbuffer_obj(ring);
-   list_del(&ring->link);
-   kfree(ring);
+   if (ringbuf->obj) {
+   drm_gem_object_unreference(&ringbuf->obj->base);
+   ringbuf->obj = NULL;
+   }
+   list_del(&ringbuf->link);
+   kfree(ringbuf);
  }

  static int intel_init_ring_buffer(struct drm_device *dev,
@@ -2157,6 +2156,13 @@ static int intel_init_ring_buffer(struct drm_device *dev,
}
ring->buffer = ringbuf;

+   ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+   if (ret) {
+   DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
+   ring->name, ret);
+   goto error;
+   }
+
if (I915_NEED_GFX_HWS(dev)) {
ret = init_status_page(ring);
if (ret)
@@ -2168,14 +2174,6 @@ static int intel_init_ring_buffer(struct drm_device *dev,
goto error;
}

-   ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
-   if (ret) {
-   DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
-   ring->name, ret);
-   intel_destroy_ringbuffer_obj(ringbuf);
-   goto error;
-   }
-
ret = i915_cmd_parser_init_ring(ring);
if (ret)
goto error;
@@ -2189,19 +2187,18 @@ error:

  void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
  {
-   struct drm_i915_private *dev_priv;
+   struct intel_ringbuffer *ringbuf;

if (!intel_ring_initialized(ring))
return;

-   dev_priv = to_i915(ring->dev);
-
-   if (ring->buffer) {
+   ringbuf = ring->buffer;
+   if (ringbuf) {
intel_stop_ring_buffer(ring);
-   WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & 
MODE_IDLE) == 0);

-   intel_unpin_ringbuffer_obj(ring->buffer);
-   intel_ringbuffer_free(ring->buffer);
+   if (ringbuf->virtual_start)
+   intel_unpin_ringbuffer_obj(ringbuf);
+   intel_ringbuffer_free(ringbuf);
ring->buffer = NULL;
}




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 4/4] drm/i915: tidy up initialisation failure paths (GEM & LRC)

2015-12-17 Thread Nick Hoath

On 16/12/2015 18:36, Gordon, David S wrote:

1. add call to i915_gem_context_fini() to deallocate the default
context(s) if the call to init_rings() fails, so that we don't
leak the context in that situation.

2. remove useless code in intel_logical_ring_cleanup(), presumably
copypasted from legacy ringbuffer version at creation.



Reviewed-by: Nick Hoath 


Signed-off-by: Dave Gordon 
---
  drivers/gpu/drm/i915/i915_gem.c  |  5 -
  drivers/gpu/drm/i915/intel_lrc.c | 10 ++
  2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 66b1705..15f8989 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4890,8 +4890,11 @@ int i915_gem_init(struct drm_device *dev)
goto out_unlock;

ret = dev_priv->gt.init_rings(dev);
-   if (ret)
+   if (ret) {
+   i915_gem_context_fini(dev);
+   /* XXX: anything else to be undone here? */
goto out_unlock;
+   }

ret = i915_gem_init_hw(dev);
if (ret == -EIO) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 23f90b2..cdb65eb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1887,17 +1887,11 @@ static int gen8_init_rcs_context(struct 
drm_i915_gem_request *req)
   */
  void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
  {
-   struct drm_i915_private *dev_priv;
-
if (!intel_ring_initialized(ring))
return;

-   dev_priv = ring->dev->dev_private;
-
-   if (ring->buffer) {
-   intel_logical_ring_stop(ring);
-   WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
-   }
+   /* should not be set in LRC mode */
+   WARN_ON(ring->buffer);

if (ring->cleanup)
ring->cleanup(ring);



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4] drm/i915: Fix context/engine cleanup order

2015-12-18 Thread Nick Hoath
Swap the order of context & engine cleanup, so that it is now
contexts, then engines.
This allows the context clean up code to do things like confirm
that ring->dev->struct_mutex is locked without a NULL pointer
dereference.
This came about as a result of the 'intel_ring_initialized() must
be simple and inline' patch now using ring->dev as an initialised
flag.
Rename the cleanup function to reflect what it actually does.
Also clean up some very annoying whitespace issues at the same time.
Previous code did a kunmap() on the wrong page, and didn't account for
the fact that the HWSP and the default context are the different offsets
within the same object.

v2: Also make the fix in i915_load_modeset_init, not just
in i915_driver_unload (Chris Wilson)
v3: Folded in Dave Gordon's fix for HWSP kunmap issues.
v4: Rebase over Dave Gordon's various cleanups

Signed-off-by: Nick Hoath 

Cc: Mika Kuoppala 
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_dma.c  |  4 +--
 drivers/gpu/drm/i915/i915_drv.h  |  2 +-
 drivers/gpu/drm/i915/i915_gem.c  | 23 +
 drivers/gpu/drm/i915/intel_lrc.c | 55 
 4 files changed, 54 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 84e2b20..4dad121 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -449,8 +449,8 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 cleanup_gem:
mutex_lock(&dev->struct_mutex);
-   i915_gem_cleanup_ringbuffer(dev);
i915_gem_context_fini(dev);
+   i915_gem_cleanup_engines(dev);
mutex_unlock(&dev->struct_mutex);
 cleanup_irq:
intel_guc_ucode_fini(dev);
@@ -1188,8 +1188,8 @@ int i915_driver_unload(struct drm_device *dev)
 
intel_guc_ucode_fini(dev);
mutex_lock(&dev->struct_mutex);
-   i915_gem_cleanup_ringbuffer(dev);
i915_gem_context_fini(dev);
+   i915_gem_cleanup_engines(dev);
mutex_unlock(&dev->struct_mutex);
intel_fbc_cleanup_cfb(dev_priv);
i915_gem_cleanup_stolen(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4c24666..27bb401 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3018,7 +3018,7 @@ int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
-void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
+void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void __i915_add_request(struct drm_i915_gem_request *req,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 702c720..517676a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4823,7 +4823,7 @@ i915_gem_init_hw(struct drm_device *dev)
 
ret = i915_gem_request_alloc(ring, ring->default_context, &req);
if (ret) {
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4836,7 +4836,7 @@ i915_gem_init_hw(struct drm_device *dev)
if (ret && ret != -EIO) {
DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
i915_gem_request_cancel(req);
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4844,7 +4844,7 @@ i915_gem_init_hw(struct drm_device *dev)
if (ret && ret != -EIO) {
DRM_ERROR("Context enable ring #%d failed %d\n", i, 
ret);
i915_gem_request_cancel(req);
-   i915_gem_cleanup_ringbuffer(dev);
+   i915_gem_cleanup_engines(dev);
goto out;
}
 
@@ -4922,7 +4922,7 @@ out_unlock:
 }
 
 void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev)
+i915_gem_cleanup_engines(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
@@ -4931,13 +4931,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
for_each_ring(ring, dev_priv, i)
dev_priv->gt.cleanup_ring(ring);
 
-if (i915.enable_execlists)
-/*
- * Neither the BIOS, ourselves or any other kernel
- * expects the system to be in execlists mode on startup,
- * so we need to reset

[Intel-gfx] [PATCH v9] drm/i915: Extend LRC pinning to cover GPU context writeback

2015-12-18 Thread Nick Hoath
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
This fixes an issue with GuC submission where the GPU might not
have finished writing back the context before it is unpinned. This
results in a GPU hang.

v2: Moved the new pin to cover GuC submission (Alex Dai)
Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
already have a request pending on it (Alex Dai)
Rename unsaved to dirty to avoid double negatives (Dave Gordon)
Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
Split out per engine cleanup from context_free as it
was getting unwieldy
Corrected locking (Dave Gordon)
v6: Removed some bikeshedding (Mika Kuoppala)
Added explanation of the GuC hang that this fixes (Daniel Vetter)
v7: Removed extra per request pinning from ring reset code (Alex Dai)
Added forced ring unpin/clean in error case in context free (Alex Dai)
v8: Renamed lrc specific last_context to lrc_last_context as there
were some reset cases where the codepaths leaked (Mika Kuoppala)
NULL'd last_context in reset case - there was a pointer leak
if someone did reset->close context.
v9: Rebase over "Fix context/engine cleanup order"

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
Cc: Mika Kuoppala 
---
 drivers/gpu/drm/i915/i915_drv.h |   1 +
 drivers/gpu/drm/i915/i915_gem.c |   7 +-
 drivers/gpu/drm/i915/intel_lrc.c| 146 ++--
 drivers/gpu/drm/i915/intel_lrc.h|   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
 5 files changed, 124 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 27bb401..2f6f411 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -887,6 +887,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool dirty;
int pin_count;
} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 517676a..69e904f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1362,6 +1362,9 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
 
+   if (i915.enable_execlists)
+   intel_lr_context_complete_check(request);
+
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
 * of tail of the request to update the last known position
@@ -2772,10 +2775,6 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
struct drm_i915_gem_request,
execlist_link);
list_del(&submit_req->execlist_link);
-
-   if (submit_req->ctx != ring->default_context)
-   intel_lr_context_unpin(submit_req);
-
i915_gem_request_unreference(submit_req);
}
spin_unlock_irq(&ring->execlist_lock);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d542a8d..01063f7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -570,9 +570,6 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;
 
-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);
 
spin_lock_irq(&ring->execlist_lock);
@@ -736,6 +733,13 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
if (intel_ring_stopped(ring))
return;
 
+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].dirty) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].dirty = true;
+   }
+   }
+
if (dev_priv->guc.execbuf_client)
i915_guc_submit(dev_priv->guc.execbuf_client, request);
else
@

Re: [Intel-gfx] [PATCH v3] drm/i915: resize the GuC WOPCM for rc6

2016-05-06 Thread Nick Hoath

On 06/05/2016 08:01, Peter Antoine wrote:

On Thu, 5 May 2016, Dave Gordon wrote:


On 05/05/2016 15:02, Antoine, Peter wrote:

The attached version still does not explain that the WOPCM_TOP is to
tell the GuC not to use that space.


That's NOT what WOPCM_TOP means. The GuC is allowed to use the space
up to the value stored in the GUC_WOPCM_SIZE register (as the comment
above the #define says). Architecturally, this is allowed to be any
value greater than
(16K+sizeof internal SRAM (64, 128, or 256K)) and less than or equal
to GUC_WOPCM_TOP (which is a platform-independent constant), so we
normally choose the maximm allowed. Howver on BXT, we need to leave
some space at the top for the
RC6 image, hence the logic (and comments!) in guc_wopcm_size().

Yes, the firmware can use up to GUC_WOPCM_TOP and leave the rest alone.


The extra information does not aid anybody as the information is used
internally within the GuC.

It may help the next person who has to figure out what's gone wrong on
some future chip that needs more than 64K for RC6!


You hid an if statement in a function (making the code harder to read and
more prone to error), where perhaps a slightly clearer comment would have sufficed.

And this patch has been held up two weeks just for a better comment.

Peter.


.Dave.


And what if the next reserved space is not for RC6?




But, I have no actual objection to the patch.

Peter.





Tested-by: Nick Hoath 
Reviewed-by: Nick Hoath 


--
Peter Antoine (Android Graphics Driver Software Engineer)
-
Intel Corporation (UK) Limited
Registered No. 1134945 (England)
Registered Office: Pipers Way, Swindon SN3 1RJ
VAT No: 860 2173 47
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3] drm/i915: resize the GuC WOPCM for rc6

2016-05-06 Thread Nick Hoath

On 05/05/2016 16:04, Dave Gordon wrote:

On 05/05/2016 15:02, Antoine, Peter wrote:

The attached version still does not explain that the WOPCM_TOP is to tell the 
GuC not to use that space.


That's NOT what WOPCM_TOP means. The GuC is allowed to use the space up
to the value stored in the GUC_WOPCM_SIZE register (as the comment above
the #define says). Architecturally, this is allowed to be any value
greater than (16K+sizeof internal SRAM (64, 128, or 256K)) and less than
or equal to GUC_WOPCM_TOP (which is a platform-independent constant), so
we normally choose the maximum allowed. However on BXT, we need to leave
some space at the top for the RC6 image, hence the logic (and comments!)
in guc_wopcm_size().


The extra information does not aid anybody as the information is used 
internally within the GuC.

It may help the next person who has to figure out what's gone wrong on
some future chip that needs more than 64K for RC6!

.Dave.


But, I have no actual objection to the patch.

Peter.



Unfortunately Dave's patch locked my test system on bootup, so I've t-b 
& r-b'd Peter's.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 0/2] drm/i915/guc: GuC firmware loading updates

2016-05-06 Thread Nick Hoath
Updates to Skylake firmware filename & support for loading
Broxton firmware.

Nick Hoath (1):
  drm/i915/guc: Add Broxton GuC firmware loading support

Tom O'Rourke (1):
  drm/i915/guc: Use major_minor version for filename

 drivers/gpu/drm/i915/intel_guc_loader.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915/guc: Use major_minor version for filename

2016-05-06 Thread Nick Hoath
From: Tom O'Rourke 

Load guc firmware from file with major_minor number
in filename instead of using a symbolic link with only
the major number.

This change is so that new firmwares can only be used
with a kernel change. This is in case there is a regression
with a new firmware: it won't be used by default without
some testing.

Issue: VIZ-7713
Signed-off-by: Tom O'Rourke 
Signed-off-by: Nick Hoath 
Acked-by: Jani Nikula 
---
 drivers/gpu/drm/i915/intel_guc_loader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c 
b/drivers/gpu/drm/i915/intel_guc_loader.c
index 58dbe30..46b01d7 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -59,7 +59,7 @@
  *
  */
 
-#define I915_SKL_GUC_UCODE "i915/skl_guc_ver6.bin"
+#define I915_SKL_GUC_UCODE "i915/skl_guc_ver6_1.bin"
 MODULE_FIRMWARE(I915_SKL_GUC_UCODE);
 
 /* User-friendly representation of an enum */
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915/guc: Add Broxton GuC firmware loading support

2016-05-06 Thread Nick Hoath
Issue: VIZ-7772
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/intel_guc_loader.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c 
b/drivers/gpu/drm/i915/intel_guc_loader.c
index 46b01d7..d122e74 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -62,6 +62,9 @@
 #define I915_SKL_GUC_UCODE "i915/skl_guc_ver6_1.bin"
 MODULE_FIRMWARE(I915_SKL_GUC_UCODE);
 
+#define I915_BXT_GUC_UCODE "i915/bxt_guc_ver8_7.bin"
+MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
+
 /* User-friendly representation of an enum */
 const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status)
 {
@@ -644,6 +647,10 @@ void intel_guc_ucode_init(struct drm_device *dev)
fw_path = I915_SKL_GUC_UCODE;
guc_fw->guc_fw_major_wanted = 6;
guc_fw->guc_fw_minor_wanted = 1;
+   } else if (IS_BROXTON(dev)) {
+   fw_path = I915_BXT_GUC_UCODE;
+   guc_fw->guc_fw_major_wanted = 8;
+   guc_fw->guc_fw_minor_wanted = 7;
} else {
i915.enable_guc_submission = false;
fw_path = "";   /* unknown device */
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] ✗ Fi.CI.BAT: warning for drm/i915/guc: GuC firmware loading updates

2016-05-12 Thread Nick Hoath

On 09/05/2016 08:53, Patchwork wrote:

== Series Details ==

Series: drm/i915/guc: GuC firmware loading updates
URL   : https://patchwork.freedesktop.org/series/6818/
State : warning

== Summary ==

Series 6818v1 drm/i915/guc: GuC firmware loading updates
http://patchwork.freedesktop.org/api/1.0/series/6818/revisions/1/mbox/

Test core_auth:
 Subgroup basic-auth:
 pass   -> DMESG-WARN (bdw-ultra)

Already captured in:
https://bugs.freedesktop.org/show_bug.cgi?id=92235


Test pm_rpm:
 Subgroup basic-pci-d3-state:
 pass   -> SKIP   (hsw-brixbox)


Looks like a HW failure:
Test requirement not met in function enable_one_screen, file pm_rpm.c:329:
Test requirement: enable_one_screen_with_type(data, SCREEN_TYPE_ANY)


bdw-nuci7-2  total:219  pass:206  dwarn:0   dfail:0   fail:0   skip:13
bdw-ultratotal:219  pass:192  dwarn:1   dfail:0   fail:0   skip:26
bsw-nuc-2total:218  pass:174  dwarn:0   dfail:0   fail:2   skip:42
byt-nuc  total:218  pass:174  dwarn:0   dfail:0   fail:3   skip:41
hsw-brixbox  total:219  pass:192  dwarn:0   dfail:0   fail:0   skip:27
hsw-gt2  total:219  pass:197  dwarn:0   dfail:0   fail:1   skip:21
ivb-t430stotal:219  pass:188  dwarn:0   dfail:0   fail:0   skip:31
skl-i7k-2total:219  pass:191  dwarn:0   dfail:0   fail:0   skip:28
skl-nuci5total:219  pass:207  dwarn:0   dfail:0   fail:0   skip:12
snb-dellxps  total:37   pass:27   dwarn:0   dfail:0   fail:0   skip:9
snb-x220ttotal:219  pass:176  dwarn:0   dfail:0   fail:1   skip:42

Results at /archive/results/CI_IGT_test/Patchwork_2146/

447f2438e5ee3e9bb7d5cbe88fc44cdff74cc165 drm-intel-nightly: 
2016y-05m-08d-16h-21m-00s UTC integration manifest
b8d1e93 drm/i915/guc: Add Broxton GuC firmware loading support
9b4a018 drm/i915/guc: Use major_minor version for filename



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3] drm/i915: resize the GuC WOPCM for rc6

2016-05-16 Thread Nick Hoath

On 06/05/2016 13:18, Gordon, David S wrote:

On 06/05/16 10:37, Nick Hoath wrote:

On 05/05/2016 16:04, Dave Gordon wrote:

On 05/05/2016 15:02, Antoine, Peter wrote:

The attached version still does not explain that the WOPCM_TOP is to
tell the GuC not to use that space.


That's NOT what WOPCM_TOP means. The GuC is allowed to use the space up
to the value stored in the GUC_WOPCM_SIZE register (as the comment above
the #define says). Architecturally, this is allowed to be any value
greater than (16K+sizeof internal SRAM (64, 128, or 256K)) and less than
or equal to GUC_WOPCM_TOP (which is a platform-independent constant), so
we normally choose the maximum allowed. However on BXT, we need to leave
some space at the top for the RC6 image, hence the logic (and comments!)
in guc_wopcm_size().


The extra information does not aid anybody as the information is used
internally within the GuC.

It may help the next person who has to figure out what's gone wrong on
some future chip that needs more than 64K for RC6!

.Dave.


But, I have no actual objection to the patch.

Peter.




Unfortunately Dave's patch locked my test system on bootup, so I've t-b
& r-b'd Peter's.


They're equivalent, unless your firmware happens to be between 458752
and 491520 bytes in size (in which case you have a problem anyway).

To check, I've run both versions, with debug printing the value chosen
(on SKL) and the value that would have been chosen on BXT, and they're
identical (and both work). So I think your build had some other problem
unrelated to the specific patch.

I've no problem with using Peter's patch for now, but it's not just a
matter of the comments; there's also the other use(s) of
GUC_WOP_(TOP,SIZE_VALUE), with ad-hoc additions or subtractions. So it
still needs fixing properly.

.Dave.

After a rebuild & a retest, Dave's patch works fine. Therefore for 
"drm/i915/bxt: reserve space for RC6 in the GuC WOPCM":

Tested-by: Nick Hoath 
Reviewed-by: Nick Hoath 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 3/7] drm/i915/guc: add enable_guc_loading parameter

2016-05-23 Thread Nick Hoath

On 20/05/2016 11:42, Tvrtko Ursulin wrote:

From: Dave Gordon 

Split the function of "enable_guc_submission" into two separate
options.  The new one ("enable_guc_loading") controls only the
*fetching and loading* of the GuC firmware image. The existing
one is redefined to control only the *use* of the GuC for batch
submission once the firmware is loaded.

In addition, the degree of control has been refined from a simple
bool to an integer key, allowing several options:
-1 (default) whatever the platform default is
  0  DISABLE  don't load/use the GuC
  1  BEST EFFORT  try to load/use the GuC, fallback if not available
  2  REQUIRE  must load/use the GuC, else leave the GPU wedged

The new platform default (as coded here) will be to attempt to
load the GuC iff the device has a GuC that requires firmware,
but not yet to use it for submission. A later patch will change
to enable it if appropriate.

v4:
 Changed some error-message levels, mostly ERROR->INFO, per
 review comments by Tvrtko Ursulin.

v5:
 Dropped one more error message, disabled GuC submission on
 hypothetical firmware-free devices [Tvrtko Ursulin].

v6:
 Logging tidy by Tvrtko Ursulin:
  * Do not log falling back to execlists when wedging the GPU.
  * Do not log fw load errors when load was disabled by user.
  * Pass down some error code from fw load for log message to
make more sense.

Signed-off-by: Dave Gordon 
Cc: Tvrtko Ursulin 
Reviewed-by: Tvrtko Ursulin  (v5)
Signed-off-by: Tvrtko Ursulin 

Reviewed-by: Nick Hoath  (v6)

---
  drivers/gpu/drm/i915/i915_gem.c|   5 +-
  drivers/gpu/drm/i915/i915_guc_submission.c |   4 +-
  drivers/gpu/drm/i915/i915_params.c |  14 +++-
  drivers/gpu/drm/i915/i915_params.h |   3 +-
  drivers/gpu/drm/i915/intel_guc_loader.c| 123 +
  5 files changed, 89 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 88dce5482f2f..1a3a07eca0d0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4868,11 +4868,8 @@ i915_gem_init_hw(struct drm_device *dev)
/* We can't enable contexts until all firmware is loaded */
if (HAS_GUC(dev)) {
ret = intel_guc_setup(dev);
-   if (ret) {
-   DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
-   ret = -EIO;
+   if (ret)
goto out;
-   }
}

/*
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 169242a8adff..916cd6778cf3 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -969,7 +969,7 @@ int intel_guc_suspend(struct drm_device *dev)
struct intel_context *ctx;
u32 data[3];

-   if (!i915.enable_guc_submission)
+   if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
return 0;

ctx = dev_priv->kernel_context;
@@ -995,7 +995,7 @@ int intel_guc_resume(struct drm_device *dev)
struct intel_context *ctx;
u32 data[3];

-   if (!i915.enable_guc_submission)
+   if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
return 0;

ctx = dev_priv->kernel_context;
diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index cd74fb8e9387..21a323c01cdb 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -53,7 +53,8 @@ struct i915_params i915 __read_mostly = {
.verbose_state_checks = 1,
.nuclear_pageflip = 0,
.edp_vswing = 0,
-   .enable_guc_submission = false,
+   .enable_guc_loading = -1,
+   .enable_guc_submission = 0,
.guc_log_level = -1,
.enable_dp_mst = true,
.inject_load_failure = 0,
@@ -193,8 +194,15 @@ MODULE_PARM_DESC(edp_vswing,
 "(0=use value from vbt [default], 1=low power swing(200mV),"
 "2=default swing(400mV))");

-module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, 
bool, 0400);
-MODULE_PARM_DESC(enable_guc_submission, "Enable GuC submission 
(default:false)");
+module_param_named_unsafe(enable_guc_loading, i915.enable_guc_loading, int, 
0400);
+MODULE_PARM_DESC(enable_guc_loading,
+   "Enable GuC firmware loading "
+   "(-1=auto [default], 0=never, 1=if available, 2=required)");
+
+module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, 
int, 0400);
+MODULE_PARM_DESC(enable_guc_submission,
+   "Enable GuC submission "
+   "(-1=auto, 0=never [default], 1=if available, 2=required)");

  module_param_named(guc_log_level, i915.guc_log

[Intel-gfx] [PATCH 0/4] lrc lifecycle cleanups

2015-10-06 Thread Nick Hoath
These changes are a result of the requests made in VIZ-4277.
Make the lrc path more like the legacy submission path.
Attach the CPU mappings to vma (un)bind, so that the shrinker
also cleans those up.
Pin the CPU mappings while context is busy (pending bbs), so
that the mappings aren't released/made continuously as this is
an expensive process.

Nick Hoath (4):
  drm/i915: Unify execlist and legacy request life-cycles
  drm/i915: Improve dynamic management/eviction of lrc backing objects
  drm/i915: Add the CPU mapping of the hw context to the pinned items.
  drm/i915: Only update ringbuf address when necessary

 drivers/gpu/drm/i915/i915_debugfs.c |  14 ++--
 drivers/gpu/drm/i915/i915_drv.h |  14 +++-
 drivers/gpu/drm/i915/i915_gem.c |  70 +
 drivers/gpu/drm/i915/i915_gem_gtt.c |   8 ++
 drivers/gpu/drm/i915/i915_irq.c |  81 +---
 drivers/gpu/drm/i915/intel_lrc.c| 131 ++--
 drivers/gpu/drm/i915/intel_lrc.h|   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  71 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |   4 -
 9 files changed, 250 insertions(+), 145 deletions(-)

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/4] drm/i915: Improve dynamic management/eviction of lrc backing objects

2015-10-06 Thread Nick Hoath
Shovel all context related objects through the active queue and obj
management.

- Added callback in vma_(un)bind to add CPU (un)mapping at same time
  if desired
- Inserted LRC hw context & ringbuf to vma active list

Issue: VIZ-4277
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/i915_drv.h |  4 ++
 drivers/gpu/drm/i915/i915_gem.c |  3 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c |  8 
 drivers/gpu/drm/i915/intel_lrc.c| 28 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 --
 6 files changed, 79 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3d217f9..d660ee3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2169,6 +2169,10 @@ struct drm_i915_gem_object {
struct work_struct *work;
} userptr;
};
+
+   /** Support for automatic CPU side mapping of object */
+   int (*mmap)(struct drm_i915_gem_object *obj, bool unmap);
+   void *mappable;
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fc82171..56e0e00 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3262,6 +3262,9 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
if (vma->pin_count)
return -EBUSY;
 
+   if (obj->mmap)
+   obj->mmap(obj, true);
+
BUG_ON(obj->pages == NULL);
 
if (wait) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 620d57e..786ec4b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3495,6 +3495,14 @@ int i915_vma_bind(struct i915_vma *vma, enum 
i915_cache_level cache_level,
 
vma->bound |= bind_flags;
 
+   if (vma->obj->mmap) {
+   ret = vma->obj->mmap(vma->obj, false);
+   if (ret) {
+   i915_vma_unbind(vma);
+   return ret;
+   }
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e8f5b6c..b807928 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -723,6 +723,18 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
 
intel_logical_ring_advance(request->ringbuf);
 
+   /* Push the hw context on to the active list */
+   i915_vma_move_to_active(
+   i915_gem_obj_to_ggtt(
+   request->ctx->engine[ring->id].state),
+   request);
+
+   /* Push the ringbuf on to the active list */
+   i915_vma_move_to_active(
+   i915_gem_obj_to_ggtt(
+   request->ctx->engine[ring->id].ringbuf->obj),
+   request);
+
request->tail = request->ringbuf->tail;
 
if (intel_ring_stopped(ring))
@@ -1006,10 +1018,15 @@ static int intel_lr_context_do_pin(struct 
intel_engine_cs *ring,
if (ret)
return ret;
 
-   ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
+   ret = i915_gem_obj_ggtt_pin(ringbuf->obj, PAGE_SIZE,
+   PIN_MAPPABLE);
if (ret)
goto unpin_ctx_obj;
 
+   ret = i915_gem_object_set_to_gtt_domain(ringbuf->obj, true);
+   if (ret)
+   goto unpin_rb_obj;
+
ctx_obj->dirty = true;
 
/* Invalidate GuC TLB. */
@@ -1018,6 +1035,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs 
*ring,
 
return ret;
 
+unpin_rb_obj:
+   i915_gem_object_ggtt_unpin(ringbuf->obj);
 unpin_ctx_obj:
i915_gem_object_ggtt_unpin(ctx_obj);
 
@@ -1052,7 +1071,7 @@ void intel_lr_context_unpin(struct drm_i915_gem_request 
*rq)
if (ctx_obj) {
WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
if (--rq->ctx->engine[ring->id].pin_count == 0) {
-   intel_unpin_ringbuffer_obj(ringbuf);
+   i915_gem_object_ggtt_unpin(ringbuf->obj);
i915_gem_object_ggtt_unpin(ctx_obj);
}
}
@@ -2369,7 +2388,7 @@ void intel_lr_context_free(struct intel_context *ctx)
struct intel_engine_cs *ring = ringbuf->ring;
 
if (ctx == ring->default_context) {
-   intel_unpin_ringbuffer_obj(ringbuf);
+   i915_gem_object_ggtt_unpin(ringbuf->obj);
i915_gem_object_ggtt_unpin(ctx_obj);
}

[Intel-gfx] [PATCH 3/4] drm/i915: Add the CPU mapping of the hw context to the pinned items.

2015-10-06 Thread Nick Hoath
Pin the hw ctx mapping so that it is not mapped/unmapped per bb
when doing GuC submission.

Issue: VIZ-4277
Cc: David Gordon 
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 14 --
 drivers/gpu/drm/i915/i915_drv.h |  4 ++-
 drivers/gpu/drm/i915/intel_lrc.c| 56 +++--
 3 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 3f2a7a7..e68cf5fa 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1970,10 +1970,9 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
 
 static void i915_dump_lrc_obj(struct seq_file *m,
  struct intel_engine_cs *ring,
- struct drm_i915_gem_object *ctx_obj)
+ struct drm_i915_gem_object *ctx_obj,
+ uint32_t *reg_state)
 {
-   struct page *page;
-   uint32_t *reg_state;
int j;
unsigned long ggtt_offset = 0;
 
@@ -1996,17 +1995,13 @@ static void i915_dump_lrc_obj(struct seq_file *m,
return;
}
 
-   page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-   if (!WARN_ON(page == NULL)) {
-   reg_state = kmap_atomic(page);
-
+   if (!WARN_ON(reg_state == NULL)) {
for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 
0x%08x\n",
   ggtt_offset + 4096 + (j * 4),
   reg_state[j], reg_state[j + 1],
   reg_state[j + 2], reg_state[j + 3]);
}
-   kunmap_atomic(reg_state);
}
 
seq_putc(m, '\n');
@@ -2034,7 +2029,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
for_each_ring(ring, dev_priv, i) {
if (ring->default_context != ctx)
i915_dump_lrc_obj(m, ring,
- ctx->engine[i].state);
+ ctx->engine[i].state,
+ ctx->engine[i].reg_state);
}
}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d660ee3..b49fd12 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -879,8 +879,10 @@ struct intel_context {
} legacy_hw_ctx;
 
/* Execlists */
-   struct {
+   struct intel_context_engine {
struct drm_i915_gem_object *state;
+   uint32_t *reg_state;
+   struct page *page;
struct intel_ringbuffer *ringbuf;
int pin_count;
} engine[I915_NUM_RINGS];
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b807928..55a4de56 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -360,16 +360,13 @@ static int execlists_update_context(struct 
drm_i915_gem_request *rq)
struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
-   struct page *page;
-   uint32_t *reg_state;
+   uint32_t *reg_state = rq->ctx->engine[ring->id].reg_state;
 
BUG_ON(!ctx_obj);
+   WARN_ON(!reg_state);
WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
 
-   page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-   reg_state = kmap_atomic(page);
-
reg_state[CTX_RING_TAIL+1] = rq->tail;
reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
 
@@ -385,8 +382,6 @@ static int execlists_update_context(struct 
drm_i915_gem_request *rq)
ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
}
 
-   kunmap_atomic(reg_state);
-
return 0;
 }
 
@@ -1004,7 +999,31 @@ int logical_ring_flush_all_caches(struct 
drm_i915_gem_request *req)
return 0;
 }
 
-static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
+static int intel_mmap_hw_context(struct drm_i915_gem_object *obj,
+   bool unmap)
+{
+   int ret = 0;
+   struct intel_context_engine *ice =
+   (struct intel_context_engine *)obj->mappable;
+   struct page *page;
+   uint32_t *reg_state;
+
+   if (unmap) {
+   kunmap(ice->page);
+   ice->reg_state = NULL;
+   ice->page = NULL;
+   } else {
+   page = i915_gem_object_get_page(obj, LRC_STATE_PN);
+   reg_state = kmap(page);
+   ice->reg_state = reg_stat

[Intel-gfx] [PATCH 4/4] drm/i915: Only update ringbuf address when necessary

2015-10-06 Thread Nick Hoath
We now only need to update the address of the ringbuf object in the
hw context when it is pinned, and the hw context is first CPU mapped

Issue: VIZ-4277
Cc: David Gordon 
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/intel_lrc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 55a4de56..92a0ece 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -368,7 +368,6 @@ static int execlists_update_context(struct 
drm_i915_gem_request *rq)
WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
 
reg_state[CTX_RING_TAIL+1] = rq->tail;
-   reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
 
if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
/* True 32b PPGTT with dynamic page allocation: update PDP
@@ -1046,6 +1045,9 @@ static int intel_lr_context_do_pin(
if (ret)
goto unpin_rb_obj;
 
+   ctx->engine[ring->id].reg_state[CTX_RING_BUFFER_START+1] =
+   i915_gem_obj_ggtt_offset(ringbuf->obj);
+
ctx_obj->dirty = true;
 
/* Invalidate GuC TLB. */
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/4] drm/i915: Unify execlist and legacy request life-cycles

2015-10-06 Thread Nick Hoath
There is a desire to simplify the i915 driver by reducing the number of
different code paths introduced by the LRC / execlists support.  As the
execlists request is now part of the gem request it is possible and
desirable to unify the request life-cycles for execlist and legacy
requests.

Added a context complete flag to a request which gets set during the
context switch interrupt.

Added a function i915_gem_request_retireable().  A request is considered
retireable if its seqno passed (i.e. the request has completed) and either
it was never submitted to the ELSP or its context completed.  This ensures
that context save is carried out before the last request for a context is
considered retireable.  retire_requests_ring() now uses
i915_gem_request_retireable() rather than request_complete() when deciding
which requests to retire. Requests that were not waiting for a context
switch interrupt (either as a result of being merged into a following
request or by being a legacy request) will be considered retireable as
soon as their seqno has passed.

Removed the extra request reference held for the execlist request.

Removed intel_execlists_retire_requests() and all references to
intel_engine_cs.execlist_retired_req_list.

Moved context unpinning into retire_requests_ring() for now.  Further work
is pending for the context pinning - this patch should allow us to use the
active list to track context and ring buffer objects later.

Changed gen8_cs_irq_handler() so that notify_ring() is called when
contexts complete as well as when a user interrupt occurs so that
notification happens when a request is complete and context save has
finished.

v2: Rebase over the read-read optimisation changes

v3: Reworked IRQ handler after removing IRQ handler cleanup patch

v4: Fixed various pin leaks

Issue: VIZ-4277
Signed-off-by: Thomas Daniel 
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/i915_drv.h |  6 +++
 drivers/gpu/drm/i915/i915_gem.c | 67 +--
 drivers/gpu/drm/i915/i915_irq.c | 81 +
 drivers/gpu/drm/i915/intel_lrc.c| 43 +++--
 drivers/gpu/drm/i915/intel_lrc.h|  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 6 files changed, 118 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fbf0ae9..3d217f9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2262,6 +2262,12 @@ struct drm_i915_gem_request {
/** Execlists no. of times this request has been sent to the ELSP */
int elsp_submitted;
 
+   /**
+* Execlists: whether this requests's context has completed after
+* submission to the ELSP
+*/
+   bool ctx_complete;
+
 };
 
 int i915_gem_request_alloc(struct intel_engine_cs *ring,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 52642af..fc82171 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1386,6 +1386,24 @@ __i915_gem_request_retire__upto(struct 
drm_i915_gem_request *req)
   typeof(*tmp), list);
 
i915_gem_request_retire(tmp);
+
+   if (i915.enable_execlists) {
+   struct intel_context *ctx = tmp->ctx;
+   struct drm_i915_private *dev_priv =
+   engine->dev->dev_private;
+   unsigned long flags;
+   struct drm_i915_gem_object *ctx_obj =
+   ctx->engine[engine->id].state;
+
+   spin_lock_irqsave(&engine->execlist_lock, flags);
+
+   if (ctx_obj && (ctx != engine->default_context))
+   intel_lr_context_unpin(tmp);
+
+   intel_runtime_pm_put(dev_priv);
+   spin_unlock_irqrestore(&engine->execlist_lock, flags);
+   }
+
} while (tmp != req);
 
WARN_ON(i915_verify_lists(engine->dev));
@@ -2359,6 +2377,12 @@ void i915_vma_move_to_active(struct i915_vma *vma,
list_move_tail(&vma->mm_list, &vma->vm->active_list);
 }
 
+static bool i915_gem_request_retireable(struct drm_i915_gem_request *req)
+{
+   return (i915_gem_request_completed(req, true) &&
+   (!req->elsp_submitted || req->ctx_complete));
+}
+
 static void
 i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
 {
@@ -2829,10 +2853,28 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
   struct drm_i915_gem_request,
   list);
 
-   if (!i915_gem_request_completed(request, true))
+   if (!i915_gem_request_retireable(request))
break;
 
  

Re: [Intel-gfx] [PATCH 1/4] drm/i915: Unify execlist and legacy request life-cycles

2015-10-14 Thread Nick Hoath

On 14/10/2015 15:42, Dave Gordon wrote:

On 13/10/15 12:36, Chris Wilson wrote:

On Tue, Oct 13, 2015 at 01:29:56PM +0200, Daniel Vetter wrote:

On Fri, Oct 09, 2015 at 06:23:50PM +0100, Chris Wilson wrote:

On Fri, Oct 09, 2015 at 07:18:21PM +0200, Daniel Vetter wrote:

On Fri, Oct 09, 2015 at 10:45:35AM +0100, Chris Wilson wrote:

On Fri, Oct 09, 2015 at 11:15:08AM +0200, Daniel Vetter wrote:

My idea was to create a new request for 3. which gets signalled by the
scheduler in intel_lrc_irq_handler. My idea was that we'd only create
these when a ctx switch might occur to avoid overhead, but I guess if we
just outright delay all requests a notch if needed that might work too. But
I'm really not sure on the implications of that (i.e. does the hardware
really unload the ctx if it's idle?), and whether that would fly still with
the scheduler.

But figuring this one out here seems to be the cornerstone of this reorg.
Without it we can't just throw contexts onto the active list.


(Let me see if I understand it correctly)

Basically the problem is that we can't trust the context object to be
synchronized until after the status interrupt. The way we handled that
for legacy is to track the currently bound context and keep the
vma->pin_count asserted until the request containing the switch away.
Doing the same for execlists would trivially fix the issue and if done
smartly allows us to share more code (been there, done that).

That satisfies me for keeping requests as a basic fence in the GPU
timeline and should keep everyone happy that the context can't vanish
until after it is complete. The only caveat is that we cannot evict the
most recent context. For legacy, we do a switch back to the always
pinned default context. For execlists we don't, but it still means we
should only have one context which cannot be evicted (like legacy). But
it does leave us with the issue that i915_gpu_idle() returns early and
i915_gem_context_fini() must keep the explicit gpu reset to be
absolutely sure that the pending context writes are completed before the
final context is unbound.


Yes, and that was what I originally had in mind. Meanwhile the scheduler
(will) happen and that means we won't have FIFO ordering. Which means when
we switch contexts (as opposed to just adding more to the ringbuffer of
the current one) we won't have any idea which context will be the next
one. Which also means we don't know which request to pick to retire the
old context. Hence why I think we need to be better.


But the scheduler does - it is also in charge of making sure the
retirement queue is in order. The essence is that we only actually pin
engine->last_context, which is chosen as we submit stuff to the hw.


Well I'm not sure how much it will reorder, but I'd expect it wants to
reorder stuff pretty freely. And as soon as it reorders context (ofc they
can't depend on each other) then the legacy hw ctx tracking won't work.

I think at least ...


Not the way it is written today, but the principle behind it still
stands. The last_context submitted to the hardware is pinned until a new
one is submitted (such that it remains bound in the GGTT until after the
context switch is complete due to the active reference). Instead of
doing the context tracking at the start of the execbuffer, the context
tracking needs to be pushed down to the submission backend/middleman.
-Chris


Does anyone actually know what guarantees (if any) the GPU provides
w.r.t access to context images vs. USER_INTERRUPTs and CSB-updated
interrupts? Does 'active->idle' really mean that the context has been
fully updated in memory (and can therefore be unmapped), or just that
the engine has stopped processing (but the context might not be saved
until it's known that it isn't going to be reactivated).

For example, it could implement this:

(End of last batch in current context)
1.  Update seqno
2.  Generate USER_INTERRUPT
3.  Engine finishes work
(HEAD == TAIL and no further contexts queued in ELSP)
4.  Save all per-context registers to context image
5.  Flush to memory and invalidate
6.  Update CSB
7.  Flush to memory
8.  Generate CSB-update interrupt.

(New batch in same context submitted via ELSP)
9.  Reload entire context image from memory
10. Update CSB
11. Generate CSB-update interrupt

Or this:
1. Update seqno
2. Generate USER_INTERRUPT
3. Engine finishes work
(HEAD == TAIL and no further contexts queued in ELSP)
4. Update CSB
5. Generate CSB-update interrupt.

(New batch in DIFFERENT context submitted via ELSP)
6. Save all per-context registers to old context image
7. Load entire context image from new image
8. Update CSB
9. Generate CSB-update interrupt

The former is synchronous and relatively easy to model, the latter is
more like the way le

Re: [Intel-gfx] [PATCH 2/4] drm/i915: Improve dynamic management/eviction of lrc backing objects

2015-10-16 Thread Nick Hoath

On 08/10/2015 14:35, Chris Wilson wrote:

On Wed, Oct 07, 2015 at 06:05:46PM +0200, Daniel Vetter wrote:

On Tue, Oct 06, 2015 at 03:52:02PM +0100, Nick Hoath wrote:

Shovel all context related objects through the active queue and obj
management.

- Added callback in vma_(un)bind to add CPU (un)mapping at same time
   if desired
- Inserted LRC hw context & ringbuf to vma active list

Issue: VIZ-4277
Signed-off-by: Nick Hoath 
---
  drivers/gpu/drm/i915/i915_drv.h |  4 ++
  drivers/gpu/drm/i915/i915_gem.c |  3 ++
  drivers/gpu/drm/i915/i915_gem_gtt.c |  8 
  drivers/gpu/drm/i915/intel_lrc.c| 28 +++--
  drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++---
  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 --
  6 files changed, 79 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3d217f9..d660ee3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2169,6 +2169,10 @@ struct drm_i915_gem_object {
struct work_struct *work;
} userptr;
};
+
+   /** Support for automatic CPU side mapping of object */
+   int (*mmap)(struct drm_i915_gem_object *obj, bool unmap);


I don't think we need a map hook, that can still be done (if not done so
I disagree - this keeps the interface symmetrical. Searching for the 
do/undo code paths and finding they are in different places, called via 
different routes makes code harder to follow.

already) by the callers. Also it's better to rename this to vma_unbind
(and it should be at the vma level I think) since there's other potential
Nope - the obj is created first, at a point where the map/unmap function 
can be known. Moving the map/unmap to the vma would mean having a 
callback path to the object just to set up the callback path when the 
vma is created anonymously at some later point.

users. So explicit maping, lazy unmapping for the kmaps we need. That's
the same design we're using for binding objects into gpu address spaces.

Also Chris Wilson has something similar, please align with him on the
precise design of this callback.


We need the unbind hook because of the movement in the first patch (it
is a separate issue, the code should work without it albeit having to
remap the ring/context state more often). The changelog in this patch
simply explains the i915_vma_move_to_active() additions. But to get the
shrink accurate we do need the context unpin on retirement and to do the
pin_count check in i915_vma_unbind() after waiting (rather than before,
as we currently do). However, the eviction code will not inspect the
active contexts objects yet (as it will continue to skip over the
ggtt->pin_count on them). The way I allowed ctx objects to be evicted was
to only keep the ctx->state pinned for the duration of the request
construction.

Note that I think it should be a vma->unbind hook not an object level
one (it is i915_vma_unbind, with only a modicum of object level state
being modified in that function).
-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/4] drm/i915: Improve dynamic management/eviction of lrc backing objects

2015-10-19 Thread Nick Hoath

On 19/10/2015 10:48, Daniel Vetter wrote:

On Fri, Oct 16, 2015 at 03:42:53PM +0100, Nick Hoath wrote:

On 08/10/2015 14:35, Chris Wilson wrote:

On Wed, Oct 07, 2015 at 06:05:46PM +0200, Daniel Vetter wrote:

On Tue, Oct 06, 2015 at 03:52:02PM +0100, Nick Hoath wrote:

Shovel all context related objects through the active queue and obj
management.

- Added callback in vma_(un)bind to add CPU (un)mapping at same time
   if desired
- Inserted LRC hw context & ringbuf to vma active list

Issue: VIZ-4277
Signed-off-by: Nick Hoath 
---
  drivers/gpu/drm/i915/i915_drv.h |  4 ++
  drivers/gpu/drm/i915/i915_gem.c |  3 ++
  drivers/gpu/drm/i915/i915_gem_gtt.c |  8 
  drivers/gpu/drm/i915/intel_lrc.c| 28 +++--
  drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++---
  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 --
  6 files changed, 79 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3d217f9..d660ee3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2169,6 +2169,10 @@ struct drm_i915_gem_object {
struct work_struct *work;
} userptr;
};
+
+   /** Support for automatic CPU side mapping of object */
+   int (*mmap)(struct drm_i915_gem_object *obj, bool unmap);


I don't think we need a map hook, that can still be done (if not done so

I disagree - this keeps the interface symmetrical. Searching for the do/undo
code paths and finding they are in different places, called via different
routes makes code harder to follow.

already) by the callers. Also it's better to rename this to vma_unbind
(and it should be at the vma level I think) since there's other potential

Nope - the obj is created first, at a point where the map/unmap function can
be known. Moving the map/unmap to the vma would mean having a callback path
to the object just to set up the callback path when the vma is created
anonymously at some later point.


One of the plans for this is to also use it for to-be-unpinned
framebuffers (4k buffers are huge ...). And in that case the unmap hook
only, and on the vma is the design we want. And since it also seems to
accomodate all the other users I do think it's the right choice.


I refer you to these words found on the mail list. The may be familiar:

As a rule of thumb for refactoring and shared infrastructure we use the 
following recipe in drm:

- first driver implements things as straightforward as possible
- 2nd user copypastes
- 3rd one has the duty to figure out whether some refactoring is in order
  or not.

The code as I have written it works best and simplest for my use case. 
If someone else wants to refactor it differently to shoe horn in their 
use case, that's up to them.





Like I said, explicit setup and lazy, implicit cleanup is kinda how a lot
of things in gem work.


The most dangerous phrase in the language is ‘we’ve always done it this 
way.’ - Grace Hopper



-Daniel


users. So explicit maping, lazy unmapping for the kmaps we need. That's
the same design we're using for binding objects into gpu address spaces.

Also Chris Wilson has something similar, please align with him on the
precise design of this callback.


We need the unbind hook because of the movement in the first patch (it
is a separate issue, the code should work without it albeit having to
remap the ring/context state more often). The changelog in this patch
simply explains the i915_vma_move_to_active() additions. But to get the
shrink accurate we do need the context unpin on retirement and to do the
pin_count check in i915_vma_unbind() after waiting (rather than before,
as we currently do). However, the eviction code will not inspect the
active contexts objects yet (as it will continue to skip over the
ggtt->pin_count on them). The way I allowed ctx objects to be evicted was
to only keep the ctx->state pinned for the duration of the request
construction.

Note that I think it should be a vma->unbind hook not an object level
one (it is i915_vma_unbind, with only a modicum of object level state
being modified in that function).
-Chris







___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 3/6] drm/i915: Unify execlist and legacy request life-cycles

2015-10-20 Thread Nick Hoath
There is a desire to simplify the i915 driver by reducing the number of
different code paths introduced by the LRC / execlists support.  As the
execlists request is now part of the gem request it is possible and
desirable to unify the request life-cycles for execlist and legacy
requests.

A request is considered retireable if its seqno passed (i.e. the request
has completed) and either it was never submitted to the ELSP or its
context completed.  This ensures that context save is carried out before
the last request for a context is considered retireable. request_complete()
now checks the elsp_submitted count when deciding if a request is complete.
Requests that were not waiting for a context
switch interrupt (either as a result of being merged into a following
request or by being a legacy request) will be considered retireable as
soon as their seqno has passed.

Removed the extra request reference held for the execlist request.

Removed intel_execlists_retire_requests() and all references to
intel_engine_cs.execlist_retired_req_list.

Changed gen8_cs_irq_handler() so that notify_ring() is called when
contexts complete as well as when a user interrupt occurs so that
notification happens when a request is complete and context save has
finished.

v2: Rebase over the read-read optimisation changes

v3: Reworked IRQ handler after removing IRQ handler cleanup patch

v4: Fixed various pin leaks

v5: Removed ctx_complete flag & associated changes. Removed extraneous
request pin of context.
(Chris Wilson/Daniel Vetter)

Issue: VIZ-4277
Signed-off-by: Thomas Daniel 
Signed-off-by: Nick Hoath 
Cc: Daniel Vetter 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 23 -
 drivers/gpu/drm/i915/i915_irq.c |  7 ++---
 drivers/gpu/drm/i915/intel_lrc.c| 45 -
 drivers/gpu/drm/i915/intel_lrc.h|  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 6 files changed, 21 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8afda45..ae08e57 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2914,7 +2914,7 @@ static inline bool i915_gem_request_completed(struct 
drm_i915_gem_request *req,
 
seqno = req->ring->get_seqno(req->ring, lazy_coherency);
 
-   return i915_seqno_passed(seqno, req->seqno);
+   return i915_seqno_passed(seqno, req->seqno) && !req->elsp_submitted;
 }
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e57061a..290a1ac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2848,12 +2848,16 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
 
if (!list_empty(&obj->last_read_req[ring->id]->list))
break;
+   if (!i915_gem_request_completed(obj->last_read_req[ring->id],
+   true))
+   break;
 
i915_gem_object_retire__read(obj, ring->id);
}
 
if (unlikely(ring->trace_irq_req &&
-i915_gem_request_completed(ring->trace_irq_req, true))) {
+i915_gem_request_completed(ring->trace_irq_req,
+   true))) {
ring->irq_put(ring);
i915_gem_request_assign(&ring->trace_irq_req, NULL);
}
@@ -2872,15 +2876,6 @@ i915_gem_retire_requests(struct drm_device *dev)
for_each_ring(ring, dev_priv, i) {
i915_gem_retire_requests_ring(ring);
idle &= list_empty(&ring->request_list);
-   if (i915.enable_execlists) {
-   unsigned long flags;
-
-   spin_lock_irqsave(&ring->execlist_lock, flags);
-   idle &= list_empty(&ring->execlist_queue);
-   spin_unlock_irqrestore(&ring->execlist_lock, flags);
-
-   intel_execlists_retire_requests(ring);
-   }
}
 
if (idle)
@@ -2956,12 +2951,14 @@ i915_gem_object_flush_active(struct drm_i915_gem_object 
*obj)
if (req == NULL)
continue;
 
-   if (list_empty(&req->list))
-   goto retire;
+   if (list_empty(&req->list)) {
+   if (i915_gem_request_completed(req, true))
+   i915_gem_object_retire__read(obj, i);
+   continue;
+   }
 
if (i915_gem_request_completed(req, true)) {
__i915_gem_request_retire__upto(req);
-retire:
i915_gem_object

[Intel-gfx] [PATCH 4/6] drm/i915: Improve dynamic management/eviction of lrc backing objects

2015-10-20 Thread Nick Hoath
Shovel all context related objects through the active queue and obj
management.

- Added callback in vma_(un)bind to add CPU (un)mapping at same time
  if desired
- Inserted LRC hw context & ringbuf to vma active list

Issue: VIZ-4277
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/i915_drv.h |  4 ++
 drivers/gpu/drm/i915/i915_gem.c |  3 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c |  8 
 drivers/gpu/drm/i915/intel_lrc.c| 28 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 --
 6 files changed, 79 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ae08e57..0dd4ace 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2161,6 +2161,10 @@ struct drm_i915_gem_object {
struct work_struct *work;
} userptr;
};
+
+   /** Support for automatic CPU side mapping of object */
+   int (*mmap)(struct drm_i915_gem_object *obj, bool unmap);
+   void *mappable;
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 290a1ac..8bd318a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3222,6 +3222,9 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
if (vma->pin_count)
return -EBUSY;
 
+   if (obj->mmap)
+   obj->mmap(obj, true);
+
BUG_ON(obj->pages == NULL);
 
if (wait) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 43f35d1..2812757 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3517,6 +3517,14 @@ int i915_vma_bind(struct i915_vma *vma, enum 
i915_cache_level cache_level,
 
vma->bound |= bind_flags;
 
+   if (vma->obj->mmap) {
+   ret = vma->obj->mmap(vma->obj, false);
+   if (ret) {
+   i915_vma_unbind(vma);
+   return ret;
+   }
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8428ebd..069950e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -719,6 +719,18 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
 
intel_logical_ring_advance(request->ringbuf);
 
+   /* Push the hw context on to the active list */
+   i915_vma_move_to_active(
+   i915_gem_obj_to_ggtt(
+   request->ctx->engine[ring->id].state),
+   request);
+
+   /* Push the ringbuf on to the active list */
+   i915_vma_move_to_active(
+   i915_gem_obj_to_ggtt(
+   request->ctx->engine[ring->id].ringbuf->obj),
+   request);
+
request->tail = request->ringbuf->tail;
 
if (intel_ring_stopped(ring))
@@ -987,10 +999,15 @@ static int intel_lr_context_do_pin(struct intel_engine_cs 
*ring,
if (ret)
return ret;
 
-   ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
+   ret = i915_gem_obj_ggtt_pin(ringbuf->obj, PAGE_SIZE,
+   PIN_MAPPABLE);
if (ret)
goto unpin_ctx_obj;
 
+   ret = i915_gem_object_set_to_gtt_domain(ringbuf->obj, true);
+   if (ret)
+   goto unpin_rb_obj;
+
ctx_obj->dirty = true;
 
/* Invalidate GuC TLB. */
@@ -999,6 +1016,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs 
*ring,
 
return ret;
 
+unpin_rb_obj:
+   i915_gem_object_ggtt_unpin(ringbuf->obj);
 unpin_ctx_obj:
i915_gem_object_ggtt_unpin(ctx_obj);
 
@@ -1033,7 +1052,7 @@ void intel_lr_context_unpin(struct drm_i915_gem_request 
*rq)
if (ctx_obj) {
WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
if (--rq->ctx->engine[ring->id].pin_count == 0) {
-   intel_unpin_ringbuffer_obj(ringbuf);
+   i915_gem_object_ggtt_unpin(ringbuf->obj);
i915_gem_object_ggtt_unpin(ctx_obj);
}
}
@@ -2351,7 +2370,7 @@ void intel_lr_context_free(struct intel_context *ctx)
struct intel_engine_cs *ring = ringbuf->ring;
 
if (ctx == ring->default_context) {
-   intel_unpin_ringbuffer_obj(ringbuf);
+   i915_gem_object_ggtt_unpin(ringbuf->obj);
i915_gem_object_ggtt_unpin(ctx_obj);
}
W

[Intel-gfx] [PATCH 1/6] drm/i915: Rename gt_irq_handler variable

2015-10-20 Thread Nick Hoath
Renamed tmp variable to the more descriptive iir. (Daniel Vetter/
Thomas Daniel)

Issue: VIZ-4277
Signed-off-by: Nick Hoath 
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Thomas Daniel 
---
 drivers/gpu/drm/i915/i915_irq.c | 46 -
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d68328f..fbf9153 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1296,64 +1296,64 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_i915_private *dev_priv,
irqreturn_t ret = IRQ_NONE;
 
if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-   u32 tmp = I915_READ_FW(GEN8_GT_IIR(0));
-   if (tmp) {
-   I915_WRITE_FW(GEN8_GT_IIR(0), tmp);
+   u32 iir = I915_READ_FW(GEN8_GT_IIR(0));
+   if (iir) {
+   I915_WRITE_FW(GEN8_GT_IIR(0), iir);
ret = IRQ_HANDLED;
 
-   if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_RCS_IRQ_SHIFT))
+   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_RCS_IRQ_SHIFT))
intel_lrc_irq_handler(&dev_priv->ring[RCS]);
-   if (tmp & (GT_RENDER_USER_INTERRUPT << 
GEN8_RCS_IRQ_SHIFT))
+   if (iir & (GT_RENDER_USER_INTERRUPT << 
GEN8_RCS_IRQ_SHIFT))
notify_ring(&dev_priv->ring[RCS]);
 
-   if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_BCS_IRQ_SHIFT))
+   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_BCS_IRQ_SHIFT))
intel_lrc_irq_handler(&dev_priv->ring[BCS]);
-   if (tmp & (GT_RENDER_USER_INTERRUPT << 
GEN8_BCS_IRQ_SHIFT))
+   if (iir & (GT_RENDER_USER_INTERRUPT << 
GEN8_BCS_IRQ_SHIFT))
notify_ring(&dev_priv->ring[BCS]);
} else
DRM_ERROR("The master control interrupt lied (GT0)!\n");
}
 
if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
-   u32 tmp = I915_READ_FW(GEN8_GT_IIR(1));
-   if (tmp) {
-   I915_WRITE_FW(GEN8_GT_IIR(1), tmp);
+   u32 iir = I915_READ_FW(GEN8_GT_IIR(1));
+   if (iir) {
+   I915_WRITE_FW(GEN8_GT_IIR(1), iir);
ret = IRQ_HANDLED;
 
-   if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_VCS1_IRQ_SHIFT))
+   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_VCS1_IRQ_SHIFT))
intel_lrc_irq_handler(&dev_priv->ring[VCS]);
-   if (tmp & (GT_RENDER_USER_INTERRUPT << 
GEN8_VCS1_IRQ_SHIFT))
+   if (iir & (GT_RENDER_USER_INTERRUPT << 
GEN8_VCS1_IRQ_SHIFT))
notify_ring(&dev_priv->ring[VCS]);
 
-   if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_VCS2_IRQ_SHIFT))
+   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_VCS2_IRQ_SHIFT))
intel_lrc_irq_handler(&dev_priv->ring[VCS2]);
-   if (tmp & (GT_RENDER_USER_INTERRUPT << 
GEN8_VCS2_IRQ_SHIFT))
+   if (iir & (GT_RENDER_USER_INTERRUPT << 
GEN8_VCS2_IRQ_SHIFT))
notify_ring(&dev_priv->ring[VCS2]);
} else
DRM_ERROR("The master control interrupt lied (GT1)!\n");
}
 
if (master_ctl & GEN8_GT_VECS_IRQ) {
-   u32 tmp = I915_READ_FW(GEN8_GT_IIR(3));
-   if (tmp) {
-   I915_WRITE_FW(GEN8_GT_IIR(3), tmp);
+   u32 iir = I915_READ_FW(GEN8_GT_IIR(3));
+   if (iir) {
+   I915_WRITE_FW(GEN8_GT_IIR(3), iir);
ret = IRQ_HANDLED;
 
-   if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_VECS_IRQ_SHIFT))
+   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_VECS_IRQ_SHIFT))
intel_lrc_irq_handler(&dev_priv->ring[VECS]);
-   if (tmp & (GT_RENDER_USER_INTERRUPT << 
GEN8_VECS_IRQ_SHIFT))
+   if (iir & (GT_RENDER_USER_INTERRUPT << 
GEN8_VECS_IRQ_SHIFT))
notify_ring(&dev_priv->ring[VECS]);
} else
DRM_ERROR("The master control interrupt lied (GT3)!\n");
}
 
if (master_ctl & GEN8_GT_PM_IRQ) {
-   

[Intel-gfx] [PATCH 2/6] drm/i915: Break out common code from gen8_gt_irq_handler

2015-10-20 Thread Nick Hoath
Break out common code from gen8_gt_irq_handler and put it in to
an always inlined function. gcc optimises out the shift at compile
time. (Thomas Daniel/Daniel Vetter/Chris Wilson)

Issue: VIZ-4277
Signed-off-by: Nick Hoath 
Cc: Thomas Daniel 
Cc: Daniel Vetter 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_irq.c | 40 
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index fbf9153..7837f5e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1290,6 +1290,16 @@ static void snb_gt_irq_handler(struct drm_device *dev,
ivybridge_parity_error_irq_handler(dev, gt_iir);
 }
 
+static __always_inline void
+   gen8_cs_irq_handler(struct intel_engine_cs *ring, u32 iir,
+   int test_shift)
+{
+   if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
+   notify_ring(ring);
+   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
+   intel_lrc_irq_handler(ring);
+}
+
 static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
   u32 master_ctl)
 {
@@ -1301,15 +1311,11 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_i915_private *dev_priv,
I915_WRITE_FW(GEN8_GT_IIR(0), iir);
ret = IRQ_HANDLED;
 
-   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_RCS_IRQ_SHIFT))
-   intel_lrc_irq_handler(&dev_priv->ring[RCS]);
-   if (iir & (GT_RENDER_USER_INTERRUPT << 
GEN8_RCS_IRQ_SHIFT))
-   notify_ring(&dev_priv->ring[RCS]);
+   gen8_cs_irq_handler(&dev_priv->ring[RCS],
+   iir, GEN8_RCS_IRQ_SHIFT);
 
-   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_BCS_IRQ_SHIFT))
-   intel_lrc_irq_handler(&dev_priv->ring[BCS]);
-   if (iir & (GT_RENDER_USER_INTERRUPT << 
GEN8_BCS_IRQ_SHIFT))
-   notify_ring(&dev_priv->ring[BCS]);
+   gen8_cs_irq_handler(&dev_priv->ring[BCS],
+   iir, GEN8_BCS_IRQ_SHIFT);
} else
DRM_ERROR("The master control interrupt lied (GT0)!\n");
}
@@ -1320,15 +1326,11 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_i915_private *dev_priv,
I915_WRITE_FW(GEN8_GT_IIR(1), iir);
ret = IRQ_HANDLED;
 
-   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_VCS1_IRQ_SHIFT))
-   intel_lrc_irq_handler(&dev_priv->ring[VCS]);
-   if (iir & (GT_RENDER_USER_INTERRUPT << 
GEN8_VCS1_IRQ_SHIFT))
-   notify_ring(&dev_priv->ring[VCS]);
+   gen8_cs_irq_handler(&dev_priv->ring[VCS],
+   iir, GEN8_VCS1_IRQ_SHIFT);
 
-   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_VCS2_IRQ_SHIFT))
-   intel_lrc_irq_handler(&dev_priv->ring[VCS2]);
-   if (iir & (GT_RENDER_USER_INTERRUPT << 
GEN8_VCS2_IRQ_SHIFT))
-   notify_ring(&dev_priv->ring[VCS2]);
+   gen8_cs_irq_handler(&dev_priv->ring[VCS2],
+   iir, GEN8_VCS2_IRQ_SHIFT);
} else
DRM_ERROR("The master control interrupt lied (GT1)!\n");
}
@@ -1339,10 +1341,8 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_i915_private *dev_priv,
I915_WRITE_FW(GEN8_GT_IIR(3), iir);
ret = IRQ_HANDLED;
 
-   if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << 
GEN8_VECS_IRQ_SHIFT))
-   intel_lrc_irq_handler(&dev_priv->ring[VECS]);
-   if (iir & (GT_RENDER_USER_INTERRUPT << 
GEN8_VECS_IRQ_SHIFT))
-   notify_ring(&dev_priv->ring[VECS]);
+   gen8_cs_irq_handler(&dev_priv->ring[VECS],
+   iir, GEN8_VECS_IRQ_SHIFT);
} else
DRM_ERROR("The master control interrupt lied (GT3)!\n");
}
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 6/6] drm/i915: Only update ringbuf address when necessary

2015-10-20 Thread Nick Hoath
We now only need to update the address of the ringbuf object in the
hw context when it is pinned, and the hw context is first CPU mapped

Issue: VIZ-4277
Signed-off-by: Nick Hoath 
Cc: David Gordon 
---
 drivers/gpu/drm/i915/intel_lrc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a35efcd..2e529a4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -368,7 +368,6 @@ static int execlists_update_context(struct 
drm_i915_gem_request *rq)
WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
 
reg_state[CTX_RING_TAIL+1] = rq->tail;
-   reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
 
if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
/* True 32b PPGTT with dynamic page allocation: update PDP
@@ -1027,6 +1026,9 @@ static int intel_lr_context_do_pin(
if (ret)
goto unpin_rb_obj;
 
+   ctx->engine[ring->id].reg_state[CTX_RING_BUFFER_START+1] =
+   i915_gem_obj_ggtt_offset(ringbuf->obj);
+
ctx_obj->dirty = true;
 
/* Invalidate GuC TLB. */
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 5/6] drm/i915: Add the CPU mapping of the hw context to the pinned items.

2015-10-20 Thread Nick Hoath
Pin the hw ctx mapping so that it is not mapped/unmapped per bb
when doing GuC submission.

v2: Removed interim development extra mapping. (Daniel Vetter)

Issue: VIZ-4277
Signed-off-by: Nick Hoath 
Cc: David Gordon 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 14 ---
 drivers/gpu/drm/i915/i915_drv.h |  4 +++-
 drivers/gpu/drm/i915/intel_lrc.c| 46 ++---
 3 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index a3b22bd..f0a172e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1970,10 +1970,9 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
 
 static void i915_dump_lrc_obj(struct seq_file *m,
  struct intel_engine_cs *ring,
- struct drm_i915_gem_object *ctx_obj)
+ struct drm_i915_gem_object *ctx_obj,
+ uint32_t *reg_state)
 {
-   struct page *page;
-   uint32_t *reg_state;
int j;
unsigned long ggtt_offset = 0;
 
@@ -1996,17 +1995,13 @@ static void i915_dump_lrc_obj(struct seq_file *m,
return;
}
 
-   page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-   if (!WARN_ON(page == NULL)) {
-   reg_state = kmap_atomic(page);
-
+   if (!WARN_ON(reg_state == NULL)) {
for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 
0x%08x\n",
   ggtt_offset + 4096 + (j * 4),
   reg_state[j], reg_state[j + 1],
   reg_state[j + 2], reg_state[j + 3]);
}
-   kunmap_atomic(reg_state);
}
 
seq_putc(m, '\n');
@@ -2034,7 +2029,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
for_each_ring(ring, dev_priv, i) {
if (ring->default_context != ctx)
i915_dump_lrc_obj(m, ring,
- ctx->engine[i].state);
+ ctx->engine[i].state,
+ ctx->engine[i].reg_state);
}
}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0dd4ace..dc69d67 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -881,8 +881,10 @@ struct intel_context {
} legacy_hw_ctx;
 
/* Execlists */
-   struct {
+   struct intel_context_engine {
struct drm_i915_gem_object *state;
+   uint32_t *reg_state;
+   struct page *page;
struct intel_ringbuffer *ringbuf;
int pin_count;
} engine[I915_NUM_RINGS];
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 069950e..a35efcd 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -360,16 +360,13 @@ static int execlists_update_context(struct 
drm_i915_gem_request *rq)
struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
-   struct page *page;
-   uint32_t *reg_state;
+   uint32_t *reg_state = rq->ctx->engine[ring->id].reg_state;
 
BUG_ON(!ctx_obj);
+   WARN_ON(!reg_state);
WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
 
-   page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-   reg_state = kmap_atomic(page);
-
reg_state[CTX_RING_TAIL+1] = rq->tail;
reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
 
@@ -385,8 +382,6 @@ static int execlists_update_context(struct 
drm_i915_gem_request *rq)
ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
}
 
-   kunmap_atomic(reg_state);
-
return 0;
 }
 
@@ -985,7 +980,31 @@ int logical_ring_flush_all_caches(struct 
drm_i915_gem_request *req)
return 0;
 }
 
-static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
+static int intel_mmap_hw_context(struct drm_i915_gem_object *obj,
+   bool unmap)
+{
+   int ret = 0;
+   struct intel_context_engine *ice =
+   (struct intel_context_engine *)obj->mappable;
+   struct page *page;
+   uint32_t *reg_state;
+
+   if (unmap) {
+   kunmap(ice->page);
+   ice->reg_state = NULL;
+   ice->page = NULL;
+   } else {
+   page = i915_gem_object_get_page(obj, LRC_STATE_PN);
+ 

[Intel-gfx] [PATCH 0/6] lrc lifecycle cleanups

2015-10-20 Thread Nick Hoath
These changes are a result of the requests made in VIZ-4277.
Make the lrc path more like the legacy submission path.
Attach the CPU mappings to vma (un)bind, so that the shrinker
also cleans those up.
Pin the CPU mappings while context is busy (pending requests), so
that the mappings aren't released/made continuously as this is
an expensive process.

V2: Removed unnecessary changes in the lrc retiring code path
Removed unnecessary map/unmap

Nick Hoath (6):
  drm/i195: Rename gt_irq_handler variable
  drm/i915: Break out common code from gen8_gt_irq_handler
  drm/i915: Unify execlist and legacy request life-cycles
  drm/i915: Improve dynamic management/eviction of lrc backing objects
  drm/i915: Add the CPU mapping of the hw context to the  pinned
items.
  drm/i915: Only update ringbuf address when necessary

 drivers/gpu/drm/i915/i915_debugfs.c |  14 ++--
 drivers/gpu/drm/i915/i915_drv.h |  10 ++-
 drivers/gpu/drm/i915/i915_gem.c |  26 +++
 drivers/gpu/drm/i915/i915_gem_gtt.c |   8 +++
 drivers/gpu/drm/i915/i915_irq.c |  67 -
 drivers/gpu/drm/i915/intel_lrc.c| 123 +---
 drivers/gpu/drm/i915/intel_lrc.h|   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  71 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |   4 --
 9 files changed, 173 insertions(+), 152 deletions(-)

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Change context lifecycle

2015-11-09 Thread Nick Hoath
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been saved.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
The refcount on the context also has to be extended to cover this
new longer period.

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/i915_gem.c  |  7 +
 drivers/gpu/drm/i915/intel_lrc.c | 58 +---
 drivers/gpu/drm/i915/intel_lrc.h |  1 +
 4 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 20cd6d8..778b14a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -884,6 +884,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool unsaved;
int pin_count;
} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f1e3fde..273946d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1385,6 +1385,13 @@ __i915_gem_request_retire__upto(struct 
drm_i915_gem_request *req)
tmp = list_first_entry(&engine->request_list,
   typeof(*tmp), list);
 
+   if (i915.enable_execlists) {
+   unsigned long flags;
+
+   spin_lock_irqsave(&engine->execlist_lock, flags);
+   intel_lr_context_complete_check(tmp);
+   spin_unlock_irqrestore(&engine->execlist_lock, flags);
+   }
i915_gem_request_retire(tmp);
} while (tmp != req);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 06180dc..d82e903 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -566,13 +566,17 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;
 
-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);
 
spin_lock_irq(&ring->execlist_lock);
 
+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].unsaved) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].unsaved = true;
+   }
+   }
+
list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
if (++num_elements > 2)
break;
@@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct 
intel_engine_cs *ring)
spin_unlock_irq(&ring->execlist_lock);
 
list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-   struct intel_context *ctx = req->ctx;
-   struct drm_i915_gem_object *ctx_obj =
-   ctx->engine[ring->id].state;
-
-   if (ctx_obj && (ctx != ring->default_context))
-   intel_lr_context_unpin(req);
list_del(&req->execlist_link);
i915_gem_request_unreference(req);
}
@@ -1073,6 +1071,31 @@ void intel_lr_context_unpin(struct drm_i915_gem_request 
*rq)
}
 }
 
+void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
+{
+   struct intel_engine_cs *ring = req->ring;
+
+   assert_spin_locked(&ring->execlist_lock);
+
+   if (ring->last_context && ring->last_context != req->ctx) {
+   if (req->ctx != ring->default_context
+   && ring->last_context->engine[ring->id].unsaved) {
+   /* Create fake request for unpinning the old context */
+   struct drm_i915_gem_request tmp;
+
+   tmp.ring = ring;
+   tmp.ctx = ring->last_context;
+   tmp.ringbuf =
+   ring->last_context->engine[ring->id].ringbuf;
+
+   intel_lr_context_unpin(&tmp);
+   ring->last_context->engine[ring->id].unsaved = false;
+   ring->last_context = NULL;
+   }
+   }
+   ring->last_context = req->ctx;
+}
+
 static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request 
*req)
 {
int ret, i;
@@ -2390,7 +2413,22 @@

[Intel-gfx] [PATCH v10] drm/i915: Extend LRC pinning to cover GPU context writeback

2016-01-13 Thread Nick Hoath
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
This fixes an issue with GuC submission where the GPU might not
have finished writing back the context before it is unpinned. This
results in a GPU hang.

v2: Moved the new pin to cover GuC submission (Alex Dai)
Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
already have a request pending on it (Alex Dai)
Rename unsaved to dirty to avoid double negatives (Dave Gordon)
Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
Split out per engine cleanup from context_free as it
was getting unwieldy
Corrected locking (Dave Gordon)
v6: Removed some bikeshedding (Mika Kuoppala)
Added explanation of the GuC hang that this fixes (Daniel Vetter)
v7: Removed extra per request pinning from ring reset code (Alex Dai)
Added forced ring unpin/clean in error case in context free (Alex Dai)
v8: Renamed lrc specific last_context to lrc_last_context as there
were some reset cases where the codepaths leaked (Mika Kuoppala)
NULL'd last_context in reset case - there was a pointer leak
if someone did reset->close context.
v9: Rebase over "Fix context/engine cleanup order"
v10: Rebase over nightly, remove WARN_ON which caused the
dependency on dev.

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
Cc: Mika Kuoppala 
---
 drivers/gpu/drm/i915/i915_drv.h |   1 +
 drivers/gpu/drm/i915/i915_gem.c |   3 +
 drivers/gpu/drm/i915/intel_lrc.c| 138 ++--
 drivers/gpu/drm/i915/intel_lrc.h|   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
 5 files changed, 121 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 104bd18..d28e10a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -882,6 +882,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool dirty;
int pin_count;
} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ddc21d4..7b79405 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1413,6 +1413,9 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
 
+   if (i915.enable_execlists)
+   intel_lr_context_complete_check(request);
+
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
 * of tail of the request to update the last known position
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5027699..b661058 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -585,9 +585,6 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;
 
-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);
 
spin_lock_irq(&ring->execlist_lock);
@@ -763,6 +760,13 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
if (intel_ring_stopped(ring))
return;
 
+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].dirty) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].dirty = true;
+   }
+   }
+
if (dev_priv->guc.execbuf_client)
i915_guc_submit(dev_priv->guc.execbuf_client, request);
else
@@ -989,12 +993,6 @@ void intel_execlists_retire_requests(struct 
intel_engine_cs *ring)
spin_unlock_irq(&ring->execlist_lock);
 
list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-   struct intel_context *ctx = req->ctx;
-   struct drm_i915_gem_object *ctx_obj =
-   ctx->engine[ring->id].state;
-
-   if (ctx_obj && (ctx != ring->default_context))

Re: [Intel-gfx] ✗ failure: Fi.CI.BAT

2016-01-14 Thread Nick Hoath

On 14/01/2016 07:20, Patchwork wrote:

== Summary ==

Built on 058740f8fced6851aeda34f366f5330322cd585f drm-intel-nightly: 
2016y-01m-13d-17h-07m-44s UTC integration manifest

Test gem_ctx_basic:
 pass   -> FAIL   (bdw-ultra)


Test failed to load - not patch related


Test gem_ctx_param_basic:
 Subgroup non-root-set:
 pass   -> DMESG-WARN (bsw-nuc-2)


gem driver allocated a poisoned slab - not patch related


Test kms_flip:
 Subgroup basic-flip-vs-dpms:
 pass   -> SKIP   (bsw-nuc-2)


test reqs not met - not patch related


 dmesg-warn -> PASS   (ilk-hp8440p)


warn to PASS



bdw-nuci7total:138  pass:128  dwarn:1   dfail:0   fail:0   skip:9
bdw-ultratotal:138  pass:131  dwarn:0   dfail:0   fail:1   skip:6
bsw-nuc-2total:141  pass:113  dwarn:3   dfail:0   fail:0   skip:25
hsw-brixbox  total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7
hsw-gt2  total:141  pass:137  dwarn:0   dfail:0   fail:0   skip:4
ilk-hp8440p  total:141  pass:101  dwarn:3   dfail:0   fail:0   skip:37
ivb-t430stotal:135  pass:122  dwarn:3   dfail:4   fail:0   skip:6
skl-i5k-2total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8
skl-i7k-2total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8
snb-dellxps  total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14
snb-x220ttotal:141  pass:122  dwarn:5   dfail:0   fail:1   skip:13

Results at /archive/results/CI_IGT_test/Patchwork_1174/



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v10] drm/i915: Extend LRC pinning to cover GPU context writeback

2016-01-14 Thread Nick Hoath

On 14/01/2016 11:36, Chris Wilson wrote:

On Wed, Jan 13, 2016 at 04:19:45PM +, Nick Hoath wrote:

+   if (ctx->engine[ring->id].dirty) {
+   struct drm_i915_gem_request *req = NULL;
+
+   /**
+* If there is already a request pending on
+* this ring, wait for that to complete,
+* otherwise create a switch to idle request
+*/
+   if (list_empty(&ring->request_list)) {
+   int ret;
+
+   ret = i915_gem_request_alloc(
+   ring,
+   ring->default_context,
+   &req);
+   if (!ret)
+   i915_add_request(req);
+   else
+   DRM_DEBUG("Failed to ensure context saved");
+   } else {
+   req = list_first_entry(
+   &ring->request_list,
+   typeof(*req), list);
+   }
+   if (req) {
+   ret = i915_wait_request(req);
+   if (ret != 0) {
+   /**
+* If we get here, there's probably been a ring
+* reset, so we just clean up the dirty flag &
+* pin count.
+*/
+   ctx->engine[ring->id].dirty = false;
+   __intel_lr_context_unpin(
+   ring,
+   ctx);
+   }
+   }


If you were to take a lr_context_pin on the last_context, and only
release that pin when you change to a new context, you do not need to


That's what this patch does.


introduce a blocking context-close, nor do you need to introduce the
usage of default_context.


The use of default_context here is to stop a context hanging around 
after it is no longer needed.




(lr_context_pin should take a reference on the ctx to prevent early
freeing ofc).


You can't clear the reference on the ctx in an interrupt context.



The code at that point starts to look v.v.similar to legacy, right down
to the need to use a GPU reset during shutdown to prevent writing back
the context image. (Which you still currently need to get rid of the
default context now.)
-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v10] drm/i915: Extend LRC pinning to cover GPU context writeback

2016-01-14 Thread Nick Hoath

On 14/01/2016 12:31, Chris Wilson wrote:

On Thu, Jan 14, 2016 at 11:56:07AM +, Nick Hoath wrote:

On 14/01/2016 11:36, Chris Wilson wrote:

On Wed, Jan 13, 2016 at 04:19:45PM +, Nick Hoath wrote:

+   if (ctx->engine[ring->id].dirty) {
+   struct drm_i915_gem_request *req = NULL;
+
+   /**
+* If there is already a request pending on
+* this ring, wait for that to complete,
+* otherwise create a switch to idle request
+*/
+   if (list_empty(&ring->request_list)) {
+   int ret;
+
+   ret = i915_gem_request_alloc(
+   ring,
+   ring->default_context,
+   &req);
+   if (!ret)
+   i915_add_request(req);
+   else
+   DRM_DEBUG("Failed to ensure context saved");
+   } else {
+   req = list_first_entry(
+   &ring->request_list,
+   typeof(*req), list);
+   }
+   if (req) {
+   ret = i915_wait_request(req);
+   if (ret != 0) {
+   /**
+* If we get here, there's probably been a ring
+* reset, so we just clean up the dirty flag &
+* pin count.
+*/
+   ctx->engine[ring->id].dirty = false;
+   __intel_lr_context_unpin(
+   ring,
+   ctx);
+   }
+   }


If you were to take a lr_context_pin on the last_context, and only
release that pin when you change to a new context, you do not need to


That's what this patch does.


introduce a blocking context-close, nor do you need to introduce the
usage of default_context.


The use of default_context here is to stop a context hanging around
after it is no longer needed.


By blocking, which is not acceptable. Also we can eliminate the
default_context and so pinning that opposed to the last_context serves
no purpose other than by chance having a more preferrable position when
it comes to defragmentation. But you don't enable that anyway and we


Enabling the shrinker on execlists is something I'm working on which is
predicated on this patch. Also why is blocking on closing a context not
acceptable?


have alternative strategies now that avoid the issue with fragmentation
of the mappable aperture.


(lr_context_pin should take a reference on the ctx to prevent early
freeing ofc).


You can't clear the reference on the ctx in an interrupt context.


The execlists submission should moved out of the interrupt context, for
the very simple reason that it is causing machine panics. userspace
submits a workload, machine lockups


Create a jira, and I'm sure we'll look at making that change.


-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v10] drm/i915: Extend LRC pinning to cover GPU context writeback

2016-01-15 Thread Nick Hoath

On 14/01/2016 12:37, Nick Hoath wrote:

On 14/01/2016 12:31, Chris Wilson wrote:

On Thu, Jan 14, 2016 at 11:56:07AM +, Nick Hoath wrote:

On 14/01/2016 11:36, Chris Wilson wrote:

On Wed, Jan 13, 2016 at 04:19:45PM +, Nick Hoath wrote:

+   if (ctx->engine[ring->id].dirty) {
+   struct drm_i915_gem_request *req = NULL;
+
+   /**
+* If there is already a request pending on
+* this ring, wait for that to complete,
+* otherwise create a switch to idle request
+*/
+   if (list_empty(&ring->request_list)) {
+   int ret;
+
+   ret = i915_gem_request_alloc(
+   ring,
+   ring->default_context,
+   &req);
+   if (!ret)
+   i915_add_request(req);
+   else
+   DRM_DEBUG("Failed to ensure context saved");
+   } else {
+   req = list_first_entry(
+   &ring->request_list,
+   typeof(*req), list);
+   }
+   if (req) {
+   ret = i915_wait_request(req);
+   if (ret != 0) {
+   /**
+* If we get here, there's probably been a ring
+* reset, so we just clean up the dirty flag &
+* pin count.
+*/
+   ctx->engine[ring->id].dirty = false;
+   __intel_lr_context_unpin(
+   ring,
+   ctx);
+   }
+   }


If you were to take a lr_context_pin on the last_context, and only
release that pin when you change to a new context, you do not need to


That's what this patch does.


introduce a blocking context-close, nor do you need to introduce the
usage of default_context.


The use of default_context here is to stop a context hanging around
after it is no longer needed.


By blocking, which is not acceptable. Also we can eliminate the
default_context and so pinning that opposed to the last_context serves
no purpose other than by chance having a more preferrable position when
it comes to defragmentation. But you don't enable that anyway and we


Enabling the shrinker on execlists is something I'm working on which is
predicated on this patch. Also why is blocking on closing a context not
acceptable?



As a clarification: Without rewriting the execlist code to not submit or 
cleanup from an interrupt handler, we can't use refcounting to allow non 
blocking closing.



have alternative strategies now that avoid the issue with fragmentation
of the mappable aperture.


(lr_context_pin should take a reference on the ctx to prevent early
freeing ofc).


You can't clear the reference on the ctx in an interrupt context.


The execlists submission should moved out of the interrupt context, for
the very simple reason that it is causing machine panics. userspace
submits a workload, machine lockups


Create a jira, and I'm sure we'll look at making that change.


-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 1/3] drm/i915: simplify allocation of driver-internal requests

2016-01-18 Thread Nick Hoath

On 07/01/2016 10:20, Dave Gordon wrote:

There are a number of places where the driver needs a request, but isn't
working on behalf of any specific user or in a specific context. At
present, we associate them with the per-engine default context. A future
patch will abolish those per-engine context pointers; but we can already
eliminate a lot of the references to them, just by making the allocator
allow NULL as a shorthand for "an appropriate context for this ring",
which will mean that the callers don't need to know anything about how
the "appropriate context" is found (e.g. per-ring vs per-device, etc).

So this patch renames the existing i915_gem_request_alloc(), and makes
it local (static inline), and replaces it with a wrapper that provides
a default if the context is NULL, and also has a nicer calling
convention (doesn't require a pointer to an output parameter). Then we
change all callers to use the new convention:
OLD:
err = i915_gem_request_alloc(ring, user_ctx, &req);
if (err) ...
NEW:
req = i915_gem_request_alloc(ring, user_ctx);
if (IS_ERR(req)) ...
OLD:
err = i915_gem_request_alloc(ring, ring->default_context, &req);
if (err) ...
NEW:
req = i915_gem_request_alloc(ring, NULL);
if (IS_ERR(req)) ...

Signed-off-by: Dave Gordon 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/i915_drv.h|  6 ++--
  drivers/gpu/drm/i915/i915_gem.c| 55 +++---
  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 +---
  drivers/gpu/drm/i915/intel_display.c   |  6 ++--
  drivers/gpu/drm/i915/intel_lrc.c   |  9 +++--
  drivers/gpu/drm/i915/intel_overlay.c   | 24 ++---
  6 files changed, 74 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c6dd4db..c2b000a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2260,9 +2260,9 @@ struct drm_i915_gem_request {

  };

-int i915_gem_request_alloc(struct intel_engine_cs *ring,
-  struct intel_context *ctx,
-  struct drm_i915_gem_request **req_out);
+struct drm_i915_gem_request * __must_check
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+  struct intel_context *ctx);
  void i915_gem_request_cancel(struct drm_i915_gem_request *req);
  void i915_gem_request_free(struct kref *req_ref);
  int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6c60e04..c908ed1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2688,9 +2688,10 @@ void i915_gem_request_free(struct kref *req_ref)
kmem_cache_free(req->i915->requests, req);
  }

-int i915_gem_request_alloc(struct intel_engine_cs *ring,
-  struct intel_context *ctx,
-  struct drm_i915_gem_request **req_out)
+static inline int
+__i915_gem_request_alloc(struct intel_engine_cs *ring,
+struct intel_context *ctx,
+struct drm_i915_gem_request **req_out)
  {
struct drm_i915_private *dev_priv = to_i915(ring->dev);
struct drm_i915_gem_request *req;
@@ -2753,6 +2754,31 @@ err:
return ret;
  }

+/**
+ * i915_gem_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *   This can be NULL if the request is not directly related to
+ *   any specific user context, in which case this function will
+ *   choose an appropriate context to use.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an error code if not.
+ */
+struct drm_i915_gem_request *
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+  struct intel_context *ctx)
+{
+   struct drm_i915_gem_request *req;
+   int err;
+
+   if (ctx == NULL)
+   ctx = engine->default_context;
+   err = __i915_gem_request_alloc(engine, ctx, &req);
+   return err ? ERR_PTR(err) : req;
+}
+
  void i915_gem_request_cancel(struct drm_i915_gem_request *req)
  {
intel_ring_reserved_space_cancel(req->ringbuf);
@@ -3170,9 +3196,13 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
return 0;

if (*to_req == NULL) {
-   ret = i915_gem_request_alloc(to, to->default_context, 
to_req);
-   if (ret)
-   return ret;
+   struct drm_i915_gem_request *req;
+
+   req = i915_gem_request_alloc(to, NULL);
+   if (IS_ERR(req))
+   

Re: [Intel-gfx] [PATCH v3 2/3] drm/i915: abolish separate per-ring default_context pointers

2016-01-18 Thread Nick Hoath

On 07/01/2016 10:20, Dave Gordon wrote:

Now that we've eliminated a lot of uses of ring->default_context,
we can eliminate the pointer itself.

All the engines share the same default intel_context, so we can just
keep a single reference to it in the dev_priv structure rather than one
in each of the engine[] elements. This make refcounting more sensible
too, as we now have a refcount of one for the one pointer, rather than
a refcount of one but multiple pointers.

 From an idea by Chris Wilson.

Signed-off-by: Dave Gordon 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/i915_debugfs.c|  4 ++--
  drivers/gpu/drm/i915/i915_drv.h|  2 ++
  drivers/gpu/drm/i915/i915_gem.c|  6 +++---
  drivers/gpu/drm/i915/i915_gem_context.c| 22 --
  drivers/gpu/drm/i915/i915_gpu_error.c  |  2 +-
  drivers/gpu/drm/i915/i915_guc_submission.c |  6 +++---
  drivers/gpu/drm/i915/intel_lrc.c   | 24 +---
  drivers/gpu/drm/i915/intel_ringbuffer.h|  1 -
  8 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 0fc38bb..2613708 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1943,7 +1943,7 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
seq_puts(m, "HW context ");
describe_ctx(m, ctx);
for_each_ring(ring, dev_priv, i) {
-   if (ring->default_context == ctx)
+   if (dev_priv->kernel_context == ctx)
seq_printf(m, "(default context %s) ",
   ring->name);
}
@@ -2039,7 +2039,7 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)

list_for_each_entry(ctx, &dev_priv->context_list, link) {
for_each_ring(ring, dev_priv, i) {
-   if (ring->default_context != ctx)
+   if (dev_priv->kernel_context != ctx)
i915_dump_lrc_obj(m, ring,
  ctx->engine[i].state);
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c2b000a..aef86a8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1940,6 +1940,8 @@ struct drm_i915_private {
void (*stop_ring)(struct intel_engine_cs *ring);
} gt;

+   struct intel_context *kernel_context;
+
bool edp_low_vswing;

/* perform PHY state sanity checks? */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c908ed1..8f101121 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2678,7 +2678,7 @@ void i915_gem_request_free(struct kref *req_ref)

if (ctx) {
if (i915.enable_execlists) {
-   if (ctx != req->ring->default_context)
+   if (ctx != req->i915->kernel_context)
intel_lr_context_unpin(req);
}

@@ -2774,7 +2774,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
int err;

if (ctx == NULL)
-   ctx = engine->default_context;
+   ctx = to_i915(engine->dev)->kernel_context;
err = __i915_gem_request_alloc(engine, ctx, &req);
return err ? ERR_PTR(err) : req;
  }
@@ -4862,7 +4862,7 @@ i915_gem_init_hw(struct drm_device *dev)
 */
init_unused_rings(dev);

-   BUG_ON(!dev_priv->ring[RCS].default_context);
+   BUG_ON(!dev_priv->kernel_context);

ret = i915_ppgtt_init_hw(dev);
if (ret) {
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 900ffd0..e1d767e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -354,11 +354,10 @@ int i915_gem_context_init(struct drm_device *dev)
  {
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_context *ctx;
-   int i;

/* Init should only be called once per module load. Eventually the
 * restriction on the context_disabled check can be loosened. */
-   if (WARN_ON(dev_priv->ring[RCS].default_context))
+   if (WARN_ON(dev_priv->kernel_context))
return 0;

if (intel_vgpu_active(dev) && HAS_LOGICAL_RING_CONTEXTS(dev)) {
@@ -388,12 +387,7 @@ int i915_gem_context_init(struct drm_device *dev)
return PTR_ERR(ctx);
}

-   for (i = 0; i < I915_NUM_RINGS; i++) {
-   struct intel_engine_cs *ring = &dev_priv->ring[i];
-
-   /* NB: RCS will hold a ref for all rings */
-   ring-

Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: tidy up a few leftovers

2016-01-18 Thread Nick Hoath

On 07/01/2016 10:20, Dave Gordon wrote:

There are a few bits of code which the transformations implemented by
the previous patch reveal to be suboptimal, once the notion of a per-
ring default context has gone away. So this tidies up the leftovers.

It could have been squashed into the previous patch, but that would have
made that patch less clearly a simple transformation. In particular, any
change which alters the code block structure or indentation has been
deferred into this separate patch, because such things tend to make diffs
more difficult to read.

Signed-off-by: Dave Gordon 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/i915_debugfs.c | 15 +--
  drivers/gpu/drm/i915/i915_gem.c |  6 ++
  drivers/gpu/drm/i915/intel_lrc.c| 38 +
  3 files changed, 24 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 2613708..bbb23da 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1942,11 +1942,8 @@ static int i915_context_status(struct seq_file *m, void 
*unused)

seq_puts(m, "HW context ");
describe_ctx(m, ctx);
-   for_each_ring(ring, dev_priv, i) {
-   if (dev_priv->kernel_context == ctx)
-   seq_printf(m, "(default context %s) ",
-  ring->name);
-   }
+   if (ctx == dev_priv->kernel_context)
+   seq_printf(m, "(kernel context) ");

if (i915.enable_execlists) {
seq_putc(m, '\n');
@@ -2037,13 +2034,11 @@ static int i915_dump_lrc(struct seq_file *m, void 
*unused)
if (ret)
return ret;

-   list_for_each_entry(ctx, &dev_priv->context_list, link) {
-   for_each_ring(ring, dev_priv, i) {
-   if (dev_priv->kernel_context != ctx)
+   list_for_each_entry(ctx, &dev_priv->context_list, link)
+   if (ctx != dev_priv->kernel_context)
+   for_each_ring(ring, dev_priv, i)
i915_dump_lrc_obj(m, ring,
  ctx->engine[i].state);
-   }
-   }

mutex_unlock(&dev->struct_mutex);

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8f101121..4f45eb2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2677,10 +2677,8 @@ void i915_gem_request_free(struct kref *req_ref)
i915_gem_request_remove_from_client(req);

if (ctx) {
-   if (i915.enable_execlists) {
-   if (ctx != req->i915->kernel_context)
-   intel_lr_context_unpin(req);
-   }
+   if (i915.enable_execlists && ctx != req->i915->kernel_context)
+   intel_lr_context_unpin(req);

i915_gem_context_unreference(ctx);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5a3..8c4c9b9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -660,16 +660,10 @@ static int execlists_move_to_gpu(struct 
drm_i915_gem_request *req,

  int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request 
*request)
  {
-   int ret;
+   int ret = 0;

request->ringbuf = request->ctx->engine[request->ring->id].ringbuf;

-   if (request->ctx != request->i915->kernel_context) {
-   ret = intel_lr_context_pin(request);
-   if (ret)
-   return ret;
-   }
-
if (i915.enable_guc_submission) {
/*
 * Check that the GuC has space for the request before
@@ -683,7 +677,10 @@ int intel_logical_ring_alloc_request_extras(struct 
drm_i915_gem_request *request
return ret;
}

-   return 0;
+   if (request->ctx != request->i915->kernel_context)
+   ret = intel_lr_context_pin(request);
+
+   return ret;
  }

  static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
@@ -2382,22 +2379,21 @@ void intel_lr_context_free(struct intel_context *ctx)
  {
int i;

-   for (i = 0; i < I915_NUM_RINGS; i++) {
+   for (i = I915_NUM_RINGS; --i >= 0; ) {
+   struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;

-   if (ctx_obj) {
-   struct intel_ringbuffer *ringbuf =
-   ctx->engine[i].ringbuf;
-   struct intel_engine_cs *

[Intel-gfx] [PATCH v11] drm/i915: Extend LRC pinning to cover GPU context writeback

2016-01-19 Thread Nick Hoath
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
This fixes an issue with GuC submission where the GPU might not
have finished writing back the context before it is unpinned. This
results in a GPU hang.

v2: Moved the new pin to cover GuC submission (Alex Dai)
Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
already have a request pending on it (Alex Dai)
Rename unsaved to dirty to avoid double negatives (Dave Gordon)
Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
Split out per engine cleanup from context_free as it
was getting unwieldy
Corrected locking (Dave Gordon)
v6: Removed some bikeshedding (Mika Kuoppala)
Added explanation of the GuC hang that this fixes (Daniel Vetter)
v7: Removed extra per request pinning from ring reset code (Alex Dai)
Added forced ring unpin/clean in error case in context free (Alex Dai)
v8: Renamed lrc specific last_context to lrc_last_context as there
were some reset cases where the codepaths leaked (Mika Kuoppala)
NULL'd last_context in reset case - there was a pointer leak
if someone did reset->close context.
v9: Rebase over "Fix context/engine cleanup order"
v10: Rebase over nightly, remove WARN_ON which caused the
dependency on dev.
v11: Kick BAT rerun

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
Cc: Daniel Vetter 
Cc: David Gordon 
Cc: Chris Wilson 
Cc: Alex Dai 
Cc: Mika Kuoppala 
---
 drivers/gpu/drm/i915/i915_drv.h |   1 +
 drivers/gpu/drm/i915/i915_gem.c |   3 +
 drivers/gpu/drm/i915/intel_lrc.c| 138 ++--
 drivers/gpu/drm/i915/intel_lrc.h|   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
 5 files changed, 121 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 104bd18..d28e10a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -882,6 +882,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   bool dirty;
int pin_count;
} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ddc21d4..7b79405 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1413,6 +1413,9 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
 
+   if (i915.enable_execlists)
+   intel_lr_context_complete_check(request);
+
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
 * of tail of the request to update the last known position
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5027699..b661058 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -585,9 +585,6 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
struct drm_i915_gem_request *cursor;
int num_elements = 0;
 
-   if (request->ctx != ring->default_context)
-   intel_lr_context_pin(request);
-
i915_gem_request_reference(request);
 
spin_lock_irq(&ring->execlist_lock);
@@ -763,6 +760,13 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
if (intel_ring_stopped(ring))
return;
 
+   if (request->ctx != ring->default_context) {
+   if (!request->ctx->engine[ring->id].dirty) {
+   intel_lr_context_pin(request);
+   request->ctx->engine[ring->id].dirty = true;
+   }
+   }
+
if (dev_priv->guc.execbuf_client)
i915_guc_submit(dev_priv->guc.execbuf_client, request);
else
@@ -989,12 +993,6 @@ void intel_execlists_retire_requests(struct 
intel_engine_cs *ring)
spin_unlock_irq(&ring->execlist_lock);
 
list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-   struct intel_context *ctx = req->ctx;
-   struct drm_i915_gem_object *ctx_obj =
-   ctx->engine[ring->id].state;
-
-   if (ctx_obj && (ctx != ring->default_context))

Re: [Intel-gfx] [PATCH 3/3] drm/i915: Fix premature LRC unpin in GuC mode

2016-01-20 Thread Nick Hoath

On 20/01/2016 14:06, Tvrtko Ursulin wrote:


On 20/01/16 13:55, Chris Wilson wrote:

On Wed, Jan 20, 2016 at 01:40:57PM +, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

In GuC mode LRC pinning lifetime depends exclusively on the
request lifetime. Since that is terminated by the seqno update
that opens up a race condition between GPU finishing writing
out the context image and the driver unpinning the LRC.

To extend the LRC lifetime we will employ a similar approach
to what legacy ringbuffer submission does.

We will start tracking the last submitted context per engine
and keep it pinned until it is replaced by another one.

Note that the driver unload path is a bit fragile and could
benefit greatly from efforts to unify the legacy and exec
list submission code paths.

At the moment i915_gem_context_fini has special casing for the
two which are potentially not needed, and also depends on
i915_gem_cleanup_ringbuffer running before itself.

Signed-off-by: Tvrtko Ursulin 
Issue: VIZ-4277
Cc: Chris Wilson 
Cc: Nick Hoath 
---
I cannot test this with GuC but it passes BAT with execlists
and some real world smoke tests.
---
   drivers/gpu/drm/i915/i915_gem_context.c | 4 +++-
   drivers/gpu/drm/i915/intel_lrc.c| 7 +++
   2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index c25083c78ba7..0b419e165836 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -438,7 +438,9 @@ void i915_gem_context_fini(struct drm_device *dev)
for (i = 0; i < I915_NUM_RINGS; i++) {
struct intel_engine_cs *ring = &dev_priv->ring[i];

-   if (ring->last_context)
+   if (ring->last_context && i915.enable_execlists)
+   intel_lr_context_unpin(ring->last_context, ring);
+   else if (ring->last_context)
i915_gem_context_unreference(ring->last_context);

ring->default_context = NULL;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5c3f57fed916..b8a7e126d6d2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -918,6 +918,7 @@ int intel_execlists_submission(struct 
i915_execbuffer_params *params,
struct intel_engine_cs  *ring = params->ring;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ringbuffer *ringbuf = 
params->ctx->engine[ring->id].ringbuf;
+   struct intel_context*ctx = params->request->ctx;
u64 exec_start;
int instp_mode;
u32 instp_mask;
@@ -982,6 +983,12 @@ int intel_execlists_submission(struct 
i915_execbuffer_params *params,

trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);

+   if (ring->last_context && ring->last_context != ctx) {
+   intel_lr_context_unpin(ring->last_context, ring);
+   intel_lr_context_pin(ctx, ring);
+   ring->last_context = ctx;
+   }


I think this is the wrong location and should be part of submitting the
context inside the engine (because intel_execlists_submission should not
as it is entirely duplicating the common GEM batch submission code and
the unique part is engine->add_request()).


So into engine->emit_request you are saying? That works just as well
AFAICS, just making sure I understood correctly.


I think it should go in to intel_logical_ring_advance_and_submit. The 
extra pinning is being put in place to cover GPU usage of the pin. It 
should probably therefore go in to the last common place between 
execlists & GUC, as close to hardware submission as possible.





Note that it should be:

if (engine->last_context != request->ctx) {
if (engine->last_context)
intel_lr_context_unpin(engine->last_context, engine);
engine->last_context = request->ctx;
intel_lr_context_pin(engine->last_context, engine);
}


Ooops!

Regards,

Tvrtko



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/8] drm/i915/gen9: Add GEN8_CS_CHICKEN1 to HW whitelist

2016-01-21 Thread Nick Hoath

On 13/01/2016 10:06, Arun Siluvery wrote:

Required for WaEnablePreemptionGranularityControlByUMD:skl,bxt

Signed-off-by: Arun Siluvery 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/i915_reg.h | 2 ++
  drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++
  2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 6668bb0..1067ff0 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5998,6 +5998,8 @@ enum skl_disp_power_wells {
  #define FF_SLICE_CS_CHICKEN2  _MMIO(0x20e4)
  #define  GEN9_TSG_BARRIER_ACK_DISABLE (1<<8)

+#define GEN8_CS_CHICKEN1   _MMIO(0x2580)
+
  /* GEN7 chicken */
  #define GEN7_COMMON_SLICE_CHICKEN1_MMIO(0x7010)
  # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC((1<<10) | (1<<26))
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 354da81..35e78ed 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -909,6 +909,7 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*ring)
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t tmp;
+   int ret;

/* WaEnableLbsSlaRetryTimerDecrement:skl */
I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
@@ -979,6 +980,11 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*ring)
/* WaDisableSTUnitPowerOptimization:skl,bxt */
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

+   /* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
+   ret= wa_ring_whitelist_reg(ring, GEN8_CS_CHICKEN1);
+   if (ret)
+   return ret;
+
return 0;
  }




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/8] drm/i915/gen9: Add HDC_CHICKEN1 to HW whitelist

2016-01-21 Thread Nick Hoath

On 13/01/2016 10:06, Arun Siluvery wrote:

Required for WaAllowUMDToModifyHDCChicken1:skl,bxt

Signed-off-by: Arun Siluvery 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/i915_reg.h | 2 ++
  drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +
  2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 1067ff0..16ef377 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6045,6 +6045,8 @@ enum skl_disp_power_wells {
  #define  HDC_FORCE_NON_COHERENT   (1<<4)
  #define  HDC_BARRIER_PERFORMANCE_DISABLE  (1<<10)

+#define GEN8_HDC_CHICKEN1  _MMIO(0x7304)
+
  /* GEN9 chicken */
  #define SLICE_ECO_CHICKEN0_MMIO(0x7308)
  #define   PIXEL_MASK_CAMMING_DISABLE  (1 << 14)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 35e78ed..2241a92 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -985,6 +985,11 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*ring)
if (ret)
return ret;

+   /* WaAllowUMDToModifyHDCChicken1:skl,bxt */
+   ret = wa_ring_whitelist_reg(ring, GEN8_HDC_CHICKEN1);
+   if (ret)
+   return ret;
+
return 0;
  }




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 4/8] drm/i915/bxt: Add GEN9_CS_DEBUG_MODE1 to HW whitelist

2016-01-21 Thread Nick Hoath

On 13/01/2016 10:06, Arun Siluvery wrote:

Required for,
WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt
WaDisableObjectLevelPreemptionForInstancedDraw:bxt
WaDisableObjectLevelPreemtionForInstanceId:bxt

According to WA database these are only applicable for BXT:A0 but since
A0 and A1 share the same GT, these are extended for A1 as well.

These are also required for SKL until B0 but not adding them because they
are pre-production steppings.

Signed-off-by: Arun Siluvery 
---
  drivers/gpu/drm/i915/i915_reg.h | 1 +
  drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +
  2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 16ef377..eabd2af 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5998,6 +5998,7 @@ enum skl_disp_power_wells {
  #define FF_SLICE_CS_CHICKEN2  _MMIO(0x20e4)
  #define  GEN9_TSG_BARRIER_ACK_DISABLE (1<<8)

+#define GEN9_CS_DEBUG_MODE1_MMIO(0x20EC)


The pattern seems to be lc for hex (0x20ec)


  #define GEN8_CS_CHICKEN1  _MMIO(0x2580)

  /* GEN7 chicken */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2241a92..7a46cf1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1132,6 +1132,15 @@ static int bxt_init_workarounds(struct intel_engine_cs 
*ring)
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

+   /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
+   /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
+   /* WaDisableObjectLevelPreemtionForInstanceId:bxt */
+   if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
+   ret = wa_ring_whitelist_reg(ring, GEN9_CS_DEBUG_MODE1);
+   if (ret)
+   return ret;
+   }
+
return 0;
  }




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 5/8] drm/i915/bxt: Add GEN8_L3SQCREG4 to HW whitelist

2016-01-21 Thread Nick Hoath

On 13/01/2016 10:06, Arun Siluvery wrote:

Required for WaDisableLSQCROPERFforOCL:bxt

According to WA database these are only applicable for BXT:A0 but since
A0 and A1 share the same GT, these are extended for A1 as well.

Signed-off-by: Arun Siluvery 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7a46cf1..5eb4eea 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1135,10 +1135,15 @@ static int bxt_init_workarounds(struct intel_engine_cs 
*ring)
/* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
+   /* WaDisableLSQCROPERFforOCL:bxt */
if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
ret = wa_ring_whitelist_reg(ring, GEN9_CS_DEBUG_MODE1);
if (ret)
return ret;
+
+   ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4);
+   if (ret)
+   return ret;
}

return 0;



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 6/8] drm/i915/skl: Add GEN8_L3SQCREG4 to HW whitelist

2016-01-21 Thread Nick Hoath

On 13/01/2016 10:06, Arun Siluvery wrote:

Required for WaDisableLSQCROPERFforOCL:skl

Signed-off-by: Arun Siluvery 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 5eb4eea..b8dbd2c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1097,6 +1097,11 @@ static int skl_init_workarounds(struct intel_engine_cs 
*ring)
GEN7_HALF_SLICE_CHICKEN1,
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

+   /* WaDisableLSQCROPERFforOCL:skl */
+   ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4);
+   if (ret)
+   return ret;
+
return skl_tune_iz_hashing(ring);
  }




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 7/8] drm/i915/skl: Enable Per context Preemption granularity control

2016-01-21 Thread Nick Hoath

On 13/01/2016 10:06, Arun Siluvery wrote:

Per context preemption granularity control is only available from SKL:E0+

Cc: Dave Gordon 
Signed-off-by: Arun Siluvery 
---
  drivers/gpu/drm/i915/i915_reg.h |  3 +++
  drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++
  2 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index eabd2af..97774a3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5995,6 +5995,9 @@ enum skl_disp_power_wells {
  #define SKL_DFSM_CDCLK_LIMIT_450  (2 << 23)
  #define SKL_DFSM_CDCLK_LIMIT_337_5(3 << 23)

+#define GEN7_FF_SLICE_CS_CHICKEN1  _MMIO(0x20E0)


0x20e0?


+#define   GEN9_FFSC_PERCTX_PREEMPT_CTRL(1<<14)
+
  #define FF_SLICE_CS_CHICKEN2  _MMIO(0x20e4)
  #define  GEN9_TSG_BARRIER_ACK_DISABLE (1<<8)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b8dbd2c..5a2ad10 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1045,6 +1045,16 @@ static int skl_init_workarounds(struct intel_engine_cs 
*ring)
if (ret)
return ret;

+   /*
+* Actual WA is to disable percontext preemption granularity control
+* until D0 which is the default case so this is equivalent to
+* !WaDisablePerCtxtPreemptionGranularityControl:skl
+*/
+   if (IS_SKL_REVID(dev, SKL_REVID_E0, REVID_FOREVER)) {
+   I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+  _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+   }
+
if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
I915_WRITE(FF_SLICE_CS_CHICKEN2,



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 7/8] drm/i915/skl: Enable Per context Preemption granularity control

2016-01-21 Thread Nick Hoath

On 21/01/2016 14:00, Arun Siluvery wrote:

Per context preemption granularity control is only available from SKL:E0+

Cc: Dave Gordon 
Signed-off-by: Arun Siluvery 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/i915_reg.h |  3 +++
  drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++
  2 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c51e7e9..65e32a3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5995,6 +5995,9 @@ enum skl_disp_power_wells {
  #define SKL_DFSM_CDCLK_LIMIT_450  (2 << 23)
  #define SKL_DFSM_CDCLK_LIMIT_337_5(3 << 23)

+#define GEN7_FF_SLICE_CS_CHICKEN1  _MMIO(0x20e0)
+#define   GEN9_FFSC_PERCTX_PREEMPT_CTRL(1<<14)
+
  #define FF_SLICE_CS_CHICKEN2  _MMIO(0x20e4)
  #define  GEN9_TSG_BARRIER_ACK_DISABLE (1<<8)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ce64519..e91fb70 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1044,6 +1044,16 @@ static int skl_init_workarounds(struct intel_engine_cs 
*ring)
if (ret)
return ret;

+   /*
+* Actual WA is to disable percontext preemption granularity control
+* until D0 which is the default case so this is equivalent to
+* !WaDisablePerCtxtPreemptionGranularityControl:skl
+*/
+   if (IS_SKL_REVID(dev, SKL_REVID_E0, REVID_FOREVER)) {
+   I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+  _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+   }
+
if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
I915_WRITE(FF_SLICE_CS_CHICKEN2,



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 8/8] drm/i915/gen9: Add WaOCLCoherentLineFlush

2016-01-21 Thread Nick Hoath

On 21/01/2016 14:00, Arun Siluvery wrote:

This is mainly required for preemption.

Cc: Dave Gordon 
Signed-off-by: Arun Siluvery 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/intel_ringbuffer.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e91fb70..f26f274 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -979,6 +979,10 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*ring)
/* WaDisableSTUnitPowerOptimization:skl,bxt */
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

+   /* WaOCLCoherentLineFlush:skl,bxt */
+   I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
+   GEN8_LQSC_FLUSH_COHERENT_LINES));
+
/* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
ret= wa_ring_whitelist_reg(ring, GEN8_CS_CHICKEN1);
if (ret)



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 4/8] drm/i915/bxt: Add GEN9_CS_DEBUG_MODE1 to HW whitelist

2016-01-21 Thread Nick Hoath

On 21/01/2016 14:00, Arun Siluvery wrote:

Required for,
WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt
WaDisableObjectLevelPreemptionForInstancedDraw:bxt
WaDisableObjectLevelPreemtionForInstanceId:bxt

According to WA database these are only applicable for BXT:A0 but since
A0 and A1 share the same GT, these are extended for A1 as well.

These are also required for SKL until B0 but not adding them because they
are pre-production steppings.

v2: use lower case in register defines (Nick)

Signed-off-by: Arun Siluvery 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/i915_reg.h | 1 +
  drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +
  2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ed887cf..c51e7e9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5998,6 +5998,7 @@ enum skl_disp_power_wells {
  #define FF_SLICE_CS_CHICKEN2  _MMIO(0x20e4)
  #define  GEN9_TSG_BARRIER_ACK_DISABLE (1<<8)

+#define GEN9_CS_DEBUG_MODE1_MMIO(0x20ec)
  #define GEN8_CS_CHICKEN1  _MMIO(0x2580)

  /* GEN7 chicken */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index fea632f..72e89b6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1131,6 +1131,15 @@ static int bxt_init_workarounds(struct intel_engine_cs 
*ring)
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

+   /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
+   /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
+   /* WaDisableObjectLevelPreemtionForInstanceId:bxt */
+   if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
+   ret = wa_ring_whitelist_reg(ring, GEN9_CS_DEBUG_MODE1);
+   if (ret)
+   return ret;
+   }
+
return 0;
  }




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v12] drm/i915: Extend LRC pinning to cover GPU context writeback

2016-01-22 Thread Nick Hoath
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
This fixes an issue with GuC submission where the GPU might not
have finished writing back the context before it is unpinned. This
results in a GPU hang.

v2: Moved the new pin to cover GuC submission (Alex Dai)
Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
already have a request pending on it (Alex Dai)
Rename unsaved to dirty to avoid double negatives (Dave Gordon)
Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
Split out per engine cleanup from context_free as it
was getting unwieldy
Corrected locking (Dave Gordon)
v6: Removed some bikeshedding (Mika Kuoppala)
Added explanation of the GuC hang that this fixes (Daniel Vetter)
v7: Removed extra per request pinning from ring reset code (Alex Dai)
Added forced ring unpin/clean in error case in context free (Alex Dai)
v8: Renamed lrc specific last_context to lrc_last_context as there
were some reset cases where the codepaths leaked (Mika Kuoppala)
NULL'd last_context in reset case - there was a pointer leak
if someone did reset->close context.
v9: Rebase over "Fix context/engine cleanup order"
v10: Rebase over nightly, remove WARN_ON which caused the
dependency on dev.
v11: Kick BAT rerun
v12: Rebase

Signed-off-by: Nick Hoath 
Issue: VIZ-4277
---
 drivers/gpu/drm/i915/intel_lrc.c | 37 +++--
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dbf3729..b469817 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -779,10 +779,10 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
if (intel_ring_stopped(request->ring))
return 0;
 
-   if (request->ctx != ring->default_context) {
-   if (!request->ctx->engine[ring->id].dirty) {
+   if (request->ctx != request->ctx->i915->kernel_context) {
+   if (!request->ctx->engine[request->ring->id].dirty) {
intel_lr_context_pin(request);
-   request->ctx->engine[ring->id].dirty = true;
+   request->ctx->engine[request->ring->id].dirty = true;
}
}
 
@@ -2447,9 +2447,7 @@ intel_lr_context_clean_ring(struct intel_context *ctx,
struct drm_i915_gem_object *ctx_obj,
struct intel_ringbuffer *ringbuf)
 {
-   int ret;
-
-   if (ctx == ring->default_context) {
+   if (ctx == ctx->i915->kernel_context) {
intel_unpin_ringbuffer_obj(ringbuf);
i915_gem_object_ggtt_unpin(ctx_obj);
}
@@ -2463,13 +2461,10 @@ intel_lr_context_clean_ring(struct intel_context *ctx,
 * otherwise create a switch to idle request
 */
if (list_empty(&ring->request_list)) {
-   int ret;
-
-   ret = i915_gem_request_alloc(
+   req = i915_gem_request_alloc(
ring,
-   ring->default_context,
-   &req);
-   if (!ret)
+   NULL);
+   if (!IS_ERR(req))
i915_add_request(req);
else
DRM_DEBUG("Failed to ensure context saved");
@@ -2479,6 +2474,8 @@ intel_lr_context_clean_ring(struct intel_context *ctx,
typeof(*req), list);
}
if (req) {
+   int ret;
+
ret = i915_wait_request(req);
if (ret != 0) {
/**
@@ -2515,17 +2512,13 @@ void intel_lr_context_free(struct intel_context *ctx)
struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
 
-   if (!ctx_obj)
-   continue;
-
-   if (ctx == ctx->i915->kernel_context) {
-   intel_unpin_ringbuffer_obj(ringbuf);
-   i915

Re: [Intel-gfx] [PATCH v12] drm/i915: Extend LRC pinning to cover GPU context writeback

2016-01-26 Thread Nick Hoath

On 25/01/2016 18:19, Daniel Vetter wrote:

On Fri, Jan 22, 2016 at 02:25:27PM +, Nick Hoath wrote:

Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.
This fixes an issue with GuC submission where the GPU might not
have finished writing back the context before it is unpinned. This
results in a GPU hang.

v2: Moved the new pin to cover GuC submission (Alex Dai)
 Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
 context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
 already have a request pending on it (Alex Dai)
 Rename unsaved to dirty to avoid double negatives (Dave Gordon)
 Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
 Split out per engine cleanup from context_free as it
 was getting unwieldy
 Corrected locking (Dave Gordon)
v6: Removed some bikeshedding (Mika Kuoppala)
 Added explanation of the GuC hang that this fixes (Daniel Vetter)
v7: Removed extra per request pinning from ring reset code (Alex Dai)
 Added forced ring unpin/clean in error case in context free (Alex Dai)
v8: Renamed lrc specific last_context to lrc_last_context as there
 were some reset cases where the codepaths leaked (Mika Kuoppala)
 NULL'd last_context in reset case - there was a pointer leak
 if someone did reset->close context.
v9: Rebase over "Fix context/engine cleanup order"
v10: Rebase over nightly, remove WARN_ON which caused the
 dependency on dev.
v11: Kick BAT rerun
v12: Rebase

Signed-off-by: Nick Hoath 
Issue: VIZ-4277


When resending patches, please include everyone who ever commented on this
in Cc: lines here. It's for the record and helps in assigning blame when
things inevitably blow up again ;-)


Even when it's just a resend to cause a BAT run for coverage?


-Daniel


---
  drivers/gpu/drm/i915/intel_lrc.c | 37 +++--
  1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dbf3729..b469817 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -779,10 +779,10 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
if (intel_ring_stopped(request->ring))
return 0;

-   if (request->ctx != ring->default_context) {
-   if (!request->ctx->engine[ring->id].dirty) {
+   if (request->ctx != request->ctx->i915->kernel_context) {
+   if (!request->ctx->engine[request->ring->id].dirty) {
intel_lr_context_pin(request);
-   request->ctx->engine[ring->id].dirty = true;
+   request->ctx->engine[request->ring->id].dirty = true;
}
}

@@ -2447,9 +2447,7 @@ intel_lr_context_clean_ring(struct intel_context *ctx,
struct drm_i915_gem_object *ctx_obj,
struct intel_ringbuffer *ringbuf)
  {
-   int ret;
-
-   if (ctx == ring->default_context) {
+   if (ctx == ctx->i915->kernel_context) {
intel_unpin_ringbuffer_obj(ringbuf);
i915_gem_object_ggtt_unpin(ctx_obj);
}
@@ -2463,13 +2461,10 @@ intel_lr_context_clean_ring(struct intel_context *ctx,
 * otherwise create a switch to idle request
 */
if (list_empty(&ring->request_list)) {
-   int ret;
-
-   ret = i915_gem_request_alloc(
+   req = i915_gem_request_alloc(
ring,
-   ring->default_context,
-   &req);
-   if (!ret)
+   NULL);
+   if (!IS_ERR(req))
i915_add_request(req);
else
DRM_DEBUG("Failed to ensure context saved");
@@ -2479,6 +2474,8 @@ intel_lr_context_clean_ring(struct intel_context *ctx,
typeof(*req), list);
}
if (req) {
+   int ret;
+
ret = i915_wait_request(req);
if (ret != 0) {
/**
@@ -2515,17 +2512,13 @@ void intel_lr_context_free(struct intel_co

[Intel-gfx] [RFC] drm/i915: Add flag to enable virtual mappings above 4Gb

2015-03-18 Thread Nick Hoath
Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset hardware
workarounds require that GeneralStateOffset & InstructionBaseOffset
are restricted to a 32 bit address space.

This is a preparatory patch prior to supporting 64bit virtual memory
allocations.

Allow the user space to flag that a mapping can occur beyond
the 32bit limit. This allows backward compatibility and user space
drivers that haven't been enhanced to support these workarounds to
function.

Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/i915_drv.h |  6 ++
 drivers/gpu/drm/i915/i915_gem.c | 18 +++---
 include/uapi/drm/i915_drm.h |  7 ++-
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3cc0196..1e6fc1d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2066,6 +2066,12 @@ struct drm_i915_gem_object {
unsigned int has_dma_mapping:1;
 
unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
+
+   /**
+* If the object should be mapped in to the bottom 4Gb
+* memory space only, then this flag should not be set
+*/
+   unsigned int hi_mem:1;
 
struct sg_table *pages;
int pages_pin_count;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 61134ab..efa782c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -395,7 +395,9 @@ static int
 i915_gem_create(struct drm_file *file,
struct drm_device *dev,
uint64_t size,
-   uint32_t *handle_p)
+   uint32_t *handle_p,
+   uint32_t flags
+   )
 {
struct drm_i915_gem_object *obj;
int ret;
@@ -410,6 +412,9 @@ i915_gem_create(struct drm_file *file,
if (obj == NULL)
return -ENOMEM;
 
+   if (flags & I915_CREATE_FLAG_HI_MEM)
+   obj->hi_mem = 1;
+
ret = drm_gem_handle_create(file, &obj->base, &handle);
/* drop reference from allocate - handle holds it now */
drm_gem_object_unreference_unlocked(&obj->base);
@@ -429,7 +434,8 @@ i915_gem_dumb_create(struct drm_file *file,
args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
args->size = args->pitch * args->height;
return i915_gem_create(file, dev,
-  args->size, &args->handle);
+  args->size, &args->handle,
+  I915_CREATE_FLAG_HI_MEM);
 }
 
 /**
@@ -440,9 +446,10 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
  struct drm_file *file)
 {
struct drm_i915_gem_create *args = data;
 
return i915_gem_create(file, dev,
-  args->size, &args->handle);
+  args->size, &args->handle,
+  args->flags);
 }
 
 static inline int
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6eed16b..eb2e7d9 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -428,6 +428,8 @@ struct drm_i915_gem_init {
__u64 gtt_end;
 };
 
+#define I915_CREATE_FLAG_HI_MEM0x0001
+
 struct drm_i915_gem_create {
/**
 * Requested size for the object.
@@ -441,7 +443,10 @@ struct drm_i915_gem_create {
 * Object handles are nonzero.
 */
__u32 handle;
-   __u32 pad;
+   /**
+* Object creation flags
+*/
+   __u32 flags;
 };
 
 struct drm_i915_gem_pread {
-- 
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 12/49] drm/i915/bxt: HardWare WorkAround ring initialisation for Broxton

2015-03-19 Thread Nick Hoath

On 17/03/2015 09:39, Imre Deak wrote:

From: Nick Hoath 

Adds framework for Broxton HW WAs

Signed-off-by: Nick Hoath 
Signed-off-by: Imre Deak 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/intel_ringbuffer.c | 12 ++--
  1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 441e250..abe062a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1027,6 +1027,13 @@ static int skl_init_workarounds(struct intel_engine_cs 
*ring)
return skl_tune_iz_hashing(ring);
  }

+static int bxt_init_workarounds(struct intel_engine_cs *ring)
+{
+   gen9_init_workarounds(ring);
+
+   return 0;
+}
+
  int init_workarounds_ring(struct intel_engine_cs *ring)
  {
struct drm_device *dev = ring->dev;
@@ -1044,8 +1051,9 @@ int init_workarounds_ring(struct intel_engine_cs *ring)

if (IS_SKYLAKE(dev))
return skl_init_workarounds(ring);
-   else if (IS_GEN9(dev))
-   return gen9_init_workarounds(ring);
+
+   if (IS_BROXTON(dev))
+   return bxt_init_workarounds(ring);

return 0;
  }



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 13/49] drm/i915/bxt: add bxt_init_clock_gating

2015-03-19 Thread Nick Hoath

On 17/03/2015 09:39, Imre Deak wrote:

Signed-off-by: Imre Deak 
---
  drivers/gpu/drm/i915/intel_pm.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b89ab4d..3d4a7c3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -94,6 +94,11 @@ static void skl_init_clock_gating(struct drm_device *dev)
   GEN8_LQSC_RO_PERF_DIS);
  }

+static void bxt_init_clock_gating(struct drm_device *dev)
+{
+   gen9_init_clock_gating(dev);
+}
+
  static void i915_pineview_get_mem_freq(struct drm_device *dev)
  {
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -6503,7 +6508,12 @@ void intel_init_pm(struct drm_device *dev)
if (INTEL_INFO(dev)->gen >= 9) {
skl_setup_wm_latency(dev);

-   dev_priv->display.init_clock_gating = skl_init_clock_gating;
+   if (IS_BROXTON(dev))
+   dev_priv->display.init_clock_gating =
+   bxt_init_clock_gating;
+   else
+   dev_priv->display.init_clock_gating =
+   skl_init_clock_gating;


This doesn't match the style in: "HardWare WorkAround ring 
initialisation for Broxton", where we explicitly check the IS_BROXTON 
and IS_SKYLAKE state.



dev_priv->display.update_wm = skl_update_wm;
dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
} else if (HAS_PCH_SPLIT(dev)) {



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/bxt: Add Broxton steppings

2015-03-20 Thread Nick Hoath
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/i915_drv.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eb38cd1..eec271a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2328,6 +2328,10 @@ struct drm_i915_cmd_table {
 #define SKL_REVID_D0   (0x3)
 #define SKL_REVID_E0   (0x4)
 
+#define BXT_REVID_A0   (0x0)
+#define BXT_REVID_B0   (0x3)
+#define BXT_REVID_C0   (0x6)
+
 /*
  * The genX designation typically refers to the render engine, so render
  * capability related checks should use IS_GEN, while display and other checks
-- 
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 16/49] drm/i915/bxt: add WaDisableMaskBasedCammingInRCC workaround

2015-03-20 Thread Nick Hoath

On 17/03/2015 09:39, Imre Deak wrote:

From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
Signed-off-by: Imre Deak 
---
  drivers/gpu/drm/i915/i915_reg.h | 4 
  drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +
  2 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b7ba061..1d074e8 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5346,6 +5346,10 @@ enum skl_disp_power_wells {
  #define  HDC_FORCE_NON_COHERENT   (1<<4)
  #define  HDC_BARRIER_PERFORMANCE_DISABLE  (1<<10)

+/* GEN9 chicken */
+#define SLICE_ECO_CHICKEN0 0x7308
+#define   PIXEL_MASK_CAMMING_DISABLE   (1 << 14)
+
  /* WaCatErrorRejectionIssue */
  #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG0x9030
  #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index abe062a..e23cbdc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -966,6 +966,15 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*ring)
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
  GEN9_CCS_TLB_PREFETCH_ENABLE);

+   /*
+* FIXME: don't apply the following on BXT for stepping C. On BXT A0
+* the flag reads back as 0.
+*/


I've just posted a patch with the stepping macros. You can use these in 
the same way as for Skylake.



+   /* WaDisableMaskBasedCammingInRCC:bxtA */
+   if (IS_BROXTON(dev))
+   WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
+ PIXEL_MASK_CAMMING_DISABLE);
+
return 0;
  }




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 17/49] drm/i915/skl: add WaDisableMaskBasedCammingInRCC workaround

2015-03-20 Thread Nick Hoath

On 17/03/2015 09:39, Imre Deak wrote:

From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
Signed-off-by: Imre Deak 
---
  drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e23cbdc..000f608 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -970,8 +970,8 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*ring)
 * FIXME: don't apply the following on BXT for stepping C. On BXT A0
 * the flag reads back as 0.
 */
-   /* WaDisableMaskBasedCammingInRCC:bxtA */
-   if (IS_BROXTON(dev))
+   /* WaDisableMaskBasedCammingInRCC:sklC,bxtA */
+   if (INTEL_REVID(dev) == SKL_REVID_C0 || IS_BROXTON(dev))
This looks wrong. (IS_BROXTON && BXT_REVID_C0) || (IS_SKYLAKE && 
SKL_REVID_C0) please.

WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
  PIXEL_MASK_CAMMING_DISABLE);




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 14/49] drm/i915/bxt: add GEN8_SDEUNIT_CLOCK_GATE_DISABLE workaround

2015-03-20 Thread Nick Hoath

On 17/03/2015 13:06, Imre Deak wrote:

On ti, 2015-03-17 at 11:35 +0100, Daniel Vetter wrote:

On Tue, Mar 17, 2015 at 11:39:40AM +0200, Imre Deak wrote:

Signed-off-by: Imre Deak 
---
  drivers/gpu/drm/i915/intel_pm.c | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 3d4a7c3..d5dd0b3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -96,7 +96,18 @@ static void skl_init_clock_gating(struct drm_device *dev)

  static void bxt_init_clock_gating(struct drm_device *dev)
  {
+   struct drm_i915_private *dev_priv = dev->dev_private;
+
gen9_init_clock_gating(dev);
+
+   /*
+* FIXME:
+* GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only.


We have pci revid macros now. Do you have plans to roll similar ones out
for bxt?


Yes. It may be that for BXT we also need to look at the PCI_REVISION_ID
field besides PCI_CLASS_REVISION, I still have to figure out the exact
mapping. (And also understand the meaning/difference between SOC vs. GT
revision IDs).


I've posted a patch with the Broxton revision ID's from the specs.



--Imre

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/bxt: Add A1 stepping for Broxton

2015-03-20 Thread Nick Hoath
This stepping isn't listed separately in the specs, so needs confirmation.

Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/i915_drv.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eec271a..68fb41a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2329,6 +2329,7 @@ struct drm_i915_cmd_table {
 #define SKL_REVID_E0   (0x4)
 
 #define BXT_REVID_A0   (0x0)
+#define BXT_REVID_A1   (0x1)
 #define BXT_REVID_B0   (0x3)
 #define BXT_REVID_C0   (0x6)
 
-- 
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 16/49] drm/i915/bxt: add WaDisableMaskBasedCammingInRCC workaround

2015-03-25 Thread Nick Hoath

On 20/03/2015 10:25, Deak, Imre wrote:

On Fri, 2015-03-20 at 09:05 +, Nick Hoath wrote:

On 17/03/2015 09:39, Imre Deak wrote:

From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
Signed-off-by: Imre Deak 
---
   drivers/gpu/drm/i915/i915_reg.h | 4 
   drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +
   2 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b7ba061..1d074e8 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5346,6 +5346,10 @@ enum skl_disp_power_wells {
   #define  HDC_FORCE_NON_COHERENT  (1<<4)
   #define  HDC_BARRIER_PERFORMANCE_DISABLE (1<<10)

+/* GEN9 chicken */
+#define SLICE_ECO_CHICKEN0 0x7308
+#define   PIXEL_MASK_CAMMING_DISABLE   (1 << 14)
+
   /* WaCatErrorRejectionIssue */
   #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG   0x9030
   #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB(1<<11)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index abe062a..e23cbdc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -966,6 +966,15 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*ring)
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
  GEN9_CCS_TLB_PREFETCH_ENABLE);

+   /*
+* FIXME: don't apply the following on BXT for stepping C. On BXT A0
+* the flag reads back as 0.
+*/


I've just posted a patch with the stepping macros. You can use these in
the same way as for Skylake.


I'm not so happy to make these changes at this point. Without them we
still have a correct - even if conservative - behavior on other
steppings. There are quite a few places marked with FIXME that need
improvement in a similar way and I'd leave them as-is for now to keep as
close as possible to the good known working state (as of the power-on)
and to make merging of this initial patchset fast.



In that case:
Reviewed-by: Nick Hoath 




+   /* WaDisableMaskBasedCammingInRCC:bxtA */
+   if (IS_BROXTON(dev))
+   WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
+ PIXEL_MASK_CAMMING_DISABLE);
+
return 0;
   }









___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 14/49] drm/i915/bxt: add GEN8_SDEUNIT_CLOCK_GATE_DISABLE workaround

2015-03-25 Thread Nick Hoath

On 20/03/2015 10:37, Deak, Imre wrote:

On Fri, 2015-03-20 at 09:08 +, Nick Hoath wrote:

On 17/03/2015 13:06, Imre Deak wrote:

On ti, 2015-03-17 at 11:35 +0100, Daniel Vetter wrote:

On Tue, Mar 17, 2015 at 11:39:40AM +0200, Imre Deak wrote:

Signed-off-by: Imre Deak 
---
   drivers/gpu/drm/i915/intel_pm.c | 11 +++
   1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 3d4a7c3..d5dd0b3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -96,7 +96,18 @@ static void skl_init_clock_gating(struct drm_device *dev)

   static void bxt_init_clock_gating(struct drm_device *dev)
   {
+   struct drm_i915_private *dev_priv = dev->dev_private;
+
gen9_init_clock_gating(dev);
+
+   /*
+* FIXME:
+* GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only.


We have pci revid macros now. Do you have plans to roll similar ones out
for bxt?


Yes. It may be that for BXT we also need to look at the PCI_REVISION_ID
field besides PCI_CLASS_REVISION, I still have to figure out the exact
mapping. (And also understand the meaning/difference between SOC vs. GT
revision IDs).


Ok, the above is red herring. PCI_REVISION_ID is just the 8 low bits of
PCI_CLASS_REVISION, so we can reuse INTEL_REVID as-is.


I've posted a patch with the Broxton revision ID's from the specs.


It looks ok, but I prefer adding them as a follow-up to this patchset.



Reviewed-by: Nick Hoath 

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/bxt: Enable existing gen9 hardware workarounds for Broxton

2015-04-07 Thread Nick Hoath
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/i915_reg.h |  8 +
 drivers/gpu/drm/i915/intel_pm.c |  2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 53 +++--
 3 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 7e1a0fd9..91eef06 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5339,6 +5339,7 @@ enum skl_disp_power_wells {
 
 #define GEN8_L3SQCREG4 0xb118
 #define  GEN8_LQSC_RO_PERF_DIS (1<<27)
+#define  GEN8_PIPELINE_FLUSH_COHERENT_LINES(1<<21)
 
 /* GEN8 chicken */
 #define HDC_CHICKEN0   0x7300
@@ -7324,4 +7325,11 @@ enum skl_disp_power_wells {
 #define _PALETTE_A (dev_priv->info.display_mmio_offset + 0xa000)
 #define _PALETTE_B (dev_priv->info.display_mmio_offset + 0xa800)
 
+/*
+ * Chicken Registers for LLC/eLLC Hot Spotting Avoidance Mode for
+ * 3D/Media Compressed Resources
+ */
+#define GEN9_CHICKEN_MISC1_REG 0x42080
+#define GEN9_CHICKEN_MISC1_NEW_HASH_ENABLE (1<<15)
+
 #endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c3c473d..bbb5d64 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -96,6 +96,8 @@ static void skl_init_clock_gating(struct drm_device *dev)
 
 static void bxt_init_clock_gating(struct drm_device *dev)
 {
+   struct drm_i915_private *dev_priv = dev->dev_private;
+
gen9_init_clock_gating(dev);
 
/* WaVSRefCountFullforceMissDisable:bxt */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 44c7b99..741bdfa 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -916,7 +916,7 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*ring)
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
 
-   /* WaDisablePartialInstShootdown:skl */
+   /* WaDisablePartialInstShootdown:skl,bxt */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
@@ -924,45 +924,43 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*ring)
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
 
-   if (INTEL_REVID(dev) == SKL_REVID_A0 ||
-   INTEL_REVID(dev) == SKL_REVID_B0) {
-   /* WaDisableDgMirrorFixInHalfSliceChicken5:skl */
+   if (
+   (IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
+   INTEL_REVID(dev) == SKL_REVID_B0)) ||
+   (IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0)
+   ) {
+   /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
  GEN9_DG_MIRROR_FIX_ENABLE);
}
 
-   if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) {
-   /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl */
+   if (
+   (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
+   (IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0)
+  ) {
+   /* 
WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
  GEN9_RHWO_OPTIMIZATION_DISABLE);
WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0,
  DISABLE_PIXEL_MASK_CAMMING);
}
 
-   if (INTEL_REVID(dev) >= SKL_REVID_C0) {
-   /* WaEnableYV12BugFixInHalfSliceChicken7:skl */
+   if (
+   (IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
+   (IS_BROXTON(dev))
+  ) {
+   /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
  GEN9_ENABLE_YV12_BUGFIX);
}
 
-   if (INTEL_REVID(dev) <= SKL_REVID_D0) {
-   /*
-*Use Force Non-Coherent whenever executing a 3D context. This
-* is a workaround for a possible hang in the unlikely event
-* a TLB invalidation occurs during a PSD flush.
-*/
-   /* WaForceEnableNonCoherent:skl */
-   WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_FORCE_NON_COHERENT);
-   }
-
-   /* Wa4x4STCOptimizationDisable:skl */
+   /* Wa4x4STCOptimizationDisable:skl,bxt */
WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 
-   /* WaDisablePartialResolveInVc

Re: [Intel-gfx] [PATCH v2 13/49] drm/i915/bxt: add bxt_init_clock_gating

2015-04-08 Thread Nick Hoath

On 27/03/2015 12:00, Deak, Imre wrote:

v2:
- Make the condition to select between SKL and BXT consistent with the
   corresponding condition in init_workarounds_ring (Nick)



Reviewed-by: Nick Hoath 


Signed-off-by: Imre Deak 
---
  drivers/gpu/drm/i915/intel_pm.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c52f8b7..8a8d52a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -94,6 +94,11 @@ static void skl_init_clock_gating(struct drm_device *dev)
   GEN8_LQSC_RO_PERF_DIS);
  }

+static void bxt_init_clock_gating(struct drm_device *dev)
+{
+   gen9_init_clock_gating(dev);
+}
+
  static void i915_pineview_get_mem_freq(struct drm_device *dev)
  {
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -6548,7 +6553,12 @@ void intel_init_pm(struct drm_device *dev)
if (INTEL_INFO(dev)->gen >= 9) {
skl_setup_wm_latency(dev);

-   dev_priv->display.init_clock_gating = skl_init_clock_gating;
+   if (IS_BROXTON(dev))
+   dev_priv->display.init_clock_gating =
+   bxt_init_clock_gating;
+   else if (IS_SKYLAKE(dev))
+   dev_priv->display.init_clock_gating =
+   skl_init_clock_gating;
dev_priv->display.update_wm = skl_update_wm;
dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
} else if (HAS_PCH_SPLIT(dev)) {



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 11/49] drm/i915/gen9: fix PIPE_CONTROL flush for VS_INVALIDATE

2015-04-08 Thread Nick Hoath

On 17/03/2015 09:39, Imre Deak wrote:

On GEN9+ per specification a NULL PIPE_CONTROL needs to be emitted
before any PIPE_CONTROL command with the VS_INVALIDATE flag set.

Signed-off-by: Imre Deak 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/intel_lrc.c | 19 ++-
  1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fcb074b..71aeeb3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1262,6 +1262,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer 
*ringbuf,
  {
struct intel_engine_cs *ring = ringbuf->ring;
u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+   bool vf_flush_wa;
u32 flags = 0;
int ret;

@@ -1283,10 +1284,26 @@ static int gen8_emit_flush_render(struct 
intel_ringbuffer *ringbuf,
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
}

-   ret = intel_logical_ring_begin(ringbuf, ctx, 6);
+   /*
+* On GEN9+ Before VF_CACHE_INVALIDATE we need to emit a NULL pipe
+* control.
+*/
+   vf_flush_wa = INTEL_INFO(ring->dev)->gen >= 9 &&
+ flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
+
+   ret = intel_logical_ring_begin(ringbuf, ctx, vf_flush_wa ? 12 : 6);
if (ret)
return ret;

+   if (vf_flush_wa) {
+   intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+   intel_logical_ring_emit(ringbuf, 0);
+   intel_logical_ring_emit(ringbuf, 0);
+   intel_logical_ring_emit(ringbuf, 0);
+   intel_logical_ring_emit(ringbuf, 0);
+   intel_logical_ring_emit(ringbuf, 0);
+   }
+
intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
intel_logical_ring_emit(ringbuf, flags);
intel_logical_ring_emit(ringbuf, scratch_addr);



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 15/49] drm/i915/bxt: add GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ workaround

2015-04-08 Thread Nick Hoath

On 17/03/2015 09:39, Imre Deak wrote:

From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
Signed-off-by: Imre Deak 
---
  drivers/gpu/drm/i915/i915_reg.h | 1 +
  drivers/gpu/drm/i915/intel_pm.c | 4 +++-
  2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3369a11..b7ba061 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6104,6 +6104,7 @@ enum skl_disp_power_wells {
  #define GEN8_UCGCTL6  0x9430
  #define   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE(1<<24)
  #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE (1<<14)
+#define   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1<<28)

  #define GEN6_GFXPAUSE 0xA000
  #define GEN6_RPNSWREQ 0xA008
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d5dd0b3..52d3c02 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -103,10 +103,12 @@ static void bxt_init_clock_gating(struct drm_device *dev)
/*
 * FIXME:
 * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only.
+* GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
 */
 /* WaDisableSDEUnitClockGating:bxt */


I can't find where WaDisableSDEUnitClockGating is listed as required for 
BXT?



I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
-  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+  GEN8_SDEUNIT_CLOCK_GATE_DISABLE |
+  GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

  }




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 18/49] drm/i915/bxt: add workaround to avoid PTE corruption

2015-04-08 Thread Nick Hoath

On 17/03/2015 09:39, Imre Deak wrote:

From: Robert Beckett 

Set TLBPF in TILECTL. This fixes an issue with BXT HW seeing
corrupted pte entries.

v2:
- move the workaround to bxt_init_clock_gating (imre)

Signed-off-by: Robert Beckett  (v1)
Signed-off-by: Imre Deak 


Reviewed-by: Nick Hoath 


---
  drivers/gpu/drm/i915/i915_reg.h | 1 +
  drivers/gpu/drm/i915/intel_pm.c | 2 ++
  2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 1d074e8..d69d7b9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1151,6 +1151,7 @@ enum skl_disp_power_wells {
  /* control register for cpu gtt access */
  #define TILECTL   0x101000
  #define   TILECTL_SWZCTL  (1 << 0)
+#define   TILECTL_TLBPF(1 << 1)
  #define   TILECTL_TLB_PREFETCH_DIS(1 << 2)
  #define   TILECTL_BACKSNOOP_DIS   (1 << 3)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 52d3c02..d3f2557 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -110,6 +110,8 @@ static void bxt_init_clock_gating(struct drm_device *dev)
   GEN8_SDEUNIT_CLOCK_GATE_DISABLE |
   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

+   /* FIXME: apply on A0 only */
+   I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
  }

  static void i915_pineview_get_mem_freq(struct drm_device *dev)



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 15/49] drm/i915/bxt: add GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ workaround

2015-04-08 Thread Nick Hoath

On 08/04/2015 14:10, Deak, Imre wrote:

On ke, 2015-04-08 at 14:04 +0100, Nick Hoath wrote:

On 17/03/2015 09:39, Imre Deak wrote:

From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
Signed-off-by: Imre Deak 
---
   drivers/gpu/drm/i915/i915_reg.h | 1 +
   drivers/gpu/drm/i915/intel_pm.c | 4 +++-
   2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3369a11..b7ba061 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6104,6 +6104,7 @@ enum skl_disp_power_wells {
   #define GEN8_UCGCTL6 0x9430
   #define   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE   (1<<24)
   #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE(1<<14)
+#define   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1<<28)

   #define GEN6_GFXPAUSE0xA000
   #define GEN6_RPNSWREQ0xA008
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d5dd0b3..52d3c02 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -103,10 +103,12 @@ static void bxt_init_clock_gating(struct drm_device *dev)
/*
 * FIXME:
 * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only.
+* GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.


Shouldn't this WA therefore have a check for 3x6 around it?


 */
 /* WaDisableSDEUnitClockGating:bxt */


I can't find where WaDisableSDEUnitClockGating is listed as required for
BXT?


It's specified in BSpec GEN8_UCGCTL6 (0x9430) as required for BXT A0.

--Imre



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 17/49] drm/i915/skl: add WaDisableMaskBasedCammingInRCC workaround

2015-04-08 Thread Nick Hoath

On 20/03/2015 10:33, Deak, Imre wrote:

On Fri, 2015-03-20 at 09:07 +, Nick Hoath wrote:

On 17/03/2015 09:39, Imre Deak wrote:

From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
Signed-off-by: Imre Deak 


Bearing in mind having to revisit all these with the stepping checks:
Reviewed-by: Nick Hoath 


---
   drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++--
   1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e23cbdc..000f608 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -970,8 +970,8 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*ring)
 * FIXME: don't apply the following on BXT for stepping C. On BXT A0
 * the flag reads back as 0.
 */
-   /* WaDisableMaskBasedCammingInRCC:bxtA */
-   if (IS_BROXTON(dev))
+   /* WaDisableMaskBasedCammingInRCC:sklC,bxtA */
+   if (INTEL_REVID(dev) == SKL_REVID_C0 || IS_BROXTON(dev))

This looks wrong. (IS_BROXTON && BXT_REVID_C0) || (IS_SKYLAKE &&
SKL_REVID_C0) please.


It's correct though. gen9_init_workarounds() is called for Skylake or
Broxton, so the condition is true either on Broxton regardless of the
stepping, or on Skylake if the revid matches.

Also on Broxton we have to _exclude_ the workaround on C0, so if we add
the revid check for Broxton too, then we have to rewrite the condition
to:

(IS_BROXTON && INTEL_REVID != BXT_REVID_C0) || (IS_SKYLAKE &&
INTEL_REVID == SKL_REVID_C0)


WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
  PIXEL_MASK_CAMMING_DISABLE);









___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 15/49] drm/i915/bxt: add GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ workaround

2015-04-08 Thread Nick Hoath

On 08/04/2015 14:38, Nick Hoath wrote:

On 08/04/2015 14:10, Deak, Imre wrote:

On ke, 2015-04-08 at 14:04 +0100, Nick Hoath wrote:

On 17/03/2015 09:39, Imre Deak wrote:

From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
Signed-off-by: Imre Deak 


Reviewed-by: Nick Hoath 


---
   drivers/gpu/drm/i915/i915_reg.h | 1 +
   drivers/gpu/drm/i915/intel_pm.c | 4 +++-
   2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h
b/drivers/gpu/drm/i915/i915_reg.h
index 3369a11..b7ba061 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6104,6 +6104,7 @@ enum skl_disp_power_wells {
   #define GEN8_UCGCTL60x9430
   #define   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE(1<<24)
   #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE(1<<14)
+#define   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1<<28)

   #define GEN6_GFXPAUSE0xA000
   #define GEN6_RPNSWREQ0xA008
diff --git a/drivers/gpu/drm/i915/intel_pm.c
b/drivers/gpu/drm/i915/intel_pm.c
index d5dd0b3..52d3c02 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -103,10 +103,12 @@ static void bxt_init_clock_gating(struct
drm_device *dev)
   /*
* FIXME:
* GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only.
+ * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT
SKUs only.


Shouldn't this WA therefore have a check for 3x6 around it?


*/
/* WaDisableSDEUnitClockGating:bxt */


I can't find where WaDisableSDEUnitClockGating is listed as required for
BXT?


It's specified in BSpec GEN8_UCGCTL6 (0x9430) as required for BXT A0.

--Imre





___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 2/3] Removed duplicate members from submit_request

2014-11-11 Thread Nick Hoath
Where there were duplicate variables for the tail, context and ring (engine)
in the gem request and the execlist queue item, use the one from the request
and remove the duplicate from the execlist queue item.

Issue: VIZ-4274
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem.c |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c| 21 +
 drivers/gpu/drm/i915/intel_lrc.h|  4 
 4 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 0d6af1c..45da79e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1891,11 +1891,11 @@ static int i915_execlists(struct seq_file *m, void 
*data)
if (head_req) {
struct drm_i915_gem_object *ctx_obj;
 
-   ctx_obj = head_req->ctx->engine[ring_id].state;
+   ctx_obj = head_req->request->ctx->engine[ring_id].state;
seq_printf(m, "\tHead request id: %u\n",
   intel_execlists_ctx_id(ctx_obj));
seq_printf(m, "\tHead request tail: %u\n",
-  head_req->tail);
+  head_req->request->tail);
}
 
seq_putc(m, '\n');
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b8e7018..f55bfdc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2623,7 +2623,7 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
execlist_link);
list_del(&submit_req->execlist_link);
intel_runtime_pm_put(dev_priv);
-   i915_gem_context_unreference(submit_req->ctx);
+   i915_gem_context_unreference(submit_req->request->ctx);
kfree(submit_req);
}
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8f301ac..3daf8ea 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -396,7 +396,7 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 execlist_link) {
if (!req0) {
req0 = cursor;
-   } else if (req0->ctx == cursor->ctx) {
+   } else if (req0->request->ctx == cursor->request->ctx) {
/* Same ctx: ignore first request, as second request
 * will update tail past first request's workload */
cursor->elsp_submitted = req0->elsp_submitted;
@@ -411,9 +411,9 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 
WARN_ON(req1 && req1->elsp_submitted);
 
-   execlists_submit_contexts(ring, req0->ctx, req0->tail,
- req1 ? req1->ctx : NULL,
- req1 ? req1->tail : 0);
+   execlists_submit_contexts(ring, req0->request->ctx, req0->request->tail,
+ req1 ? req1->request->ctx : NULL,
+ req1 ? req1->request->tail : 0);
 
req0->elsp_submitted++;
if (req1)
@@ -434,7 +434,7 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
 
if (head_req != NULL) {
struct drm_i915_gem_object *ctx_obj =
-   head_req->ctx->engine[ring->id].state;
+   head_req->request->ctx->engine[ring->id].state;
if (intel_execlists_ctx_id(ctx_obj) == request_id) {
WARN(head_req->elsp_submitted == 0,
 "Never submitted head request\n");
@@ -514,13 +514,13 @@ static void execlists_free_request_task(struct 
work_struct *work)
 {
struct intel_ctx_submit_request *req =
container_of(work, struct intel_ctx_submit_request, work);
-   struct drm_device *dev = req->ring->dev;
+   struct drm_device *dev = req->request->ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
 
intel_runtime_pm_put(dev_priv);
 
mutex_lock(&dev->struct_mutex);
-   i915_gem_context_unreference(req->ctx);
+   i915_gem_context_unreference(req->request->ctx);
i915_gem_request_unreference(req->request);
mutex_unlock(&dev->struct_mutex);
 
@@ -540,10 +540,6 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (req == NULL)
return -ENOMEM;
-  

[Intel-gfx] [RFC 1/3] execlist queue items to hold ptr/ref to gem_request

2014-11-11 Thread Nick Hoath
Add a reference and pointer from the execlist queue item to the associated
gem request. For execlist requests that don't have a request, create one
as a placeholder.

This patchset requires John Harrison's "Replace seqno values with request 
structures"
patchset.

Issue: VIZ-4274
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/intel_lrc.c | 31 +--
 drivers/gpu/drm/i915/intel_lrc.h |  5 -
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 09d548d..8f301ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -521,6 +521,7 @@ static void execlists_free_request_task(struct work_struct 
*work)
 
mutex_lock(&dev->struct_mutex);
i915_gem_context_unreference(req->ctx);
+   i915_gem_request_unreference(req->request);
mutex_unlock(&dev->struct_mutex);
 
kfree(req);
@@ -528,7 +529,8 @@ static void execlists_free_request_task(struct work_struct 
*work)
 
 static int execlists_context_queue(struct intel_engine_cs *ring,
   struct intel_context *to,
-  u32 tail)
+  u32 tail,
+  struct drm_i915_gem_request *request)
 {
struct intel_ctx_submit_request *req = NULL, *cursor;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -544,6 +546,22 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
req->tail = tail;
INIT_WORK(&req->work, execlists_free_request_task);
 
+   if(!request)
+   {
+   /*
+* If there isn't a request associated with this submission,
+* create one as a temporary holder.
+*/
+   WARN(1, "execlist context submission without request");
+   request = kzalloc(sizeof(*request), GFP_KERNEL);
+   if (request == NULL)
+   return -ENOMEM;
+   request->ctx = to;
+   request->ring = ring;
+   }
+   req->request = request;
+   i915_gem_request_reference(request);
+
intel_runtime_pm_get(dev_priv);
 
spin_lock_irqsave(&ring->execlist_lock, flags);
@@ -778,7 +796,8 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer 
*ringbuf)
  * on a queue waiting for the ELSP to be ready to accept a new context 
submission. At that
  * point, the tail *inside* the context is updated and the ELSP written to.
  */
-void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
+void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
+  struct drm_i915_gem_request *request)
 {
struct intel_engine_cs *ring = ringbuf->ring;
struct intel_context *ctx = ringbuf->FIXME_lrc_ctx;
@@ -788,7 +807,7 @@ void intel_logical_ring_advance_and_submit(struct 
intel_ringbuffer *ringbuf)
if (intel_ring_stopped(ring))
return;
 
-   execlists_context_queue(ring, ctx, ringbuf->tail);
+   execlists_context_queue(ring, ctx, ringbuf->tail, request);
 }
 
 static int logical_ring_alloc_request(struct intel_engine_cs *ring,
@@ -876,7 +895,7 @@ static int logical_ring_wait_for_space(struct 
intel_ringbuffer *ringbuf,
return ret;
 
/* Force the context submission in case we have been skipping it */
-   intel_logical_ring_advance_and_submit(ringbuf);
+   intel_logical_ring_advance_and_submit(ringbuf, NULL);
 
/* With GEM the hangcheck timer should kick us out of the loop,
 * leaving it early runs the risk of corrupting GEM state (due
@@ -1183,7 +1202,7 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, 
u32 seqno)
intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
 }
 
-static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
+static int gen8_emit_request(struct intel_ringbuffer *ringbuf, struct 
drm_i915_gem_request *request)
 {
struct intel_engine_cs *ring = ringbuf->ring;
u32 cmd;
@@ -1205,7 +1224,7 @@ static int gen8_emit_request(struct intel_ringbuffer 
*ringbuf)
i915_gem_request_get_seqno(ring->outstanding_lazy_request));
intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
intel_logical_ring_emit(ringbuf, MI_NOOP);
-   intel_logical_ring_advance_and_submit(ringbuf);
+   intel_logical_ring_advance_and_submit(ringbuf, request);
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 33c3b4b..6f81669 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -37,7 +37,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_in

[Intel-gfx] [RFC 3/3] drm/i915: Remove FIXME_lrc_ctx backpointer

2014-11-11 Thread Nick Hoath
The first pass implementation of execlists required a backpointer to the 
context to be held
in the intel_ringbuffer. However the context pointer is available higher in the 
call stack.
Remove the backpointer from the ring buffer structure and instead pass it down 
through the
call stack.

v2: Integrate this changeset with the removal of duplicate request/execlist 
queue item members.

Signed-off-by: Nick Hoath 
Issue: VIZ-4268
---
 drivers/gpu/drm/i915/i915_gem.c |  7 ++--
 drivers/gpu/drm/i915/intel_lrc.c| 67 +
 drivers/gpu/drm/i915/intel_lrc.h|  8 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++---
 4 files changed, 56 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f55bfdc..11bd207 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2370,8 +2370,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
return -ENOMEM;
 
if (i915.enable_execlists) {
-   struct intel_context *ctx = request->ctx;
-   ringbuf = ctx->engine[ring->id].ringbuf;
+   ringbuf = request->ctx->engine[ring->id].ringbuf;
} else
ringbuf = ring->buffer;
 
@@ -2384,7 +2383,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
 * what.
 */
if (i915.enable_execlists) {
-   ret = logical_ring_flush_all_caches(ringbuf);
+   ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
if (ret)
return ret;
} else {
@@ -2406,7 +2405,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
request_ring_position = intel_ring_get_tail(ringbuf);
 
if (i915.enable_execlists) {
-   ret = ring->emit_request(ringbuf, request);
+   ret = ring->emit_request(ringbuf, request->ctx, request);
if (ret)
return ret;
} else {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3daf8ea..792186e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -555,6 +555,10 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
request->ctx = to;
request->ring = ring;
}
+   else
+   {
+   WARN_ON(to != request->ctx);
+   }
req->request = request;
i915_gem_request_reference(request);
i915_gem_context_reference(req->request->ctx);
@@ -591,7 +595,8 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
return 0;
 }
 
-static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
+static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf,
+ struct intel_context *ctx)
 {
struct intel_engine_cs *ring = ringbuf->ring;
uint32_t flush_domains;
@@ -601,7 +606,8 @@ static int logical_ring_invalidate_all_caches(struct 
intel_ringbuffer *ringbuf)
if (ring->gpu_caches_dirty)
flush_domains = I915_GEM_GPU_DOMAINS;
 
-   ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
+   ret = ring->emit_flush(ringbuf, ctx,
+  I915_GEM_GPU_DOMAINS, flush_domains);
if (ret)
return ret;
 
@@ -610,6 +616,7 @@ static int logical_ring_invalidate_all_caches(struct 
intel_ringbuffer *ringbuf)
 }
 
 static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
+struct intel_context *ctx,
 struct list_head *vmas)
 {
struct intel_engine_cs *ring = ringbuf->ring;
@@ -637,7 +644,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer 
*ringbuf,
/* Unconditionally invalidate gpu caches and ensure that we do flush
 * any residual writes from the previous batch.
 */
-   return logical_ring_invalidate_all_caches(ringbuf);
+   return logical_ring_invalidate_all_caches(ringbuf, ctx);
 }
 
 /**
@@ -717,13 +724,13 @@ int intel_execlists_submission(struct drm_device *dev, 
struct drm_file *file,
return -EINVAL;
}
 
-   ret = execlists_move_to_gpu(ringbuf, vmas);
+   ret = execlists_move_to_gpu(ringbuf, ctx, vmas);
if (ret)
return ret;
 
if (ring == &dev_priv->ring[RCS] &&
instp_mode != dev_priv->relative_constants_mode) {
-   ret = intel_logical_ring_begin(ringbuf, 4);
+   ret = intel_logical_ring_begin(ringbuf, ctx, 4);
if (ret)
return ret;
 
@@ -736,7 +743,7 @@ int intel_execlists_submission(struct drm_device *dev, 
struct drm_file *file,
dev_priv-

[Intel-gfx] [PATCH 3/5] drm/i915: Remove FIXME_lrc_ctx backpointer

2014-11-12 Thread Nick Hoath
The first pass implementation of execlists required a backpointer to the 
context to be held
in the intel_ringbuffer. However the context pointer is available higher in the 
call stack.
Remove the backpointer from the ring buffer structure and instead pass it down 
through the
call stack.

v2: Integrate this changeset with the removal of duplicate request/execlist 
queue item members.

Signed-off-by: Nick Hoath 
Issue: VIZ-4268
---
 drivers/gpu/drm/i915/i915_gem.c |  7 ++--
 drivers/gpu/drm/i915/intel_lrc.c| 67 +
 drivers/gpu/drm/i915/intel_lrc.h|  8 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++---
 4 files changed, 56 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e5f521f..bd5a1e2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2451,8 +2451,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
return -ENOMEM;
 
if (i915.enable_execlists) {
-   struct intel_context *ctx = request->ctx;
-   ringbuf = ctx->engine[ring->id].ringbuf;
+   ringbuf = request->ctx->engine[ring->id].ringbuf;
} else
ringbuf = ring->buffer;
 
@@ -2465,7 +2464,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
 * what.
 */
if (i915.enable_execlists) {
-   ret = logical_ring_flush_all_caches(ringbuf);
+   ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
if (ret)
return ret;
} else {
@@ -2487,7 +2486,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
request_ring_position = intel_ring_get_tail(ringbuf);
 
if (i915.enable_execlists) {
-   ret = ring->emit_request(ringbuf, request);
+   ret = ring->emit_request(ringbuf, request->ctx, request);
if (ret)
return ret;
} else {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0e2e33b..4bd9572 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -555,6 +555,10 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
request->ctx = to;
request->ring = ring;
}
+   else
+   {
+   WARN_ON(to != request->ctx);
+   }
req->request = request;
i915_gem_request_reference(request);
i915_gem_context_reference(req->request->ctx);
@@ -591,7 +595,8 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
return 0;
 }
 
-static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
+static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf,
+ struct intel_context *ctx)
 {
struct intel_engine_cs *ring = ringbuf->ring;
uint32_t flush_domains;
@@ -601,7 +606,8 @@ static int logical_ring_invalidate_all_caches(struct 
intel_ringbuffer *ringbuf)
if (ring->gpu_caches_dirty)
flush_domains = I915_GEM_GPU_DOMAINS;
 
-   ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
+   ret = ring->emit_flush(ringbuf, ctx,
+  I915_GEM_GPU_DOMAINS, flush_domains);
if (ret)
return ret;
 
@@ -610,6 +616,7 @@ static int logical_ring_invalidate_all_caches(struct 
intel_ringbuffer *ringbuf)
 }
 
 static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
+struct intel_context *ctx,
 struct list_head *vmas)
 {
struct intel_engine_cs *ring = ringbuf->ring;
@@ -637,7 +644,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer 
*ringbuf,
/* Unconditionally invalidate gpu caches and ensure that we do flush
 * any residual writes from the previous batch.
 */
-   return logical_ring_invalidate_all_caches(ringbuf);
+   return logical_ring_invalidate_all_caches(ringbuf, ctx);
 }
 
 /**
@@ -717,13 +724,13 @@ int intel_execlists_submission(struct drm_device *dev, 
struct drm_file *file,
return -EINVAL;
}
 
-   ret = execlists_move_to_gpu(ringbuf, vmas);
+   ret = execlists_move_to_gpu(ringbuf, ctx, vmas);
if (ret)
return ret;
 
if (ring == &dev_priv->ring[RCS] &&
instp_mode != dev_priv->relative_constants_mode) {
-   ret = intel_logical_ring_begin(ringbuf, 4);
+   ret = intel_logical_ring_begin(ringbuf, ctx, 4);
if (ret)
return ret;
 
@@ -736,7 +743,7 @@ int intel_execlists_submission(struct drm_device *dev, 
struct drm_file *file,
dev_priv-

[Intel-gfx] [PATCH 4/5] drm/i915: Subsume intel_ctx_submit_request in to drm_i915_gem_request

2014-11-12 Thread Nick Hoath
Move all remaining elements that were unique to execlists queue items
into the associated request.

Signed-off-by: Nick Hoath 
Issue: VIZ-4274
---
 drivers/gpu/drm/i915/i915_debugfs.c |  8 +++
 drivers/gpu/drm/i915/i915_drv.h | 22 +
 drivers/gpu/drm/i915/i915_gem.c |  6 ++---
 drivers/gpu/drm/i915/intel_lrc.c| 47 +
 drivers/gpu/drm/i915/intel_lrc.h| 28 --
 5 files changed, 50 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 45da79e..9ce9a02 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1851,7 +1851,7 @@ static int i915_execlists(struct seq_file *m, void *data)
intel_runtime_pm_get(dev_priv);
 
for_each_ring(ring, dev_priv, ring_id) {
-   struct intel_ctx_submit_request *head_req = NULL;
+   struct drm_i915_gem_request *head_req = NULL;
int count = 0;
unsigned long flags;
 
@@ -1884,18 +1884,18 @@ static int i915_execlists(struct seq_file *m, void 
*data)
list_for_each(cursor, &ring->execlist_queue)
count++;
head_req = list_first_entry_or_null(&ring->execlist_queue,
-   struct intel_ctx_submit_request, execlist_link);
+   struct drm_i915_gem_request, execlist_link);
spin_unlock_irqrestore(&ring->execlist_lock, flags);
 
seq_printf(m, "\t%d requests in queue\n", count);
if (head_req) {
struct drm_i915_gem_object *ctx_obj;
 
-   ctx_obj = head_req->request->ctx->engine[ring_id].state;
+   ctx_obj = head_req->ctx->engine[ring_id].state;
seq_printf(m, "\tHead request id: %u\n",
   intel_execlists_ctx_id(ctx_obj));
seq_printf(m, "\tHead request tail: %u\n",
-  head_req->request->tail);
+  head_req->tail);
}
 
seq_putc(m, '\n');
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index afa9c35..0fe238c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2027,6 +2027,28 @@ struct drm_i915_gem_request {
struct list_head free_list;
 
uint32_t uniq;
+
+   /**
+* The ELSP only accepts two elements at a time, so we queue 
context/tail
+* pairs on a given queue (ring->execlist_queue) until the hardware is
+* available. The queue serves a double purpose: we also use it to keep 
track
+* of the up to 2 contexts currently in the hardware (usually one in 
execution
+* and the other queued up by the GPU): We only remove elements from 
the head
+* of the queue when the hardware informs us that an element has been
+* completed.
+*
+* All accesses to the queue are mediated by a spinlock 
(ring->execlist_lock).
+*/
+
+   /** Execlist link in the submission queue.*/
+   struct list_head execlist_link;
+
+   /** Execlists workqueue for processing this request in a bottom half */
+   struct work_struct work;
+
+   /** Execlists no. of times this request has been sent to the ELSP */
+   int elsp_submitted;
+
 };
 
 void i915_gem_request_free(struct kref *req_ref);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bd5a1e2..4d2d2e5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2696,14 +2696,14 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
}
 
while (!list_empty(&ring->execlist_queue)) {
-   struct intel_ctx_submit_request *submit_req;
+   struct drm_i915_gem_request *submit_req;
 
submit_req = list_first_entry(&ring->execlist_queue,
-   struct intel_ctx_submit_request,
+   struct drm_i915_gem_request,
execlist_link);
list_del(&submit_req->execlist_link);
intel_runtime_pm_put(dev_priv);
-   i915_gem_context_unreference(submit_req->request->ctx);
+   i915_gem_context_unreference(submit_req->ctx);
kfree(submit_req);
}
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4bd9572..b6ec012 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -382,8 +382,8 @@ static void execlists_submit_contexts(struct 
intel_engine_cs *ring,
 
 static void execlists_context_un

[Intel-gfx] [PATCH 1/5] drm/i915: execlist request keeps ptr/ref to gem_request

2014-11-12 Thread Nick Hoath
Add a reference and pointer from the execlist queue item to the associated
gem request. For execlist requests that don't have a request, create one
as a placeholder.

This patchset requires John Harrison's "Replace seqno values with request 
structures"
patchset.

Issue: VIZ-4274
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/intel_lrc.c | 31 +--
 drivers/gpu/drm/i915/intel_lrc.h |  5 -
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c8b3827..593471f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -521,6 +521,7 @@ static void execlists_free_request_task(struct work_struct 
*work)
 
mutex_lock(&dev->struct_mutex);
i915_gem_context_unreference(req->ctx);
+   i915_gem_request_unreference(req->request);
mutex_unlock(&dev->struct_mutex);
 
kfree(req);
@@ -528,7 +529,8 @@ static void execlists_free_request_task(struct work_struct 
*work)
 
 static int execlists_context_queue(struct intel_engine_cs *ring,
   struct intel_context *to,
-  u32 tail)
+  u32 tail,
+  struct drm_i915_gem_request *request)
 {
struct intel_ctx_submit_request *req = NULL, *cursor;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -544,6 +546,22 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
req->tail = tail;
INIT_WORK(&req->work, execlists_free_request_task);
 
+   if(!request)
+   {
+   /*
+* If there isn't a request associated with this submission,
+* create one as a temporary holder.
+*/
+   WARN(1, "execlist context submission without request");
+   request = kzalloc(sizeof(*request), GFP_KERNEL);
+   if (request == NULL)
+   return -ENOMEM;
+   request->ctx = to;
+   request->ring = ring;
+   }
+   req->request = request;
+   i915_gem_request_reference(request);
+
intel_runtime_pm_get(dev_priv);
 
spin_lock_irqsave(&ring->execlist_lock, flags);
@@ -778,7 +796,8 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer 
*ringbuf)
  * on a queue waiting for the ELSP to be ready to accept a new context 
submission. At that
  * point, the tail *inside* the context is updated and the ELSP written to.
  */
-void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
+void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
+  struct drm_i915_gem_request *request)
 {
struct intel_engine_cs *ring = ringbuf->ring;
struct intel_context *ctx = ringbuf->FIXME_lrc_ctx;
@@ -788,7 +807,7 @@ void intel_logical_ring_advance_and_submit(struct 
intel_ringbuffer *ringbuf)
if (intel_ring_stopped(ring))
return;
 
-   execlists_context_queue(ring, ctx, ringbuf->tail);
+   execlists_context_queue(ring, ctx, ringbuf->tail, request);
 }
 
 static int logical_ring_alloc_request(struct intel_engine_cs *ring,
@@ -876,7 +895,7 @@ static int logical_ring_wait_for_space(struct 
intel_ringbuffer *ringbuf,
return ret;
 
/* Force the context submission in case we have been skipping it */
-   intel_logical_ring_advance_and_submit(ringbuf);
+   intel_logical_ring_advance_and_submit(ringbuf, NULL);
 
/* With GEM the hangcheck timer should kick us out of the loop,
 * leaving it early runs the risk of corrupting GEM state (due
@@ -1221,7 +1240,7 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, 
u32 seqno)
intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
 }
 
-static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
+static int gen8_emit_request(struct intel_ringbuffer *ringbuf, struct 
drm_i915_gem_request *request)
 {
struct intel_engine_cs *ring = ringbuf->ring;
u32 cmd;
@@ -1243,7 +1262,7 @@ static int gen8_emit_request(struct intel_ringbuffer 
*ringbuf)
i915_gem_request_get_seqno(ring->outstanding_lazy_request));
intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
intel_logical_ring_emit(ringbuf, MI_NOOP);
-   intel_logical_ring_advance_and_submit(ringbuf);
+   intel_logical_ring_advance_and_submit(ringbuf, request);
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 33c3b4b..6f81669 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -37,7 +37,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_in

[Intel-gfx] [PATCH 5/5] drm/i915: Change workaround execlist submission to use gem requests.

2014-11-12 Thread Nick Hoath
Signed-off-by: Nick Hoath 
Issue: VIZ-4274
---
 drivers/gpu/drm/i915/intel_lrc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b6ec012..f3f1428 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1030,11 +1030,11 @@ static int intel_logical_ring_workarounds_emit(struct 
intel_engine_cs *ring,
return 0;
 
ring->gpu_caches_dirty = true;
-   ret = logical_ring_flush_all_caches(ringbuf);
+   ret = logical_ring_flush_all_caches(ringbuf, ctx);
if (ret)
return ret;
 
-   ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2);
+   ret = intel_logical_ring_begin(ringbuf, ctx, w->count * 2 + 2);
if (ret)
return ret;
 
@@ -1048,7 +1048,7 @@ static int intel_logical_ring_workarounds_emit(struct 
intel_engine_cs *ring,
intel_logical_ring_advance(ringbuf);
 
ring->gpu_caches_dirty = true;
-   ret = logical_ring_flush_all_caches(ringbuf);
+   ret = logical_ring_flush_all_caches(ringbuf, ctx);
if (ret)
return ret;
 
-- 
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 0/5] drm/i915: Untangle execlist tracking

2014-11-12 Thread Nick Hoath
This patchset merges execlist queue items into gem requests. It does this by 
using the reference count added by John Harrison's "Replace seqno values with
request structures" patchset to ensure that the gem request is available for
the whole execlist submission lifespan.


v2: merge intel_ctx_submit_request and drm_i915_gem_request, rebase changes &
   add cover letter

Issue: VIZ-4274

Nick Hoath (5):
  drm/i915: execlist request keeps ptr/ref to gem_request
  drm/i915: Removed duplicate members from submit_request
  drm/i915: Remove FIXME_lrc_ctx backpointer
  drm/i915: Subsume intel_ctx_submit_request in to drm_i915_gem_request
  drm/i915: Change workaround execlist submission to use gem requests.

 drivers/gpu/drm/i915/i915_debugfs.c |   4 +-
 drivers/gpu/drm/i915/i915_drv.h |  22 ++
 drivers/gpu/drm/i915/i915_gem.c |  11 ++-
 drivers/gpu/drm/i915/intel_lrc.c| 126 +++-
 drivers/gpu/drm/i915/intel_lrc.h|  41 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  12 ++-
 6 files changed, 119 insertions(+), 97 deletions(-)

-- 
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/5] drm/i915: Removed duplicate members from submit_request

2014-11-12 Thread Nick Hoath
Where there were duplicate variables for the tail, context and ring (engine)
in the gem request and the execlist queue item, use the one from the request
and remove the duplicate from the execlist queue item.

Issue: VIZ-4274
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem.c |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c| 21 +
 drivers/gpu/drm/i915/intel_lrc.h|  4 
 4 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 0d6af1c..45da79e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1891,11 +1891,11 @@ static int i915_execlists(struct seq_file *m, void 
*data)
if (head_req) {
struct drm_i915_gem_object *ctx_obj;
 
-   ctx_obj = head_req->ctx->engine[ring_id].state;
+   ctx_obj = head_req->request->ctx->engine[ring_id].state;
seq_printf(m, "\tHead request id: %u\n",
   intel_execlists_ctx_id(ctx_obj));
seq_printf(m, "\tHead request tail: %u\n",
-  head_req->tail);
+  head_req->request->tail);
}
 
seq_putc(m, '\n');
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 860c296..e5f521f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2704,7 +2704,7 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
execlist_link);
list_del(&submit_req->execlist_link);
intel_runtime_pm_put(dev_priv);
-   i915_gem_context_unreference(submit_req->ctx);
+   i915_gem_context_unreference(submit_req->request->ctx);
kfree(submit_req);
}
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 593471f..0e2e33b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -396,7 +396,7 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 execlist_link) {
if (!req0) {
req0 = cursor;
-   } else if (req0->ctx == cursor->ctx) {
+   } else if (req0->request->ctx == cursor->request->ctx) {
/* Same ctx: ignore first request, as second request
 * will update tail past first request's workload */
cursor->elsp_submitted = req0->elsp_submitted;
@@ -411,9 +411,9 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 
WARN_ON(req1 && req1->elsp_submitted);
 
-   execlists_submit_contexts(ring, req0->ctx, req0->tail,
- req1 ? req1->ctx : NULL,
- req1 ? req1->tail : 0);
+   execlists_submit_contexts(ring, req0->request->ctx, req0->request->tail,
+ req1 ? req1->request->ctx : NULL,
+ req1 ? req1->request->tail : 0);
 
req0->elsp_submitted++;
if (req1)
@@ -434,7 +434,7 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
 
if (head_req != NULL) {
struct drm_i915_gem_object *ctx_obj =
-   head_req->ctx->engine[ring->id].state;
+   head_req->request->ctx->engine[ring->id].state;
if (intel_execlists_ctx_id(ctx_obj) == request_id) {
WARN(head_req->elsp_submitted == 0,
 "Never submitted head request\n");
@@ -514,13 +514,13 @@ static void execlists_free_request_task(struct 
work_struct *work)
 {
struct intel_ctx_submit_request *req =
container_of(work, struct intel_ctx_submit_request, work);
-   struct drm_device *dev = req->ring->dev;
+   struct drm_device *dev = req->request->ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
 
intel_runtime_pm_put(dev_priv);
 
mutex_lock(&dev->struct_mutex);
-   i915_gem_context_unreference(req->ctx);
+   i915_gem_context_unreference(req->request->ctx);
i915_gem_request_unreference(req->request);
mutex_unlock(&dev->struct_mutex);
 
@@ -540,10 +540,6 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (req == NULL)
return -ENOMEM;
-  

Re: [Intel-gfx] [PATCH 4/5] drm/i915: Subsume intel_ctx_submit_request in to drm_i915_gem_request

2014-11-12 Thread Nick Hoath

On 12/11/2014 11:24, Chris Wilson wrote:

On Wed, Nov 12, 2014 at 10:53:26AM +, Nick Hoath wrote:
seq_putc(m, '\n');

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index afa9c35..0fe238c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2027,6 +2027,28 @@ struct drm_i915_gem_request {
struct list_head free_list;

uint32_t uniq;
+
+   /**
+* The ELSP only accepts two elements at a time, so we queue 
context/tail
+* pairs on a given queue (ring->execlist_queue) until the hardware is
+* available. The queue serves a double purpose: we also use it to keep 
track
+* of the up to 2 contexts currently in the hardware (usually one in 
execution
+* and the other queued up by the GPU): We only remove elements from 
the head
+* of the queue when the hardware informs us that an element has been
+* completed.
+*
+* All accesses to the queue are mediated by a spinlock 
(ring->execlist_lock).
+*/
+
+   /** Execlist link in the submission queue.*/
+   struct list_head execlist_link;


This is redundant. The request should only be one of the pending or active
lists at any time.

This is used by the pending execlist requests list owned by the 
intel_engine_cs. The request isn't in both the active and pending 
execlist engine lists.

+   /** Execlists workqueue for processing this request in a bottom half */
+   struct work_struct work;


For what purpose? This is not needed.
This worker is currently used to free up execlist requests. This goes 
away when Thomas Daniel's patchset is merged.
I have spotted a bug in the cleanup handler with the merged 
requests/execlists cleanup though.



+   /** Execlists no. of times this request has been sent to the ELSP */
+   int elsp_submitted;


A request can only be submitted exactly once at any time. This
bookkeeping is not part of the request.
This is a refcount to preserve the request if it has been resubmitted 
due to preemption or TDR, due to a race condition between the HW 
finishing with the item and the cleanup/resubmission. Have a look at

e1fee72c2ea2e9c0c6e6743d32a6832f21337d6c which contains a much better
description of why this exists.


Still not detangled I am afraid.
-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/4] drm/i915/bxt: Enable WaDSRefCountFullforceMissDisable

2015-06-29 Thread Nick Hoath
From: Rafael Barbalho 

Signed-off-by: Rafael Barbalho 
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/intel_pm.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d635d0a..f29e575 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -60,10 +60,13 @@ static void gen9_init_clock_gating(struct drm_device *dev)
I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
-   /* WaVSRefCountFullforceMissDisable:skl,bxt */
+   /*
+* WaVSRefCountFullforceMissDisable:skl,bxt
+* WaDSRefCountFullforceMissDisable:skl,bxt
+*/
I915_WRITE(GEN7_FF_THREAD_MODE,
   I915_READ(GEN7_FF_THREAD_MODE) &
-  ~(GEN7_FF_VS_REF_CNT_FFME));
+  ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
 }
 
 static void skl_init_clock_gating(struct drm_device *dev)
-- 
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/4] drm/i915/bxt: Enable WaVSRefCountFullforceMissDisable

2015-06-29 Thread Nick Hoath
From: Rafael Barbalho 

Signed-off-by: Rafael Barbalho 
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/intel_pm.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 32ff034..d635d0a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -59,6 +59,11 @@ static void gen9_init_clock_gating(struct drm_device *dev)
/* WaEnableLbsSlaRetryTimerDecrement:skl */
I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+
+   /* WaVSRefCountFullforceMissDisable:skl,bxt */
+   I915_WRITE(GEN7_FF_THREAD_MODE,
+  I915_READ(GEN7_FF_THREAD_MODE) &
+  ~(GEN7_FF_VS_REF_CNT_FFME));
 }
 
 static void skl_init_clock_gating(struct drm_device *dev)
-- 
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 3/4] drm/i915/bxt: Enable WaOCLCoherentLineFlush

2015-06-29 Thread Nick Hoath
Signed-off-by: Nick Hoath 
Cc: Rafael Barbalho 
---
 drivers/gpu/drm/i915/i915_reg.h | 1 +
 drivers/gpu/drm/i915/intel_pm.c | 4 
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b9f6b8c..115911a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5807,6 +5807,7 @@ enum skl_disp_power_wells {
 #define  GEN7_WA_L3_CHICKEN_MODE   0x2000
 
 #define GEN7_L3SQCREG4 0xb034
+#define  GEN8_PIPELINE_FLUSH_COHERENT_LINES(1<<21)
 #define  L3SQ_URB_READ_CAM_MATCH_DISABLE   (1<<27)
 
 #define GEN8_L3SQCREG4 0xb118
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f29e575..26ef146 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -67,6 +67,10 @@ static void gen9_init_clock_gating(struct drm_device *dev)
I915_WRITE(GEN7_FF_THREAD_MODE,
   I915_READ(GEN7_FF_THREAD_MODE) &
   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
+
+   /* WaOCLCoherentLineFlush:skl,bxt */
+   I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
+   GEN8_PIPELINE_FLUSH_COHERENT_LINES);
 }
 
 static void skl_init_clock_gating(struct drm_device *dev)
-- 
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 4/4] drm/i915/bxt: Clean up bxt_init_clock_gating

2015-06-29 Thread Nick Hoath
Add stepping check for A0 workarounds, and remove the associated
FIXME tags.
Split out unrelated WAs for later condition checking.

v2: Fixed format (PeterL)
v3: Corrected stepping check for WaDisableSDEUnitClockGating
- Ignoring comment, following hardware spec instead. (ChrisH)
Added description for TILECTL setting (JonB)

Cc: Peter Lawthers 
Cc: Chris Harris 
Cc: Jon Bloomfield 
Signed-off-by: Nick Hoath 
---
 drivers/gpu/drm/i915/intel_pm.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 26ef146..86a4ced 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -115,18 +115,24 @@ static void bxt_init_clock_gating(struct drm_device *dev)
 
gen9_init_clock_gating(dev);
 
+   /* WaDisableSDEUnitClockGating:bxt */
+   I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
/*
 * FIXME:
-* GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only.
 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
 */
-/* WaDisableSDEUnitClockGating:bxt */
I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
-  GEN8_SDEUNIT_CLOCK_GATE_DISABLE |
   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
 
-   /* FIXME: apply on A0 only */
-   I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
+   if (INTEL_REVID(dev) == BXT_REVID_A0) {
+   /*
+* Hardware specification requires this bit to be
+* set to 1 for A0
+*/
+   I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
+   }
 }
 
 static void i915_pineview_get_mem_freq(struct drm_device *dev)
-- 
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 0/4] drm/i915: Extra GEN 9 workaround patches

2015-06-29 Thread Nick Hoath
Nick Hoath (2):
  drm/i915/bxt: Enable WaOCLCoherentLineFlush
  drm/i915/bxt: Clean up bxt_init_clock_gating

Rafael Barbalho (2):
  drm/i915/bxt: Enable WaVSRefCountFullforceMissDisable
  drm/i915/bxt: Enable WaDSRefCountFullforceMissDisable

 drivers/gpu/drm/i915/i915_reg.h |  1 +
 drivers/gpu/drm/i915/intel_pm.c | 28 +++-
 2 files changed, 24 insertions(+), 5 deletions(-)

-- 
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/4] drm/i915/bxt: Enable WaVSRefCountFullforceMissDisable

2015-06-29 Thread Nick Hoath

On 29/06/2015 15:08, Mika Kuoppala wrote:


Hi,

Nick Hoath  writes:


From: Rafael Barbalho 

Signed-off-by: Rafael Barbalho 
Signed-off-by: Nick Hoath 
---
  drivers/gpu/drm/i915/intel_pm.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 32ff034..d635d0a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -59,6 +59,11 @@ static void gen9_init_clock_gating(struct drm_device *dev)
/* WaEnableLbsSlaRetryTimerDecrement:skl */
I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+
+   /* WaVSRefCountFullforceMissDisable:skl,bxt */
+   I915_WRITE(GEN7_FF_THREAD_MODE,
+  I915_READ(GEN7_FF_THREAD_MODE) &
+  ~(GEN7_FF_VS_REF_CNT_FFME));
  }



This bit 19 seems to be about Tesselation DOP gating disable
with gen9+ onwards. And with that workaroundname, the applicability
should be hsw,bdw. I am confused.



The specs say these WAs are required for GEN9+, BDW & HSW. So I'm
at a loss to see the confusion.


-Mika



  static void skl_init_clock_gating(struct drm_device *dev)
--
2.1.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Split alloc from init for lrc

2015-08-18 Thread Nick Hoath

On 18/08/2015 15:31, Chris Wilson wrote:

On Tue, Aug 18, 2015 at 03:23:32PM +0100, Nick Hoath wrote:

Extend init/init_hw split to context init.
- Move context initialisation in to i915_gem_init_hw
- Move one off initialisation for render ring to
 i915_gem_validate_context
- Move default context initialisation to logical_ring_init

Rename intel_lr_context_deferred_create to
intel_lr_context_deferred_alloc, to reflect reduced functionality &
alloc/init split.

This patch is intended to split out the allocation of resources & initialisation
to allow easier reuse of code for resume/gpu reset.

v2: Removed function ptr wrapping of do_switch_context (Daniel Vetter)
 Left ->init_context in intel_lr_context_deferred_alloc (Daniel Vetter)
 Remove unnecessary init flag & ring type test. (Daniel Vetter)
 Improve commit message (Daniel Vetter)

Issue: VIZ-4798
Signed-off-by: Nick Hoath 
Cc: Daniel Vetter 
---
  drivers/gpu/drm/i915/i915_drv.h|   1 -
  drivers/gpu/drm/i915/i915_gem.c|  12 +--
  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +-
  drivers/gpu/drm/i915/intel_lrc.c   | 147 ++---
  drivers/gpu/drm/i915/intel_lrc.h   |   4 +-
  5 files changed, 80 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f7fd519..844ccf0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -880,7 +880,6 @@ struct intel_context {
} legacy_hw_ctx;

/* Execlists */
-   bool rcs_initialized;
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 73293b4..3ccef2d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4603,14 +4603,8 @@ int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_vebox_ring;
}

-   ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
-   if (ret)
-   goto cleanup_bsd2_ring;
-
return 0;

-cleanup_bsd2_ring:
-   intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
  cleanup_vebox_ring:
intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
  cleanup_blt_ring:
@@ -4629,6 +4623,7 @@ i915_gem_init_hw(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
int ret, i, j;
+   struct drm_i915_gem_request *req;

if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
return -EIO;
@@ -4680,9 +4675,12 @@ i915_gem_init_hw(struct drm_device *dev)
goto out;
}

+   ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
+   if (ret)
+   goto out;


This is the wrong location. Just kill set_seqno, the experiment has run
its course and we now have an igt to exercise seqno wraparound.
It has to be here as the seqno has to be initialised before it is used 
to create requests for the initialisation.
According to the commit history, the seqno has to be initialised to 
non-zero for proper functioning. Is this no longer true?

Maybe it should just be set to 1 instead of ~0-0x1000



-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Split alloc from init for lrc

2015-08-19 Thread Nick Hoath
Extend init/init_hw split to context init.
   - Move context initialisation in to i915_gem_init_hw
   - Move one off initialisation for render ring to
i915_gem_validate_context
   - Move default context initialisation to logical_ring_init

Rename intel_lr_context_deferred_create to
intel_lr_context_deferred_alloc, to reflect reduced functionality &
alloc/init split.

This patch is intended to split out the allocation of resources & initialisation
to allow easier reuse of code for resume/gpu reset.

v2: Removed function ptr wrapping of do_switch_context (Daniel Vetter)
Left ->init_context in intel_lr_context_deferred_alloc (Daniel Vetter)
Remove unnecessary init flag & ring type test. (Daniel Vetter)
Improve commit message (Daniel Vetter)
v3: On init/reinit, set the hw next sequence number to the sw next sequence
number. This is set to 1 at driver load time. This prevents the seqno
being reset on reinit (Chris Wilson)

Issue: VIZ-4798
Signed-off-by: Nick Hoath 
Cc: Daniel Vetter 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h|   1 -
 drivers/gpu/drm/i915/i915_gem.c|  18 ++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +-
 drivers/gpu/drm/i915/intel_lrc.c   | 147 ++---
 drivers/gpu/drm/i915/intel_lrc.h   |   4 +-
 5 files changed, 86 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f7fd519..844ccf0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -880,7 +880,6 @@ struct intel_context {
} legacy_hw_ctx;
 
/* Execlists */
-   bool rcs_initialized;
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 73293b4..eb7c1f2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4603,14 +4603,8 @@ int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_vebox_ring;
}
 
-   ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
-   if (ret)
-   goto cleanup_bsd2_ring;
-
return 0;
 
-cleanup_bsd2_ring:
-   intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
 cleanup_vebox_ring:
intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
 cleanup_blt_ring:
@@ -4629,6 +4623,7 @@ i915_gem_init_hw(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
int ret, i, j;
+   struct drm_i915_gem_request *req;
 
if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
return -EIO;
@@ -4680,9 +4675,12 @@ i915_gem_init_hw(struct drm_device *dev)
goto out;
}
 
+   ret = i915_gem_set_seqno(dev, dev_priv->next_seqno);
+   if (ret)
+   goto out;
+
/* Now it is safe to go back round and do everything else: */
for_each_ring(ring, dev_priv, i) {
-   struct drm_i915_gem_request *req;
 
WARN_ON(!ring->default_context);
 
@@ -4881,6 +4879,12 @@ i915_gem_load(struct drm_device *dev)
dev_priv->num_fence_regs =
I915_READ(vgtif_reg(avail_rs.fence_num));
 
+   /*
+* Set initial sequence number for requests.
+ */
+   dev_priv->next_seqno = 1;
+   dev_priv->last_seqno = ~((uint32_t)0);
+
/* Initialize fence registers to zero */
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
i915_gem_restore_fences(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 923a3c4..95f1a0d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -994,6 +994,7 @@ i915_gem_validate_context(struct drm_device *dev, struct 
drm_file *file,
 {
struct intel_context *ctx = NULL;
struct i915_ctx_hang_stats *hs;
+   int ret;
 
if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
return ERR_PTR(-EINVAL);
@@ -1009,7 +1010,7 @@ i915_gem_validate_context(struct drm_device *dev, struct 
drm_file *file,
}
 
if (i915.enable_execlists && !ctx->engine[ring->id].state) {
-   int ret = intel_lr_context_deferred_create(ctx, ring);
+   ret = intel_lr_context_deferred_alloc(ctx, ring);
if (ret) {
DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 138964a..d0dc6b5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1426,11 +1426,31 @@

Re: [Intel-gfx] [PATCH] drm/i915: Split alloc from init for lrc

2015-08-19 Thread Nick Hoath

On 19/08/2015 13:37, Chris Wilson wrote:

On Wed, Aug 19, 2015 at 01:24:28PM +0100, Nick Hoath wrote:

Extend init/init_hw split to context init.
- Move context initialisation in to i915_gem_init_hw
- Move one off initialisation for render ring to
 i915_gem_validate_context
- Move default context initialisation to logical_ring_init

Rename intel_lr_context_deferred_create to
intel_lr_context_deferred_alloc, to reflect reduced functionality &
alloc/init split.

This patch is intended to split out the allocation of resources & initialisation
to allow easier reuse of code for resume/gpu reset.

v2: Removed function ptr wrapping of do_switch_context (Daniel Vetter)
 Left ->init_context in intel_lr_context_deferred_alloc (Daniel Vetter)
 Remove unnecessary init flag & ring type test. (Daniel Vetter)
 Improve commit message (Daniel Vetter)
v3: On init/reinit, set the hw next sequence number to the sw next sequence
 number. This is set to 1 at driver load time. This prevents the seqno
 being reset on reinit (Chris Wilson)

Issue: VIZ-4798
Signed-off-by: Nick Hoath 
Cc: Daniel Vetter 
Cc: Chris Wilson 
---
  drivers/gpu/drm/i915/i915_drv.h|   1 -
  drivers/gpu/drm/i915/i915_gem.c|  18 ++--
  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +-
  drivers/gpu/drm/i915/intel_lrc.c   | 147 ++---
  drivers/gpu/drm/i915/intel_lrc.h   |   4 +-
  5 files changed, 86 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f7fd519..844ccf0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -880,7 +880,6 @@ struct intel_context {
} legacy_hw_ctx;

/* Execlists */
-   bool rcs_initialized;
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 73293b4..eb7c1f2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4603,14 +4603,8 @@ int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_vebox_ring;
}

-   ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
-   if (ret)
-   goto cleanup_bsd2_ring;
-
return 0;

-cleanup_bsd2_ring:
-   intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
  cleanup_vebox_ring:
intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
  cleanup_blt_ring:
@@ -4629,6 +4623,7 @@ i915_gem_init_hw(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
int ret, i, j;
+   struct drm_i915_gem_request *req;

if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
return -EIO;
@@ -4680,9 +4675,12 @@ i915_gem_init_hw(struct drm_device *dev)
goto out;
}

+   ret = i915_gem_set_seqno(dev, dev_priv->next_seqno);
+   if (ret)
+   goto out;


The only reason to do this would be to ensure that the contents of the
registers are valid (assuming we take over from ourselves). The right
value to use then is last_seqno.

i915_gem_set_seqno uses the following code:
ret = i915_gem_init_seqno(dev, seqno - 1);
..
dev_priv->next_seqno = seqno;
dev_priv->last_seqno = seqno - 1;

So using last_seqno would rewind the seqno by one...



diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 923a3c4..95f1a0d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -994,6 +994,7 @@ i915_gem_validate_context(struct drm_device *dev, struct 
drm_file *file,
  {
struct intel_context *ctx = NULL;
struct i915_ctx_hang_stats *hs;
+   int ret;

if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
return ERR_PTR(-EINVAL);
@@ -1009,7 +1010,7 @@ i915_gem_validate_context(struct drm_device *dev, struct 
drm_file *file,
}

if (i915.enable_execlists && !ctx->engine[ring->id].state) {
-   int ret = intel_lr_context_deferred_create(ctx, ring);
+   ret = intel_lr_context_deferred_alloc(ctx, ring);
if (ret) {
DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
return ERR_PTR(ret);


Still modifying this for no reason, and you still haven't realised this
call is redundant (hint there is already a hook in alloc_request).

 From last year:
http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?id=37fbd370152211688bc5bce3d28d13233cfe7d8b

More recent (i.e a couple of months ago):
http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=ba4950a8f489d54ec

[Intel-gfx] [PATCH] drm/i915: Split alloc from init for lrc

2015-09-04 Thread Nick Hoath
Extend init/init_hw split to context init.
   - Move context initialisation in to i915_gem_init_hw
   - Move one off initialisation for render ring to
i915_gem_validate_context
   - Move default context initialisation to logical_ring_init

Rename intel_lr_context_deferred_create to
intel_lr_context_deferred_alloc, to reflect reduced functionality &
alloc/init split.

This patch is intended to split out the allocation of resources &
initialisation to allow easier reuse of code for resume/gpu reset.

v2: Removed function ptr wrapping of do_switch_context (Daniel Vetter)
Left ->init_context in intel_lr_context_deferred_alloc
(Daniel Vetter)
Remove unnecessary init flag & ring type test. (Daniel Vetter)
Improve commit message (Daniel Vetter)
v3: On init/reinit, set the hw next sequence number to the sw next
sequence number. This is set to 1 at driver load time. This prevents
the seqno being reset on reinit (Chris Wilson)
v4: Set seqno back to ~0 - 0x1000 at start-of-day, and increment by 0x100
on reset.
This makes it obvious which bbs are which after a reset. (David Gordon
& John Harrison)
Rebase.

Issue: VIZ-4798
Signed-off-by: Nick Hoath 
Cc: Daniel Vetter 
Cc: Chris Wilson 
Cc: John Harrison 
Cc: David Gordon 
---
 drivers/gpu/drm/i915/i915_drv.h|   1 -
 drivers/gpu/drm/i915/i915_gem.c|  24 +++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +-
 drivers/gpu/drm/i915/intel_lrc.c   | 155 ++---
 drivers/gpu/drm/i915/intel_lrc.h   |   4 +-
 5 files changed, 93 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1287007..ded7158 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -888,7 +888,6 @@ struct intel_context {
} legacy_hw_ctx;
 
/* Execlists */
-   bool rcs_initialized;
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 41263cd..c8125a5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4613,14 +4613,8 @@ int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_vebox_ring;
}
 
-   ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
-   if (ret)
-   goto cleanup_bsd2_ring;
-
return 0;
 
-cleanup_bsd2_ring:
-   intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
 cleanup_vebox_ring:
intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
 cleanup_blt_ring:
@@ -4639,6 +4633,7 @@ i915_gem_init_hw(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
int ret, i, j;
+   struct drm_i915_gem_request *req;
 
if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
return -EIO;
@@ -4706,9 +4701,16 @@ i915_gem_init_hw(struct drm_device *dev)
goto out;
}
 
+   /*
+* Increment the next seqno by 0x100 so we have a visible break
+* on re-initialisation
+*/
+   ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
+   if (ret)
+   goto out;
+
/* Now it is safe to go back round and do everything else: */
for_each_ring(ring, dev_priv, i) {
-   struct drm_i915_gem_request *req;
 
WARN_ON(!ring->default_context);
 
@@ -4907,6 +4909,14 @@ i915_gem_load(struct drm_device *dev)
dev_priv->num_fence_regs =
I915_READ(vgtif_reg(avail_rs.fence_num));
 
+   /*
+* Set initial sequence number for requests.
+* Using this number allows the wraparound to happen early,
+* catching any obvious problems.
+*/
+   dev_priv->next_seqno = ((u32)~0 - 0x1100);
+   dev_priv->last_seqno = ((u32)~0 - 0x1101);
+
/* Initialize fence registers to zero */
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
i915_gem_restore_fences(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a953d49..64674dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -994,6 +994,7 @@ i915_gem_validate_context(struct drm_device *dev, struct 
drm_file *file,
 {
struct intel_context *ctx = NULL;
struct i915_ctx_hang_stats *hs;
+   int ret;
 
if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
return ERR_PTR(-EINVAL);
@@ -1009,7 +1010,7 @@ i915_gem_validate_context(struct drm_device *dev, struct 
drm_file *file,
}
 
if (i915.enable_execlists && !ctx->engine[ring->id].st

  1   2   3   >