[Intel-gfx] [PATCH] drm/i915: Change context lifecycle
Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been saved. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. This is to solve a hang with GuC submission, and a theoretical issue with execlist submission. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 9 - drivers/gpu/drm/i915/intel_lrc.c | 73 ++-- drivers/gpu/drm/i915/intel_lrc.h | 1 + 4 files changed, 65 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d5cf30b..4d2f44c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -889,6 +889,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool unsaved; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e955499..6fee473 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1354,6 +1354,14 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + if (i915.enable_execlists) { + unsigned long flags; + + spin_lock_irqsave(&request->ring->execlist_lock, flags); + intel_lr_context_complete_check(request); + spin_unlock_irqrestore(&request->ring->execlist_lock, flags); + } + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position @@ -1384,7 +1392,6 @@ __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) do { tmp = list_first_entry(&engine->request_list, typeof(*tmp), list); - i915_gem_request_retire(tmp); } while (tmp != req); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 06180dc..a527c21 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) struct drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); @@ -728,10 +725,16 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) intel_logical_ring_advance(request->ringbuf); request->tail = request->ringbuf->tail; - if (intel_ring_stopped(ring)) return; + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].unsaved) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].unsaved = true; + } + } + if (dev_priv->guc.execbuf_client) i915_guc_submit(dev_priv->guc.execbuf_client, request); else @@ -958,12 +961,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) spin_unlock_irq(&ring->execlist_lock); list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) { - struct intel_context *ctx = req->ctx; - struct 
drm_i915_gem_object *ctx_obj = - ctx->engine[ring->id].state; - - if (ctx_obj && (ctx != ring->default_context)) - intel_lr_context_unpin(req); list_del(&req->execlist_link); i915_gem_request_unreference(req); } @@ -1058,21 +1055,41 @@ reset_pin_count: return ret; } -void intel_lr_context_unpin(struct drm_i915_gem_request *rq) +static void intel_lr_context_unpin_no_req(struct intel_engine_cs *ring, + struct intel_context *ctx) { - struct intel_engine_cs *ring = rq->ring; - struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; - struct
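To see the intended lifecycle in isolation, here is a small self-contained C model of the scheme the commit message describes. It is not the driver code: ctx, engine, request and the "previous" bookkeeping field are simplified stand-ins (the patch itself keys the pin off a per-engine unsaved/dirty flag and performs the unpin from request retirement).

/* Simplified standalone model of "pin until the context is saved". */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct ctx {
        int pin_count;
        bool dirty;             /* HW may still write this context back */
};

struct engine {
        struct ctx *last_ctx;   /* context most recently submitted to HW */
};

struct request {
        struct ctx *ctx;
        struct ctx *previous;   /* context the engine ran before this one */
};

static void ctx_pin(struct ctx *c)
{
        c->pin_count++;
}

static void ctx_unpin(struct ctx *c)
{
        assert(c->pin_count > 0);
        c->pin_count--;
}

/* Submission: pin once per "dirty" period so the HW can always write back. */
static void submit(struct engine *e, struct request *rq)
{
        if (!rq->ctx->dirty) {
                ctx_pin(rq->ctx);
                rq->ctx->dirty = true;
        }
        rq->previous = (e->last_ctx != rq->ctx) ? e->last_ctx : NULL;
        e->last_ctx = rq->ctx;
}

/*
 * Retirement: the first retired request on a new context implies the old
 * context has been written back, so the old context can finally be unpinned.
 */
static void retire(struct request *rq)
{
        if (rq->previous && rq->previous->dirty) {
                rq->previous->dirty = false;
                ctx_unpin(rq->previous);
        }
}

int main(void)
{
        struct engine e = { NULL };
        struct ctx a = { 0, false }, b = { 0, false };
        struct request ra = { &a, NULL }, rb = { &b, NULL };

        submit(&e, &ra);        /* a pinned and dirty */
        submit(&e, &rb);        /* b pinned and dirty, a still pinned */
        retire(&ra);            /* first request on a: nothing to release */
        retire(&rb);            /* first retire on new ctx b: a is now saved */
        printf("a.pin=%d b.pin=%d (b stays pinned while HW may touch it)\n",
               a.pin_count, b.pin_count);
        return 0;
}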
Re: [Intel-gfx] [PATCH] drm/i915: Change context lifecycle
On 25/11/2015 01:11, Dai, Yu wrote: On 11/24/2015 08:23 AM, Nick Hoath wrote: Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been saved. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. This is to solve a hang with GuC submission, and a theoretical issue with execlist submission. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 9 - drivers/gpu/drm/i915/intel_lrc.c | 73 ++-- drivers/gpu/drm/i915/intel_lrc.h | 1 + 4 files changed, 65 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d5cf30b..4d2f44c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -889,6 +889,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool unsaved; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e955499..6fee473 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1354,6 +1354,14 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + if (i915.enable_execlists) { + unsigned long flags; + + spin_lock_irqsave(&request->ring->execlist_lock, flags); + intel_lr_context_complete_check(request); + spin_unlock_irqrestore(&request->ring->execlist_lock, flags); + } + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position @@ -1384,7 +1392,6 @@ __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) do { tmp = list_first_entry(&engine->request_list, typeof(*tmp), list); - i915_gem_request_retire(tmp); } while (tmp != req); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 06180dc..a527c21 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) struct drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); @@ -728,10 +725,16 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) intel_logical_ring_advance(request->ringbuf); request->tail = request->ringbuf->tail; - if (intel_ring_stopped(ring)) return; + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].unsaved) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].unsaved = true; + } + } + if (dev_priv->guc.execbuf_client) i915_guc_submit(dev_priv->guc.execbuf_client, request); else @@ -958,12 +961,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) spin_unlock_irq(&ring->execlist_lock); list_for_each_entry_safe(req, tmp, 
&retired_list, execlist_link) { - struct intel_context *ctx = req->ctx; - struct drm_i915_gem_object *ctx_obj = - ctx->engine[ring->id].state; - - if (ctx_obj && (ctx != ring->default_context)) - intel_lr_context_unpin(req); list_del(&req->execlist_link); i915_gem_request_unreference(req); } @@ -1058,21 +1055,41 @@ reset_pin_count: return ret; } -void intel_lr_context_unpin(struct drm_i915_gem_request *rq) +static void intel_lr_context_unpin_no_req(struct intel_engine_cs *ring, + struct intel_context *ctx) { - struct intel_engine_cs *ring = rq->ring; - st
[Intel-gfx] [PATCH] drm/i915: Change context lifecycle
Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been written back to by the GPU. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request v5: Only create a switch to idle context if the ring doesn't already have a request pending on it (Alex Dai) Rename unsaved to dirty to avoid double negatives (Dave Gordon) Changed _no_req postfix to __ prefix for consistency (Dave Gordon) Split out per engine cleanup from context_free as it was getting unwieldy Corrected locking (Dave Gordon) Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 3 + drivers/gpu/drm/i915/intel_lrc.c | 124 +++ drivers/gpu/drm/i915/intel_lrc.h | 1 + 4 files changed, 105 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d5cf30b..e82717a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -889,6 +889,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool dirty; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e955499..3829bc1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + if (i915.enable_execlists) + intel_lr_context_complete_check(request); + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 06180dc..03d5bca 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) struct drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) if (intel_ring_stopped(ring)) return; + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].dirty) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].dirty = true; + } + } + if (dev_priv->guc.execbuf_client) i915_guc_submit(dev_priv->guc.execbuf_client, request); else @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) spin_unlock_irq(&ring->execlist_lock); list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) { - struct intel_context *ctx = req->ctx; - struct drm_i915_gem_object *ctx_obj = - ctx->engine[ring->id].state; - - if (ctx_obj && (ctx != ring->default_context)) - intel_lr_context_unpin(req); list_del(&req->execlist_link); 
i915_gem_request_unreference(req); } @@ -1058,21 +1056,39 @@ reset_pin_count: return ret; } -void intel_lr_context_unpin(struct drm_i915_gem_request *rq) +static void __intel_lr_context_unpin(struct intel_engine_cs *ring, + struct intel_context *ctx) { - struct intel_engine_cs *ring = rq->ring; - struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; - struct intel_ringbuffer *ringbuf = rq->ringbuf; - + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; if (ctx_obj) { WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); - if (--rq->ctx->engine[ring->id
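The v4/v5 refactor of the unpin path (so it can run without a request, e.g. during context free) reduces to the shape sketched below. The types and names are minimal stand-ins, not the intel_lrc.c code; the point is only that the real work lives in a (ring, ctx) helper and the request-based entry point becomes a thin forwarder.

#include <assert.h>

#define NUM_ENGINES 5

struct engine {
        int id;
};

struct ctx {
        int pin_count[NUM_ENGINES];     /* per-engine pin counts */
};

struct request {
        struct engine *engine;
        struct ctx *ctx;
};

/* Helper usable from context-free paths where no request exists any more. */
static void __context_unpin(struct engine *e, struct ctx *c)
{
        assert(c->pin_count[e->id] > 0);
        c->pin_count[e->id]--;
}

/* The request-based flavour simply forwards. */
static void context_unpin(struct request *rq)
{
        __context_unpin(rq->engine, rq->ctx);
}

int main(void)
{
        struct engine rcs = { 0 };
        struct ctx c = { { 1, 0, 0, 0, 0 } };
        struct request rq = { &rcs, &c };

        context_unpin(&rq);             /* normal path, via a request */
        c.pin_count[rcs.id] = 1;
        __context_unpin(&rcs, &c);      /* cleanup path, no request around */
        return 0;
}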
Re: [Intel-gfx] [PATCH v2] drm/i915/guc: Clean up locks in GuC
On 25/11/2015 19:29, Dai, Yu wrote: From: Alex Dai When the GuC Work Queue is full, the driver will wait for the GuC to have available space by delaying 1ms. The wait needs to be outside the spin_lock_irq / unlock section. Otherwise, a lockup happens because jiffies won't be updated while irqs are disabled. The unnecessary locks have been removed; dev->struct_mutex is used instead where needed. Issue is found in igt/gem_close_race. v2: Clean up wq_lock too v1: Clean up host2guc lock as well Signed-off-by: Alex Dai --- drivers/gpu/drm/i915/i915_debugfs.c| 12 +-- drivers/gpu/drm/i915/i915_guc_submission.c | 32 +++--- drivers/gpu/drm/i915/intel_guc.h | 4 3 files changed, 13 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a728ff1..d6b7817 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2473,15 +2473,15 @@ static int i915_guc_info(struct seq_file *m, void *data) if (!HAS_GUC_SCHED(dev_priv->dev)) return 0; + if (mutex_lock_interruptible(&dev->struct_mutex)) + return 0; + /* Take a local copy of the GuC data, so we can dump it at leisure */ - spin_lock(&dev_priv->guc.host2guc_lock); guc = dev_priv->guc; - if (guc.execbuf_client) { - spin_lock(&guc.execbuf_client->wq_lock); + if (guc.execbuf_client) client = *guc.execbuf_client; - spin_unlock(&guc.execbuf_client->wq_lock); - } - spin_unlock(&dev_priv->guc.host2guc_lock); + + mutex_unlock(&dev->struct_mutex); seq_printf(m, "GuC total action count: %llu\n", guc.action_count); seq_printf(m, "GuC action failure count: %u\n", guc.action_fail); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index ed9f100..97996e5 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -86,7 +86,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) return -EINVAL; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - spin_lock(&dev_priv->guc.host2guc_lock); dev_priv->guc.action_count += 1; dev_priv->guc.action_cmd = data[0]; @@ -119,7 +118,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) } dev_priv->guc.action_status = status; - spin_unlock(&dev_priv->guc.host2guc_lock); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; @@ -249,6 +247,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) } kunmap_atomic(base); + Unnecessary whitespace churn return ret; } @@ -292,16 +291,12 @@ static uint32_t select_doorbell_cacheline(struct intel_guc *guc) const uint32_t cacheline_size = cache_line_size(); uint32_t offset; - spin_lock(&guc->host2guc_lock); - /* Doorbell uses a single cache line within a page */ offset = offset_in_page(guc->db_cacheline); /* Moving to next cache line to reduce contention */ guc->db_cacheline += cacheline_size; - spin_unlock(&guc->host2guc_lock); - DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n", offset, guc->db_cacheline, cacheline_size); @@ -322,13 +317,11 @@ static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority) const uint16_t end = start + half; uint16_t id; - spin_lock(&guc->host2guc_lock); id = find_next_zero_bit(guc->doorbell_bitmap, end, start); if (id == end) id = GUC_INVALID_DOORBELL_ID; else bitmap_set(guc->doorbell_bitmap, id, 1); - spin_unlock(&guc->host2guc_lock); DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n", hi_pri ?
"high" : "normal", id); @@ -338,9 +331,7 @@ static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority) static void release_doorbell(struct intel_guc *guc, uint16_t id) { - spin_lock(&guc->host2guc_lock); bitmap_clear(guc->doorbell_bitmap, id, 1); - spin_unlock(&guc->host2guc_lock); } /* @@ -487,16 +478,13 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset) struct guc_process_desc *desc; void *base; u32 size = sizeof(struct guc_wq_item); - int ret = 0, timeout_counter = 200; + int ret = -ETIMEDOUT, timeout_counter = 200; base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); desc = base + gc->proc_desc_offset; while (timeout_counter-- > 0) { - ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, de
Re: [Intel-gfx] [PATCH] drm/i915: Change context lifecycle
On 26/11/2015 08:48, Daniel Vetter wrote: On Wed, Nov 25, 2015 at 05:02:44PM +0200, Mika Kuoppala wrote: Nick Hoath writes: Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been written back to by the GPU. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request v5: Only create a switch to idle context if the ring doesn't already have a request pending on it (Alex Dai) Rename unsaved to dirty to avoid double negatives (Dave Gordon) Changed _no_req postfix to __ prefix for consistency (Dave Gordon) Split out per engine cleanup from context_free as it was getting unwieldy Corrected locking (Dave Gordon) Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 3 + drivers/gpu/drm/i915/intel_lrc.c | 124 +++ drivers/gpu/drm/i915/intel_lrc.h | 1 + 4 files changed, 105 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d5cf30b..e82717a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -889,6 +889,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool dirty; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e955499..3829bc1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + if (i915.enable_execlists) + intel_lr_context_complete_check(request); + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 06180dc..03d5bca 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) struct drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) if (intel_ring_stopped(ring)) return; + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].dirty) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].dirty = true; + } + } + if (dev_priv->guc.execbuf_client) i915_guc_submit(dev_priv->guc.execbuf_client, request); else @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) spin_unlock_irq(&ring->execlist_lock); list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) { - struct intel_context *ctx = req->ctx; - struct drm_i915_gem_object *ctx_obj = - ctx->engine[ring->id].state; - - if 
(ctx_obj && (ctx != ring->default_context)) - intel_lr_context_unpin(req); list_del(&req->execlist_link); i915_gem_request_unreference(req); } @@ -1058,21 +1056,39 @@ reset_pin_count: return ret; } -void intel_lr_context_unpin(struct drm_i915_gem_request *rq) +static void __intel_lr_context_unpin(struct intel_engine_cs *ring, + struct intel_context *ctx) { - struct intel_engine_cs *ring = rq->ring; - struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; - struct intel_ringbuffer *ringbuf = rq->ringbuf; - + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; if (ctx_obj)
[Intel-gfx] [PATCH v6] drm/i915: Change context lifecycle
Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been written back to by the GPU. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. This fixes an issue with GuC submission where the GPU might not have finished writing back the context before it is unpinned. This results in a GPU hang. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request v5: Only create a switch to idle context if the ring doesn't already have a request pending on it (Alex Dai) Rename unsaved to dirty to avoid double negatives (Dave Gordon) Changed _no_req postfix to __ prefix for consistency (Dave Gordon) Split out per engine cleanup from context_free as it was getting unwieldy Corrected locking (Dave Gordon) v6: Removed some bikeshedding (Mika Kuoppala) Added explanation of the GuC hang that this fixes (Daniel Vetter) Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai Cc: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 3 + drivers/gpu/drm/i915/intel_lrc.c | 122 +++ drivers/gpu/drm/i915/intel_lrc.h | 1 + 4 files changed, 104 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d5cf30b..e82717a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -889,6 +889,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool dirty; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e955499..3829bc1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + if (i915.enable_execlists) + intel_lr_context_complete_check(request); + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 06180dc..dbe64ff 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) struct drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) if (intel_ring_stopped(ring)) return; + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].dirty) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].dirty = true; + } + } + if (dev_priv->guc.execbuf_client) i915_guc_submit(dev_priv->guc.execbuf_client, request); else @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) spin_unlock_irq(&ring->execlist_lock); list_for_each_entry_safe(req, 
tmp, &retired_list, execlist_link) { - struct intel_context *ctx = req->ctx; - struct drm_i915_gem_object *ctx_obj = - ctx->engine[ring->id].state; - - if (ctx_obj && (ctx != ring->default_context)) - intel_lr_context_unpin(req); list_del(&req->execlist_link); i915_gem_request_unreference(req); } @@ -1058,21 +1056,39 @@ reset_pin_count: return ret; } -void intel_lr_context_unpin(struct drm_i915_gem_request *rq) +static void __intel_lr_context_unpin(struct intel_engine_cs *ring, + struct intel_context *ctx) { - struct intel_engine_cs *ring = rq->ring; - struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; - struct intel_ringbuffer *ringbuf = rq->ringbuf; - + struct drm_i915_gem_objec
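The v3/v5 safeguard mentioned in the changelog (switch to the default/idle context before freeing a context that is still dirty, but only if the engine has nothing queued that would move the hardware off it anyway) reduces to roughly the condition below. All names here are hypothetical stand-ins, intended only to show the check, not the real submission or unpin code.

#include <stdbool.h>
#include <stdio.h>

struct engine {
        bool has_pending_requests;
};

struct ctx {
        bool dirty;     /* HW may still need to write this context back */
};

/* hypothetical hooks standing in for the real submission/unpin paths */
static void switch_to_default_context(struct engine *e)
{
        (void)e;
        puts("queue a switch to the idle/default context");
}

static void context_unpin(struct ctx *c)
{
        c->dirty = false;
        puts("unpin context");
}

static void context_free_engine_state(struct engine *e, struct ctx *c)
{
        if (!c->dirty)
                return;

        /*
         * Only force an idle switch if nothing is already pending that
         * will move the hardware off this context anyway.
         */
        if (!e->has_pending_requests)
                switch_to_default_context(e);

        context_unpin(c);
}

int main(void)
{
        struct engine e = { .has_pending_requests = false };
        struct ctx c = { .dirty = true };

        context_free_engine_state(&e, &c);
        return 0;
}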
[Intel-gfx] [PATCH] drm/i915: Extend LRC pinning to cover GPU context writeback
Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been written back to by the GPU. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. This fixes an issue with GuC submission where the GPU might not have finished writing back the context before it is unpinned. This results in a GPU hang. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request v5: Only create a switch to idle context if the ring doesn't already have a request pending on it (Alex Dai) Rename unsaved to dirty to avoid double negatives (Dave Gordon) Changed _no_req postfix to __ prefix for consistency (Dave Gordon) Split out per engine cleanup from context_free as it was getting unwieldy Corrected locking (Dave Gordon) v6: Removed some bikeshedding (Mika Kuoppala) Added explanation of the GuC hang that this fixes (Daniel Vetter) v7: Removed extra per request pinning from ring reset code (Alex Dai) Added forced ring unpin/clean in error case in context free (Alex Dai) Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai Cc: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 7 +- drivers/gpu/drm/i915/intel_lrc.c | 136 --- drivers/gpu/drm/i915/intel_lrc.h | 1 + 4 files changed, 118 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d5cf30b..e82717a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -889,6 +889,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool dirty; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e955499..69e9d96 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + if (i915.enable_execlists) + intel_lr_context_complete_check(request); + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position @@ -2765,10 +2768,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, struct drm_i915_gem_request, execlist_link); list_del(&submit_req->execlist_link); - - if (submit_req->ctx != ring->default_context) - intel_lr_context_unpin(submit_req); - i915_gem_request_unreference(submit_req); } spin_unlock_irq(&ring->execlist_lock); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 06180dc..b4d9c8f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) struct drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request) if (intel_ring_stopped(ring)) return; + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].dirty) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].dirty = true; + } + } + if (dev_priv->guc.execbuf_client) i915_guc_submit(dev_priv->guc.execbuf_client, request); else @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) spin_unlock_irq(&ring->execlist_lock); list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) { - struct intel_context *ctx = req->ctx; - struct drm_i915_gem_object *ctx_obj = - ctx->e
[Intel-gfx] [PATCH v8] drm/i915: Extend LRC pinning to cover GPU context writeback
Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been written back to by the GPU. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. This fixes an issue with GuC submission where the GPU might not have finished writing back the context before it is unpinned. This results in a GPU hang. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request v5: Only create a switch to idle context if the ring doesn't already have a request pending on it (Alex Dai) Rename unsaved to dirty to avoid double negatives (Dave Gordon) Changed _no_req postfix to __ prefix for consistency (Dave Gordon) Split out per engine cleanup from context_free as it was getting unwieldy Corrected locking (Dave Gordon) v6: Removed some bikeshedding (Mika Kuoppala) Added explanation of the GuC hang that this fixes (Daniel Vetter) v7: Removed extra per request pinning from ring reset code (Alex Dai) Added forced ring unpin/clean in error case in context free (Alex Dai) v8: Renamed lrc specific last_context to lrc_last_context as there were some reset cases where the codepaths leaked (Mika Kuoppala) NULL'd last_context in reset case - there was a pointer leak if someone did reset->close context. Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai Cc: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 7 +- drivers/gpu/drm/i915/intel_lrc.c| 138 ++-- drivers/gpu/drm/i915/intel_lrc.h| 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 5 files changed, 121 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9ab3e25..a59ca13 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -884,6 +884,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool dirty; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a6997a8..cd27ecc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1362,6 +1362,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + if (i915.enable_execlists) + intel_lr_context_complete_check(request); + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position @@ -2772,10 +2775,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, struct drm_i915_gem_request, execlist_link); list_del(&submit_req->execlist_link); - - if (submit_req->ctx != ring->default_context) - intel_lr_context_unpin(submit_req); - i915_gem_request_unreference(submit_req); } spin_unlock_irq(&ring->execlist_lock); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 4ebafab..f96fb51 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -571,9 +571,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) struct 
drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); @@ -737,6 +734,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) if (intel_ring_stopped(ring)) return; + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].dirty) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].dirty = true; + } + } + if (dev_priv->guc.execbuf_client) i915_guc_submit(dev_priv->guc.execbuf_client, request); else @@ -963,12 +967,6 @@ void intel_execlists_retire_requests
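The v8 note about NULLing the engine's last-context pointer in the reset case amounts to the pattern below; the field and function names are simplified placeholders rather than the actual intel_ringbuffer.h / intel_lrc.c symbols, and the exact reference handling in the driver may differ.

#include <assert.h>
#include <stddef.h>

struct ctx {
        int refcount;
};

struct engine {
        struct ctx *lrc_last_context;   /* execlists-specific bookkeeping */
};

static void ctx_unreference(struct ctx *c)
{
        assert(c->refcount > 0);
        c->refcount--;
}

/*
 * Reset path: drop the reference *and* clear the pointer, so a later
 * context close after a reset neither leaks nor follows a stale pointer.
 */
static void engine_reset_cleanup(struct engine *e)
{
        if (e->lrc_last_context) {
                ctx_unreference(e->lrc_last_context);
                e->lrc_last_context = NULL;
        }
}

int main(void)
{
        struct ctx c = { .refcount = 1 };
        struct engine e = { .lrc_last_context = &c };

        engine_reset_cleanup(&e);
        engine_reset_cleanup(&e);       /* second call is a harmless no-op */
        return 0;
}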
[Intel-gfx] [PATCH] drm/i915: Fix context/engine cleanup order
Swap the order of context & engine cleanup, so that it is now contexts, then engines. This allows the context clean up code to do things like confirm that ring->dev->struct_mutex is locked without a NULL pointer dereference. This came about as a result of the 'intel_ring_initialized() must be simple and inline' patch now using ring->dev as an initialised flag. Rename the cleanup function to reflect what it actually does. Also clean up some very annoying whitespace issues at the same time. Signed-off-by: Nick Hoath Cc: Mika Kuoppala Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 23 --- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 84e2b20..a2857b0 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -449,7 +449,7 @@ static int i915_load_modeset_init(struct drm_device *dev) cleanup_gem: mutex_lock(&dev->struct_mutex); - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); i915_gem_context_fini(dev); mutex_unlock(&dev->struct_mutex); cleanup_irq: @@ -1188,8 +1188,8 @@ int i915_driver_unload(struct drm_device *dev) intel_guc_ucode_fini(dev); mutex_lock(&dev->struct_mutex); - i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); + i915_gem_cleanup_engines(dev); mutex_unlock(&dev->struct_mutex); intel_fbc_cleanup_cfb(dev_priv); i915_gem_cleanup_stolen(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5edd393..e317f88 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3016,7 +3016,7 @@ int i915_gem_init_rings(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); -void i915_gem_cleanup_ringbuffer(struct drm_device *dev); +void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); int __must_check i915_gem_suspend(struct drm_device *dev); void __i915_add_request(struct drm_i915_gem_request *req, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8e2acde..04a22db 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4823,7 +4823,7 @@ i915_gem_init_hw(struct drm_device *dev) ret = i915_gem_request_alloc(ring, ring->default_context, &req); if (ret) { - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4836,7 +4836,7 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); i915_gem_request_cancel(req); - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4844,7 +4844,7 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); i915_gem_request_cancel(req); - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4919,7 +4919,7 @@ out_unlock: } void -i915_gem_cleanup_ringbuffer(struct drm_device *dev) +i915_gem_cleanup_engines(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; @@ -4928,13 +4928,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev) for_each_ring(ring, dev_priv, i) 
dev_priv->gt.cleanup_ring(ring); -if (i915.enable_execlists) -/* - * Neither the BIOS, ourselves or any other kernel - * expects the system to be in execlists mode on startup, - * so we need to reset the GPU back to legacy mode. - */ -intel_gpu_reset(dev); + if (i915.enable_execlists) { + /* +* Neither the BIOS, ourselves or any other kernel +* expects the system to be in execlists mode on startup, +* so we need to reset the GPU back to legacy mode. +*/ + intel_gpu_reset(dev); + } } static void -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
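The ordering requirement this commit message describes can be reduced to the toy model below: context teardown dereferences the engine's dev back-pointer (e.g. to check struct_mutex), so engines must still look initialised at that point. This is a standalone illustration with made-up types, not the driver code.

#include <assert.h>
#include <stddef.h>

struct engine {
        void *dev;              /* also doubles as the "initialised" flag */
};

struct context {
        struct engine *engine;
};

static void engine_cleanup(struct engine *e)
{
        e->dev = NULL;          /* after this, the engine looks uninitialised */
}

static void context_cleanup(struct context *c)
{
        /* context teardown wants to inspect state reached via engine->dev */
        assert(c->engine->dev != NULL);
}

int main(void)
{
        int dev;
        struct engine e = { .dev = &dev };
        struct context c = { .engine = &e };

        context_cleanup(&c);    /* contexts first ...                        */
        engine_cleanup(&e);     /* ... engines last; the reverse order would
                                   trip the assert above                     */
        return 0;
}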
[Intel-gfx] [PATCH v2] drm/i915: Fix context/engine cleanup order
Swap the order of context & engine cleanup, so that it is now contexts, then engines. This allows the context clean up code to do things like confirm that ring->dev->struct_mutex is locked without a NULL pointer dereference. This came about as a result of the 'intel_ring_initialized() must be simple and inline' patch now using ring->dev as an initialised flag. Rename the cleanup function to reflect what it actually does. Also clean up some very annoying whitespace issues at the same time. v2: Also make the fix in i915_load_modeset_init, not just in i915_driver_unload (Chris Wilson) Signed-off-by: Nick Hoath Reviewed-by: Chris Wilson Cc: Mika Kuoppala Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 23 --- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 84e2b20..4dad121 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -449,8 +449,8 @@ static int i915_load_modeset_init(struct drm_device *dev) cleanup_gem: mutex_lock(&dev->struct_mutex); - i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); + i915_gem_cleanup_engines(dev); mutex_unlock(&dev->struct_mutex); cleanup_irq: intel_guc_ucode_fini(dev); @@ -1188,8 +1188,8 @@ int i915_driver_unload(struct drm_device *dev) intel_guc_ucode_fini(dev); mutex_lock(&dev->struct_mutex); - i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); + i915_gem_cleanup_engines(dev); mutex_unlock(&dev->struct_mutex); intel_fbc_cleanup_cfb(dev_priv); i915_gem_cleanup_stolen(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5edd393..e317f88 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3016,7 +3016,7 @@ int i915_gem_init_rings(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); -void i915_gem_cleanup_ringbuffer(struct drm_device *dev); +void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); int __must_check i915_gem_suspend(struct drm_device *dev); void __i915_add_request(struct drm_i915_gem_request *req, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8e2acde..04a22db 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4823,7 +4823,7 @@ i915_gem_init_hw(struct drm_device *dev) ret = i915_gem_request_alloc(ring, ring->default_context, &req); if (ret) { - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4836,7 +4836,7 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); i915_gem_request_cancel(req); - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4844,7 +4844,7 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); i915_gem_request_cancel(req); - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4919,7 +4919,7 @@ out_unlock: } void -i915_gem_cleanup_ringbuffer(struct drm_device *dev) +i915_gem_cleanup_engines(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs 
*ring; @@ -4928,13 +4928,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev) for_each_ring(ring, dev_priv, i) dev_priv->gt.cleanup_ring(ring); -if (i915.enable_execlists) -/* - * Neither the BIOS, ourselves or any other kernel - * expects the system to be in execlists mode on startup, - * so we need to reset the GPU back to legacy mode. - */ -intel_gpu_reset(dev); + if (i915.enable_execlists) { + /* +* Neither the BIOS, ourselves or any other kernel +* expects the system to be in execlists mode on startup, +* so we need to reset the GPU back to legacy m
[Intel-gfx] [PATCH] drm/i915: Fix context/engine cleanup order
Swap the order of context & engine cleanup, so that it is now contexts, then engines. This allows the context clean up code to do things like confirm that ring->dev->struct_mutex is locked without a NULL pointer dereference. This came about as a result of the 'intel_ring_initialized() must be simple and inline' patch now using ring->dev as an initialised flag. Rename the cleanup function to reflect what it actually does. Also clean up some very annoying whitespace issues at the same time. Previous code did a kunmap() on the wrong page, and didn't account for the fact that the HWSP and the default context are the different offsets within the same object. v2: Also make the fix in i915_load_modeset_init, not just in i915_driver_unload (Chris Wilson) v3: Folded in Dave Gordon's fix for HWSP kunmap issues. Signed-off-by: Nick Hoath Reviewed-by: Chris Wilson Cc: Mika Kuoppala Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c | 4 +-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 23 ++--- drivers/gpu/drm/i915/i915_gem_context.c | 9 -- drivers/gpu/drm/i915/intel_lrc.c| 57 +++-- 5 files changed, 62 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 84e2b20..4dad121 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -449,8 +449,8 @@ static int i915_load_modeset_init(struct drm_device *dev) cleanup_gem: mutex_lock(&dev->struct_mutex); - i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); + i915_gem_cleanup_engines(dev); mutex_unlock(&dev->struct_mutex); cleanup_irq: intel_guc_ucode_fini(dev); @@ -1188,8 +1188,8 @@ int i915_driver_unload(struct drm_device *dev) intel_guc_ucode_fini(dev); mutex_lock(&dev->struct_mutex); - i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); + i915_gem_cleanup_engines(dev); mutex_unlock(&dev->struct_mutex); intel_fbc_cleanup_cfb(dev_priv); i915_gem_cleanup_stolen(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5edd393..e317f88 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3016,7 +3016,7 @@ int i915_gem_init_rings(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); -void i915_gem_cleanup_ringbuffer(struct drm_device *dev); +void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); int __must_check i915_gem_suspend(struct drm_device *dev); void __i915_add_request(struct drm_i915_gem_request *req, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8e2acde..04a22db 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4823,7 +4823,7 @@ i915_gem_init_hw(struct drm_device *dev) ret = i915_gem_request_alloc(ring, ring->default_context, &req); if (ret) { - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4836,7 +4836,7 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); i915_gem_request_cancel(req); - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4844,7 +4844,7 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); 
i915_gem_request_cancel(req); - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4919,7 +4919,7 @@ out_unlock: } void -i915_gem_cleanup_ringbuffer(struct drm_device *dev) +i915_gem_cleanup_engines(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; @@ -4928,13 +4928,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev) for_each_ring(ring, dev_priv, i) dev_priv->gt.cleanup_ring(ring); -if (i915.enable_execlists) -/* - * Neither the BIOS, ourselves or any other kernel - * expects the system to be in execlists mode on startup, -
Re: [Intel-gfx] [PATCH 1/4] drm/i915: teardown default context in reverse, update comments
Reviewed-by: Nick Hoath On 16/12/2015 18:36, Gordon, David S wrote: We set up engines in forwards order, so some things (notably the default context) are "owned" by engine 0 (the render engine, aka "RCS"). For symmetry and to make sure such shared objects don't disappear too early, we should generally run teardown loops in the reverse order, so that engine 0 is processed last. This patch changes i915_gem_context_fini() to do that, and clarifies the comments in i915_gem_context_{init,fini}() about the refcounting of the default {struct intel_)context: the refcount is just ONE, no matter how many rings exist or are active, and this refcount is nominally ascribed to the render ring (RCS), which is set up first and now torn down last. Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_gem_context.c | 21 + 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 900ffd0..e143ea5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -391,7 +391,13 @@ int i915_gem_context_init(struct drm_device *dev) for (i = 0; i < I915_NUM_RINGS; i++) { struct intel_engine_cs *ring = &dev_priv->ring[i]; - /* NB: RCS will hold a ref for all rings */ + /* +* Although each engine has a pointer to the global default +* context, they don't contribute to the refcount on the +* context. We consider that RCS (which is set up first and +* torn down last) holds this reference on behalf of all the +* other engines +*/ ring->default_context = ctx; } @@ -431,14 +437,21 @@ void i915_gem_context_fini(struct drm_device *dev) i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state); } - for (i = 0; i < I915_NUM_RINGS; i++) { + for (i = I915_NUM_RINGS; --i >= 0;) { struct intel_engine_cs *ring = &dev_priv->ring[i]; - if (ring->last_context) + if (ring->last_context) { i915_gem_context_unreference(ring->last_context); + ring->last_context = NULL; + } + /* +* These default_context pointers don't contribute to the +* refcount on the context. We consider that RCS holds its +* reference on behalf of all the other engines, so there's +* just a single unreference() call below. +*/ ring->default_context = NULL; - ring->last_context = NULL; } i915_gem_context_unreference(dctx); ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
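The shared-ownership argument in this patch can be pictured with the toy loop below: whatever engine 0 (RCS) nominally owns must survive until every later engine has been processed, which falls out naturally from tearing down in the reverse of the creation order. The names and the engine count are illustrative only.

#include <assert.h>
#include <stdbool.h>

#define NUM_ENGINES 5           /* illustrative count */

struct shared {
        bool alive;
};

static struct shared default_ctx = { true };

static void engine_fini(int i)
{
        /* every engine may still look at the shared default context here */
        assert(default_ctx.alive);

        if (i == 0)             /* RCS holds the single reference, drops it last */
                default_ctx.alive = false;
}

int main(void)
{
        int i;

        for (i = NUM_ENGINES; --i >= 0;)        /* reverse of the setup order */
                engine_fini(i);
        return 0;
}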
Re: [Intel-gfx] [PATCH 2/4] drm/i915: mark the global default (intel_)context as such
On 16/12/2015 19:30, Chris Wilson wrote: On Wed, Dec 16, 2015 at 07:22:52PM +, Dave Gordon wrote: On 16/12/15 18:57, Chris Wilson wrote: On Wed, Dec 16, 2015 at 06:36:49PM +, Dave Gordon wrote: Some of the LRC-specific context-destruction code has to special-case the global default context, because the HWSP is part of that context. At present it deduces it indirectly by checking for the backpointer from the engine to the context, but that's an unsafe assumption if the setup and teardown code is reorganised. (It could also test !ctx->file_priv, but again that's a detail that might be subject to change). So here we explicitly flag the default context at the point of creation, and then reorganise the code in intel_lr_context_free() not to rely on the ring->default_pointer (still) being set up; to iterate over engines in reverse (as this is teardown code); and to reduce the nesting level so it's easier to read. Signed-off-by: Dave Gordon #define intel_context_is_global(ctx) ((ctx)->file_priv == NULL) The last sentence of the first paragraph of the commit message above notes that we *could* use that as a test, but I don't regard it as a safe test, in either direction. That is, it could give a false negative if we someday associate some (internal) fd with the default context, or (more likely) a false positive if the file association were broken and the pointer nulled in an earlier stage of the teardown of a non-global (user-created) context. int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_context_destroy *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; struct intel_context *ctx; int ret; if (args->ctx_id == DEFAULT_CONTEXT_HANDLE) return -ENOENT; ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; ctx = i915_gem_context_get(file_priv, args->ctx_id); if (IS_ERR(ctx)) { mutex_unlock(&dev->struct_mutex); return PTR_ERR(ctx); } idr_remove(&ctx->file_priv->context_idr, ctx->user_handle); i915_gem_context_unreference(ctx); mutex_unlock(&dev->struct_mutex); DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id); return 0; } At present, i915_gem_context_destroy_ioctl() above removes the context from the file's list-of-contexts but DOESN'T clear the ctx->file_priv, which means there's a somewhat inconsistent (but transient) state during which a soon-to-be-destroyed context links to a file, but the file doesn't have a link back. It probably doesn't matter, because the code holds the mutex across the two operations ... And that the ctx was created to belong to the file still holds true. ... unless of course the context's refcount isn't 1 at this point, in which case I suppose someone else *might* go from the context to the file and then be mystified as to why the context isn't on the list ... ... and if we changed the code above, then file_priv would *always* be NULL by the time the destructor was called! So it's surely safer to have a flag that explicitly says "I'm the global default context" than to guess based on some other contingent property. No, we have a flag that says this context was created belonging to a file, with the corollary that only one context doesn't belong to any file. Using pointers like this to provide 'magic' secondary state information just adds to the fragility of the driver. So: Reviewed-by: Nick Hoath to the original patch. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
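For reference, the two identification schemes being argued over look roughly like this; the struct and field names are placeholders, not the real intel_context layout. The case for the explicit flag is that it stays valid even if file_priv is cleared or repurposed during teardown, whereas the pointer test encodes the property only indirectly.

#include <stdbool.h>
#include <stddef.h>

struct context_model {
        void *file_priv;                /* NULL for kernel-owned contexts */
        bool is_global_default;         /* set exactly once, at creation */
};

/* Implicit test: relies on "only one context has no file" staying true. */
static bool is_global_by_file(const struct context_model *ctx)
{
        return ctx->file_priv == NULL;
}

/* Explicit test: immune to file_priv being cleared during teardown. */
static bool is_global_by_flag(const struct context_model *ctx)
{
        return ctx->is_global_default;
}

int main(void)
{
        struct context_model user = { &user, false };
        struct context_model def = { NULL, true };

        return (is_global_by_flag(&def) && !is_global_by_file(&user)) ? 0 : 1;
}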
Re: [Intel-gfx] [PATCH 3/4] drm/i915: tidy up initialisation failure paths (legacy)
On 16/12/2015 18:36, Gordon, David S wrote: 1. Fix intel_cleanup_ring_buffer() to handle the error cleanup case where the ringbuffer has been allocated but map-and-pin failed. Unpin it iff it's previously been mapped-and-pinned. 2. Fix the error path in intel_init_ring_buffer(), which already called intel_destroy_ringbuffer_obj(), but failed to free the actual ringbuffer structure. Calling intel_ringbuffer_free() instead does both in one go. 3. With the above change, intel_destroy_ringbuffer_obj() is only called in one place (intel_ringbuffer_free()), so flatten it into that function. 4. move low-level register accesses from intel_cleanup_ring_buffer() (which calls intel_stop_ring_buffer(ring) which calls stop_ring()) down into stop_ring() itself), which is already doing low-level register accesses. Then, intel_cleanup_ring_buffer() no longer needs 'dev_priv'. Reviewed-by: Nick Hoath Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/intel_ringbuffer.c | 47 +++-- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index eefce9a..2853754 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -549,6 +549,8 @@ static bool stop_ring(struct intel_engine_cs *ring) I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); } + WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0); + return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0; } @@ -2057,12 +2059,6 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev, return 0; } -static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) -{ - drm_gem_object_unreference(&ringbuf->obj->base); - ringbuf->obj = NULL; -} - static int intel_alloc_ringbuffer_obj(struct drm_device *dev, struct intel_ringbuffer *ringbuf) { @@ -2125,11 +2121,14 @@ intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) } void -intel_ringbuffer_free(struct intel_ringbuffer *ring) +intel_ringbuffer_free(struct intel_ringbuffer *ringbuf) { - intel_destroy_ringbuffer_obj(ring); - list_del(&ring->link); - kfree(ring); + if (ringbuf->obj) { + drm_gem_object_unreference(&ringbuf->obj->base); + ringbuf->obj = NULL; + } + list_del(&ringbuf->link); + kfree(ringbuf); } static int intel_init_ring_buffer(struct drm_device *dev, @@ -2157,6 +2156,13 @@ static int intel_init_ring_buffer(struct drm_device *dev, } ring->buffer = ringbuf; + ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf); + if (ret) { + DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", + ring->name, ret); + goto error; + } + if (I915_NEED_GFX_HWS(dev)) { ret = init_status_page(ring); if (ret) @@ -2168,14 +2174,6 @@ static int intel_init_ring_buffer(struct drm_device *dev, goto error; } - ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf); - if (ret) { - DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", - ring->name, ret); - intel_destroy_ringbuffer_obj(ringbuf); - goto error; - } - ret = i915_cmd_parser_init_ring(ring); if (ret) goto error; @@ -2189,19 +2187,18 @@ error: void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) { - struct drm_i915_private *dev_priv; + struct intel_ringbuffer *ringbuf; if (!intel_ring_initialized(ring)) return; - dev_priv = to_i915(ring->dev); - - if (ring->buffer) { + ringbuf = ring->buffer; + if (ringbuf) { intel_stop_ring_buffer(ring); - WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0); - intel_unpin_ringbuffer_obj(ring->buffer); - 
intel_ringbuffer_free(ring->buffer); + if (ringbuf->virtual_start) + intel_unpin_ringbuffer_obj(ringbuf); + intel_ringbuffer_free(ringbuf); ring->buffer = NULL; } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
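The error-handling idea in points 1 and 2 above (only undo work that actually completed, and free the ringbuffer structure together with its backing object) can be sketched as follows. This is a standalone model with simplified types, not intel_ringbuffer.c itself.

#include <stdlib.h>

struct ringbuf {
        void *obj;              /* backing object, NULL if allocation failed */
        void *virtual_start;    /* non-NULL only once pinned and mapped */
};

/* Frees both the backing object (if any) and the structure itself. */
static void ringbuf_free(struct ringbuf *rb)
{
        free(rb->obj);
        free(rb);
}

static void ring_cleanup(struct ringbuf *rb)
{
        if (!rb)
                return;

        if (rb->virtual_start) {
                /* unmap/unpin only if the map actually succeeded */
                rb->virtual_start = NULL;
        }

        ringbuf_free(rb);
}

int main(void)
{
        struct ringbuf *rb = calloc(1, sizeof(*rb));

        /* Simulate "allocated, but pin-and-map failed": virtual_start stays
         * NULL, and cleanup must cope without trying to unpin anything. */
        if (rb)
                rb->obj = malloc(16);
        ring_cleanup(rb);
        return 0;
}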
Re: [Intel-gfx] [PATCH 4/4] drm/i915: tidy up initialisation failure paths (GEM & LRC)
On 16/12/2015 18:36, Gordon, David S wrote: 1. add call to i915_gem_context_fini() to deallocate the default context(s) if the call to init_rings() fails, so that we don't leak the context in that situation. 2. remove useless code in intel_logical_ring_cleanup(), presumably copypasted from legacy ringbuffer version at creation. Reviewed-by: Nick Hoath Signed-off-by: Dave Gordon --- drivers/gpu/drm/i915/i915_gem.c | 5 - drivers/gpu/drm/i915/intel_lrc.c | 10 ++ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 66b1705..15f8989 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4890,8 +4890,11 @@ int i915_gem_init(struct drm_device *dev) goto out_unlock; ret = dev_priv->gt.init_rings(dev); - if (ret) + if (ret) { + i915_gem_context_fini(dev); + /* XXX: anything else to be undone here? */ goto out_unlock; + } ret = i915_gem_init_hw(dev); if (ret == -EIO) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 23f90b2..cdb65eb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1887,17 +1887,11 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) */ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { - struct drm_i915_private *dev_priv; - if (!intel_ring_initialized(ring)) return; - dev_priv = ring->dev->dev_private; - - if (ring->buffer) { - intel_logical_ring_stop(ring); - WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); - } + /* should not be set in LRC mode */ + WARN_ON(ring->buffer); if (ring->cleanup) ring->cleanup(ring); ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
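The first point, unwinding the default context when ring initialisation fails, is the usual staged-init/unwind pattern, sketched below with stand-in functions (the real code calls i915_gem_context_fini() on the failure path after dev_priv->gt.init_rings() fails).

#include <stdio.h>

/* stand-ins for the real init/teardown stages */
static int context_init(void)
{
        puts("context init");
        return 0;
}

static void context_fini(void)
{
        puts("context fini");
}

static int init_rings(void)
{
        puts("init rings: simulated failure");
        return -1;
}

static int gem_init(void)
{
        int ret = context_init();

        if (ret)
                return ret;

        ret = init_rings();
        if (ret) {
                context_fini();         /* don't leak the default context */
                return ret;
        }

        return 0;
}

int main(void)
{
        return gem_init() ? 1 : 0;
}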
[Intel-gfx] [PATCH v4] drm/i915: Fix context/engine cleanup order
Swap the order of context & engine cleanup, so that it is now contexts, then engines. This allows the context clean up code to do things like confirm that ring->dev->struct_mutex is locked without a NULL pointer dereference. This came about as a result of the 'intel_ring_initialized() must be simple and inline' patch now using ring->dev as an initialised flag. Rename the cleanup function to reflect what it actually does. Also clean up some very annoying whitespace issues at the same time. Previous code did a kunmap() on the wrong page, and didn't account for the fact that the HWSP and the default context are the different offsets within the same object. v2: Also make the fix in i915_load_modeset_init, not just in i915_driver_unload (Chris Wilson) v3: Folded in Dave Gordon's fix for HWSP kunmap issues. v4: Rebase over Dave Gordon's various cleanups Signed-off-by: Nick Hoath Cc: Mika Kuoppala Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c | 4 +-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 23 + drivers/gpu/drm/i915/intel_lrc.c | 55 4 files changed, 54 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 84e2b20..4dad121 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -449,8 +449,8 @@ static int i915_load_modeset_init(struct drm_device *dev) cleanup_gem: mutex_lock(&dev->struct_mutex); - i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); + i915_gem_cleanup_engines(dev); mutex_unlock(&dev->struct_mutex); cleanup_irq: intel_guc_ucode_fini(dev); @@ -1188,8 +1188,8 @@ int i915_driver_unload(struct drm_device *dev) intel_guc_ucode_fini(dev); mutex_lock(&dev->struct_mutex); - i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); + i915_gem_cleanup_engines(dev); mutex_unlock(&dev->struct_mutex); intel_fbc_cleanup_cfb(dev_priv); i915_gem_cleanup_stolen(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4c24666..27bb401 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3018,7 +3018,7 @@ int i915_gem_init_rings(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); -void i915_gem_cleanup_ringbuffer(struct drm_device *dev); +void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); int __must_check i915_gem_suspend(struct drm_device *dev); void __i915_add_request(struct drm_i915_gem_request *req, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 702c720..517676a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4823,7 +4823,7 @@ i915_gem_init_hw(struct drm_device *dev) ret = i915_gem_request_alloc(ring, ring->default_context, &req); if (ret) { - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4836,7 +4836,7 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); i915_gem_request_cancel(req); - i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4844,7 +4844,7 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); i915_gem_request_cancel(req); - 
i915_gem_cleanup_ringbuffer(dev); + i915_gem_cleanup_engines(dev); goto out; } @@ -4922,7 +4922,7 @@ out_unlock: } void -i915_gem_cleanup_ringbuffer(struct drm_device *dev) +i915_gem_cleanup_engines(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; @@ -4931,13 +4931,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev) for_each_ring(ring, dev_priv, i) dev_priv->gt.cleanup_ring(ring); -if (i915.enable_execlists) -/* - * Neither the BIOS, ourselves or any other kernel - * expects the system to be in execlists mode on startup, - * so we need to reset
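One detail from the commit message above deserves a note: kunmap() must be handed the same struct page that kmap() returned, which is easy to get wrong when several pieces of state (HWSP, default context image) live at different offsets within one object. Below is a hedged fragment in the driver's own terms, not a standalone program, showing the correct pairing; LRC_STATE_PN and i915_gem_object_get_page() are the names used elsewhere in this series.

/*
 * Sketch in the driver's own terms (will not build standalone): kmap()
 * and kunmap() must be paired on the same struct page, here the LRC
 * register-state page of the context object.
 */
static void touch_lrc_state_page(struct drm_i915_gem_object *ctx_obj)
{
    struct page *page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
    uint32_t *reg_state = kmap(page);

    /* ... read or update reg_state[] here ... */

    kunmap(page);    /* unmap the page that was mapped, not another one */
}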
[Intel-gfx] [PATCH v9] drm/i915: Extend LRC pinning to cover GPU context writeback
Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been written back to by the GPU. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. This fixes an issue with GuC submission where the GPU might not have finished writing back the context before it is unpinned. This results in a GPU hang. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request v5: Only create a switch to idle context if the ring doesn't already have a request pending on it (Alex Dai) Rename unsaved to dirty to avoid double negatives (Dave Gordon) Changed _no_req postfix to __ prefix for consistency (Dave Gordon) Split out per engine cleanup from context_free as it was getting unwieldy Corrected locking (Dave Gordon) v6: Removed some bikeshedding (Mika Kuoppala) Added explanation of the GuC hang that this fixes (Daniel Vetter) v7: Removed extra per request pinning from ring reset code (Alex Dai) Added forced ring unpin/clean in error case in context free (Alex Dai) v8: Renamed lrc specific last_context to lrc_last_context as there were some reset cases where the codepaths leaked (Mika Kuoppala) NULL'd last_context in reset case - there was a pointer leak if someone did reset->close context. v9: Rebase over "Fix context/engine cleanup order" Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai Cc: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 7 +- drivers/gpu/drm/i915/intel_lrc.c| 146 ++-- drivers/gpu/drm/i915/intel_lrc.h| 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 5 files changed, 124 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 27bb401..2f6f411 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -887,6 +887,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool dirty; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 517676a..69e904f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1362,6 +1362,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + if (i915.enable_execlists) + intel_lr_context_complete_check(request); + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position @@ -2772,10 +2775,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, struct drm_i915_gem_request, execlist_link); list_del(&submit_req->execlist_link); - - if (submit_req->ctx != ring->default_context) - intel_lr_context_unpin(submit_req); - i915_gem_request_unreference(submit_req); } spin_unlock_irq(&ring->execlist_lock); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d542a8d..01063f7 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -570,9 +570,6 @@ static int 
execlists_context_queue(struct drm_i915_gem_request *request) struct drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); @@ -736,6 +733,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) if (intel_ring_stopped(ring)) return; + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].dirty) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].dirty = true; + } + } + if (dev_priv->guc.execbuf_client) i915_guc_submit(dev_priv->guc.execbuf_client, request); else @
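The core of the scheme is easiest to see stripped of the driver plumbing: pin a context the first time it is submitted and mark it dirty, then release that pin only when a request on a different context retires, since that is the first point at which the hardware is guaranteed to have written the old context back. A standalone model of just that bookkeeping follows; every name in it is a stand-in, and the "hardware" is reduced to a single current-context pointer.

/*
 * Standalone model of the pin-until-written-back bookkeeping: pin on the
 * first submission (marking the context dirty), unpin once a request on a
 * different context retires.
 */
#include <stdio.h>

struct model_ctx {
    const char *name;
    int pin_count;
    int dirty;
};

static struct model_ctx *model_current;    /* last context seen on the engine */

static void model_submit(struct model_ctx *ctx)
{
    if (!ctx->dirty) {
        ctx->pin_count++;      /* keep it resident until it is written back */
        ctx->dirty = 1;
        printf("pin   %s (pin_count=%d)\n", ctx->name, ctx->pin_count);
    }
}

static void model_retire(struct model_ctx *ctx)
{
    /*
     * The first retired request on a new context proves the hardware has
     * switched away from, and saved, the previous one.
     */
    if (model_current && model_current != ctx && model_current->dirty) {
        model_current->dirty = 0;
        model_current->pin_count--;
        printf("unpin %s (pin_count=%d)\n", model_current->name,
               model_current->pin_count);
    }
    model_current = ctx;
}

int main(void)
{
    struct model_ctx a = { "ctxA", 0, 0 }, b = { "ctxB", 0, 0 };

    model_submit(&a);    /* pins ctxA */
    model_submit(&a);    /* already dirty, no extra pin */
    model_submit(&b);    /* pins ctxB */
    model_retire(&a);    /* ctxA's own requests retire; ctxA stays pinned */
    model_retire(&b);    /* first request on ctxB retires: ctxA is released */
    return 0;
}

Note that ctxB is still pinned at the end of the run, which is exactly why the later revisions add a switch to the default/idle context before freeing a context that is still dirty.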
Re: [Intel-gfx] [PATCH v3] drm/i915: resize the GuC WOPCM for rc6
On 06/05/2016 08:01, Peter Antoine wrote: On Thu, 5 May 2016, Dave Gordon wrote: On 05/05/2016 15:02, Antoine, Peter wrote: The attached version still does not explain that the WOPCM_TOP is to tell the GuC not to use that space. That's NOT what WOPCM_TOP means. The GuC is allowed to use the space up to the value stored in the GUC_WOPCM_SIZE register (as the comment above the #define says). Architecturally, this is allowed to be any value greater than (16K + size of internal SRAM (64, 128, or 256K)) and less than or equal to GUC_WOPCM_TOP (which is a platform-independent constant), so we normally choose the maximum allowed. However on BXT, we need to leave some space at the top for the RC6 image, hence the logic (and comments!) in guc_wopcm_size(). Yes, the firmware can use up to GUC_WOPCM_TOP and is to leave the rest alone. The extra information does not aid anybody as the information is used internally within the GuC. It may help the next person who has to figure out what's gone wrong on some future chip that needs more than 64K for RC6! You hid an if statement in a function (making the code harder to read and more prone to error), where maybe a slightly clearer comment was all that was required. And this patch has been held up two weeks just for a better comment. Peter. .Dave. And what if the next reserved space is not for RC6? But I have no actual objection to the patch. Peter. Tested-by: Nick Hoath Reviewed-by: Nick Hoath -- Peter Antoine (Android Graphics Driver Software Engineer) - Intel Corporation (UK) Limited Registered No. 1134945 (England) Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3] drm/i915: resize the GuC WOPCM for rc6
On 05/05/2016 16:04, Dave Gordon wrote: On 05/05/2016 15:02, Antoine, Peter wrote: The attached version still does not explain that the WOPCM_TOP is to tell the GuC not to use that space. That's NOT what WOPCM_TOP means. The GuC is allowed to use the space up to the value stored in the GUC_WOPCM_SIZE register (as the comment above the #define says). Architecturally, this is allowed to be any value greater than (16K + size of internal SRAM (64, 128, or 256K)) and less than or equal to GUC_WOPCM_TOP (which is a platform-independent constant), so we normally choose the maximum allowed. However on BXT, we need to leave some space at the top for the RC6 image, hence the logic (and comments!) in guc_wopcm_size(). The extra information does not aid anybody as the information is used internally within the GuC. It may help the next person who has to figure out what's gone wrong on some future chip that needs more than 64K for RC6! .Dave. But I have no actual objection to the patch. Peter. Unfortunately Dave's patch locked my test system on bootup, so I've t-b & r-b'd Peter's. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
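For anyone skimming the thread, the rule being debated is small enough to state as code. The sketch below is illustrative only: the constants are placeholders rather than the real GUC_WOPCM_TOP or RC6 reservation, and it is not the driver's guc_wopcm_size().

/*
 * Illustrative only: placeholder constants, not the driver's code.
 */
#include <stdbool.h>
#include <stdio.h>

#define FAKE_WOPCM_TOP      (512u << 10)  /* placeholder architectural maximum */
#define FAKE_BXT_RC6_SPACE  (64u << 10)   /* placeholder reservation for RC6 */

static unsigned int wopcm_size_model(bool is_broxton)
{
    unsigned int size = FAKE_WOPCM_TOP;   /* normally use the maximum allowed */

    if (is_broxton)
        size -= FAKE_BXT_RC6_SPACE;       /* leave room at the top for RC6 */

    return size;
}

int main(void)
{
    printf("SKL-like: %u KiB\n", wopcm_size_model(false) >> 10);
    printf("BXT-like: %u KiB\n", wopcm_size_model(true) >> 10);
    return 0;
}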
[Intel-gfx] [PATCH 0/2] drm/i915/guc: GuC firmware loading updates
Updates to Skylake firmware filename & support for loading Broxton firmware. Nick Hoath (1): drm/i915/guc: Add Broxton GuC firmware loading support Tom O'Rourke (1): drm/i915/guc: Use major_minor version for filename drivers/gpu/drm/i915/intel_guc_loader.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915/guc: Use major_minor version for filename
From: Tom O'Rourke Load GuC firmware from a file with the major_minor number in the filename instead of using a symbolic link with only the major number. This change is so that new firmwares can only be used with a kernel change. This way, if there is a regression with a new firmware, it won't be used by default without some testing. Issue: VIZ-7713 Signed-off-by: Tom O'Rourke Signed-off-by: Nick Hoath Acked-by: Jani Nikula --- drivers/gpu/drm/i915/intel_guc_loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 58dbe30..46b01d7 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -59,7 +59,7 @@ * */ -#define I915_SKL_GUC_UCODE "i915/skl_guc_ver6.bin" +#define I915_SKL_GUC_UCODE "i915/skl_guc_ver6_1.bin" MODULE_FIRMWARE(I915_SKL_GUC_UCODE); /* User-friendly representation of an enum */ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
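The practical effect is that a kernel only ever loads the exact blob version it was taught to expect. The driver's verification code is not part of this diff, so the sketch below is only an illustration of that kind of gate, with made-up struct and field names.

/*
 * Illustrative version gate with made-up names: accept a firmware image
 * only when its major.minor matches what this kernel expects.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_fw_header {
    unsigned int major;
    unsigned int minor;
};

static bool fw_version_ok(const struct fake_fw_header *hdr,
                          unsigned int wanted_major, unsigned int wanted_minor)
{
    return hdr->major == wanted_major && hdr->minor == wanted_minor;
}

int main(void)
{
    struct fake_fw_header blob = { 6, 1 };    /* as in skl_guc_ver6_1.bin */

    printf("6.1 vs wanted 6.1: %s\n", fw_version_ok(&blob, 6, 1) ? "ok" : "reject");
    printf("6.1 vs wanted 6.2: %s\n", fw_version_ok(&blob, 6, 2) ? "ok" : "reject");
    return 0;
}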
[Intel-gfx] [PATCH 2/2] drm/i915/guc: Add Broxton GuC firmware loading support
Issue: VIZ-7772 Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/intel_guc_loader.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 46b01d7..d122e74 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -62,6 +62,9 @@ #define I915_SKL_GUC_UCODE "i915/skl_guc_ver6_1.bin" MODULE_FIRMWARE(I915_SKL_GUC_UCODE); +#define I915_BXT_GUC_UCODE "i915/bxt_guc_ver8_7.bin" +MODULE_FIRMWARE(I915_BXT_GUC_UCODE); + /* User-friendly representation of an enum */ const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status) { @@ -644,6 +647,10 @@ void intel_guc_ucode_init(struct drm_device *dev) fw_path = I915_SKL_GUC_UCODE; guc_fw->guc_fw_major_wanted = 6; guc_fw->guc_fw_minor_wanted = 1; + } else if (IS_BROXTON(dev)) { + fw_path = I915_BXT_GUC_UCODE; + guc_fw->guc_fw_major_wanted = 8; + guc_fw->guc_fw_minor_wanted = 7; } else { i915.enable_guc_submission = false; fw_path = ""; /* unknown device */ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Fi.CI.BAT: warning for drm/i915/guc: GuC firmware loading updates
On 09/05/2016 08:53, Patchwork wrote:
== Series Details ==
Series: drm/i915/guc: GuC firmware loading updates
URL : https://patchwork.freedesktop.org/series/6818/
State : warning
== Summary ==
Series 6818v1 drm/i915/guc: GuC firmware loading updates
http://patchwork.freedesktop.org/api/1.0/series/6818/revisions/1/mbox/
Test core_auth: Subgroup basic-auth: pass -> DMESG-WARN (bdw-ultra)
Already captured in: https://bugs.freedesktop.org/show_bug.cgi?id=92235
Test pm_rpm: Subgroup basic-pci-d3-state: pass -> SKIP (hsw-brixbox)
Looks like a HW failure: Test requirement not met in function enable_one_screen, file pm_rpm.c:329: Test requirement: enable_one_screen_with_type(data, SCREEN_TYPE_ANY)
bdw-nuci7-2  total:219 pass:206 dwarn:0 dfail:0 fail:0 skip:13
bdw-ultra    total:219 pass:192 dwarn:1 dfail:0 fail:0 skip:26
bsw-nuc-2    total:218 pass:174 dwarn:0 dfail:0 fail:2 skip:42
byt-nuc      total:218 pass:174 dwarn:0 dfail:0 fail:3 skip:41
hsw-brixbox  total:219 pass:192 dwarn:0 dfail:0 fail:0 skip:27
hsw-gt2      total:219 pass:197 dwarn:0 dfail:0 fail:1 skip:21
ivb-t430s    total:219 pass:188 dwarn:0 dfail:0 fail:0 skip:31
skl-i7k-2    total:219 pass:191 dwarn:0 dfail:0 fail:0 skip:28
skl-nuci5    total:219 pass:207 dwarn:0 dfail:0 fail:0 skip:12
snb-dellxps  total:37  pass:27  dwarn:0 dfail:0 fail:0 skip:9
snb-x220t    total:219 pass:176 dwarn:0 dfail:0 fail:1 skip:42
Results at /archive/results/CI_IGT_test/Patchwork_2146/
447f2438e5ee3e9bb7d5cbe88fc44cdff74cc165 drm-intel-nightly: 2016y-05m-08d-16h-21m-00s UTC integration manifest
b8d1e93 drm/i915/guc: Add Broxton GuC firmware loading support
9b4a018 drm/i915/guc: Use major_minor version for filename
___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3] drm/i915: resize the GuC WOPCM for rc6
On 06/05/2016 13:18, Gordon, David S wrote: On 06/05/16 10:37, Nick Hoath wrote: On 05/05/2016 16:04, Dave Gordon wrote: On 05/05/2016 15:02, Antoine, Peter wrote: The attached version still does not explain that the WOPCM_TOP is to tell the GuC not to use that space. That's NOT what WOPCM_TOP means. The GuC is allowed to use the space up to the value stored in the GUC_WOPCM_SIZE register (as the comment above the #define says). Architecturally, this is allowed to be any value greater than (16K+sizeof internal SRAM (64, 128, or 256K)) and less than or equal to GUC_WOPCM_TOP (which is a platform-independent constant), so we normally choose the maximm allowed. Howver on BXT, we need to leave some space at the top for the RC6 image, hence the logic (and comments!) in guc_wopcm_size(). The extra information does not aid anybody as the information is used internally within the GuC. It may help the next person who has to figure out what's gone wrong on some future chip that needs more than 64K for RC6! .Dave. But, I have not actual objection to the patch. Peter. Unfortunately Dave's patch locked my test system on bootup, so I've t-b & r-b'd Peter's. They're equivalent, unless your firmware happens to be between 458752 and 491520 bytes in size (in which case you have a problem anyway). To check, I've run both versions, with debug printing the value chosen (on SKL) and the value that would have been chosen on BXT, and they're identical (and both work). So I think your build had some other problem unrelated to the specific patch. I've no problem with using Peter's patch for now, but it's not just a matter of the comments; there's also the other use(s) of GUC_WOP_(TOP,SIZE_VALUE), with ad-hoc additions or subtractions. So it still needs fixing properly. .Dave. After a rebuild & a retest, Dave's patch works fine. Therefore for "drm/i915/bxt: reserve space for RC6 in the the GuC WOPCM": Tested-by: Nick Hoath Reviewed-by: Nick Hoath ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v6 3/7] drm/i915/guc: add enable_guc_loading parameter
On 20/05/2016 11:42, Tvrtko Ursulin wrote: From: Dave Gordon Split the function of "enable_guc_submission" into two separate options. The new one ("enable_guc_loading") controls only the *fetching and loading* of the GuC firmware image. The existing one is redefined to control only the *use* of the GuC for batch submission once the firmware is loaded. In addition, the degree of control has been refined from a simple bool to an integer key, allowing several options: -1 (default) whatever the platform default is 0 DISABLE don't load/use the GuC 1 BEST EFFORT try to load/use the GuC, fallback if not available 2 REQUIRE must load/use the GuC, else leave the GPU wedged The new platform default (as coded here) will be to attempt to load the GuC iff the device has a GuC that requires firmware, but not yet to use it for submission. A later patch will change to enable it if appropriate. v4: Changed some error-message levels, mostly ERROR->INFO, per review comments by Tvrtko Ursulin. v5: Dropped one more error message, disabled GuC submission on hypothetical firmware-free devices [Tvrtko Ursulin]. v6: Logging tidy by Tvrtko Ursulin: * Do not log falling back to execlists when wedging the GPU. * Do not log fw load errors when load was disabled by user. * Pass down some error code from fw load for log message to make more sense. Signed-off-by: Dave Gordon Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin (v5) Signed-off-by: Tvrtko Ursulin Reviewed-by: Nick Hoath (v6) --- drivers/gpu/drm/i915/i915_gem.c| 5 +- drivers/gpu/drm/i915/i915_guc_submission.c | 4 +- drivers/gpu/drm/i915/i915_params.c | 14 +++- drivers/gpu/drm/i915/i915_params.h | 3 +- drivers/gpu/drm/i915/intel_guc_loader.c| 123 + 5 files changed, 89 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 88dce5482f2f..1a3a07eca0d0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4868,11 +4868,8 @@ i915_gem_init_hw(struct drm_device *dev) /* We can't enable contexts until all firmware is loaded */ if (HAS_GUC(dev)) { ret = intel_guc_setup(dev); - if (ret) { - DRM_ERROR("Failed to initialize GuC, error %d\n", ret); - ret = -EIO; + if (ret) goto out; - } } /* diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 169242a8adff..916cd6778cf3 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -969,7 +969,7 @@ int intel_guc_suspend(struct drm_device *dev) struct intel_context *ctx; u32 data[3]; - if (!i915.enable_guc_submission) + if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) return 0; ctx = dev_priv->kernel_context; @@ -995,7 +995,7 @@ int intel_guc_resume(struct drm_device *dev) struct intel_context *ctx; u32 data[3]; - if (!i915.enable_guc_submission) + if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) return 0; ctx = dev_priv->kernel_context; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index cd74fb8e9387..21a323c01cdb 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -53,7 +53,8 @@ struct i915_params i915 __read_mostly = { .verbose_state_checks = 1, .nuclear_pageflip = 0, .edp_vswing = 0, - .enable_guc_submission = false, + .enable_guc_loading = -1, + .enable_guc_submission = 0, .guc_log_level = -1, .enable_dp_mst = true, .inject_load_failure = 0, @@ -193,8 +194,15 @@ MODULE_PARM_DESC(edp_vswing, "(0=use value from vbt [default], 1=low 
power swing(200mV)," "2=default swing(400mV))"); -module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, bool, 0400); -MODULE_PARM_DESC(enable_guc_submission, "Enable GuC submission (default:false)"); +module_param_named_unsafe(enable_guc_loading, i915.enable_guc_loading, int, 0400); +MODULE_PARM_DESC(enable_guc_loading, + "Enable GuC firmware loading " + "(-1=auto [default], 0=never, 1=if available, 2=required)"); + +module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, int, 0400); +MODULE_PARM_DESC(enable_guc_submission, + "Enable GuC submission " + "(-1=auto, 0=never [default], 1=if available, 2=required)"); module_param_named(guc_log_level, i915.guc_log
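A note on the parameter semantics described in the commit message: both options share the same -1/0/1/2 scheme, with -1 resolving to a per-platform default (try to load the firmware if the device has one, but do not yet submit through the GuC). The standalone sketch below expresses only that resolution policy; it is not the patch's sanitize code, and all names are placeholders.

/*
 * Standalone sketch of the -1/0/1/2 resolution described above.
 */
#include <stdbool.h>
#include <stdio.h>

enum guc_mode {
    GUC_AUTO = -1,        /* use the platform default */
    GUC_DISABLE = 0,      /* never load/use the GuC */
    GUC_BEST_EFFORT = 1,  /* try, fall back if unavailable */
    GUC_REQUIRE = 2,      /* must work, otherwise wedge the GPU */
};

static int resolve(int value, int platform_default)
{
    return value < 0 ? platform_default : value;
}

int main(void)
{
    bool has_guc_firmware = true;    /* e.g. a SKL/BXT-like device */

    /* Commit-message policy: auto means "load if possible, don't submit". */
    int loading    = resolve(GUC_AUTO, has_guc_firmware ? GUC_BEST_EFFORT
                                                        : GUC_DISABLE);
    int submission = resolve(GUC_AUTO, GUC_DISABLE);

    printf("enable_guc_loading -> %d, enable_guc_submission -> %d\n",
           loading, submission);
    return 0;
}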
[Intel-gfx] [PATCH 0/4] lrc lifecycle cleanups
These changes are a result of the requests made in VIZ-4277. Make the lrc path more like the legacy submission path. Attach the CPU mappings to vma (un)bind, so that the shrinker also cleans those up. Pin the CPU mappings while context is busy (pending bbs), so that the mappings aren't released/made continuously as this is an expensive process. Nick Hoath (4): drm/i915: Unify execlist and legacy request life-cycles drm/i915: Improve dynamic management/eviction of lrc backing objects drm/i915: Add the CPU mapping of the hw context to the pinned items. drm/i915: Only update ringbuf address when necessary drivers/gpu/drm/i915/i915_debugfs.c | 14 ++-- drivers/gpu/drm/i915/i915_drv.h | 14 +++- drivers/gpu/drm/i915/i915_gem.c | 70 + drivers/gpu/drm/i915/i915_gem_gtt.c | 8 ++ drivers/gpu/drm/i915/i915_irq.c | 81 +--- drivers/gpu/drm/i915/intel_lrc.c| 131 ++-- drivers/gpu/drm/i915/intel_lrc.h| 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 71 + drivers/gpu/drm/i915/intel_ringbuffer.h | 4 - 9 files changed, 250 insertions(+), 145 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/4] drm/i915: Improve dynamic management/eviction of lrc backing objects
Shovel all context related objects through the active queue and obj management. - Added callback in vma_(un)bind to add CPU (un)mapping at same time if desired - Inserted LRC hw context & ringbuf to vma active list Issue: VIZ-4277 Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/i915_gem.c | 3 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 8 drivers/gpu/drm/i915/intel_lrc.c| 28 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 -- 6 files changed, 79 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3d217f9..d660ee3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2169,6 +2169,10 @@ struct drm_i915_gem_object { struct work_struct *work; } userptr; }; + + /** Support for automatic CPU side mapping of object */ + int (*mmap)(struct drm_i915_gem_object *obj, bool unmap); + void *mappable; }; #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fc82171..56e0e00 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3262,6 +3262,9 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) if (vma->pin_count) return -EBUSY; + if (obj->mmap) + obj->mmap(obj, true); + BUG_ON(obj->pages == NULL); if (wait) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 620d57e..786ec4b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3495,6 +3495,14 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, vma->bound |= bind_flags; + if (vma->obj->mmap) { + ret = vma->obj->mmap(vma->obj, false); + if (ret) { + i915_vma_unbind(vma); + return ret; + } + } + return 0; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e8f5b6c..b807928 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -723,6 +723,18 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) intel_logical_ring_advance(request->ringbuf); + /* Push the hw context on to the active list */ + i915_vma_move_to_active( + i915_gem_obj_to_ggtt( + request->ctx->engine[ring->id].state), + request); + + /* Push the ringbuf on to the active list */ + i915_vma_move_to_active( + i915_gem_obj_to_ggtt( + request->ctx->engine[ring->id].ringbuf->obj), + request); + request->tail = request->ringbuf->tail; if (intel_ring_stopped(ring)) @@ -1006,10 +1018,15 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring, if (ret) return ret; - ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf); + ret = i915_gem_obj_ggtt_pin(ringbuf->obj, PAGE_SIZE, + PIN_MAPPABLE); if (ret) goto unpin_ctx_obj; + ret = i915_gem_object_set_to_gtt_domain(ringbuf->obj, true); + if (ret) + goto unpin_rb_obj; + ctx_obj->dirty = true; /* Invalidate GuC TLB. 
*/ @@ -1018,6 +1035,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring, return ret; +unpin_rb_obj: + i915_gem_object_ggtt_unpin(ringbuf->obj); unpin_ctx_obj: i915_gem_object_ggtt_unpin(ctx_obj); @@ -1052,7 +1071,7 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq) if (ctx_obj) { WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); if (--rq->ctx->engine[ring->id].pin_count == 0) { - intel_unpin_ringbuffer_obj(ringbuf); + i915_gem_object_ggtt_unpin(ringbuf->obj); i915_gem_object_ggtt_unpin(ctx_obj); } } @@ -2369,7 +2388,7 @@ void intel_lr_context_free(struct intel_context *ctx) struct intel_engine_cs *ring = ringbuf->ring; if (ctx == ring->default_context) { - intel_unpin_ringbuffer_obj(ringbuf); + i915_gem_object_ggtt_unpin(ringbuf->obj); i915_gem_object_ggtt_unpin(ctx_obj); }
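The flow added by this patch is: the object optionally carries an mmap(obj, unmap) hook, i915_vma_bind() invokes it with unmap=false once the binding succeeds (unbinding again if the hook fails), and __i915_vma_unbind() invokes it with unmap=true before tearing the binding down. Below is a standalone model of that call ordering, with simulated types and a fake mapping.

/*
 * Standalone model of the obj->mmap(obj, unmap) hook: simulated types,
 * but the call ordering mirrors the diff (map after a successful bind,
 * unmap ahead of unbind).
 */
#include <stdio.h>

struct model_obj {
    int (*mmap)(struct model_obj *obj, int unmap);
    void *mapping;
};

static int model_map_hook(struct model_obj *obj, int unmap)
{
    if (unmap) {
        printf("dropping CPU mapping %p\n", obj->mapping);
        obj->mapping = NULL;
        return 0;
    }
    obj->mapping = obj;    /* pretend the kmap succeeded */
    printf("created CPU mapping %p\n", obj->mapping);
    return 0;
}

static int model_vma_bind(struct model_obj *obj)
{
    /* ... the GTT binding itself would happen here ... */
    if (obj->mmap)
        return obj->mmap(obj, 0);    /* map alongside the bind */
    return 0;
}

static void model_vma_unbind(struct model_obj *obj)
{
    if (obj->mmap)
        obj->mmap(obj, 1);           /* unmap before the unbind */
    /* ... the GTT unbinding itself would happen here ... */
}

int main(void)
{
    struct model_obj obj = { .mmap = model_map_hook, .mapping = NULL };

    model_vma_bind(&obj);
    model_vma_unbind(&obj);
    return 0;
}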
[Intel-gfx] [PATCH 3/4] drm/i915: Add the CPU mapping of the hw context to the pinned items.
Pin the hw ctx mapping so that it is not mapped/unmapped per bb when doing GuC submission. Issue: VIZ-4277 Cc: David Gordon Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_debugfs.c | 14 -- drivers/gpu/drm/i915/i915_drv.h | 4 ++- drivers/gpu/drm/i915/intel_lrc.c| 56 +++-- 3 files changed, 50 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3f2a7a7..e68cf5fa 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1970,10 +1970,9 @@ static int i915_context_status(struct seq_file *m, void *unused) static void i915_dump_lrc_obj(struct seq_file *m, struct intel_engine_cs *ring, - struct drm_i915_gem_object *ctx_obj) + struct drm_i915_gem_object *ctx_obj, + uint32_t *reg_state) { - struct page *page; - uint32_t *reg_state; int j; unsigned long ggtt_offset = 0; @@ -1996,17 +1995,13 @@ static void i915_dump_lrc_obj(struct seq_file *m, return; } - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - if (!WARN_ON(page == NULL)) { - reg_state = kmap_atomic(page); - + if (!WARN_ON(reg_state == NULL)) { for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n", ggtt_offset + 4096 + (j * 4), reg_state[j], reg_state[j + 1], reg_state[j + 2], reg_state[j + 3]); } - kunmap_atomic(reg_state); } seq_putc(m, '\n'); @@ -2034,7 +2029,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused) for_each_ring(ring, dev_priv, i) { if (ring->default_context != ctx) i915_dump_lrc_obj(m, ring, - ctx->engine[i].state); + ctx->engine[i].state, + ctx->engine[i].reg_state); } } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d660ee3..b49fd12 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -879,8 +879,10 @@ struct intel_context { } legacy_hw_ctx; /* Execlists */ - struct { + struct intel_context_engine { struct drm_i915_gem_object *state; + uint32_t *reg_state; + struct page *page; struct intel_ringbuffer *ringbuf; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b807928..55a4de56 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -360,16 +360,13 @@ static int execlists_update_context(struct drm_i915_gem_request *rq) struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj; - struct page *page; - uint32_t *reg_state; + uint32_t *reg_state = rq->ctx->engine[ring->id].reg_state; BUG_ON(!ctx_obj); + WARN_ON(!reg_state); WARN_ON(!i915_gem_obj_is_pinned(ctx_obj)); WARN_ON(!i915_gem_obj_is_pinned(rb_obj)); - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - reg_state = kmap_atomic(page); - reg_state[CTX_RING_TAIL+1] = rq->tail; reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj); @@ -385,8 +382,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq) ASSIGN_CTX_PDP(ppgtt, reg_state, 0); } - kunmap_atomic(reg_state); - return 0; } @@ -1004,7 +999,31 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) return 0; } -static int intel_lr_context_do_pin(struct intel_engine_cs *ring, +static int intel_mmap_hw_context(struct drm_i915_gem_object *obj, + bool unmap) +{ + int ret = 0; + struct intel_context_engine *ice = + (struct intel_context_engine *)obj->mappable; + struct page *page; + uint32_t 
*reg_state; + + if (unmap) { + kunmap(ice->page); + ice->reg_state = NULL; + ice->page = NULL; + } else { + page = i915_gem_object_get_page(obj, LRC_STATE_PN); + reg_state = kmap(page); + ice->reg_state = reg_stat
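The diff above is truncated in this archive partway through intel_mmap_hw_context(). Based on the unmap branch that is visible and on the reg_state/page fields added to intel_context_engine, the map branch presumably caches the kmap'd page and register-state pointer, roughly as sketched below. This is a reconstruction for readability, not the missing hunk itself, and it will not build outside the driver.

/*
 * Reconstruction, not the original hunk: the map branch is inferred from
 * the visible unmap branch and the fields added to intel_context_engine.
 */
static int intel_mmap_hw_context_sketch(struct drm_i915_gem_object *obj,
                                        bool unmap)
{
    struct intel_context_engine *ice =
        (struct intel_context_engine *)obj->mappable;

    if (unmap) {
        kunmap(ice->page);
        ice->reg_state = NULL;
        ice->page = NULL;
    } else {
        struct page *page = i915_gem_object_get_page(obj, LRC_STATE_PN);

        ice->reg_state = kmap(page);
        ice->page = page;
    }
    return 0;
}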
[Intel-gfx] [PATCH 4/4] drm/i915: Only update ringbuf address when necessary
We now only need to update the address of the ringbuf object in the hw context when it is pinned, and the hw context is first CPU mapped Issue: VIZ-4277 Cc: David Gordon Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/intel_lrc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 55a4de56..92a0ece 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -368,7 +368,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq) WARN_ON(!i915_gem_obj_is_pinned(rb_obj)); reg_state[CTX_RING_TAIL+1] = rq->tail; - reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj); if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { /* True 32b PPGTT with dynamic page allocation: update PDP @@ -1046,6 +1045,9 @@ static int intel_lr_context_do_pin( if (ret) goto unpin_rb_obj; + ctx->engine[ring->id].reg_state[CTX_RING_BUFFER_START+1] = + i915_gem_obj_ggtt_offset(ringbuf->obj); + ctx_obj->dirty = true; /* Invalidate GuC TLB. */ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/4] drm/i915: Unify execlist and legacy request life-cycles
There is a desire to simplify the i915 driver by reducing the number of different code paths introduced by the LRC / execlists support. As the execlists request is now part of the gem request it is possible and desirable to unify the request life-cycles for execlist and legacy requests. Added a context complete flag to a request which gets set during the context switch interrupt. Added a function i915_gem_request_retireable(). A request is considered retireable if its seqno passed (i.e. the request has completed) and either it was never submitted to the ELSP or its context completed. This ensures that context save is carried out before the last request for a context is considered retireable. retire_requests_ring() now uses i915_gem_request_retireable() rather than request_complete() when deciding which requests to retire. Requests that were not waiting for a context switch interrupt (either as a result of being merged into a following request or by being a legacy request) will be considered retireable as soon as their seqno has passed. Removed the extra request reference held for the execlist request. Removed intel_execlists_retire_requests() and all references to intel_engine_cs.execlist_retired_req_list. Moved context unpinning into retire_requests_ring() for now. Further work is pending for the context pinning - this patch should allow us to use the active list to track context and ring buffer objects later. Changed gen8_cs_irq_handler() so that notify_ring() is called when contexts complete as well as when a user interrupt occurs so that notification happens when a request is complete and context save has finished. v2: Rebase over the read-read optimisation changes v3: Reworked IRQ handler after removing IRQ handler cleanup patch v4: Fixed various pin leaks Issue: VIZ-4277 Signed-off-by: Thomas Daniel Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_drv.h | 6 +++ drivers/gpu/drm/i915/i915_gem.c | 67 +-- drivers/gpu/drm/i915/i915_irq.c | 81 + drivers/gpu/drm/i915/intel_lrc.c| 43 +++-- drivers/gpu/drm/i915/intel_lrc.h| 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 6 files changed, 118 insertions(+), 82 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fbf0ae9..3d217f9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2262,6 +2262,12 @@ struct drm_i915_gem_request { /** Execlists no. 
of times this request has been sent to the ELSP */ int elsp_submitted; + /** +* Execlists: whether this requests's context has completed after +* submission to the ELSP +*/ + bool ctx_complete; + }; int i915_gem_request_alloc(struct intel_engine_cs *ring, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 52642af..fc82171 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1386,6 +1386,24 @@ __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) typeof(*tmp), list); i915_gem_request_retire(tmp); + + if (i915.enable_execlists) { + struct intel_context *ctx = tmp->ctx; + struct drm_i915_private *dev_priv = + engine->dev->dev_private; + unsigned long flags; + struct drm_i915_gem_object *ctx_obj = + ctx->engine[engine->id].state; + + spin_lock_irqsave(&engine->execlist_lock, flags); + + if (ctx_obj && (ctx != engine->default_context)) + intel_lr_context_unpin(tmp); + + intel_runtime_pm_put(dev_priv); + spin_unlock_irqrestore(&engine->execlist_lock, flags); + } + } while (tmp != req); WARN_ON(i915_verify_lists(engine->dev)); @@ -2359,6 +2377,12 @@ void i915_vma_move_to_active(struct i915_vma *vma, list_move_tail(&vma->mm_list, &vma->vm->active_list); } +static bool i915_gem_request_retireable(struct drm_i915_gem_request *req) +{ + return (i915_gem_request_completed(req, true) && + (!req->elsp_submitted || req->ctx_complete)); +} + static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj) { @@ -2829,10 +2853,28 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) struct drm_i915_gem_request, list); - if (!i915_gem_request_completed(request, true)) + if (!i915_gem_request_retireable(request)) break;
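The retirement rule introduced here is a two-part predicate: the seqno must have passed, and, for requests that actually went to the ELSP, the context-complete flag must also be set. The tiny standalone program below just exercises that predicate on the three interesting cases; the struct is a stand-in for the request fields named in the patch.

/*
 * Tiny standalone illustration of the retirement rule.
 */
#include <stdbool.h>
#include <stdio.h>

struct model_req {
    bool seqno_passed;     /* the batch itself has completed */
    int  elsp_submitted;   /* 0 for merged or legacy requests */
    bool ctx_complete;     /* context-switch interrupt has been seen */
};

static bool retireable(const struct model_req *req)
{
    return req->seqno_passed &&
           (!req->elsp_submitted || req->ctx_complete);
}

int main(void)
{
    struct model_req done_not_saved = { true, 1, false };
    struct model_req done_and_saved = { true, 1, true  };
    struct model_req merged_request = { true, 0, false };

    printf("completed, context not yet saved: %d\n", retireable(&done_not_saved)); /* 0 */
    printf("completed, context saved:         %d\n", retireable(&done_and_saved)); /* 1 */
    printf("merged/legacy request:            %d\n", retireable(&merged_request)); /* 1 */
    return 0;
}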
Re: [Intel-gfx] [PATCH 1/4] drm/i915: Unify execlist and legacy request life-cycles
On 14/10/2015 15:42, Dave Gordon wrote: On 13/10/15 12:36, Chris Wilson wrote: On Tue, Oct 13, 2015 at 01:29:56PM +0200, Daniel Vetter wrote: On Fri, Oct 09, 2015 at 06:23:50PM +0100, Chris Wilson wrote: On Fri, Oct 09, 2015 at 07:18:21PM +0200, Daniel Vetter wrote: On Fri, Oct 09, 2015 at 10:45:35AM +0100, Chris Wilson wrote: On Fri, Oct 09, 2015 at 11:15:08AM +0200, Daniel Vetter wrote: My idea was to create a new request for 3. which gets signalled by the scheduler in intel_lrc_irq_handler. My idea was that we'd only create these when a ctx switch might occur to avoid overhead, but I guess if we just outright delay all requests a notch if need that might work too. But I'm really not sure on the implications of that (i.e. does the hardware really unlod the ctx if it's idle?), and whether that would fly still with the scheduler. But figuring this one out here seems to be the cornestone of this reorg. Without it we can't just throw contexts onto the active list. (Let me see if I understand it correctly) Basically the problem is that we can't trust the context object to be synchronized until after the status interrupt. The way we handled that for legacy is to track the currently bound context and keep the vma->pin_count asserted until the request containing the switch away. Doing the same for execlists would trivially fix the issue and if done smartly allows us to share more code (been there, done that). That satisfies me for keeping requests as a basic fence in the GPU timeline and should keep everyone happy that the context can't vanish until after it is complete. The only caveat is that we cannot evict the most recent context. For legacy, we do a switch back to the always pinned default context. For execlists we don't, but it still means we should only have one context which cannot be evicted (like legacy). But it does leave us with the issue that i915_gpu_idle() returns early and i915_gem_context_fini() must keep the explicit gpu reset to be absolutely sure that the pending context writes are completed before the final context is unbound. Yes, and that was what I originally had in mind. Meanwhile the scheduler (will) happen and that means we won't have FIFO ordering. Which means when we switch contexts (as opposed to just adding more to the ringbuffer of the current one) we won't have any idea which context will be the next one. Which also means we don't know which request to pick to retire the old context. Hence why I think we need to be better. But the scheduler does - it is also in charge of making sure the retirement queue is in order. The essence is that we only actually pin engine->last_context, which is chosen as we submit stuff to the hw. Well I'm not sure how much it will reorder, but I'd expect it wants to reorder stuff pretty freely. And as soon as it reorders context (ofc they can't depend on each another) then the legacy hw ctx tracking won't work. I think at least ... Not the way it is written today, but the principle behind it still stands. The last_context submitted to the hardware is pinned until a new one is submitted (such that it remains bound in the GGTT until after the context switch is complete due to the active reference). Instead of doing the context tracking at the start of the execbuffer, the context tracking needs to be pushed down to the submission backend/middleman. -Chris Does anyone actually know what guarantees (if any) the GPU provides w.r.t access to context images vs. USER_INTERRUPTs and CSB-updated interrupts? 
Does 'active->idle' really mean that the context has been fully updated in memory (and can therefore be unmapped), or just that the engine has stopped processing (but the context might not be saved until it's known that it isn't going to be reactivated). For example, it could implement this: (End of last batch in current context) 1. Update seqno 2. Generate USER_INTERRUPT 3. Engine finishes work (HEAD == TAIL and no further contexts queued in ELSP) 4. Save all per-context registers to context image 5. Flush to memory and invalidate 6. Update CSB 7. Flush to memory 8. Generate CSB-update interrupt. (New batch in same context submitted via ELSP) 9. Reload entire context image from memory 10. Update CSB 11. Generate CSB-update interrupt Or this: 1. Update seqno 2. Generate USER_INTERRUPT 3. Engine finishes work (HEAD == TAIL and no further contexts queued in ELSP) 4. Update CSB 5. Generate CSB-update interrupt. (New batch in DIFFERENT context submitted via ELSP) 6. Save all per-context registers to old context image 7. Load entire context image from new image 8. Update CSB 9. Generate CSB-update interrupt The former is synchronous and relatively easy to model, the latter is more like the way le
Re: [Intel-gfx] [PATCH 2/4] drm/i915: Improve dynamic management/eviction of lrc backing objects
On 08/10/2015 14:35, Chris Wilson wrote: On Wed, Oct 07, 2015 at 06:05:46PM +0200, Daniel Vetter wrote: On Tue, Oct 06, 2015 at 03:52:02PM +0100, Nick Hoath wrote: Shovel all context related objects through the active queue and obj management. - Added callback in vma_(un)bind to add CPU (un)mapping at same time if desired - Inserted LRC hw context & ringbuf to vma active list Issue: VIZ-4277 Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/i915_gem.c | 3 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 8 drivers/gpu/drm/i915/intel_lrc.c| 28 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 -- 6 files changed, 79 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3d217f9..d660ee3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2169,6 +2169,10 @@ struct drm_i915_gem_object { struct work_struct *work; } userptr; }; + + /** Support for automatic CPU side mapping of object */ + int (*mmap)(struct drm_i915_gem_object *obj, bool unmap); I don't think we need a map hook, that can still be done (if not done so I disagree - this keeps the interface symmetrical. Searching for the do/undo code paths and finding they are in difference places, called via different routes makes code harder to follow. already) by the callers. Also it's better to rename this to vma_unbind (and it should be at the vma level I think) since there's other potential Nope - the obj is created first, at a point where the map/unamp function can be known. Moving the map/unmap to the vma would mean having a callback path to the object just to set up the callback path when the vma is created anonymously at some later point. users. So explicit maping, lazy unmapping for the kmaps we need. That's the same design we're using for binding objects into gpu address spaces. Also Chris Wilson has something similar, please align with him on the precise design of this callback. We need the unbind hook because of the movement in the first patch (it is a separate issue, the code should work without it albeit having to remap the ring/context state more often). The changelog in this patch simply explains the i915_vma_move_to_active() additions. But to get the shrink accurate we do need the context unpin on retirement and to do the pin_count check in i915_vma_unbind() after waiting (rather than before, as we currently do). However, the eviction code will not inspect the active contexts objects yet (as it will continue to skip over the ggtt->pin_count on them). The way I allowed ctx objects to be evicted was to only keep the ctx->state pinned for the duration of the request construction. Note that I think it should be a vma->unbind hook not an object level one (it is i915_vma_unbind, without only a modicum of object level state being modified in that function). -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/4] drm/i915: Improve dynamic management/eviction of lrc backing objects
On 19/10/2015 10:48, Daniel Vetter wrote: On Fri, Oct 16, 2015 at 03:42:53PM +0100, Nick Hoath wrote: On 08/10/2015 14:35, Chris Wilson wrote: On Wed, Oct 07, 2015 at 06:05:46PM +0200, Daniel Vetter wrote: On Tue, Oct 06, 2015 at 03:52:02PM +0100, Nick Hoath wrote: Shovel all context related objects through the active queue and obj management. - Added callback in vma_(un)bind to add CPU (un)mapping at same time if desired - Inserted LRC hw context & ringbuf to vma active list Issue: VIZ-4277 Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/i915_gem.c | 3 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 8 drivers/gpu/drm/i915/intel_lrc.c| 28 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 -- 6 files changed, 79 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3d217f9..d660ee3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2169,6 +2169,10 @@ struct drm_i915_gem_object { struct work_struct *work; } userptr; }; + + /** Support for automatic CPU side mapping of object */ + int (*mmap)(struct drm_i915_gem_object *obj, bool unmap); I don't think we need a map hook, that can still be done (if not done so I disagree - this keeps the interface symmetrical. Searching for the do/undo code paths and finding they are in difference places, called via different routes makes code harder to follow. already) by the callers. Also it's better to rename this to vma_unbind (and it should be at the vma level I think) since there's other potential Nope - the obj is created first, at a point where the map/unamp function can be known. Moving the map/unmap to the vma would mean having a callback path to the object just to set up the callback path when the vma is created anonymously at some later point. One of the plans for this is to also use it for to-be-unpinned framebuffers (4k buffers are huge ...). And in that case the unmap hook only, and on the vma is the design we want. And since it also seems to accomodate all the other users I do think it's the right choice. I refer you to these words found on the mail list. The may be familiar: As a rule of thumb for refactoring and share infastructure we use the following recipe in drm: - first driver implements things as straightforward as possible - 2nd user copypastes - 3rd one has the duty to figure out whether some refactoring is in order or not. The code as I have written it works best and simplest for my use case. If someone else wants to refactor it differently to shoe horn in their use case, that's up to them. Like I said, explicit setup and lazy, implicit cleanup is kinda how a lot of things in gem work. The most dangerous phrase in the language is ‘we’ve always done it this way.’ - Grace Hopper -Daniel users. So explicit maping, lazy unmapping for the kmaps we need. That's the same design we're using for binding objects into gpu address spaces. Also Chris Wilson has something similar, please align with him on the precise design of this callback. We need the unbind hook because of the movement in the first patch (it is a separate issue, the code should work without it albeit having to remap the ring/context state more often). The changelog in this patch simply explains the i915_vma_move_to_active() additions. 
But to get the shrink accurate we do need the context unpin on retirement and to do the pin_count check in i915_vma_unbind() after waiting (rather than before, as we currently do). However, the eviction code will not inspect the active contexts objects yet (as it will continue to skip over the ggtt->pin_count on them). The way I allowed ctx objects to be evicted was to only keep the ctx->state pinned for the duration of the request construction. Note that I think it should be a vma->unbind hook not an object level one (it is i915_vma_unbind, without only a modicum of object level state being modified in that function). -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/6] drm/i915: Unify execlist and legacy request life-cycles
There is a desire to simplify the i915 driver by reducing the number of different code paths introduced by the LRC / execlists support. As the execlists request is now part of the gem request it is possible and desirable to unify the request life-cycles for execlist and legacy requests. A request is considered retireable if its seqno passed (i.e. the request has completed) and either it was never submitted to the ELSP or its context completed. This ensures that context save is carried out before the last request for a context is considered retireable. request_complete() now checks the elsp_submitted count when deciding if a request is complete. Requests that were not waiting for a context switch interrupt (either as a result of being merged into a following request or by being a legacy request) will be considered retireable as soon as their seqno has passed. Removed the extra request reference held for the execlist request. Removed intel_execlists_retire_requests() and all references to intel_engine_cs.execlist_retired_req_list. Changed gen8_cs_irq_handler() so that notify_ring() is called when contexts complete as well as when a user interrupt occurs so that notification happens when a request is complete and context save has finished. v2: Rebase over the read-read optimisation changes v3: Reworked IRQ handler after removing IRQ handler cleanup patch v4: Fixed various pin leaks v5: Removed ctx_complete flag & associated changes. Removed extraneous request pin of context. (Chris Wilson/Daniel Vetter) Issue: VIZ-4277 Signed-off-by: Thomas Daniel Signed-off-by: Nick Hoath Cc: Daniel Vetter Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 23 - drivers/gpu/drm/i915/i915_irq.c | 7 ++--- drivers/gpu/drm/i915/intel_lrc.c| 45 - drivers/gpu/drm/i915/intel_lrc.h| 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 6 files changed, 21 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8afda45..ae08e57 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2914,7 +2914,7 @@ static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, seqno = req->ring->get_seqno(req->ring, lazy_coherency); - return i915_seqno_passed(seqno, req->seqno); + return i915_seqno_passed(seqno, req->seqno) && !req->elsp_submitted; } int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e57061a..290a1ac 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2848,12 +2848,16 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) if (!list_empty(&obj->last_read_req[ring->id]->list)) break; + if (!i915_gem_request_completed(obj->last_read_req[ring->id], + true)) + break; i915_gem_object_retire__read(obj, ring->id); } if (unlikely(ring->trace_irq_req && -i915_gem_request_completed(ring->trace_irq_req, true))) { +i915_gem_request_completed(ring->trace_irq_req, + true))) { ring->irq_put(ring); i915_gem_request_assign(&ring->trace_irq_req, NULL); } @@ -2872,15 +2876,6 @@ i915_gem_retire_requests(struct drm_device *dev) for_each_ring(ring, dev_priv, i) { i915_gem_retire_requests_ring(ring); idle &= list_empty(&ring->request_list); - if (i915.enable_execlists) { - unsigned long flags; - - spin_lock_irqsave(&ring->execlist_lock, flags); - idle &= list_empty(&ring->execlist_queue); - spin_unlock_irqrestore(&ring->execlist_lock, flags); - - 
intel_execlists_retire_requests(ring); - } } if (idle) @@ -2956,12 +2951,14 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) if (req == NULL) continue; - if (list_empty(&req->list)) - goto retire; + if (list_empty(&req->list)) { + if (i915_gem_request_completed(req, true)) + i915_gem_object_retire__read(obj, i); + continue; + } if (i915_gem_request_completed(req, true)) { __i915_gem_request_retire__upto(req); -retire: i915_gem_object
[Intel-gfx] [PATCH 4/6] drm/i915: Improve dynamic management/eviction of lrc backing objects
Shovel all context related objects through the active queue and obj management. - Added callback in vma_(un)bind to add CPU (un)mapping at same time if desired - Inserted LRC hw context & ringbuf to vma active list Issue: VIZ-4277 Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/i915_gem.c | 3 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 8 drivers/gpu/drm/i915/intel_lrc.c| 28 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 -- 6 files changed, 79 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ae08e57..0dd4ace 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2161,6 +2161,10 @@ struct drm_i915_gem_object { struct work_struct *work; } userptr; }; + + /** Support for automatic CPU side mapping of object */ + int (*mmap)(struct drm_i915_gem_object *obj, bool unmap); + void *mappable; }; #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 290a1ac..8bd318a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3222,6 +3222,9 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) if (vma->pin_count) return -EBUSY; + if (obj->mmap) + obj->mmap(obj, true); + BUG_ON(obj->pages == NULL); if (wait) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 43f35d1..2812757 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3517,6 +3517,14 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, vma->bound |= bind_flags; + if (vma->obj->mmap) { + ret = vma->obj->mmap(vma->obj, false); + if (ret) { + i915_vma_unbind(vma); + return ret; + } + } + return 0; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8428ebd..069950e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -719,6 +719,18 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) intel_logical_ring_advance(request->ringbuf); + /* Push the hw context on to the active list */ + i915_vma_move_to_active( + i915_gem_obj_to_ggtt( + request->ctx->engine[ring->id].state), + request); + + /* Push the ringbuf on to the active list */ + i915_vma_move_to_active( + i915_gem_obj_to_ggtt( + request->ctx->engine[ring->id].ringbuf->obj), + request); + request->tail = request->ringbuf->tail; if (intel_ring_stopped(ring)) @@ -987,10 +999,15 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring, if (ret) return ret; - ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf); + ret = i915_gem_obj_ggtt_pin(ringbuf->obj, PAGE_SIZE, + PIN_MAPPABLE); if (ret) goto unpin_ctx_obj; + ret = i915_gem_object_set_to_gtt_domain(ringbuf->obj, true); + if (ret) + goto unpin_rb_obj; + ctx_obj->dirty = true; /* Invalidate GuC TLB. 
*/ @@ -999,6 +1016,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring, return ret; +unpin_rb_obj: + i915_gem_object_ggtt_unpin(ringbuf->obj); unpin_ctx_obj: i915_gem_object_ggtt_unpin(ctx_obj); @@ -1033,7 +1052,7 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq) if (ctx_obj) { WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); if (--rq->ctx->engine[ring->id].pin_count == 0) { - intel_unpin_ringbuffer_obj(ringbuf); + i915_gem_object_ggtt_unpin(ringbuf->obj); i915_gem_object_ggtt_unpin(ctx_obj); } } @@ -2351,7 +2370,7 @@ void intel_lr_context_free(struct intel_context *ctx) struct intel_engine_cs *ring = ringbuf->ring; if (ctx == ring->default_context) { - intel_unpin_ringbuffer_obj(ringbuf); + i915_gem_object_ggtt_unpin(ringbuf->obj); i915_gem_object_ggtt_unpin(ctx_obj); } W
[Intel-gfx] [PATCH 1/6] drm/i915: Rename gt_irq_handler variable
Renamed tmp variable to the more descriptive iir. (Daniel Vetter/ Thomas Daniel) Issue: VIZ-4277 Signed-off-by: Nick Hoath Cc: Daniel Vetter Cc: David Gordon Cc: Thomas Daniel --- drivers/gpu/drm/i915/i915_irq.c | 46 - 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d68328f..fbf9153 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1296,64 +1296,64 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv, irqreturn_t ret = IRQ_NONE; if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { - u32 tmp = I915_READ_FW(GEN8_GT_IIR(0)); - if (tmp) { - I915_WRITE_FW(GEN8_GT_IIR(0), tmp); + u32 iir = I915_READ_FW(GEN8_GT_IIR(0)); + if (iir) { + I915_WRITE_FW(GEN8_GT_IIR(0), iir); ret = IRQ_HANDLED; - if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT)) + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT)) intel_lrc_irq_handler(&dev_priv->ring[RCS]); - if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT)) + if (iir & (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT)) notify_ring(&dev_priv->ring[RCS]); - if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT)) + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT)) intel_lrc_irq_handler(&dev_priv->ring[BCS]); - if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT)) + if (iir & (GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT)) notify_ring(&dev_priv->ring[BCS]); } else DRM_ERROR("The master control interrupt lied (GT0)!\n"); } if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { - u32 tmp = I915_READ_FW(GEN8_GT_IIR(1)); - if (tmp) { - I915_WRITE_FW(GEN8_GT_IIR(1), tmp); + u32 iir = I915_READ_FW(GEN8_GT_IIR(1)); + if (iir) { + I915_WRITE_FW(GEN8_GT_IIR(1), iir); ret = IRQ_HANDLED; - if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT)) + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT)) intel_lrc_irq_handler(&dev_priv->ring[VCS]); - if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT)) + if (iir & (GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT)) notify_ring(&dev_priv->ring[VCS]); - if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT)) + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT)) intel_lrc_irq_handler(&dev_priv->ring[VCS2]); - if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT)) + if (iir & (GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT)) notify_ring(&dev_priv->ring[VCS2]); } else DRM_ERROR("The master control interrupt lied (GT1)!\n"); } if (master_ctl & GEN8_GT_VECS_IRQ) { - u32 tmp = I915_READ_FW(GEN8_GT_IIR(3)); - if (tmp) { - I915_WRITE_FW(GEN8_GT_IIR(3), tmp); + u32 iir = I915_READ_FW(GEN8_GT_IIR(3)); + if (iir) { + I915_WRITE_FW(GEN8_GT_IIR(3), iir); ret = IRQ_HANDLED; - if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT)) + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT)) intel_lrc_irq_handler(&dev_priv->ring[VECS]); - if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT)) + if (iir & (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT)) notify_ring(&dev_priv->ring[VECS]); } else DRM_ERROR("The master control interrupt lied (GT3)!\n"); } if (master_ctl & GEN8_GT_PM_IRQ) { -
[Intel-gfx] [PATCH 2/6] drm/i915: Break out common code from gen8_gt_irq_handler
Break out common code from gen8_gt_irq_handler and put it in to an always inlined function. gcc optimises out the shift at compile time. (Thomas Daniel/Daniel Vetter/Chris Wilson) Issue: VIZ-4277 Signed-off-by: Nick Hoath Cc: Thomas Daniel Cc: Daniel Vetter Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 40 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index fbf9153..7837f5e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1290,6 +1290,16 @@ static void snb_gt_irq_handler(struct drm_device *dev, ivybridge_parity_error_irq_handler(dev, gt_iir); } +static __always_inline void + gen8_cs_irq_handler(struct intel_engine_cs *ring, u32 iir, + int test_shift) +{ + if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) + notify_ring(ring); + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) + intel_lrc_irq_handler(ring); +} + static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) { @@ -1301,15 +1311,11 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv, I915_WRITE_FW(GEN8_GT_IIR(0), iir); ret = IRQ_HANDLED; - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT)) - intel_lrc_irq_handler(&dev_priv->ring[RCS]); - if (iir & (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT)) - notify_ring(&dev_priv->ring[RCS]); + gen8_cs_irq_handler(&dev_priv->ring[RCS], + iir, GEN8_RCS_IRQ_SHIFT); - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT)) - intel_lrc_irq_handler(&dev_priv->ring[BCS]); - if (iir & (GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT)) - notify_ring(&dev_priv->ring[BCS]); + gen8_cs_irq_handler(&dev_priv->ring[BCS], + iir, GEN8_BCS_IRQ_SHIFT); } else DRM_ERROR("The master control interrupt lied (GT0)!\n"); } @@ -1320,15 +1326,11 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv, I915_WRITE_FW(GEN8_GT_IIR(1), iir); ret = IRQ_HANDLED; - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT)) - intel_lrc_irq_handler(&dev_priv->ring[VCS]); - if (iir & (GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT)) - notify_ring(&dev_priv->ring[VCS]); + gen8_cs_irq_handler(&dev_priv->ring[VCS], + iir, GEN8_VCS1_IRQ_SHIFT); - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT)) - intel_lrc_irq_handler(&dev_priv->ring[VCS2]); - if (iir & (GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT)) - notify_ring(&dev_priv->ring[VCS2]); + gen8_cs_irq_handler(&dev_priv->ring[VCS2], + iir, GEN8_VCS2_IRQ_SHIFT); } else DRM_ERROR("The master control interrupt lied (GT1)!\n"); } @@ -1339,10 +1341,8 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv, I915_WRITE_FW(GEN8_GT_IIR(3), iir); ret = IRQ_HANDLED; - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT)) - intel_lrc_irq_handler(&dev_priv->ring[VECS]); - if (iir & (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT)) - notify_ring(&dev_priv->ring[VECS]); + gen8_cs_irq_handler(&dev_priv->ring[VECS], + iir, GEN8_VECS_IRQ_SHIFT); } else DRM_ERROR("The master control interrupt lied (GT3)!\n"); } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
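A standalone sketch of the refactor above, with stand-in interrupt bits and engine names: because the helper is always inlined and called with literal shifts, the compiler folds the bit tests to the same constants as the open-coded version it replaces.

        /* Illustration only; bit positions are stand-ins, not the real
         * GEN8 register layout.
         */
        #include <stdint.h>
        #include <stdio.h>

        #define GT_RENDER_USER_INTERRUPT        (1u << 0)
        #define GT_CONTEXT_SWITCH_INTERRUPT     (1u << 8)

        static __attribute__((always_inline)) inline void
        cs_irq_handler(const char *engine, uint32_t iir, int test_shift)
        {
                if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
                        printf("%s: user interrupt\n", engine);
                if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
                        printf("%s: context switch\n", engine);
        }

        int main(void)
        {
                uint32_t iir = (GT_CONTEXT_SWITCH_INTERRUPT << 16) |
                               (GT_RENDER_USER_INTERRUPT << 0);

                /* Shifts are literals, so the masks fold at compile time. */
                cs_irq_handler("rcs", iir, 0);
                cs_irq_handler("bcs", iir, 16);
                return 0;
        }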
[Intel-gfx] [PATCH 6/6] drm/i915: Only update ringbuf address when necessary
We now only need to update the address of the ringbuf object in the hw context when it is pinned, and the hw context is first CPU mapped Issue: VIZ-4277 Signed-off-by: Nick Hoath Cc: David Gordon --- drivers/gpu/drm/i915/intel_lrc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a35efcd..2e529a4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -368,7 +368,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq) WARN_ON(!i915_gem_obj_is_pinned(rb_obj)); reg_state[CTX_RING_TAIL+1] = rq->tail; - reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj); if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { /* True 32b PPGTT with dynamic page allocation: update PDP @@ -1027,6 +1026,9 @@ static int intel_lr_context_do_pin( if (ret) goto unpin_rb_obj; + ctx->engine[ring->id].reg_state[CTX_RING_BUFFER_START+1] = + i915_gem_obj_ggtt_offset(ringbuf->obj); + ctx_obj->dirty = true; /* Invalidate GuC TLB. */ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 5/6] drm/i915: Add the CPU mapping of the hw context to the pinned items.
Pin the hw ctx mapping so that it is not mapped/unmapped per bb when doing GuC submission. v2: Removed interim development extra mapping. (Daniel Vetter) Issue: VIZ-4277 Signed-off-by: Nick Hoath Cc: David Gordon Cc: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 14 --- drivers/gpu/drm/i915/i915_drv.h | 4 +++- drivers/gpu/drm/i915/intel_lrc.c| 46 ++--- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a3b22bd..f0a172e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1970,10 +1970,9 @@ static int i915_context_status(struct seq_file *m, void *unused) static void i915_dump_lrc_obj(struct seq_file *m, struct intel_engine_cs *ring, - struct drm_i915_gem_object *ctx_obj) + struct drm_i915_gem_object *ctx_obj, + uint32_t *reg_state) { - struct page *page; - uint32_t *reg_state; int j; unsigned long ggtt_offset = 0; @@ -1996,17 +1995,13 @@ static void i915_dump_lrc_obj(struct seq_file *m, return; } - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - if (!WARN_ON(page == NULL)) { - reg_state = kmap_atomic(page); - + if (!WARN_ON(reg_state == NULL)) { for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n", ggtt_offset + 4096 + (j * 4), reg_state[j], reg_state[j + 1], reg_state[j + 2], reg_state[j + 3]); } - kunmap_atomic(reg_state); } seq_putc(m, '\n'); @@ -2034,7 +2029,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused) for_each_ring(ring, dev_priv, i) { if (ring->default_context != ctx) i915_dump_lrc_obj(m, ring, - ctx->engine[i].state); + ctx->engine[i].state, + ctx->engine[i].reg_state); } } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0dd4ace..dc69d67 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -881,8 +881,10 @@ struct intel_context { } legacy_hw_ctx; /* Execlists */ - struct { + struct intel_context_engine { struct drm_i915_gem_object *state; + uint32_t *reg_state; + struct page *page; struct intel_ringbuffer *ringbuf; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 069950e..a35efcd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -360,16 +360,13 @@ static int execlists_update_context(struct drm_i915_gem_request *rq) struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj; - struct page *page; - uint32_t *reg_state; + uint32_t *reg_state = rq->ctx->engine[ring->id].reg_state; BUG_ON(!ctx_obj); + WARN_ON(!reg_state); WARN_ON(!i915_gem_obj_is_pinned(ctx_obj)); WARN_ON(!i915_gem_obj_is_pinned(rb_obj)); - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - reg_state = kmap_atomic(page); - reg_state[CTX_RING_TAIL+1] = rq->tail; reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj); @@ -385,8 +382,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq) ASSIGN_CTX_PDP(ppgtt, reg_state, 0); } - kunmap_atomic(reg_state); - return 0; } @@ -985,7 +980,31 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) return 0; } -static int intel_lr_context_do_pin(struct intel_engine_cs *ring, +static int intel_mmap_hw_context(struct drm_i915_gem_object *obj, + bool unmap) +{ + int ret = 0; + struct intel_context_engine *ice = + 
(struct intel_context_engine *)obj->mappable; + struct page *page; + uint32_t *reg_state; + + if (unmap) { + kunmap(ice->page); + ice->reg_state = NULL; + ice->page = NULL; + } else { + page = i915_gem_object_get_page(obj, LRC_STATE_PN); +
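A simplified standalone model of the caching this patch introduces (stand-in names, not the real LRC layout): the pin path establishes the CPU mapping of the context image once, and the per-submission update in execlists_update_context() becomes a plain store into that cached mapping instead of a kmap_atomic()/kunmap_atomic() pair around every tail write.

        #include <stdint.h>
        #include <stdio.h>

        struct context_engine {
                uint32_t *reg_state;    /* cached CPU mapping, valid while pinned */
        };

        #define CTX_RING_TAIL 7         /* arbitrary offset for the sketch */

        static uint32_t fake_lrc_page[64];      /* stands in for the mapped LRC page */

        static void context_pin(struct context_engine *ce)
        {
                ce->reg_state = fake_lrc_page;  /* real patch: kmap() of the state page */
        }

        static void context_unpin(struct context_engine *ce)
        {
                ce->reg_state = NULL;           /* real patch: kunmap() */
        }

        static void update_context(struct context_engine *ce, uint32_t tail)
        {
                /* Hot path: no map/unmap, just a store into the cached mapping. */
                ce->reg_state[CTX_RING_TAIL + 1] = tail;
        }

        int main(void)
        {
                struct context_engine ce = { 0 };

                context_pin(&ce);
                update_context(&ce, 0x40);
                printf("tail now 0x%x\n", (unsigned)fake_lrc_page[CTX_RING_TAIL + 1]);
                context_unpin(&ce);
                return 0;
        }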
[Intel-gfx] [PATCH 0/6] lrc lifecycle cleanups
These changes are a result of the requests made in VIZ-4277. Make the lrc path more like the legacy submission path. Attach the CPU mappings to vma (un)bind, so that the shrinker also cleans those up. Pin the CPU mappings while context is busy (pending requests), so that the mappings aren't released/made continuously as this is an expensive process. V2: Removed unecessary changes in the lrc retiring code path Removed unecessary map/unmap Nick Hoath (6): drm/i195: Rename gt_irq_handler variable drm/i915: Break out common code from gen8_gt_irq_handler drm/i915: Unify execlist and legacy request life-cycles drm/i915: Improve dynamic management/eviction of lrc backing objects drm/i915: Add the CPU mapping of the hw context to the pinned items. drm/i915: Only update ringbuf address when necessary drivers/gpu/drm/i915/i915_debugfs.c | 14 ++-- drivers/gpu/drm/i915/i915_drv.h | 10 ++- drivers/gpu/drm/i915/i915_gem.c | 26 +++ drivers/gpu/drm/i915/i915_gem_gtt.c | 8 +++ drivers/gpu/drm/i915/i915_irq.c | 67 - drivers/gpu/drm/i915/intel_lrc.c| 123 +--- drivers/gpu/drm/i915/intel_lrc.h| 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 71 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 -- 9 files changed, 173 insertions(+), 152 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Change context lifecycle
Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been saved. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. The refcount on the context also has to be extended to cover this new longer period. Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 7 + drivers/gpu/drm/i915/intel_lrc.c | 58 +--- drivers/gpu/drm/i915/intel_lrc.h | 1 + 4 files changed, 57 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 20cd6d8..778b14a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -884,6 +884,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool unsaved; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f1e3fde..273946d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1385,6 +1385,13 @@ __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) tmp = list_first_entry(&engine->request_list, typeof(*tmp), list); + if (i915.enable_execlists) { + unsigned long flags; + + spin_lock_irqsave(&engine->execlist_lock, flags); + intel_lr_context_complete_check(tmp); + spin_unlock_irqrestore(&engine->execlist_lock, flags); + } i915_gem_request_retire(tmp); } while (tmp != req); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 06180dc..d82e903 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -566,13 +566,17 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) struct drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].unsaved) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].unsaved = true; + } + } + list_for_each_entry(cursor, &ring->execlist_queue, execlist_link) if (++num_elements > 2) break; @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) spin_unlock_irq(&ring->execlist_lock); list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) { - struct intel_context *ctx = req->ctx; - struct drm_i915_gem_object *ctx_obj = - ctx->engine[ring->id].state; - - if (ctx_obj && (ctx != ring->default_context)) - intel_lr_context_unpin(req); list_del(&req->execlist_link); i915_gem_request_unreference(req); } @@ -1073,6 +1071,31 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq) } } +void intel_lr_context_complete_check(struct drm_i915_gem_request *req) +{ + struct intel_engine_cs *ring = req->ring; + + assert_spin_locked(&ring->execlist_lock); + + if (ring->last_context && ring->last_context != req->ctx) { + if (req->ctx != ring->default_context + && ring->last_context->engine[ring->id].unsaved) { + /* Create fake request for unpinning the old context */ + struct drm_i915_gem_request tmp; + + tmp.ring = ring; + tmp.ctx = ring->last_context; + tmp.ringbuf = + ring->last_context->engine[ring->id].ringbuf; + + intel_lr_context_unpin(&tmp); 
+ ring->last_context->engine[ring->id].unsaved = false; + ring->last_context = NULL; + } + } + ring->last_context = req->ctx; +} + static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) { int ret, i; @@ -2390,7 +2413,22 @@
[Intel-gfx] [PATCH v10] drm/i915: Extend LRC pinning to cover GPU context writeback
Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been written back to by the GPU. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. This fixes an issue with GuC submission where the GPU might not have finished writing back the context before it is unpinned. This results in a GPU hang. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request v5: Only create a switch to idle context if the ring doesn't already have a request pending on it (Alex Dai) Rename unsaved to dirty to avoid double negatives (Dave Gordon) Changed _no_req postfix to __ prefix for consistency (Dave Gordon) Split out per engine cleanup from context_free as it was getting unwieldy Corrected locking (Dave Gordon) v6: Removed some bikeshedding (Mika Kuoppala) Added explanation of the GuC hang that this fixes (Daniel Vetter) v7: Removed extra per request pinning from ring reset code (Alex Dai) Added forced ring unpin/clean in error case in context free (Alex Dai) v8: Renamed lrc specific last_context to lrc_last_context as there were some reset cases where the codepaths leaked (Mika Kuoppala) NULL'd last_context in reset case - there was a pointer leak if someone did reset->close context. v9: Rebase over "Fix context/engine cleanup order" v10: Rebase over nightly, remove WARN_ON which caused the dependency on dev. Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai Cc: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 3 + drivers/gpu/drm/i915/intel_lrc.c| 138 ++-- drivers/gpu/drm/i915/intel_lrc.h| 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 5 files changed, 121 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 104bd18..d28e10a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -882,6 +882,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool dirty; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ddc21d4..7b79405 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1413,6 +1413,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + if (i915.enable_execlists) + intel_lr_context_complete_check(request); + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5027699..b661058 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -585,9 +585,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) struct drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); @@ -763,6 +760,13 @@ 
intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) if (intel_ring_stopped(ring)) return; + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].dirty) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].dirty = true; + } + } + if (dev_priv->guc.execbuf_client) i915_guc_submit(dev_priv->guc.execbuf_client, request); else @@ -989,12 +993,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) spin_unlock_irq(&ring->execlist_lock); list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) { - struct intel_context *ctx = req->ctx; - struct drm_i915_gem_object *ctx_obj = - ctx->engine[ring->id].state; - - if (ctx_obj && (ctx != ring->default_context))
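A compact standalone model of the lifecycle described above, with stand-in types: a context is pinned and flagged dirty the first time it is submitted, and the previous dirty context is only unpinned once a request on a different context retires, i.e. after the hardware has switched away from it and written its image back. This is the pattern the dirty flag and complete_check in the patch implement; error paths and the default context are left out of the sketch.

        #include <stdbool.h>
        #include <stdio.h>

        struct context {
                const char *name;
                int pin_count;
                bool dirty;     /* GPU copy may be newer than memory */
        };

        struct engine {
                struct context *last_context;
        };

        static void ctx_pin(struct context *ctx)   { ctx->pin_count++; }
        static void ctx_unpin(struct context *ctx) { ctx->pin_count--; }

        /* Called at submission time (advance_and_submit in the patch). */
        static void submit(struct context *ctx)
        {
                if (!ctx->dirty) {
                        ctx_pin(ctx);
                        ctx->dirty = true;
                }
                printf("submit on %s (pins=%d)\n", ctx->name, ctx->pin_count);
        }

        /* Called from request retirement (complete_check in the patch). */
        static void retire(struct engine *e, struct context *ctx)
        {
                if (e->last_context && e->last_context != ctx &&
                    e->last_context->dirty) {
                        /* Old context has been written back: drop its pin. */
                        ctx_unpin(e->last_context);
                        e->last_context->dirty = false;
                }
                e->last_context = ctx;
        }

        int main(void)
        {
                struct engine e = { 0 };
                struct context a = { "ctx A" }, b = { "ctx B" };

                submit(&a); retire(&e, &a);     /* A stays pinned: still dirty */
                submit(&b); retire(&e, &b);     /* retiring on B unpins A */
                printf("A pins=%d dirty=%d, B pins=%d dirty=%d\n",
                       a.pin_count, a.dirty, b.pin_count, b.dirty);
                return 0;
        }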
Re: [Intel-gfx] ✗ failure: Fi.CI.BAT
On 14/01/2016 07:20, Patchwork wrote: == Summary == Built on 058740f8fced6851aeda34f366f5330322cd585f drm-intel-nightly: 2016y-01m-13d-17h-07m-44s UTC integration manifest Test gem_ctx_basic: pass -> FAIL (bdw-ultra) Test failed to load - not patch related Test gem_ctx_param_basic: Subgroup non-root-set: pass -> DMESG-WARN (bsw-nuc-2) gem driver allocated a poisoned slab - not patch related Test kms_flip: Subgroup basic-flip-vs-dpms: pass -> SKIP (bsw-nuc-2) test reqs not met - not patch related dmesg-warn -> PASS (ilk-hp8440p) warn to PASS bdw-nuci7total:138 pass:128 dwarn:1 dfail:0 fail:0 skip:9 bdw-ultratotal:138 pass:131 dwarn:0 dfail:0 fail:1 skip:6 bsw-nuc-2total:141 pass:113 dwarn:3 dfail:0 fail:0 skip:25 hsw-brixbox total:141 pass:134 dwarn:0 dfail:0 fail:0 skip:7 hsw-gt2 total:141 pass:137 dwarn:0 dfail:0 fail:0 skip:4 ilk-hp8440p total:141 pass:101 dwarn:3 dfail:0 fail:0 skip:37 ivb-t430stotal:135 pass:122 dwarn:3 dfail:4 fail:0 skip:6 skl-i5k-2total:141 pass:131 dwarn:2 dfail:0 fail:0 skip:8 skl-i7k-2total:141 pass:131 dwarn:2 dfail:0 fail:0 skip:8 snb-dellxps total:141 pass:122 dwarn:5 dfail:0 fail:0 skip:14 snb-x220ttotal:141 pass:122 dwarn:5 dfail:0 fail:1 skip:13 Results at /archive/results/CI_IGT_test/Patchwork_1174/ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v10] drm/i915: Extend LRC pinning to cover GPU context writeback
On 14/01/2016 11:36, Chris Wilson wrote: On Wed, Jan 13, 2016 at 04:19:45PM +, Nick Hoath wrote: + if (ctx->engine[ring->id].dirty) { + struct drm_i915_gem_request *req = NULL; + + /** +* If there is already a request pending on +* this ring, wait for that to complete, +* otherwise create a switch to idle request +*/ + if (list_empty(&ring->request_list)) { + int ret; + + ret = i915_gem_request_alloc( + ring, + ring->default_context, + &req); + if (!ret) + i915_add_request(req); + else + DRM_DEBUG("Failed to ensure context saved"); + } else { + req = list_first_entry( + &ring->request_list, + typeof(*req), list); + } + if (req) { + ret = i915_wait_request(req); + if (ret != 0) { + /** +* If we get here, there's probably been a ring +* reset, so we just clean up the dirty flag.& +* pin count. +*/ + ctx->engine[ring->id].dirty = false; + __intel_lr_context_unpin( + ring, + ctx); + } + } If you were to take a lr_context_pin on the last_context, and only release that pin when you change to a new context, you do not need to That what this patch does. introduce a blocking context-close, nor do you need to introduce the usage of default_context. The use of default_context here is to stop a context hanging around after it is no longer needed. (lr_context_pin should take a reference on the ctx to prevent early freeeing ofc). You can't clear the reference on the ctx in an interrupt context. The code at that point starts to look v.v.similar to legacy, right down to the need to use a GPU reset during shutdown to prevent writing back the context image. (Which you still currently need to get rid of the default context now.) -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v10] drm/i915: Extend LRC pinning to cover GPU context writeback
On 14/01/2016 12:31, Chris Wilson wrote: On Thu, Jan 14, 2016 at 11:56:07AM +, Nick Hoath wrote: On 14/01/2016 11:36, Chris Wilson wrote: On Wed, Jan 13, 2016 at 04:19:45PM +, Nick Hoath wrote: + if (ctx->engine[ring->id].dirty) { + struct drm_i915_gem_request *req = NULL; + + /** +* If there is already a request pending on +* this ring, wait for that to complete, +* otherwise create a switch to idle request +*/ + if (list_empty(&ring->request_list)) { + int ret; + + ret = i915_gem_request_alloc( + ring, + ring->default_context, + &req); + if (!ret) + i915_add_request(req); + else + DRM_DEBUG("Failed to ensure context saved"); + } else { + req = list_first_entry( + &ring->request_list, + typeof(*req), list); + } + if (req) { + ret = i915_wait_request(req); + if (ret != 0) { + /** +* If we get here, there's probably been a ring +* reset, so we just clean up the dirty flag.& +* pin count. +*/ + ctx->engine[ring->id].dirty = false; + __intel_lr_context_unpin( + ring, + ctx); + } + } If you were to take a lr_context_pin on the last_context, and only release that pin when you change to a new context, you do not need to That what this patch does. introduce a blocking context-close, nor do you need to introduce the usage of default_context. The use of default_context here is to stop a context hanging around after it is no longer needed. By blocking, which is not acceptable. Also we can eliminate the default_context and so pinning that opposed to the last_context serves no purpose other than by chance having a more preferrable position when it comes to defragmentation. But you don't enable that anyway and we Enabling the shrinker on execlists is something I'm working on which is predicated on this patch. Also why is blocking on closing a context not acceptable? have alternative strategies now that avoid the issue with fragmentation of the mappable aperture. (lr_context_pin should take a reference on the ctx to prevent early freeeing ofc). You can't clear the reference on the ctx in an interrupt context. The execlists submission should moved out of the interrupt context, for the very simple reason that it is causing machine panics. userspace submits a workload, machine lockups Create a jira, and I'm sure we'll look at making that change. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v10] drm/i915: Extend LRC pinning to cover GPU context writeback
On 14/01/2016 12:37, Nick Hoath wrote: On 14/01/2016 12:31, Chris Wilson wrote: On Thu, Jan 14, 2016 at 11:56:07AM +, Nick Hoath wrote: On 14/01/2016 11:36, Chris Wilson wrote: On Wed, Jan 13, 2016 at 04:19:45PM +, Nick Hoath wrote: + if (ctx->engine[ring->id].dirty) { + struct drm_i915_gem_request *req = NULL; + + /** +* If there is already a request pending on +* this ring, wait for that to complete, +* otherwise create a switch to idle request +*/ + if (list_empty(&ring->request_list)) { + int ret; + + ret = i915_gem_request_alloc( + ring, + ring->default_context, + &req); + if (!ret) + i915_add_request(req); + else + DRM_DEBUG("Failed to ensure context saved"); + } else { + req = list_first_entry( + &ring->request_list, + typeof(*req), list); + } + if (req) { + ret = i915_wait_request(req); + if (ret != 0) { + /** +* If we get here, there's probably been a ring +* reset, so we just clean up the dirty flag.& +* pin count. +*/ + ctx->engine[ring->id].dirty = false; + __intel_lr_context_unpin( + ring, + ctx); + } + } If you were to take a lr_context_pin on the last_context, and only release that pin when you change to a new context, you do not need to That what this patch does. introduce a blocking context-close, nor do you need to introduce the usage of default_context. The use of default_context here is to stop a context hanging around after it is no longer needed. By blocking, which is not acceptable. Also we can eliminate the default_context and so pinning that opposed to the last_context serves no purpose other than by chance having a more preferrable position when it comes to defragmentation. But you don't enable that anyway and we Enabling the shrinker on execlists is something I'm working on which is predicated on this patch. Also why is blocking on closing a context not acceptable? As a clarification: Without rewriting the execlist code to not submit or cleanup from an interrupt handler, we can't use refcounting to allow non blocking closing. have alternative strategies now that avoid the issue with fragmentation of the mappable aperture. (lr_context_pin should take a reference on the ctx to prevent early freeeing ofc). You can't clear the reference on the ctx in an interrupt context. The execlists submission should moved out of the interrupt context, for the very simple reason that it is causing machine panics. userspace submits a workload, machine lockups Create a jira, and I'm sure we'll look at making that change. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 1/3] drm/i915: simplify allocation of driver-internal requests
On 07/01/2016 10:20, Dave Gordon wrote: There are a number of places where the driver needs a request, but isn't working on behalf of any specific user or in a specific context. At present, we associate them with the per-engine default context. A future patch will abolish those per-engine context pointers; but we can already eliminate a lot of the references to them, just by making the allocator allow NULL as a shorthand for "an appropriate context for this ring", which will mean that the callers don't need to know anything about how the "appropriate context" is found (e.g. per-ring vs per-device, etc). So this patch renames the existing i915_gem_request_alloc(), and makes it local (static inline), and replaces it with a wrapper that provides a default if the context is NULL, and also has a nicer calling convention (doesn't require a pointer to an output parameter). Then we change all callers to use the new convention: OLD: err = i915_gem_request_alloc(ring, user_ctx, &req); if (err) ... NEW: req = i915_gem_request_alloc(ring, user_ctx); if (IS_ERR(req)) ... OLD: err = i915_gem_request_alloc(ring, ring->default_context, &req); if (err) ... NEW: req = i915_gem_request_alloc(ring, NULL); if (IS_ERR(req)) ... Signed-off-by: Dave Gordon Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/i915_drv.h| 6 ++-- drivers/gpu/drm/i915/i915_gem.c| 55 +++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 +--- drivers/gpu/drm/i915/intel_display.c | 6 ++-- drivers/gpu/drm/i915/intel_lrc.c | 9 +++-- drivers/gpu/drm/i915/intel_overlay.c | 24 ++--- 6 files changed, 74 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c6dd4db..c2b000a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2260,9 +2260,9 @@ struct drm_i915_gem_request { }; -int i915_gem_request_alloc(struct intel_engine_cs *ring, - struct intel_context *ctx, - struct drm_i915_gem_request **req_out); +struct drm_i915_gem_request * __must_check +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct intel_context *ctx); void i915_gem_request_cancel(struct drm_i915_gem_request *req); void i915_gem_request_free(struct kref *req_ref); int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6c60e04..c908ed1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2688,9 +2688,10 @@ void i915_gem_request_free(struct kref *req_ref) kmem_cache_free(req->i915->requests, req); } -int i915_gem_request_alloc(struct intel_engine_cs *ring, - struct intel_context *ctx, - struct drm_i915_gem_request **req_out) +static inline int +__i915_gem_request_alloc(struct intel_engine_cs *ring, +struct intel_context *ctx, +struct drm_i915_gem_request **req_out) { struct drm_i915_private *dev_priv = to_i915(ring->dev); struct drm_i915_gem_request *req; @@ -2753,6 +2754,31 @@ err: return ret; } +/** + * i915_gem_request_alloc - allocate a request structure + * + * @engine: engine that we wish to issue the request on. + * @ctx: context that the request will be associated with. + * This can be NULL if the request is not directly related to + * any specific user context, in which case this function will + * choose an appropriate context to use. + * + * Returns a pointer to the allocated request if successful, + * or an error code if not. 
+ */ +struct drm_i915_gem_request * +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct intel_context *ctx) +{ + struct drm_i915_gem_request *req; + int err; + + if (ctx == NULL) + ctx = engine->default_context; + err = __i915_gem_request_alloc(engine, ctx, &req); + return err ? ERR_PTR(err) : req; +} + void i915_gem_request_cancel(struct drm_i915_gem_request *req) { intel_ring_reserved_space_cancel(req->ringbuf); @@ -3170,9 +3196,13 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, return 0; if (*to_req == NULL) { - ret = i915_gem_request_alloc(to, to->default_context, to_req); - if (ret) - return ret; + struct drm_i915_gem_request *req; + + req = i915_gem_request_alloc(to, NULL); + if (IS_ERR(req)) +
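A small userspace sketch of the calling-convention change being reviewed, using simplified stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() helpers and an integer in place of the context pointer (negative standing in for NULL, i.e. "use the default context"). The names and the default-context choice here are illustrative, not the driver's actual code.

        #include <errno.h>
        #include <stdio.h>
        #include <stdlib.h>

        #define ERR_PTR(err)    ((void *)(long)(err))
        #define IS_ERR(ptr)     ((unsigned long)(ptr) >= (unsigned long)-4095)
        #define PTR_ERR(ptr)    ((long)(ptr))

        struct request { int id; };

        /* Old-style helper: returns 0 or -errno, hands the object back separately. */
        static int __request_alloc(int ctx, struct request **out)
        {
                if (ctx < 0)
                        return -EINVAL;
                *out = malloc(sizeof(**out));
                return *out ? 0 : -ENOMEM;
        }

        /* New-style wrapper: picks a default context and returns ptr-or-error. */
        static struct request *request_alloc(int ctx)
        {
                struct request *req;
                int err;

                if (ctx < 0)
                        ctx = 0;        /* "appropriate context for this ring" */
                err = __request_alloc(ctx, &req);
                return err ? ERR_PTR(err) : req;
        }

        int main(void)
        {
                struct request *req = request_alloc(-1);

                if (IS_ERR(req)) {
                        printf("alloc failed: %ld\n", PTR_ERR(req));
                        return 1;
                }
                printf("got request %p\n", (void *)req);
                free(req);
                return 0;
        }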
Re: [Intel-gfx] [PATCH v3 2/3] drm/i915: abolish separate per-ring default_context pointers
On 07/01/2016 10:20, Dave Gordon wrote: Now that we've eliminated a lot of uses of ring->default_context, we can eliminate the pointer itself. All the engines share the same default intel_context, so we can just keep a single reference to it in the dev_priv structure rather than one in each of the engine[] elements. This make refcounting more sensible too, as we now have a refcount of one for the one pointer, rather than a refcount of one but multiple pointers. From an idea by Chris Wilson. Signed-off-by: Dave Gordon Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/i915_debugfs.c| 4 ++-- drivers/gpu/drm/i915/i915_drv.h| 2 ++ drivers/gpu/drm/i915/i915_gem.c| 6 +++--- drivers/gpu/drm/i915/i915_gem_context.c| 22 -- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_guc_submission.c | 6 +++--- drivers/gpu/drm/i915/intel_lrc.c | 24 +--- drivers/gpu/drm/i915/intel_ringbuffer.h| 1 - 8 files changed, 32 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0fc38bb..2613708 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1943,7 +1943,7 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_puts(m, "HW context "); describe_ctx(m, ctx); for_each_ring(ring, dev_priv, i) { - if (ring->default_context == ctx) + if (dev_priv->kernel_context == ctx) seq_printf(m, "(default context %s) ", ring->name); } @@ -2039,7 +2039,7 @@ static int i915_dump_lrc(struct seq_file *m, void *unused) list_for_each_entry(ctx, &dev_priv->context_list, link) { for_each_ring(ring, dev_priv, i) { - if (ring->default_context != ctx) + if (dev_priv->kernel_context != ctx) i915_dump_lrc_obj(m, ring, ctx->engine[i].state); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c2b000a..aef86a8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1940,6 +1940,8 @@ struct drm_i915_private { void (*stop_ring)(struct intel_engine_cs *ring); } gt; + struct intel_context *kernel_context; + bool edp_low_vswing; /* perform PHY state sanity checks? */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c908ed1..8f101121 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2678,7 +2678,7 @@ void i915_gem_request_free(struct kref *req_ref) if (ctx) { if (i915.enable_execlists) { - if (ctx != req->ring->default_context) + if (ctx != req->i915->kernel_context) intel_lr_context_unpin(req); } @@ -2774,7 +2774,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, int err; if (ctx == NULL) - ctx = engine->default_context; + ctx = to_i915(engine->dev)->kernel_context; err = __i915_gem_request_alloc(engine, ctx, &req); return err ? ERR_PTR(err) : req; } @@ -4862,7 +4862,7 @@ i915_gem_init_hw(struct drm_device *dev) */ init_unused_rings(dev); - BUG_ON(!dev_priv->ring[RCS].default_context); + BUG_ON(!dev_priv->kernel_context); ret = i915_ppgtt_init_hw(dev); if (ret) { diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 900ffd0..e1d767e 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -354,11 +354,10 @@ int i915_gem_context_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_context *ctx; - int i; /* Init should only be called once per module load. 
Eventually the * restriction on the context_disabled check can be loosened. */ - if (WARN_ON(dev_priv->ring[RCS].default_context)) + if (WARN_ON(dev_priv->kernel_context)) return 0; if (intel_vgpu_active(dev) && HAS_LOGICAL_RING_CONTEXTS(dev)) { @@ -388,12 +387,7 @@ int i915_gem_context_init(struct drm_device *dev) return PTR_ERR(ctx); } - for (i = 0; i < I915_NUM_RINGS; i++) { - struct intel_engine_cs *ring = &dev_priv->ring[i]; - - /* NB: RCS will hold a ref for all rings */ - ring-
Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: tidy up a few leftovers
On 07/01/2016 10:20, Dave Gordon wrote: There are a few bits of code which the transformations implemented by the previous patch reveal to be suboptimal, once the notion of a per- ring default context has gone away. So this tidies up the leftovers. It could have been squashed into the previous patch, but that would have made that patch less clearly a simple transformation. In particular, any change which alters the code block structure or indentation has been deferred into this separate patch, because such things tend to make diffs more difficult to read. Signed-off-by: Dave Gordon Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/i915_debugfs.c | 15 +-- drivers/gpu/drm/i915/i915_gem.c | 6 ++ drivers/gpu/drm/i915/intel_lrc.c| 38 + 3 files changed, 24 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2613708..bbb23da 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1942,11 +1942,8 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_puts(m, "HW context "); describe_ctx(m, ctx); - for_each_ring(ring, dev_priv, i) { - if (dev_priv->kernel_context == ctx) - seq_printf(m, "(default context %s) ", - ring->name); - } + if (ctx == dev_priv->kernel_context) + seq_printf(m, "(kernel context) "); if (i915.enable_execlists) { seq_putc(m, '\n'); @@ -2037,13 +2034,11 @@ static int i915_dump_lrc(struct seq_file *m, void *unused) if (ret) return ret; - list_for_each_entry(ctx, &dev_priv->context_list, link) { - for_each_ring(ring, dev_priv, i) { - if (dev_priv->kernel_context != ctx) + list_for_each_entry(ctx, &dev_priv->context_list, link) + if (ctx != dev_priv->kernel_context) + for_each_ring(ring, dev_priv, i) i915_dump_lrc_obj(m, ring, ctx->engine[i].state); - } - } mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8f101121..4f45eb2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2677,10 +2677,8 @@ void i915_gem_request_free(struct kref *req_ref) i915_gem_request_remove_from_client(req); if (ctx) { - if (i915.enable_execlists) { - if (ctx != req->i915->kernel_context) - intel_lr_context_unpin(req); - } + if (i915.enable_execlists && ctx != req->i915->kernel_context) + intel_lr_context_unpin(req); i915_gem_context_unreference(ctx); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5a3..8c4c9b9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -660,16 +660,10 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req, int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) { - int ret; + int ret = 0; request->ringbuf = request->ctx->engine[request->ring->id].ringbuf; - if (request->ctx != request->i915->kernel_context) { - ret = intel_lr_context_pin(request); - if (ret) - return ret; - } - if (i915.enable_guc_submission) { /* * Check that the GuC has space for the request before @@ -683,7 +677,10 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request return ret; } - return 0; + if (request->ctx != request->i915->kernel_context) + ret = intel_lr_context_pin(request); + + return ret; } static int logical_ring_wait_for_space(struct drm_i915_gem_request *req, @@ -2382,22 +2379,21 @@ void intel_lr_context_free(struct intel_context *ctx) { int i; - for (i = 0; i < I915_NUM_RINGS; i++) { + for (i = I915_NUM_RINGS; --i >= 0; ) { + 
struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; - if (ctx_obj) { - struct intel_ringbuffer *ringbuf = - ctx->engine[i].ringbuf; - struct intel_engine_cs *
[Intel-gfx] [PATCH v11] drm/i915: Extend LRC pinning to cover GPU context writeback
Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been written back to by the GPU. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. This fixes an issue with GuC submission where the GPU might not have finished writing back the context before it is unpinned. This results in a GPU hang. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request v5: Only create a switch to idle context if the ring doesn't already have a request pending on it (Alex Dai) Rename unsaved to dirty to avoid double negatives (Dave Gordon) Changed _no_req postfix to __ prefix for consistency (Dave Gordon) Split out per engine cleanup from context_free as it was getting unwieldy Corrected locking (Dave Gordon) v6: Removed some bikeshedding (Mika Kuoppala) Added explanation of the GuC hang that this fixes (Daniel Vetter) v7: Removed extra per request pinning from ring reset code (Alex Dai) Added forced ring unpin/clean in error case in context free (Alex Dai) v8: Renamed lrc specific last_context to lrc_last_context as there were some reset cases where the codepaths leaked (Mika Kuoppala) NULL'd last_context in reset case - there was a pointer leak if someone did reset->close context. v9: Rebase over "Fix context/engine cleanup order" v10: Rebase over nightly, remove WARN_ON which caused the dependency on dev. v11: Kick BAT rerun Signed-off-by: Nick Hoath Issue: VIZ-4277 Cc: Daniel Vetter Cc: David Gordon Cc: Chris Wilson Cc: Alex Dai Cc: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 3 + drivers/gpu/drm/i915/intel_lrc.c| 138 ++-- drivers/gpu/drm/i915/intel_lrc.h| 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 5 files changed, 121 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 104bd18..d28e10a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -882,6 +882,7 @@ struct intel_context { struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; + bool dirty; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ddc21d4..7b79405 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1413,6 +1413,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + if (i915.enable_execlists) + intel_lr_context_complete_check(request); + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5027699..b661058 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -585,9 +585,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) struct drm_i915_gem_request *cursor; int num_elements = 0; - if (request->ctx != ring->default_context) - intel_lr_context_pin(request); - i915_gem_request_reference(request); spin_lock_irq(&ring->execlist_lock); @@ 
-763,6 +760,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) if (intel_ring_stopped(ring)) return; + if (request->ctx != ring->default_context) { + if (!request->ctx->engine[ring->id].dirty) { + intel_lr_context_pin(request); + request->ctx->engine[ring->id].dirty = true; + } + } + if (dev_priv->guc.execbuf_client) i915_guc_submit(dev_priv->guc.execbuf_client, request); else @@ -989,12 +993,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) spin_unlock_irq(&ring->execlist_lock); list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) { - struct intel_context *ctx = req->ctx; - struct drm_i915_gem_object *ctx_obj = - ctx->engine[ring->id].state; - - if (ctx_obj && (ctx != ring->default_context))
Re: [Intel-gfx] [PATCH 3/3] drm/i915: Fix premature LRC unpin in GuC mode
On 20/01/2016 14:06, Tvrtko Ursulin wrote: On 20/01/16 13:55, Chris Wilson wrote: On Wed, Jan 20, 2016 at 01:40:57PM +, Tvrtko Ursulin wrote: From: Tvrtko Ursulin In GuC mode LRC pinning lifetime depends exclusively on the request liftime. Since that is terminated by the seqno update that opens up a race condition between GPU finishing writing out the context image and the driver unpinning the LRC. To extend the LRC lifetime we will employ a similar approach to what legacy ringbuffer submission does. We will start tracking the last submitted context per engine and keep it pinned until it is replaced by another one. Note that the driver unload path is a bit fragile and could benefit greatly from efforts to unify the legacy and exec list submission code paths. At the moment i915_gem_context_fini has special casing for the two which are potentialy not needed, and also depends on i915_gem_cleanup_ringbuffer running before itself. Signed-off-by: Tvrtko Ursulin Issue: VIZ-4277 Cc: Chris Wilson Cc: Nick Hoath --- I cannot test this with GuC but it passes BAT with execlists and some real world smoke tests. --- drivers/gpu/drm/i915/i915_gem_context.c | 4 +++- drivers/gpu/drm/i915/intel_lrc.c| 7 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c25083c78ba7..0b419e165836 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -438,7 +438,9 @@ void i915_gem_context_fini(struct drm_device *dev) for (i = 0; i < I915_NUM_RINGS; i++) { struct intel_engine_cs *ring = &dev_priv->ring[i]; - if (ring->last_context) + if (ring->last_context && i915.enable_execlists) + intel_lr_context_unpin(ring->last_context, ring); + else if (ring->last_context) i915_gem_context_unreference(ring->last_context); ring->default_context = NULL; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5c3f57fed916..b8a7e126d6d2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -918,6 +918,7 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, struct intel_engine_cs *ring = params->ring; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf; + struct intel_context*ctx = params->request->ctx; u64 exec_start; int instp_mode; u32 instp_mask; @@ -982,6 +983,12 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); + if (ring->last_context && ring->last_context != ctx) { + intel_lr_context_unpin(ring->last_context, ring); + intel_lr_context_pin(ctx, ring); + ring->last_context = ctx; + } I think this is the wrong location and should be part of submitting the context inside the engine (because intel_execlists_submission should not as it is entirely duplicating the common GEM batch submision code and the unique part is engine->add_request()). So into engine->emit_request you are saying? That works just as well AFAICS, just making sure I understood correctly. I think it should go in to intel_logical_ring_advance_and_submit. The extra pinning is being put in place to cover GPU usage of the pin. It should probably therefore go in to the last common place between execlists & GUC, as close to hardware submission as possible. 
Note that it should be: if (engine->last_context != request->ctx) { if (engine->last_context) intel_lr_context_unpin(engine->last_context, engine); engine->last_context = request->ctx; intel_lr_context_pin(engine->last_context, engine); } Ooops! Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/8] drm/i915/gen9: Add GEN8_CS_CHICKEN1 to HW whitelist
On 13/01/2016 10:06, Arun Siluvery wrote: Required for WaEnablePreemptionGranularityControlByUMD:skl,bxt Signed-off-by: Arun Siluvery Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6668bb0..1067ff0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5998,6 +5998,8 @@ enum skl_disp_power_wells { #define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) #define GEN9_TSG_BARRIER_ACK_DISABLE (1<<8) +#define GEN8_CS_CHICKEN1 _MMIO(0x2580) + /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1_MMIO(0x7010) # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC((1<<10) | (1<<26)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 354da81..35e78ed 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -909,6 +909,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; uint32_t tmp; + int ret; /* WaEnableLbsSlaRetryTimerDecrement:skl */ I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | @@ -979,6 +980,11 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) /* WaDisableSTUnitPowerOptimization:skl,bxt */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt */ + ret= wa_ring_whitelist_reg(ring, GEN8_CS_CHICKEN1); + if (ret) + return ret; + return 0; } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
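As background for the error handling in this series, a rough standalone model of what whitelisting a register amounts to: each engine has only a handful of force-to-nonpriv slots (the limit of 12 below is illustrative, not taken from the hardware documentation), so the helper can fail once they are exhausted, which is why every call follows the "ret = ...; if (ret) return ret;" pattern.

        #include <stdint.h>
        #include <stdio.h>

        #define MAX_WHITELIST_SLOTS 12  /* illustrative limit for the sketch */

        struct engine_whitelist {
                uint32_t reg[MAX_WHITELIST_SLOTS];
                int count;
        };

        static int whitelist_reg(struct engine_whitelist *w, uint32_t reg)
        {
                if (w->count >= MAX_WHITELIST_SLOTS)
                        return -1;      /* no slots left: workaround can't be applied */
                w->reg[w->count++] = reg;
                return 0;
        }

        int main(void)
        {
                struct engine_whitelist w = { { 0 }, 0 };

                /* GEN8_CS_CHICKEN1 lives at 0x2580 per the hunk above. */
                if (whitelist_reg(&w, 0x2580))
                        printf("whitelist full\n");
                else
                        printf("whitelisted 0x%04x (%d slots used)\n",
                               (unsigned)w.reg[w.count - 1], w.count);
                return 0;
        }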
Re: [Intel-gfx] [PATCH 3/8] drm/i915/gen9: Add HDC_CHICKEN1 to HW whitelist
On 13/01/2016 10:06, Arun Siluvery wrote: Required for WaAllowUMDToModifyHDCChicken1:skl,bxt Signed-off-by: Arun Siluvery Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 5 + 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1067ff0..16ef377 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6045,6 +6045,8 @@ enum skl_disp_power_wells { #define HDC_FORCE_NON_COHERENT (1<<4) #define HDC_BARRIER_PERFORMANCE_DISABLE (1<<10) +#define GEN8_HDC_CHICKEN1 _MMIO(0x7304) + /* GEN9 chicken */ #define SLICE_ECO_CHICKEN0_MMIO(0x7308) #define PIXEL_MASK_CAMMING_DISABLE (1 << 14) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 35e78ed..2241a92 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -985,6 +985,11 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) if (ret) return ret; + /* WaAllowUMDToModifyHDCChicken1:skl,bxt */ + ret = wa_ring_whitelist_reg(ring, GEN8_HDC_CHICKEN1); + if (ret) + return ret; + return 0; } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 4/8] drm/i915/bxt: Add GEN9_CS_DEBUG_MODE1 to HW whitelist
On 13/01/2016 10:06, Arun Siluvery wrote: Required for, WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt WaDisableObjectLevelPreemptionForInstancedDraw:bxt WaDisableObjectLevelPreemtionForInstanceId:bxt According to WA database these are only applicable for BXT:A0 but since A0 and A1 shares the same GT these are extended for A1 as well. These are also required for SKL until B0 but not adding them because they are pre-production steppings. Signed-off-by: Arun Siluvery --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 9 + 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 16ef377..eabd2af 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5998,6 +5998,7 @@ enum skl_disp_power_wells { #define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) #define GEN9_TSG_BARRIER_ACK_DISABLE (1<<8) +#define GEN9_CS_DEBUG_MODE1_MMIO(0x20EC) The pattern seems to be lc for hex (0x20ec) #define GEN8_CS_CHICKEN1 _MMIO(0x2580) /* GEN7 chicken */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2241a92..7a46cf1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1132,6 +1132,15 @@ static int bxt_init_workarounds(struct intel_engine_cs *ring) GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } + /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ + /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ + /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ + if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { + ret = wa_ring_whitelist_reg(ring, GEN9_CS_DEBUG_MODE1); + if (ret) + return ret; + } + return 0; } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 5/8] drm/i915/bxt: Add GEN8_L3SQCREG4 to HW whitelist
On 13/01/2016 10:06, Arun Siluvery wrote: Required for WaDisableLSQCROPERFforOCL:bxt According to WA database these are only applicable for BXT:A0 but since A0 and A1 shares the same GT these are extended for A1 as well. Signed-off-by: Arun Siluvery Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7a46cf1..5eb4eea 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1135,10 +1135,15 @@ static int bxt_init_workarounds(struct intel_engine_cs *ring) /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ + /* WaDisableLSQCROPERFforOCL:bxt */ if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { ret = wa_ring_whitelist_reg(ring, GEN9_CS_DEBUG_MODE1); if (ret) return ret; + + ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4); + if (ret) + return ret; } return 0; ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 6/8] drm/i915/skl: Add GEN8_L3SQCREG4 to HW whitelist
On 13/01/2016 10:06, Arun Siluvery wrote: Required for WaDisableLSQCROPERFforOCL:skl Signed-off-by: Arun Siluvery Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5eb4eea..b8dbd2c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1097,6 +1097,11 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) GEN7_HALF_SLICE_CHICKEN1, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + /* WaDisableLSQCROPERFforOCL:skl */ + ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4); + if (ret) + return ret; + return skl_tune_iz_hashing(ring); } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 7/8] drm/i915/skl: Enable Per context Preemption granularity control
On 13/01/2016 10:06, Arun Siluvery wrote: Per context preemption granularity control is only available from SKL:E0+ Cc: Dave Gordon Signed-off-by: Arun Siluvery --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++ 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index eabd2af..97774a3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5995,6 +5995,9 @@ enum skl_disp_power_wells { #define SKL_DFSM_CDCLK_LIMIT_450 (2 << 23) #define SKL_DFSM_CDCLK_LIMIT_337_5(3 << 23) +#define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20E0) 0x20e0? +#define GEN9_FFSC_PERCTX_PREEMPT_CTRL(1<<14) + #define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) #define GEN9_TSG_BARRIER_ACK_DISABLE (1<<8) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b8dbd2c..5a2ad10 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1045,6 +1045,16 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) if (ret) return ret; + /* +* Actual WA is to disable percontext preemption granularity control +* until D0 which is the default case so this is equivalent to +* !WaDisablePerCtxtPreemptionGranularityControl:skl +*/ + if (IS_SKL_REVID(dev, SKL_REVID_E0, REVID_FOREVER)) { + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); + } + if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) { /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */ I915_WRITE(FF_SLICE_CS_CHICKEN2, ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 7/8] drm/i915/skl: Enable Per context Preemption granularity control
On 21/01/2016 14:00, Arun Siluvery wrote: Per context preemption granularity control is only available from SKL:E0+ Cc: Dave Gordon Signed-off-by: Arun Siluvery Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++ 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c51e7e9..65e32a3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5995,6 +5995,9 @@ enum skl_disp_power_wells { #define SKL_DFSM_CDCLK_LIMIT_450 (2 << 23) #define SKL_DFSM_CDCLK_LIMIT_337_5(3 << 23) +#define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) +#define GEN9_FFSC_PERCTX_PREEMPT_CTRL(1<<14) + #define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) #define GEN9_TSG_BARRIER_ACK_DISABLE (1<<8) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ce64519..e91fb70 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1044,6 +1044,16 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) if (ret) return ret; + /* +* Actual WA is to disable percontext preemption granularity control +* until D0 which is the default case so this is equivalent to +* !WaDisablePerCtxtPreemptionGranularityControl:skl +*/ + if (IS_SKL_REVID(dev, SKL_REVID_E0, REVID_FOREVER)) { + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); + } + if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) { /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */ I915_WRITE(FF_SLICE_CS_CHICKEN2, ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
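Editorial aside: the GEN9_FFSC_PERCTX_PREEMPT_CTRL write uses the masked-bit convention common to these chicken registers, where the upper 16 bits of the written value act as per-bit write enables so only the selected bit changes. A minimal standalone illustration follows; the macro name _MASKED_BIT_ENABLE comes from the patch, but the expansion shown is the conventional one and should be checked against i915_reg.h rather than taken as authoritative.

#include <stdint.h>
#include <stdio.h>

/* Conventional masked-bit helpers: high 16 bits select which low bits
 * the hardware actually updates. Expansion assumed, not copied from i915. */
#define MASKED_BIT_ENABLE(a)  (((a) << 16) | (a))
#define MASKED_BIT_DISABLE(a) ((a) << 16)

#define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1 << 14)

int main(void)
{
        uint32_t val = MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL);

        /* Prints 0x40004000: bit 30 arms the write, bit 14 is the value. */
        printf("enable write  = 0x%08x\n", val);
        printf("disable write = 0x%08x\n",
               MASKED_BIT_DISABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
        return 0;
}

The upshot is that such writes do not need a read-modify-write cycle: untouched bits keep their current hardware value because their mask bits stay zero.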
Re: [Intel-gfx] [PATCH v2 8/8] drm/i915/gen9: Add WaOCLCoherentLineFlush
On 21/01/2016 14:00, Arun Siluvery wrote: This is mainly required for preemption. Cc: Dave Gordon Signed-off-by: Arun Siluvery Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e91fb70..f26f274 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -979,6 +979,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) /* WaDisableSTUnitPowerOptimization:skl,bxt */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + /* WaOCLCoherentLineFlush:skl,bxt */ + I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_FLUSH_COHERENT_LINES)); + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt */ ret= wa_ring_whitelist_reg(ring, GEN8_CS_CHICKEN1); if (ret) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 4/8] drm/i915/bxt: Add GEN9_CS_DEBUG_MODE1 to HW whitelist
On 21/01/2016 14:00, Arun Siluvery wrote: Required for, WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt WaDisableObjectLevelPreemptionForInstancedDraw:bxt WaDisableObjectLevelPreemtionForInstanceId:bxt According to WA database these are only applicable for BXT:A0 but since A0 and A1 shares the same GT these are extended for A1 as well. These are also required for SKL until B0 but not adding them because they are pre-production steppings. v2: use lower case in register defines (Nick) Signed-off-by: Arun Siluvery Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 9 + 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ed887cf..c51e7e9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5998,6 +5998,7 @@ enum skl_disp_power_wells { #define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) #define GEN9_TSG_BARRIER_ACK_DISABLE (1<<8) +#define GEN9_CS_DEBUG_MODE1_MMIO(0x20ec) #define GEN8_CS_CHICKEN1 _MMIO(0x2580) /* GEN7 chicken */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index fea632f..72e89b6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1131,6 +1131,15 @@ static int bxt_init_workarounds(struct intel_engine_cs *ring) GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } + /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ + /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ + /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ + if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { + ret = wa_ring_whitelist_reg(ring, GEN9_CS_DEBUG_MODE1); + if (ret) + return ret; + } + return 0; } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v12] drm/i915: Extend LRC pinning to cover GPU context writeback
Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been written back to by the GPU. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. This fixes an issue with GuC submission where the GPU might not have finished writing back the context before it is unpinned. This results in a GPU hang. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request v5: Only create a switch to idle context if the ring doesn't already have a request pending on it (Alex Dai) Rename unsaved to dirty to avoid double negatives (Dave Gordon) Changed _no_req postfix to __ prefix for consistency (Dave Gordon) Split out per engine cleanup from context_free as it was getting unwieldy Corrected locking (Dave Gordon) v6: Removed some bikeshedding (Mika Kuoppala) Added explanation of the GuC hang that this fixes (Daniel Vetter) v7: Removed extra per request pinning from ring reset code (Alex Dai) Added forced ring unpin/clean in error case in context free (Alex Dai) v8: Renamed lrc specific last_context to lrc_last_context as there were some reset cases where the codepaths leaked (Mika Kuoppala) NULL'd last_context in reset case - there was a pointer leak if someone did reset->close context. v9: Rebase over "Fix context/engine cleanup order" v10: Rebase over nightly, remove WARN_ON which caused the dependency on dev. v11: Kick BAT rerun v12: Rebase Signed-off-by: Nick Hoath Issue: VIZ-4277 --- drivers/gpu/drm/i915/intel_lrc.c | 37 +++-- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index dbf3729..b469817 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -779,10 +779,10 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) if (intel_ring_stopped(request->ring)) return 0; - if (request->ctx != ring->default_context) { - if (!request->ctx->engine[ring->id].dirty) { + if (request->ctx != request->ctx->i915->kernel_context) { + if (!request->ctx->engine[request->ring->id].dirty) { intel_lr_context_pin(request); - request->ctx->engine[ring->id].dirty = true; + request->ctx->engine[request->ring->id].dirty = true; } } @@ -2447,9 +2447,7 @@ intel_lr_context_clean_ring(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj, struct intel_ringbuffer *ringbuf) { - int ret; - - if (ctx == ring->default_context) { + if (ctx == ctx->i915->kernel_context) { intel_unpin_ringbuffer_obj(ringbuf); i915_gem_object_ggtt_unpin(ctx_obj); } @@ -2463,13 +2461,10 @@ intel_lr_context_clean_ring(struct intel_context *ctx, * otherwise create a switch to idle request */ if (list_empty(&ring->request_list)) { - int ret; - - ret = i915_gem_request_alloc( + req = i915_gem_request_alloc( ring, - ring->default_context, - &req); - if (!ret) + NULL); + if (!IS_ERR(req)) i915_add_request(req); else DRM_DEBUG("Failed to ensure context saved"); @@ -2479,6 +2474,8 @@ intel_lr_context_clean_ring(struct intel_context *ctx, typeof(*req), list); } if (req) { + int ret; + ret = i915_wait_request(req); if (ret != 0) { /** @@ -2515,17 +2512,13 @@ void intel_lr_context_free(struct intel_context *ctx) struct 
intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; - if (!ctx_obj) - continue; - - if (ctx == ctx->i915->kernel_context) { - intel_unpin_ringbuffer_obj(ringbuf); - i915
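Editorial aside: the commit message above describes the lifecycle change in prose. As a rough standalone model of that behaviour (deliberately not i915 code -- the structure and function names below are invented for illustration), the per-engine state pins the context image the first time it is submitted and marks it dirty, and only drops that pin once a later event shows the hardware has written the context back.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model of the dirty/pin lifecycle described in the commit message. */
struct toy_lrc {
        int  pin_count;
        bool dirty;     /* hw may still write this context image back */
};

static void toy_submit(struct toy_lrc *lrc)
{
        /* Pin on first submission so the image stays bound for writeback. */
        if (!lrc->dirty) {
                lrc->pin_count++;
                lrc->dirty = true;
        }
}

static void toy_writeback_done(struct toy_lrc *lrc)
{
        /* Once the hw has saved this context, the writeback pin can go. */
        if (lrc->dirty) {
                lrc->dirty = false;
                lrc->pin_count--;
        }
}

int main(void)
{
        struct toy_lrc lrc = { 0, false };

        toy_submit(&lrc);          /* pin_count -> 1, dirty */
        toy_submit(&lrc);          /* no extra pin while still dirty */
        assert(lrc.pin_count == 1);

        toy_writeback_done(&lrc);  /* writeback complete, unpin */
        assert(lrc.pin_count == 0 && !lrc.dirty);

        printf("lifecycle ok\n");
        return 0;
}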
Re: [Intel-gfx] [PATCH v12] drm/i915: Extend LRC pinning to cover GPU context writeback
On 25/01/2016 18:19, Daniel Vetter wrote: On Fri, Jan 22, 2016 at 02:25:27PM +, Nick Hoath wrote: Use the first retired request on a new context to unpin the old context. This ensures that the hw context remains bound until it has been written back to by the GPU. Now that the context is pinned until later in the request/context lifecycle, it no longer needs to be pinned from context_queue to retire_requests. This fixes an issue with GuC submission where the GPU might not have finished writing back the context before it is unpinned. This results in a GPU hang. v2: Moved the new pin to cover GuC submission (Alex Dai) Moved the new unpin to request_retire to fix coverage leak v3: Added switch to default context if freeing a still pinned context just in case the hw was actually still using it v4: Unwrapped context unpin to allow calling without a request v5: Only create a switch to idle context if the ring doesn't already have a request pending on it (Alex Dai) Rename unsaved to dirty to avoid double negatives (Dave Gordon) Changed _no_req postfix to __ prefix for consistency (Dave Gordon) Split out per engine cleanup from context_free as it was getting unwieldy Corrected locking (Dave Gordon) v6: Removed some bikeshedding (Mika Kuoppala) Added explanation of the GuC hang that this fixes (Daniel Vetter) v7: Removed extra per request pinning from ring reset code (Alex Dai) Added forced ring unpin/clean in error case in context free (Alex Dai) v8: Renamed lrc specific last_context to lrc_last_context as there were some reset cases where the codepaths leaked (Mika Kuoppala) NULL'd last_context in reset case - there was a pointer leak if someone did reset->close context. v9: Rebase over "Fix context/engine cleanup order" v10: Rebase over nightly, remove WARN_ON which caused the dependency on dev. v11: Kick BAT rerun v12: Rebase Signed-off-by: Nick Hoath Issue: VIZ-4277 When resending patches, please include everyone who ever commented on this in Cc: lines here. It's for the record and helps in assigning blame when things inevitably blow up again ;-) Even when it's just a resend to cause a BAT run for coverage? 
-Daniel --- drivers/gpu/drm/i915/intel_lrc.c | 37 +++-- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index dbf3729..b469817 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -779,10 +779,10 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) if (intel_ring_stopped(request->ring)) return 0; - if (request->ctx != ring->default_context) { - if (!request->ctx->engine[ring->id].dirty) { + if (request->ctx != request->ctx->i915->kernel_context) { + if (!request->ctx->engine[request->ring->id].dirty) { intel_lr_context_pin(request); - request->ctx->engine[ring->id].dirty = true; + request->ctx->engine[request->ring->id].dirty = true; } } @@ -2447,9 +2447,7 @@ intel_lr_context_clean_ring(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj, struct intel_ringbuffer *ringbuf) { - int ret; - - if (ctx == ring->default_context) { + if (ctx == ctx->i915->kernel_context) { intel_unpin_ringbuffer_obj(ringbuf); i915_gem_object_ggtt_unpin(ctx_obj); } @@ -2463,13 +2461,10 @@ intel_lr_context_clean_ring(struct intel_context *ctx, * otherwise create a switch to idle request */ if (list_empty(&ring->request_list)) { - int ret; - - ret = i915_gem_request_alloc( + req = i915_gem_request_alloc( ring, - ring->default_context, - &req); - if (!ret) + NULL); + if (!IS_ERR(req)) i915_add_request(req); else DRM_DEBUG("Failed to ensure context saved"); @@ -2479,6 +2474,8 @@ intel_lr_context_clean_ring(struct intel_context *ctx, typeof(*req), list); } if (req) { + int ret; + ret = i915_wait_request(req); if (ret != 0) { /** @@ -2515,17 +2512,13 @@ void intel_lr_context_free(struct intel_co
[Intel-gfx] [RFC] drm/i915: Add flag to enable virtual mappings above 4Gb
Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset hardware workarounds require that GeneralStateOffset & InstructionBaseOffset are restricted to a 32 bit address space. This is a preparatory patch prior to supporting 64bit virtual memory allocations. Allow the user space to flag that a mapping can occur beyond the 32bit limit. This allows backward compatibility and user space drivers that haven't been enhanced to support these workarounds to function. Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_drv.h | 6 ++ drivers/gpu/drm/i915/i915_gem.c | 18 +++--- include/uapi/drm/i915_drm.h | 7 ++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3cc0196..1e6fc1d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2066,6 +2066,12 @@ struct drm_i915_gem_object { unsigned int has_dma_mapping:1; unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS; + + /** +* If the object should be mapped in to the bottom 4Gb +* memory space only, then this flag should not be set +*/ + unsigned int hi_mem:1; struct sg_table *pages; int pages_pin_count; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 61134ab..efa782c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -395,7 +395,9 @@ static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, - uint32_t *handle_p) + uint32_t *handle_p, + uint32_t flags + ) { struct drm_i915_gem_object *obj; int ret; @@ -410,6 +412,9 @@ i915_gem_create(struct drm_file *file, if (obj == NULL) return -ENOMEM; + if (flags & I915_CREATE_FLAG_HI_MEM) + obj->hi_mem = 1; + ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(&obj->base); @@ -429,7 +434,8 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, &args->handle, + I915_CREATE_FLAG_HI_MEM); } /** @@ -440,9 +446,10 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_create *args = data; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, &args->handle, + args->flags); } static inline int diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6eed16b..eb2e7d9 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -428,6 +428,8 @@ struct drm_i915_gem_init { __u64 gtt_end; }; +#define I915_CREATE_FLAG_HI_MEM0x0001 + struct drm_i915_gem_create { /** * Requested size for the object. @@ -441,7 +443,10 @@ struct drm_i915_gem_create { * Object handles are nonzero. */ __u32 handle; - __u32 pad; + /** +* Object creation flags +*/ + __u32 flags; }; struct drm_i915_gem_pread { -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
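Editorial aside: from userspace, the RFC would be consumed through the existing GEM_CREATE ioctl with the new flags word set. The sketch below assumes the RFC is applied (so struct drm_i915_gem_create has a flags member and I915_CREATE_FLAG_HI_MEM exists); it will not compile against stock headers, the render-node path is just an example, and error handling is trimmed.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>   /* include path depends on your libdrm setup */

int main(void)
{
        int fd = open("/dev/dri/renderD128", O_RDWR);
        if (fd < 0)
                return 1;

        struct drm_i915_gem_create create;
        memset(&create, 0, sizeof(create));
        create.size  = 4096;
        /* Only valid with the RFC applied: opt this object in to mappings
         * above 4Gb. Leaving flags at 0 keeps the old 32-bit-safe
         * behaviour for unmodified userspace drivers. */
        create.flags = I915_CREATE_FLAG_HI_MEM;

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
                perror("GEM_CREATE");
        else
                printf("handle %u\n", create.handle);
        return 0;
}

Keeping the flag opt-in is the backward-compatibility point of the RFC: old binaries that never set flags continue to get objects the workarounds can place below 4Gb.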
Re: [Intel-gfx] [PATCH 12/49] drm/i915/bxt: HardWare WorkAround ring initialisation for Broxton
On 17/03/2015 09:39, Imre Deak wrote: From: Nick Hoath Adds framework for Broxton HW WAs Signed-off-by: Nick Hoath Signed-off-by: Imre Deak Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 441e250..abe062a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1027,6 +1027,13 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) return skl_tune_iz_hashing(ring); } +static int bxt_init_workarounds(struct intel_engine_cs *ring) +{ + gen9_init_workarounds(ring); + + return 0; +} + int init_workarounds_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -1044,8 +1051,9 @@ int init_workarounds_ring(struct intel_engine_cs *ring) if (IS_SKYLAKE(dev)) return skl_init_workarounds(ring); - else if (IS_GEN9(dev)) - return gen9_init_workarounds(ring); + + if (IS_BROXTON(dev)) + return bxt_init_workarounds(ring); return 0; } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 13/49] drm/i915/bxt: add bxt_init_clock_gating
On 17/03/2015 09:39, Imre Deak wrote: Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/intel_pm.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b89ab4d..3d4a7c3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -94,6 +94,11 @@ static void skl_init_clock_gating(struct drm_device *dev) GEN8_LQSC_RO_PERF_DIS); } +static void bxt_init_clock_gating(struct drm_device *dev) +{ + gen9_init_clock_gating(dev); +} + static void i915_pineview_get_mem_freq(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -6503,7 +6508,12 @@ void intel_init_pm(struct drm_device *dev) if (INTEL_INFO(dev)->gen >= 9) { skl_setup_wm_latency(dev); - dev_priv->display.init_clock_gating = skl_init_clock_gating; + if (IS_BROXTON(dev)) + dev_priv->display.init_clock_gating = + bxt_init_clock_gating; + else + dev_priv->display.init_clock_gating = + skl_init_clock_gating; This doesn't match the style in: "HardWare WorkAround ring initialisation for Broxton", where we explicitly check the IS_BROXTON and IS_SKYLAKE state. dev_priv->display.update_wm = skl_update_wm; dev_priv->display.update_sprite_wm = skl_update_sprite_wm; } else if (HAS_PCH_SPLIT(dev)) { ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/bxt: Add Broxton steppings
Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_drv.h | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb38cd1..eec271a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2328,6 +2328,10 @@ struct drm_i915_cmd_table { #define SKL_REVID_D0 (0x3) #define SKL_REVID_E0 (0x4) +#define BXT_REVID_A0 (0x0) +#define BXT_REVID_B0 (0x3) +#define BXT_REVID_C0 (0x6) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 16/49] drm/i915/bxt: add WaDisableMaskBasedCammingInRCC workaround
On 17/03/2015 09:39, Imre Deak wrote: From: Ben Widawsky Signed-off-by: Ben Widawsky Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/i915_reg.h | 4 drivers/gpu/drm/i915/intel_ringbuffer.c | 9 + 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b7ba061..1d074e8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5346,6 +5346,10 @@ enum skl_disp_power_wells { #define HDC_FORCE_NON_COHERENT (1<<4) #define HDC_BARRIER_PERFORMANCE_DISABLE (1<<10) +/* GEN9 chicken */ +#define SLICE_ECO_CHICKEN0 0x7308 +#define PIXEL_MASK_CAMMING_DISABLE (1 << 14) + /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG0x9030 #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index abe062a..e23cbdc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -966,6 +966,15 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_CCS_TLB_PREFETCH_ENABLE); + /* +* FIXME: don't apply the following on BXT for stepping C. On BXT A0 +* the flag reads back as 0. +*/ I've just posted a patch with the stepping macros. You can use these in the same way as for Skylake. + /* WaDisableMaskBasedCammingInRCC:bxtA */ + if (IS_BROXTON(dev)) + WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, + PIXEL_MASK_CAMMING_DISABLE); + return 0; } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 17/49] drm/i915/skl: add WaDisableMaskBasedCammingInRCC workaround
On 17/03/2015 09:39, Imre Deak wrote: From: Ben Widawsky Signed-off-by: Ben Widawsky Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e23cbdc..000f608 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -970,8 +970,8 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) * FIXME: don't apply the following on BXT for stepping C. On BXT A0 * the flag reads back as 0. */ - /* WaDisableMaskBasedCammingInRCC:bxtA */ - if (IS_BROXTON(dev)) + /* WaDisableMaskBasedCammingInRCC:sklC,bxtA */ + if (INTEL_REVID(dev) == SKL_REVID_C0 || IS_BROXTON(dev)) This looks wrong. (IS_BROXTON && BXT_REVID_C0) || (IS_SKYLAKE && SKL_REVID_C0) please. WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, PIXEL_MASK_CAMMING_DISABLE); ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 14/49] drm/i915/bxt: add GEN8_SDEUNIT_CLOCK_GATE_DISABLE workaround
On 17/03/2015 13:06, Imre Deak wrote: On ti, 2015-03-17 at 11:35 +0100, Daniel Vetter wrote: On Tue, Mar 17, 2015 at 11:39:40AM +0200, Imre Deak wrote: Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/intel_pm.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3d4a7c3..d5dd0b3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -96,7 +96,18 @@ static void skl_init_clock_gating(struct drm_device *dev) static void bxt_init_clock_gating(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; + gen9_init_clock_gating(dev); + + /* +* FIXME: +* GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only. We have pci revid macros now. Do you have plans to roll similar ones out for bxt? Yes. It may be that for BXT we also need to look at the PCI_REVISION_ID field besides PCI_CLASS_REVISION, I still have to figure out the exact mapping. (And also understand the meaning/difference between SOC vs. GT revision IDs). I've posted a patch with the Broxton revision ID's from the specs. --Imre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/bxt: Add A1 stepping for Broxton
This stepping isn't listed separately in the specs, so needs confirmation. Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_drv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eec271a..68fb41a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2329,6 +2329,7 @@ struct drm_i915_cmd_table { #define SKL_REVID_E0 (0x4) #define BXT_REVID_A0 (0x0) +#define BXT_REVID_A1 (0x1) #define BXT_REVID_B0 (0x3) #define BXT_REVID_C0 (0x6) -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
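Editorial aside: these stepping defines are compared directly against the PCI revision ID, so a workaround scoped to, say, A0..A1 becomes a simple inclusive range test; later patches in this archive use IS_BXT_REVID()-style macros for exactly that. The standalone snippet below only demonstrates the arithmetic; the helper name is made up and is not an i915 macro.

#include <stdbool.h>
#include <stdio.h>

#define BXT_REVID_A0 (0x0)
#define BXT_REVID_A1 (0x1)
#define BXT_REVID_B0 (0x3)
#define BXT_REVID_C0 (0x6)

/* Illustrative helper: true if revid falls in [since, until]. */
static bool bxt_revid_in_range(int revid, int since, int until)
{
        return revid >= since && revid <= until;
}

int main(void)
{
        /* A workaround documented for A0/A1 only: */
        printf("A1 needs WA: %d\n",
               bxt_revid_in_range(BXT_REVID_A1, BXT_REVID_A0, BXT_REVID_A1));
        printf("B0 needs WA: %d\n",
               bxt_revid_in_range(BXT_REVID_B0, BXT_REVID_A0, BXT_REVID_A1));
        return 0;
}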
Re: [Intel-gfx] [PATCH 16/49] drm/i915/bxt: add WaDisableMaskBasedCammingInRCC workaround
On 20/03/2015 10:25, Deak, Imre wrote: On Fri, 2015-03-20 at 09:05 +, Nick Hoath wrote: On 17/03/2015 09:39, Imre Deak wrote: From: Ben Widawsky Signed-off-by: Ben Widawsky Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/i915_reg.h | 4 drivers/gpu/drm/i915/intel_ringbuffer.c | 9 + 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b7ba061..1d074e8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5346,6 +5346,10 @@ enum skl_disp_power_wells { #define HDC_FORCE_NON_COHERENT (1<<4) #define HDC_BARRIER_PERFORMANCE_DISABLE (1<<10) +/* GEN9 chicken */ +#define SLICE_ECO_CHICKEN0 0x7308 +#define PIXEL_MASK_CAMMING_DISABLE (1 << 14) + /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG 0x9030 #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB(1<<11) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index abe062a..e23cbdc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -966,6 +966,15 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_CCS_TLB_PREFETCH_ENABLE); + /* +* FIXME: don't apply the following on BXT for stepping C. On BXT A0 +* the flag reads back as 0. +*/ I've just posted a patch with the stepping macros. You can use these in the same way as for Skylake. I'm not so happy to make these changes at this point. Without them we still have a correct - even if conservative - behavior on other steppings. There are quite a few places marked with FIXME that need improvement in a similar way and I'd leave them as-is for now to keep as close as possible to the good known working state (as of the power-on) and to make merging of this initial patchset fast. In that case: Reviewed-by: Nick Hoath + /* WaDisableMaskBasedCammingInRCC:bxtA */ + if (IS_BROXTON(dev)) + WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, + PIXEL_MASK_CAMMING_DISABLE); + return 0; } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 14/49] drm/i915/bxt: add GEN8_SDEUNIT_CLOCK_GATE_DISABLE workaround
On 20/03/2015 10:37, Deak, Imre wrote: On Fri, 2015-03-20 at 09:08 +, Nick Hoath wrote: On 17/03/2015 13:06, Imre Deak wrote: On ti, 2015-03-17 at 11:35 +0100, Daniel Vetter wrote: On Tue, Mar 17, 2015 at 11:39:40AM +0200, Imre Deak wrote: Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/intel_pm.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3d4a7c3..d5dd0b3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -96,7 +96,18 @@ static void skl_init_clock_gating(struct drm_device *dev) static void bxt_init_clock_gating(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; + gen9_init_clock_gating(dev); + + /* +* FIXME: +* GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only. We have pci revid macros now. Do you have plans to roll similar ones out for bxt? Yes. It may be that for BXT we also need to look at the PCI_REVISION_ID field besides PCI_CLASS_REVISION, I still have to figure out the exact mapping. (And also understand the meaning/difference between SOC vs. GT revision IDs). Ok, the above is red herring. PCI_REVISION_ID is just the 8 low bits of PCI_CLASS_REVISION, so we can reuse INTEL_REVID as-is. I've posted a patch with the Broxton revision ID's from the specs. It looks ok, but I prefer adding them as a follow-up to this patchset. Reviewed-by: Nick Hoath ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/bxt: Enable existing gen9 hardware workarounds for Broxton
Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_reg.h | 8 + drivers/gpu/drm/i915/intel_pm.c | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 53 +++-- 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7e1a0fd9..91eef06 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5339,6 +5339,7 @@ enum skl_disp_power_wells { #define GEN8_L3SQCREG4 0xb118 #define GEN8_LQSC_RO_PERF_DIS (1<<27) +#define GEN8_PIPELINE_FLUSH_COHERENT_LINES(1<<21) /* GEN8 chicken */ #define HDC_CHICKEN0 0x7300 @@ -7324,4 +7325,11 @@ enum skl_disp_power_wells { #define _PALETTE_A (dev_priv->info.display_mmio_offset + 0xa000) #define _PALETTE_B (dev_priv->info.display_mmio_offset + 0xa800) +/* + * Chicken Registers for LLC/eLLC Hot Spotting Avoidance Mode for + * 3D/Media Compressed Resources + */ +#define GEN9_CHICKEN_MISC1_REG 0x42080 +#define GEN9_CHICKEN_MISC1_NEW_HASH_ENABLE (1<<15) + #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c3c473d..bbb5d64 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -96,6 +96,8 @@ static void skl_init_clock_gating(struct drm_device *dev) static void bxt_init_clock_gating(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; + gen9_init_clock_gating(dev); /* WaVSRefCountFullforceMissDisable:bxt */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 44c7b99..741bdfa 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -916,7 +916,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - /* WaDisablePartialInstShootdown:skl */ + /* WaDisablePartialInstShootdown:skl,bxt */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); @@ -924,45 +924,43 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - if (INTEL_REVID(dev) == SKL_REVID_A0 || - INTEL_REVID(dev) == SKL_REVID_B0) { - /* WaDisableDgMirrorFixInHalfSliceChicken5:skl */ + if ( + (IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 || + INTEL_REVID(dev) == SKL_REVID_B0)) || + (IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0) + ) { + /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */ WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_DG_MIRROR_FIX_ENABLE); } - if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) { - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl */ + if ( + (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) || + (IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0) + ) { + /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */ WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, GEN9_RHWO_OPTIMIZATION_DISABLE); WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0, DISABLE_PIXEL_MASK_CAMMING); } - if (INTEL_REVID(dev) >= SKL_REVID_C0) { - /* WaEnableYV12BugFixInHalfSliceChicken7:skl */ + if ( + (IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) || + (IS_BROXTON(dev)) + ) { + /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, GEN9_ENABLE_YV12_BUGFIX); } - if (INTEL_REVID(dev) <= SKL_REVID_D0) { - /* -*Use Force Non-Coherent whenever executing a 3D context. 
This -* is a workaround for a possible hang in the unlikely event -* a TLB invalidation occurs during a PSD flush. -*/ - /* WaForceEnableNonCoherent:skl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - } - - /* Wa4x4STCOptimizationDisable:skl */ + /* Wa4x4STCOptimizationDisable:skl,bxt */ WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); - /* WaDisablePartialResolveInVc
Re: [Intel-gfx] [PATCH v2 13/49] drm/i915/bxt: add bxt_init_clock_gating
On 27/03/2015 12:00, Deak, Imre wrote: v2: - Make the condition to select between SKL and BXT consistent with the corresponding condition in init_workarounds_ring (Nick) Reviewed-by: Nick Hoath Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/intel_pm.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c52f8b7..8a8d52a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -94,6 +94,11 @@ static void skl_init_clock_gating(struct drm_device *dev) GEN8_LQSC_RO_PERF_DIS); } +static void bxt_init_clock_gating(struct drm_device *dev) +{ + gen9_init_clock_gating(dev); +} + static void i915_pineview_get_mem_freq(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -6548,7 +6553,12 @@ void intel_init_pm(struct drm_device *dev) if (INTEL_INFO(dev)->gen >= 9) { skl_setup_wm_latency(dev); - dev_priv->display.init_clock_gating = skl_init_clock_gating; + if (IS_BROXTON(dev)) + dev_priv->display.init_clock_gating = + bxt_init_clock_gating; + else if (IS_SKYLAKE(dev)) + dev_priv->display.init_clock_gating = + skl_init_clock_gating; dev_priv->display.update_wm = skl_update_wm; dev_priv->display.update_sprite_wm = skl_update_sprite_wm; } else if (HAS_PCH_SPLIT(dev)) { ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 11/49] drm/i915/gen9: fix PIPE_CONTROL flush for VS_INVALIDATE
On 17/03/2015 09:39, Imre Deak wrote: On GEN9+ per specification a NULL PIPE_CONTROL needs to be emitted before any PIPE_CONTROL command with the VS_INVALIDATE flag set. Signed-off-by: Imre Deak Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/intel_lrc.c | 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fcb074b..71aeeb3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1262,6 +1262,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, { struct intel_engine_cs *ring = ringbuf->ring; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; + bool vf_flush_wa; u32 flags = 0; int ret; @@ -1283,10 +1284,26 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; } - ret = intel_logical_ring_begin(ringbuf, ctx, 6); + /* +* On GEN9+ Before VF_CACHE_INVALIDATE we need to emit a NULL pipe +* control. +*/ + vf_flush_wa = INTEL_INFO(ring->dev)->gen >= 9 && + flags & PIPE_CONTROL_VF_CACHE_INVALIDATE; + + ret = intel_logical_ring_begin(ringbuf, ctx, vf_flush_wa ? 12 : 6); if (ret) return ret; + if (vf_flush_wa) { + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + } + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); intel_logical_ring_emit(ringbuf, flags); intel_logical_ring_emit(ringbuf, scratch_addr); ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 15/49] drm/i915/bxt: add GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ workaround
On 17/03/2015 09:39, Imre Deak wrote: From: Ben Widawsky Signed-off-by: Ben Widawsky Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3369a11..b7ba061 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6104,6 +6104,7 @@ enum skl_disp_power_wells { #define GEN8_UCGCTL6 0x9430 #define GEN8_GAPSUNIT_CLOCK_GATE_DISABLE(1<<24) #define GEN8_SDEUNIT_CLOCK_GATE_DISABLE (1<<14) +#define GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1<<28) #define GEN6_GFXPAUSE 0xA000 #define GEN6_RPNSWREQ 0xA008 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d5dd0b3..52d3c02 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -103,10 +103,12 @@ static void bxt_init_clock_gating(struct drm_device *dev) /* * FIXME: * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only. +* GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only. */ /* WaDisableSDEUnitClockGating:bxt */ I can't find where WaDisableSDEUnitClockGating is listed as required for BXT? I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + GEN8_SDEUNIT_CLOCK_GATE_DISABLE | + GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ); } ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 18/49] drm/i915/bxt: add workaround to avoid PTE corruption
On 17/03/2015 09:39, Imre Deak wrote: From: Robert Beckett Set TLBPF in TILECTL. This fixes an issue with BXT HW seeing corrupted pte entries. v2: - move the workaround to bxt_init_clock_gating (imre) Signed-off-by: Robert Beckett (v1) Signed-off-by: Imre Deak Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1d074e8..d69d7b9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1151,6 +1151,7 @@ enum skl_disp_power_wells { /* control register for cpu gtt access */ #define TILECTL 0x101000 #define TILECTL_SWZCTL (1 << 0) +#define TILECTL_TLBPF(1 << 1) #define TILECTL_TLB_PREFETCH_DIS(1 << 2) #define TILECTL_BACKSNOOP_DIS (1 << 3) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 52d3c02..d3f2557 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -110,6 +110,8 @@ static void bxt_init_clock_gating(struct drm_device *dev) GEN8_SDEUNIT_CLOCK_GATE_DISABLE | GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ); + /* FIXME: apply on A0 only */ + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); } static void i915_pineview_get_mem_freq(struct drm_device *dev) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 15/49] drm/i915/bxt: add GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ workaround
On 08/04/2015 14:10, Deak, Imre wrote: On ke, 2015-04-08 at 14:04 +0100, Nick Hoath wrote: On 17/03/2015 09:39, Imre Deak wrote: From: Ben Widawsky Signed-off-by: Ben Widawsky Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3369a11..b7ba061 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6104,6 +6104,7 @@ enum skl_disp_power_wells { #define GEN8_UCGCTL6 0x9430 #define GEN8_GAPSUNIT_CLOCK_GATE_DISABLE (1<<24) #define GEN8_SDEUNIT_CLOCK_GATE_DISABLE(1<<14) +#define GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1<<28) #define GEN6_GFXPAUSE0xA000 #define GEN6_RPNSWREQ0xA008 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d5dd0b3..52d3c02 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -103,10 +103,12 @@ static void bxt_init_clock_gating(struct drm_device *dev) /* * FIXME: * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only. +* GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only. Shouldn't this WA therefore have a check for 3x6 around it? */ /* WaDisableSDEUnitClockGating:bxt */ I can't find where WaDisableSDEUnitClockGating is listed as required for BXT? It's specified in BSpec GEN8_UCGCTL6 (0x9430) as required for BXT A0. --Imre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 17/49] drm/i915/skl: add WaDisableMaskBasedCammingInRCC workaround
On 20/03/2015 10:33, Deak, Imre wrote: On Fri, 2015-03-20 at 09:07 +, Nick Hoath wrote: On 17/03/2015 09:39, Imre Deak wrote: From: Ben Widawsky Signed-off-by: Ben Widawsky Signed-off-by: Imre Deak Bearing in mind having to revisit all these with the stepping checks: Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e23cbdc..000f608 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -970,8 +970,8 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) * FIXME: don't apply the following on BXT for stepping C. On BXT A0 * the flag reads back as 0. */ - /* WaDisableMaskBasedCammingInRCC:bxtA */ - if (IS_BROXTON(dev)) + /* WaDisableMaskBasedCammingInRCC:sklC,bxtA */ + if (INTEL_REVID(dev) == SKL_REVID_C0 || IS_BROXTON(dev)) This looks wrong. (IS_BROXTON && BXT_REVID_C0) || (IS_SKYLAKE && SKL_REVID_C0) please. It's correct though. gen9_init_workarounds() is called for Skylake or Broxton, so the condition is true either on Broxton regardless of the stepping, or on Skylake if the revid matches. Also on Broxton we have to _exclude_ the workaround on C0, so if we add the revid check for Broxton too, then we have to rewrite the condition to: (IS_BROXTON && INTEL_REVID != BXT_REVID_C0) || (IS_SKYLAKE && INTEL_REVID == SKL_REVID_C0) WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, PIXEL_MASK_CAMMING_DISABLE); ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
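Editorial aside: putting the two halves of this exchange together, the fully qualified check -- once the BXT C0 exclusion from the FIXME is also folded in, using the stepping macros from earlier in this thread -- would read roughly as below. This is a sketch of the condition as discussed, not a posted patch.

	/* WaDisableMaskBasedCammingInRCC:sklC,bxtA (sketch of the discussed form) */
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
	    (IS_BROXTON(dev) && INTEL_REVID(dev) != BXT_REVID_C0))
		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
				  PIXEL_MASK_CAMMING_DISABLE);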
Re: [Intel-gfx] [PATCH 15/49] drm/i915/bxt: add GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ workaround
On 08/04/2015 14:38, Nick Hoath wrote: On 08/04/2015 14:10, Deak, Imre wrote: On ke, 2015-04-08 at 14:04 +0100, Nick Hoath wrote: On 17/03/2015 09:39, Imre Deak wrote: From: Ben Widawsky Signed-off-by: Ben Widawsky Signed-off-by: Imre Deak Reviewed-by: Nick Hoath --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3369a11..b7ba061 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6104,6 +6104,7 @@ enum skl_disp_power_wells { #define GEN8_UCGCTL60x9430 #define GEN8_GAPSUNIT_CLOCK_GATE_DISABLE(1<<24) #define GEN8_SDEUNIT_CLOCK_GATE_DISABLE(1<<14) +#define GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1<<28) #define GEN6_GFXPAUSE0xA000 #define GEN6_RPNSWREQ0xA008 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d5dd0b3..52d3c02 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -103,10 +103,12 @@ static void bxt_init_clock_gating(struct drm_device *dev) /* * FIXME: * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only. + * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only. Shouldn't this WA therefore have a check for 3x6 around it? */ /* WaDisableSDEUnitClockGating:bxt */ I can't find where WaDisableSDEUnitClockGating is listed as required for BXT? It's specified in BSpec GEN8_UCGCTL6 (0x9430) as required for BXT A0. --Imre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 2/3] Removed duplicate members from submit_request
Where there were duplicate variables for the tail, context and ring (engine) in the gem request and the execlist queue item, use the one from the request and remove the duplicate from the execlist queue item. Issue: VIZ-4274 Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c| 21 + drivers/gpu/drm/i915/intel_lrc.h| 4 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0d6af1c..45da79e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1891,11 +1891,11 @@ static int i915_execlists(struct seq_file *m, void *data) if (head_req) { struct drm_i915_gem_object *ctx_obj; - ctx_obj = head_req->ctx->engine[ring_id].state; + ctx_obj = head_req->request->ctx->engine[ring_id].state; seq_printf(m, "\tHead request id: %u\n", intel_execlists_ctx_id(ctx_obj)); seq_printf(m, "\tHead request tail: %u\n", - head_req->tail); + head_req->request->tail); } seq_putc(m, '\n'); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b8e7018..f55bfdc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2623,7 +2623,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, execlist_link); list_del(&submit_req->execlist_link); intel_runtime_pm_put(dev_priv); - i915_gem_context_unreference(submit_req->ctx); + i915_gem_context_unreference(submit_req->request->ctx); kfree(submit_req); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8f301ac..3daf8ea 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -396,7 +396,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) execlist_link) { if (!req0) { req0 = cursor; - } else if (req0->ctx == cursor->ctx) { + } else if (req0->request->ctx == cursor->request->ctx) { /* Same ctx: ignore first request, as second request * will update tail past first request's workload */ cursor->elsp_submitted = req0->elsp_submitted; @@ -411,9 +411,9 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) WARN_ON(req1 && req1->elsp_submitted); - execlists_submit_contexts(ring, req0->ctx, req0->tail, - req1 ? req1->ctx : NULL, - req1 ? req1->tail : 0); + execlists_submit_contexts(ring, req0->request->ctx, req0->request->tail, + req1 ? req1->request->ctx : NULL, + req1 ? 
req1->request->tail : 0); req0->elsp_submitted++; if (req1) @@ -434,7 +434,7 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, if (head_req != NULL) { struct drm_i915_gem_object *ctx_obj = - head_req->ctx->engine[ring->id].state; + head_req->request->ctx->engine[ring->id].state; if (intel_execlists_ctx_id(ctx_obj) == request_id) { WARN(head_req->elsp_submitted == 0, "Never submitted head request\n"); @@ -514,13 +514,13 @@ static void execlists_free_request_task(struct work_struct *work) { struct intel_ctx_submit_request *req = container_of(work, struct intel_ctx_submit_request, work); - struct drm_device *dev = req->ring->dev; + struct drm_device *dev = req->request->ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; intel_runtime_pm_put(dev_priv); mutex_lock(&dev->struct_mutex); - i915_gem_context_unreference(req->ctx); + i915_gem_context_unreference(req->request->ctx); i915_gem_request_unreference(req->request); mutex_unlock(&dev->struct_mutex); @@ -540,10 +540,6 @@ static int execlists_context_queue(struct intel_engine_cs *ring, req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) return -ENOMEM; -
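Editorial aside: to make the deduplication concrete, after this patch the execlist queue item keeps only its list and bookkeeping fields plus the request pointer, and everything that used to be duplicated is reached through the request. The sketch below is reconstructed from the field accesses visible in the diff (with patch 1/3 of this series applied first), not copied from intel_lrc.h, so the exact field set is an assumption.

/* Before (duplicated state), as implied by the accesses removed above: */
struct intel_ctx_submit_request_before_sketch {
        struct intel_context *ctx;              /* duplicate of request->ctx  */
        struct intel_engine_cs *ring;           /* duplicate of request->ring */
        u32 tail;                               /* duplicate of request->tail */
        struct drm_i915_gem_request *request;
        struct list_head execlist_link;
        struct work_struct work;
        int elsp_submitted;
};

/* After: the referenced request is the single source of ctx/ring/tail. */
struct intel_ctx_submit_request_after_sketch {
        struct drm_i915_gem_request *request;
        struct list_head execlist_link;
        struct work_struct work;
        int elsp_submitted;
};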
[Intel-gfx] [RFC 1/3] execlist queue items to hold ptr/ref to gem_request
Add a reference and pointer from the execlist queue item to the associated gem request. For execlist requests that don't have a request, create one as a placeholder. This patchset requires John Harrison's "Replace seqno values with request structures" patchset. Issue: VIZ-4274 Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/intel_lrc.c | 31 +-- drivers/gpu/drm/i915/intel_lrc.h | 5 - 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 09d548d..8f301ac 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -521,6 +521,7 @@ static void execlists_free_request_task(struct work_struct *work) mutex_lock(&dev->struct_mutex); i915_gem_context_unreference(req->ctx); + i915_gem_request_unreference(req->request); mutex_unlock(&dev->struct_mutex); kfree(req); @@ -528,7 +529,8 @@ static void execlists_free_request_task(struct work_struct *work) static int execlists_context_queue(struct intel_engine_cs *ring, struct intel_context *to, - u32 tail) + u32 tail, + struct drm_i915_gem_request *request) { struct intel_ctx_submit_request *req = NULL, *cursor; struct drm_i915_private *dev_priv = ring->dev->dev_private; @@ -544,6 +546,22 @@ static int execlists_context_queue(struct intel_engine_cs *ring, req->tail = tail; INIT_WORK(&req->work, execlists_free_request_task); + if(!request) + { + /* +* If there isn't a request associated with this submission, +* create one as a temporary holder. +*/ + WARN(1, "execlist context submission without request"); + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + request->ctx = to; + request->ring = ring; + } + req->request = request; + i915_gem_request_reference(request); + intel_runtime_pm_get(dev_priv); spin_lock_irqsave(&ring->execlist_lock, flags); @@ -778,7 +796,8 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) * on a queue waiting for the ELSP to be ready to accept a new context submission. At that * point, the tail *inside* the context is updated and the ELSP written to. 
*/ -void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf, + struct drm_i915_gem_request *request) { struct intel_engine_cs *ring = ringbuf->ring; struct intel_context *ctx = ringbuf->FIXME_lrc_ctx; @@ -788,7 +807,7 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) if (intel_ring_stopped(ring)) return; - execlists_context_queue(ring, ctx, ringbuf->tail); + execlists_context_queue(ring, ctx, ringbuf->tail, request); } static int logical_ring_alloc_request(struct intel_engine_cs *ring, @@ -876,7 +895,7 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, return ret; /* Force the context submission in case we have been skipping it */ - intel_logical_ring_advance_and_submit(ringbuf); + intel_logical_ring_advance_and_submit(ringbuf, NULL); /* With GEM the hangcheck timer should kick us out of the loop, * leaving it early runs the risk of corrupting GEM state (due @@ -1183,7 +1202,7 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); } -static int gen8_emit_request(struct intel_ringbuffer *ringbuf) +static int gen8_emit_request(struct intel_ringbuffer *ringbuf, struct drm_i915_gem_request *request) { struct intel_engine_cs *ring = ringbuf->ring; u32 cmd; @@ -1205,7 +1224,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) i915_gem_request_get_seqno(ring->outstanding_lazy_request)); intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance_and_submit(ringbuf); + intel_logical_ring_advance_and_submit(ringbuf, request); return 0; } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 33c3b4b..6f81669 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -37,7 +37,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_in
[Intel-gfx] [RFC 3/3] drm/i915: Remove FIXME_lrc_ctx backpointer
The first pass implementation of execlists required a backpointer to the context to be held in the intel_ringbuffer. However the context pointer is available higher in the call stack. Remove the backpointer from the ring buffer structure and instead pass it down through the call stack. v2: Integrate this changeset with the removal of duplicate request/execlist queue item members. Signed-off-by: Nick Hoath Issue: VIZ-4268 --- drivers/gpu/drm/i915/i915_gem.c | 7 ++-- drivers/gpu/drm/i915/intel_lrc.c| 67 + drivers/gpu/drm/i915/intel_lrc.h| 8 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++--- 4 files changed, 56 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f55bfdc..11bd207 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2370,8 +2370,7 @@ int __i915_add_request(struct intel_engine_cs *ring, return -ENOMEM; if (i915.enable_execlists) { - struct intel_context *ctx = request->ctx; - ringbuf = ctx->engine[ring->id].ringbuf; + ringbuf = request->ctx->engine[ring->id].ringbuf; } else ringbuf = ring->buffer; @@ -2384,7 +2383,7 @@ int __i915_add_request(struct intel_engine_cs *ring, * what. */ if (i915.enable_execlists) { - ret = logical_ring_flush_all_caches(ringbuf); + ret = logical_ring_flush_all_caches(ringbuf, request->ctx); if (ret) return ret; } else { @@ -2406,7 +2405,7 @@ int __i915_add_request(struct intel_engine_cs *ring, request_ring_position = intel_ring_get_tail(ringbuf); if (i915.enable_execlists) { - ret = ring->emit_request(ringbuf, request); + ret = ring->emit_request(ringbuf, request->ctx, request); if (ret) return ret; } else { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3daf8ea..792186e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -555,6 +555,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring, request->ctx = to; request->ring = ring; } + else + { + WARN_ON(to != request->ctx); + } req->request = request; i915_gem_request_reference(request); i915_gem_context_reference(req->request->ctx); @@ -591,7 +595,8 @@ static int execlists_context_queue(struct intel_engine_cs *ring, return 0; } -static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) +static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf, + struct intel_context *ctx) { struct intel_engine_cs *ring = ringbuf->ring; uint32_t flush_domains; @@ -601,7 +606,8 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) if (ring->gpu_caches_dirty) flush_domains = I915_GEM_GPU_DOMAINS; - ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains); + ret = ring->emit_flush(ringbuf, ctx, + I915_GEM_GPU_DOMAINS, flush_domains); if (ret) return ret; @@ -610,6 +616,7 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) } static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, +struct intel_context *ctx, struct list_head *vmas) { struct intel_engine_cs *ring = ringbuf->ring; @@ -637,7 +644,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. 
*/ - return logical_ring_invalidate_all_caches(ringbuf); + return logical_ring_invalidate_all_caches(ringbuf, ctx); } /** @@ -717,13 +724,13 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, return -EINVAL; } - ret = execlists_move_to_gpu(ringbuf, vmas); + ret = execlists_move_to_gpu(ringbuf, ctx, vmas); if (ret) return ret; if (ring == &dev_priv->ring[RCS] && instp_mode != dev_priv->relative_constants_mode) { - ret = intel_logical_ring_begin(ringbuf, 4); + ret = intel_logical_ring_begin(ringbuf, ctx, 4); if (ret) return ret; @@ -736,7 +743,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, dev_priv-
[Intel-gfx] [PATCH 3/5] drm/i915: Remove FIXME_lrc_ctx backpointer
The first pass implementation of execlists required a backpointer to the context to be held in the intel_ringbuffer. However the context pointer is available higher in the call stack. Remove the backpointer from the ring buffer structure and instead pass it down through the call stack. v2: Integrate this changeset with the removal of duplicate request/execlist queue item members. Signed-off-by: Nick Hoath Issue: VIZ-4268 --- drivers/gpu/drm/i915/i915_gem.c | 7 ++-- drivers/gpu/drm/i915/intel_lrc.c| 67 + drivers/gpu/drm/i915/intel_lrc.h| 8 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++--- 4 files changed, 56 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5f521f..bd5a1e2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2451,8 +2451,7 @@ int __i915_add_request(struct intel_engine_cs *ring, return -ENOMEM; if (i915.enable_execlists) { - struct intel_context *ctx = request->ctx; - ringbuf = ctx->engine[ring->id].ringbuf; + ringbuf = request->ctx->engine[ring->id].ringbuf; } else ringbuf = ring->buffer; @@ -2465,7 +2464,7 @@ int __i915_add_request(struct intel_engine_cs *ring, * what. */ if (i915.enable_execlists) { - ret = logical_ring_flush_all_caches(ringbuf); + ret = logical_ring_flush_all_caches(ringbuf, request->ctx); if (ret) return ret; } else { @@ -2487,7 +2486,7 @@ int __i915_add_request(struct intel_engine_cs *ring, request_ring_position = intel_ring_get_tail(ringbuf); if (i915.enable_execlists) { - ret = ring->emit_request(ringbuf, request); + ret = ring->emit_request(ringbuf, request->ctx, request); if (ret) return ret; } else { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0e2e33b..4bd9572 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -555,6 +555,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring, request->ctx = to; request->ring = ring; } + else + { + WARN_ON(to != request->ctx); + } req->request = request; i915_gem_request_reference(request); i915_gem_context_reference(req->request->ctx); @@ -591,7 +595,8 @@ static int execlists_context_queue(struct intel_engine_cs *ring, return 0; } -static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) +static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf, + struct intel_context *ctx) { struct intel_engine_cs *ring = ringbuf->ring; uint32_t flush_domains; @@ -601,7 +606,8 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) if (ring->gpu_caches_dirty) flush_domains = I915_GEM_GPU_DOMAINS; - ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains); + ret = ring->emit_flush(ringbuf, ctx, + I915_GEM_GPU_DOMAINS, flush_domains); if (ret) return ret; @@ -610,6 +616,7 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) } static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, +struct intel_context *ctx, struct list_head *vmas) { struct intel_engine_cs *ring = ringbuf->ring; @@ -637,7 +644,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. 
*/ - return logical_ring_invalidate_all_caches(ringbuf); + return logical_ring_invalidate_all_caches(ringbuf, ctx); } /** @@ -717,13 +724,13 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, return -EINVAL; } - ret = execlists_move_to_gpu(ringbuf, vmas); + ret = execlists_move_to_gpu(ringbuf, ctx, vmas); if (ret) return ret; if (ring == &dev_priv->ring[RCS] && instp_mode != dev_priv->relative_constants_mode) { - ret = intel_logical_ring_begin(ringbuf, 4); + ret = intel_logical_ring_begin(ringbuf, ctx, 4); if (ret) return ret; @@ -736,7 +743,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, dev_priv-
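To make the refactor in both versions of this patch concrete: instead of the ring buffer carrying a FIXME_lrc_ctx backpointer that helpers dereference, the context is passed down explicitly from callers that already hold it. A rough before/after sketch, using stand-in types rather than the real intel_ringbuffer/intel_context structures:

/* Stand-in types, purely for illustration. */
struct ctx;

struct ringbuf_before {
	struct ctx *fixme_lrc_ctx;	/* hidden backpointer, to be removed */
	unsigned int tail;
};

struct ringbuf_after {
	unsigned int tail;		/* no context backpointer any more */
};

/* Before: helpers reach back through the ring buffer for the context. */
static void emit_flush_before(struct ringbuf_before *rb)
{
	struct ctx *ctx = rb->fixme_lrc_ctx;	/* implicit dependency */
	(void)ctx;				/* ... emit flush for ctx ... */
}

/* After: the caller, which already knows the context higher up the call
 * stack, passes it down, so the coupling is explicit. */
static void emit_flush_after(struct ringbuf_after *rb, struct ctx *ctx)
{
	(void)rb;
	(void)ctx;				/* ... emit flush for ctx ... */
}

The win is that the ring buffer no longer has to be kept in sync with whichever context is currently being emitted into.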
[Intel-gfx] [PATCH 4/5] drm/i915: Subsume intel_ctx_submit_request in to drm_i915_gem_request
Move all remaining elements that were unique to execlists queue items in to the associated request. Signed-off-by: Nick Hoath Issue: VIZ-4274 --- drivers/gpu/drm/i915/i915_debugfs.c | 8 +++ drivers/gpu/drm/i915/i915_drv.h | 22 + drivers/gpu/drm/i915/i915_gem.c | 6 ++--- drivers/gpu/drm/i915/intel_lrc.c| 47 + drivers/gpu/drm/i915/intel_lrc.h| 28 -- 5 files changed, 50 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 45da79e..9ce9a02 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1851,7 +1851,7 @@ static int i915_execlists(struct seq_file *m, void *data) intel_runtime_pm_get(dev_priv); for_each_ring(ring, dev_priv, ring_id) { - struct intel_ctx_submit_request *head_req = NULL; + struct drm_i915_gem_request *head_req = NULL; int count = 0; unsigned long flags; @@ -1884,18 +1884,18 @@ static int i915_execlists(struct seq_file *m, void *data) list_for_each(cursor, &ring->execlist_queue) count++; head_req = list_first_entry_or_null(&ring->execlist_queue, - struct intel_ctx_submit_request, execlist_link); + struct drm_i915_gem_request, execlist_link); spin_unlock_irqrestore(&ring->execlist_lock, flags); seq_printf(m, "\t%d requests in queue\n", count); if (head_req) { struct drm_i915_gem_object *ctx_obj; - ctx_obj = head_req->request->ctx->engine[ring_id].state; + ctx_obj = head_req->ctx->engine[ring_id].state; seq_printf(m, "\tHead request id: %u\n", intel_execlists_ctx_id(ctx_obj)); seq_printf(m, "\tHead request tail: %u\n", - head_req->request->tail); + head_req->tail); } seq_putc(m, '\n'); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index afa9c35..0fe238c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2027,6 +2027,28 @@ struct drm_i915_gem_request { struct list_head free_list; uint32_t uniq; + + /** +* The ELSP only accepts two elements at a time, so we queue context/tail +* pairs on a given queue (ring->execlist_queue) until the hardware is +* available. The queue serves a double purpose: we also use it to keep track +* of the up to 2 contexts currently in the hardware (usually one in execution +* and the other queued up by the GPU): We only remove elements from the head +* of the queue when the hardware informs us that an element has been +* completed. +* +* All accesses to the queue are mediated by a spinlock (ring->execlist_lock). +*/ + + /** Execlist link in the submission queue.*/ + struct list_head execlist_link; + + /** Execlists workqueue for processing this request in a bottom half */ + struct work_struct work; + + /** Execlists no. 
of times this request has been sent to the ELSP */ + int elsp_submitted; + }; void i915_gem_request_free(struct kref *req_ref); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bd5a1e2..4d2d2e5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2696,14 +2696,14 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, } while (!list_empty(&ring->execlist_queue)) { - struct intel_ctx_submit_request *submit_req; + struct drm_i915_gem_request *submit_req; submit_req = list_first_entry(&ring->execlist_queue, - struct intel_ctx_submit_request, + struct drm_i915_gem_request, execlist_link); list_del(&submit_req->execlist_link); intel_runtime_pm_put(dev_priv); - i915_gem_context_unreference(submit_req->request->ctx); + i915_gem_context_unreference(submit_req->ctx); kfree(submit_req); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 4bd9572..b6ec012 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -382,8 +382,8 @@ static void execlists_submit_contexts(struct intel_engine_cs *ring, static void execlists_context_un
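To picture the end state of this patch: the execlist bookkeeping now lives in the request itself, so queueing for the ELSP is just linking the request onto the engine's list under the execlist lock. A simplified sketch with stand-in structures (field names follow the diff; the list and spinlock primitives are the ordinary kernel ones, and the real code also takes runtime-pm and context references that are omitted here):

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct intel_context;			/* opaque in this sketch */

/* Stand-in for the engine: only the parts the execlist queue needs. */
struct engine_stub {
	spinlock_t execlist_lock;	/* guards execlist_queue */
	struct list_head execlist_queue;/* pending context/tail pairs */
};

/* Stand-in for drm_i915_gem_request after the merge: the fields that used
 * to live in intel_ctx_submit_request are now embedded in the request. */
struct request_stub {
	struct intel_context *ctx;	/* context to submit */
	u32 tail;			/* tail within that context */
	struct list_head execlist_link;	/* link in engine->execlist_queue */
	int elsp_submitted;		/* times written to the ELSP */
};

/* Queueing no longer allocates a separate submit_request object. */
static void queue_for_execlists(struct engine_stub *e, struct request_stub *rq)
{
	unsigned long flags;

	spin_lock_irqsave(&e->execlist_lock, flags);
	list_add_tail(&rq->execlist_link, &e->execlist_queue);
	spin_unlock_irqrestore(&e->execlist_lock, flags);
}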
[Intel-gfx] [PATCH 1/5] drm/i915: execlist request keeps ptr/ref to gem_request
Add a reference and pointer from the execlist queue item to the associated gem request. For execlist requests that don't have a request, create one as a placeholder. This patchset requires John Harrison's "Replace seqno values with request structures" patchset. Issue: VIZ-4274 Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/intel_lrc.c | 31 +-- drivers/gpu/drm/i915/intel_lrc.h | 5 - 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8b3827..593471f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -521,6 +521,7 @@ static void execlists_free_request_task(struct work_struct *work) mutex_lock(&dev->struct_mutex); i915_gem_context_unreference(req->ctx); + i915_gem_request_unreference(req->request); mutex_unlock(&dev->struct_mutex); kfree(req); @@ -528,7 +529,8 @@ static void execlists_free_request_task(struct work_struct *work) static int execlists_context_queue(struct intel_engine_cs *ring, struct intel_context *to, - u32 tail) + u32 tail, + struct drm_i915_gem_request *request) { struct intel_ctx_submit_request *req = NULL, *cursor; struct drm_i915_private *dev_priv = ring->dev->dev_private; @@ -544,6 +546,22 @@ static int execlists_context_queue(struct intel_engine_cs *ring, req->tail = tail; INIT_WORK(&req->work, execlists_free_request_task); + if(!request) + { + /* +* If there isn't a request associated with this submission, +* create one as a temporary holder. +*/ + WARN(1, "execlist context submission without request"); + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + request->ctx = to; + request->ring = ring; + } + req->request = request; + i915_gem_request_reference(request); + intel_runtime_pm_get(dev_priv); spin_lock_irqsave(&ring->execlist_lock, flags); @@ -778,7 +796,8 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) * on a queue waiting for the ELSP to be ready to accept a new context submission. At that * point, the tail *inside* the context is updated and the ELSP written to. 
*/ -void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf, + struct drm_i915_gem_request *request) { struct intel_engine_cs *ring = ringbuf->ring; struct intel_context *ctx = ringbuf->FIXME_lrc_ctx; @@ -788,7 +807,7 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) if (intel_ring_stopped(ring)) return; - execlists_context_queue(ring, ctx, ringbuf->tail); + execlists_context_queue(ring, ctx, ringbuf->tail, request); } static int logical_ring_alloc_request(struct intel_engine_cs *ring, @@ -876,7 +895,7 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, return ret; /* Force the context submission in case we have been skipping it */ - intel_logical_ring_advance_and_submit(ringbuf); + intel_logical_ring_advance_and_submit(ringbuf, NULL); /* With GEM the hangcheck timer should kick us out of the loop, * leaving it early runs the risk of corrupting GEM state (due @@ -1221,7 +1240,7 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); } -static int gen8_emit_request(struct intel_ringbuffer *ringbuf) +static int gen8_emit_request(struct intel_ringbuffer *ringbuf, struct drm_i915_gem_request *request) { struct intel_engine_cs *ring = ringbuf->ring; u32 cmd; @@ -1243,7 +1262,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) i915_gem_request_get_seqno(ring->outstanding_lazy_request)); intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance_and_submit(ringbuf); + intel_logical_ring_advance_and_submit(ringbuf, request); return 0; } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 33c3b4b..6f81669 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -37,7 +37,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_in
[Intel-gfx] [PATCH 5/5] drm/i915: Change workaround execlist submission to use gem requests.
Signed-off-by: Nick Hoath Issue: VIZ-4274 --- drivers/gpu/drm/i915/intel_lrc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b6ec012..f3f1428 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1030,11 +1030,11 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring, return 0; ring->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(ringbuf); + ret = logical_ring_flush_all_caches(ringbuf, ctx); if (ret) return ret; - ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2); + ret = intel_logical_ring_begin(ringbuf, ctx, w->count * 2 + 2); if (ret) return ret; @@ -1048,7 +1048,7 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring, intel_logical_ring_advance(ringbuf); ring->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(ringbuf); + ret = logical_ring_flush_all_caches(ringbuf, ctx); if (ret) return ret; -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 0/5] drm/i915: Untangle execlist tracking
This patchset merges execlist queue items into gem requests. It does this by using the reference count added by John Harrison's "Replace seqno values with request structures" patchset to ensure that the gem request is available for the whole execlist submission lifespan. v2: merge intel_ctx_submit_request and drm_i915_gem_request, rebase changes & add cover letter Issue: VIZ-4274 Nick Hoath (5): drm/i915: execlist request keeps ptr/ref to gem_request drm/i915: Removed duplicate members from submit_request drm/i915: Remove FIXME_lrc_ctx backpointer drm/i915: Subsume intel_ctx_submit_request in to drm_i915_gem_request drm/i915: Change workaround execlist submission to use gem requests. drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 22 ++ drivers/gpu/drm/i915/i915_gem.c | 11 ++- drivers/gpu/drm/i915/intel_lrc.c| 126 +++- drivers/gpu/drm/i915/intel_lrc.h| 41 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 12 ++- 6 files changed, 119 insertions(+), 97 deletions(-) -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
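The lifetime argument in this cover letter is the key point: once the queue item and the request are the same object, the execlist layer must hold its own reference so the request (and the context/tail it carries) cannot be freed by normal retirement while it is still in flight on the ELSP. A hedged sketch of that pairing, using the kernel's kref rather than the driver's i915_gem_request_reference()/_unreference() helpers:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct request_stub {
	struct kref ref;
	/* ... plus the ctx, tail, execlist_link and elsp_submitted fields
	 * shown in the earlier sketch ... */
};

static void request_stub_free(struct kref *kref)
{
	kfree(container_of(kref, struct request_stub, ref));
}

/* Submission side: taken when the request goes onto the execlist queue,
 * so it outlives retirement by the GEM core. */
static void execlist_get(struct request_stub *rq)
{
	kref_get(&rq->ref);
}

/* Completion side: dropped only after the hardware has finished with the
 * element and it has been unlinked from the queue. */
static void execlist_put(struct request_stub *rq)
{
	kref_put(&rq->ref, request_stub_free);
}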
[Intel-gfx] [PATCH 2/5] drm/i915: Removed duplicate members from submit_request
Where there were duplicate variables for the tail, context and ring (engine) in the gem request and the execlist queue item, use the one from the request and remove the duplicate from the execlist queue item. Issue: VIZ-4274 Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c| 21 + drivers/gpu/drm/i915/intel_lrc.h| 4 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0d6af1c..45da79e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1891,11 +1891,11 @@ static int i915_execlists(struct seq_file *m, void *data) if (head_req) { struct drm_i915_gem_object *ctx_obj; - ctx_obj = head_req->ctx->engine[ring_id].state; + ctx_obj = head_req->request->ctx->engine[ring_id].state; seq_printf(m, "\tHead request id: %u\n", intel_execlists_ctx_id(ctx_obj)); seq_printf(m, "\tHead request tail: %u\n", - head_req->tail); + head_req->request->tail); } seq_putc(m, '\n'); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 860c296..e5f521f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2704,7 +2704,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, execlist_link); list_del(&submit_req->execlist_link); intel_runtime_pm_put(dev_priv); - i915_gem_context_unreference(submit_req->ctx); + i915_gem_context_unreference(submit_req->request->ctx); kfree(submit_req); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 593471f..0e2e33b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -396,7 +396,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) execlist_link) { if (!req0) { req0 = cursor; - } else if (req0->ctx == cursor->ctx) { + } else if (req0->request->ctx == cursor->request->ctx) { /* Same ctx: ignore first request, as second request * will update tail past first request's workload */ cursor->elsp_submitted = req0->elsp_submitted; @@ -411,9 +411,9 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) WARN_ON(req1 && req1->elsp_submitted); - execlists_submit_contexts(ring, req0->ctx, req0->tail, - req1 ? req1->ctx : NULL, - req1 ? req1->tail : 0); + execlists_submit_contexts(ring, req0->request->ctx, req0->request->tail, + req1 ? req1->request->ctx : NULL, + req1 ? 
req1->request->tail : 0); req0->elsp_submitted++; if (req1) @@ -434,7 +434,7 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, if (head_req != NULL) { struct drm_i915_gem_object *ctx_obj = - head_req->ctx->engine[ring->id].state; + head_req->request->ctx->engine[ring->id].state; if (intel_execlists_ctx_id(ctx_obj) == request_id) { WARN(head_req->elsp_submitted == 0, "Never submitted head request\n"); @@ -514,13 +514,13 @@ static void execlists_free_request_task(struct work_struct *work) { struct intel_ctx_submit_request *req = container_of(work, struct intel_ctx_submit_request, work); - struct drm_device *dev = req->ring->dev; + struct drm_device *dev = req->request->ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; intel_runtime_pm_put(dev_priv); mutex_lock(&dev->struct_mutex); - i915_gem_context_unreference(req->ctx); + i915_gem_context_unreference(req->request->ctx); i915_gem_request_unreference(req->request); mutex_unlock(&dev->struct_mutex); @@ -540,10 +540,6 @@ static int execlists_context_queue(struct intel_engine_cs *ring, req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) return -ENOMEM; -
Re: [Intel-gfx] [PATCH 4/5] drm/i915: Subsume intel_ctx_submit_request in to drm_i915_gem_request
On 12/11/2014 11:24, Chris Wilson wrote: On Wed, Nov 12, 2014 at 10:53:26AM +, Nick Hoath wrote: seq_putc(m, '\n'); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index afa9c35..0fe238c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2027,6 +2027,28 @@ struct drm_i915_gem_request { struct list_head free_list; uint32_t uniq; + + /** +* The ELSP only accepts two elements at a time, so we queue context/tail +* pairs on a given queue (ring->execlist_queue) until the hardware is +* available. The queue serves a double purpose: we also use it to keep track +* of the up to 2 contexts currently in the hardware (usually one in execution +* and the other queued up by the GPU): We only remove elements from the head +* of the queue when the hardware informs us that an element has been +* completed. +* +* All accesses to the queue are mediated by a spinlock (ring->execlist_lock). +*/ + + /** Execlist link in the submission queue.*/ + struct list_head execlist_link; This is redundant. The request should only be one of the pending or active lists at any time. This is used by the pending execlist requests list owned by the intel_engine_cs. The request isn't in both the active and pending execlist engine lists. + /** Execlists workqueue for processing this request in a bottom half */ + struct work_struct work; For what purpose? This is not needed. This worker is currently used to free up execlist requests. This goes away when Thomas Daniel's patchset is merged. I have spotted a bug in the cleanup handler with the merged requests/execlists cleanup though. + /** Execlists no. of times this request has been sent to the ELSP */ + int elsp_submitted; A request can only be submitted exactly once at any time. This bookkeeping is not part of the request. This is a refcount to preserve the request if it has been resubmitted due to preemption or TDR, due to a race condition between the HW finishing with the item and the cleanup/resubmission. Have a look at e1fee72c2ea2e9c0c6e6743d32a6832f21337d6c which contains a much better description of why this exists. Still not detangled I am afraid. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
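A sketch may help readers follow the elsp_submitted point in this exchange: the counter records how many times the head element has been written to the ELSP (more than once when the same context is resubmitted with a newer tail), so the completion path only retires the element once every submission has been accounted for. This is a simplified take on the driver's execlists_check_remove_request() logic, reusing the stand-in engine/request structures from the sketches above:

/* Called (under execlist_lock) when the hardware reports that the element
 * at the head of the queue has completed. */
static bool retire_head_if_done(struct engine_stub *e)
{
	struct request_stub *head =
		list_first_entry_or_null(&e->execlist_queue,
					 struct request_stub, execlist_link);

	if (!head)
		return false;

	WARN_ON(head->elsp_submitted == 0);	/* completed but never submitted? */

	/* The same head may have gone to the ELSP more than once
	 * (resubmission); keep it until all completions have arrived. */
	if (--head->elsp_submitted > 0)
		return false;

	list_del(&head->execlist_link);
	execlist_put(head);			/* drop the queue's reference */
	return true;
}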
[Intel-gfx] [PATCH 2/4] drm/i915/bxt: Enable WaDSRefCountFullforceMissDisable
From: Rafael Barbalho Signed-off-by: Rafael Barbalho Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/intel_pm.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d635d0a..f29e575 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -60,10 +60,13 @@ static void gen9_init_clock_gating(struct drm_device *dev) I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - /* WaVSRefCountFullforceMissDisable:skl,bxt */ + /* +* WaVSRefCountFullforceMissDisable:skl,bxt +* WaDSRefCountFullforceMissDisable:skl,bxt +*/ I915_WRITE(GEN7_FF_THREAD_MODE, I915_READ(GEN7_FF_THREAD_MODE) & - ~(GEN7_FF_VS_REF_CNT_FFME)); + ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); } static void skl_init_clock_gating(struct drm_device *dev) -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/4] drm/i915/bxt: Enable WaVSRefCountFullforceMissDisable
From: Rafael Barbalho Signed-off-by: Rafael Barbalho Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/intel_pm.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 32ff034..d635d0a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -59,6 +59,11 @@ static void gen9_init_clock_gating(struct drm_device *dev) /* WaEnableLbsSlaRetryTimerDecrement:skl */ I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaVSRefCountFullforceMissDisable:skl,bxt */ + I915_WRITE(GEN7_FF_THREAD_MODE, + I915_READ(GEN7_FF_THREAD_MODE) & + ~(GEN7_FF_VS_REF_CNT_FFME)); } static void skl_init_clock_gating(struct drm_device *dev) -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/4] drm/i915/bxt: Enable WaOCLCoherentLineFlush
Signed-off-by: Nick Hoath Cc: Rafael Barbalho --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 4 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b9f6b8c..115911a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5807,6 +5807,7 @@ enum skl_disp_power_wells { #define GEN7_WA_L3_CHICKEN_MODE 0x2000 #define GEN7_L3SQCREG4 0xb034 +#define GEN8_PIPELINE_FLUSH_COHERENT_LINES(1<<21) #define L3SQ_URB_READ_CAM_MATCH_DISABLE (1<<27) #define GEN8_L3SQCREG4 0xb118 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f29e575..26ef146 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -67,6 +67,10 @@ static void gen9_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN7_FF_THREAD_MODE, I915_READ(GEN7_FF_THREAD_MODE) & ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); + + /* WaOCLCoherentLineFlush:skl,bxt */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN8_PIPELINE_FLUSH_COHERENT_LINES); } static void skl_init_clock_gating(struct drm_device *dev) -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 4/4] drm/i915/bxt: Clean up bxt_init_clock_gating
Add stepping check for A0 workarounds, and remove the associated FIXME tags. Split out unrelated WAs for later condition checking. v2: Fixed format (PeterL) v3: Corrected stepping check for WaDisableSDEUnitClockGating - Ignoring comment, following hardware spec instead. (ChrisH) Added description for TILECTL setting (JonB) Cc: Peter Lawthers Cc: Chris Harris Cc: Jon Bloomfield Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/intel_pm.c | 16 +++- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 26ef146..86a4ced 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -115,18 +115,24 @@ static void bxt_init_clock_gating(struct drm_device *dev) gen9_init_clock_gating(dev); + /* WaDisableSDEUnitClockGating:bxt */ + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + /* * FIXME: -* GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only. * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only. */ -/* WaDisableSDEUnitClockGating:bxt */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | - GEN8_SDEUNIT_CLOCK_GATE_DISABLE | GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ); - /* FIXME: apply on A0 only */ - I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); + if (INTEL_REVID(dev) == BXT_REVID_A0) { + /* +* Hardware specification requires this bit to be +* set to 1 for A0 +*/ + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); + } } static void i915_pineview_get_mem_freq(struct drm_device *dev) -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
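All four patches in this series use the same read-modify-write idiom for workaround bits, and this last one adds the stepping gate that replaces the A0 FIXMEs. A generic sketch of the idiom with stand-in MMIO helpers and hypothetical register/bit values (the real code uses I915_READ()/I915_WRITE(), offsets from i915_reg.h and INTEL_REVID()/BXT_REVID_A0):

#include <stdint.h>

/* Stand-in MMIO model, purely for illustration. */
static uint32_t regs[0x10000];
static uint32_t mmio_read(uint32_t reg) { return regs[reg]; }
static void mmio_write(uint32_t reg, uint32_t val) { regs[reg] = val; }
static int stepping;			/* 0 == A0 in this sketch */

#define REG_CLOCK_GATE	0x9430		/* hypothetical offset */
#define WA_ALWAYS	(1u << 14)	/* applies to all steppings */
#define WA_A0_ONLY	(1u << 5)	/* early hardware only */

static void init_clock_gating_workarounds(void)
{
	/* Unconditional WA: OR the bit in, preserving everything else. */
	mmio_write(REG_CLOCK_GATE, mmio_read(REG_CLOCK_GATE) | WA_ALWAYS);

	/* Stepping-gated WA: the "FIXME: apply on A0 only" comment becomes
	 * an explicit revision check. */
	if (stepping == 0)
		mmio_write(REG_CLOCK_GATE,
			   mmio_read(REG_CLOCK_GATE) | WA_A0_ONLY);
}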
[Intel-gfx] [PATCH 0/4] drm/i915: Extra GEN 9 workaround patches
Nick Hoath (2): drm/i915/bxt: Enable WaOCLCoherentLineFlush drm/i915/bxt: Clean up bxt_init_clock_gating Rafael Barbalho (2): drm/i915/bxt: Enable WaVSRefCountFullforceMissDisable drm/i915/bxt: Enable WaDSRefCountFullforceMissDisable drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 28 +++- 2 files changed, 24 insertions(+), 5 deletions(-) -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/4] drm/i915/bxt: Enable WaVSRefCountFullforceMissDisable
On 29/06/2015 15:08, Mika Kuoppala wrote: Hi, Nick Hoath writes: From: Rafael Barbalho Signed-off-by: Rafael Barbalho Signed-off-by: Nick Hoath --- drivers/gpu/drm/i915/intel_pm.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 32ff034..d635d0a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -59,6 +59,11 @@ static void gen9_init_clock_gating(struct drm_device *dev) /* WaEnableLbsSlaRetryTimerDecrement:skl */ I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaVSRefCountFullforceMissDisable:skl,bxt */ + I915_WRITE(GEN7_FF_THREAD_MODE, + I915_READ(GEN7_FF_THREAD_MODE) & + ~(GEN7_FF_VS_REF_CNT_FFME)); } This bit 19 seems to be about Tessellation DOP gating disable from gen9 onwards. And with that workaround name, the applicability should be hsw,bdw. I am confused. The specs say these WAs are required for GEN9+, BDW & HSW. So I'm at a loss to see the confusion. -Mika static void skl_init_clock_gating(struct drm_device *dev) -- 2.1.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Split alloc from init for lrc
On 18/08/2015 15:31, Chris Wilson wrote: On Tue, Aug 18, 2015 at 03:23:32PM +0100, Nick Hoath wrote: Extend init/init_hw split to context init. - Move context initialisation in to i915_gem_init_hw - Move one off initialisation for render ring to i915_gem_validate_context - Move default context initialisation to logical_ring_init Rename intel_lr_context_deferred_create to intel_lr_context_deferred_alloc, to reflect reduced functionality & alloc/init split. This patch is intended to split out the allocation of resources & initialisation to allow easier reuse of code for resume/gpu reset. v2: Removed function ptr wrapping of do_switch_context (Daniel Vetter) Left ->init_context int intel_lr_context_deferred_alloc (Daniel Vetter) Remove unnecessary init flag & ring type test. (Daniel Vetter) Improve commit message (Daniel Vetter) Issue: VIZ-4798 Signed-off-by: Nick Hoath Cc: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h| 1 - drivers/gpu/drm/i915/i915_gem.c| 12 +-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +- drivers/gpu/drm/i915/intel_lrc.c | 147 ++--- drivers/gpu/drm/i915/intel_lrc.h | 4 +- 5 files changed, 80 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f7fd519..844ccf0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -880,7 +880,6 @@ struct intel_context { } legacy_hw_ctx; /* Execlists */ - bool rcs_initialized; struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 73293b4..3ccef2d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4603,14 +4603,8 @@ int i915_gem_init_rings(struct drm_device *dev) goto cleanup_vebox_ring; } - ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); - if (ret) - goto cleanup_bsd2_ring; - return 0; -cleanup_bsd2_ring: - intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); cleanup_blt_ring: @@ -4629,6 +4623,7 @@ i915_gem_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; int ret, i, j; + struct drm_i915_gem_request *req; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4680,9 +4675,12 @@ i915_gem_init_hw(struct drm_device *dev) goto out; } + ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); + if (ret) + goto out; This is the wrong location. Just kill set_seqno, the experiment has run its course and we now have a n igt to exercise seqno wraparound. It has to be here as the seqno has to be initialised before it is used to create requests for the initialisation. According to the commit history, the seqno has to be initialised to non-zero for proper functioning. Is this no longer true? Maybe it should just be set to 1 instead of ~0-0x1000 -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
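For readers following the seqno discussion here: the point of seeding just below the 32-bit wrap rather than at 1 is that wraparound then happens within a few thousand requests instead of after roughly four billion, so the wrap-handling paths get exercised almost immediately. A quick back-of-the-envelope check of the value being discussed:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* The historical seed: just below the 32-bit wrap point. */
	uint32_t next_seqno = ~(uint32_t)0 - 0x1000;	/* 0xffffefff */

	/* Each request consumes roughly one seqno, so the counter wraps
	 * after about 0x1000 requests instead of ~4 billion. */
	printf("seed = 0x%08x, wraps after ~%u requests\n",
	       (unsigned)next_seqno, (unsigned)(0u - next_seqno));
	return 0;
}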
[Intel-gfx] [PATCH] drm/i915: Split alloc from init for lrc
Extend init/init_hw split to context init. - Move context initialisation in to i915_gem_init_hw - Move one off initialisation for render ring to i915_gem_validate_context - Move default context initialisation to logical_ring_init Rename intel_lr_context_deferred_create to intel_lr_context_deferred_alloc, to reflect reduced functionality & alloc/init split. This patch is intended to split out the allocation of resources & initialisation to allow easier reuse of code for resume/gpu reset. v2: Removed function ptr wrapping of do_switch_context (Daniel Vetter) Left ->init_context int intel_lr_context_deferred_alloc (Daniel Vetter) Remove unnecessary init flag & ring type test. (Daniel Vetter) Improve commit message (Daniel Vetter) v3: On init/reinit, set the hw next sequence number to the sw next sequence number. This is set to 1 at driver load time. This prevents the seqno being reset on reinit (Chris Wilson) Issue: VIZ-4798 Signed-off-by: Nick Hoath Cc: Daniel Vetter Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h| 1 - drivers/gpu/drm/i915/i915_gem.c| 18 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +- drivers/gpu/drm/i915/intel_lrc.c | 147 ++--- drivers/gpu/drm/i915/intel_lrc.h | 4 +- 5 files changed, 86 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f7fd519..844ccf0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -880,7 +880,6 @@ struct intel_context { } legacy_hw_ctx; /* Execlists */ - bool rcs_initialized; struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 73293b4..eb7c1f2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4603,14 +4603,8 @@ int i915_gem_init_rings(struct drm_device *dev) goto cleanup_vebox_ring; } - ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); - if (ret) - goto cleanup_bsd2_ring; - return 0; -cleanup_bsd2_ring: - intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); cleanup_blt_ring: @@ -4629,6 +4623,7 @@ i915_gem_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; int ret, i, j; + struct drm_i915_gem_request *req; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4680,9 +4675,12 @@ i915_gem_init_hw(struct drm_device *dev) goto out; } + ret = i915_gem_set_seqno(dev, dev_priv->next_seqno); + if (ret) + goto out; + /* Now it is safe to go back round and do everything else: */ for_each_ring(ring, dev_priv, i) { - struct drm_i915_gem_request *req; WARN_ON(!ring->default_context); @@ -4881,6 +4879,12 @@ i915_gem_load(struct drm_device *dev) dev_priv->num_fence_regs = I915_READ(vgtif_reg(avail_rs.fence_num)); + /* +* Set initial sequence number for requests. 
+ */ + dev_priv->next_seqno = 1; + dev_priv->last_seqno = ~((uint32_t)0); + /* Initialize fence registers to zero */ INIT_LIST_HEAD(&dev_priv->mm.fence_list); i915_gem_restore_fences(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 923a3c4..95f1a0d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -994,6 +994,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, { struct intel_context *ctx = NULL; struct i915_ctx_hang_stats *hs; + int ret; if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE) return ERR_PTR(-EINVAL); @@ -1009,7 +1010,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, } if (i915.enable_execlists && !ctx->engine[ring->id].state) { - int ret = intel_lr_context_deferred_create(ctx, ring); + ret = intel_lr_context_deferred_alloc(ctx, ring); if (ret) { DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 138964a..d0dc6b5 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1426,11 +1426,31 @@
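The structure this patch is driving at is a one-off allocation step and a repeatable hardware-init step, so that resume and GPU reset can re-run only the latter. A minimal sketch of that split with stand-in names (the real entry points are intel_lr_context_deferred_alloc() and the init_hw path):

/* Stand-in state; the real code allocates a context object and ring buffer. */
struct lrc_stub {
	void *state_obj;	/* allocated once */
	void *ringbuf;		/* allocated once */
};

/* One-off: allocate backing resources, touch no hardware. */
static int lrc_alloc(struct lrc_stub *lrc)
{
	(void)lrc;		/* ... allocate state_obj and ringbuf ... */
	return 0;
}

/* Repeatable: program the already-allocated state into the hardware.
 * Safe to call again on resume or after a GPU reset. */
static int lrc_init_hw(struct lrc_stub *lrc)
{
	(void)lrc;
	return 0;
}

static int resume_or_reset(struct lrc_stub *lrc)
{
	/* Nothing to allocate here: only the hardware step is re-run. */
	return lrc_init_hw(lrc);
}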
Re: [Intel-gfx] [PATCH] drm/i915: Split alloc from init for lrc
On 19/08/2015 13:37, Chris Wilson wrote: On Wed, Aug 19, 2015 at 01:24:28PM +0100, Nick Hoath wrote: Extend init/init_hw split to context init. - Move context initialisation in to i915_gem_init_hw - Move one off initialisation for render ring to i915_gem_validate_context - Move default context initialisation to logical_ring_init Rename intel_lr_context_deferred_create to intel_lr_context_deferred_alloc, to reflect reduced functionality & alloc/init split. This patch is intended to split out the allocation of resources & initialisation to allow easier reuse of code for resume/gpu reset. v2: Removed function ptr wrapping of do_switch_context (Daniel Vetter) Left ->init_context int intel_lr_context_deferred_alloc (Daniel Vetter) Remove unnecessary init flag & ring type test. (Daniel Vetter) Improve commit message (Daniel Vetter) v3: On init/reinit, set the hw next sequence number to the sw next sequence number. This is set to 1 at driver load time. This prevents the seqno being reset on reinit (Chris Wilson) Issue: VIZ-4798 Signed-off-by: Nick Hoath Cc: Daniel Vetter Cc: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h| 1 - drivers/gpu/drm/i915/i915_gem.c| 18 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +- drivers/gpu/drm/i915/intel_lrc.c | 147 ++--- drivers/gpu/drm/i915/intel_lrc.h | 4 +- 5 files changed, 86 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f7fd519..844ccf0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -880,7 +880,6 @@ struct intel_context { } legacy_hw_ctx; /* Execlists */ - bool rcs_initialized; struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 73293b4..eb7c1f2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4603,14 +4603,8 @@ int i915_gem_init_rings(struct drm_device *dev) goto cleanup_vebox_ring; } - ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); - if (ret) - goto cleanup_bsd2_ring; - return 0; -cleanup_bsd2_ring: - intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); cleanup_blt_ring: @@ -4629,6 +4623,7 @@ i915_gem_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; int ret, i, j; + struct drm_i915_gem_request *req; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4680,9 +4675,12 @@ i915_gem_init_hw(struct drm_device *dev) goto out; } + ret = i915_gem_set_seqno(dev, dev_priv->next_seqno); + if (ret) + goto out; The only reason to do this would be to ensure that the contents of the registers are valid (assuming we take over from ourselves). The right value to use then is last_seqno. i915_gem_set_seqno uses the following code: ret = i915_gem_init_seqno(dev, seqno - 1); .. dev_priv->next_seqno = seqno; dev_priv->last_seqno = seqno - 1; So using last_seqno would rewind the seqno by one... 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 923a3c4..95f1a0d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -994,6 +994,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, { struct intel_context *ctx = NULL; struct i915_ctx_hang_stats *hs; + int ret; if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE) return ERR_PTR(-EINVAL); @@ -1009,7 +1010,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, } if (i915.enable_execlists && !ctx->engine[ring->id].state) { - int ret = intel_lr_context_deferred_create(ctx, ring); + ret = intel_lr_context_deferred_alloc(ctx, ring); if (ret) { DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret); return ERR_PTR(ret); Still modifying this for no reason, and you still haven't realised this call is redundant (hint there is already a hook in alloc_request). From last year: http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?id=37fbd370152211688bc5bce3d28d13233cfe7d8b More recent (i.e a couple of months ago): http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=ba4950a8f489d54ec
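The disagreement above comes down to what i915_gem_set_seqno() takes as its argument: the next seqno to hand out, with last_seqno trailing it by one. A tiny model of the quoted behaviour makes the "rewind by one" objection explicit:

#include <assert.h>
#include <stdint.h>

struct seqno_state {
	uint32_t next_seqno;	/* seqno the next request will get */
	uint32_t last_seqno;	/* seqno most recently handed out */
};

/* Models the behaviour quoted from i915_gem_set_seqno(): the argument is
 * the *next* seqno, so last_seqno trails it by one. */
static void set_seqno(struct seqno_state *s, uint32_t seqno)
{
	s->last_seqno = seqno - 1;
	s->next_seqno = seqno;
}

int main(void)
{
	struct seqno_state s;

	set_seqno(&s, 100);
	assert(s.next_seqno == 100 && s.last_seqno == 99);

	/* Re-seeding with last_seqno hands out seqno 99 again, i.e. it
	 * rewinds the counter by one. */
	set_seqno(&s, s.last_seqno);
	assert(s.next_seqno == 99);
	return 0;
}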
[Intel-gfx] [PATCH] drm/i915: Split alloc from init for lrc
Extend init/init_hw split to context init. - Move context initialisation in to i915_gem_init_hw - Move one off initialisation for render ring to i915_gem_validate_context - Move default context initialisation to logical_ring_init Rename intel_lr_context_deferred_create to intel_lr_context_deferred_alloc, to reflect reduced functionality & alloc/init split. This patch is intended to split out the allocation of resources & initialisation to allow easier reuse of code for resume/gpu reset. v2: Removed function ptr wrapping of do_switch_context (Daniel Vetter) Left ->init_context int intel_lr_context_deferred_alloc (Daniel Vetter) Remove unnecessary init flag & ring type test. (Daniel Vetter) Improve commit message (Daniel Vetter) v3: On init/reinit, set the hw next sequence number to the sw next sequence number. This is set to 1 at driver load time. This prevents the seqno being reset on reinit (Chris Wilson) v4: Set seqno back to ~0 - 0x1000 at start-of-day, and increment by 0x100 on reset. This makes it obvious which bbs are which after a reset. (David Gordon & John Harrison) Rebase. Issue: VIZ-4798 Signed-off-by: Nick Hoath Cc: Daniel Vetter Cc: Chris Wilson Cc: John Harrison Cc: David Gordon --- drivers/gpu/drm/i915/i915_drv.h| 1 - drivers/gpu/drm/i915/i915_gem.c| 24 +++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +- drivers/gpu/drm/i915/intel_lrc.c | 155 ++--- drivers/gpu/drm/i915/intel_lrc.h | 4 +- 5 files changed, 93 insertions(+), 94 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1287007..ded7158 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -888,7 +888,6 @@ struct intel_context { } legacy_hw_ctx; /* Execlists */ - bool rcs_initialized; struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 41263cd..c8125a5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4613,14 +4613,8 @@ int i915_gem_init_rings(struct drm_device *dev) goto cleanup_vebox_ring; } - ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); - if (ret) - goto cleanup_bsd2_ring; - return 0; -cleanup_bsd2_ring: - intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); cleanup_blt_ring: @@ -4639,6 +4633,7 @@ i915_gem_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; int ret, i, j; + struct drm_i915_gem_request *req; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4706,9 +4701,16 @@ i915_gem_init_hw(struct drm_device *dev) goto out; } + /* +* Increment the next seqno by 0x100 so we have a visible break +* on re-initialisation +*/ + ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); + if (ret) + goto out; + /* Now it is safe to go back round and do everything else: */ for_each_ring(ring, dev_priv, i) { - struct drm_i915_gem_request *req; WARN_ON(!ring->default_context); @@ -4907,6 +4909,14 @@ i915_gem_load(struct drm_device *dev) dev_priv->num_fence_regs = I915_READ(vgtif_reg(avail_rs.fence_num)); + /* +* Set initial sequence number for requests. +* Using this number allows the wraparound to happen early, +* catching any obvious problems. 
+*/ + dev_priv->next_seqno = ((u32)~0 - 0x1100); + dev_priv->last_seqno = ((u32)~0 - 0x1101); + /* Initialize fence registers to zero */ INIT_LIST_HEAD(&dev_priv->mm.fence_list); i915_gem_restore_fences(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a953d49..64674dc 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -994,6 +994,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, { struct intel_context *ctx = NULL; struct i915_ctx_hang_stats *hs; + int ret; if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE) return ERR_PTR(-EINVAL); @@ -1009,7 +1010,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, } if (i915.enable_execlists && !ctx->engine[ring->id].st
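To see how v4's numbers work out: the driver now starts at ~0 - 0x1100 and i915_gem_init_hw() adds 0x100 each time it runs, so the very first initialisation lands on the familiar ~0 - 0x1000 and every subsequent reset moves the visible seqno window up by 0x100, while wraparound is still reached quickly. A short numeric check (assuming one bump per init_hw call, as in the diff):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t next_seqno = ~(uint32_t)0 - 0x1100;	/* start-of-day seed */
	int init;

	/* Driver load, resume and GPU reset all go through init_hw, each
	 * adding 0x100, so requests issued before and after a reset sit in
	 * visibly different seqno ranges. */
	for (init = 1; init <= 4; init++) {
		next_seqno += 0x100;
		printf("init %d: seqnos start at 0x%08x\n",
		       init, (unsigned)next_seqno);
	}
	return 0;
}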