Re: [Intel-gfx] [PATCH igt] gem_concurrent_blit: Don't call igt_require() outside of a subtest/fixture
On Fri, Jan 08, 2016 at 09:10:38AM +, Chris Wilson wrote: > gem_concurrent_blit tries to ensure that it doesn't try and run a test > that would grind the system to a halt, i.e. unexpectedly cause swap > thrashing. It currently calls intel_require_memory(), but outside of > the subtest (as the tests use fork, it cannot do requirement testing > within the test children) - but intel_require_memory() calls > igt_require() and triggers and abort. Wrapping that initial require > within an igt_fixture() stops the abort(), but also prevents any further > testing. > > This patch restructures the requirement checking to ordinary conditions, > which though allowing the test to run, also prevents listing of subtests > on machines which cannot handle them. > --- > lib/igt_aux.h | 2 ++ > lib/intel_os.c | 53 +++- > tests/gem_concurrent_all.c | 67 > +- > 3 files changed, 85 insertions(+), 37 deletions(-) > > diff --git a/lib/igt_aux.h b/lib/igt_aux.h > index 6e11ee6..5a88c2a 100644 > --- a/lib/igt_aux.h > +++ b/lib/igt_aux.h > @@ -88,6 +88,8 @@ uint64_t intel_get_total_swap_mb(void); > > #define CHECK_RAM 0x1 > #define CHECK_SWAP 0x2 > +int __intel_check_memory(uint32_t count, uint64_t size, unsigned mode, > + uint64_t *out_required, uint64_t *out_total); > void intel_require_memory(uint32_t count, uint64_t size, unsigned mode); > int intel_num_objects_for_memory(uint32_t size, unsigned mode); > > diff --git a/lib/intel_os.c b/lib/intel_os.c > index dba9e17..90f9bb3 100644 > --- a/lib/intel_os.c > +++ b/lib/intel_os.c > @@ -192,6 +192,38 @@ intel_get_total_swap_mb(void) > return retval / (1024*1024); > } > Please add the usual gtkdoc boilerplate here with a mention of intel_check_memory. Ack with that. -Daniel > +int __intel_check_memory(uint32_t count, uint64_t size, unsigned mode, > + uint64_t *out_required, uint64_t *out_total) > +{ > +/* rough estimate of how many bytes the kernel requires to track each object > */ > +#define KERNEL_BO_OVERHEAD 512 > + uint64_t required, total; > + > + required = count; > + required *= size + KERNEL_BO_OVERHEAD; > + required = ALIGN(required, 4096); > + > + igt_debug("Checking %'u surfaces of size %'llu bytes (total %'llu) > against %s%s\n", > + count, (long long)size, (long long)required, > + mode & (CHECK_RAM | CHECK_SWAP) ? "RAM" : "", > + mode & CHECK_SWAP ? " + swap": ""); > + > + total = 0; > + if (mode & (CHECK_RAM | CHECK_SWAP)) > + total += intel_get_avail_ram_mb(); > + if (mode & CHECK_SWAP) > + total += intel_get_total_swap_mb(); > + total *= 1024 * 1024; > + > + if (out_required) > + *out_required = required; > + > + if (out_total) > + *out_total = total; > + > + return required < total; > +} > + > /** > * intel_require_memory: > * @count: number of surfaces that will be created > @@ -217,27 +249,10 @@ intel_get_total_swap_mb(void) > */ > void intel_require_memory(uint32_t count, uint64_t size, unsigned mode) > { > -/* rough estimate of how many bytes the kernel requires to track each object > */ > -#define KERNEL_BO_OVERHEAD 512 > uint64_t required, total; > > - required = count; > - required *= size + KERNEL_BO_OVERHEAD; > - required = ALIGN(required, 4096); > - > - igt_debug("Checking %'u surfaces of size %'llu bytes (total %'llu) > against %s%s\n", > - count, (long long)size, (long long)required, > - mode & (CHECK_RAM | CHECK_SWAP) ? "RAM" : "", > - mode & CHECK_SWAP ? 
" + swap": ""); > - > - total = 0; > - if (mode & (CHECK_RAM | CHECK_SWAP)) > - total += intel_get_avail_ram_mb(); > - if (mode & CHECK_SWAP) > - total += intel_get_total_swap_mb(); > - total *= 1024 * 1024; > - > - igt_skip_on_f(total <= required, > + igt_skip_on_f(!__intel_check_memory(count, size, mode, > + &required, &total), > "Estimated that we need %'llu bytes for the test, but > only have %'llu bytes available (%s%s)\n", > (long long)required, (long long)total, > mode & (CHECK_RAM | CHECK_SWAP) ? "RAM" : "", > diff --git a/tests/gem_concurrent_all.c b/tests/gem_concurrent_all.c > index 0e873c4..9a2fb6d 100644 > --- a/tests/gem_concurrent_all.c > +++ b/tests/gem_concurrent_all.c > @@ -155,9 +155,9 @@ static bool can_create_stolen(void) > static drm_intel_bo * > (*create_func)(drm_intel_bufmgr *bufmgr, uint64_t size); > > -static void create_cpu_require(void) > +static bool create_cpu_require(void) > { > - igt_require(create_func != create_stolen_bo); > + return create_func != create_stolen_bo; > } > > static drm_intel_bo * > @@ -375,7 +375,7 @@ gpu_
Re: [Intel-gfx] [PATCH 01/13] drm/i915/bdw+: Replace list_del+list_add_tail with list_move_tail
On Fri, Jan 08, 2016 at 11:29:40AM +, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Same effect for slightly less source code and resulting binary. > > Signed-off-by: Tvrtko Ursulin Reviewed-by: Daniel Vetter > --- > drivers/gpu/drm/i915/intel_lrc.c | 15 ++- > 1 file changed, 6 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > b/drivers/gpu/drm/i915/intel_lrc.c > index 23839ff04e27..8b6071fcd743 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -431,9 +431,8 @@ static void execlists_context_unqueue(struct > intel_engine_cs *ring) > /* Same ctx: ignore first request, as second request >* will update tail past first request's workload */ > cursor->elsp_submitted = req0->elsp_submitted; > - list_del(&req0->execlist_link); > - list_add_tail(&req0->execlist_link, > - &ring->execlist_retired_req_list); > + list_move_tail(&req0->execlist_link, > +&ring->execlist_retired_req_list); > req0 = cursor; > } else { > req1 = cursor; > @@ -485,9 +484,8 @@ static bool execlists_check_remove_request(struct > intel_engine_cs *ring, >"Never submitted head request\n"); > > if (--head_req->elsp_submitted <= 0) { > - list_del(&head_req->execlist_link); > - list_add_tail(&head_req->execlist_link, > - &ring->execlist_retired_req_list); > + list_move_tail(&head_req->execlist_link, > + > &ring->execlist_retired_req_list); > return true; Aside: Some of this code is over-indented ... -Daniel > } > } > @@ -608,9 +606,8 @@ static int execlists_context_queue(struct > drm_i915_gem_request *request) > if (request->ctx == tail_req->ctx) { > WARN(tail_req->elsp_submitted != 0, > "More than 2 already-submitted reqs queued\n"); > - list_del(&tail_req->execlist_link); > - list_add_tail(&tail_req->execlist_link, > - &ring->execlist_retired_req_list); > + list_move_tail(&tail_req->execlist_link, > +&ring->execlist_retired_req_list); > } > } > > -- > 1.9.1 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
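[For reference — and to see why this is a straight ack — list_move_tail() in include/linux/list.h is (modulo kernel version) exactly the open-coded del+add_tail pair the patch removes, so the change cannot alter behaviour:]

static inline void list_move_tail(struct list_head *list,
				  struct list_head *head)
{
	__list_del_entry(list);		/* unlink from the current list */
	list_add_tail(list, head);	/* append at the tail of @head */
}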
Re: [Intel-gfx] [PATCH 02/13] drm/i915: Don't need a timer to wake us up
On Fri, Jan 08, 2016 at 11:29:41AM +, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Looks like the sleeping loop in __i915_wait_request can be > simplified by using io_schedule_timeout instead of setting > up and destroying a timer. > > Signed-off-by: Tvrtko Ursulin > Cc: Chris Wilson io_schedule_timeout was only added in commit 9cff8adeaa34b5d2802f03f89803da57856b3b72 Author: NeilBrown Date: Fri Feb 13 15:49:17 2015 +1100 sched: Prevent recursion in io_schedule() (well the EXPORT_SYMBOL for it), that was iirc why this was open-coded. Please add this to your commit message. Reviewed-by: Daniel Vetter > --- > drivers/gpu/drm/i915/i915_gem.c | 28 > 1 file changed, 8 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 6c60e04fc09c..de98dc41fb9f 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1135,11 +1135,6 @@ i915_gem_check_wedge(struct i915_gpu_error *error, > return 0; > } > > -static void fake_irq(unsigned long data) > -{ > - wake_up_process((struct task_struct *)data); > -} > - > static bool missed_irq(struct drm_i915_private *dev_priv, > struct intel_engine_cs *ring) > { > @@ -1291,7 +1286,7 @@ int __i915_wait_request(struct drm_i915_gem_request > *req, > } > > for (;;) { > - struct timer_list timer; > + long sched_timeout; > > prepare_to_wait(&ring->irq_queue, &wait, state); > > @@ -1321,21 +1316,14 @@ int __i915_wait_request(struct drm_i915_gem_request > *req, > break; > } > > - timer.function = NULL; > - if (timeout || missed_irq(dev_priv, ring)) { > - unsigned long expire; > - > - setup_timer_on_stack(&timer, fake_irq, (unsigned > long)current); > - expire = missed_irq(dev_priv, ring) ? jiffies + 1 : > timeout_expire; > - mod_timer(&timer, expire); > - } > - > - io_schedule(); > + if (timeout) > + sched_timeout = timeout_expire - jiffies; > + else if (missed_irq(dev_priv, ring)) > + sched_timeout = 1; > + else > + sched_timeout = MAX_SCHEDULE_TIMEOUT; > > - if (timer.function) { > - del_singleshot_timer_sync(&timer); > - destroy_timer_on_stack(&timer); > - } > + io_schedule_timeout(sched_timeout); > } > if (!irq_test_in_progress) > ring->irq_put(ring); > -- > 1.9.1 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 03/13] drm/i915: Avoid invariant conditionals in lrc interrupt handler
On Fri, Jan 08, 2016 at 11:29:42AM +, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > There is no need to check on what Gen we are running on every > interrupt and every command submission. We can instead set up > some of that when engines are initialized, store it in the > engine structure and use it directly at runtime. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/intel_lrc.c| 36 > ++--- > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ > 2 files changed, 22 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > b/drivers/gpu/drm/i915/intel_lrc.c > index 8b6071fcd743..84977a6e6f3f 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -298,29 +298,15 @@ uint64_t intel_lr_context_descriptor(struct > intel_context *ctx, >struct intel_engine_cs *ring) > { > struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; > - uint64_t desc; > + uint64_t desc = ring->ctx_desc_template; > uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) + > LRC_PPHWSP_PN * PAGE_SIZE; > > WARN_ON(lrca & 0x0FFFULL); > > - desc = GEN8_CTX_VALID; > - desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT; > - if (IS_GEN8(ctx_obj->base.dev)) > - desc |= GEN8_CTX_L3LLC_COHERENT; > - desc |= GEN8_CTX_PRIVILEGE; > desc |= lrca; > desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT; > > - /* TODO: WaDisableLiteRestore when we start using semaphore > - * signalling between Command Streamers */ > - /* desc |= GEN8_CTX_FORCE_RESTORE; */ > - > - /* WaEnableForceRestoreInCtxtDescForVCS:skl */ > - /* WaEnableForceRestoreInCtxtDescForVCS:bxt */ > - if (disable_lite_restore_wa(ring)) > - desc |= GEN8_CTX_FORCE_RESTORE; > - > return desc; > } tbh I'd go full monty and just cache the entire context descriptor. 
-Daniel > > @@ -556,7 +542,7 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring) > } > } > > - if (disable_lite_restore_wa(ring)) { > + if (ring->disable_lite_restore_wa) { > /* Prevent a ctx to preempt itself */ > if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) && > (submit_contexts != 0)) > @@ -1980,6 +1966,24 @@ static int logical_ring_init(struct drm_device *dev, > struct intel_engine_cs *rin > goto error; > } > > + ring->disable_lite_restore_wa = disable_lite_restore_wa(ring); > + > + ring->ctx_desc_template = GEN8_CTX_VALID; > + ring->ctx_desc_template |= GEN8_CTX_ADDRESSING_MODE(dev) << > +GEN8_CTX_ADDRESSING_MODE_SHIFT; > + if (IS_GEN8(dev)) > + ring->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT; > + ring->ctx_desc_template |= GEN8_CTX_PRIVILEGE; > + > + /* TODO: WaDisableLiteRestore when we start using semaphore > + * signalling between Command Streamers */ > + /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */ > + > + /* WaEnableForceRestoreInCtxtDescForVCS:skl */ > + /* WaEnableForceRestoreInCtxtDescForVCS:bxt */ > + if (ring->disable_lite_restore_wa) > + ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; > + > return 0; > > error: > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h > b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 49574ffe54bc..0b91a4b77359 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -268,6 +268,8 @@ struct intel_engine_cs { > struct list_head execlist_queue; > struct list_head execlist_retired_req_list; > u8 next_context_status_buffer; > + bool disable_lite_restore_wa; > + u32 ctx_desc_template; > u32 irq_keep_mask; /* bitmask for interrupts that should > not be masked */ > int (*emit_request)(struct drm_i915_gem_request *request); > int (*emit_flush)(struct drm_i915_gem_request *request, > -- > 1.9.1 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 04/13] drm/i915: Fail engine initialization if LRCA is incorrectly aligned
On Fri, Jan 08, 2016 at 11:29:43AM +, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > LRCA can change only when it goes from unpinned to pinned so it > makes sense to check its alignment at that point rather than at > every batch buffer submission. > > Furthermore, if we check it at pin time we can actually > gracefuly fail the engine initialization rather than just > spamming the logs at runtime with WARNs. > > v2: Return ENODEV for bad alignment. (Chris Wilson) > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/intel_lrc.c | 9 +++-- > 1 file changed, 7 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > b/drivers/gpu/drm/i915/intel_lrc.c > index 84977a6e6f3f..ff146a15d395 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -302,8 +302,6 @@ uint64_t intel_lr_context_descriptor(struct intel_context > *ctx, > uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) + > LRC_PPHWSP_PN * PAGE_SIZE; > > - WARN_ON(lrca & 0x0FFFULL); > - > desc |= lrca; > desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT; > > @@ -1030,6 +1028,7 @@ static int intel_lr_context_do_pin(struct > intel_engine_cs *ring, > { > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > + u64 lrca; > int ret = 0; > > WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); > @@ -1038,6 +1037,12 @@ static int intel_lr_context_do_pin(struct > intel_engine_cs *ring, > if (ret) > return ret; > > + lrca = i915_gem_obj_ggtt_offset(ctx_obj) + LRC_PPHWSP_PN * PAGE_SIZE; > + if (WARN_ON(lrca & 0x0FFFULL)) { Essentially this checks that it's page-aligned (which is a fundamental assumption of how we place objects we depend upon everywhere) and that it fits within the 4G hw limit of the global gtt (again we assume our code is correct that way). tbh I'd just drop entirely, it's a useless check. -Daniel > + ret = -ENODEV; > + goto unpin_ctx_obj; > + } > + > ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf); > if (ret) > goto unpin_ctx_obj; > -- > 1.9.1 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 05/13] drm/i915: Cache LRCA in the context
On Fri, Jan 08, 2016 at 11:29:44AM +, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > We are not allowed to call i915_gem_obj_ggtt_offset from irq > context without the big kernel lock held. > > LRCA lifetime is well defined so cache it so it can be looked up > cheaply from the interrupt context and at command submission > time. > > v2: Added irq context reasoning to the commit message. (Daniel Vetter) > > Signed-off-by: Tvrtko Ursulin A i915_obj_for_each_vma macro with a WARN_ON(!mutex_is_locked(dev->struct_mutex)) would be awesome to validate this. Especially since this is by far not the only time I've seen this bug. Needs to be a follow-up though to avoid stalling this fix. > --- > drivers/gpu/drm/i915/i915_debugfs.c | 15 ++ > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/intel_lrc.c| 40 > - > drivers/gpu/drm/i915/intel_lrc.h| 3 ++- > 4 files changed, 26 insertions(+), 33 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c > b/drivers/gpu/drm/i915/i915_debugfs.c > index 3b05bd189eab..714a45cf8a51 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -1976,12 +1976,13 @@ static int i915_context_status(struct seq_file *m, > void *unused) > } > > static void i915_dump_lrc_obj(struct seq_file *m, > - struct intel_engine_cs *ring, > - struct drm_i915_gem_object *ctx_obj) > + struct intel_context *ctx, > + struct intel_engine_cs *ring) > { > struct page *page; > uint32_t *reg_state; > int j; > + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; > unsigned long ggtt_offset = 0; > > if (ctx_obj == NULL) { > @@ -1991,7 +1992,7 @@ static void i915_dump_lrc_obj(struct seq_file *m, > } > > seq_printf(m, "CONTEXT: %s %u\n", ring->name, > -intel_execlists_ctx_id(ctx_obj)); > +intel_execlists_ctx_id(ctx, ring)); > > if (!i915_gem_obj_ggtt_bound(ctx_obj)) > seq_puts(m, "\tNot bound in GGTT\n"); > @@ -2040,8 +2041,7 @@ static int i915_dump_lrc(struct seq_file *m, void > *unused) > list_for_each_entry(ctx, &dev_priv->context_list, link) { > for_each_ring(ring, dev_priv, i) { > if (ring->default_context != ctx) > - i915_dump_lrc_obj(m, ring, > - ctx->engine[i].state); > + i915_dump_lrc_obj(m, ctx, ring); > } > } > > @@ -2115,11 +2115,8 @@ static int i915_execlists(struct seq_file *m, void > *data) > > seq_printf(m, "\t%d requests in queue\n", count); > if (head_req) { > - struct drm_i915_gem_object *ctx_obj; > - > - ctx_obj = head_req->ctx->engine[ring_id].state; > seq_printf(m, "\tHead request id: %u\n", > -intel_execlists_ctx_id(ctx_obj)); > +intel_execlists_ctx_id(head_req->ctx, ring)); > seq_printf(m, "\tHead request tail: %u\n", > head_req->tail); > } > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 8cf655c6fc03..b77a5d84eac2 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -881,6 +881,7 @@ struct intel_context { > struct drm_i915_gem_object *state; > struct intel_ringbuffer *ringbuf; > int pin_count; > + u32 lrca; lrc_offset imo. Consistent with our other usage in the driver, and actually readable. Please apply liberally everywhere else (I know that bsepc calls it lrca, but we don't need to follow bad naming styles blindly). 
With that Reviewed-by: Daniel Vetter > } engine[I915_NUM_RINGS]; > > struct list_head link; > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > b/drivers/gpu/drm/i915/intel_lrc.c > index ff146a15d395..ffe004de22b0 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -265,7 +265,8 @@ int intel_sanitize_enable_execlists(struct drm_device > *dev, int enable_execlists > > /** > * intel_execlists_ctx_id() - get the Execlists Context ID > - * @ctx_obj: Logical Ring Context backing object. > + * @ctx: Context to get the ID for > + * @ring: Engine to get the ID for > * > * Do not confuse with ctx->id! Unfortunately we have a name overload > * here: the old context ID we pass to userspace as a handler so that > @@ -275,14 +276,12 @@ int intel_sanitize_enable_execlists(struct drm_device > *dev, int enable_execlists > * > * Return: 20-bits globally unique context ID. > */ > -u32 intel_execlists_ctx_id(struct drm_i915_gem_object *
Re: [Intel-gfx] [PATCH 06/13] drm/i915: Only grab timestamps when needed
On Fri, Jan 08, 2016 at 11:29:45AM +, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > No need to call ktime_get_raw_ns twice per unlimited wait and can > also elimate a local variable. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/i915_gem.c | 12 +++- > 1 file changed, 7 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index de98dc41fb9f..c4f69579eb7a 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1246,7 +1246,7 @@ int __i915_wait_request(struct drm_i915_gem_request > *req, > int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; > DEFINE_WAIT(wait); > unsigned long timeout_expire; > - s64 before, now; > + s64 before = 0; Is gcc really this dense? Should be easy for it to spot that both branches depend upon the same condition. Please remove that assignment. With that changed: Reviewed-by: Daniel Vetter > int ret; > > WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); > @@ -1266,14 +1266,17 @@ int __i915_wait_request(struct drm_i915_gem_request > *req, > return -ETIME; > > timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout); > + > + /* > + * Record current time in case interrupted by signal, or wedged. > + */ > + before = ktime_get_raw_ns(); > } > > if (INTEL_INFO(dev_priv)->gen >= 6) > gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); > > - /* Record current time in case interrupted by signal, or wedged */ > trace_i915_gem_request_wait_begin(req); > - before = ktime_get_raw_ns(); > > /* Optimistic spin for the next jiffie before touching IRQs */ > ret = __i915_spin_request(req, state); > @@ -1331,11 +1334,10 @@ int __i915_wait_request(struct drm_i915_gem_request > *req, > finish_wait(&ring->irq_queue, &wait); > > out: > - now = ktime_get_raw_ns(); > trace_i915_gem_request_wait_end(req); > > if (timeout) { > - s64 tres = *timeout - (now - before); > + s64 tres = *timeout - (ktime_get_raw_ns() - before); > > *timeout = tres < 0 ? 0 : tres; > > -- > 1.9.1 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 07/13] drm/i915: Introduce dedicated object VMA iterator
On Fri, Jan 08, 2016 at 01:29:14PM +, Tvrtko Ursulin wrote: > > On 08/01/16 11:29, Tvrtko Ursulin wrote: > >From: Tvrtko Ursulin > > > >Purpose is to catch places which iterate the object VMA list > >without holding the big lock. > > > >Implemented by open coding list_for_each_entry to make the > >macro compatible with existing call sites. > > > >Signed-off-by: Tvrtko Ursulin > >Cc: Daniel Vetter > >--- > > drivers/gpu/drm/i915/i915_debugfs.c | 8 > > drivers/gpu/drm/i915/i915_drv.h | 6 ++ > > drivers/gpu/drm/i915/i915_gem.c | 24 > > drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- > > drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +- > > drivers/gpu/drm/i915/i915_gpu_error.c| 4 ++-- > > 6 files changed, 26 insertions(+), 20 deletions(-) > > > >diff --git a/drivers/gpu/drm/i915/i915_debugfs.c > >b/drivers/gpu/drm/i915/i915_debugfs.c > >index 714a45cf8a51..d7c2a3201161 100644 > >--- a/drivers/gpu/drm/i915/i915_debugfs.c > >+++ b/drivers/gpu/drm/i915/i915_debugfs.c > >@@ -117,7 +117,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct > >drm_i915_gem_object *obj) > > u64 size = 0; > > struct i915_vma *vma; > > > >-list_for_each_entry(vma, &obj->vma_list, vma_link) { > >+i915_gem_obj_for_each_vma(vma, obj) { > > if (i915_is_ggtt(vma->vm) && > > drm_mm_node_allocated(&vma->node)) > > size += vma->node.size; > >@@ -155,7 +155,7 @@ describe_obj(struct seq_file *m, struct > >drm_i915_gem_object *obj) > >obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); > > if (obj->base.name) > > seq_printf(m, " (name: %d)", obj->base.name); > >-list_for_each_entry(vma, &obj->vma_list, vma_link) { > >+i915_gem_obj_for_each_vma(vma, obj) { > > if (vma->pin_count > 0) > > pin_count++; > > } > >@@ -164,7 +164,7 @@ describe_obj(struct seq_file *m, struct > >drm_i915_gem_object *obj) > > seq_printf(m, " (display)"); > > if (obj->fence_reg != I915_FENCE_REG_NONE) > > seq_printf(m, " (fence: %d)", obj->fence_reg); > >-list_for_each_entry(vma, &obj->vma_list, vma_link) { > >+i915_gem_obj_for_each_vma(vma, obj) { > > seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", > >i915_is_ggtt(vma->vm) ? "g" : "pp", > >vma->node.start, vma->node.size); > >@@ -342,7 +342,7 @@ static int per_file_stats(int id, void *ptr, void *data) > > stats->shared += obj->base.size; > > > > if (USES_FULL_PPGTT(obj->base.dev)) { > >-list_for_each_entry(vma, &obj->vma_list, vma_link) { > >+i915_gem_obj_for_each_vma(vma, obj) { > > struct i915_hw_ppgtt *ppgtt; > > > > if (!drm_mm_node_allocated(&vma->node)) > >diff --git a/drivers/gpu/drm/i915/i915_drv.h > >b/drivers/gpu/drm/i915/i915_drv.h > >index b77a5d84eac2..0406a020dfcc 100644 > >--- a/drivers/gpu/drm/i915/i915_drv.h > >+++ b/drivers/gpu/drm/i915/i915_drv.h > >@@ -2852,6 +2852,12 @@ struct drm_i915_gem_object > >*i915_gem_object_create_from_data( > > void i915_gem_free_object(struct drm_gem_object *obj); > > void i915_gem_vma_destroy(struct i915_vma *vma); > > > >+#define i915_gem_obj_for_each_vma(vma, obj) \ > >+for (WARN_ON_ONCE(!mutex_is_locked(&(obj)->base.dev->struct_mutex)), \ > >+ vma = list_first_entry(&(obj)->vma_list, typeof(*vma), vma_link);\ > >+ &vma->vma_link != (&(obj)->vma_list); \ > >+ vma = list_next_entry(vma, vma_link)) > >+ > > > Unfortunately error capture is not happy with this approach. Can't even see > that error capture attempts to grab the mutex anywhere. > > So what? Drop the idea or add a "doing error capture" flag somewhere? Fix the bugs. Not surprise at all that we've screwed this up all over the place ;-) Afaics modeset code isn't much better either ... 
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
Re: [Intel-gfx] [PATCH 07/13] drm/i915: Introduce dedicated object VMA iterator
On Fri, Jan 08, 2016 at 11:44:04AM +, Chris Wilson wrote: > On Fri, Jan 08, 2016 at 11:29:46AM +, Tvrtko Ursulin wrote: > > From: Tvrtko Ursulin > > > > Purpose is to catch places which iterate the object VMA list > > without holding the big lock. > > > > Implemented by open coding list_for_each_entry to make the > > macro compatible with existing call sites. > > > > Signed-off-by: Tvrtko Ursulin > > Cc: Daniel Vetter > > +#define i915_gem_obj_for_each_vma(vma, obj) \ > > + for (WARN_ON_ONCE(!mutex_is_locked(&(obj)->base.dev->struct_mutex)), \ > > Let's not go around adding WARN(!mutex_locked) to GEM code when > lockdep_assert_held doesn't add overhead outside of testing. Hm yeah I still prefere WARN_ON for modeset code (where it doesn't matter) because of increased test coverage. But for gem it indeed makes more sense to only do this for lockdep-enabled builds. CI runs with lockdep, so we're good. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
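[For illustration, roughly what the iterator looks like with Chris' suggestion applied — a sketch, not the committed version. Note that lockdep_assert_held() expands to a statement, so it needs a GNU statement-expression to sit in the for-clause where the WARN_ON_ONCE() expression used to be:]

#define i915_gem_obj_for_each_vma(vma, obj) \
	for (({ lockdep_assert_held(&(obj)->base.dev->struct_mutex); }), \
	     vma = list_first_entry(&(obj)->vma_list, typeof(*vma), vma_link); \
	     &vma->vma_link != (&(obj)->vma_list); \
	     vma = list_next_entry(vma, vma_link))

/* lockdep_assert_held() compiles to nothing without CONFIG_LOCKDEP, so
 * only lockdep-enabled builds (e.g. CI) pay for the check. */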
Re: [Intel-gfx] [PATCH 11/13] drm/i915: Cache ringbuffer GTT address
On Fri, Jan 08, 2016 at 11:29:50AM +, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Purpose is to avoid calling i915_gem_obj_ggtt_offset from the > interrupt context without the big lock held. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/intel_lrc.c| 3 +-- > drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ > drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + > 3 files changed, 5 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > b/drivers/gpu/drm/i915/intel_lrc.c > index 5b3795815d8e..70c511ef6b12 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -345,7 +345,6 @@ static int execlists_update_context(struct > drm_i915_gem_request *rq) > struct intel_engine_cs *ring = rq->ring; > struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; > struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; > - struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj; > struct page *page; > uint32_t *reg_state; > > @@ -355,7 +354,7 @@ static int execlists_update_context(struct > drm_i915_gem_request *rq) > reg_state = kmap_atomic(page); > > reg_state[CTX_RING_TAIL+1] = rq->tail; > - reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj); > + reg_state[CTX_RING_BUFFER_START+1] = rq->ringbuf->gtt_start; > > if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > /* True 32b PPGTT with dynamic page allocation: update PDP > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c > b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 339701d7a9a5..9094ce254125 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -1988,6 +1988,7 @@ void intel_unpin_ringbuffer_obj(struct intel_ringbuffer > *ringbuf) > else > iounmap(ringbuf->virtual_start); > ringbuf->virtual_start = NULL; > + ringbuf->gtt_start = 0; > i915_gem_object_ggtt_unpin(ringbuf->obj); > } > > @@ -2054,6 +2055,8 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_device > *dev, > } > } > > + ringbuf->gtt_start = i915_gem_obj_ggtt_offset(obj); > + > return 0; > } > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h > b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 0b91a4b77359..25d3716228ae 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -98,6 +98,7 @@ struct intel_ring_hangcheck { > struct intel_ringbuffer { > struct drm_i915_gem_object *obj; > void __iomem *virtual_start; > + u64 gtt_start; gtt_offset, because consistency. Or vma, as Chris suggested. -Daniel > > struct intel_engine_cs *ring; > struct list_head link; > -- > 1.9.1 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 09/13] drm/i915: Remove two impossible asserts
On Fri, Jan 08, 2016 at 11:29:48AM +, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Engine initialization would have failed if those two weren't > pinned and calling i915_gem_obj_is_pinned is illegal from irq > context without the big lock held. > > Signed-off-by: Tvrtko Ursulin Reviewed-by: Daniel Vetter > --- > drivers/gpu/drm/i915/intel_lrc.c | 2 -- > 1 file changed, 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > b/drivers/gpu/drm/i915/intel_lrc.c > index ffe004de22b0..5b3795815d8e 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -350,8 +350,6 @@ static int execlists_update_context(struct > drm_i915_gem_request *rq) > uint32_t *reg_state; > > BUG_ON(!ctx_obj); > - WARN_ON(!i915_gem_obj_is_pinned(ctx_obj)); > - WARN_ON(!i915_gem_obj_is_pinned(rb_obj)); > > page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN); > reg_state = kmap_atomic(page); > -- > 1.9.1 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Support to enable TRTT on GEN9
On Mon, Jan 11, 2016 at 01:09:50PM +0530, Goel, Akash wrote: > > > On 1/10/2016 11:09 PM, Chris Wilson wrote: > >On Sat, Jan 09, 2016 at 05:00:21PM +0530, akash.g...@intel.com wrote: > >>From: Akash Goel > >> > >>Gen9 has an additional address translation hardware support in form of > >>Tiled Resource Translation Table (TR-TT) which provides an extra level > >>of abstraction over PPGTT. > >>This is useful for mapping Sparse/Tiled texture resources. > >>Sparse resources are created as virtual-only allocations. Regions of the > >>resource that the application intends to use is bound to the physical memory > >>on the fly and can be re-bound to different memory allocations over the > >>lifetime of the resource. > >> > >>TR-TT is tightly coupled with PPGTT, a new instance of TR-TT will be > >>required > >>for a new PPGTT instance, but TR-TT may not enabled for every context. > >>1/16th of the 48bit PPGTT space is earmarked for the translation by TR-TT, > >>which such chunk to use is conveyed to HW through a register. > >>Any GFX address, which lies in that reserved 44 bit range will be translated > >>through TR-TT first and then through PPGTT to get the actual physical > >>address, > >>so the output of translation from TR-TT will be a PPGTT offset. > >> > >>TRTT is constructed as a 3 level tile Table. Each tile is 64KB is size which > >>leaves behind 44-16=28 address bits. 28bits are partitioned as 9+9+10, and > >>each level is contained within a 4KB page hence L3 and L2 is composed of > >>512 64b entries and L1 is composed of 1024 32b entries. > >> > >>There is a provision to keep TR-TT Tables in virtual space, where the pages > >>of > >>TRTT tables will be mapped to PPGTT. > >>Currently this is the supported mode, in this mode UMD will have a full > >>control > >>on TR-TT management, with bare minimum support from KMD. > >>So the entries of L3 table will contain the PPGTT offset of L2 Table pages, > >>similarly entries of L2 table will contain the PPGTT offset of L1 Table > >>pages. > >>The entries of L1 table will contain the PPGTT offset of BOs actually > >>backing > >>the Sparse resources. > > > >>The assumption here is that UMD only will do the complete PPGTT address > >>space > >>management and use the Soft Pin API for all the buffer objects associated > >>with > >>a given Context. > > > >That is a poor assumption, and not one required for this to work. > > > This is not a strict requirement. > But I thought that conflicts will be minimized if UMD itself can do > the full address space management. > At least UMD has to ensure that PPGTT offset of L3 table remains > same throughout. Yes, userspace must control that object, and that would require softpin to preserve it across execbuffer calls. The kernel does not require that all addresses be handled in userspace afterwards, that's the language I wish to avoid. (Hence I don't like using "assumption" as that just invites userspace to break the kernel.) > >>So UMD will also have to allocate the L3/L2/L1 table pages > >>as a regular GEM BO only & assign them a PPGTT address through the Soft Pin > >>API. > >>UMD would have to emit the MI_STORE_DATA_IMM commands in the batch buffer to > >>program the relevant entries of L3/L2/L1 tables. > > > >This only applies to te TR-TT L1-L3 cache, right? > > > Yes applies only to the TR-TT L1-L3 tables. > The backing pages of L3/L2/L1 tables shall be allocated as a BO, > which should be assigned a PPGTT address. 
> The table entries could be written directly also by UMD by mmapping > the table BOs, but adding MI_STORE_DATA_IMM commands in the batch > buffer itself would help to achieve serialization (implicitly). Can you tighten up the phrasing here? My first read was that you indeed for all PTE writes to be in userspace, which is scary. "UMD will then allocate the L3/L32/L1 page tables for TR-TT as a regular bo, and will use softpin to assign it to the l3_table_address when used. UMD will also maintain the entries in the TR-TT page tables using regular batch commands (MI_STORE_DATA_IMM), or via mmapping of the page table bo." > >>autonomously and KMD will be oblivious of it. > >>The BOs must not be assigned an address from TR-TT segment, they will be > >>mapped > > > >s/The BOs/Any object/ > > > Ok will use 'Any object' > >>to PPGTT in a regular way by KMD > > > >s/using the Soft Pin offset provided by UMD// as this is irrelevant. > > > You mean to say that it is needless or inappropriate to state that > KMD will use the Soft PIN offset provided by UMD, it doesn't matter > that whether the Soft PIN offset is used or KMD itself assigns an > address. I just want to avoid implying that userspace must use softpin on every single bo for this to work. (Mainly because I don't really want userspace to have to do full address space management, as we will always have to do the double check inside the kernel. Unless there is a real need (e.g. svm), I'd rather improve the kernel allocator/verification, rather than
Re: [Intel-gfx] [PATCH igt] gem_concurrent_blit: Don't call igt_require() outside of a subtest/fixture
On Mon, Jan 11, 2016 at 09:00:13AM +0100, Daniel Vetter wrote: > On Fri, Jan 08, 2016 at 09:10:38AM +, Chris Wilson wrote: > > gem_concurrent_blit tries to ensure that it doesn't try and run a test > > that would grind the system to a halt, i.e. unexpectedly cause swap > > thrashing. It currently calls intel_require_memory(), but outside of > > the subtest (as the tests use fork, it cannot do requirement testing > > within the test children) - but intel_require_memory() calls > > igt_require() and triggers and abort. Wrapping that initial require > > within an igt_fixture() stops the abort(), but also prevents any further > > testing. > > > > This patch restructures the requirement checking to ordinary conditions, > > which though allowing the test to run, also prevents listing of subtests > > on machines which cannot handle them. > > > > --- > > lib/igt_aux.h | 2 ++ > > lib/intel_os.c | 53 +++- > > tests/gem_concurrent_all.c | 67 > > +- > > 3 files changed, 85 insertions(+), 37 deletions(-) > > > > diff --git a/lib/igt_aux.h b/lib/igt_aux.h > > index 6e11ee6..5a88c2a 100644 > > --- a/lib/igt_aux.h > > +++ b/lib/igt_aux.h > > @@ -88,6 +88,8 @@ uint64_t intel_get_total_swap_mb(void); > > > > #define CHECK_RAM 0x1 > > #define CHECK_SWAP 0x2 > > +int __intel_check_memory(uint32_t count, uint64_t size, unsigned mode, > > +uint64_t *out_required, uint64_t *out_total); > > void intel_require_memory(uint32_t count, uint64_t size, unsigned mode); > > int intel_num_objects_for_memory(uint32_t size, unsigned mode); > > > > diff --git a/lib/intel_os.c b/lib/intel_os.c > > index dba9e17..90f9bb3 100644 > > --- a/lib/intel_os.c > > +++ b/lib/intel_os.c > > @@ -192,6 +192,38 @@ intel_get_total_swap_mb(void) > > return retval / (1024*1024); > > } > > > > Please add the usual gtkdoc boilerplate here with a mention of > intel_check_memory. Ack with that. You were meant to object about how this breaks test runners and suggest how we can do this without that breakage. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ warning: Fi.CI.BAT
== Summary ==

Built on ff88655b3a5467bbc3be8c67d3e05ebf182557d3 drm-intel-nightly: 2016y-01m-11d-07h-30m-16s UTC integration manifest

Test gem_storedw_loop:
        Subgroup basic-render:
                dmesg-warn -> PASS (bdw-ultra)
                dmesg-warn -> PASS (skl-i7k-2) UNSTABLE
Test kms_flip:
        Subgroup basic-flip-vs-dpms:
                dmesg-warn -> PASS (ilk-hp8440p)
        Subgroup basic-flip-vs-modeset:
                pass -> DMESG-WARN (ilk-hp8440p)
Test kms_pipe_crc_basic:
        Subgroup read-crc-pipe-b:
                dmesg-warn -> PASS (byt-nuc)

bdw-ultra    total:138 pass:132 dwarn:0 dfail:0 fail:0 skip:6
bsw-nuc-2    total:141 pass:114 dwarn:3 dfail:0 fail:0 skip:24
byt-nuc      total:141 pass:119 dwarn:7 dfail:0 fail:0 skip:15
hsw-brixbox  total:141 pass:134 dwarn:0 dfail:0 fail:0 skip:7
hsw-gt2      total:141 pass:137 dwarn:0 dfail:0 fail:0 skip:4
hsw-xps12    total:138 pass:133 dwarn:1 dfail:0 fail:0 skip:4
ilk-hp8440p  total:141 pass:100 dwarn:4 dfail:0 fail:0 skip:37
skl-i5k-2    total:141 pass:132 dwarn:1 dfail:0 fail:0 skip:8
skl-i7k-2    total:141 pass:132 dwarn:1 dfail:0 fail:0 skip:8
snb-dellxps  total:141 pass:122 dwarn:5 dfail:0 fail:0 skip:14
snb-x220t    total:141 pass:122 dwarn:5 dfail:0 fail:1 skip:13

Results at /archive/results/CI_IGT_test/Patchwork_1110/
[Intel-gfx] ✗ failure: Fi.CI.BAT
== Summary ==

HEAD is now at ff88655 drm-intel-nightly: 2016y-01m-11d-07h-30m-16s UTC integration manifest
Applying: drm/i915: Use passed plane state for sprite planes, v4.
Using index info to reconstruct a base tree...
M       drivers/gpu/drm/i915/intel_drv.h
M       drivers/gpu/drm/i915/intel_sprite.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/intel_sprite.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/intel_sprite.c
Patch failed at 0001 drm/i915: Use passed plane state for sprite planes, v4.
Re: [Intel-gfx] [PATCH igt] core/sighelper: Interrupt everyone in the process group
On Mon, Jan 11, 2016 at 08:57:33AM +0100, Daniel Vetter wrote: > On Fri, Jan 08, 2016 at 08:44:29AM +, Chris Wilson wrote: > > Some stress tests create both the signal helper and a lot of competing > > processes. In these tests, the parent is just waiting upon the children, > > and the intention is not to keep waking up the waiting parent, but to > > keep interrupting the children (as we hope to trigger races in our > > kernel code). kill(-pid) sends the signal to all members of the process > > group, not just the target pid. > > I don't really have any clue about unix pgroups, but the -pid disappeared > compared to the previous version. -getppid(). I felt it was clearer to pass along the "negative pid = process group" after setting up the process group. > > We also switch from using SIGUSR1 to SIGCONT to paper over a race > > condition when forking children that saw the default signal action being > > run (and thus killing the child). > > I thought I fixed that race by first installing the new signal handler, > then forking. Ok, rechecked and it's the SYS_getpid stuff, so another > race. Still I thought signal handlers would survive a fork? So did irc. They didn't appear to as the children would sporadically die with SIGUSR1. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
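[A toy illustration — not igt code — of the two mechanisms under discussion: process-group signalling via a negative pid, and SIGCONT being safe for a child that is signalled before it installs a handler, since SIGCONT's default action does not terminate the process:]

#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static void wakeup(int sig)
{
	/* no-op: the point is only to interrupt blocking syscalls */
}

int main(void)
{
	pid_t child;

	setpgid(0, 0);			/* become our own process-group leader */
	signal(SIGCONT, wakeup);

	child = fork();
	if (child == 0) {		/* group and handler are inherited */
		pause();		/* returns once the group-wide signal lands */
		_exit(0);
	}

	sleep(1);			/* crude: give the child time to block */
	kill(-getpgrp(), SIGCONT);	/* negative pid == whole process group */

	waitpid(child, NULL, 0);
	return 0;
}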
Re: [Intel-gfx] [PATCH 2/2] drm/i915/gen9: Calculate edram size
On Mon, Jan 11, 2016 at 08:50:43AM +0100, Daniel Vetter wrote: > On Fri, Jan 08, 2016 at 06:58:45PM +0200, Mika Kuoppala wrote: > > With gen9+ the edram capabilities are defined so > > that we can calculate the edram (ellc) size accordingly. > > > > Note that there are undefined combinations for some subset of > > edram capability bits. Return the closest size for undefined indexes. > > Even if we get it wrong with beginning of future gen enabling, the size > > information is currently only used for boot message and in debugfs entry. > > > > Signed-off-by: Mika Kuoppala > > --- > > drivers/gpu/drm/i915/i915_reg.h | 14 ++ > > drivers/gpu/drm/i915/intel_uncore.c | 9 + > > 2 files changed, 19 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_reg.h > > b/drivers/gpu/drm/i915/i915_reg.h > > index f88b75ec5047..52283c44a0c1 100644 > > --- a/drivers/gpu/drm/i915/i915_reg.h > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > @@ -6820,6 +6820,20 @@ enum skl_disp_power_wells { > > #defineIDIHASHMSK(x) (((x) & 0x3f) << 16) > > #define HSW_EDRAM_CAP _MMIO(0x120010) > > #defineEDRAM_ENABLED 0x1 > > +#defineEDRAM_NUM_BANKS(cap)(((cap) >> 1) & 0xf) > > +#define__EDRAM_WAYS(cap) (((cap) >> 5) & 0x7) > > +#defineEDRAM_NUM_WAYS(cap) ({ \ > > + int __ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 }; \ > > + __ways[__EDRAM_WAYS(cap)]; \ > > +}) > > +#define__EDRAM_SETS(cap) (((cap) >> 8) & 0x3) > > +#defineEDRAM_NUM_SETS(cap) ({ \ > > + int __sets[4] = { 1, 1, 2, 2 }; \ > > + __sets[__EDRAM_SETS(cap)]; \ > > +}) > > +#defineEDRAM_SIZE(cap) (EDRAM_NUM_BANKS(cap) * \ > > +EDRAM_NUM_WAYS(cap) * \ > > +EDRAM_NUM_SETS(cap)) > > Please just make a function out of this, no reason to make it into a > hard-to-read macro. E.g. gen9_edram_size or whatever is the first gen this > starts to be valid for. May I humbly suggest intel_uncore_edram_size_mb() ? ;) -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
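[A sketch of the function form being asked for, using Chris' suggested name. The field decoding is lifted verbatim from the macros in the quoted patch; the unit is whatever EDRAM_SIZE() produced, which the patch only feeds to a boot message and debugfs:]

static unsigned int intel_uncore_edram_size_mb(u32 cap)
{
	static const u8 ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 };
	static const u8 sets[4] = { 1, 1, 2, 2 };
	int num_banks = (cap >> 1) & 0xf;	/* EDRAM_NUM_BANKS */
	int num_ways = ways[(cap >> 5) & 0x7];	/* EDRAM_NUM_WAYS */
	int num_sets = sets[(cap >> 8) & 0x3];	/* EDRAM_NUM_SETS */

	return num_banks * num_ways * num_sets;
}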
Re: [Intel-gfx] [PATCH igt] core/sighelper: Interrupt everyone in the process group
On Mon, Jan 11, 2016 at 08:54:59AM +, Chris Wilson wrote: > On Mon, Jan 11, 2016 at 08:57:33AM +0100, Daniel Vetter wrote: > > On Fri, Jan 08, 2016 at 08:44:29AM +, Chris Wilson wrote: > > > Some stress tests create both the signal helper and a lot of competing > > > processes. In these tests, the parent is just waiting upon the children, > > > and the intention is not to keep waking up the waiting parent, but to > > > keep interrupting the children (as we hope to trigger races in our > > > kernel code). kill(-pid) sends the signal to all members of the process > > > group, not just the target pid. > > > > I don't really have any clue about unix pgroups, but the -pid disappeared > > compared to the previous version. > > -getppid(). > > I felt it was clearer to pass along the "negative pid = process group" > after setting up the process group. Oh, I was blind ... Yeah looks better, but please add a bigger comment around that code explaining why we need a group and why we use SIG_CONT. With that acked-by: me. Cheers, Daniel > > > We also switch from using SIGUSR1 to SIGCONT to paper over a race > > > condition when forking children that saw the default signal action being > > > run (and thus killing the child). > > > > I thought I fixed that race by first installing the new signal handler, > > then forking. Ok, rechecked and it's the SYS_getpid stuff, so another > > race. Still I thought signal handlers would survive a fork? > > So did irc. They didn't appear to as the children would sporadically > die with SIGUSR1. Could be that libc is doing something funny, iirc they have piles of fork helpers to make fork more reliable (breaking locks and stuff like that), but then in turn break the abstraction. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH igt] gem_concurrent_blit: Don't call igt_require() outside of a subtest/fixture
On Mon, Jan 11, 2016 at 08:52:24AM +, Chris Wilson wrote: > On Mon, Jan 11, 2016 at 09:00:13AM +0100, Daniel Vetter wrote: > > On Fri, Jan 08, 2016 at 09:10:38AM +, Chris Wilson wrote: > > > gem_concurrent_blit tries to ensure that it doesn't try and run a test > > > that would grind the system to a halt, i.e. unexpectedly cause swap > > > thrashing. It currently calls intel_require_memory(), but outside of > > > the subtest (as the tests use fork, it cannot do requirement testing > > > within the test children) - but intel_require_memory() calls > > > igt_require() and triggers and abort. Wrapping that initial require > > > within an igt_fixture() stops the abort(), but also prevents any further > > > testing. > > > > > > This patch restructures the requirement checking to ordinary conditions, > > > which though allowing the test to run, also prevents listing of subtests > > > on machines which cannot handle them. > > > > > > > --- > > > lib/igt_aux.h | 2 ++ > > > lib/intel_os.c | 53 +++- > > > tests/gem_concurrent_all.c | 67 > > > +- > > > 3 files changed, 85 insertions(+), 37 deletions(-) > > > > > > diff --git a/lib/igt_aux.h b/lib/igt_aux.h > > > index 6e11ee6..5a88c2a 100644 > > > --- a/lib/igt_aux.h > > > +++ b/lib/igt_aux.h > > > @@ -88,6 +88,8 @@ uint64_t intel_get_total_swap_mb(void); > > > > > > #define CHECK_RAM 0x1 > > > #define CHECK_SWAP 0x2 > > > +int __intel_check_memory(uint32_t count, uint64_t size, unsigned mode, > > > + uint64_t *out_required, uint64_t *out_total); > > > void intel_require_memory(uint32_t count, uint64_t size, unsigned mode); > > > int intel_num_objects_for_memory(uint32_t size, unsigned mode); > > > > > > diff --git a/lib/intel_os.c b/lib/intel_os.c > > > index dba9e17..90f9bb3 100644 > > > --- a/lib/intel_os.c > > > +++ b/lib/intel_os.c > > > @@ -192,6 +192,38 @@ intel_get_total_swap_mb(void) > > > return retval / (1024*1024); > > > } > > > > > > > Please add the usual gtkdoc boilerplate here with a mention of > > intel_check_memory. Ack with that. > > You were meant to object about how this breaks test runners and suggest > how we can do this without that breakage. Oh I didn't realize that you're moving things out of igt_fixture/igt_subtest blocks again ... Ack retracted, I'll check what happened to my coffee meanwhile ;-) -Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
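[For context, a compressed sketch of the two shapes being argued about — illustrative only, run_huge_copy() is a made-up stand-in:]

/* Shape A: requirement inside a fixture -- once the fixture skips,
 * nothing below it runs: */
igt_fixture {
	intel_require_memory(count, size, CHECK_RAM); /* igt_skip()s on failure */
}

/* Shape B: the patch's approach -- precompute a plain condition and
 * only define subtests that can run. That also hides them from subtest
 * enumeration, which is exactly Chris' objection: a runner that listed
 * subtests on one machine sees a different list on another: */
bool fits = __intel_check_memory(count, size, CHECK_RAM, NULL, NULL);

if (fits) {
	igt_subtest("huge-copy")
		run_huge_copy();
}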
[Intel-gfx] ✗ failure: Fi.CI.BAT
== Summary ==

Built on ff88655b3a5467bbc3be8c67d3e05ebf182557d3 drm-intel-nightly: 2016y-01m-11d-07h-30m-16s UTC integration manifest

Test gem_storedw_loop:
        Subgroup basic-render:
                pass -> DMESG-WARN (skl-i5k-2) UNSTABLE
                dmesg-warn -> PASS (bdw-ultra)
Test kms_flip:
        Subgroup basic-flip-vs-dpms:
                dmesg-warn -> PASS (ilk-hp8440p)
Test kms_pipe_crc_basic:
        Subgroup read-crc-pipe-b:
                pass -> DMESG-WARN (ilk-hp8440p)
                dmesg-warn -> PASS (byt-nuc)

bdw-ultra    total:138 pass:132 dwarn:0 dfail:0 fail:0 skip:6
bsw-nuc-2    total:141 pass:114 dwarn:3 dfail:0 fail:0 skip:24
byt-nuc      total:141 pass:119 dwarn:7 dfail:0 fail:0 skip:15
hsw-brixbox  total:141 pass:134 dwarn:0 dfail:0 fail:0 skip:7
hsw-gt2      total:141 pass:137 dwarn:0 dfail:0 fail:0 skip:4
hsw-xps12    total:138 pass:133 dwarn:1 dfail:0 fail:0 skip:4
ilk-hp8440p  total:141 pass:100 dwarn:4 dfail:0 fail:0 skip:37
skl-i5k-2    total:141 pass:131 dwarn:2 dfail:0 fail:0 skip:8
skl-i7k-2    total:141 pass:131 dwarn:2 dfail:0 fail:0 skip:8
snb-dellxps  total:141 pass:122 dwarn:5 dfail:0 fail:0 skip:14
snb-x220t    total:141 pass:122 dwarn:5 dfail:0 fail:1 skip:13

HANGED ivb-t430s in igt@kms_pipe_crc_basic@nonblocking-crc-pipe-b

Results at /archive/results/CI_IGT_test/Patchwork_1112/
[Intel-gfx] [PATCH 002/190] drm/i915: Move the mb() following release-mmap into release-mmap
As paranoia, we want to ensure that the CPU's PTEs have been revoked for the object before we return from i915_gem_release_mmap(). This allows us to rely on there being no outstanding memory accesses and guarantees serialisation of the code against concurrent access just by calling i915_gem_release_mmap(). v2: Reduce the mb() into a wmb() following the revoke. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: "Goel, Akash" Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6c60e04fc09c..3ab529669448 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1962,11 +1962,21 @@ out: void i915_gem_release_mmap(struct drm_i915_gem_object *obj) { + /* Serialisation between user GTT access and our code depends upon +* revoking the CPU's PTE whilst the mutex is held. The next user +* pagefault then has to wait until we release the mutex. +*/ + lockdep_assert_held(&obj->base.dev->struct_mutex); + if (!obj->fault_mappable) return; drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->anon_inode->i_mapping); + + /* Ensure that the CPU's PTE are revoked before we return */ + wmb(); + obj->fault_mappable = false; } @@ -3269,9 +3279,6 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) return; - /* Wait for any direct GTT access to complete */ - mb(); - old_read_domains = obj->base.read_domains; old_write_domain = obj->base.write_domain; -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 004/190] drm/i915: Fix some invalid requests cancellations
As we add the VMA to the request early, it may be cancelled during execbuf reservation. This will leave the context object pointing to a dangling request; i915_wait_request() simply skips the wait and so we may unbind the object whilst it is still active. However, if at any point we make a change to the hardware (and equally importantly our bookkeeping in the driver), we cannot cancel the request as what has already been written must be submitted. Submitting a partial request is far easier than trying to unwind the incomplete change. Unfortunately this patch undoes the excess breadcrumb usage that olr prevented, e.g. if we interrupt batchbuffer submission then we submit the requests along with the memory writes and interrupt (even though we do no real work). Disassociating requests from breadcrumbs (and semaphores) is a topic for a past/future series, but now much more important. Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: sta...@vger.kernel.org --- drivers/gpu/drm/i915/i915_drv.h| 1 - drivers/gpu/drm/i915/i915_gem.c| 7 ++- drivers/gpu/drm/i915/i915_gem_context.c| 21 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +--- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 1 - 6 files changed, 17 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 747d2d84a18c..ec20814adb0c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2813,7 +2813,6 @@ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct drm_i915_gem_request *req); -void i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params); int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3ab529669448..fd24877eb0a0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3384,12 +3384,9 @@ int i915_gpu_idle(struct drm_device *dev) return ret; ret = i915_switch_context(req); - if (ret) { - i915_gem_request_cancel(req); - return ret; - } - i915_add_request_no_flush(req); + if (ret) + return ret; } ret = intel_ring_idle(ring); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c25083c78ba7..e5e9a8918f19 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -661,7 +661,6 @@ static int do_switch(struct drm_i915_gem_request *req) struct drm_i915_private *dev_priv = ring->dev->dev_private; struct intel_context *from = ring->last_context; u32 hw_flags = 0; - bool uninitialized = false; int ret, i; if (from != NULL && ring == &dev_priv->ring[RCS]) { @@ -768,6 +767,15 @@ static int do_switch(struct drm_i915_gem_request *req) to->remap_slice &= ~(1init_context) { + ret = ring->init_context(req); + if (ret) + goto unpin_out; + } + to->legacy_hw_ctx.initialized = true; + } + /* The backing object for the context is done after switching to the * *next* context. Therefore we cannot retire the previous context until * the next context has already started running. 
In fact, the below code @@ -791,21 +799,10 @@ static int do_switch(struct drm_i915_gem_request *req) i915_gem_context_unreference(from); } - uninitialized = !to->legacy_hw_ctx.initialized; - to->legacy_hw_ctx.initialized = true; - done: i915_gem_context_reference(to); ring->last_context = to; - if (uninitialized) { - if (ring->init_context) { - ret = ring->init_context(req); - if (ret) - DRM_ERROR("ring init context: %d\n", ret); - } - } - return 0; unpin_out: diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index dccb517361b3..b8186bd061c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1136,7 +1136,7 @@ i915_gem_ex
[Intel-gfx] [PATCH 001/190] drm: Release driver references to handle before making it available again
When userspace closes a handle, we remove it from the file->object_idr and then tell the driver to drop its references to that file/handle. However, as the file/handle is already available again for reuse, it may be reallocated back to userspace and active on a new object before the driver has had a chance to drop the old file/handle references. Whilst calling back into the driver, we have to drop the file->table_lock spinlock and so to prevent reusing the closed handle we mark that handle as stale in the idr, perform the callback and then remove the handle. We set the stale handle to point to the NULL object, then any idr_find() whilst the driver is removing the handle will return NULL, just as if the handle is already removed from idr. v2: Use NULL rather than an ERR_PTR to avoid having to adjust callers. idr_alloc() tracks existing handles using an internal bitmap, so we are free to use the NULL object as our stale identifier. Signed-off-by: Chris Wilson Cc: dri-de...@lists.freedesktop.org Cc: David Airlie Cc: Daniel Vetter Cc: Rob Clark Cc: Ville Syrjälä Cc: Thierry Reding --- drivers/gpu/drm/drm_gem.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 2e8c77e71e1f..d1909d1a1eb4 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -294,18 +294,21 @@ drm_gem_handle_delete(struct drm_file *filp, u32 handle) spin_lock(&filp->table_lock); /* Check if we currently have a reference on the object */ - obj = idr_find(&filp->object_idr, handle); - if (obj == NULL) { + obj = idr_replace(&filp->object_idr, NULL, handle); + if (IS_ERR(obj)) { spin_unlock(&filp->table_lock); return -EINVAL; } dev = obj->dev; + spin_unlock(&filp->table_lock); /* Release reference and decrement refcount. */ + drm_gem_object_release_handle(handle, obj, filp); + + spin_lock(&filp->table_lock); idr_remove(&filp->object_idr, handle); spin_unlock(&filp->table_lock); - drm_gem_object_release_handle(handle, obj, filp); return 0; } EXPORT_SYMBOL(drm_gem_handle_delete); -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
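For illustration, here is a standalone userspace model of the protocol above; the fixed-size table, pthread mutex and release callback are stand-ins for the kernel's idr, file->table_lock and drm_gem_object_release_handle(), and unlike a real idr the NULL slot here is not kept reserved against reallocation, so read it purely as a sketch of the ordering:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_HANDLES 16

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static void *object_table[MAX_HANDLES];        /* NULL == stale or empty */

/* Concurrent lookups see NULL for a stale handle, exactly as if the
 * handle had already been removed. */
static void *handle_lookup(unsigned int handle)
{
        void *obj = NULL;

        pthread_mutex_lock(&table_lock);
        if (handle < MAX_HANDLES)
                obj = object_table[handle];
        pthread_mutex_unlock(&table_lock);
        return obj;
}

static int handle_delete(unsigned int handle, void (*release)(void *))
{
        void *obj;

        pthread_mutex_lock(&table_lock);
        obj = handle < MAX_HANDLES ? object_table[handle] : NULL;
        if (obj == NULL) {
                pthread_mutex_unlock(&table_lock);
                return -1;                      /* -EINVAL in the kernel */
        }
        object_table[handle] = NULL;            /* mark stale, as idr_replace() does */
        pthread_mutex_unlock(&table_lock);

        release(obj);                           /* driver callback, called unlocked */

        pthread_mutex_lock(&table_lock);
        /* only now may the slot be reallocated; idr_remove() in the kernel */
        pthread_mutex_unlock(&table_lock);
        return 0;
}

int main(void)
{
        object_table[3] = malloc(16);
        printf("%d\n", handle_delete(3, free));  /* 0 */
        printf("%p\n", handle_lookup(3));        /* (nil) */
        return 0;
}

The point is that the lookup path never needs to learn about the intermediate state: a stale handle and an unused handle look identical to it.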
[Intel-gfx] [PATCH 006/190] drm/i915: Add GEM debugging Kconfig option
Currently there is a #define to enable extra BUG_ON for debugging requests and associated activities. I want to expand its use to cover all of GEM internals (so that we can saturate the code with asserts). We can add a Kconfig option to make it easier to enable - with the usual caveats of not enabling unless explicitly requested. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/Kconfig.debug | 8 drivers/gpu/drm/i915/i915_drv.h| 6 ++ drivers/gpu/drm/i915/i915_gem.c| 12 +--- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 1f10ee228eda..7fa6b97635e5 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -10,3 +10,11 @@ config DRM_I915_WERROR ---help--- Add -Werror to the build flags for (and only for) i915.ko. Do not enable this unless you are writing code for the i915.ko module. + +config DRM_I915_DEBUG_GEM + bool "Insert extra checks into the GEM internals" + default n + depends on DRM_I915_WERROR + ---help--- + Enable extra sanity checks (including BUGs) that may slow the + system down and if hit hang the machine. diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ec20814adb0c..1a6168affadd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2271,6 +2271,12 @@ struct drm_i915_gem_request { }; +#ifdef CONFIG_DRM_I915_DEBUG_GEM +#define GEM_BUG_ON(expr) BUG_ON(expr) +#else +#define GEM_BUG_ON(expr) +#endif + int i915_gem_request_alloc(struct intel_engine_cs *ring, struct intel_context *ctx, struct drm_i915_gem_request **req_out); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fd24877eb0a0..99fd6aa4dd62 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -38,8 +38,6 @@ #include #include -#define RQ_BUG_ON(expr) - static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); static void @@ -1520,7 +1518,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, i915_gem_object_retire__read(obj, i); } - RQ_BUG_ON(obj->active); + GEM_BUG_ON(obj->active); } return 0; @@ -2430,8 +2428,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj) { - RQ_BUG_ON(obj->last_write_req == NULL); - RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); + GEM_BUG_ON(obj->last_write_req == NULL); + GEM_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); i915_gem_request_assign(&obj->last_write_req, NULL); intel_fb_obj_flush(obj, true, ORIGIN_CS); @@ -2442,8 +2440,8 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) { struct i915_vma *vma; - RQ_BUG_ON(obj->last_read_req[ring] == NULL); - RQ_BUG_ON(!(obj->active & (1 << ring))); + GEM_BUG_ON(obj->last_read_req[ring] == NULL); + GEM_BUG_ON(!(obj->active & (1 << ring))); list_del_init(&obj->ring_list[ring]); i915_gem_request_assign(&obj->last_read_req[ring], NULL); -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
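A minimal standalone illustration of the pattern, with assert() standing in for BUG_ON() and the Kconfig symbol supplied by hand on the compiler command line rather than by Kbuild:

#include <assert.h>
#include <stdio.h>

/* Build with -DCONFIG_DRM_I915_DEBUG_GEM to enable the checks; without
 * it the macro expands to nothing, so saturating the code with asserts
 * costs zero at runtime in production builds. */
#ifdef CONFIG_DRM_I915_DEBUG_GEM
#define GEM_BUG_ON(expr) assert(!(expr))
#else
#define GEM_BUG_ON(expr) ((void)0)
#endif

int main(void)
{
        int active = 0;

        GEM_BUG_ON(active != 0);        /* compiled out unless requested */
        printf("passed\n");
        return 0;
}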
[Intel-gfx] [PATCH 007/190] drm/i915: Hide the atomic_read(reset_counter) behind a helper
This is principally a little bit of syntactic sugar to hide the atomic_read()s throughout the code to retrieve the current reset_counter. It also provides the other utility functions to check the reset state on the already read reset_counter, so that (in later patches) we can read it once and do multiple tests rather than risk the value changing between tests. v2: Be strict about converting existing users of i915_reset_in_progress() over to the more verbose i915_reset_in_progress_or_wedged(). Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 32 drivers/gpu/drm/i915/i915_gem.c | 16 drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_display.c| 18 +++--- drivers/gpu/drm/i915/intel_lrc.c| 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 7 files changed, 53 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e3377abc0d4d..932af05b8eec 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4696,7 +4696,7 @@ i915_wedged_get(void *data, u64 *val) struct drm_device *dev = data; struct drm_i915_private *dev_priv = dev->dev_private; - *val = atomic_read(&dev_priv->gpu_error.reset_counter); + *val = i915_reset_counter(&dev_priv->gpu_error); return 0; } @@ -4715,7 +4715,7 @@ i915_wedged_set(void *data, u64 val) * while it is writing to 'i915_wedged' */ - if (i915_reset_in_progress(&dev_priv->gpu_error)) + if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) return -EAGAIN; intel_runtime_pm_get(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1a6168affadd..b274237726de 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2983,20 +2983,44 @@ void i915_gem_retire_requests_ring(struct intel_engine_cs *ring); int __must_check i915_gem_check_wedge(struct i915_gpu_error *error, bool interruptible); +static inline u32 i915_reset_counter(struct i915_gpu_error *error) +{ + return atomic_read(&error->reset_counter); +} + +static inline bool __i915_reset_in_progress(u32 reset) +{ + return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG); +} + +static inline bool __i915_reset_in_progress_or_wedged(u32 reset) +{ + return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED)); +} + +static inline bool __i915_terminally_wedged(u32 reset) +{ + return unlikely(reset & I915_WEDGED); +} + static inline bool i915_reset_in_progress(struct i915_gpu_error *error) { - return unlikely(atomic_read(&error->reset_counter) - & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED)); + return __i915_reset_in_progress(i915_reset_counter(error)); +} + +static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error) +{ + return __i915_reset_in_progress_or_wedged(i915_reset_counter(error)); } static inline bool i915_terminally_wedged(struct i915_gpu_error *error) { - return atomic_read(&error->reset_counter) & I915_WEDGED; + return __i915_terminally_wedged(i915_reset_counter(error)); } static inline u32 i915_reset_count(struct i915_gpu_error *error) { - return ((atomic_read(&error->reset_counter) & ~I915_WEDGED) + 1) / 2; + return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2; } static inline bool i915_stop_ring_allow_ban(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 99fd6aa4dd62..78bf980a69bf 100644 --- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c @@ -83,7 +83,7 @@ i915_gem_wait_for_error(struct i915_gpu_error *error) { int ret; -#define EXIT_COND (!i915_reset_in_progress(error) || \ +#define EXIT_COND (!i915_reset_in_progress_or_wedged(error) || \ i915_terminally_wedged(error)) if (EXIT_COND) return 0; @@ -,7 +,7 @@ int i915_gem_check_wedge(struct i915_gpu_error *error, bool interruptible) { - if (i915_reset_in_progress(error)) { + if (i915_reset_in_progress_or_wedged(error)) { /* Non-interruptible callers can't handle -EAGAIN, hence return * -EIO unconditionally for these. */ if (!interruptible) @@ -1295,7 +1295,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, /* We need to check whether any gpu reset happened in between * the caller grabbing the seqno and now ... */ - if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { + if
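A standalone sketch of the read-once idiom these helpers enable; the two flag values are written out by hand here and merely mirror the driver's I915_RESET_IN_PROGRESS_FLAG/I915_WEDGED split:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define RESET_IN_PROGRESS_FLAG  (1u << 0)       /* illustrative values */
#define WEDGED                  (1u << 31)

static atomic_uint reset_counter;       /* i915_gpu_error.reset_counter */

static unsigned int read_reset_counter(void)    /* i915_reset_counter() */
{
        return atomic_load(&reset_counter);
}

static bool reset_in_progress(unsigned int snap)
{
        return snap & RESET_IN_PROGRESS_FLAG;
}

static bool terminally_wedged(unsigned int snap)
{
        return snap & WEDGED;
}

int main(void)
{
        /* read once ... */
        unsigned int snap = read_reset_counter();

        /* ... then test the same snapshot as often as needed; the answers
         * cannot change under us the way repeated atomic_read()s could */
        printf("in progress: %d, wedged: %d\n",
               reset_in_progress(snap), terminally_wedged(snap));
        return 0;
}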
[Intel-gfx] [PATCH 011/190] drm/i915: Simplify reset_counter handling during atomic modesetting
Now that the reset_counter is stored on the request, we can rearrange the code to handle reading the counter versus waiting during the atomic modesetting for readibility (by deleting the hairiest of codes). Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4f36313f31ac..ee0ec72b16b4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13504,9 +13504,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, return ret; ret = drm_atomic_helper_prepare_planes(dev, state); - if (!ret && !async && !i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) { - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); + if (!ret && !async) { for_each_plane_in_state(state, plane, plane_state, i) { struct intel_plane_state *intel_plane_state = to_intel_plane_state(plane_state); @@ -13520,19 +13520,15 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, /* Swallow -EIO errors to allow updates during hw lockup. */ if (ret == -EIO) ret = 0; - - if (ret) + if (ret) { + mutex_lock(&dev->struct_mutex); + drm_atomic_helper_cleanup_planes(dev, state); + mutex_unlock(&dev->struct_mutex); break; + } } - - if (!ret) - return 0; - - mutex_lock(&dev->struct_mutex); - drm_atomic_helper_cleanup_planes(dev, state); } - mutex_unlock(&dev->struct_mutex); return ret; } -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 005/190] drm/i915: Force clean compilation with -Werror
Our driver compiles clean (nowadays thanks to 0day) but for me, at least, it would be beneficial if the compiler threw an error rather than a warning when it found a piece of suspect code. (I use this to compile-check patch series and want to break on the first compiler error in order to fix the patch.) v2: Kick off a new "Debugging" submenu for i915.ko At this point, we applied it to the kernel and promptly kicked it out again as it broke buildbots (due to a compiler warning on 32bits): commit 908d759b210effb33d927a8cb6603a16448474e4 Author: Daniel Vetter Date: Tue May 26 07:46:21 2015 +0200 Revert "drm/i915: Force clean compilation with -Werror" v3: Avoid enabling -Werror for allyesconfig/allmodconfig builds, using COMPILE_TEST as a suitable proxy suggested by Andrew Morton. (Damien) Only make the option available for EXPERT to reinforce that the option should not be casually enabled. Signed-off-by: Chris Wilson Cc: Jani Nikula Cc: Damien Lespiau Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/Kconfig | 6 ++ drivers/gpu/drm/i915/Kconfig.debug | 12 drivers/gpu/drm/i915/Makefile | 2 ++ 3 files changed, 20 insertions(+) create mode 100644 drivers/gpu/drm/i915/Kconfig.debug diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index b979295aab82..33e8563c2f99 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -59,3 +59,9 @@ config DRM_I915_USERPTR selected to enabled full userptr support. If in doubt, say "Y". + +menu "drm/i915 Debugging" +depends on DRM_I915 +depends on EXPERT +source drivers/gpu/drm/i915/Kconfig.debug +endmenu diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug new file mode 100644 index ..1f10ee228eda --- /dev/null +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -0,0 +1,12 @@ +config DRM_I915_WERROR + bool "Force GCC to throw an error instead of a warning when compiling" + default n + # As this may inadvertently break the build, only allow the user + # to shoot oneself in the foot iff they aim really hard + depends on EXPERT + # We use the dependency on !COMPILE_TEST to not be enabled in + # allmodconfig or allyesconfig configurations + depends on !COMPILE_TEST + ---help--- + Add -Werror to the build flags for (and only for) i915.ko. + Do not enable this unless you are writing code for the i915.ko module. diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 0851de07bd13..1e9895b9a546 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -2,6 +2,8 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. +subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror + # Please keep these build lists sorted! # core driver code -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 017/190] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+
In order to ensure seqno/irq coherency, we currently read a ring register. We are not sure quite how it works, only that it does. Experiments show that e.g. doing a clflush(seqno) instead is not sufficient, but we can remove the forcewake dance from the mmio access. v2: Baytrail wants a clflush too. Signed-off-by: Chris Wilson Cc: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 15 +-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 99780b674311..a1d43b2c7077 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1490,10 +1490,21 @@ gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) { /* Workaround to force correct ordering between irq and seqno writes on * ivb (and maybe also on snb) by reading from a CS register (like -* ACTHD) before reading the status page. */ +* ACTHD) before reading the status page. +* +* Note that this effectively stalls the read by the time +* it takes to do a memory transaction, which more or less ensures +* that the write from the GPU has sufficient time to invalidate +* the CPU cacheline. Alternatively we could delay the interrupt from +* the CS ring to give the write time to land, but that would incur +* a delay after every batch i.e. much more frequent than a delay +* when waiting for the interrupt (with the same net latency). +*/ if (!lazy_coherency) { struct drm_i915_private *dev_priv = ring->dev->dev_private; - POSTING_READ(RING_ACTHD(ring->mmio_base)); + POSTING_READ_FW(RING_ACTHD(ring->mmio_base)); + + intel_flush_status_page(ring, I915_GEM_HWS_INDEX); } return intel_read_status_page(ring, I915_GEM_HWS_INDEX); -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 010/190] drm/i915: Store the reset counter when constructing a request
As the request is only valid during the same global reset epoch, we can record the current reset_counter when constructing the request and reuse it when waiting upon that request in future. This removes a very hairy atomic check serialised by the struct_mutex at the time of waiting and allows us to transfer those waits to a central dispatcher for all waiters and all requests. PS: With per-engine resets, we obviously cannot assume a global reset epoch for the requests - a per-engine epoch makes the most sense. The challenge then is how to handle checking in the waiter for when to break the wait, as the fine-grained reset may also want to requeue the request (i.e. the assumption that just because the epoch changes the request is completed may be broken - or we just avoid breaking that assumption with the fine-grained resets). Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by:: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 40 +++-- drivers/gpu/drm/i915/intel_display.c| 7 +- drivers/gpu/drm/i915/intel_lrc.c| 7 -- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 - 5 files changed, 15 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 60531df3844c..f74bca326b79 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2191,6 +2191,7 @@ struct drm_i915_gem_request { /** On Which ring this request was generated */ struct drm_i915_private *i915; struct intel_engine_cs *ring; + unsigned reset_counter; /** GEM sequence number associated with the previous request, * when the HWS breadcrumb is equal to this the GPU is processing @@ -3050,7 +3051,6 @@ void __i915_add_request(struct drm_i915_gem_request *req, #define i915_add_request_no_flush(req) \ __i915_add_request(req, NULL, false) int __i915_wait_request(struct drm_i915_gem_request *req, - unsigned reset_counter, bool interruptible, s64 *timeout, struct intel_rps_client *rps); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2cdd20b3aeaf..56069bdada85 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1212,7 +1212,6 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) /** * __i915_wait_request - wait until execution of request has finished * @req: duh! - * @reset_counter: reset sequence associated with the given request * @interruptible: do an interruptible wait (normally yes) * @timeout: in - how long to wait (NULL forever); out - how much time remaining * @@ -1227,7 +1226,6 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) * errno with remaining time filled in timeout argument. */ int __i915_wait_request(struct drm_i915_gem_request *req, - unsigned reset_counter, bool interruptible, s64 *timeout, struct intel_rps_client *rps) @@ -1286,7 +1284,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, /* We need to check whether any gpu reset happened in between * the caller grabbing the seqno and now ... */ - if (reset_counter != i915_reset_counter(&dev_priv->gpu_error)) { + if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) { /* ... but upgrade the -EAGAIN to an -EIO if the gpu * is truely gone. 
*/ ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); @@ -1459,13 +1457,7 @@ i915_wait_request(struct drm_i915_gem_request *req) BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); - if (ret) - return ret; - - ret = __i915_wait_request(req, - i915_reset_counter(&dev_priv->gpu_error), - interruptible, NULL, NULL); + ret = __i915_wait_request(req, interruptible, NULL, NULL); if (ret) return ret; @@ -1540,7 +1532,6 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_request *requests[I915_NUM_RINGS]; - unsigned reset_counter; int ret, i, n = 0; BUG_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -1549,12 +1540,6 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (!obj->active) return 0; - ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); -
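A standalone model of the epoch idea with stand-in types; in the driver the sample is i915_reset_counter() at request construction and the comparison happens in __i915_wait_request():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint global_reset_counter;

struct request {
        unsigned int reset_counter;     /* epoch sampled at construction */
};

static void request_init(struct request *req)
{
        req->reset_counter = atomic_load(&global_reset_counter);
}

/* No lock needed at wait time: a reset between construction and wait has
 * bumped the global counter, so the request belongs to a dead epoch. */
static bool wait_broken_by_reset(const struct request *req)
{
        return req->reset_counter != atomic_load(&global_reset_counter);
}

int main(void)
{
        struct request req;

        request_init(&req);
        printf("%d\n", wait_broken_by_reset(&req));     /* 0 */
        atomic_fetch_add(&global_reset_counter, 2);     /* a completed reset */
        printf("%d\n", wait_broken_by_reset(&req));     /* 1 */
        return 0;
}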
[Intel-gfx] [PATCH 020/190] drm/i915: Remove the lazy_coherency parameter from request-completed?
Now that we have split out the seqno-barrier from the engine->get_seqno() callback itself, we can move the users of the seqno-barrier to the required callsites simplifying the common code and making the required workaround handling much more explicit. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 17 - drivers/gpu/drm/i915/i915_gem.c | 24 drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 5 files changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1499e2337e5d..d09e48455dcb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -601,7 +601,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, ring->get_seqno(ring), - i915_gem_request_completed(work->flip_queued_req, true)); + i915_gem_request_completed(work->flip_queued_req)); } else seq_printf(m, "Flip not associated with any ring\n"); seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n", @@ -1354,8 +1354,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); for_each_ring(ring, dev_priv, i) { - seqno[i] = ring->get_seqno(ring); acthd[i] = intel_ring_get_active_head(ring); + seqno[i] = ring->get_seqno(ring); } i915_get_extra_instdone(dev, instdone); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9762aa76bb0a..44d46018ee13 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2969,20 +2969,14 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) return (int32_t)(seq1 - seq2) >= 0; } -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req, - bool lazy_coherency) +static inline bool i915_gem_request_started(struct drm_i915_gem_request *req) { - if (!lazy_coherency && req->ring->irq_seqno_barrier) - req->ring->irq_seqno_barrier(req->ring); return i915_seqno_passed(req->ring->get_seqno(req->ring), req->previous_seqno); } -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, - bool lazy_coherency) +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) { - if (!lazy_coherency && req->ring->irq_seqno_barrier) - req->ring->irq_seqno_barrier(req->ring); return i915_seqno_passed(req->ring->get_seqno(req->ring), req->seqno); } @@ -3636,6 +3630,8 @@ static inline void i915_trace_irq_get(struct intel_engine_cs *ring, static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) { + struct intel_engine_cs *engine = req->ring; + /* Ensure our read of the seqno is coherent so that we * do not "miss an interrupt" (i.e. if this is the last * request and the seqno write from the GPU is not visible @@ -3647,7 +3643,10 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * but it is easier and safer to do it every time the waiter * is woken. 
*/ - if (i915_gem_request_completed(req, false)) + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + + if (i915_gem_request_completed(req)) return true; /* We need to check whether any gpu reset happened in between diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4b26529f1f44..d125820c6309 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1171,12 +1171,12 @@ static bool __i915_spin_request(struct drm_i915_gem_request *req, */ /* Only spin if we know the GPU is processing this request */ - if (!i915_gem_request_started(req, true)) + if (!i915_gem_request_started(req)) return false; timeout = local_clock_us(&cpu) + 5; do { - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) return true; if (signal_pending_state(state, wait->task)) @@ -1228,7 +1228,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, if (list_empty(&req->list)
[Intel-gfx] [PATCH 026/190] drm/i915: Stop setting wraparound seqno on initialisation
We have testcases to ensure that seqno wraparound works fine, so we can forgo forcing everyone to encounter seqno wraparound during early uptime. A seqno wraparound incurs a full GPU stall, so not forcing it eliminates one source of jitter from the early system. The testcases already give us very deterministic coverage, and given how difficult it would be to debug an issue (GPU hang) stemming from a wraparound using pure postmortem analysis, I see no value in forcing a wrap during boot. Advancing the global next_seqno after a GPU reset is equally pointless. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 16 +--- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d125820c6309..a0744626a110 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4814,14 +4814,6 @@ i915_gem_init_hw(struct drm_device *dev) } } - /* -* Increment the next seqno by 0x100 so we have a visible break -* on re-initialisation -*/ - ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); - if (ret) - goto out; - /* Now it is safe to go back round and do everything else: */ for_each_ring(ring, dev_priv, i) { struct drm_i915_gem_request *req; @@ -5001,13 +4993,7 @@ i915_gem_load(struct drm_device *dev) dev_priv->num_fence_regs = I915_READ(vgtif_reg(avail_rs.fence_num)); - /* -* Set initial sequence number for requests. -* Using this number allows the wraparound to happen early, -* catching any obvious problems. -*/ - dev_priv->next_seqno = ((u32)~0 - 0x1100); - dev_priv->last_seqno = ((u32)~0 - 0x1101); + dev_priv->next_seqno = 1; /* Initialize fence registers to zero */ INIT_LIST_HEAD(&dev_priv->mm.fence_list); -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
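The safety net that makes this reasonable is the wraparound-safe comparison already used throughout the driver (i915_seqno_passed(), visible in the context of patch 020 above); a standalone demonstration of why a wrap is harmless to the comparison itself:

#include <stdint.h>
#include <stdio.h>

static int seqno_passed(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) >= 0;     /* modular, wrap-safe compare */
}

int main(void)
{
        /* 2 is "after" 0xfffffffe even though it is numerically smaller */
        printf("%d\n", seqno_passed(2, 0xfffffffeu));   /* prints 1 */
        printf("%d\n", seqno_passed(0xfffffffeu, 2));   /* prints 0 */
        return 0;
}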
[Intel-gfx] [PATCH 031/190] drm/i915: Harden detection of missed interrupts
Only declare a missed interrupt if we find that the GPU is idle with waiters and a hangcheck interval has passed in which no new user interrupts have been raised. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++ drivers/gpu/drm/i915/i915_irq.c | 10 -- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5a706c700684..567f8db4c70a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -735,6 +735,9 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", ring->name, intel_ring_get_seqno(ring)); + seq_printf(m, "Current user interrupts (%s): %x\n", + ring->name, READ_ONCE(ring->user_interrupts)); + spin_lock(&ring->breadcrumbs.lock); for (rb = rb_first(&ring->breadcrumbs.waiters); rb != NULL; @@ -1372,6 +1375,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_printf(m, "\tseqno = %x [current %x], waiters? %d\n", ring->hangcheck.seqno, seqno[i], intel_engine_has_waiter(ring)); + seq_printf(m, "\tuser interrupts = %x [current %x]\n", + ring->hangcheck.user_interrupts, + ring->user_interrupts); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)ring->hangcheck.acthd, (long long)acthd[i]); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index bf48fa63127a..b3942dec7de4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -997,8 +997,10 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev) static void notify_ring(struct intel_engine_cs *ring) { ring->irq_posted = true; /* paired with mb() in wake_up_process() */ - if (intel_engine_wakeup(ring)) + if (intel_engine_wakeup(ring)) { trace_i915_gem_request_notify(ring); + ring->user_interrupts++; + } } static void vlv_c0_read(struct drm_i915_private *dev_priv, @@ -3061,12 +3063,14 @@ static void i915_hangcheck_elapsed(struct work_struct *work) for_each_ring(ring, dev_priv, i) { u64 acthd; u32 seqno; + unsigned user_interrupts; bool busy = true; semaphore_clear_deadlocks(dev_priv); acthd = intel_ring_get_active_head(ring); seqno = intel_ring_get_seqno(ring); + user_interrupts = READ_ONCE(ring->user_interrupts); if (ring->hangcheck.seqno == seqno) { if (ring_idle(ring, seqno)) { @@ -3074,7 +3078,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (intel_engine_has_waiter(ring)) { /* Issue a wake-up to catch stuck h/w. */ - if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) { + if (ring->hangcheck.user_interrupts == user_interrupts && + !test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) { if (!test_bit(ring->id, &dev_priv->gpu_error.test_irq_rings)) DRM_ERROR("Hangcheck timer elapsed... 
%s idle\n", ring->name); @@ -3142,6 +3147,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) ring->hangcheck.seqno = seqno; ring->hangcheck.acthd = acthd; + ring->hangcheck.user_interrupts = user_interrupts; busy_count += busy; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3364bcebd456..73da75fa47c1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -90,6 +90,7 @@ struct intel_ring_hangcheck { u64 acthd; u64 max_acthd; u32 seqno; + unsigned user_interrupts; int score; enum intel_ring_hangcheck_action action; int deadlock; @@ -328,6 +329,7 @@ struct intel_engine_cs { * inspecting request list. */ u32 last_submitted_seqno; + unsigned user_interrupts; bool gpu_caches_dirty; -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
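A standalone model of the hardened heuristic; the struct and field names are stand-ins for the driver's hangcheck bookkeeping:

#include <stdbool.h>
#include <stdio.h>

struct engine_model {
        unsigned int user_interrupts;           /* bumped by the irq handler */
        unsigned int hangcheck_user_interrupts; /* sample from last interval */
        bool idle;
        bool has_waiter;
};

/* Declare a missed interrupt only if the engine is idle, somebody is
 * waiting, and no user interrupt has arrived since the last hangcheck. */
static bool missed_interrupt(struct engine_model *e)
{
        bool missed = e->idle && e->has_waiter &&
                      e->user_interrupts == e->hangcheck_user_interrupts;

        /* resample for the next interval, as the hangcheck worker does */
        e->hangcheck_user_interrupts = e->user_interrupts;
        return missed;
}

int main(void)
{
        struct engine_model e = {
                .user_interrupts = 1,   /* one irq delivered so far */
                .idle = true,
                .has_waiter = true,
        };

        printf("%d\n", missed_interrupt(&e));   /* 0: first sample */
        printf("%d\n", missed_interrupt(&e));   /* 1: idle + waiter, no new irq */
        e.user_interrupts++;                    /* an interrupt arrives */
        printf("%d\n", missed_interrupt(&e));   /* 0: the counter moved */
        return 0;
}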
[Intel-gfx] [PATCH 013/190] drm/i915: Suppress error message when GPU resets are disabled
If we do not have low-level support for resetting the GPU, or if the user has explicitly disabled resetting the device, the failure is expected. Since it is an expected failure, we should be using a lower priority message than *ERROR*, perhaps NOTICE. In the absence of DRM_NOTICE, just emit the expected failure as a DEBUG message. Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2f03379cdb4b..5160f1414de4 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -910,7 +910,10 @@ int i915_reset(struct drm_device *dev) pr_notice("drm/i915: Resetting chip after gpu hang\n"); if (ret) { - DRM_ERROR("Failed to reset chip: %i\n", ret); + if (ret != -ENODEV) + DRM_ERROR("Failed to reset chip: %i\n", ret); + else + DRM_DEBUG_DRIVER("GPU reset disabled\n"); goto error; } -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 041/190] drm/i915: Allow userspace to request no-error-capture upon GPU hangs
igt likes to inject GPU hangs into its command streams. However, as we expect these hangs, we don't actually want them recorded in the dmesg output or stored in the i915_error_state (usually). To accomodate this allow userspace to set a flag on the context that any hang emanating from that context will not be recorded. We still do the error capture (otherwise how do we find the guilty context and know its intent?) as part of the reason for random GPU hang injection is to exercise the race conditions between the error capture and normal execution. v2: Split out the request->ringbuf error capture changes. v3: Move the flag defines next to the intel_context->flags definition Signed-off-by: Chris Wilson Acked-by: Daniel Vetter Reviewed-by: Dave Gordon --- drivers/gpu/drm/i915/i915_drv.h | 7 +-- drivers/gpu/drm/i915/i915_gem_context.c | 13 + drivers/gpu/drm/i915/i915_gpu_error.c | 14 +- include/uapi/drm/i915_drm.h | 1 + 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c3b795f1566b..57e450e25ad6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -486,6 +486,7 @@ struct drm_i915_error_state { struct timeval time; char error_msg[128]; + bool simulated; int iommu; u32 reset_count; u32 suspend_count; @@ -842,7 +843,6 @@ struct i915_ctx_hang_stats { /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 -#define CONTEXT_NO_ZEROMAP (1<<0) /** * struct intel_context - as the name implies, represents a context. * @ref: reference count. @@ -867,11 +867,14 @@ struct intel_context { int user_handle; uint8_t remap_slice; struct drm_i915_private *i915; - int flags; struct drm_i915_file_private *file_priv; struct i915_ctx_hang_stats hang_stats; struct i915_hw_ppgtt *ppgtt; + unsigned flags; +#define CONTEXT_NO_ZEROMAP (1<<0) +#define CONTEXT_NO_ERROR_CAPTURE (1<<1) + /* Legacy ring buffer submission */ struct { struct drm_i915_gem_object *rcs_state; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index e5e9a8918f19..0aea5ccf6d68 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -939,6 +939,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, else args->value = to_i915(dev)->gtt.base.total; break; + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE); + break; default: ret = -EINVAL; break; @@ -984,6 +987,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, ctx->flags |= args->value ? 
CONTEXT_NO_ZEROMAP : 0; } break; + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + if (args->size) { + ret = -EINVAL; + } else { + if (args->value) + ctx->flags |= CONTEXT_NO_ERROR_CAPTURE; + else + ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE; + } + break; default: ret = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 93da2c7581f6..4f17d6847569 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1040,6 +1040,8 @@ static void i915_gem_record_rings(struct drm_device *dev, rcu_read_unlock(); } + error->simulated |= request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE; + rb = request->ringbuf; error->ring[i].cpu_ring_head = rb->head; error->ring[i].cpu_ring_tail = rb->tail; @@ -1333,12 +1335,14 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged, i915_error_capture_msg(dev, error, wedged, error_msg); DRM_INFO("%s\n", error->error_msg); - spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); - if (dev_priv->gpu_error.first_error == NULL) { - dev_priv->gpu_error.first_error = error; - error = NULL; + if (!error->simulated) { + spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); + if (dev_priv->gpu_error.first_error == NULL) { + dev_priv->gpu_error.first_error = error; + error = NULL; + } + spin_unlock_irqrestore(&dev_priv->gpu
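On the userspace side, a hedged sketch of how a test suite could flag such a context once this uapi lands; it assumes installed kernel headers that already define drm_i915_gem_context_param and the new I915_CONTEXT_PARAM_NO_ERROR_CAPTURE value, and /dev/dri/card0 plus ctx_id 0 (the default context) are purely illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int context_disable_error_capture(int drm_fd, unsigned int ctx_id)
{
        struct drm_i915_gem_context_param arg;

        memset(&arg, 0, sizeof(arg));
        arg.ctx_id = ctx_id;
        arg.size = 0;   /* the kernel rejects any other size for this param */
        arg.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE;
        arg.value = 1;  /* hangs emanating from this context are expected */

        return ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}

int main(void)
{
        int fd = open("/dev/dri/card0", O_RDWR);

        if (fd < 0)
                return 1;
        printf("setparam: %d\n", context_disable_error_capture(fd, 0));
        close(fd);
        return 0;
}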
[Intel-gfx] [PATCH 032/190] drm/i915: Remove debug noise on detecting fault-injection of missed interrupts
Since the tests can and do explicitly check debugfs/i915_ring_missed_irqs for the handling of a "missed interrupt", adding it to the dmesg at INFO is just noise. When it happens for real, we still class it as an ERROR. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b3942dec7de4..502663f13cd8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3083,9 +3083,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (!test_bit(ring->id, &dev_priv->gpu_error.test_irq_rings)) DRM_ERROR("Hangcheck timer elapsed... %s idle\n", ring->name); - else - DRM_INFO("Fake missed irq on %s\n", -ring->name); intel_engine_enable_fake_irq(ring); } -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 030/190] drm/i915: Move the get/put irq locking into the caller
With only a single callsite for intel_engine_cs->irq_get and ->irq_put, we can reduce the code size by moving the common preamble into the caller, and we can also eliminate the reference counting. For completeness, as we are no longer doing reference counting on irq, rename the get/put vfunctions to enable/disable respectively. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 8 +- drivers/gpu/drm/i915/intel_lrc.c | 53 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 302 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +- 4 files changed, 125 insertions(+), 243 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index cf9cbcc2d5d7..0ea01bd6811c 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -51,12 +51,16 @@ static void irq_enable(struct intel_engine_cs *engine) */ engine->irq_posted = true; - WARN_ON(!engine->irq_get(engine)); + spin_lock_irq(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock_irq(&engine->i915->irq_lock); } static void irq_disable(struct intel_engine_cs *engine) { - engine->irq_put(engine); + spin_lock_irq(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock_irq(&engine->i915->irq_lock); engine->irq_posted = false; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 27d91f1ceb2b..b1ede2e9b372 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1640,37 +1640,20 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, return 0; } -static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring) +static void gen8_logical_ring_enable_irq(struct intel_engine_cs *ring) { - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - unsigned long flags; - - if (WARN_ON(!intel_irqs_enabled(dev_priv))) - return false; - - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (ring->irq_refcount++ == 0) { - I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask)); - POSTING_READ(RING_IMR(ring->mmio_base)); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + struct drm_i915_private *dev_priv = ring->i915; - return true; + I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask)); + POSTING_READ(RING_IMR(ring->mmio_base)); } -static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring) +static void gen8_logical_ring_disable_irq(struct intel_engine_cs *ring) { - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - unsigned long flags; + struct drm_i915_private *dev_priv = ring->i915; - spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--ring->irq_refcount == 0) { - I915_WRITE_IMR(ring, ~ring->irq_keep_mask); - POSTING_READ(RING_IMR(ring->mmio_base)); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + I915_WRITE_IMR(ring, ~ring->irq_keep_mask); + POSTING_READ(RING_IMR(ring->mmio_base)); } static int gen8_emit_flush(struct drm_i915_gem_request *request, @@ -1993,8 +1976,8 @@ static int logical_render_ring_init(struct drm_device *dev) ring->irq_seqno_barrier = gen6_seqno_barrier; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush_render; - ring->irq_get = gen8_logical_ring_get_irq; - ring->irq_put = gen8_logical_ring_put_irq; + ring->irq_enable = gen8_logical_ring_enable_irq; + ring->irq_disable = gen8_logical_ring_disable_irq; ring->emit_bb_start = gen8_emit_bb_start; ring->dev = dev; @@ 
-2039,8 +2022,8 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->irq_seqno_barrier = gen6_seqno_barrier; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; - ring->irq_get = gen8_logical_ring_get_irq; - ring->irq_put = gen8_logical_ring_put_irq; + ring->irq_enable = gen8_logical_ring_enable_irq; + ring->irq_disable = gen8_logical_ring_disable_irq; ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); @@ -2063,8 +2046,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->irq_seqno_barrier = gen6_seqno_barrier; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; - ring->irq_get = gen8_logical_ring_get_irq; - ring->irq_put = gen8_logical_ring_put_irq; + ring->irq_enable = gen8_logical_ring_enable_irq; + ring->irq_disable = gen8_logical_ring_disable_irq; ring->emit_bb_start =
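The shape of the refactor in miniature, as a standalone sketch: a pthread mutex stands in for dev_priv->irq_lock and a printf for the IMR write, but the division of labour is the same; the single call site owns locking, and each backend vfunc is just the register access:

#include <pthread.h>
#include <stdio.h>

struct engine {
        pthread_mutex_t *irq_lock;              /* shared dev_priv->irq_lock */
        void (*irq_enable)(struct engine *);    /* bare IMR write, no locking */
        void (*irq_disable)(struct engine *);
};

/* The one caller supplies the locking, so every backend shrinks to the
 * register write it actually needs and reference counting disappears. */
static void engine_irq_enable(struct engine *e)
{
        pthread_mutex_lock(e->irq_lock);
        e->irq_enable(e);
        pthread_mutex_unlock(e->irq_lock);
}

static void stub_irq_enable(struct engine *e)   /* stand-in backend */
{
        (void)e;
        printf("unmask user interrupt\n");
}

int main(void)
{
        pthread_mutex_t irq_lock = PTHREAD_MUTEX_INITIALIZER;
        struct engine e = { .irq_lock = &irq_lock, .irq_enable = stub_irq_enable };

        engine_irq_enable(&e);
        return 0;
}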
[Intel-gfx] [PATCH 023/190] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted
If we flag the seqno as potentially stale upon receiving an interrupt, we can use that information to reduce the frequency that we apply the heavyweight coherent seqno read (i.e. if we wake up a chain of waiters). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 15 ++- drivers/gpu/drm/i915/i915_irq.c | 1 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 8 drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c2ee8efdd928..8940b8d3fa59 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3649,7 +3649,20 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * but it is easier and safer to do it every time the waiter * is woken. */ - if (engine->irq_seqno_barrier) { + if (engine->irq_seqno_barrier && READ_ONCE(engine->irq_posted)) { + /* The ordering of irq_posted versus applying the barrier +* is crucial. The clearing of the current irq_posted must +* be visible before we perform the barrier operation, +* such that if a subsequent interrupt arrives, irq_posted +* is reasserted and our task rewoken (which causes us to +* do another __i915_request_irq_complete() immediately +* and reapply the barrier). Conversely, if the clear +* occurs after the barrier, then an interrupt that arrived +* whilst we waited on the barrier would not trigger a +* barrier on the next pass, and the read may not see the +* seqno update. +*/ + WRITE_ONCE(engine->irq_posted, false); engine->irq_seqno_barrier(engine); if (i915_gem_request_completed(req)) return true; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 627c7fb6aa9b..738edd7fbf8d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1000,6 +1000,7 @@ static void notify_ring(struct intel_engine_cs *ring) return; trace_i915_gem_request_notify(ring); + ring->irq_posted = true; /* paired with mb() in wake_up_process() */ intel_engine_wakeup(ring); } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index f66acf820c40..d689bd61534e 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -43,12 +43,20 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) static void irq_enable(struct intel_engine_cs *engine) { + /* Enabling the IRQ may miss the generation of the interrupt, but +* we still need to force the barrier before reading the seqno, +* just in case. +*/ + engine->irq_posted = true; + WARN_ON(!engine->irq_get(engine)); } static void irq_disable(struct intel_engine_cs *engine) { engine->irq_put(engine); + + engine->irq_posted = false; } static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 28ab07b38c05..6cc8e9c5f8d6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -198,6 +198,7 @@ struct intel_engine_cs { struct i915_ctx_workarounds wa_ctx; unsigned irq_refcount; /* protected by dev_priv->irq_lock */ + boolirq_posted; u32 irq_enable_mask;/* bitmask to enable ring interrupt */ struct drm_i915_gem_request *trace_irq_req; bool __must_check (*irq_get)(struct intel_engine_cs *ring); -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
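A standalone sketch of the ordering rule spelled out in the new comment, with C11 atomics in place of WRITE_ONCE/READ_ONCE and stub callbacks for the barrier and the seqno test:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool irq_posted;

static void irq_handler(void)           /* interrupt context */
{
        atomic_store(&irq_posted, true);        /* then wake the waiter */
}

static void seqno_barrier(void)         /* engine->irq_seqno_barrier stand-in */
{
        atomic_thread_fence(memory_order_seq_cst);
}

static bool request_completed(void)     /* stand-in for the seqno test */
{
        return true;
}

static bool irq_complete_check(void)
{
        /* Clear irq_posted *before* the barrier: an interrupt landing while
         * the barrier runs re-asserts the flag, so the next pass performs
         * another barrier instead of reading a possibly stale seqno. */
        if (atomic_exchange(&irq_posted, false))
                seqno_barrier();
        return request_completed();
}

int main(void)
{
        irq_handler();
        printf("%d\n", irq_complete_check());   /* 1, after one barrier */
        return 0;
}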
[Intel-gfx] [PATCH 012/190] drm/i915: Prevent leaking of -EIO from i915_wait_request()
Reporting -EIO from i915_wait_request() has proven very troublesome over the years, with numerous hard-to-reproduce bugs cropping up in the corner case where a reset occurs and the code wasn't expecting such an error. If we reset the GPU, or have detected a hang and wish to reset the GPU, the request is forcibly completed and the wait broken. Currently, we report either -EAGAIN or -EIO in order for the caller to retreat and restart the wait (if appropriate) after dropping and then reacquiring the struct_mutex (essential to allow the GPU reset to proceed). However, if we take the view that the request is complete (no further work will be done on it by the GPU because it is dead and soon to be reset), then we can proceed with the task at hand and then drop the struct_mutex, allowing the reset to occur. This transfers the burden of checking whether it is safe to proceed to the caller, and in all but one instance it is safe, completely eliminating the source of all spurious -EIO. Of note, we only have two API entry points where we expect that userspace can observe an EIO. The first is when submitting an execbuf: if the GPU is terminally wedged, then the operation cannot succeed and an -EIO is reported. Secondly, existing userspace uses the throttle ioctl to detect an already wedged GPU before starting to use HW acceleration (or to confirm that the GPU is wedged after an error condition), so if the GPU is wedged when the user calls throttle, also report -EIO. v2: Split more carefully the change to i915_wait_request() and assorted ABI from the reset handling. v3: Add a couple of WARN_ON(EIO) to the interruptible modesetting code so that we don't start to leak EIO there in future (and break our hang-resistant modesetting). Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 -- drivers/gpu/drm/i915/i915_gem.c | 44 - drivers/gpu/drm/i915/i915_gem_userptr.c | 6 ++--- drivers/gpu/drm/i915/intel_display.c| 13 +- drivers/gpu/drm/i915/intel_lrc.c| 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 6 files changed, 32 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f74bca326b79..bbdb056d2a8e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2978,8 +2978,6 @@ i915_gem_find_active_request(struct intel_engine_cs *ring); bool i915_gem_retire_requests(struct drm_device *dev); void i915_gem_retire_requests_ring(struct intel_engine_cs *ring); -int __must_check i915_gem_check_wedge(struct i915_gpu_error *error, - bool interruptible); static inline u32 i915_reset_counter(struct i915_gpu_error *error) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 56069bdada85..f570990f03e0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -206,11 +206,10 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) BUG_ON(obj->madv == __I915_MADV_PURGED); ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) { + if (WARN_ON(ret)) { /* In the event of a disaster, abandon all caches and * hope for the best.
*/ - WARN_ON(ret != -EIO); obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; } @@ -1104,15 +1103,13 @@ put_rpm: return ret; } -int -i915_gem_check_wedge(struct i915_gpu_error *error, -bool interruptible) +static int +i915_gem_check_wedge(unsigned reset_counter, bool interruptible) { - if (i915_reset_in_progress_or_wedged(error)) { - /* Recovery complete, but the reset failed ... */ - if (i915_terminally_wedged(error)) - return -EIO; + if (__i915_terminally_wedged(reset_counter)) + return -EIO; + if (__i915_reset_in_progress(reset_counter)) { /* Non-interruptible callers can't handle -EAGAIN, hence return * -EIO unconditionally for these. */ if (!interruptible) @@ -1283,13 +1280,14 @@ int __i915_wait_request(struct drm_i915_gem_request *req, prepare_to_wait(&ring->irq_queue, &wait, state); /* We need to check whether any gpu reset happened in between -* the caller grabbing the seqno and now ... */ +* the request being submitted and now. If a reset has occurred, +* the request is effectively complete (we either are in the +* process of or have discarded the rendering and completely +* reset the GPU. The results of the request are lost and we +* are free to continue on with the original operation. +*/
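A standalone sketch of the resulting contract, with stand-in flag values; in the driver the wedge test is __i915_terminally_wedged() and the only -EIO paths left are execbuf and the throttle ioctl:

#include <errno.h>
#include <stdio.h>

#define WEDGED (1u << 31)       /* stand-in for I915_WEDGED */

static unsigned int reset_counter;

static int wait_request(void)
{
        /* a reset forcibly completes the request: no -EIO leaks from here */
        return 0;
}

static int throttle_ioctl(void)
{
        if (reset_counter & WEDGED)
                return -EIO;    /* userspace probes the wedge here */
        return wait_request();
}

int main(void)
{
        printf("%d\n", throttle_ioctl());       /* 0 */
        reset_counter |= WEDGED;
        printf("%d\n", throttle_ioctl());       /* -5, i.e. -EIO */
        return 0;
}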
[Intel-gfx] [PATCH 019/190] drm/i915: Separate out the seqno-barrier from engine->get_seqno
In order to simplify the next couple of patches, extract the lazy_coherency optimisation out of the engine->get_seqno() vfunc into its own callback. v2: Rename the barrier to engine->irq_seqno_barrier to try and better reflect that the barrier is only required after the user interrupt before reading the seqno (to ensure that the seqno update lands in time as we do not have strict seqno-irq ordering on all platforms). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++--- drivers/gpu/drm/i915/i915_drv.h | 12 ++ drivers/gpu/drm/i915/i915_gpu_error.c| 2 +- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- drivers/gpu/drm/i915/i915_trace.h| 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 4 ++-- drivers/gpu/drm/i915/intel_lrc.c | 39 drivers/gpu/drm/i915/intel_ringbuffer.c | 36 +++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- 9 files changed, 53 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9396597b136d..1499e2337e5d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -600,7 +600,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) ring->name, i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, - ring->get_seqno(ring, true), + ring->get_seqno(ring), i915_gem_request_completed(work->flip_queued_req, true)); } else seq_printf(m, "Flip not associated with any ring\n"); @@ -734,7 +734,7 @@ static void i915_ring_seqno_info(struct seq_file *m, if (ring->get_seqno) { seq_printf(m, "Current sequence (%s): %x\n", - ring->name, ring->get_seqno(ring, false)); + ring->name, ring->get_seqno(ring)); } spin_lock(&ring->breadcrumbs.lock); @@ -1354,7 +1354,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); for_each_ring(ring, dev_priv, i) { - seqno[i] = ring->get_seqno(ring, false); + seqno[i] = ring->get_seqno(ring); acthd[i] = intel_ring_get_active_head(ring); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a9e8de57e848..9762aa76bb0a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2972,15 +2972,19 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) static inline bool i915_gem_request_started(struct drm_i915_gem_request *req, bool lazy_coherency) { - u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency); - return i915_seqno_passed(seqno, req->previous_seqno); + if (!lazy_coherency && req->ring->irq_seqno_barrier) + req->ring->irq_seqno_barrier(req->ring); + return i915_seqno_passed(req->ring->get_seqno(req->ring), +req->previous_seqno); } static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, bool lazy_coherency) { - u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency); - return i915_seqno_passed(seqno, req->seqno); + if (!lazy_coherency && req->ring->irq_seqno_barrier) + req->ring->irq_seqno_barrier(req->ring); + return i915_seqno_passed(req->ring->get_seqno(req->ring), +req->seqno); } int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f805d117f3d1..01d0206ca4dd 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -902,8 +902,8 @@ static void i915_record_ring_state(struct drm_device *dev, ering->waiting = intel_engine_has_waiter(ring); ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base)); - ering->seqno = 
ring->get_seqno(ring, false); ering->acthd = intel_ring_get_active_head(ring); + ering->seqno = ring->get_seqno(ring); ering->start = I915_READ_START(ring); ering->head = I915_READ_HEAD(ring); ering->tail = I915_READ_TAIL(ring); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 95b997a57da8..d73669783045 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2903,7 +2903,7 @@ static int semaphore_passed(struct intel_engine_cs *ring) if (signaller->hangc
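A standalone sketch of the split itself, with stand-in types: the barrier becomes an optional callback that callers invoke explicitly, and get_seqno() shrinks to the plain status-page read:

#include <stdint.h>
#include <stdio.h>

struct engine {
        uint32_t hws_seqno;                             /* status page value */
        void (*irq_seqno_barrier)(struct engine *);     /* may be NULL */
        uint32_t (*get_seqno)(struct engine *);
};

static uint32_t read_status_page(struct engine *e)
{
        return e->hws_seqno;
}

/* Only the paths that truly need coherency pay for the barrier; hot
 * paths call e->get_seqno(e) directly. */
static uint32_t coherent_seqno(struct engine *e)
{
        if (e->irq_seqno_barrier)       /* explicit at the call site now */
                e->irq_seqno_barrier(e);
        return e->get_seqno(e);
}

int main(void)
{
        struct engine e = { .hws_seqno = 42, .get_seqno = read_status_page };

        printf("%u\n", coherent_seqno(&e));     /* 42, no barrier needed */
        return 0;
}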
[Intel-gfx] [PATCH 036/190] drm/i915: Restore waitboost credit to the synchronous waiter
Ideally, we want to automagically have the GPU respond to the instantaneous load by reclocking itself. However, reclocking occurs relatively slowly, and to the client waiting for a result from the GPU, too late. To compensate and reduce the client latency, we allow the first wait from a client to boost the GPU clocks to maximum. This overcomes the lag in autoreclocking, at the expense of forcing the GPU clocks too high. So to offset the excessive power usage, we currently allow a client to only boost the clocks once before we detect the GPU is idle again. This works reasonably for say the first frame in a benchmark, but for many more synchronous workloads (like OpenCL) we find the GPU clocks remain too low. By noting a wait which would idle the GPU (i.e. we just waited upon the last known request), we can give that client the idle boost credit (for their next wait) without the 100ms delay required for us to detect the GPU idle state. The intention is to boost clients that are stalling in the process of feeding the GPU more work (and who in doing so let the GPU idle), without granting boost credits to clients that are throttling themselves (such as compositors). Signed-off-by: Chris Wilson Cc: "Zou, Nanhai" Cc: Jesse Barnes Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_gem.c | 16 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e9f5ca7ea835..3fea582768e9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1314,6 +1314,22 @@ complete: *timeout = 0; } + if (ret == 0 && rps && req->seqno == req->ring->last_submitted_seqno) { + /* The GPU is now idle and this client has stalled. +* Since no other client has submitted a request in the +* meantime, assume that this client is the only one +* supplying work to the GPU but is unable to keep that +* work supplied because it is waiting. Since the GPU is +* then never kept fully busy, RPS autoclocking will +* keep the clocks relatively low, causing further delays. +* Compensate by giving the synchronous client credit for +* a waitboost next time. +*/ + spin_lock(&req->i915->rps.client_lock); + list_del_init(&rps->link); + spin_unlock(&req->i915->rps.client_lock); + } + return ret; } -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
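A standalone model of the credit heuristic; the seqno fields and the boolean stand in for the driver's request/engine state and for unlinking the rps client from the boosted list:

#include <stdbool.h>
#include <stdio.h>

struct rps_client { bool boost_credit; };

/* If the request we just waited on was the engine's last submitted work,
 * this client idled the GPU by waiting, so its next wait is granted a
 * boost without the ~100ms idle-detection delay. */
static void account_wait(struct rps_client *c,
                         unsigned int req_seqno,
                         unsigned int last_submitted_seqno)
{
        if (req_seqno == last_submitted_seqno)
                c->boost_credit = true; /* list_del_init(&rps->link) in-kernel */
}

int main(void)
{
        struct rps_client cl = { false };

        account_wait(&cl, 7, 9);        /* more work queued: a throttler */
        printf("%d\n", cl.boost_credit);        /* 0 */
        account_wait(&cl, 9, 9);        /* the GPU idles on our wait */
        printf("%d\n", cl.boost_credit);        /* 1 */
        return 0;
}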
[Intel-gfx] [PATCH 050/190] drm/i915: Refactor duplicate object vmap functions
We now have two implementations for vmapping a whole object, one for dma-buf and one for the ringbuffer. If we couple the vmapping into the obj->pages lifetime, then we can reuse an obj->vmapping for both and at the same time couple it into the shrinker. v2: Mark the failable kmalloc() as __GFP_NOWARN (vsyrjala) v3: Call unpin_vmap from the right dmabuf unmapper Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 12 +--- drivers/gpu/drm/i915/i915_gem.c | 41 + drivers/gpu/drm/i915/i915_gem_dmabuf.c | 53 - drivers/gpu/drm/i915/intel_ringbuffer.c | 53 ++--- 4 files changed, 71 insertions(+), 88 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 49a151126b2a..56cf2ffc1eac 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2114,10 +2114,7 @@ struct drm_i915_gem_object { struct scatterlist *sg; int last; } get_page; - - /* prime dma-buf support */ - void *dma_buf_vmapping; - int vmapping_count; + void *vmapping; /** Breadcrumb of last rendering to the buffer. * There can only be one writer, but we allow for multiple readers. @@ -2774,12 +2771,19 @@ static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) BUG_ON(obj->pages == NULL); obj->pages_pin_count++; } + static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) { BUG_ON(obj->pages_pin_count == 0); obj->pages_pin_count--; } +void *__must_check i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj); +static inline void i915_gem_object_unpin_vmap(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin_pages(obj); +} + int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_engine_cs *to, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9df00e694cd9..2912e8714f5b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1854,6 +1854,11 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) ops->put_pages(obj); obj->pages = NULL; + if (obj->vmapping) { + vunmap(obj->vmapping); + obj->vmapping = NULL; + } + i915_gem_object_invalidate(obj); return 0; @@ -2019,6 +2024,42 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) return 0; } +void *i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj) +{ + int ret; + + ret = i915_gem_object_get_pages(obj); + if (ret) + return ERR_PTR(ret); + + i915_gem_object_pin_pages(obj); + + if (obj->vmapping == NULL) { + struct sg_page_iter sg_iter; + struct page **pages; + int n; + + n = obj->base.size >> PAGE_SHIFT; + pages = kmalloc(n*sizeof(*pages), GFP_TEMPORARY | __GFP_NOWARN); + if (pages == NULL) + pages = drm_malloc_ab(n, sizeof(*pages)); + if (pages != NULL) { + n = 0; + for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) + pages[n++] = sg_page_iter_page(&sg_iter); + + obj->vmapping = vmap(pages, n, 0, PAGE_KERNEL); + drm_free_large(pages); + } + if (obj->vmapping == NULL) { + i915_gem_object_unpin_pages(obj); + return ERR_PTR(-ENOMEM); + } + } + + return obj->vmapping; +} + void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req) { diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index e9c2bfd85b52..8894648acee0 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -95,14 +95,12 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, { struct 
drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - mutex_lock(&obj->base.dev->struct_mutex); - dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir); sg_free_table(sg); kfree(sg); + mutex_lock(&obj->base.dev->struct_mutex); i915_gem_object_unpin_pages(obj); - mutex_unlock(&obj->base.dev->struct_mutex); } @@ -110,51 +108,17 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; - struct sg_page_iter sg_iter; - struct page **pages; - int ret, i;
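For callers, the refactor turns "map the whole object" into a simple pin/use/unpin pattern, with the vmapping cached on the object and torn down alongside its pages. A hypothetical consumer, to show the intended usage (the function and its offset handling are illustrative only):

static int example_write_dword(struct drm_i915_gem_object *obj,
			       unsigned long offset, u32 value)
{
	u32 *vaddr;

	vaddr = i915_gem_object_pin_vmap(obj);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	vaddr[offset / sizeof(*vaddr)] = value;

	/* Drops the page pin; the cached vmapping survives until
	 * i915_gem_object_put_pages() vunmaps it. */
	i915_gem_object_unpin_vmap(obj);
	return 0;
}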
[Intel-gfx] [PATCH 048/190] drm/i915: Disable waitboosting for fence_wait()
We want to restrict waitboosting to known process contexts, where we can track which clients are receiving waitboosts and prevent excessive power wasting. For fence_wait() we do not have any client tracking and so that leaves it open to abuse. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_request.c | 6 +++--- drivers/gpu/drm/i915/i915_gem_request.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a796dbd1b0e4..01893d847dfd 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -68,7 +68,7 @@ static signed long i915_fence_wait(struct fence *fence, ret = __i915_wait_request(to_i915_request(fence), interruptible, timeout, - NULL); + NO_WAITBOOST); if (ret == -ETIME) return 0; @@ -621,7 +621,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. */ - if (INTEL_INFO(req->i915)->gen >= 6) + if (!IS_ERR(rps) && INTEL_INFO(req->i915)->gen >= 6) gen6_rps_boost(req->i915, rps, req->emitted_jiffies); intel_wait_init(&wait, req->fence.seqno); @@ -691,7 +691,7 @@ complete: *timeout = 0; } - if (ret == 0 && rps && + if (ret == 0 && !IS_ERR_OR_NULL(rps) && req->fence.seqno == req->ring->last_submitted_seqno) { /* The GPU is now idle and this client has stalled. * Since no other client has submitted a request in the diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 0ab14fd0fce0..6b3de827929a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -179,6 +179,7 @@ void __i915_add_request(struct drm_i915_gem_request *req, __i915_add_request(req, NULL, false) struct intel_rps_client; +#define NO_WAITBOOST ERR_PTR(-1) int __i915_wait_request(struct drm_i915_gem_request *req, bool interruptible, -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
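The rps argument of __i915_wait_request() is now effectively tri-state, with an ERR_PTR sentinel marking waits that must never boost. A sketch of the resulting convention (the wrapper function is hypothetical; the calls inside it mirror the hunks above):

/* rps convention after this patch:
 *   valid pointer - boost, charging this client's once-per-period credit
 *   NULL          - boost without any client tracking
 *   NO_WAITBOOST  - ERR_PTR sentinel: never boost (e.g. fence_wait) */
static void maybe_boost(struct drm_i915_private *i915,
			struct intel_rps_client *rps,
			unsigned long submitted_jiffies)
{
	if (IS_ERR(rps))	/* NO_WAITBOOST */
		return;

	if (INTEL_INFO(i915)->gen >= 6)
		gen6_rps_boost(i915, rps, submitted_jiffies); /* rps may be NULL */
}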
[Intel-gfx] [PATCH 015/190] drm/i915: Remove the dedicated hangcheck workqueue
The queue only ever contains at most one item and has no special flags. It is just a very simple wrapper around the system-wq - a complication with no benefits. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c | 11 --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_irq.c | 6 +++--- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 44a896ce32e6..9e49e304dd8e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1016,14 +1016,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto out_freewq; } - dev_priv->gpu_error.hangcheck_wq = - alloc_ordered_workqueue("i915-hangcheck", 0); - if (dev_priv->gpu_error.hangcheck_wq == NULL) { - DRM_ERROR("Failed to create our hangcheck workqueue.\n"); - ret = -ENOMEM; - goto out_freedpwq; - } - intel_irq_init(dev_priv); intel_uncore_sanitize(dev); @@ -1105,8 +1097,6 @@ out_gem_unload: intel_teardown_gmbus(dev); intel_teardown_mchbar(dev); pm_qos_remove_request(&dev_priv->pm_qos); - destroy_workqueue(dev_priv->gpu_error.hangcheck_wq); -out_freedpwq: destroy_workqueue(dev_priv->hotplug.dp_wq); out_freewq: destroy_workqueue(dev_priv->wq); @@ -1209,7 +1199,6 @@ int i915_driver_unload(struct drm_device *dev) destroy_workqueue(dev_priv->hotplug.dp_wq); destroy_workqueue(dev_priv->wq); - destroy_workqueue(dev_priv->gpu_error.hangcheck_wq); pm_qos_remove_request(&dev_priv->pm_qos); i915_global_gtt_cleanup(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d9d411919779..188bed933f11 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1330,7 +1330,6 @@ struct i915_gpu_error { /* Hang gpu twice in this window and your context gets banned */ #define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000) - struct workqueue_struct *hangcheck_wq; struct delayed_work hangcheck_work; /* For reset and error_state handling. */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 94f5f4e99446..8939438d747d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3175,7 +3175,7 @@ out: void i915_queue_hangcheck(struct drm_i915_private *dev_priv) { - struct i915_gpu_error *e = &dev_priv->gpu_error; + unsigned long delay; if (!i915.enable_hangcheck) return; @@ -3185,8 +3185,8 @@ void i915_queue_hangcheck(struct drm_i915_private *dev_priv) * we will ignore a hung ring if a second ring is kept busy. */ - queue_delayed_work(e->hangcheck_wq, &e->hangcheck_work, - round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES)); + delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); + schedule_delayed_work(&dev_priv->gpu_error.hangcheck_work, delay); } static void ibx_irq_reset(struct drm_device *dev) -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
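What remains is the stock delayed-work pattern on the system workqueue; schedule_delayed_work() already declines to queue a second instance while one is pending, which is the only property the private queue ever provided. In sketch form (the rearm condition is a stand-in for the real per-engine progress check):

static void hangcheck_fn(struct work_struct *work)
{
	struct drm_i915_private *dev_priv =
		container_of(work, typeof(*dev_priv),
			     gpu_error.hangcheck_work.work);
	bool busy = true;	/* stand-in: derived from engine progress */

	/* ... inspect each engine for forward progress ... */

	/* Rearm only while the GPU is busy; an idle GPU lets the
	 * delayed work simply expire. */
	if (busy)
		i915_queue_hangcheck(dev_priv);
}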
[Intel-gfx] [PATCH 045/190] drm/i915: Move releasing of the GEM request from free to retire/cancel
If we move the release of the GEM request (i.e. decoupling it from the various lists used for client and context tracking) after it is complete (either by the GPU retiring the request, or by the caller cancelling the request), we can remove the requirement that the final unreference of the GEM request needs to be under the struct_mutex. v2: Execlists as always is badly asymmetric and year-old patches still haven't landed to fix it up. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 4 +-- drivers/gpu/drm/i915/i915_gem_request.c | 50 ++-- drivers/gpu/drm/i915/i915_gem_request.h | 14 - drivers/gpu/drm/i915/intel_breadcrumbs.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 6 ++-- drivers/gpu/drm/i915/intel_pm.c | 2 +- 7 files changed, 30 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 68a25617ca7a..6d8d65304abf 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2502,7 +2502,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ret = __i915_wait_request(req[i], true, args->timeout_ns > 0 ? &args->timeout_ns : NULL, to_rps_client(file)); - i915_gem_request_unreference__unlocked(req[i]); + i915_gem_request_unreference(req[i]); } return ret; @@ -3505,7 +3505,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return 0; ret = __i915_wait_request(target, true, NULL, NULL); - i915_gem_request_unreference__unlocked(target); + i915_gem_request_unreference(target); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b4ede6dd7b20..1c4f4d83a3c2 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -184,13 +184,6 @@ err: return ret; } -void i915_gem_request_cancel(struct drm_i915_gem_request *req) -{ - intel_ring_reserved_space_cancel(req->ringbuf); - - i915_gem_request_unreference(req); -} - int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, struct drm_file *file) { @@ -235,9 +228,28 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) request->pid = NULL; } +static void __i915_gem_request_release(struct drm_i915_gem_request *request) +{ + i915_gem_request_remove_from_client(request); + + i915_gem_context_unreference(request->ctx); + i915_gem_request_unreference(request); +} + +void i915_gem_request_cancel(struct drm_i915_gem_request *req) +{ + intel_ring_reserved_space_cancel(req->ringbuf); + if (i915.enable_execlists) { + if (req->ctx != req->ring->default_context) + intel_lr_context_unpin(req); + } + __i915_gem_request_release(req); +} + static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); + list_del_init(&request->list); /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position @@ -248,11 +260,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) * completion order. 
*/ request->ringbuf->last_retired_head = request->postfix; - - list_del_init(&request->list); - i915_gem_request_remove_from_client(request); - - i915_gem_request_unreference(request); + __i915_gem_request_release(request); } void @@ -639,21 +647,7 @@ i915_wait_request(struct drm_i915_gem_request *req) void i915_gem_request_free(struct kref *req_ref) { - struct drm_i915_gem_request *req = container_of(req_ref, -typeof(*req), ref); - struct intel_context *ctx = req->ctx; - - if (req->file_priv) - i915_gem_request_remove_from_client(req); - - if (ctx) { - if (i915.enable_execlists) { - if (ctx != req->ring->default_context) - intel_lr_context_unpin(req); - } - - i915_gem_context_unreference(ctx); - } - + struct drm_i915_gem_request *req = + container_of(req_ref, typeof(*req), ref); kmem_cache_free(req->i915->requests, req); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index d46f22f30b0a..af1b825fce50 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -154,23 +154,9 @@ i915_gem_request_reference(struct drm_i915_g
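The visible payoff is in the ioctl hunks above: a waiter can drop its final reference without taking struct_mutex, because all list and context decoupling now happens at retire/cancel time rather than in the final free. As a sketch (the helper is hypothetical):

/* Wait for a request and release our reference, no locking required:
 * after this patch i915_gem_request_unreference() is safe to call
 * without struct_mutex. */
static int wait_and_release(struct drm_i915_gem_request *req)
{
	int ret = __i915_wait_request(req, true, NULL, NULL);

	i915_gem_request_unreference(req);
	return ret;
}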
[Intel-gfx] [PATCH 059/190] drm/i915: Rename request->ringbuf to request->ring
Now that we have disambiguated ring and engine, we can use the clearer and more consistent name for the intel_ringbuffer pointer in the request. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c| 8 +- drivers/gpu/drm/i915/i915_gem_context.c| 2 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/i915_gem_gtt.c| 6 +- drivers/gpu/drm/i915/i915_gem_request.c| 20 ++-- drivers/gpu/drm/i915/i915_gem_request.h| 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 31 +++--- drivers/gpu/drm/i915/i915_guc_submission.c | 4 +- drivers/gpu/drm/i915/intel_display.c | 10 +- drivers/gpu/drm/i915/intel_lrc.c | 152 ++--- drivers/gpu/drm/i915/intel_mocs.c | 34 +++ drivers/gpu/drm/i915/intel_overlay.c | 42 drivers/gpu/drm/i915/intel_ringbuffer.c| 86 13 files changed, 198 insertions(+), 203 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6622c9bb3af8..430c439ece26 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4083,11 +4083,11 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) * at initialization time. */ for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) { - intel_ring_emit(req->ringbuf, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(req->ringbuf, GEN7_L3LOG(slice, i)); - intel_ring_emit(req->ringbuf, remap_info[i]); + intel_ring_emit(req->ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(req->ring, GEN7_L3LOG(slice, i)); + intel_ring_emit(req->ring, remap_info[i]); } - intel_ring_advance(req->ringbuf); + intel_ring_advance(req->ring); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index dece033cf604..5b4e77a80c19 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -519,7 +519,7 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; u32 flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e7df91f9a51f..a0f5a997c2f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1148,7 +1148,7 @@ i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret, i; if (!IS_GEN7(req->i915) || req->engine->id != RCS) { @@ -1229,7 +1229,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { - struct intel_ringbuffer *ring = params->request->ringbuf; + struct intel_ringbuffer *ring = params->request->ring; struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; int instp_mode; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cb7cb59d4c4a..38c109cda904 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -656,7 +656,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer 
*ring = req->ring; int ret; BUG_ON(entry >= 4); @@ -1648,7 +1648,7 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ @@ -1686,7 +1686,7 @@ static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ diff --git a/drivers/gpu/drm/i915/i915_gem_req
[Intel-gfx] [PATCH 040/190] drm/i915: Record the ringbuffer associated with the request
The request tells us where to read the ringbuf from, so use that information to simplify the error capture. If no request was active at the time of the hang, the ring is idle and there is no information inside the ring pertaining to the hang. Note carefully that this will reduce the amount of information stored in the error state - any ring without an active request will not be recorded. Signed-off-by: Chris Wilson Reviewed-by: Dave Gordon --- drivers/gpu/drm/i915/i915_gpu_error.c | 28 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3e137fc701cf..93da2c7581f6 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -995,7 +995,6 @@ static void i915_gem_record_rings(struct drm_device *dev, for (i = 0; i < I915_NUM_RINGS; i++) { struct intel_engine_cs *ring = &dev_priv->ring[i]; - struct intel_ringbuffer *rbuf; error->ring[i].pid = -1; @@ -1009,6 +1008,7 @@ static void i915_gem_record_rings(struct drm_device *dev, request = i915_gem_find_active_request(ring); if (request) { struct i915_address_space *vm; + struct intel_ringbuffer *rb; vm = request->ctx && request->ctx->ppgtt ? &request->ctx->ppgtt->base : @@ -1039,26 +1039,14 @@ static void i915_gem_record_rings(struct drm_device *dev, } rcu_read_unlock(); } - } - if (i915.enable_execlists) { - /* TODO: This is only a small fix to keep basic error -* capture working, but we need to add more information -* for it to be useful (e.g. dump the context being -* executed). -*/ - if (request) - rbuf = request->ctx->engine[ring->id].ringbuf; - else - rbuf = ring->default_context->engine[ring->id].ringbuf; - } else - rbuf = ring->buffer; - - error->ring[i].cpu_ring_head = rbuf->head; - error->ring[i].cpu_ring_tail = rbuf->tail; - - error->ring[i].ringbuffer = - i915_error_ggtt_object_create(dev_priv, rbuf->obj); + rb = request->ringbuf; + error->ring[i].cpu_ring_head = rb->head; + error->ring[i].cpu_ring_tail = rb->tail; + error->ring[i].ringbuffer = + i915_error_ggtt_object_create(dev_priv, + rb->obj); + } error->ring[i].hws_page = i915_error_ggtt_object_create(dev_priv, ring->status_page.obj); -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 043/190] drm/i915: Skip capturing an error state if we already have one
As we only ever keep the first error state around, we can avoid some work that can be quite intrusive if we don't record the error the second time around. This does move the race whereby the user could discard one error state as the second is being captured, but that race exists in the current code and we hope that recapturing error state is only done for debugging. Note that as we discard the error state for simulated errors, igt that exercise error capture continue to function. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4f17d6847569..86f582115313 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1312,6 +1312,9 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged, struct drm_i915_error_state *error; unsigned long flags; + if (READ_ONCE(dev_priv->gpu_error.first_error)) + return; + /* Account for pipe specific data like PIPE*STAT */ error = kzalloc(sizeof(*error), GFP_ATOMIC); if (!error) { -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 058/190] drm/i915: Rename request->ring to request->engine
In order to disambiguate between the pointer to the intel_engine_cs (called ring) and the intel_ringbuffer (called ringbuf), rename s/ring/engine/. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 11 +-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 32 +++ drivers/gpu/drm/i915/i915_gem_context.c | 70 +++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 47 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 18 ++-- drivers/gpu/drm/i915/i915_gem_request.c | 53 --- drivers/gpu/drm/i915/i915_gem_request.h | 10 +- drivers/gpu/drm/i915/i915_gpu_error.c| 3 +- drivers/gpu/drm/i915/i915_guc_submission.c | 8 +- drivers/gpu/drm/i915/i915_trace.h| 32 +++ drivers/gpu/drm/i915/intel_breadcrumbs.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 10 +- drivers/gpu/drm/i915/intel_lrc.c | 134 +-- drivers/gpu/drm/i915/intel_mocs.c| 13 ++- drivers/gpu/drm/i915/intel_ringbuffer.c | 62 ++--- 17 files changed, 240 insertions(+), 275 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 387ae77d3c29..018076c89247 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -185,8 +185,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (%s mappable)", s); } if (obj->last_write_req != NULL) - seq_printf(m, " (%s)", - i915_gem_request_get_ring(obj->last_write_req)->name); + seq_printf(m, " (%s)", obj->last_write_req->engine->name); if (obj->frontbuffer_bits) seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); } @@ -593,14 +592,14 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) pipe, plane); } if (work->flip_queued_req) { - struct intel_engine_cs *ring = - i915_gem_request_get_ring(work->flip_queued_req); + struct intel_engine_cs *engine = + work->flip_queued_req->engine; seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n", - ring->name, + engine->name, i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, - intel_ring_get_seqno(ring), + intel_ring_get_seqno(engine), i915_gem_request_completed(work->flip_queued_req)); } else seq_printf(m, "Flip not associated with any ring\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 58e9e5e50769..baede4517c70 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3410,7 +3410,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->ring; + struct intel_engine_cs *engine = req->engine; /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 247731672cb1..6622c9bb3af8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1122,7 +1122,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, if (ret) return ret; - i = obj->last_write_req->ring->id; + i = obj->last_write_req->engine->id; if (obj->last_read_req[i] == obj->last_write_req) i915_gem_object_retire__read(obj, i); else @@ -1149,7 +1149,7 @@ static void i915_gem_object_retire_request(struct drm_i915_gem_object *obj, struct drm_i915_gem_request *req) { - int ring = req->ring->id; + int ring = req->engine->id; if (obj->last_read_req[ring] == req) i915_gem_object_retire__read(obj, ring); @@ -2062,17 +2062,15 @@ void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request
[Intel-gfx] [PATCH 055/190] drm/i915: Unify intel_logical_ring_emit and intel_ring_emit
Both perform the same actions with more or less indirection, so just unify the code. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c| 2 +- drivers/gpu/drm/i915/i915_gem_context.c| 8 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 - drivers/gpu/drm/i915/i915_gem_gtt.c| 26 +++ drivers/gpu/drm/i915/intel_display.c | 26 +++ drivers/gpu/drm/i915/intel_lrc.c | 114 ++--- drivers/gpu/drm/i915/intel_lrc.h | 26 --- drivers/gpu/drm/i915/intel_mocs.c | 30 drivers/gpu/drm/i915/intel_overlay.c | 42 +-- drivers/gpu/drm/i915/intel_ringbuffer.c| 101 - drivers/gpu/drm/i915/intel_ringbuffer.h| 21 ++ 11 files changed, 194 insertions(+), 236 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c2a1ec8abc11..247731672cb1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4068,7 +4068,7 @@ err: int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) { - struct intel_engine_cs *ring = req->ring; + struct intel_ringbuffer *ring = req->ringbuf; struct drm_i915_private *dev_priv = req->i915; u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; int i, ret; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3e3b4bf3fed1..d58de7e084dc 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -519,7 +519,7 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { - struct intel_engine_cs *ring = req->ring; + struct intel_ringbuffer *ring = req->ringbuf; u32 flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ @@ -534,7 +534,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * itlb_before_ctx_switch. 
*/ if (IS_GEN6(req->i915)) { - ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0); + ret = req->ring->flush(req, I915_GEM_GPU_DOMAINS, 0); if (ret) return ret; } @@ -562,7 +562,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); for_each_ring(signaller, req->i915, i) { - if (signaller == ring) + if (signaller == req->ring) continue; intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base)); @@ -587,7 +587,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); for_each_ring(signaller, req->i915, i) { - if (signaller == ring) + if (signaller == req->ring) continue; intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base)); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 78b462956c78..603a247ac333 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1146,14 +1146,12 @@ i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) } static int -i915_reset_gen7_sol_offsets(struct drm_device *dev, - struct drm_i915_gem_request *req) +i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_engine_cs *ring = req->ring; - struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ringbuffer *ring = req->ringbuf; int ret, i; - if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) { + if (!IS_GEN7(req->i915) || req->ring->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } @@ -1231,9 +1229,8 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { - struct drm_device *dev = params->dev; - struct intel_engine_cs *ring = params->ring; - struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ringbuffer *ring = params->request->ringbuf; + struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; int instp_mode; u32 instp_mask; @@ -1247,34 +1244,31 @@ i915_gem_ringb
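With a single emit path, command writers look identical no matter which backend submits them. A hypothetical register write in the unified style (the MI_NOOP pad keeps the emission an even number of dwords, a long-standing ring convention; the function itself is illustrative):

static int emit_lri(struct drm_i915_gem_request *req, i915_reg_t reg, u32 value)
{
	struct intel_ringbuffer *ring = req->ringbuf;
	int ret;

	ret = intel_ring_begin(req, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit_reg(ring, reg);
	intel_ring_emit(ring, value);
	intel_ring_emit(ring, MI_NOOP);	/* pad to an even dword count */
	intel_ring_advance(ring);

	return 0;
}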
[Intel-gfx] [PATCH 037/190] drm/i915: Add background commentary to "waitboosting"
Describe the intent of boosting the GPU frequency to maximum before waiting on the GPU. RPS waitboosting was introduced with commit b29c19b645287f7062e17d70fa4e9781a01a5d88 Author: Chris Wilson Date: Wed Sep 25 17:34:56 2013 +0100 drm/i915: Boost RPS frequency for CPU stalls but lacked a concise comment in the code to explain itself. Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 16 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3fea582768e9..3948e85eaa48 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1244,6 +1244,22 @@ int __i915_wait_request(struct drm_i915_gem_request *req, } trace_i915_gem_request_wait_begin(req); + + /* This client is about to stall waiting for the GPU. In many cases +* this is undesirable and limits the throughput of the system, as +* many clients cannot continue processing user input/output whilst +* blocked. RPS autotuning may take tens of milliseconds to respond +* to the GPU load and thus incurs additional latency for the client. +* We can circumvent that by promoting the GPU frequency to maximum +* before we wait. This makes the GPU throttle up much more quickly +* (good for benchmarks and user experience, e.g. window animations), +* but at a cost of spending more power processing the workload +* (bad for battery). Not all clients even want their results +* immediately and for them we should just let the GPU select its own +* frequency to maximise efficiency. To prevent a single client from +* forcing the clocks too high for the whole system, we only allow +* each client to waitboost once in a busy period. +*/ if (INTEL_INFO(req->i915)->gen >= 6) gen6_rps_boost(req->i915, rps, req->emitted_jiffies); -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 047/190] drm/i915: Rename request reference/unreference to get/put
Now that we derive requests from struct fence, swap over to its nomenclature for references. It's shorter and more idiomatic across the kernel. s/i915_gem_request_reference/i915_gem_request_get/ s/i915_gem_request_unreference/i915_gem_request_put/ Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 14 +++--- drivers/gpu/drm/i915/i915_gem_request.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.h | 8 drivers/gpu/drm/i915/intel_breadcrumbs.c | 4 ++-- drivers/gpu/drm/i915/intel_display.c | 4 ++-- drivers/gpu/drm/i915/intel_lrc.c | 4 ++-- drivers/gpu/drm/i915/intel_pm.c | 5 ++--- 7 files changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6d8d65304abf..fd61e722b595 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1185,7 +1185,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (req == NULL) return 0; - requests[n++] = i915_gem_request_reference(req); + requests[n++] = i915_gem_request_get(req); } else { for (i = 0; i < I915_NUM_RINGS; i++) { struct drm_i915_gem_request *req; @@ -1194,7 +1194,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (req == NULL) continue; - requests[n++] = i915_gem_request_reference(req); + requests[n++] = i915_gem_request_get(req); } } @@ -1207,7 +1207,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, for (i = 0; i < n; i++) { if (ret == 0) i915_gem_object_retire_request(obj, requests[i]); - i915_gem_request_unreference(requests[i]); + i915_gem_request_put(requests[i]); } return ret; @@ -2492,7 +2492,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (obj->last_read_req[i] == NULL) continue; - req[n++] = i915_gem_request_reference(obj->last_read_req[i]); + req[n++] = i915_gem_request_get(obj->last_read_req[i]); } mutex_unlock(&dev->struct_mutex); @@ -2502,7 +2502,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ret = __i915_wait_request(req[i], true, args->timeout_ns > 0 ? 
&args->timeout_ns : NULL, to_rps_client(file)); - i915_gem_request_unreference(req[i]); + i915_gem_request_put(req[i]); } return ret; @@ -3498,14 +3498,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) target = request; } if (target) - i915_gem_request_reference(target); + i915_gem_request_get(target); spin_unlock(&file_priv->mm.lock); if (target == NULL) return 0; ret = __i915_wait_request(target, true, NULL, NULL); - i915_gem_request_unreference(target); + i915_gem_request_put(target); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e366ca0dcd99..a796dbd1b0e4 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -326,7 +326,7 @@ static void __i915_gem_request_release(struct drm_i915_gem_request *request) i915_gem_request_remove_from_client(request); i915_gem_context_unreference(request->ctx); - i915_gem_request_unreference(request); + i915_gem_request_put(request); } void i915_gem_request_cancel(struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index b55d0b7c7f2a..0ab14fd0fce0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -147,13 +147,13 @@ to_request(struct fence *fence) } static inline struct drm_i915_gem_request * -i915_gem_request_reference(struct drm_i915_gem_request *req) +i915_gem_request_get(struct drm_i915_gem_request *req) { return to_request(fence_get(&req->fence)); } static inline void -i915_gem_request_unreference(struct drm_i915_gem_request *req) +i915_gem_request_put(struct drm_i915_gem_request *req) { fence_put(&req->fence); } @@ -162,10 +162,10 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, struct drm_i915_gem_request *src) { if (src) - i915_gem_request_reference(src); + i91
[Intel-gfx] [PATCH 008/190] drm/i915: Simplify checking of GPU reset_counter in display pageflips
If, when we store the reset_counter for the operation, we ensure that the GPU is neither wedged nor in the middle of a reset, we can then assert that if any reset occurs the reset_counter must change. Later we can just compare the operation's reset epoch against the current counter to see if we need to abort the operation (to handle the hang). Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0933bdbaa935..183c05bdb220 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3288,14 +3288,12 @@ void intel_finish_reset(struct drm_device *dev) static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); unsigned reset_counter; bool pending; - reset_counter = i915_reset_counter(&dev_priv->gpu_error); - if (intel_crtc->reset_counter != reset_counter || - __i915_reset_in_progress_or_wedged(reset_counter)) + reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error); + if (intel_crtc->reset_counter != reset_counter) return false; spin_lock_irq(&dev->event_lock); @@ -11011,8 +11009,7 @@ static bool page_flip_finished(struct intel_crtc *crtc) unsigned reset_counter; reset_counter = i915_reset_counter(&dev_priv->gpu_error); - if (crtc->reset_counter != reset_counter || - __i915_reset_in_progress_or_wedged(reset_counter)) + if (crtc->reset_counter != reset_counter) return true; /* @@ -11668,8 +11665,13 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (ret) goto cleanup; - atomic_inc(&intel_crtc->unpin_work_count); intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error); + if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) { + ret = -EIO; + goto cleanup; + } + + atomic_inc(&intel_crtc->unpin_work_count); if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev)) work->flip_count = I915_READ(PIPE_FLIPCOUNT_G4X(pipe)) + 1; -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
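Both halves of the change reduce to an epoch pattern: sample the counter once when the flip is queued, refusing to queue mid-reset, and treat any later mismatch as proof that a reset completed in between. Condensed into two hypothetical helpers around the calls shown above:

/* Queue time: refuse to start across a reset, then record the epoch. */
static int flip_record_epoch(struct intel_crtc *crtc,
			     struct drm_i915_private *dev_priv)
{
	unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);

	if (__i915_reset_in_progress_or_wedged(reset_counter))
		return -EIO;

	crtc->reset_counter = reset_counter;
	return 0;
}

/* Check time: a changed counter means at least one reset since. */
static bool flip_hit_by_reset(struct intel_crtc *crtc,
			      struct drm_i915_private *dev_priv)
{
	return crtc->reset_counter !=
	       i915_reset_counter(&dev_priv->gpu_error);
}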
[Intel-gfx] [PATCH 021/190] drm/i915: Use HWS for seqno tracking everywhere
By using the same address for storing the HWS on every platform, we can remove the platform specific vfuncs and reduce the get-seqno routine to a single read of a cached memory location. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 10 ++-- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_gpu_error.c| 2 +- drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/i915_trace.h| 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 46 ++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 86 drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +-- 9 files changed, 43 insertions(+), 122 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d09e48455dcb..5a706c700684 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -600,7 +600,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) ring->name, i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, - ring->get_seqno(ring), + intel_ring_get_seqno(ring), i915_gem_request_completed(work->flip_queued_req)); } else seq_printf(m, "Flip not associated with any ring\n"); @@ -732,10 +732,8 @@ static void i915_ring_seqno_info(struct seq_file *m, { struct rb_node *rb; - if (ring->get_seqno) { - seq_printf(m, "Current sequence (%s): %x\n", - ring->name, ring->get_seqno(ring)); - } + seq_printf(m, "Current sequence (%s): %x\n", + ring->name, intel_ring_get_seqno(ring)); spin_lock(&ring->breadcrumbs.lock); for (rb = rb_first(&ring->breadcrumbs.waiters); @@ -1355,7 +1353,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) for_each_ring(ring, dev_priv, i) { acthd[i] = intel_ring_get_active_head(ring); - seqno[i] = ring->get_seqno(ring); + seqno[i] = intel_ring_get_seqno(ring); } i915_get_extra_instdone(dev, instdone); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 44d46018ee13..fcedcbc50834 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2971,13 +2971,13 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) static inline bool i915_gem_request_started(struct drm_i915_gem_request *req) { - return i915_seqno_passed(req->ring->get_seqno(req->ring), + return i915_seqno_passed(intel_ring_get_seqno(req->ring), req->previous_seqno); } static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) { - return i915_seqno_passed(req->ring->get_seqno(req->ring), + return i915_seqno_passed(intel_ring_get_seqno(req->ring), req->seqno); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 01d0206ca4dd..3e137fc701cf 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -903,7 +903,7 @@ static void i915_record_ring_state(struct drm_device *dev, ering->waiting = intel_engine_has_waiter(ring); ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base)); ering->acthd = intel_ring_get_active_head(ring); - ering->seqno = ring->get_seqno(ring); + ering->seqno = intel_ring_get_seqno(ring); ering->start = I915_READ_START(ring); ering->head = I915_READ_HEAD(ring); ering->tail = I915_READ_TAIL(ring); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d73669783045..627c7fb6aa9b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2903,7 +2903,7 @@ static int semaphore_passed(struct intel_engine_cs *ring) if (signaller->hangcheck.deadlock >= 
I915_NUM_RINGS) return -1; - if (i915_seqno_passed(signaller->get_seqno(signaller), seqno)) + if (i915_seqno_passed(intel_ring_get_seqno(signaller), seqno)) return 1; /* cursory check for an unkickable deadlock */ @@ -3068,7 +3068,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) semaphore_clear_deadlocks(dev_priv); acthd = intel_ring_get_active_head(ring); - seqno = ring->get_seqno(ring); + seqno = intel_ring_get_seqno(ring); if (ring->hangcheck.seqno == seqno) { if (ring_idle(ring, seqno)) { d
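The "single read of a cached memory location" promised above is plausibly nothing more than a status-page load; intel_read_status_page() and I915_GEM_HWS_INDEX already exist in intel_ringbuffer.h, so the unified helper can collapse to something like this (a sketch, not the verbatim patch):

static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
{
	/* One read of the CPU-visible hardware status page, replacing
	 * the per-platform get_seqno() vfuncs. */
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}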
[Intel-gfx] [PATCH 051/190] drm,i915: Introduce drm_malloc_gfp()
I have instances where I want to use drm_malloc_ab() but with a custom gfp mask. And with those, where I want a temporary allocation, I want to try a high-order kmalloc() before using a vmalloc(). So refactor my usage into drm_malloc_gfp(). Signed-off-by: Chris Wilson Cc: dri-de...@lists.freedesktop.org Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Acked-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c| 4 +--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 +++- drivers/gpu/drm/i915/i915_gem_gtt.c| 5 +++-- drivers/gpu/drm/i915/i915_gem_userptr.c| 15 --- include/drm/drm_mem_util.h | 19 +++ 5 files changed, 30 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2912e8714f5b..a4f9c5bbb883 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2040,9 +2040,7 @@ void *i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj) int n; n = obj->base.size >> PAGE_SHIFT; - pages = kmalloc(n*sizeof(*pages), GFP_TEMPORARY | __GFP_NOWARN); - if (pages == NULL) - pages = drm_malloc_ab(n, sizeof(*pages)); + pages = drm_malloc_gfp(n, sizeof(*pages), GFP_TEMPORARY); if (pages != NULL) { n = 0; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index da1c6fe5b40e..dfabeee2ff0b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1766,11 +1766,9 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } - exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count, -GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); - if (exec2_list == NULL) - exec2_list = drm_malloc_ab(sizeof(*exec2_list), - args->buffer_count); + exec2_list = drm_malloc_gfp(sizeof(*exec2_list), + args->buffer_count, + GFP_TEMPORARY); if (exec2_list == NULL) { DRM_DEBUG("Failed to allocate exec list for %d buffers\n", args->buffer_count); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 56f4f2e58d53..224fe89baca3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3376,8 +3376,9 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view, int ret = -ENOMEM; /* Allocate a temporary list of source pages for random access. 
*/ - page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE, - sizeof(dma_addr_t)); + page_addr_list = drm_malloc_gfp(obj->base.size / PAGE_SIZE, + sizeof(dma_addr_t), + GFP_TEMPORARY); if (!page_addr_list) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 1a5f89dba4af..251e81c4b0ea 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -573,10 +573,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) ret = -ENOMEM; pinned = 0; - pvec = kmalloc(npages*sizeof(struct page *), - GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); - if (pvec == NULL) - pvec = drm_malloc_ab(npages, sizeof(struct page *)); + pvec = drm_malloc_gfp(npages, sizeof(struct page *), GFP_TEMPORARY); if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; @@ -713,14 +710,10 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) pvec = NULL; pinned = 0; if (obj->userptr.mm->mm == current->mm) { - pvec = kmalloc(num_pages*sizeof(struct page *), - GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); + pvec = drm_malloc_gfp(num_pages, sizeof(struct page *), GFP_TEMPORARY); if (pvec == NULL) { - pvec = drm_malloc_ab(num_pages, sizeof(struct page *)); - if (pvec == NULL) { - __i915_gem_userptr_set_active(obj, false); - return -ENOMEM; - } + __i915_gem_userptr_set_active(obj, false); + return -ENOMEM; } pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages, diff --git a/include/drm/drm_mem_util.h b/include/drm/drm_mem_u
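The header hunk is truncated above, but from the commit message and the drm_malloc_ab() helper it generalises, the new function is roughly the following: overflow check, a quiet non-retrying kmalloc() attempt, then the vmalloc fallback. The exact fallback flags here are an assumption:

static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size, gfp_t gfp)
{
	if (size != 0 && nmemb > SIZE_MAX / size)
		return NULL;	/* multiplication would overflow */

	if (size * nmemb <= PAGE_SIZE)
		return kmalloc(nmemb * size, gfp);

	/* Opportunistic high-order allocation: no warning and no retry,
	 * so fragmentation just drops us through to vmalloc. */
	{
		void *ptr = kmalloc(nmemb * size,
				    gfp | __GFP_NOWARN | __GFP_NORETRY);
		if (ptr)
			return ptr;
	}

	return __vmalloc(size * nmemb, gfp | __GFP_HIGHMEM, PAGE_KERNEL);
}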
[Intel-gfx] [PATCH 054/190] drm/i915: Use the new rq->i915 field where appropriate
In a few frequent cases, having a direct pointer to the drm_i915_private from the request is very useful. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c| 7 +++--- drivers/gpu/drm/i915/i915_gem_context.c| 21 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-- drivers/gpu/drm/i915/i915_gem_request.c| 2 +- drivers/gpu/drm/i915/intel_lrc.c | 6 ++ drivers/gpu/drm/i915/intel_pm.c| 3 +-- drivers/gpu/drm/i915/intel_ringbuffer.c| 34 -- 7 files changed, 32 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 31926a4fb42a..c2a1ec8abc11 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2568,7 +2568,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, return 0; if (!i915.semaphores) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_private *i915 = from_req->i915; ret = __i915_wait_request(from_req, i915->mm.interruptible, NULL, @@ -4069,12 +4069,11 @@ err: int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) { struct intel_engine_cs *ring = req->ring; - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = req->i915; u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; int i, ret; - if (!HAS_L3_DPF(dev) || !remap_info) + if (!HAS_L3_DPF(dev_priv) || !remap_info) return 0; ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 361be1085a18..3e3b4bf3fed1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -524,7 +524,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ i915.semaphores ? - hweight32(INTEL_INFO(ring->dev)->ring_mask) - 1 : + hweight32(INTEL_INFO(req->i915)->ring_mask) - 1 : 0; int len, i, ret; @@ -533,21 +533,21 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * explicitly, so we rely on the value at ring init, stored in * itlb_before_ctx_switch. */ - if (IS_GEN6(ring->dev)) { + if (IS_GEN6(req->i915)) { ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0); if (ret) return ret; } /* These flags are for resource streamer on HSW+ */ - if (IS_HASWELL(ring->dev) || INTEL_INFO(ring->dev)->gen >= 8) + if (IS_HASWELL(req->i915) || INTEL_INFO(req->i915)->gen >= 8) flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN); - else if (INTEL_INFO(ring->dev)->gen < 8) + else if (INTEL_INFO(req->i915)->gen < 8) flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); len = 4; - if (INTEL_INFO(ring->dev)->gen >= 7) + if (INTEL_INFO(req->i915)->gen >= 7) len += 2 + (num_rings ? 
4*num_rings + 2 : 0); ret = intel_ring_begin(req, len); @@ -555,13 +555,13 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) return ret; /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ - if (INTEL_INFO(ring->dev)->gen >= 7) { + if (INTEL_INFO(req->i915)->gen >= 7) { intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); if (num_rings) { struct intel_engine_cs *signaller; intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); - for_each_ring(signaller, to_i915(ring->dev), i) { + for_each_ring(signaller, req->i915, i) { if (signaller == ring) continue; @@ -581,12 +581,12 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) */ intel_ring_emit(ring, MI_NOOP); - if (INTEL_INFO(ring->dev)->gen >= 7) { + if (INTEL_INFO(req->i915)->gen >= 7) { if (num_rings) { struct intel_engine_cs *signaller; intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); - for_each_ring(signaller, to_i915(ring->dev), i) { + for_each_ring(signaller, req->i915, i) { if (signaller == ring) continue; @@ -827,10 +827,9 @@ unpin_
[Intel-gfx] [PATCH 027/190] drm/i915: Only query timestamp when measuring elapsed time
Avoid the two calls to ktime_get_raw_ns() (at best it reads the TSC) as we only need to compute the elapsed time for a timed wait. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a0744626a110..b956b8813307 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1220,7 +1220,6 @@ int __i915_wait_request(struct drm_i915_gem_request *req, int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; struct intel_wait wait; unsigned long timeout_remain; - s64 before, now; int ret = 0; might_sleep(); @@ -1239,13 +1238,12 @@ int __i915_wait_request(struct drm_i915_gem_request *req, if (*timeout == 0) return -ETIME; + /* Record current time in case interrupted, or wedged */ timeout_remain = nsecs_to_jiffies_timeout(*timeout); + *timeout += ktime_get_raw_ns(); } - /* Record current time in case interrupted by signal, or wedged */ trace_i915_gem_request_wait_begin(req); - before = ktime_get_raw_ns(); - if (INTEL_INFO(req->i915)->gen >= 6) gen6_rps_boost(req->i915, rps, req->emitted_jiffies); @@ -1298,13 +1296,12 @@ wakeup: complete: intel_engine_remove_wait(req->ring, &wait); __set_task_state(wait.task, TASK_RUNNING); - now = ktime_get_raw_ns(); trace_i915_gem_request_wait_end(req); if (timeout) { - s64 tres = *timeout - (now - before); - - *timeout = tres < 0 ? 0 : tres; + *timeout -= ktime_get_raw_ns(); + if (*timeout < 0) + *timeout = 0; /* * Apparently ktime isn't accurate enough and occasionally has a -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
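The trick is to fold the clock reads into the timeout itself: convert the caller's relative budget to an absolute deadline on entry, and recover the remainder with one subtraction on exit, so untimed waits never touch the clock. A runnable userspace model (names are illustrative; CLOCK_MONOTONIC_RAW stands in for ktime_get_raw_ns()):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static int64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
	return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}

static void timed_section(int64_t *timeout)
{
	if (timeout)
		*timeout += now_ns();	/* relative budget -> absolute deadline */

	/* ... the wait itself would go here ... */

	if (timeout) {
		*timeout -= now_ns();	/* deadline -> remaining budget */
		if (*timeout < 0)
			*timeout = 0;
	}
}

int main(void)
{
	int64_t budget = 5000000;	/* 5ms */

	timed_section(&budget);
	printf("remaining: %lld ns\n", (long long)budget);
	return 0;
}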
[Intel-gfx] [PATCH 073/190] drm/i915: Introduce i915_gem_active for request tracking
In the next patch, request tracking is made more generic and for that we need a new expanded struct and to separate out the logic changes from the mechanical churn, we split out the structure renaming into this patch. v2: Writer's block. Add some spiel about why we track requests. v3: Now i915_gem_active. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c| 10 +++--- drivers/gpu/drm/i915/i915_drv.h| 9 +++-- drivers/gpu/drm/i915/i915_gem.c| 56 +++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +-- drivers/gpu/drm/i915/i915_gem_fence.c | 6 ++-- drivers/gpu/drm/i915/i915_gem_request.h| 38 drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 6 ++-- drivers/gpu/drm/i915/intel_display.c | 10 +++--- 9 files changed, 89 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8de944ed3369..65cb1d6a5d64 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -146,10 +146,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->base.write_domain); for_each_ring(ring, dev_priv, i) seq_printf(m, "%x ", - i915_gem_request_get_seqno(obj->last_read_req[i])); + i915_gem_request_get_seqno(obj->last_read[i].request)); seq_printf(m, "] %x %x%s%s%s", - i915_gem_request_get_seqno(obj->last_write_req), - i915_gem_request_get_seqno(obj->last_fenced_req), + i915_gem_request_get_seqno(obj->last_write.request), + i915_gem_request_get_seqno(obj->last_fence.request), i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); @@ -184,8 +184,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) *t = '\0'; seq_printf(m, " (%s mappable)", s); } - if (obj->last_write_req != NULL) - seq_printf(m, " (%s)", obj->last_write_req->engine->name); + if (obj->last_write.request != NULL) + seq_printf(m, " (%s)", obj->last_write.request->engine->name); if (obj->frontbuffer_bits) seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cae448e238ca..c577f86d94f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2110,11 +2110,10 @@ struct drm_i915_gem_object { * requests on one ring where the write request is older than the * read request. This allows for the CPU to read from an active * buffer by only waiting for the write to complete. -* */ - struct drm_i915_gem_request *last_read_req[I915_NUM_RINGS]; - struct drm_i915_gem_request *last_write_req; - /** Breadcrumb of last fenced GPU access to the buffer. */ - struct drm_i915_gem_request *last_fenced_req; +*/ + struct i915_gem_active last_read[I915_NUM_RINGS]; + struct i915_gem_active last_write; + struct i915_gem_active last_fence; /** Current tiling stride for the object, if it's tiled. 
*/ uint32_t stride; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b0230e7151ce..77c253ddf060 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1117,23 +1117,23 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, return 0; if (readonly) { - if (obj->last_write_req != NULL) { - ret = i915_wait_request(obj->last_write_req); + if (obj->last_write.request != NULL) { + ret = i915_wait_request(obj->last_write.request); if (ret) return ret; - i = obj->last_write_req->engine->id; - if (obj->last_read_req[i] == obj->last_write_req) + i = obj->last_write.request->engine->id; + if (obj->last_read[i].request == obj->last_write.request) i915_gem_object_retire__read(obj, i); else i915_gem_object_retire__write(obj); } } else { for (i = 0; i < I915_NUM_RINGS; i++) { - if (obj->last_read_req[i] == NULL) + if (obj->last_read[i].request == NULL) continue; - ret = i915_wait_request(obj->last_read_req[i]); +
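Since every converted site above becomes a plain .request dereference, at this stage the new type is plausibly just a named wrapper, reserving room for the expanded struct the commit message promises (the exact definition is an assumption):

/* Transitional form: the same request pointer as before, but behind a
 * type the next patch can grow (e.g. with retirement hooks) without
 * another tree-wide rename. */
struct i915_gem_active {
	struct drm_i915_gem_request *request;
};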
[Intel-gfx] [PATCH 056/190] drm/i915: Unify intel_ring_begin()
Combine the near identical implementations of intel_logical_ring_begin() and intel_ring_begin() - the only difference is that the logical wait has to check for a matching ring (which is assumed by legacy). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c| 141 ++-- drivers/gpu/drm/i915/intel_lrc.h| 1 - drivers/gpu/drm/i915/intel_mocs.c | 12 +-- drivers/gpu/drm/i915/intel_ringbuffer.c | 111 + 4 files changed, 69 insertions(+), 196 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index dc4fc9d8612c..3d14b69632e8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -698,48 +698,6 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request return 0; } -static int logical_ring_wait_for_space(struct drm_i915_gem_request *req, - int bytes) -{ - struct intel_ringbuffer *ringbuf = req->ringbuf; - struct intel_engine_cs *ring = req->ring; - struct drm_i915_gem_request *target; - unsigned space; - int ret; - - if (intel_ring_space(ringbuf) >= bytes) - return 0; - - /* The whole point of reserving space is to not wait! */ - WARN_ON(ringbuf->reserved_in_use); - - list_for_each_entry(target, &ring->request_list, list) { - /* -* The request queue is per-engine, so can contain requests -* from multiple ringbuffers. Here, we must ignore any that -* aren't from the ringbuffer we're considering. -*/ - if (target->ringbuf != ringbuf) - continue; - - /* Would completion of this request free enough space? */ - space = __intel_ring_space(target->postfix, ringbuf->tail, - ringbuf->size); - if (space >= bytes) - break; - } - - if (WARN_ON(&target->list == &ring->request_list)) - return -ENOSPC; - - ret = i915_wait_request(target); - if (ret) - return ret; - - ringbuf->space = space; - return 0; -} - /* * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload * @request: Request to advance the logical ringbuffer of. @@ -763,89 +721,6 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) execlists_context_queue(request); } -static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf) -{ - int rem = ringbuf->size - ringbuf->tail; - memset(ringbuf->virtual_start + ringbuf->tail, 0, rem); - - ringbuf->tail = 0; - intel_ring_update_space(ringbuf); -} - -static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes) -{ - struct intel_ringbuffer *ringbuf = req->ringbuf; - int remain_usable = ringbuf->effective_size - ringbuf->tail; - int remain_actual = ringbuf->size - ringbuf->tail; - int ret, total_bytes, wait_bytes = 0; - bool need_wrap = false; - - if (ringbuf->reserved_in_use) - total_bytes = bytes; - else - total_bytes = bytes + ringbuf->reserved_size; - - if (unlikely(bytes > remain_usable)) { - /* -* Not enough space for the basic request. So need to flush -* out the remainder and then wait for base + reserved. -*/ - wait_bytes = remain_actual + total_bytes; - need_wrap = true; - } else { - if (unlikely(total_bytes > remain_usable)) { - /* -* The base request will fit but the reserved space -* falls off the end. So only need to to wait for the -* reserved size after flushing out the remainder. -*/ - wait_bytes = remain_actual + ringbuf->reserved_size; - need_wrap = true; - } else if (total_bytes > ringbuf->space) { - /* No wrapping required, just waiting. 
*/ - wait_bytes = total_bytes; - } - } - - if (wait_bytes) { - ret = logical_ring_wait_for_space(req, wait_bytes); - if (unlikely(ret)) - return ret; - - if (need_wrap) - __wrap_ring_buffer(ringbuf); - } - - return 0; -} - -/** - * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands - * - * @req: The request to start some new work for - * @num_dwords: number of DWORDs that we plan to write to the ringbuffer. - * - * The ringbuffer might not be ready to accept the commands right away (maybe it needs to - * be wrapped
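At the core of both copies is the same free-space computation over a circular buffer, which the unified intel_ring_begin() keeps. A standalone model of that calculation (the reserved gap and the constants here are illustrative, not the driver's exact values):

#include <assert.h>

/* Free bytes in a circular ring, given the GPU read pointer 'head'
 * and the CPU write pointer 'tail'. A small gap stays reserved so
 * that head == tail unambiguously means "empty" rather than "full".
 */
static int ring_space(int head, int tail, int size, int gap)
{
        int space = head - (tail + gap);
        if (space <= 0)
                space += size;
        return space;
}

int main(void)
{
        assert(ring_space(0, 0, 4096, 8) == 4088);  /* empty ring */
        assert(ring_space(256, 0, 4096, 8) == 248); /* GPU 256 ahead */
        return 0;
}

When the result is too small, both paths wait for the oldest request whose completion would free enough space; the only divergence was the ringbuffer-matching check quoted above, which the unified version keeps.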
[Intel-gfx] [PATCH 067/190] drm/i915: Unify legacy/execlists emission of MI_BATCHBUFFER_START
Both the ->dispatch_execbuffer and ->emit_bb_start callbacks do exactly the same thing, add MI_BATCHBUFFER_START to the request's ringbuffer - we need only one vfunc. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 +-- drivers/gpu/drm/i915/i915_gem_render_state.c | 16 +++ drivers/gpu/drm/i915/intel_lrc.c | 9 +++- drivers/gpu/drm/i915/intel_ringbuffer.c | 67 +--- drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++-- 5 files changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3956d74d8c8c..3e6384deca65 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1297,9 +1297,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, exec_start = params->batch_obj_vm_offset + params->args_batch_start_offset; - ret = params->ring->dispatch_execbuffer(params->request, - exec_start, exec_len, - params->dispatch_flags); + ret = params->ring->emit_bb_start(params->request, + exec_start, exec_len, + params->dispatch_flags); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index bee3f0ccd0cd..ccc988c2b226 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -205,18 +205,18 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (so.rodata == NULL) return 0; - ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset, - so.rodata->batch_items * 4, - I915_DISPATCH_SECURE); + ret = req->engine->emit_bb_start(req, so.ggtt_offset, +so.rodata->batch_items * 4, +I915_DISPATCH_SECURE); if (ret) goto out; if (so.aux_batch_size > 8) { - ret = req->engine->dispatch_execbuffer(req, - (so.ggtt_offset + - so.aux_batch_offset), - so.aux_batch_size, - I915_DISPATCH_SECURE); + ret = req->engine->emit_bb_start(req, +(so.ggtt_offset + + so.aux_batch_offset), +so.aux_batch_size, +I915_DISPATCH_SECURE); if (ret) goto out; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 82b21a883732..30effca91184 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -783,7 +783,9 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, exec_start = params->batch_obj_vm_offset + args->batch_start_offset; - ret = engine->emit_bb_start(params->request, exec_start, params->dispatch_flags); + ret = engine->emit_bb_start(params->request, + exec_start, args->batch_len, + params->dispatch_flags); if (ret) return ret; @@ -1409,7 +1411,8 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) } static int gen8_emit_bb_start(struct drm_i915_gem_request *req, - u64 offset, unsigned dispatch_flags) + u64 offset, u32 len, + unsigned dispatch_flags) { struct intel_ring *ring = req->ring; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); @@ -1637,12 +1640,14 @@ static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req) return 0; ret = req->engine->emit_bb_start(req, so.ggtt_offset, +so.rodata->batch_items * 4, I915_DISPATCH_SECURE); if (ret) goto out; ret = req->engine->emit_bb_start(req, (so.ggtt_offset + so.aux_batch_offset), +so.aux_batch_size, I915_DISPATCH_SECURE); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/i
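In miniature, the unification is just one function pointer with the wider signature (offset, length, dispatch flags) serving both submission paths. A sketch with stand-in types, not the driver's own:

#include <stdint.h>
#include <stdio.h>

struct request; /* opaque stand-in for drm_i915_gem_request */

struct engine {
        int (*emit_bb_start)(struct request *req, uint64_t offset,
                             uint32_t len, unsigned dispatch_flags);
};

static int gen8_emit_bb_start(struct request *req, uint64_t offset,
                              uint32_t len, unsigned dispatch_flags)
{
        (void)req; (void)dispatch_flags;
        /* the real vfunc writes MI_BATCH_BUFFER_START into the ring */
        printf("MI_BATCH_BUFFER_START @ %#llx (%u bytes)\n",
               (unsigned long long)offset, len);
        return 0;
}

int main(void)
{
        struct engine e = { .emit_bb_start = gen8_emit_bb_start };
        /* legacy execbuf and execlists now go through the same hook */
        return e.emit_bb_start(NULL, 0x10000, 4096, 0);
}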
[Intel-gfx] [PATCH 038/190] drm/i915: Flush the RPS bottom-half when the GPU idles
Make sure that the RPS bottom-half is flushed before we set the idle frequency when we decide the GPU is idle. This should prevent any races with the bottom-half and setting the idle frequency, and ensures that the bottom-half is bounded by the GPU's rpm reference taken for when it is active (i.e. between gen6_rps_busy() and gen6_rps_idle()). v2: Avoid recursively using the i915->wq - RPS does not touch the struct_mutex so has no place being on the ordered i915->wq. v3: Enable/disable interrupts for RPS busy/idle in order to prevent further HW access from RPS outside of the wakeref. Signed-off-by: Chris Wilson Cc: Imre Deak Cc: Jesse Barnes --- drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/i915/i915_irq.c | 45 +++- drivers/gpu/drm/i915/intel_display.c | 1 + drivers/gpu/drm/i915/intel_drv.h | 6 ++--- drivers/gpu/drm/i915/intel_pm.c | 23 +- 5 files changed, 34 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 4c090f1cf69c..442e1217e442 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1492,7 +1492,6 @@ static int intel_runtime_suspend(struct device *device) intel_guc_suspend(dev); - intel_suspend_gt_powersave(dev); intel_runtime_pm_disable_interrupts(dev_priv); ret = intel_suspend_complete(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8866e981bcba..d9757d227c86 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -336,9 +336,8 @@ void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) __gen6_disable_pm_irq(dev_priv, mask); } -void gen6_reset_rps_interrupts(struct drm_device *dev) +void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; i915_reg_t reg = gen6_pm_iir(dev_priv); spin_lock_irq(&dev_priv->irq_lock); @@ -349,14 +348,14 @@ void gen6_reset_rps_interrupts(struct drm_device *dev) spin_unlock_irq(&dev_priv->irq_lock); } -void gen6_enable_rps_interrupts(struct drm_device *dev) +void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; + if (dev_priv->rps.interrupts_enabled) + return; spin_lock_irq(&dev_priv->irq_lock); - - WARN_ON(dev_priv->rps.pm_iir); - WARN_ON(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); + WARN_ON_ONCE(dev_priv->rps.pm_iir); + WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); dev_priv->rps.interrupts_enabled = true; I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) | dev_priv->pm_rps_events); @@ -382,17 +381,13 @@ u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask) return mask; } -void gen6_disable_rps_interrupts(struct drm_device *dev) +void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; + if (!dev_priv->rps.interrupts_enabled) + return; spin_lock_irq(&dev_priv->irq_lock); dev_priv->rps.interrupts_enabled = false; - spin_unlock_irq(&dev_priv->irq_lock); - - cancel_work_sync(&dev_priv->rps.work); - - spin_lock_irq(&dev_priv->irq_lock); I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); @@ -401,8 +396,15 @@ void gen6_disable_rps_interrupts(struct drm_device *dev) ~dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); + synchronize_irq(dev_priv->dev->irq); - synchronize_irq(dev->irq); + /* Now that we will not be generating any more work, flush any 
+* outstanding tasks. As we are called on the RPS idle path, +* we will reset the GPU to minimum frequencies, so the current +* state of the worker can be discarded. +*/ + cancel_work_sync(&dev_priv->rps.work); + gen6_reset_rps_interrupts(dev_priv); } /** @@ -1103,13 +1105,6 @@ static void gen6_pm_rps_work(struct work_struct *work) return; } - /* -* The RPS work is synced during runtime suspend, we don't require a -* wakeref. TODO: instead of disabling the asserts make sure that we -* always hold an RPM reference while the work is running. -*/ - DISABLE_RPM_WAKEREF_ASSERTS(dev_priv); - pm_iir = dev_priv->rps.pm_iir; dev_priv->rps.pm_iir = 0; /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ @@ -1122,7 +1117,7 @@ static void gen6_pm_rps_work(struct work_struct *work) WARN_ON(pm_iir & ~dev_priv->pm_rps_events); if ((pm_iir & dev_pri
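The ordering in the reworked gen6_disable_rps_interrupts() carries the fix: once the source is masked and any in-flight handler has drained, cancelling the worker is final because nothing can requeue it. A runnable model with stubbed primitives (the stubs only print; the sequence is the point):

#include <stdbool.h>
#include <stdio.h>

static bool interrupts_enabled = true;

static void mask_rps_interrupts(void)  { puts("mask PM IER/IMR"); }
static void synchronize_irq(void)      { puts("drain in-flight handler"); }
static void cancel_work_sync(void)     { puts("cancel/flush worker"); }
static void reset_rps_interrupts(void) { puts("clear stale IIR"); }

static void disable_rps(void)
{
        if (!interrupts_enabled)
                return;
        interrupts_enabled = false; /* handler stops queuing work */
        mask_rps_interrupts();      /* hardware raises no new IRQs */
        synchronize_irq();          /* any running handler finished */
        cancel_work_sync();         /* safe: worker cannot rearm */
        reset_rps_interrupts();     /* fresh state for next enable */
}

int main(void) { disable_rps(); return 0; }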
[Intel-gfx] [PATCH 029/190] drm/i915: Convert trace-irq to the breadcrumb waiter
If we convert the tracing from direct use of ring->irq_get() over to the breadcrumb infrastructure, we only have a single user of ring->irq_get and so we will be able to simplify the driver routines (eliminating the redundant validation and irq refcounting). v2: Move to a signaling framework based upon the waiter. v3: Track the first-signal to avoid having to walk the rbtree every time. v4: Mark the signaler thread as RT priority to reduce latency in the indirect wakeups. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 8 -- drivers/gpu/drm/i915/i915_gem.c | 6 -- drivers/gpu/drm/i915/i915_irq.c | 7 +- drivers/gpu/drm/i915/i915_trace.h| 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 177 +++ drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +- 6 files changed, 186 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8940b8d3fa59..7f021505e32f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3620,14 +3620,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) schedule_timeout_uninterruptible(remaining_jiffies); } } - -static inline void i915_trace_irq_get(struct intel_engine_cs *ring, - struct drm_i915_gem_request *req) -{ - if (ring->trace_irq_req == NULL && ring->irq_get(ring)) - i915_gem_request_assign(&ring->trace_irq_req, req); -} - static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->ring; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a713e8a6cb36..5ddb2ed0f785 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2889,12 +2889,6 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) i915_gem_object_retire__read(obj, ring->id); } - if (unlikely(ring->trace_irq_req && -i915_gem_request_completed(ring->trace_irq_req))) { - ring->irq_put(ring); - i915_gem_request_assign(&ring->trace_irq_req, NULL); - } - WARN_ON(i915_verify_lists(ring->dev)); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 738edd7fbf8d..bf48fa63127a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -996,12 +996,9 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev) static void notify_ring(struct intel_engine_cs *ring) { - if (!intel_ring_initialized(ring)) - return; - - trace_i915_gem_request_notify(ring); ring->irq_posted = true; /* paired with mb() in wake_up_process() */ - intel_engine_wakeup(ring); + if (intel_engine_wakeup(ring)) + trace_i915_gem_request_notify(ring); } static void vlv_c0_read(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index efca75bcace3..43bb2e0bb949 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -503,7 +503,7 @@ TRACE_EVENT(i915_gem_ring_dispatch, __entry->ring = ring->id; __entry->seqno = i915_gem_request_get_seqno(req); __entry->flags = flags; - i915_trace_irq_get(ring, req); + intel_engine_enable_signaling(req); ), TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index d689bd61534e..cf9cbcc2d5d7 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -22,6 +22,8 @@ * */ +#include + #include "i915_drv.h" static void intel_breadcrumbs_fake_irq(unsigned 
long data) @@ -320,10 +322,185 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) (unsigned long)engine); } +struct signal { + struct rb_node node; + struct intel_wait wait; + struct drm_i915_gem_request *request; +}; + +static bool signal_complete(struct signal *signal) +{ + if (signal == NULL) + return false; + + /* If another process served as the bottom-half it may have already +* signalled that this wait is already completed. +*/ + if (intel_wait_complete(&signal->wait)) + return true; + + /* Carefully check if the request is complete, giving time for the +* seqno to be visible or if the GPU hung. +*/ + if (__i915_request_irq_complete(signal->request)) + return true; + + return false; +} + +static struct signal *to_s
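The v4 note is about scheduling class rather than correctness: the signaler runs as a realtime thread so the indirect wakeup chain (irq, then signaler, then whoever is listening) adds as little latency as possible. A userspace analogue of creating such a thread; SCHED_FIFO normally requires privileges, hence the error path:

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static void *signaler(void *arg)
{
        (void)arg;
        /* ... wait on breadcrumbs, signal completed requests ... */
        return NULL;
}

int main(void)
{
        pthread_attr_t attr;
        struct sched_param param = { .sched_priority = 1 };
        pthread_t tid;

        pthread_attr_init(&attr);
        pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
        pthread_attr_setschedparam(&attr, &param);
        pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);

        if (pthread_create(&tid, &attr, signaler, NULL) != 0) {
                perror("pthread_create (RT needs privileges)");
                return 1;
        }
        return pthread_join(tid, NULL);
}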
[Intel-gfx] [PATCH 068/190] drm/i915: Unify adding requests between ringbuffer and execlists
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_request.c | 8 +- drivers/gpu/drm/i915/intel_lrc.c| 14 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 129 +--- drivers/gpu/drm/i915/intel_ringbuffer.h | 21 +++--- 4 files changed, 87 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index ce663acc9c7d..01443d8d9224 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -434,13 +434,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ request->postfix = intel_ring_get_tail(ring); - if (i915.enable_execlists) - ret = request->engine->emit_request(request); - else { - ret = request->engine->add_request(request); - - request->tail = intel_ring_get_tail(ring); - } + ret = request->engine->add_request(request); /* Not allowed to fail! */ WARN(ret, "emit|add_request failed: %d!\n", ret); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 30effca91184..9838503fafca 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -445,7 +445,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *engine) if (req0->elsp_submitted) { /* * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL -* as we resubmit the request. See gen8_emit_request() +* as we resubmit the request. See gen8_add_request() * for where we prepare the padding after the end of the * request. */ @@ -1588,7 +1588,7 @@ gen6_seqno_barrier(struct intel_engine_cs *ring) intel_flush_status_page(ring, I915_GEM_HWS_INDEX); } -static int gen8_emit_request(struct drm_i915_gem_request *request) +static int gen8_add_request(struct drm_i915_gem_request *request) { struct intel_ring *ring = request->ring; u32 cmd; @@ -1782,8 +1782,8 @@ static int logical_render_ring_init(struct drm_device *dev) ring->init_context = gen8_init_rcs_context; ring->cleanup = intel_fini_pipe_control; ring->irq_seqno_barrier = gen6_seqno_barrier; - ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush_render; + ring->add_request = gen8_add_request; ring->irq_enable = gen8_logical_ring_enable_irq; ring->irq_disable = gen8_logical_ring_disable_irq; ring->emit_bb_start = gen8_emit_bb_start; @@ -1828,8 +1828,8 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->init_hw = gen8_init_common_ring; ring->irq_seqno_barrier = gen6_seqno_barrier; - ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->add_request = gen8_add_request; ring->irq_enable = gen8_logical_ring_enable_irq; ring->irq_disable = gen8_logical_ring_disable_irq; ring->emit_bb_start = gen8_emit_bb_start; @@ -1852,8 +1852,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->init_hw = gen8_init_common_ring; ring->irq_seqno_barrier = gen6_seqno_barrier; - ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->add_request = gen8_add_request; ring->irq_enable = gen8_logical_ring_enable_irq; ring->irq_disable = gen8_logical_ring_disable_irq; ring->emit_bb_start = gen8_emit_bb_start; @@ -1876,8 +1876,8 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->init_hw = gen8_init_common_ring; ring->irq_seqno_barrier = gen6_seqno_barrier; - ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->add_request = gen8_add_request; ring->irq_enable = gen8_logical_ring_enable_irq; ring->irq_disable = gen8_logical_ring_disable_irq; ring->emit_bb_start = 
gen8_emit_bb_start; @@ -1900,8 +1900,8 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->init_hw = gen8_init_common_ring; ring->irq_seqno_barrier = gen6_seqno_barrier; - ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->add_request = gen8_add_request; ring->irq_enable = gen8_logical_ring_enable_irq; ring->irq_disable = gen8_logical_ring_disable_irq; ring->emit_bb_start = gen8_emit_bb_start; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 04f0a77d49cf..556e9e2c1fec 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -59,13 +59,6 @@ int intel_ring_space(struct intel_ring *ringbuf) return ringbuf->space;
[Intel-gfx] [PATCH 022/190] drm/i915: Check the CPU cached value of seqno after waking the waiter
If we have multiple waiters, we may find that many complete on the same wake up. If we first inspect the seqno from the CPU cache, we may reduce the number of heavyweight coherent seqno reads we require. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fcedcbc50834..c2ee8efdd928 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3632,6 +3632,12 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->ring; + /* Before we do the heavier coherent read of the seqno, +* check the value (hopefully) in the CPU cacheline. +*/ + if (i915_gem_request_completed(req)) + return true; + /* Ensure our read of the seqno is coherent so that we * do not "miss an interrupt" (i.e. if this is the last * request and the seqno write from the GPU is not visible @@ -3643,11 +3649,11 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * but it is easier and safer to do it every time the waiter * is woken. */ - if (engine->irq_seqno_barrier) + if (engine->irq_seqno_barrier) { engine->irq_seqno_barrier(engine); - - if (i915_gem_request_completed(req)) - return true; + if (i915_gem_request_completed(req)) + return true; + } /* We need to check whether any gpu reset happened in between * the request being submitted and now. If a reset has occurred, -- 2.7.0.rc3
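The patch reduces to "try the possibly-stale cached value before paying for coherency". A standalone model, with an atomic fence standing in for the engine's irq_seqno_barrier and the same wrap-safe seqno comparison the driver uses:

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned hws_seqno; /* stand-in for the status page */

static bool seqno_passed(unsigned target)
{
        unsigned seen = atomic_load_explicit(&hws_seqno,
                                             memory_order_relaxed);
        return (int)(seen - target) >= 0; /* wrap-safe comparison */
}

static bool request_irq_complete(unsigned target)
{
        /* Cheap first: the value may already be in our cacheline. */
        if (seqno_passed(target))
                return true;

        /* Heavyweight second: force a coherent re-read. */
        atomic_thread_fence(memory_order_seq_cst);
        return seqno_passed(target);
}

int main(void)
{
        atomic_store(&hws_seqno, 10);
        return request_irq_complete(9) ? 0 : 1;
}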
[Intel-gfx] [PATCH 065/190] drm/i915: Remove obsolete engine->gpu_caches_dirty
Space for flushing the GPU cache prior to completing the request is preallocated and so cannot fail. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_context.c| 2 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +--- drivers/gpu/drm/i915/i915_gem_gtt.c| 18 drivers/gpu/drm/i915/i915_gem_request.c| 7 ++- drivers/gpu/drm/i915/intel_lrc.c | 47 +++ drivers/gpu/drm/i915/intel_lrc.h | 2 - drivers/gpu/drm/i915/intel_ringbuffer.c| 72 +++--- drivers/gpu/drm/i915/intel_ringbuffer.h| 7 --- 8 files changed, 39 insertions(+), 125 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 17fe8ed991d6..c078ebc29da5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -534,7 +534,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * itlb_before_ctx_switch. */ if (IS_GEN6(req->i915)) { - ret = req->engine->flush(req, I915_GEM_GPU_DOMAINS, 0); + ret = req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 731ce13dbdbc..a56fae99a1bc 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -969,10 +969,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, if (flush_domains & I915_GEM_DOMAIN_GTT) wmb(); - /* Unconditionally invalidate gpu caches and ensure that we do flush -* any residual writes from the previous batch. -*/ - return intel_engine_invalidate_all_caches(req); + /* Unconditionally invalidate gpu caches and TLBs. */ + return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0); } static bool @@ -1138,9 +1136,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, static void i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) { - /* Unconditionally force add_request to emit a full flush. */ - params->ring->gpu_caches_dirty = true; - /* Add a breadcrumb for the completion of the batch buffer */ __i915_add_request(params->request, params->batch_obj, true); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9a91451d66ac..cddbd8c00663 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1652,9 +1652,9 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = req->engine->flush(req, -I915_GEM_GPU_DOMAINS, -I915_GEM_GPU_DOMAINS); + ret = req->engine->emit_flush(req, + I915_GEM_GPU_DOMAINS, + I915_GEM_GPU_DOMAINS); if (ret) return ret; @@ -1690,9 +1690,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = req->engine->flush(req, -I915_GEM_GPU_DOMAINS, -I915_GEM_GPU_DOMAINS); + ret = req->engine->emit_flush(req, + I915_GEM_GPU_DOMAINS, + I915_GEM_GPU_DOMAINS); if (ret) return ret; @@ -1710,9 +1710,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, /* XXX: RCS is the only one to auto invalidate the TLBs? 
*/ if (req->engine->id != RCS) { - ret = req->engine->flush(req, -I915_GEM_GPU_DOMAINS, -I915_GEM_GPU_DOMAINS); + ret = req->engine->emit_flush(req, + I915_GEM_GPU_DOMAINS, + I915_GEM_GPU_DOMAINS); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e1f2af046b6c..e911430575fe 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -426,10 +426,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, * what. */ if (flush_caches) { - if (i915.enable_execlists) - ret = logical_ring_flush_all_caches(request); - else - ret = intel_engine_flush_all_caches(request); + ret = request->engine->emit_flush(request, +
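Because the ring space for the final flush is reserved when the request is allocated, emitting it at add-request time cannot fail, and that is what lets the dirty flag go. The resulting shape, sketched with stand-in types (GPU_DOMAINS here is an illustrative mask, not the real define):

struct request { int reserved_space; };

#define GPU_DOMAINS 0x3f

static int emit_flush(struct request *req, unsigned invalidate,
                      unsigned flush)
{
        (void)req; (void)invalidate; (void)flush;
        return 0; /* writes into preallocated space: cannot fail */
}

static void add_request(struct request *req, int flush_caches)
{
        /* no gpu_caches_dirty bookkeeping: flush unconditionally */
        if (flush_caches)
                (void)emit_flush(req, 0, GPU_DOMAINS);
        /* ... emit breadcrumb and submit ... */
}

int main(void)
{
        struct request req = { .reserved_space = 32 };
        add_request(&req, 1);
        return 0;
}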
[Intel-gfx] [PATCH 060/190] drm/i915: Rename backpointer from intel_ringbuffer to intel_engine_cs
Having ringbuf->ring point to an engine is confusing, so rename it once again to ring->engine. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_guc_submission.c | 10 +++--- drivers/gpu/drm/i915/intel_lrc.c | 35 +-- drivers/gpu/drm/i915/intel_ringbuffer.c| 54 +++--- drivers/gpu/drm/i915/intel_ringbuffer.h| 2 +- 4 files changed, 49 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index e82cc9182dfa..53abe2143f8a 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -391,7 +391,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, for (i = 0; i < I915_NUM_RINGS; i++) { struct guc_execlist_context *lrc = &desc.lrc[i]; struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; - struct intel_engine_cs *ring; + struct intel_engine_cs *engine; struct drm_i915_gem_object *obj; uint64_t ctx_desc; @@ -406,15 +406,15 @@ static void guc_init_ctx_desc(struct intel_guc *guc, if (!obj) break; /* XXX: continue? */ - ring = ringbuf->ring; - ctx_desc = intel_lr_context_descriptor(ctx, ring); + engine = ringbuf->engine; + ctx_desc = intel_lr_context_descriptor(ctx, engine); lrc->context_desc = (u32)ctx_desc; /* The state page is after PPHWSP */ lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | - (ring->id << GUC_ELC_ENGINE_OFFSET); + (engine->id << GUC_ELC_ENGINE_OFFSET); obj = ringbuf->obj; @@ -423,7 +423,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; - desc.engines_used |= (1 << ring->id); + desc.engines_used |= (1 << engine->id); } WARN_ON(desc.engines_used == 0); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 87d325b6e7dc..8639ebfab96f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2179,13 +2179,13 @@ void intel_lr_context_free(struct intel_context *ctx) if (ctx_obj) { struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; - struct intel_engine_cs *ring = ringbuf->ring; + struct intel_engine_cs *engine = ringbuf->engine; - if (ctx == ring->default_context) { + if (ctx == engine->default_context) { intel_unpin_ringbuffer_obj(ringbuf); i915_gem_object_ggtt_unpin(ctx_obj); } - WARN_ON(ctx->engine[ring->id].pin_count); + WARN_ON(ctx->engine[engine->id].pin_count); intel_ringbuffer_free(ringbuf); drm_gem_object_unreference(&ctx_obj->base); } @@ -2261,57 +2261,54 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring, * * Return: non-zero on error. 
*/ - int intel_lr_context_deferred_alloc(struct intel_context *ctx, -struct intel_engine_cs *ring) + struct intel_engine_cs *engine) { - struct drm_device *dev = ring->dev; struct drm_i915_gem_object *ctx_obj; uint32_t context_size; struct intel_ringbuffer *ringbuf; int ret; WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); - WARN_ON(ctx->engine[ring->id].state); + WARN_ON(ctx->engine[engine->id].state); - context_size = round_up(intel_lr_context_size(ring), 4096); + context_size = round_up(intel_lr_context_size(engine), 4096); /* One extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; - ctx_obj = i915_gem_alloc_object(dev, context_size); + ctx_obj = i915_gem_alloc_object(engine->dev, context_size); if (!ctx_obj) { DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n"); return -ENOMEM; } - ringbuf = intel_engine_create_ringbuffer(ring, 4 * PAGE_SIZE); + ringbuf = intel_engine_create_ringbuffer(engine, 4 * PAGE_SIZE); if (IS_ERR(ringbuf)) { ret = PTR_ERR(ringbuf); goto error_deref_obj; } - ret = populate_lr_context(ctx, ctx_obj, rin
[Intel-gfx] [PATCH 046/190] drm/i915: Derive GEM requests from dma-fence
dma-buf provides a generic fence class for interoperation between drivers. Internally we use the request structure as a fence, and so with only a little bit of interfacing we can rebase those requests on top of dma-buf fences. This will allow us, in the future, to pass those fences back to userspace or between drivers. v2: The fence_context needs to be globally unique, not just unique to this device. Signed-off-by: Chris Wilson Cc: Jesse Barnes Cc: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c| 2 +- drivers/gpu/drm/i915/i915_gem_request.c| 111 + drivers/gpu/drm/i915/i915_gem_request.h| 33 - drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_trace.h | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 3 +- drivers/gpu/drm/i915/intel_lrc.c | 3 +- drivers/gpu/drm/i915/intel_ringbuffer.c| 15 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + 10 files changed, 133 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6172649b7e56..b82482573a8f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -710,7 +710,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data) if (req->pid) task = pid_task(req->pid, PIDTYPE_PID); seq_printf(m, "%x @ %d: %s [%d]\n", - req->seqno, + req->fence.seqno, (int) (jiffies - req->emitted_jiffies), task ? task->comm : "", task ? task->pid : -1); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 1c4f4d83a3c2..e366ca0dcd99 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -24,6 +24,92 @@ #include "i915_drv.h" +static inline struct drm_i915_gem_request * +to_i915_request(struct fence *fence) +{ + return container_of(fence, struct drm_i915_gem_request, fence); +} + +static const char *i915_fence_get_driver_name(struct fence *fence) +{ + return "i915"; +} + +static const char *i915_fence_get_timeline_name(struct fence *fence) +{ + return to_i915_request(fence)->ring->name; +} + +static bool i915_fence_signaled(struct fence *fence) +{ + return i915_gem_request_completed(to_i915_request(fence)); +} + +static bool i915_fence_enable_signaling(struct fence *fence) +{ + if (i915_fence_signaled(fence)) + return false; + + return intel_engine_enable_signaling(to_i915_request(fence)) == 0; +} + +static signed long i915_fence_wait(struct fence *fence, + bool interruptible, + signed long timeout_jiffies) +{ + s64 timeout_ns, *timeout; + int ret; + + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) { + timeout_ns = jiffies_to_nsecs(timeout_jiffies); + timeout = &timeout_ns; + } else + timeout = NULL; + + ret = __i915_wait_request(to_i915_request(fence), + interruptible, timeout, + NULL); + if (ret == -ETIME) + return 0; + + if (ret < 0) + return ret; + + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) + timeout_jiffies = nsecs_to_jiffies(timeout_ns); + + return timeout_jiffies; +} + +static void i915_fence_value_str(struct fence *fence, char *str, int size) +{ + snprintf(str, size, "%u", fence->seqno); +} + +static void i915_fence_timeline_value_str(struct fence *fence, char *str, + int size) +{ + snprintf(str, size, "%u", +intel_ring_get_seqno(to_i915_request(fence)->ring)); +} + +static void i915_fence_release(struct fence *fence) +{ + struct drm_i915_gem_request *req = to_i915_request(fence); + kmem_cache_free(req->i915->requests, req); +} + +static const struct fence_ops i915_fence_ops = { + 
.get_driver_name = i915_fence_get_driver_name, + .get_timeline_name = i915_fence_get_timeline_name, + .enable_signaling = i915_fence_enable_signaling, + .signaled = i915_fence_signaled, + .wait = i915_fence_wait, + .release = i915_fence_release, + .fence_value_str = i915_fence_value_str, + .timeline_value_str = i915_fence_timeline_value_str, +}; + static int i915_gem_check_wedge(unsigned reset_counter, bool interruptible) { @@ -116,6 +202,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, struct drm_i915_private *dev_priv = to_i915(ring->dev); unsigned reset_counter = i915_reset_co
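The v2 note is the subtle requirement: a fence's context (its timeline id) must be unique across the whole system, not merely within one device, so each engine draws its id from a global allocator in the style of fence_context_alloc(). A standalone model of that allocation:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned next_context = 1;

/* Hand out globally unique timeline ids, one block per caller. */
static unsigned fence_context_alloc(unsigned num)
{
        return atomic_fetch_add(&next_context, num);
}

struct fence {
        unsigned context; /* which timeline */
        unsigned seqno;   /* position on that timeline */
};

int main(void)
{
        /* one timeline per engine, unique across all devices */
        unsigned rcs = fence_context_alloc(1);
        unsigned bcs = fence_context_alloc(1);
        struct fence f = { .context = rcs, .seqno = 1 };
        printf("rcs=%u bcs=%u fence=%u/%u\n", rcs, bcs,
               f.context, f.seqno);
        return 0;
}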
[Intel-gfx] [PATCH 016/190] drm/i915: Make queueing the hangcheck work inline
Since the function is a small wrapper around schedule_delayed_work(), move it inline to remove the function call overhead for the principal caller. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 17 - drivers/gpu/drm/i915/i915_irq.c | 16 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 188bed933f11..201dd330f66a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2709,7 +2709,22 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv); bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port); /* i915_irq.c */ -void i915_queue_hangcheck(struct drm_i915_private *dev_priv); +static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) +{ + unsigned long delay; + + if (unlikely(!i915.enable_hangcheck)) + return; + + /* Don't continually defer the hangcheck so that it is always run at +* least once after work has been scheduled on any ring. Otherwise, +* we will ignore a hung ring if a second ring is kept busy. +*/ + + delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); + schedule_delayed_work(&dev_priv->gpu_error.hangcheck_work, delay); +} + __printf(3, 4) void i915_handle_error(struct drm_device *dev, bool wedged, const char *fmt, ...); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8939438d747d..2a8a9694eec5 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3173,22 +3173,6 @@ out: ENABLE_RPM_WAKEREF_ASSERTS(dev_priv); } -void i915_queue_hangcheck(struct drm_i915_private *dev_priv) -{ - unsigned long delay; - - if (!i915.enable_hangcheck) - return; - - /* Don't continually defer the hangcheck so that it is always run at -* least once after work has been scheduled on any ring. Otherwise, -* we will ignore a hung ring if a second ring is kept busy. -*/ - - delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); - schedule_delayed_work(&dev_priv->gpu_error.hangcheck_work, delay); -} - static void ibx_irq_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; -- 2.7.0.rc3
[Intel-gfx] [PATCH 009/190] drm/i915: Tighten reset_counter for reset status
In the reset_counter, we use two bits to track a GPU hang and reset. The low bit is a "reset-in-progress" flag that we set to signal when we need to break waiters in order for the recovery task to grab the mutex. As soon as the recovery task has the mutex, we can clear that flag (which we do by incrementing the reset_counter thereby incrementing the global reset epoch). By clearing that flag when the recovery task holds the struct_mutex, we can forgo a second flag that simply tells GEM to ignore the "reset-in-progress" flag. The second flag we store in the reset_counter is whether the reset failed and we consider the GPU terminally wedged. Whilst this flag is set, all access to the GPU (at least through GEM rather than direct mmio access) is verboten. PS: Fun is in store, as in the future we want to move from a global reset epoch to a per-engine reset engine with request recovery. Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.c | 39 ++--- drivers/gpu/drm/i915/i915_drv.h | 3 --- drivers/gpu/drm/i915/i915_gem.c | 27 + drivers/gpu/drm/i915/i915_irq.c | 21 ++-- 5 files changed, 36 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 932af05b8eec..6ff2d23faaa7 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4696,7 +4696,7 @@ i915_wedged_get(void *data, u64 *val) struct drm_device *dev = data; struct drm_i915_private *dev_priv = dev->dev_private; - *val = i915_reset_counter(&dev_priv->gpu_error); + *val = i915_terminally_wedged(&dev_priv->gpu_error); return 0; } @@ -4715,7 +4715,7 @@ i915_wedged_set(void *data, u64 val) * while it is writing to 'i915_wedged' */ - if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) + if (i915_reset_in_progress(&dev_priv->gpu_error)) return -EAGAIN; intel_runtime_pm_get(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 288fec7691dc..2f03379cdb4b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -873,23 +873,32 @@ int i915_resume_switcheroo(struct drm_device *dev) int i915_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - bool simulated; + struct i915_gpu_error *error = &dev_priv->gpu_error; + unsigned reset_counter; int ret; intel_reset_gt_powersave(dev); mutex_lock(&dev->struct_mutex); - i915_gem_reset(dev); + /* Clear any previous failed attempts at recovery. Time to try again. */ + atomic_andnot(I915_WEDGED, &error->reset_counter); - simulated = dev_priv->gpu_error.stop_rings != 0; + /* Clear the reset-in-progress flag and increment the reset epoch. */ + reset_counter = atomic_inc_return(&error->reset_counter); + if (WARN_ON(__i915_reset_in_progress(reset_counter))) { + ret = -EIO; + goto error; + } + + i915_gem_reset(dev); ret = intel_gpu_reset(dev); /* Also reset the gpu hangman. 
*/ - if (simulated) { + if (error->stop_rings != 0) { DRM_INFO("Simulated gpu hang, resetting stop_rings\n"); - dev_priv->gpu_error.stop_rings = 0; + error->stop_rings = 0; if (ret == -ENODEV) { DRM_INFO("Reset not implemented, but ignoring " "error for simulated gpu hangs\n"); @@ -902,8 +911,7 @@ int i915_reset(struct drm_device *dev) if (ret) { DRM_ERROR("Failed to reset chip: %i\n", ret); - mutex_unlock(&dev->struct_mutex); - return ret; + goto error; } intel_overlay_reset(dev_priv); @@ -922,20 +930,14 @@ int i915_reset(struct drm_device *dev) * was running at the time of the reset (i.e. we weren't VT * switched away). */ - - /* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */ - dev_priv->gpu_error.reload_in_reset = true; - ret = i915_gem_init_hw(dev); - - dev_priv->gpu_error.reload_in_reset = false; - - mutex_unlock(&dev->struct_mutex); if (ret) { DRM_ERROR("Failed hw init on reset %d\n", ret); - return ret; + goto error; } + mutex_unlock(&dev->struct_mutex); + /* * rps/rc6 re-init is necessary to restore state lost after the * reset and the re-install of gt irqs. Skip for ironlake per @@ -946,6 +948,11 @@ int i915_reset(struct drm_device *dev) intel_enable_gt_powersave(dev); return 0;
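The whole scheme fits in one atomic counter: bit 0 is the reset-in-progress flag, the top bit marks a terminally wedged GPU, and each completed recovery bumps the counter twice (set the flag, then clear it while advancing the epoch). A standalone model of the helpers (values mirror the patch; names are illustrative):

#include <assert.h>

#define RESET_IN_PROGRESS 0x1u
#define WEDGED            0x80000000u

static int reset_in_progress(unsigned counter)
{
        return counter & RESET_IN_PROGRESS;
}

static int terminally_wedged(unsigned counter)
{
        return counter & WEDGED;
}

static unsigned reset_count(unsigned counter)
{
        return ((counter & ~WEDGED) + 1) / 2; /* two bumps per reset */
}

int main(void)
{
        unsigned c = 0;
        c += 1; /* hang detected: break waiters, flag in progress */
        assert(reset_in_progress(c));
        c += 1; /* recovery holds struct_mutex: new epoch begins */
        assert(!reset_in_progress(c) && reset_count(c) == 1);
        assert(!terminally_wedged(c));
        return 0;
}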
[Intel-gfx] [PATCH 039/190] drm/i915: Remove stop-rings debugfs interface
Now that we have (near) universal GPU recovery code, we can inject a real hang from userspace and not need any fakery. Not only does this mean that the testing is far more realistic, but we can simplify the kernel in the process. v2: Replace the i915_stop_rings with a dummy implementation as igt encodified its existence until we can release an update. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 19 +-- drivers/gpu/drm/i915/i915_drv.c | 17 ++--- drivers/gpu/drm/i915/i915_drv.h | 19 --- drivers/gpu/drm/i915/i915_gem.c | 13 +++-- drivers/gpu/drm/i915/intel_lrc.c| 5 - drivers/gpu/drm/i915/intel_ringbuffer.c | 8 drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 7 files changed, 6 insertions(+), 76 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 567f8db4c70a..6172649b7e56 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4752,30 +4752,13 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, static int i915_ring_stop_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = dev->dev_private; - - *val = dev_priv->gpu_error.stop_rings; - + *val = 0; return 0; } static int i915_ring_stop_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = dev->dev_private; - int ret; - - DRM_DEBUG_DRIVER("Stopping rings 0x%08llx\n", val); - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - dev_priv->gpu_error.stop_rings = val; - mutex_unlock(&dev->struct_mutex); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 442e1217e442..e9f85fd0542f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -891,24 +891,11 @@ int i915_reset(struct drm_device *dev) goto error; } + pr_notice("drm/i915: Resetting chip after gpu hang\n"); + i915_gem_reset(dev); ret = intel_gpu_reset(dev); - - /* Also reset the gpu hangman. */ - if (error->stop_rings != 0) { - DRM_INFO("Simulated gpu hang, resetting stop_rings\n"); - error->stop_rings = 0; - if (ret == -ENODEV) { - DRM_INFO("Reset not implemented, but ignoring " -"error for simulated gpu hangs\n"); - ret = 0; - } - } - - if (i915_stop_ring_allow_warn(dev_priv)) - pr_notice("drm/i915: Resetting chip after gpu hang\n"); - if (ret) { if (ret != -ENODEV) DRM_ERROR("Failed to reset chip: %i\n", ret); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9ec6f3e9e74d..c3b795f1566b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1371,13 +1371,6 @@ struct i915_gpu_error { */ wait_queue_head_t reset_queue; - /* Userspace knobs for gpu hang simulation; -* combines both a ring mask, and extra flags -*/ - u32 stop_rings; -#define I915_STOP_RING_ALLOW_BAN (1 << 31) -#define I915_STOP_RING_ALLOW_WARN (1 << 30) - /* For missed irq/seqno simulation. 
*/ unsigned long test_irq_rings; }; @@ -3030,18 +3023,6 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2; } -static inline bool i915_stop_ring_allow_ban(struct drm_i915_private *dev_priv) -{ - return dev_priv->gpu_error.stop_rings == 0 || - dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_BAN; -} - -static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv) -{ - return dev_priv->gpu_error.stop_rings == 0 || - dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_WARN; -} - void i915_gem_reset(struct drm_device *dev); bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3948e85eaa48..ea9344503bf6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2633,21 +2633,14 @@ static bool i915_context_is_banned(struct drm_i915_private *dev_priv, { unsigned long elapsed; - elapsed = get_seconds() - ctx->hang_stats.guilty_ts; - if (ctx->hang_stats.banned) return true; + elapsed = get_seconds() - ctx->hang_stats.guilty_ts; if (ctx->hang_stats.ban_period_seconds && elapsed <= ctx->hang_stats.ban_period_seconds) { -
[Intel-gfx] [PATCH 024/190] drm/i915: Replace manual barrier() with READ_ONCE() in HWS accessor
When reading from the HWS page, we use barrier() to prevent the compiler optimising away the read from the volatile (may be updated by the GPU) memory address. This is more suited to READ_ONCE(); make it so. Signed-off-by: Chris Wilson Cc: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6cc8e9c5f8d6..8f305ce253ae 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -418,8 +418,7 @@ intel_read_status_page(struct intel_engine_cs *ring, int reg) { /* Ensure that the compiler doesn't optimize away the load. */ - barrier(); - return ring->status_page.page_addr[reg]; + return READ_ONCE(ring->status_page.page_addr[reg]); } static inline void -- 2.7.0.rc3
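The distinction being drawn: barrier() is a sledgehammer that makes the compiler forget everything it has cached, whereas READ_ONCE() forces just this one load and, like barrier(), implies no hardware ordering. A userspace rendition of the accessor (using the GNU C __typeof__ extension, as the kernel macro does):

#include <stdio.h>

#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

static unsigned status_page[64]; /* stand-in updated by "the GPU" */

static unsigned read_status(int reg)
{
        /* exactly one load; never cached across calls */
        return READ_ONCE(status_page[reg]);
}

int main(void)
{
        status_page[4] = 42;
        printf("%u\n", read_status(4));
        return 0;
}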
[Intel-gfx] [PATCH 075/190] drm/i915: Refactor activity tracking for requests
With the introduction of requests, we amplified the number of atomic refcounted objects we use and update every execbuffer; from none to several references, and a set of references that need to be changed. We also introduced interesting side-effects in the order of retiring requests and objects. Instead of independently tracking the last request for an object, track the active objects for each request. The object will reside in the buffer list of its most recent active request and so we reduce the kref interchange to a list_move. Now retirements are entirely driven by the request, dramatically simplifying activity tracking on the objects themselves, and removing the ambiguity between retiring objects and retiring requests. All told, less code, simpler and faster, and more extensible. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Makefile | 1 - drivers/gpu/drm/i915/i915_drv.h | 10 -- drivers/gpu/drm/i915/i915_gem.c | 160 drivers/gpu/drm/i915/i915_gem_debug.c | 70 -- drivers/gpu/drm/i915/i915_gem_fence.c | 10 +- drivers/gpu/drm/i915/i915_gem_request.c | 44 +++-- drivers/gpu/drm/i915/i915_gem_request.h | 16 +++- drivers/gpu/drm/i915/intel_lrc.c| 1 - drivers/gpu/drm/i915/intel_ringbuffer.c | 1 - drivers/gpu/drm/i915/intel_ringbuffer.h | 12 --- 10 files changed, 89 insertions(+), 236 deletions(-) delete mode 100644 drivers/gpu/drm/i915/i915_gem_debug.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index b0a83215db80..79d657f29241 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -23,7 +23,6 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o i915-y += i915_cmd_parser.o \ i915_gem_batch_pool.o \ i915_gem_context.o \ - i915_gem_debug.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ i915_gem_execbuffer.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c577f86d94f8..c9c1a5cdc1e5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -435,8 +435,6 @@ void intel_link_compute_m_n(int bpp, int nlanes, #define DRIVER_MINOR 6 #define DRIVER_PATCHLEVEL 0 -#define WATCH_LISTS0 - struct opregion_header; struct opregion_acpi; struct opregion_swsci; @@ -2024,7 +2022,6 @@ struct drm_i915_gem_object { struct drm_mm_node *stolen; struct list_head global_list; - struct list_head ring_list[I915_NUM_RINGS]; /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; @@ -3068,13 +3065,6 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec obj->tiling_mode != I915_TILING_NONE; } -/* i915_gem_debug.c */ -#if WATCH_LISTS -int i915_verify_lists(struct drm_device *dev); -#else -#define i915_verify_lists(dev) 0 -#endif - /* i915_debugfs.c */ int i915_debugfs_init(struct drm_minor *minor); void i915_debugfs_cleanup(struct drm_minor *minor); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f314b3ea2726..4eef13ebdaf3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -40,10 +40,6 @@ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static void -i915_gem_object_retire__write(struct drm_i915_gem_object *obj); -static void -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) @@ -117,7 +113,6 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) if (ret) 
return ret; - WARN_ON(i915_verify_lists(dev)); return 0; } @@ -1117,27 +1112,14 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, return 0; if (readonly) { - if (obj->last_write.request != NULL) { - ret = i915_wait_request(obj->last_write.request); - if (ret) - return ret; - - i = obj->last_write.request->engine->id; - if (obj->last_read[i].request == obj->last_write.request) - i915_gem_object_retire__read(obj, i); - else - i915_gem_object_retire__write(obj); - } + ret = i915_wait_request(obj->last_write.request); + if (ret) + return ret; } else { for (i = 0; i < I915_NUM_RINGS; i++) { - if (obj->last_read[i].request == NULL) - continue; - ret = i915_wait_reques
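The "kref interchange to a list_move" claim is the heart of it: placing an object on a newer request's active list is a constant-time unlink/relink with no refcount traffic, and retiring walks the request's list once. A self-contained model with minimal list primitives (the cast back to the object works because the link is the first member):

#include <stdio.h>

struct list { struct list *prev, *next; };

static void list_init(struct list *l) { l->prev = l->next = l; }

static void list_del(struct list *e)
{
        e->prev->next = e->next;
        e->next->prev = e->prev;
}

static void list_add_tail(struct list *e, struct list *h)
{
        e->prev = h->prev; e->next = h;
        h->prev->next = e; h->prev = e;
}

/* The entire kref dance becomes this: unlink plus relink. */
static void list_move_tail(struct list *e, struct list *h)
{
        list_del(e);
        list_add_tail(e, h);
}

struct object  { struct list active_link; int id; };
struct request { struct list active_list; };

static void move_to_active(struct object *obj, struct request *req)
{
        list_move_tail(&obj->active_link, &req->active_list);
}

/* Retirement is driven by the request, not the object. */
static void retire(struct request *req)
{
        while (req->active_list.next != &req->active_list) {
                struct object *obj =
                        (struct object *)req->active_list.next;
                printf("retiring obj %d\n", obj->id);
                list_del(&obj->active_link);
                list_init(&obj->active_link);
        }
}

int main(void)
{
        struct request r1, r2;
        struct object o = { .id = 7 };

        list_init(&r1.active_list);
        list_init(&r2.active_list);
        list_init(&o.active_link);

        move_to_active(&o, &r1);
        move_to_active(&o, &r2); /* reused: just a list_move */
        retire(&r2);
        return 0;
}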
[Intel-gfx] [PATCH 078/190] drm/i915: Split early global GTT initialisation
Initialising the global GTT is tricky as we wish to use the drm_mm range manager during the modesetting initialisation (to capture stolen allocations from the BIOS) before we actually enable GEM. To overcome this, we currently setup the drm_mm first and then carefully rebind them. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c| 2 ++ drivers/gpu/drm/i915/i915_gem.c| 5 +-- drivers/gpu/drm/i915/i915_gem_gtt.c| 62 +++--- drivers/gpu/drm/i915/i915_gem_gtt.h| 1 + drivers/gpu/drm/i915/i915_gem_stolen.c | 17 +- 5 files changed, 33 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index c0242ce45e43..4a24831a14fa 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -989,6 +989,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev_priv->gtt.mtrr = arch_phys_wc_add(dev_priv->gtt.mappable_base, aperture_size); + i915_gem_init_global_gtt(dev); + /* The i915 workqueue is primarily used for batched retirement of * requests (and thus managing bo) once the task has been completed * by the GPU. i915_gem_retire_requests() is called directly when we diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e4d7c7f5aca2..44bd514a6c2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4237,7 +4237,9 @@ int i915_gem_init(struct drm_device *dev) if (ret) goto out_unlock; - i915_gem_init_global_gtt(dev); + ret = i915_global_gtt_setup(dev); + if (ret) + goto out_unlock; ret = i915_gem_context_init(dev); if (ret) @@ -4312,7 +4314,6 @@ i915_gem_load(struct drm_device *dev) SLAB_HWCACHE_ALIGN, NULL); - INIT_LIST_HEAD(&dev_priv->vm_list); INIT_LIST_HEAD(&dev_priv->context_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6168182a87d8..b5c3bbe6dc2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2681,10 +2681,7 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, } } -static int i915_gem_setup_global_gtt(struct drm_device *dev, -u64 start, -u64 mappable_end, -u64 end) +int i915_global_gtt_setup(struct drm_device *dev) { /* Let GEM Manage all of the aperture. 
* @@ -2697,48 +2694,16 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, */ struct drm_i915_private *dev_priv = dev->dev_private; struct i915_address_space *ggtt_vm = &dev_priv->gtt.base; - struct drm_mm_node *entry; - struct drm_i915_gem_object *obj; unsigned long hole_start, hole_end; + struct drm_mm_node *entry; int ret; - BUG_ON(mappable_end > end); - - ggtt_vm->start = start; - - /* Subtract the guard page before address space initialization to -* shrink the range used by drm_mm */ - ggtt_vm->total = end - start - PAGE_SIZE; - i915_address_space_init(ggtt_vm, dev_priv); - ggtt_vm->total += PAGE_SIZE; - if (intel_vgpu_active(dev)) { ret = intel_vgt_balloon(dev); if (ret) return ret; } - if (!HAS_LLC(dev)) - ggtt_vm->mm.color_adjust = i915_gtt_color_adjust; - - /* Mark any preallocated objects as occupied */ - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); - - DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n", - i915_gem_obj_ggtt_offset(obj), obj->base.size); - - WARN_ON(i915_gem_obj_ggtt_bound(obj)); - ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node); - if (ret) { - DRM_DEBUG_KMS("Reservation failed: %i\n", ret); - return ret; - } - vma->bound |= GLOBAL_BIND; - __i915_vma_set_map_and_fenceable(vma); - list_add_tail(&vma->vm_link, &ggtt_vm->inactive_list); - } - /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", @@ -2748,7 +2713,9 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, } /* And finally clear the reserved guard page */ - ggtt_vm->cle
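The constraint being untangled: the drm_mm range manager has to exist before modesetting init reserves the BIOS framebuffer out of stolen memory, while clearing unused ranges and the guard page belong to GEM init proper. A toy model of the two-phase bring-up (function names echo the patch; the logic is purely illustrative):

struct gtt { int mm_ready; int gem_ready; };

static void i915_gem_init_global_gtt(struct gtt *g)
{
        g->mm_ready = 1;  /* drm_mm usable: stolen can be reserved */
}

static int i915_global_gtt_setup(struct gtt *g)
{
        if (!g->mm_ready)
                return -1;
        g->gem_ready = 1; /* clear unused ranges, add guard page */
        return 0;
}

int main(void)
{
        struct gtt g = { 0, 0 };
        i915_gem_init_global_gtt(&g);     /* from i915_driver_load() */
        return i915_global_gtt_setup(&g); /* later, i915_gem_init() */
}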
[Intel-gfx] [PATCH 033/190] drm/i915: Only start retire worker when idle
The retire worker is a low frequency task that makes sure we retire outstanding requests if userspace is being lax. We only need to start it once as it remains active until the GPU is idle, so do a cheap test before the more expensive queue_work(). A consequence of this is that we need correct locking in the worker to make the hot path of request submission cheap. To keep the symmetry and keep hangcheck strictly bound by the GPU's wakelock, we move the cancel_sync(hangcheck) to the idle worker before dropping the wakelock. v2: Guard against RCU fouling the breadcrumbs bottom-half whilst we kick the waiter. v3: Remove the wakeref assertion squelching (now we hold a wakeref for the hangcheck, any rpm error there is genuine). Signed-off-by: Chris Wilson References: https://bugs.freedesktop.org/show_bug.cgi?id=88437 --- drivers/gpu/drm/i915/i915_drv.c| 2 - drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 83 -- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 +++ drivers/gpu/drm/i915/i915_irq.c| 16 +- drivers/gpu/drm/i915/intel_display.c | 29 --- 6 files changed, 66 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5160f1414de4..4c090f1cf69c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1490,8 +1490,6 @@ static int intel_runtime_suspend(struct device *device) i915_gem_release_all_mmaps(dev_priv); mutex_unlock(&dev->struct_mutex); - cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); - intel_guc_suspend(dev); intel_suspend_gt_powersave(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7f021505e32f..9ec6f3e9e74d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2987,7 +2987,7 @@ int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *ring); -bool i915_gem_retire_requests(struct drm_device *dev); +void i915_gem_retire_requests(struct drm_device *dev); void i915_gem_retire_requests_ring(struct intel_engine_cs *ring); static inline u32 i915_reset_counter(struct i915_gpu_error *error) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5ddb2ed0f785..3788fce136f3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2479,6 +2479,37 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) return 0; } +static void i915_gem_mark_busy(struct drm_i915_private *dev_priv) +{ + if (dev_priv->mm.busy) + return; + + intel_runtime_pm_get_noresume(dev_priv); + + i915_update_gfx_val(dev_priv); + if (INTEL_INFO(dev_priv)->gen >= 6) + gen6_rps_busy(dev_priv); + + queue_delayed_work(dev_priv->wq, + &dev_priv->mm.retire_work, + round_jiffies_up_relative(HZ)); + + dev_priv->mm.busy = true; +} + +static void i915_gem_mark_idle(struct drm_i915_private *dev_priv) +{ + dev_priv->mm.busy = false; + + if (cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work)) + intel_kick_waiters(dev_priv); + + if (INTEL_INFO(dev_priv)->gen >= 6) + gen6_rps_idle(dev_priv); + + intel_runtime_pm_put(dev_priv); +} + /* * NB: This function is not allowed to fail. 
Doing so would mean the the * request is not being tracked for completion but the work itself is @@ -2559,10 +2590,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, trace_i915_gem_request_add(request); - queue_delayed_work(dev_priv->wq, - &dev_priv->mm.retire_work, - round_jiffies_up_relative(HZ)); - intel_mark_busy(dev_priv->dev); + i915_gem_mark_busy(dev_priv); /* Sanity check that the reserved size was large enough. */ intel_ring_reserved_space_end(ringbuf); @@ -2892,7 +2920,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) WARN_ON(i915_verify_lists(ring->dev)); } -bool +void i915_gem_retire_requests(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -2900,6 +2928,9 @@ i915_gem_retire_requests(struct drm_device *dev) bool idle = true; int i; + if (!dev_priv->mm.busy) + return; + for_each_ring(ring, dev_priv, i) { i915_gem_retire_requests_ring(ring); idle &= list_empty(&ring->request_list); @@ -2918,8 +2949,6 @@ i915_gem_retire_requests(struct drm_device *dev) mod_delayed_work(dev_priv->wq, &dev_priv->mm.idle_work, msecs_to_jiffies(100)); - -
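The hot-path trick is the mm.busy flag: request submission pays only a test, while the retire worker and the rpm wakeref are armed exactly once per idle-to-busy transition. A stubbed model of the pairing (in the driver both sides run under struct_mutex):

#include <stdbool.h>
#include <stdio.h>

static bool busy;

static void rpm_get(void)             { puts("rpm_get"); }
static void rpm_put(void)             { puts("rpm_put"); }
static void queue_retire_worker(void) { puts("queue retire worker"); }

static void mark_busy(void)
{
        if (busy)
                return; /* the common case: one flag test */
        busy = true;
        rpm_get();
        queue_retire_worker();
}

static void mark_idle(void)
{
        busy = false;
        rpm_put(); /* wakeref dropped last, after teardown */
}

int main(void)
{
        mark_busy();
        mark_busy(); /* no-op: already armed */
        mark_idle();
        return 0;
}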
[Intel-gfx] [PATCH 070/190] drm/i915: Unify legacy/execlists submit_execbuf callbacks
Now that emitting requests is identical between legacy and execlists, we can use the same function to build up the ring for submitting to either engine. (With the exception of i915_switch_contexts(), but in time that will also be handled gracefully.) Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h| 20 - drivers/gpu/drm/i915/i915_gem.c| 2 - drivers/gpu/drm/i915/i915_gem_context.c| 3 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 24 -- drivers/gpu/drm/i915/intel_lrc.c | 129 - drivers/gpu/drm/i915/intel_lrc.h | 4 - 6 files changed, 20 insertions(+), 162 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0c580124d46d..cae448e238ca 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1655,18 +1655,6 @@ struct i915_virtual_gpu { bool active; }; -struct i915_execbuffer_params { - struct drm_device *dev; - struct drm_file *file; - uint32_tdispatch_flags; - uint32_targs_batch_start_offset; - uint64_tbatch_obj_vm_offset; - struct intel_engine_cs *ring; - struct drm_i915_gem_object *batch_obj; - struct intel_context*ctx; - struct drm_i915_gem_request *request; -}; - /* used in computing the new watermarks state */ struct intel_wm_config { unsigned int num_pipes_active; @@ -1934,9 +1922,6 @@ struct drm_i915_private { /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { - int (*execbuf_submit)(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); int (*init_rings)(struct drm_device *dev); void (*cleanup_ring)(struct intel_engine_cs *ring); void (*stop_ring)(struct intel_engine_cs *ring); @@ -2656,11 +2641,6 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void i915_gem_execbuffer_move_to_active(struct list_head *vmas, - struct drm_i915_gem_request *req); -int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer2(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5b5afdcd9634..235a3de6e0a0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4308,12 +4308,10 @@ int i915_gem_init(struct drm_device *dev) mutex_lock(&dev->struct_mutex); if (!i915.enable_execlists) { - dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; dev_priv->gt.init_rings = i915_gem_init_rings; dev_priv->gt.cleanup_ring = intel_engine_cleanup; dev_priv->gt.stop_ring = intel_engine_stop; } else { - dev_priv->gt.execbuf_submit = intel_execlists_submission; dev_priv->gt.init_rings = intel_logical_rings_init; dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; dev_priv->gt.stop_ring = intel_logical_ring_stop; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c078ebc29da5..72b0875a95a4 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -819,8 +819,9 @@ unpin_out: */ int i915_switch_context(struct drm_i915_gem_request *req) { + if (i915.enable_execlists) + return 0; - WARN_ON(i915.enable_execlists); WARN_ON(!mutex_is_locked(&req->i915->dev->struct_mutex)); if (req->ctx->legacy_hw_ctx.rcs_state == NULL) { /* 
We have the fake context */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3e6384deca65..6dee27224ddb 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -41,6 +41,18 @@ #define BATCH_OFFSET_BIAS (256*1024) +struct i915_execbuffer_params { + struct drm_device *dev; + struct drm_file *file; + uint32_tdispatch_flags; + uint32_targs_batch_start_offset; + uint64_
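A side note on the shape of this refactor: rather than the driver choosing between two complete execbuf backends via the gt.execbuf_submit vtable entry, there is now a single submission path, and the one remaining legacy/execlists divergence is pushed down into the callee. A minimal sketch of that early-out pattern, as applied to the context switch above (do_legacy_switch() is a hypothetical stand-in for the legacy-only body of i915_switch_context()):

/* One shared caller; the backend difference lives inside the callee. */
int i915_switch_context(struct drm_i915_gem_request *req)
{
        if (i915.enable_execlists)
                return 0;       /* execlists switch contexts via ELSP submission */

        return do_legacy_switch(req);   /* hypothetical legacy-only helper */
}

The caller no longer needs to know which submission mode is active, which is what lets the duplicated intel_execlists_submission() path be deleted.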
[Intel-gfx] [PATCH 014/190] drm/i915: Delay queuing hangcheck to wait-request
We can forgo queuing the hangcheck from the start of every request until we wait upon a request. This reduces the overhead of every request, but may increase the latency of detecting a hang. However, if nothing ever waits upon a hang, did it ever hang? It also improves the robustness of the wait-request by ensuring that the hangchecker is indeed running before we sleep indefinitely (and thereby ensuring that we never actually sleep forever waiting for a dead GPU). v2: Also queue the hangcheck from retire work in case the GPU becomes stuck when no one is watching. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 13 - drivers/gpu/drm/i915/i915_irq.c | 9 - 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bbdb056d2a8e..d9d411919779 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2710,7 +2710,7 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv); bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port); /* i915_irq.c */ -void i915_queue_hangcheck(struct drm_device *dev); +void i915_queue_hangcheck(struct drm_i915_private *dev_priv); __printf(3, 4) void i915_handle_error(struct drm_device *dev, bool wedged, const char *fmt, ...); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f570990f03e0..b4da8b354a3b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1306,6 +1306,9 @@ int __i915_wait_request(struct drm_i915_gem_request *req, break; } + /* Ensure that even if the GPU hangs, we get woken up. */ + i915_queue_hangcheck(dev_priv); + timer.function = NULL; if (timeout || missed_irq(dev_priv, ring)) { unsigned long expire; @@ -2592,8 +2595,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, trace_i915_gem_request_add(request); - i915_queue_hangcheck(ring->dev); - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); @@ -2947,8 +2948,8 @@ i915_gem_retire_requests(struct drm_device *dev) if (idle) mod_delayed_work(dev_priv->wq, - &dev_priv->mm.idle_work, - msecs_to_jiffies(100)); +&dev_priv->mm.idle_work, +msecs_to_jiffies(100)); return idle; } @@ -2967,9 +2968,11 @@ i915_gem_retire_work_handler(struct work_struct *work) idle = i915_gem_retire_requests(dev); mutex_unlock(&dev->struct_mutex); } - if (!idle) + if (!idle) { + i915_queue_hangcheck(dev_priv); queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); + } } static void diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 15973e917566..94f5f4e99446 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3165,18 +3165,17 @@ static void i915_hangcheck_elapsed(struct work_struct *work) goto out; } + /* Reset timer in case GPU hangs without another request being added */ if (busy_count) - /* Reset timer case chip hangs without another request -* being added */ - i915_queue_hangcheck(dev); + i915_queue_hangcheck(dev_priv); out: ENABLE_RPM_WAKEREF_ASSERTS(dev_priv); } -void i915_queue_hangcheck(struct drm_device *dev) +void i915_queue_hangcheck(struct drm_i915_private *dev_priv) { - struct i915_gpu_error *e = &to_i915(dev)->gpu_error; + struct i915_gpu_error *e = &dev_priv->gpu_error; if (!i915.enable_hangcheck) return; -- 2.7.0.rc3
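The policy after this patch: arm the watchdog only where a hang can actually be observed, i.e. just before a caller sleeps in __i915_wait_request(), and from the retire worker when the GPU is still busy with nobody watching (the v2 case). A condensed sketch of the retire-work half, using names from the diff (locking and the idle-work handling elided):

static void retire_work_sketch(struct work_struct *work)
{
        struct drm_i915_private *dev_priv =
                container_of(work, typeof(*dev_priv), mm.retire_work.work);
        bool idle = i915_gem_retire_requests(dev_priv->dev);

        if (!idle) {
                /* GPU busy with no waiter: keep the hangchecker alive. */
                i915_queue_hangcheck(dev_priv);
                queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
                                   round_jiffies_up_relative(HZ));
        }
}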
[Intel-gfx] [PATCH 061/190] drm/i915: Rename intel_context[engine].ringbuf
Perform s/ringbuf/ring/ on the context struct for consistency with the ring/engine split. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c| 2 +- drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_guc_submission.c | 6 +-- drivers/gpu/drm/i915/intel_lrc.c | 63 ++ 4 files changed, 35 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 018076c89247..6e91726db8d3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1988,7 +1988,7 @@ static int i915_context_status(struct seq_file *m, void *unused) struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; struct intel_ringbuffer *ringbuf = - ctx->engine[i].ringbuf; + ctx->engine[i].ring; seq_printf(m, "%s: ", ring->name); if (ctx_obj) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index baede4517c70..9f06dd19bfb2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -885,7 +885,7 @@ struct intel_context { /* Execlists */ struct { struct drm_i915_gem_object *state; - struct intel_ringbuffer *ringbuf; + struct intel_ringbuffer *ring; int pin_count; } engine[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 53abe2143f8a..b47e630e048a 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -390,7 +390,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, for (i = 0; i < I915_NUM_RINGS; i++) { struct guc_execlist_context *lrc = &desc.lrc[i]; - struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; + struct intel_ringbuffer *ring = ctx->engine[i].ring; struct intel_engine_cs *engine; struct drm_i915_gem_object *obj; uint64_t ctx_desc; @@ -406,7 +406,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, if (!obj) break; /* XXX: continue? */ - engine = ringbuf->engine; + engine = ring->engine; ctx_desc = intel_lr_context_descriptor(ctx, engine); lrc->context_desc = (u32)ctx_desc; @@ -416,7 +416,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | (engine->id << GUC_ELC_ENGINE_OFFSET); - obj = ringbuf->obj; + obj = ring->obj; lrc->ring_begin = i915_gem_obj_ggtt_offset(obj); lrc->ring_end = lrc->ring_begin + obj->base.size - 1; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8639ebfab96f..65beb7267d1a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -402,24 +402,24 @@ static void execlists_submit_requests(struct drm_i915_gem_request *rq0, execlists_elsp_write(rq0, rq1); } -static void execlists_context_unqueue(struct intel_engine_cs *ring) +static void execlists_context_unqueue(struct intel_engine_cs *engine) { struct drm_i915_gem_request *req0 = NULL, *req1 = NULL; struct drm_i915_gem_request *cursor = NULL, *tmp = NULL; - assert_spin_locked(&ring->execlist_lock); + assert_spin_locked(&engine->execlist_lock); /* * If irqs are not active generate a warning as batches that finish * without the irqs may get lost and a GPU Hang may occur. 
*/ - WARN_ON(!intel_irqs_enabled(ring->dev->dev_private)); + WARN_ON(!intel_irqs_enabled(engine->dev->dev_private)); - if (list_empty(&ring->execlist_queue)) + if (list_empty(&engine->execlist_queue)) return; /* Try to read in pairs */ - list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue, + list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue, execlist_link) { if (!req0) { req0 = cursor; @@ -429,7 +429,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) cursor->elsp_submitted = req0->elsp_submitted; list_del(&req0->execlist_link); list_add_tail(&req0->execlist_link, - &ring->execlist_retired_req_list); + &engine->execlist_retired_req_list);
[Intel-gfx] [PATCH 034/190] drm/i915: Do not keep postponing the idle-work
Rather than persistently postponing the idle-work every time somebody calls i915_gem_retire_requests() (potentially ensuring that we never reach the idle state), queue the work the first time we detect all requests are complete. Then if, within 100ms, more requests have been queued, we will abort the idle-worker and wait again until all the new requests have been completed. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3788fce136f3..efd46adb978b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2946,9 +2946,9 @@ i915_gem_retire_requests(struct drm_device *dev) } if (idle) - mod_delayed_work(dev_priv->wq, -&dev_priv->mm.idle_work, -msecs_to_jiffies(100)); + queue_delayed_work(dev_priv->wq, + &dev_priv->mm.idle_work, + msecs_to_jiffies(100)); } static void -- 2.7.0.rc3
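The one-line change leans on the differing semantics of the two workqueue helpers: queue_delayed_work() is a no-op if the work is already pending, so the deadline armed by the first "all idle" observation sticks, whereas mod_delayed_work() re-arms the timer on every call. A sketch of the distinction, with a hypothetical idle_work item:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct delayed_work idle_work;   /* hypothetical; initialised elsewhere */

static void on_all_requests_retired(struct workqueue_struct *wq)
{
        /*
         * No-op (returns false) if idle_work is already pending: the
         * 100ms countdown started by the first caller is preserved.
         */
        queue_delayed_work(wq, &idle_work, msecs_to_jiffies(100));

        /*
         * mod_delayed_work(wq, &idle_work, msecs_to_jiffies(100)) would
         * instead push the expiry to 100ms from *now* on every call -
         * under a steady stream of retires the handler never runs.
         */
}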
[Intel-gfx] [PATCH 018/190] drm/i915: Slaughter the thundering i915_wait_request herd
One particularly stressful scenario consists of many independent tasks all competing for GPU time and waiting upon the results (e.g. realtime transcoding of many, many streams). One bottleneck in particular is that each client waits on its own results, but every client is woken up after every batchbuffer - hence the thunder of hooves as then every client must do its heavyweight dance to read a coherent seqno to see if it is the lucky one. Ideally, we only want one client to wake up after the interrupt and check its request for completion. Since the requests must retire in order, we can select the first client on the oldest request to be woken. Once that client has completed its wait, we can then wake up the next client and so on. However, all clients then incur latency as every process in the chain may be delayed for scheduling - this may also then cause some priority inversion. To reduce the latency, when a client is added or removed from the list, we scan the tree for completed seqnos and wake up all the completed waiters in parallel. Using igt/benchmarks/gem_latency, we can demonstrate this effect. The benchmark measures the number of GPU cycles between completion of a batch and the client waking up from a call to wait-ioctl. With many concurrent waiters, each on a different request, we observe that the wakeup latency before the patch scales nearly linearly with the number of waiters (before external factors kick in making the scaling much worse). After applying the patch, we can see that only the single waiter for the request is being woken up, providing a constant wakeup latency for every operation. However, the situation is not quite as rosy for many waiters on the same request, though to the best of my knowledge this is much less likely in practice. Here, we can observe that the concurrent waiters incur extra latency from being woken up by the solitary bottom-half, rather than directly by the interrupt. This appears to be scheduler-induced (having discounted adverse effects from having an rbtree walk/erase in the wakeup path); each additional wake_up_process() costs approximately 1us on big core. Another effect of performing the secondary wakeups from the first bottom-half is the incurred delay this imposes on high priority threads - rather than immediately returning to userspace and leaving the interrupt handler to wake the others. To offset the delay incurred with additional waiters on a request, we could use a hybrid scheme that did a quick read in the interrupt handler and dequeued all the completed waiters (incurring the overhead in the interrupt handler, not the best plan either as we then incur GPU submission latency) but we would still have to wake up the bottom-half every time to do the heavyweight slow read. Or we could only kick the waiters on the seqno with the same priority as the current task (i.e. in the realtime waiter scenario, only it is woken up immediately by the interrupt and simply queues the next waiter before returning to userspace, minimising its delay at the expense of the chain, and also reducing contention on its scheduler runqueue). This is effective at avoiding long pauses in the interrupt handler and at avoiding the extra latency in realtime/high-priority waiters. v2: Convert from a kworker per engine into a dedicated kthread for the bottom-half. v3: Rename request members and tweak comments. v4: Use a per-engine spinlock in the breadcrumbs bottom-half. v5: Fix race in locklessly checking waiter status and kicking the task on adding a new waiter.
v6: Fix deciding when to force the timer to hide missing interrupts. v7: Move the bottom-half from the kthread to the first client process. v8: Reword a few comments. v9: Break the busy loop when the interrupt is unmasked or has fired. v10: Comments, unnecessary churn, better debugging from Tvrtko. v11: Wake all completed waiters on removing the current bottom-half to reduce the latency of waking up a herd of clients all waiting on the same request. v12: Rearrange missed-interrupt fault injection so that it works with igt/drv_missed_irq_hang. v13: Rename intel_breadcrumb and friends to intel_wait in preparation for signal handling. v14: RCU commentary, assert_spin_locked. v15: Hide BUG_ON behind the compiler; report on gem_latency findings. v16: Sort seqno-groups by priority so that the first-waiter has the highest task priority (and so avoids priority inversion). Testcase: igt/gem_concurrent_blit Testcase: igt/benchmarks/gem_latency Signed-off-by: Chris Wilson Cc: "Rogozhkin, Dmitry V" Cc: "Gong, Zhipeng" Cc: Tvrtko Ursulin Cc: Dave Gordon --- drivers/gpu/drm/i915/Makefile| 1 + drivers/gpu/drm/i915/i915_debugfs.c | 19 +- drivers/gpu/drm/i915/i915_drv.h | 32 ++- drivers/gpu/drm/i915/i915_gem.c | 141 + drivers/gpu/drm/i915/i915_gpu_error.c| 2 +- drivers/gpu/drm/i915/i915_irq.c | 20 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 336 +
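The central structure is a per-engine rbtree of sleeping waiters keyed by seqno: the interrupt wakes only the oldest waiter, and each departing waiter promotes its successor. A heavily abridged sketch of that hand-off, assuming intel_breadcrumbs carries a spinlock (lock) and an rb_root (waiters) per the v4 note; the helper name is illustrative:

struct intel_wait {
        struct rb_node node;
        struct task_struct *tsk;
        u32 seqno;
};

/* A waiter that finished (or gave up) removes itself and wakes the
 * next-oldest waiter, so the interrupt only ever targets one task.
 */
static void remove_wait_sketch(struct intel_breadcrumbs *b,
                               struct intel_wait *wait)
{
        struct rb_node *next;

        spin_lock(&b->lock);
        next = rb_next(&wait->node);
        rb_erase(&wait->node, &b->waiters);
        if (next)
                wake_up_process(rb_entry(next, struct intel_wait, node)->tsk);
        spin_unlock(&b->lock);
}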
[Intel-gfx] [PATCH 083/190] drm/i915: Be more careful when unbinding vma
When we call i915_vma_unbind(), we will wait upon outstanding rendering. This will also trigger a retirement phase, which may update the object lists. If we extend request tracking to the VMA itself (rather than keeping it at the encompassing object), then there is the potential for obj->vma_list to be modified for other elements upon i915_vma_unbind(). As a result, if we walk over the object list and call i915_vma_unbind(), we need to be prepared for that list to change. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 54 drivers/gpu/drm/i915/i915_gem_shrinker.c | 6 +--- drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +-- 4 files changed, 45 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8f5cf244094e..9fa925389332 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2707,6 +2707,8 @@ int __must_check i915_vma_unbind(struct i915_vma *vma); * _guarantee_ VMA in question is _not in use_ anywhere. */ int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma); + +int i915_gem_object_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ed3f306af42f..95e69dc47fc8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -254,18 +254,38 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; +int +i915_gem_object_unbind(struct drm_i915_gem_object *obj) +{ + struct list_head still_in_list; + + INIT_LIST_HEAD(&still_in_list); + while (!list_empty(&obj->vma_list)) { + struct i915_vma *vma = + list_first_entry(&obj->vma_list, +struct i915_vma, +obj_link); + int ret; + + list_move_tail(&vma->obj_link, &still_in_list); + ret = i915_vma_unbind(vma); + if (ret) + break; + } + list_splice(&still_in_list, &obj->vma_list); + + return 0; +} + static int drop_pages(struct drm_i915_gem_object *obj) { - struct i915_vma *vma, *next; int ret; drm_gem_object_reference(&obj->base); - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) - if (i915_vma_unbind(vma)) - break; - - ret = i915_gem_object_put_pages(obj); + ret = i915_gem_object_unbind(obj); + if (ret == 0) + ret = i915_gem_object_put_pages(obj); drm_gem_object_unreference(&obj->base); return ret; @@ -3038,7 +3058,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { struct drm_device *dev = obj->base.dev; - struct i915_vma *vma, *next; + struct i915_vma *vma; int ret = 0; if (obj->cache_level == cache_level) @@ -3049,7 +3069,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * catch the issue of the CS prefetch crossing page boundaries and * reading an invalid PTE on older architectures. 
*/ - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { +restart: + list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3058,11 +3079,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return -EBUSY; } - if (!i915_gem_valid_gtt_space(vma, cache_level)) { - ret = i915_vma_unbind(vma); - if (ret) - return ret; - } + if (i915_gem_valid_gtt_space(vma, cache_level)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + + /* As unbinding may affect other elements in the +* obj->vma_list (due to side-effects from retiring +* an active vma), play safe and restart the iterator. +*/ + goto restart; } /* We can reuse the existing drm_mm nodes but need to change the diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index fa190ef3f727..e15fc7531f08 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -141,7 +141,6
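For contrast, this is the hazard the patch defends against - a sketch of the *unsafe* pattern being removed, not of the patched code. list_for_each_entry_safe() caches only the next element, which is no protection once the loop body can unlink arbitrary entries:

struct i915_vma *vma, *next;

list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
        /* may wait, retire requests, and unlink *other* vmas -
         * including the cached 'next' - from obj->vma_list */
        i915_vma_unbind(vma);
        /* 'next' may now point at freed or relocated memory */
}

Hence the two idioms in the diff: drain the list through a private still_in_list head, or restart the walk (goto restart) after every mutating call.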
[Intel-gfx] [PATCH 035/190] drm/i915: Remove redundant queue_delayed_work() from throttle ioctl
We know, by design, that whilst the GPU is active (and thus we are throttling) the retire_worker is queued. Therefore attempting to requeue it with queue_delayed_work() is a no-op and we can safely remove it. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index efd46adb978b..e9f5ca7ea835 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4116,9 +4116,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return 0; ret = __i915_wait_request(target, true, NULL, NULL); - if (ret == 0) - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); - i915_gem_request_unreference__unlocked(target); return ret; -- 2.7.0.rc3
[Intel-gfx] [PATCH 044/190] drm/i915: Move GEM request routines to i915_gem_request.c
Migrate the request operations out of the main body of i915_gem.c and into their own C file for easier expansion. v2: Move __i915_add_request() across as well Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 205 +- drivers/gpu/drm/i915/i915_gem.c | 652 +-- drivers/gpu/drm/i915/i915_gem_request.c | 659 drivers/gpu/drm/i915/i915_gem_request.h | 223 +++ 5 files changed, 895 insertions(+), 845 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_request.c create mode 100644 drivers/gpu/drm/i915/i915_gem_request.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 99ce591c8574..b0a83215db80 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -31,6 +31,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_gtt.o \ i915_gem.o \ i915_gem_render_state.o \ + i915_gem_request.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 57e450e25ad6..ee146ce02412 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -41,6 +41,7 @@ #include "intel_lrc.h" #include "i915_gem_gtt.h" #include "i915_gem_render_state.h" +#include "i915_gem_request.h" #include #include #include @@ -2162,179 +2163,15 @@ struct drm_i915_gem_object { }; #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) -void i915_gem_track_fb(struct drm_i915_gem_object *old, - struct drm_i915_gem_object *new, - unsigned frontbuffer_bits); - -/** - * Request queue structure. - * - * The request queue allows us to note sequence numbers that have been emitted - * and may be associated with active buffers to be retired. - * - * By keeping this list, we can avoid having to do questionable sequence - * number comparisons on buffer last_read|write_seqno. It also allows an - * emission time to be associated with the request for tracking how far ahead - * of the GPU the submission is. - * - * The requests are reference counted, so upon creation they should have an - * initial reference taken using kref_init - */ -struct drm_i915_gem_request { - struct kref ref; - - /** On Which ring this request was generated */ - struct drm_i915_private *i915; - struct intel_engine_cs *ring; - unsigned reset_counter; - -/** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. - */ - u32 previous_seqno; - -/** GEM sequence number associated with this request, - * when the HWS breadcrumb is equal or greater than this the GPU - * has finished processing this request. - */ - u32 seqno; - - /** Position in the ringbuffer of the start of the request */ - u32 head; - - /** -* Position in the ringbuffer of the start of the postfix. -* This is required to calculate the maximum available ringbuffer -* space without overwriting the postfix. -*/ -u32 postfix; - - /** Position in the ringbuffer of the end of the whole request */ - u32 tail; - - /** -* Context and ring buffer related to this request -* Contexts are refcounted, so when this request is associated with a -* context, we must increment the context's refcount, to guarantee that -* it persists while any request is linked to it. Requests themselves -* are also refcounted, so the request will only be freed when the last -* reference to it is dismissed, and the code in -* i915_gem_request_free() will then decrement the refcount on the -* context. 
-*/ - struct intel_context *ctx; - struct intel_ringbuffer *ringbuf; - - /** Batch buffer related to this request if any (used for - error state dump only) */ - struct drm_i915_gem_object *batch_obj; - - /** Time at which this request was emitted, in jiffies. */ - unsigned long emitted_jiffies; - - /** global list entry for this request */ - struct list_head list; - - struct drm_i915_file_private *file_priv; - /** file_priv list entry for this request */ - struct list_head client_list; - - /** process identifier submitting this request */ - struct pid *pid; - - /** -* The ELSP only accepts two elements at a time, so we queue -* context/tail pairs on a given queue (ring->execlist_queue) until the -* hardware is available. The queue serves a double purpose: we also use -* it to keep track of the up to 2 contexts currently in the hardware -* (usuall
[Intel-gfx] [PATCH 080/190] drm/i915: Store owning file on the i915_address_space
For the global GTT (and aliasing GTT), the address space is owned by the device (it is a global resource) and so the per-file owner field is NULL. For per-process GTT (where we create an address space per context), each is owned by the opening file. We can use this ownership information to both distinguish GGTT and ppGTT address spaces, as well as occasionally inspect the owner. v2: Whitespace, tells us who owns i915_address_space Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem_context.c | 3 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 27 ++- drivers/gpu/drm/i915/i915_gem_gtt.h | 21 ++--- 5 files changed, 31 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 99a6181b012e..0d1f470567b0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -352,7 +352,7 @@ static int per_file_stats(int id, void *ptr, void *data) = container_of(vma->vm, struct i915_hw_ppgtt, base); - if (ppgtt->file_priv != stats->file_priv) + if (ppgtt->base.file != stats->file_priv) continue; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f840cc55f1ab..0cc3ee589dfb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2913,7 +2913,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm) return container_of(vm, struct i915_hw_ppgtt, base); } - static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) { return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 05b4e0e85f24..fab702abd1cb 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -296,7 +296,8 @@ i915_gem_create_context(struct drm_device *dev, } if (USES_FULL_PPGTT(dev)) { - struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); + struct i915_hw_ppgtt *ppgtt = + i915_ppgtt_create(to_i915(dev), file_priv); if (IS_ERR_OR_NULL(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 06117bd0fc00..3a07ff622bd6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2112,11 +2112,12 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return 0; } -static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_private *dev_priv) { - ppgtt->base.dev = dev; + ppgtt->base.dev = dev_priv->dev; - if (INTEL_INFO(dev)->gen < 8) + if (INTEL_INFO(dev_priv)->gen < 8) return gen6_ppgtt_init(ppgtt); else return gen8_ppgtt_init(ppgtt); @@ -2132,15 +2133,17 @@ static void i915_address_space_init(struct i915_address_space *vm, list_add_tail(&vm->global_link, &dev_priv->vm_list); } -int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_private *dev_priv, + struct drm_i915_file_private *file_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - int ret = 0; + int ret; - ret = __hw_ppgtt_init(dev, ppgtt); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret == 0) { kref_init(&ppgtt->ref); i915_address_space_init(&ppgtt->base, dev_priv); + ppgtt->base.file = file_priv; } return ret; @@ -2183,7 +2186,8 @@ int 
i915_ppgtt_init_ring(struct drm_i915_gem_request *req) } struct i915_hw_ppgtt * -i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) +i915_ppgtt_create(struct drm_i915_private *dev_priv, + struct drm_i915_file_private *fpriv) { struct i915_hw_ppgtt *ppgtt; int ret; @@ -2192,14 +2196,12 @@ i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_ppgtt_init(dev, ppgtt); + ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv); if (ret) { kfree(ppgtt); return ERR_PTR(ret); } - ppgtt->file_priv = fpriv; - trace_i915_ppgtt_create(&ppgtt->base)
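With the owner recorded on the address space itself, distinguishing the global GTT from a ppGTT collapses to a NULL test on the new field, as the commit message describes. A minimal sketch of that invariant (the helper name is hypothetical):

/* The device owns the global/aliasing GTT; an opening file owns each
 * full ppGTT, so vm->file doubles as a GGTT-vs-ppGTT discriminator.
 */
static inline bool vm_is_ggtt_sketch(const struct i915_address_space *vm)
{
        return vm->file == NULL;
}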
[Intel-gfx] [PATCH 077/190] drm/i915: Amalgamate GGTT/ppGTT vma debug list walkers
As we can now have multiple VMA inside the global GTT (with partial mappings, rotations, etc), it is no longer true that there may just be a single GGTT entry and so we should walk the full vma_list to count up the actual usage. In addition to unifying the two walkers, switch from multiplying the object size for each vma to summing the bound vma sizes. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 46 +++-- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f311df758195..dd1788c81b90 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -332,6 +332,7 @@ static int per_file_stats(int id, void *ptr, void *data) struct drm_i915_gem_object *obj = ptr; struct file_stats *stats = data; struct i915_vma *vma; + int bound = 0; stats->count++; stats->total += obj->base.size; @@ -339,41 +340,30 @@ static int per_file_stats(int id, void *ptr, void *data) if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - if (USES_FULL_PPGTT(obj->base.dev)) { - list_for_each_entry(vma, &obj->vma_list, obj_link) { - struct i915_hw_ppgtt *ppgtt; + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; - if (!drm_mm_node_allocated(&vma->node)) - continue; + bound++; - if (i915_is_ggtt(vma->vm)) { - stats->global += obj->base.size; - continue; - } - - ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base); + if (i915_is_ggtt(vma->vm)) { + stats->global += vma->node.size; + } else { + struct i915_hw_ppgtt *ppgtt + = container_of(vma->vm, + struct i915_hw_ppgtt, + base); if (ppgtt->file_priv != stats->file_priv) continue; - - if (obj->active) /* XXX per-vma statistic */ - stats->active += obj->base.size; - else - stats->inactive += obj->base.size; - - return 0; - } - } else { - if (i915_gem_obj_ggtt_bound(obj)) { - stats->global += obj->base.size; - if (obj->active) - stats->active += obj->base.size; - else - stats->inactive += obj->base.size; - return 0; } + + if (obj->active) /* XXX per-vma statistic */ + stats->active += vma->node.size; + else + stats->inactive += vma->node.size; } - if (!list_empty(&obj->global_list)) + if (!bound) stats->unbound += obj->base.size; return 0; -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 053/190] drm/i915: Convert i915_semaphores_is_enabled over to early sanitize
Rather than recomputing whether semaphores are enabled, we can do that computation once during early initialisation as the i915.semaphores module parameter is now read-only. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 25 --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 35 ++--- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 20 +-- 8 files changed, 46 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5335072f2047..387ae77d3c29 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3146,7 +3146,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused) int num_rings = hweight32(INTEL_INFO(dev)->ring_mask); int i, j, ret; - if (!i915_semaphore_is_enabled(dev)) { + if (!i915.semaphores) { seq_puts(m, "Semaphores are disabled\n"); return 0; } diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 9e49e304dd8e..4c72c83cfa28 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -126,7 +126,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = 1; break; case I915_PARAM_HAS_SEMAPHORES: - value = i915_semaphore_is_enabled(dev); + value = i915.semaphores; break; case I915_PARAM_HAS_PRIME_VMAP_FLUSH: value = 1; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e9f85fd0542f..cc831a34f7bb 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -515,31 +515,6 @@ void intel_detect_pch(struct drm_device *dev) pci_dev_put(pch); } -bool i915_semaphore_is_enabled(struct drm_device *dev) -{ - if (INTEL_INFO(dev)->gen < 6) - return false; - - if (i915.semaphores >= 0) - return i915.semaphores; - - /* TODO: make semaphores and Execlists play nicely together */ - if (i915.enable_execlists) - return false; - - /* Until we get further testing... 
*/ - if (IS_GEN8(dev)) - return false; - -#ifdef CONFIG_INTEL_IOMMU - /* Enable semaphores on SNB when IO remapping is off */ - if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) - return false; -#endif - - return true; -} - static void intel_suspend_encoders(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 56cf2ffc1eac..58e9e5e50769 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3226,7 +3226,6 @@ extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, extern void intel_detect_pch(struct drm_device *dev); extern int intel_enable_rc6(const struct drm_device *dev); -extern bool i915_semaphore_is_enabled(struct drm_device *dev); int i915_reg_read_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a4f9c5bbb883..31926a4fb42a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2567,7 +2567,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, if (i915_gem_request_completed(from_req)) return 0; - if (!i915_semaphore_is_enabled(obj->base.dev)) { + if (!i915.semaphores) { struct drm_i915_private *i915 = to_i915(obj->base.dev); ret = __i915_wait_request(from_req, i915->mm.interruptible, @@ -4304,13 +4304,42 @@ out: return ret; } +static bool i915_gem_sanitize_semaphore(struct drm_i915_private *dev_priv, + int param_value) +{ + if (INTEL_INFO(dev_priv)->gen < 6) + return false; + + if (param_value >= 0) + return param_value; + + /* TODO: make semaphores and Execlists play nicely together */ + if (i915.enable_execlists) + return false; + + /* Until we get further testing... */ + if (IS_GEN8(dev_priv)) + return false; + +#ifdef CONFIG_INTEL_IOMMU + /* Enable semaphores on SNB when IO remapping is off */ + if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) + return false; +#endif + + return t
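The pattern here is the usual one for a tristate module parameter (-1 auto, 0 off, 1 on): resolve it to a plain boolean exactly once at init, before any reader, so the hot paths shrink from a function call to a flag test. Presumably the init code elided above does the write-back along these lines (a sketch, not the patch's exact call-site):

/* Once, during driver load, before any reader: */
i915.semaphores = i915_gem_sanitize_semaphore(dev_priv, i915.semaphores);

/* Thereafter every check is simply: if (!i915.semaphores) ... */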
[Intel-gfx] [PATCH 066/190] drm/i915: Simplify request_alloc by returning the allocated request
It is simpler and leads to more readable code through the callstack if the allocation returns the allocated struct through the return value. The importance of this is that it no longer looks like we accidentally allocate requests as a side-effect of calling certain functions. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h| 3 +- drivers/gpu/drm/i915/i915_gem.c| 82 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 +-- drivers/gpu/drm/i915/i915_gem_request.c| 22 +++- drivers/gpu/drm/i915/i915_gem_request.h| 6 +-- drivers/gpu/drm/i915/i915_trace.h | 15 +++--- drivers/gpu/drm/i915/intel_display.c | 25 + drivers/gpu/drm/i915/intel_lrc.c | 6 +-- drivers/gpu/drm/i915/intel_overlay.c | 24 - 9 files changed, 77 insertions(+), 114 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 44e8738c5310..0c580124d46d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2786,8 +2786,7 @@ static inline void i915_gem_object_unpin_vmap(struct drm_i915_gem_object *obj) int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, -struct intel_engine_cs *to, -struct drm_i915_gem_request **to_req); +struct drm_i915_gem_request *to); void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req); int i915_gem_dumb_create(struct drm_file *file_priv, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1c6beb154d07..5b5afdcd9634 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2550,47 +2550,35 @@ out: static int __i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request *from_req, - struct drm_i915_gem_request **to_req) + struct drm_i915_gem_request *to, + struct drm_i915_gem_request *from) { - struct intel_engine_cs *from; int ret; - from = from_req->engine; - if (to == from) + if (to->engine == from->engine) return 0; - if (i915_gem_request_completed(from_req)) + if (i915_gem_request_completed(from)) return 0; if (!i915.semaphores) { - struct drm_i915_private *i915 = from_req->i915; - ret = __i915_wait_request(from_req, - i915->mm.interruptible, + ret = __i915_wait_request(from, + to->i915->mm.interruptible, NULL, NO_WAITBOOST); if (ret) return ret; - i915_gem_object_retire_request(obj, from_req); + i915_gem_object_retire_request(obj, from); } else { - int idx = intel_engine_sync_index(from, to); - u32 seqno = i915_gem_request_get_seqno(from_req); + int idx = intel_engine_sync_index(from->engine, to->engine); + u32 seqno = i915_gem_request_get_seqno(from); - WARN_ON(!to_req); - - if (seqno <= from->semaphore.sync_seqno[idx]) + if (seqno <= from->engine->semaphore.sync_seqno[idx]) return 0; - if (*to_req == NULL) { - ret = i915_gem_request_alloc(to, to->default_context, to_req); - if (ret) - return ret; - } - - trace_i915_gem_ring_sync_to(*to_req, from, from_req); - ret = to->semaphore.sync_to(*to_req, from, seqno); + trace_i915_gem_ring_sync_to(to, from); + ret = to->engine->semaphore.sync_to(to, from->engine, seqno); if (ret) return ret; @@ -2598,8 +2586,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * might have just caused seqno wrap under * the radar. 
*/ - from->semaphore.sync_seqno[idx] = - i915_gem_request_get_seqno(obj->last_read_req[from->id]); + from->engine->semaphore.sync_seqno[idx] = + i915_gem_request_get_seqno(obj->last_read_req[from->engine->id]); } return 0; @@ -2609,17 +2597,12 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * i915_gem_object_sync - sync an object to a ring. * * @obj: object which may be in use on another ring. - * @to: ring we wish to use the object on. May be NULL. - * @to_req: request we wish
[Intel-gfx] [PATCH 057/190] drm/i915: Remove the identical implementations of request space reservation
Now that we share intel_ring_begin(), reserving space for the tail of the request is identical between legacy/execlists and so the tautology can be removed. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_request.c | 7 +++ drivers/gpu/drm/i915/intel_lrc.c| 15 --- drivers/gpu/drm/i915/intel_lrc.h| 1 - drivers/gpu/drm/i915/intel_ringbuffer.c | 15 --- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 --- 5 files changed, 3 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 619a9b063d9c..85067069995e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -255,10 +255,9 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, * to be redone if the request is not actually submitted straight * away, e.g. because a GPU scheduler has deferred it. */ - if (i915.enable_execlists) - ret = intel_logical_ring_reserve_space(req); - else - ret = intel_ring_reserve_space(req); + intel_ring_reserved_space_reserve(req->ringbuf, + MIN_SPACE_FOR_ADD_REQUEST); + ret = intel_ring_begin(req, 0); if (ret) { /* * At this point, the request is fully allocated even if not diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3d14b69632e8..4f1944929330 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -721,21 +721,6 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) execlists_context_queue(request); } -int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request) -{ - /* -* The first call merely notes the reserve request and is common for -* all back ends. The subsequent localised _begin() call actually -* ensures that the reservation is available. Without the begin, if -* the request creator immediately submitted the request without -* adding any commands to it then there might not actually be -* sufficient room for the submission commands. -*/ - intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST); - - return intel_ring_begin(request, 0); -} - /** * execlists_submission() - submit a batchbuffer for execution, Execlists style * @dev: DRM device. diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 32401e11cebe..c88988a41898 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -56,7 +56,6 @@ /* Logical Rings */ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request); -int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request); void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c694f602a0b8..db5c407f7720 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2086,21 +2086,6 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) return 0; } -int intel_ring_reserve_space(struct drm_i915_gem_request *request) -{ - /* -* The first call merely notes the reserve request and is common for -* all back ends. The subsequent localised _begin() call actually -* ensures that the reservation is available. 
Without the begin, if -* the request creator immediately submitted the request without -* adding any commands to it then there might not actually be -* sufficient room for the submission commands. -*/ - intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST); - - return intel_ring_begin(request, 0); -} - void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size) { WARN_ON(ringbuf->reserved_size); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9c19a6ca8e7d..bc6ceb54b1f3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -536,9 +536,6 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf); /* Finish with the reserved space - for use by i915_add_request() only. */ void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf); -/* Legacy ringbuffer specific portion of reservation code: */ -int intel_ring_reserve_space(struct drm_i915_gem_request *request); - /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ struct intel_wait { struct rb_node node; -- 2.7.0.r
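Spelled out, the now-shared sequence is two steps with distinct jobs: the reserve call merely records the size, and the zero-dword intel_ring_begin() forces any wait or wrap needed to make that space genuinely available before the caller emits commands (the guarantee the deleted comments described). The common path in i915_gem_request_alloc() reduces to:

intel_ring_reserved_space_reserve(req->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);

ret = intel_ring_begin(req, 0); /* back the reservation with real space */
if (ret)
        return ret;     /* request stays fully allocated; caller must cancel it */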
[Intel-gfx] [PATCH 071/190] drm/i915: Simplify calling engine->sync_to
Since requests can no longer be generated as a side-effect of intel_ring_begin(), we know that the seqno will be unchanged during ring-emission. This predictability then means we do not have to check for the seqno wrapping around whilst emitting the semaphore for engine->sync_to(). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 13 ++- drivers/gpu/drm/i915/intel_ringbuffer.c | 67 ++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +-- 3 files changed, 33 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 235a3de6e0a0..b0230e7151ce 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2572,22 +2572,15 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, i915_gem_object_retire_request(obj, from); } else { int idx = intel_engine_sync_index(from->engine, to->engine); - u32 seqno = i915_gem_request_get_seqno(from); - - if (seqno <= from->engine->semaphore.sync_seqno[idx]) + if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx]) return 0; trace_i915_gem_ring_sync_to(to, from); - ret = to->engine->semaphore.sync_to(to, from->engine, seqno); + ret = to->engine->semaphore.sync_to(to, from); if (ret) return ret; - /* We use last_read_req because sync_to() -* might have just caused seqno wrap under -* the radar. -*/ - from->engine->semaphore.sync_seqno[idx] = - i915_gem_request_get_seqno(obj->last_read_req[from->engine->id]); + from->engine->semaphore.sync_seqno[idx] = from->fence.seqno; } return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 556e9e2c1fec..d37cdb2f9073 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1384,69 +1384,58 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv, */ static int -gen8_ring_sync(struct drm_i915_gem_request *waiter_req, - struct intel_engine_cs *signaller, - u32 seqno) +gen8_ring_sync(struct drm_i915_gem_request *wait, + struct drm_i915_gem_request *signal) { - struct intel_ring *waiter = waiter_req->ring; - struct drm_i915_private *dev_priv = waiter_req->i915; + struct intel_ring *waiter = wait->ring; + struct drm_i915_private *dev_priv = wait->i915; int ret; - ret = intel_ring_begin(waiter_req, 4); + ret = intel_ring_begin(wait, 4); if (ret) return ret; - intel_ring_emit(waiter, MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(waiter, seqno); intel_ring_emit(waiter, - lower_32_bits(GEN8_WAIT_OFFSET(waiter_req->engine, - signaller->id))); + MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_GTE_SDD); + intel_ring_emit(waiter, signal->fence.seqno); intel_ring_emit(waiter, - upper_32_bits(GEN8_WAIT_OFFSET(waiter_req->engine, - signaller->id))); + lower_32_bits(GEN8_WAIT_OFFSET(wait->engine, + signal->engine->id))); + intel_ring_emit(waiter, + upper_32_bits(GEN8_WAIT_OFFSET(wait->engine, + signal->engine->id))); intel_ring_advance(waiter); return 0; } static int -gen6_ring_sync(struct drm_i915_gem_request *waiter_req, - struct intel_engine_cs *signaller, - u32 seqno) +gen6_ring_sync(struct drm_i915_gem_request *wait, + struct drm_i915_gem_request *signal) { - struct intel_ring *waiter = waiter_req->ring; + struct intel_ring *waiter = wait->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signaller->semaphore.mbox.wait[waiter_req->engine->id]; + 
u32 wait_mbox = signal->engine->semaphore.mbox.wait[wait->engine->id]; int ret; - /* Throughout all of the GEM code, seqno passed implies our current -* seqno is >= the last seqno executed. However for hardware the -* comparison is strictly greater than. -*/ -
[Intel-gfx] [PATCH 074/190] drm/i915: Rename request->list to link for consistency
We use "list" to denote the list and "link" to denote an element on that list. Rename request->list to match this idiom. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 12 ++-- drivers/gpu/drm/i915/i915_gem_request.c | 10 +- drivers/gpu/drm/i915/i915_gem_request.h | 4 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++--- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 65cb1d6a5d64..efa9572fc217 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -695,13 +695,13 @@ static int i915_gem_request_info(struct seq_file *m, void *data) int count; count = 0; - list_for_each_entry(req, &ring->request_list, list) + list_for_each_entry(req, &ring->request_list, link) count++; if (count == 0) continue; seq_printf(m, "%s requests: %d\n", ring->name, count); - list_for_each_entry(req, &ring->request_list, list) { + list_for_each_entry(req, &ring->request_list, link) { struct task_struct *task; rcu_read_lock(); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 77c253ddf060..f314b3ea2726 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2183,7 +2183,7 @@ i915_gem_find_active_request(struct intel_engine_cs *ring) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - list_for_each_entry(request, &ring->request_list, list) { + list_for_each_entry(request, &ring->request_list, link) { if (i915_gem_request_completed(request)) continue; @@ -2208,7 +2208,7 @@ static void i915_gem_reset_ring_status(struct intel_engine_cs *ring) i915_set_reset_status(dev_priv, request->ctx, ring_hung); - list_for_each_entry_continue(request, &ring->request_list, list) + list_for_each_entry_continue(request, &ring->request_list, link) i915_set_reset_status(dev_priv, request->ctx, false); } @@ -2255,7 +2255,7 @@ static void i915_gem_reset_ring_cleanup(struct intel_engine_cs *engine) request = list_last_entry(&engine->request_list, struct drm_i915_gem_request, - list); + link); i915_gem_request_retire_upto(request); } @@ -2317,7 +2317,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) request = list_first_entry(&ring->request_list, struct drm_i915_gem_request, - list); + link); if (!i915_gem_request_completed(request)) break; @@ -2336,7 +2336,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) struct drm_i915_gem_object, ring_list[ring->id]); - if (!list_empty(&obj->last_read[ring->id].request->list)) + if (!list_empty(&obj->last_read[ring->id].request->link)) break; i915_gem_object_retire__read(obj, ring->id); @@ -2449,7 +2449,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) if (req == NULL) continue; - if (list_empty(&req->list)) + if (list_empty(&req->link)) goto retire; if (i915_gem_request_completed(req)) { diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 01443d8d9224..7f38d8972721 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -333,7 +333,7 @@ void i915_gem_request_cancel(struct drm_i915_gem_request *req) static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); - list_del_init(&request->list); + list_del_init(&request->link); 
/* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position @@ -355,12 +355,12 @@ i915_gem_request_retire_upto(struct drm_i915_gem_request *req) lockdep_assert_held(&engine->dev->struct_mutex); - if (list_empty(&req->list)) + if (list_empty(&req->link)) return; do { tmp = list_fi
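The idiom being standardised on is: the owning head is called "list", the member embedded in each element is called "link". In miniature:

struct engine_sketch {
        struct list_head request_list;  /* the list itself */
};

struct request_sketch {
        struct list_head link;          /* our slot on engine->request_list */
};

/* which makes iteration read naturally: */
/* list_for_each_entry(req, &engine->request_list, link) */

Under this rule, list_empty(&req->link) asks "is this request still on a list?", as in the retire paths touched above.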
[Intel-gfx] [PATCH 062/190] drm/i915: Rename extern functions operating on intel_engine_cs
Using intel_ring_* to refer to the intel_engine_cs functions is most confusing! Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c| 10 +++ drivers/gpu/drm/i915/i915_dma.c| 8 +++--- drivers/gpu/drm/i915/i915_drv.h| 4 +-- drivers/gpu/drm/i915/i915_gem.c| 22 +++--- drivers/gpu/drm/i915/i915_gem_context.c| 8 +++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ++-- drivers/gpu/drm/i915/i915_gem_request.c| 8 +++--- drivers/gpu/drm/i915/i915_gem_request.h| 4 +-- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +++--- drivers/gpu/drm/i915/i915_guc_submission.c | 6 ++-- drivers/gpu/drm/i915/i915_irq.c| 18 ++-- drivers/gpu/drm/i915/i915_trace.h | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 4 +-- drivers/gpu/drm/i915/intel_lrc.c | 17 +-- drivers/gpu/drm/i915/intel_mocs.c | 6 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c| 46 ++ drivers/gpu/drm/i915/intel_ringbuffer.h| 36 +++ 17 files changed, 104 insertions(+), 109 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6e91726db8d3..dec10784c2bc 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -599,7 +599,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) engine->name, i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, - intel_ring_get_seqno(engine), + intel_engine_get_seqno(engine), i915_gem_request_completed(work->flip_queued_req)); } else seq_printf(m, "Flip not associated with any ring\n"); @@ -732,7 +732,7 @@ static void i915_ring_seqno_info(struct seq_file *m, struct rb_node *rb; seq_printf(m, "Current sequence (%s): %x\n", - ring->name, intel_ring_get_seqno(ring)); + ring->name, intel_engine_get_seqno(ring)); seq_printf(m, "Current user interrupts (%s): %x\n", ring->name, READ_ONCE(ring->user_interrupts)); @@ -1354,8 +1354,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); for_each_ring(ring, dev_priv, i) { - acthd[i] = intel_ring_get_active_head(ring); - seqno[i] = intel_ring_get_seqno(ring); + acthd[i] = intel_engine_get_active_head(ring); + seqno[i] = intel_engine_get_seqno(ring); } i915_get_extra_instdone(dev, instdone); @@ -2496,7 +2496,7 @@ static int i915_guc_info(struct seq_file *m, void *data) struct intel_guc guc; struct i915_guc_client client = {}; struct intel_engine_cs *ring; - enum intel_ring_id i; + enum intel_engine_id i; u64 total = 0; if (!HAS_GUC_SCHED(dev_priv->dev)) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 4c72c83cfa28..c0242ce45e43 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -87,16 +87,16 @@ static int i915_getparam(struct drm_device *dev, void *data, value = 1; break; case I915_PARAM_HAS_BSD: - value = intel_ring_initialized(&dev_priv->ring[VCS]); + value = intel_engine_initialized(&dev_priv->ring[VCS]); break; case I915_PARAM_HAS_BLT: - value = intel_ring_initialized(&dev_priv->ring[BCS]); + value = intel_engine_initialized(&dev_priv->ring[BCS]); break; case I915_PARAM_HAS_VEBOX: - value = intel_ring_initialized(&dev_priv->ring[VECS]); + value = intel_engine_initialized(&dev_priv->ring[VECS]); break; case I915_PARAM_HAS_BSD2: - value = intel_ring_initialized(&dev_priv->ring[VCS2]); + value = intel_engine_initialized(&dev_priv->ring[VCS2]); break; case I915_PARAM_HAS_RELAXED_FENCING: value = 1; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9f06dd19bfb2..466adc6617f0 100644 --- 
a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -520,7 +520,7 @@ struct drm_i915_error_state { /* Software tracked state */ bool waiting; int hangcheck_score; - enum intel_ring_hangcheck_action hangcheck_action; + enum intel_engine_hangcheck_action hangcheck_action; int num_requests; /* our own tracking
[Intel-gfx] [PATCH 025/190] drm/i915: Broadwell execlists needs exactly the same seqno w/a as legacy
In legacy mode, we use the gen6 seqno barrier to insert a delay after the interrupt before reading the seqno (as the seqno write is not flushed before the interrupt is sent, the interrupt arrives before the seqno is visible). Execlists ignored the evidence of igt. Note that it is harder, but not impossible, to reproduce the missed interrupt syndrome with execlists. This is primarily because execlists, being interrupt driven itself, helps mask the issue. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c | 39 +-- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ad51b1fc37cd..27d91f1ceb2b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1775,18 +1775,24 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, return 0; } -static void bxt_seqno_barrier(struct intel_engine_cs *ring) +static void +gen6_seqno_barrier(struct intel_engine_cs *ring) { - /* -* On BXT A steppings there is a HW coherency issue whereby the -* MI_STORE_DATA_IMM storing the completed request's seqno -* occasionally doesn't invalidate the CPU cache. Work around this by -* clflushing the corresponding cacheline whenever the caller wants -* the coherency to be guaranteed. Note that this cacheline is known -* to be clean at this point, since we only write it in -* bxt_a_set_seqno(), where we also do a clflush after the write. So -* this clflush in practice becomes an invalidate operation. + /* Workaround to force correct ordering between irq and seqno writes on +* ivb (and maybe also on snb) by reading from a CS register (like +* ACTHD) before reading the status page. +* +* Note that this effectively stalls the read by the time +* it takes to do a memory transaction, which more or less ensures +* that the write from the GPU has sufficient time to invalidate +* the CPU cacheline. Alternatively we could delay the interrupt from +* the CS ring to give the write time to land, but that would incur +* a delay after every batch i.e. much more frequent than a delay +* when waiting for the interrupt (with the same net latency). 
*/ + struct drm_i915_private *dev_priv = ring->i915; + POSTING_READ_FW(RING_ACTHD(ring->mmio_base)); + intel_flush_status_page(ring, I915_GEM_HWS_INDEX); } @@ -1984,8 +1990,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->init_hw = gen8_init_render_ring; ring->init_context = gen8_init_rcs_context; ring->cleanup = intel_fini_pipe_control; - if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) - ring->irq_seqno_barrier = bxt_seqno_barrier; + ring->irq_seqno_barrier = gen6_seqno_barrier; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush_render; ring->irq_get = gen8_logical_ring_get_irq; @@ -2031,8 +2036,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; ring->init_hw = gen8_init_common_ring; - if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) - ring->irq_seqno_barrier = bxt_seqno_barrier; + ring->irq_seqno_barrier = gen6_seqno_barrier; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; @@ -2056,6 +2060,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; ring->init_hw = gen8_init_common_ring; + ring->irq_seqno_barrier = gen6_seqno_barrier; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; @@ -2079,8 +2084,7 @@ static int logical_blt_ring_init(struct drm_device *dev) GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT; ring->init_hw = gen8_init_common_ring; - if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) - ring->irq_seqno_barrier = bxt_seqno_barrier; + ring->irq_seqno_barrier = gen6_seqno_barrier; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; @@ -2104,8 +2108,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT; ring->init_hw = gen8_init_common_ring; - if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) - ring->irq_seqno_barrier = bxt_seqno_barrier; + ring->irq_seqno_barrier = gen6_seqno_barrier; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; ring
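For readers following along, the barrier installed above is only two operations. The sketch below is an illustrative kernel-style rendering of the pattern, not the driver's exact code: the mmio field, the register offset and the index macro are assumptions (the patch itself uses POSTING_READ_FW() and intel_flush_status_page()).

    #define RING_ACTHD      0x74    /* assumed CS register offset */
    #define HWS_SEQNO_INDEX 0x30    /* assumed seqno slot */

    static void seqno_barrier(struct intel_engine_cs *engine)
    {
            /* Posting read from any CS register (ACTHD here): costs roughly
             * one memory transaction, giving the GPU's earlier seqno store
             * time to reach memory before we look at it. */
            (void)readl(engine->mmio + RING_ACTHD); /* assumed: void __iomem *mmio */

            /* The seqno cacheline is about to be re-read; discard any stale
             * CPU copy so the next load sees the GPU's write. */
            clflush((void *)&engine->status_page[HWS_SEQNO_INDEX]);
    }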
[Intel-gfx] [PATCH 042/190] drm/i915: Clean up GPU hang message
Remove some redundant kernel messages as we deduce a hung GPU and capture the error state. v2: Fix "hang" vs "no progress" message whilst I was there Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 21 +++-- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d9757d227c86..ce52d7d9ad91 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3031,8 +3031,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) struct drm_device *dev = dev_priv->dev; struct intel_engine_cs *ring; int i; - int busy_count = 0, rings_hung = 0; - bool stuck[I915_NUM_RINGS] = { 0 }; + int busy_count = 0; #define BUSY 1 #define KICK 5 #define HUNG 20 @@ -3108,7 +3107,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work) break; case HANGCHECK_HUNG: ring->hangcheck.score += HUNG; - stuck[i] = true; break; } } @@ -3134,17 +3132,12 @@ static void i915_hangcheck_elapsed(struct work_struct *work) busy_count += busy; } - for_each_ring(ring, dev_priv, i) { - if (ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) { - DRM_INFO("%s on %s\n", -stuck[i] ? "stuck" : "no progress", -ring->name); - rings_hung++; - } - } - - if (rings_hung) - return i915_handle_error(dev, true, "Ring hung"); + for_each_ring(ring, dev_priv, i) + if (ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) + return i915_handle_error(dev, true, +"%s on %s", +ring->hangcheck.action == HANGCHECK_HUNG ? "Hang" : "No progress" , +ring->name); /* Reset timer in case GPU hangs without another request being added */ if (busy_count) -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 081/190] drm/i915: i915_vma_move_to_active prep patch
This patch is broken out of the next just to remove the code motion from that patch and make it more readable. What we do here is move i915_vma_move_to_active() to i915_gem_execbuffer.c and put the three stages (read, write, fenced) together so that future modifications to active handling are all located in the same spot. The importance of this is that we can more simply control the order in which the requests are placed in the retirement list (i.e. control the order in which we retire and so control the lifetimes to avoid having to hold onto references). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem.c | 15 --- drivers/gpu/drm/i915/i915_gem_context.c | 7 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 63 ++-- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- 5 files changed, 49 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0cc3ee589dfb..aa9d3782107e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2764,7 +2764,8 @@ int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct drm_i915_gem_request *to); void i915_vma_move_to_active(struct i915_vma *vma, -struct drm_i915_gem_request *req); +struct drm_i915_gem_request *req, +unsigned flags); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9a22fdd8a9f5..164ebdaa0369 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2026,21 +2026,6 @@ void *i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj) return obj->vmapping; } -void i915_vma_move_to_active(struct i915_vma *vma, -struct drm_i915_gem_request *req) -{ - struct drm_i915_gem_object *obj = vma->obj; - struct intel_engine_cs *engine = req->engine; - - /* Add a reference if we're newly entering the active list. */ - if (obj->active == 0) - drm_gem_object_reference(&obj->base); - obj->active |= intel_engine_flag(engine); - - i915_gem_request_mark_active(req, &obj->last_read[engine->id]); - list_move_tail(&vma->vm_link, &vma->vm->active_list); -} - static void i915_gem_object_retire__fence(struct i915_gem_active *active, struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index fab702abd1cb..310a770b7984 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -778,8 +778,8 @@ static int do_switch(struct drm_i915_gem_request *req) * MI_SET_CONTEXT instead of when the next seqno has completed. */ if (from != NULL) { - from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; - i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req); + struct drm_i915_gem_object *obj = from->legacy_hw_ctx.rcs_state; + /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the * whole damn pipeline, we don't need to explicitly mark the * object dirty. The only exception is that the context must be * able to defer doing this until we know the object would be * swapped, but there is no way to do that yet. 
*/ - from->legacy_hw_ctx.rcs_state->dirty = 1; + obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; + i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0); /* obj is kept alive until the next request by its active ref */ i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c10795f58bfc..9e549bded186 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1104,6 +1104,44 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } +void i915_vma_move_to_active(struct i915_vma *vma, +struct drm_i915_gem_request *req, +unsigned flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + const unsigned engine = req->engine->id; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node))
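The diff is truncated in the archive, but the shape of the consolidated function is recoverable: read, write and fenced tracking become three flag-selected stages in one body. In the sketch below, the read stage follows the deleted i915_gem.c code quoted above, while the write and fence stages are inferred from the commit message and the execbuffer uapi flag names, so treat those two as an approximation.

    void i915_vma_move_to_active(struct i915_vma *vma,
                                 struct drm_i915_gem_request *req,
                                 unsigned flags)
    {
            struct drm_i915_gem_object *obj = vma->obj;
            const unsigned engine = req->engine->id;

            /* Read stage: first activity takes a reference on the object. */
            if (obj->active == 0)
                    drm_gem_object_reference(&obj->base);
            obj->active |= intel_engine_flag(req->engine);
            i915_gem_request_mark_active(req, &obj->last_read[engine]);

            /* Write stage (inferred): remember the most recent writer. */
            if (flags & EXEC_OBJECT_WRITE)
                    i915_gem_request_mark_active(req, &obj->last_write);

            /* Fence stage (inferred): the fence must outlive this request. */
            if (flags & EXEC_OBJECT_NEEDS_FENCE)
                    i915_gem_request_mark_active(req, &obj->last_fence);

            list_move_tail(&vma->vm_link, &vma->vm->active_list);
    }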
[Intel-gfx] [PATCH 079/190] drm/i915: Reduce the pointer dance of i915_is_ggtt()
The multiple levels of indirection do nothing but hinder the compiler, and the pointer chasing turns out to be quite painful at runtime, yet painless to fix. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c| 13 ++--- drivers/gpu/drm/i915/i915_drv.h| 7 --- drivers/gpu/drm/i915/i915_gem.c| 18 +++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 ++--- drivers/gpu/drm/i915/i915_gem_gtt.c| 12 +--- drivers/gpu/drm/i915/i915_gem_gtt.h| 5 + drivers/gpu/drm/i915/i915_trace.h | 27 --- 7 files changed, 33 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index dd1788c81b90..99a6181b012e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -118,7 +118,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) struct i915_vma *vma; list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (i915_is_ggtt(vma->vm) && drm_mm_node_allocated(&vma->node)) + if (vma->is_ggtt && drm_mm_node_allocated(&vma->node)) size += vma->node.size; } @@ -165,12 +165,11 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (fence: %d)", obj->fence_reg); list_for_each_entry(vma, &obj->vma_list, obj_link) { seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", - i915_is_ggtt(vma->vm) ? "g" : "pp", + vma->is_ggtt ? "g" : "pp", vma->node.start, vma->node.size); - if (i915_is_ggtt(vma->vm)) - seq_printf(m, ", type: %u)", vma->ggtt_view.type); - else - seq_puts(m, ")"); + if (vma->is_ggtt) + seq_printf(m, ", type: %u", vma->ggtt_view.type); + seq_puts(m, ")"); } if (obj->stolen) seq_printf(m, " (stolen: %08llx)", obj->stolen->start); @@ -346,7 +345,7 @@ static int per_file_stats(int id, void *ptr, void *data) bound++; - if (i915_is_ggtt(vma->vm)) { + if (vma->is_ggtt) { stats->global += vma->node.size; } else { struct i915_hw_ppgtt *ppgtt diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c9c1a5cdc1e5..f840cc55f1ab 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2905,18 +2905,11 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj); /* Some GGTT VM helpers */ #define i915_obj_to_ggtt(obj) \ (&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base) -static inline bool i915_is_ggtt(struct i915_address_space *vm) -{ - struct i915_address_space *ggtt = - &((struct drm_i915_private *)(vm)->dev->dev_private)->gtt.base; - return vm == ggtt; -} static inline struct i915_hw_ppgtt * i915_vm_to_ppgtt(struct i915_address_space *vm) { WARN_ON(i915_is_ggtt(vm)); - return container_of(vm, struct i915_hw_ppgtt, base); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 44bd514a6c2e..9a22fdd8a9f5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2595,8 +2595,7 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) return ret; } - if (i915_is_ggtt(vma->vm) && - vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { + if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { i915_gem_object_finish_gtt(obj); /* release the fence reg _after_ flushing */ @@ -2611,7 +2610,7 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) vma->bound = 0; list_del_init(&vma->vm_link); - if (i915_is_ggtt(vma->vm)) { + if (vma->is_ggtt) { if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { obj->map_and_fenceable = false; } else if (vma->ggtt_view.pages) { @@ -3880,17 +3879,14 @@ struct i915_vma 
*i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, void i915_gem_vma_destroy(struct i915_vma *vma) { - struct i915_address_space *vm = NULL; WARN_ON(vma->node.allocated); /* Keep the vma as a placeholder in the execbuffer reservation lists */ if (!list_empty(&vma->exec_list)) return; - vm = vma->vm; - - if (!i915_is_ggtt(vm)) - i915_ppgtt_put(i915_vm_to_ppgtt(vm)); + if (!vma->is_ggtt) + i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); list_del(&vma->obj_link); @@ -4446,7 +4442,7 @@ u64 i915_gem_obj_offset(struct dr
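The fix itself fits in a few lines: a vma's address space never changes after creation, so the ggtt question can be answered once and cached as a bit in the vma. A sketch of that idea follows; the creation helper is illustrative, while the is_ggtt field is the one the patch reads everywhere.

    struct i915_vma {
            struct i915_address_space *vm;
            unsigned int is_ggtt : 1;       /* cached: is vm the global GTT? */
            /* ... */
    };

    static struct i915_vma *
    __vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm)
    {
            struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);

            if (vma == NULL)
                    return NULL;

            vma->vm = vm;
            /* The one remaining pointer chase, performed exactly once;
             * every later query is a single bit test on the vma. */
            vma->is_ggtt = i915_is_ggtt(vm);
            return vma;
    }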
[Intel-gfx] [PATCH 069/190] drm/i915: Remove duplicate golden render state init from execlists
Now that we use the same vfuncs for emitting the batch buffer in both execlists and legacy, the golden render state initialisation is identical between both. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_render_state.c | 22 -- drivers/gpu/drm/i915/i915_gem_render_state.h | 18 --- drivers/gpu/drm/i915/intel_lrc.c | 34 +--- drivers/gpu/drm/i915/intel_renderstate.h | 16 + 4 files changed, 27 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index ccc988c2b226..222f25777bb4 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -28,6 +28,15 @@ #include "i915_drv.h" #include "intel_renderstate.h" +struct render_state { + const struct intel_renderstate_rodata *rodata; + struct drm_i915_gem_object *obj; + u64 ggtt_offset; + int gen; + u32 aux_batch_size; + u32 aux_batch_offset; +}; + static const struct intel_renderstate_rodata * render_state_get_rodata(struct drm_device *dev, const int gen) { @@ -163,14 +172,14 @@ err_out: #undef OUT_BATCH -void i915_gem_render_state_fini(struct render_state *so) +static void render_state_fini(struct render_state *so) { i915_gem_object_ggtt_unpin(so->obj); drm_gem_object_unreference(&so->obj->base); } -int i915_gem_render_state_prepare(struct intel_engine_cs *ring, - struct render_state *so) +static int render_state_prepare(struct intel_engine_cs *ring, + struct render_state *so) { int ret; @@ -186,7 +195,7 @@ int i915_gem_render_state_prepare(struct intel_engine_cs *ring, ret = render_state_setup(so); if (ret) { - i915_gem_render_state_fini(so); + render_state_fini(so); return ret; } @@ -198,7 +207,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) struct render_state so; int ret; - ret = i915_gem_render_state_prepare(req->engine, &so); + ret = render_state_prepare(req->engine, &so); if (ret) return ret; @@ -222,8 +231,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) } i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - out: - i915_gem_render_state_fini(&so); + render_state_fini(&so); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index e641bb093a90..c44fca8599bb 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -26,24 +26,6 @@ #include -struct intel_renderstate_rodata { - const u32 *reloc; - const u32 *batch; - const u32 batch_items; -}; - -struct render_state { - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - u64 ggtt_offset; - int gen; - u32 aux_batch_size; - u32 aux_batch_offset; -}; - int i915_gem_render_state_init(struct drm_i915_gem_request *req); -void i915_gem_render_state_fini(struct render_state *so); -int i915_gem_render_state_prepare(struct intel_engine_cs *ring, - struct render_state *so); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9838503fafca..2f92c43397eb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1627,38 +1627,6 @@ static int gen8_add_request(struct drm_i915_gem_request *request) return 0; } -static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req) -{ - struct render_state so; - int ret; - - ret = i915_gem_render_state_prepare(req->engine, &so); - if (ret) - return ret; - - if (so.rodata == NULL) - return 0; - - ret = 
req->engine->emit_bb_start(req, so.ggtt_offset, -so.rodata->batch_items * 4, -I915_DISPATCH_SECURE); - if (ret) - goto out; - - ret = req->engine->emit_bb_start(req, -(so.ggtt_offset + so.aux_batch_offset), -so.aux_batch_size, -I915_DISPATCH_SECURE); - if (ret) - goto out; - - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - -out: - i915_gem_render_state_fini(&so); - return ret; -} - static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { int ret; @@ -1675,7 +1643,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret)
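The deduplication works because both submission paths now reach the batch through the same engine vfunc. A schematic of the single surviving init path, condensed from the patch (aux batch handling elided; helper names follow the patch):

    int i915_gem_render_state_init(struct drm_i915_gem_request *req)
    {
            struct render_state so;
            int ret;

            ret = render_state_prepare(req->engine, &so);   /* pin + relocate */
            if (ret)
                    return ret;

            if (so.rodata == NULL)          /* no golden state for this gen */
                    return 0;

            /* Virtual dispatch: execlists and legacy each supplied their
             * own batch-start emitter behind the same function pointer. */
            ret = req->engine->emit_bb_start(req, so.ggtt_offset,
                                             so.rodata->batch_items * 4,
                                             I915_DISPATCH_SECURE);
            if (ret == 0)
                    i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);

            render_state_fini(&so);         /* unpin + unreference */
            return ret;
    }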
[Intel-gfx] [PATCH 072/190] drm/i915: Execlists cannot pin a context without the object
Given that the intel_lr_context_pin cannot succeed without the object, we cannot reach intel_lr_context_unpin() without first allocating that object - so we can remove the redundant test. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c | 19 --- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 84a8bcc90d78..0f0bf97e4032 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -769,17 +769,14 @@ static int intel_lr_context_pin(struct drm_i915_gem_request *rq) void intel_lr_context_unpin(struct drm_i915_gem_request *rq) { int engine = rq->engine->id; - struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[engine].state; - struct intel_ring *ring = rq->ring; - - if (ctx_obj) { - WARN_ON(!mutex_is_locked(&rq->i915->dev->struct_mutex)); - if (--rq->ctx->engine[engine].pin_count == 0) { - intel_ring_unmap(ring); - i915_gem_object_ggtt_unpin(ctx_obj); - i915_gem_context_unreference(rq->ctx); - } - } + + WARN_ON(!mutex_is_locked(&rq->i915->dev->struct_mutex)); + if (--rq->ctx->engine[engine].pin_count) + return; + + intel_ring_unmap(rq->ring); + i915_gem_object_ggtt_unpin(rq->ctx->engine[engine].state); + i915_gem_context_unreference(rq->ctx); } static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
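The rewrite is a textbook guard-clause refactor. In miniature, with simplified and assumed names, the pattern is:

    /* Check the cheap condition first and return early while the pin
     * count is still held; the teardown path then reads top-to-bottom
     * with no nesting. */
    static void context_unpin(struct ctx_engine_state *state)
    {
            if (--state->pin_count)
                    return;                 /* still pinned elsewhere */

            /* Last unpin: release in reverse order of acquisition. */
            ring_unmap(state->ring);
            ggtt_unpin(state->obj);
            context_unreference(state->ctx);
    }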
[Intel-gfx] [PATCH 049/190] drm/i915: Disable waitboosting for mmioflips/semaphores
Since commit a6f766f3975185af66a31a2cea2cd38721645999 Author: Chris Wilson Date: Mon Apr 27 13:41:20 2015 +0100 drm/i915: Limit ring synchronisation (sw sempahores) RPS boosts and commit bcafc4e38b6ad03f48989b7ecaff03845b5b7acf Author: Chris Wilson Date: Mon Apr 27 13:41:21 2015 +0100 drm/i915: Limit mmio flip RPS boosts we have limited the waitboosting for semaphores and flips. Ideally we do not want to boost in either of these instances as no consumer is waiting upon the results. With the introduction of NO_WAITBOOST in the previous patch, we can finally disable these needless boosts. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 8 +--- drivers/gpu/drm/i915/i915_drv.h | 2 -- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 2 -- 5 files changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b82482573a8f..5335072f2047 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2398,13 +2398,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) list_empty(&file_priv->rps.link) ? "" : ", active"); rcu_read_unlock(); } - seq_printf(m, "Semaphore boosts: %d%s\n", - dev_priv->rps.semaphores.boosts, - list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active"); - seq_printf(m, "MMIO flip boosts: %d%s\n", - dev_priv->rps.mmioflips.boosts, - list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active"); - seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts); + seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts); spin_unlock(&dev_priv->rps.client_lock); return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ee146ce02412..49a151126b2a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1136,8 +1136,6 @@ struct intel_gen6_power_mgmt { struct delayed_work delayed_resume_work; unsigned boosts; - struct intel_rps_client semaphores, mmioflips; - /* manual wa residency calculations */ struct intel_rps_ei up_ei, down_ei; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fd61e722b595..9df00e694cd9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2533,7 +2533,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, ret = __i915_wait_request(from_req, i915->mm.interruptible, NULL, - &i915->rps.semaphores); + NO_WAITBOOST); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ae247927e931..e2822530af25 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11430,7 +11430,7 @@ static void intel_mmio_flip_work_func(struct work_struct *work) if (mmio_flip->req) { WARN_ON(__i915_wait_request(mmio_flip->req, false, NULL, - &mmio_flip->i915->rps.mmioflips)); + NO_WAITBOOST)); i915_gem_request_put(mmio_flip->req); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 39b7ca9c3e66..b340f2a1f110 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7324,8 +7324,6 @@ void intel_pm_setup(struct drm_device *dev) INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, intel_gen6_powersave_work); INIT_LIST_HEAD(&dev_priv->rps.clients); - INIT_LIST_HEAD(&dev_priv->rps.semaphores.link); - INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link); dev_priv->pm.suspended = false; 
atomic_set(&dev_priv->pm.wakeref_count, 0); -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
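The mechanism rests on the wait path recognising a sentinel client. A sketch of the idea; the sentinel's actual definition comes from the previous patch in the series, so the ERR_PTR(-1) encoding and the helper names here are assumptions:

    #define NO_WAITBOOST ERR_PTR(-1)        /* assumed encoding */

    static int __wait_request(struct drm_i915_gem_request *req,
                              bool interruptible,
                              struct intel_rps_client *rps)
    {
            /* Boost the GPU clocks only when a real consumer is stalled on
             * the result; semaphores and mmio flips now pass NO_WAITBOOST
             * because nobody is waiting on their completion. */
            if (!IS_ERR(rps))
                    gen6_rps_boost(req->i915, rps, req->emitted_jiffies);

            return wait_for_request(req, interruptible); /* assumed helper */
    }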
[Intel-gfx] [PATCH 003/190] drm/i915: Add an optional selection from i915 of CONFIG_MMU_NOTIFIER
userptr requires mmu-notifier for full unprivileged support. Most systems have mmu-notifier support already enabled as a requirement for virtualisation support, but we should make the option for i915 to take advantage of mmu-notifiers explicit (and enabled by default, so that regular userspace can take advantage of passing client memory to the GPU). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/Kconfig | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index fcd77b27514d..b979295aab82 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -48,3 +48,14 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT option changes the default for that module option. If in doubt, say "N". + +config DRM_I915_USERPTR + bool "Always enable userptr support" + depends on DRM_I915 + select MMU_NOTIFIER + default y + help + This option selects CONFIG_MMU_NOTIFIER if it isn't already + selected, to enable full userptr support. + + If in doubt, say "Y". -- 2.7.0.rc3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
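In code terms, the select exists so that the notifier-backed revocation path compiles. A loose sketch of the compile-time split follows; the wrapper type, ops table and init helper are invented for illustration, and only mmu_notifier_register() is the real kernel API:

    #include <linux/mmu_notifier.h>
    #include <linux/sched.h>

    struct my_userptr {                     /* illustrative wrapper */
            struct mmu_notifier mn;
            /* ... GPU object state ... */
    };

    #if defined(CONFIG_MMU_NOTIFIER)
    extern const struct mmu_notifier_ops my_userptr_notifier_ops;

    static int userptr_init_notifier(struct my_userptr *ptr)
    {
            /* Register so the kernel reports changes to the user's address
             * space, letting the driver revoke the GPU's access in time. */
            ptr->mn.ops = &my_userptr_notifier_ops;
            return mmu_notifier_register(&ptr->mn, current->mm);
    }
    #else
    static int userptr_init_notifier(struct my_userptr *ptr)
    {
            return -ENODEV; /* full unprivileged userptr unavailable */
    }
    #endif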
[Intel-gfx] [PATCH 086/190] drm/i915: Mark the context and address space as closed
When the user closes the context, mark it and the dependent address space as closed. As we use an asynchronous destruct method, this has two purposes. First, it allows us to flag the closed context and detect internal errors if we try to create any new objects for it (as it is removed from the user's namespace, these should be internal bugs only). And secondly, it allows us to immediately reap stale vma. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_gem.c | 17 +++--- drivers/gpu/drm/i915/i915_gem_context.c | 40 + drivers/gpu/drm/i915/i915_gem_gtt.c | 9 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 9 drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +- 6 files changed, 65 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 262d1b247344..fc35a9b8d910 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -888,6 +888,8 @@ struct intel_context { } engine[I915_NUM_RINGS]; struct list_head link; + + bool closed:1; }; enum fb_op_origin { @@ -2707,6 +2709,7 @@ int __must_check i915_vma_unbind(struct i915_vma *vma); * _guarantee_ VMA in question is _not in use_ anywhere. */ int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma); +void i915_vma_close(struct i915_vma *vma); int i915_gem_object_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f95cf39b7d2..16ee3bd7010e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2385,7 +2385,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) } } -static void i915_vma_close(struct i915_vma *vma) +void i915_vma_close(struct i915_vma *vma) { GEM_BUG_ON(vma->closed); vma->closed = true; @@ -2654,12 +2654,15 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) return ret; } - trace_i915_vma_unbind(vma); - - vma->vm->unbind_vma(vma); + if (likely(!vma->vm->closed)) { + trace_i915_vma_unbind(vma); + vma->vm->unbind_vma(vma); + } vma->bound = 0; - list_del_init(&vma->vm_link); + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + if (vma->is_ggtt) { if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { obj->map_and_fenceable = false; } else if (vma->ggtt_view.pages) { @@ -2670,8 +2673,6 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) vma->ggtt_view.pages = NULL; } - drm_mm_remove_node(&vma->node); - /* Since the unbound list is global, only move to that list if * no more VMAs exist. 
*/ if (--obj->bind_count == 0) @@ -2917,7 +2918,7 @@ search_free: goto err_remove_node; list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); - list_add_tail(&vma->vm_link, &vm->inactive_list); + list_move_tail(&vma->vm_link, &vm->inactive_list); obj->bind_count++; return vma; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 310a770b7984..4583d8fe3585 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -153,6 +153,7 @@ void i915_gem_context_free(struct kref *ctx_ref) struct intel_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); trace_i915_context_free(ctx); + GEM_BUG_ON(!ctx->closed); if (i915.enable_execlists) intel_lr_context_free(ctx); @@ -209,6 +210,37 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) return obj; } +static void i915_ppgtt_close(struct i915_address_space *vm) +{ + struct list_head *phases[] = { + &vm->active_list, + &vm->inactive_list, + &vm->unbound_list, + NULL, + }, **phase; + + GEM_BUG_ON(i915_is_ggtt(vm)); + GEM_BUG_ON(vm->closed); + vm->closed = true; + + for (phase = phases; *phase; phase++) { + struct i915_vma *vma, *vn; + + list_for_each_entry_safe(vma, vn, *phase, vm_link) + if (!vma->closed) + i915_vma_close(vma); + } +} + +static void context_close(struct intel_context *ctx) +{ + GEM_BUG_ON(ctx->closed); + ctx->closed = true; + if (ctx->ppgtt) + i915_ppgtt_close(&ctx->ppgtt->base); + i915_gem_context_unreference(ctx); +} + static struct intel_context * __create_hw_context(struct drm_device *dev, struct drm_i915_file_private *file_priv) @@ -256,7 +288,7 @@ __create_
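What the closed flag buys is that any attempt to create new state in a closed address space becomes an immediately trapped driver bug rather than a silent leak. A sketch, with the GEM_BUG_ON placement as the commit message implies and the rest of the helper illustrative:

    static struct i915_vma *
    vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm)
    {
            struct i915_vma *vma;

            /* The context is gone from the user's namespace: any new vma
             * here can only come from an internal bug, so trap it loudly. */
            GEM_BUG_ON(vm->closed);

            vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
            if (vma == NULL)
                    return ERR_PTR(-ENOMEM);

            vma->vm = vm;
            vma->obj = obj;
            list_add(&vma->obj_link, &obj->vma_list);
            list_add(&vma->vm_link, &vm->unbound_list);
            return vma;
    }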
[Intel-gfx] [PATCH 052/190] drm/i915: Treat ringbuffer writes as write to normal memory
Ringbuffers are now written through either LLC or WC paths, so treating them as simply iomem is no longer adequate. However, for the older !llc hardware, the hardware is documented as treating the TAIL register update as serialising, so we can relax the barriers when filling the rings (but even if it were not, it is still an uncached register write and so serialising anyway). For simplicity, let's ignore the iomem annotation. v2: Remove iomem from ringbuffer->virtual_address Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_lrc.c| 7 +-- drivers/gpu/drm/i915/intel_lrc.h| 6 +++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 +-- drivers/gpu/drm/i915/intel_ringbuffer.h | 19 +-- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 433e9f60e926..527eaf59be25 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -766,13 +766,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf) { - uint32_t __iomem *virt; int rem = ringbuf->size - ringbuf->tail; - - virt = ringbuf->virtual_start + ringbuf->tail; - rem /= 4; - while (rem--) - iowrite32(MI_NOOP, virt++); + memset(ringbuf->virtual_start + ringbuf->tail, 0, rem); ringbuf->tail = 0; intel_ring_update_space(ringbuf); diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index de41ad6cd63d..1e58f2550777 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -71,8 +71,9 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); */ static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) { - ringbuf->tail &= ringbuf->size - 1; + intel_ringbuffer_advance(ringbuf); } + /** * intel_logical_ring_emit() - write a DWORD to the ringbuffer. * @ringbuf: Ringbuffer to write to. 
@@ -81,8 +82,7 @@ static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, u32 data) { - iowrite32(data, ringbuf->virtual_start + ringbuf->tail); - ringbuf->tail += 4; + intel_ringbuffer_emit(ringbuf, data); } static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf, i915_reg_t reg) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2728c0ca0871..02b7032e16e0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2099,13 +2099,8 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf) { - uint32_t __iomem *virt; int rem = ringbuf->size - ringbuf->tail; - - virt = ringbuf->virtual_start + ringbuf->tail; - rem /= 4; - while (rem--) - iowrite32(MI_NOOP, virt++); + memset(ringbuf->virtual_start + ringbuf->tail, 0, rem); ringbuf->tail = 0; intel_ring_update_space(ringbuf); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a1fcb6c7501f..7669a8d30f27 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -99,7 +99,7 @@ struct intel_ring_hangcheck { struct intel_ringbuffer { struct drm_i915_gem_object *obj; - void __iomem *virtual_start; + void *virtual_start; struct intel_engine_cs *ring; struct list_head link; @@ -468,12 +468,20 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request); int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); +static inline void intel_ringbuffer_emit(struct intel_ringbuffer *rb, +u32 data) +{ + *(uint32_t *)(rb->virtual_start + rb->tail) = data; + rb->tail += 4; +} +static inline void intel_ringbuffer_advance(struct intel_ringbuffer *rb) +{ + rb->tail &= rb->size - 1; +} static inline void intel_ring_emit(struct intel_engine_cs *ring, u32 data) { - struct intel_ringbuffer *ringbuf = ring->buffer; - iowrite32(data, ringbuf->virtual_start + ringbuf->tail); - ringbuf->tail += 4; + intel_ringbuffer_emit(ring->buffer, data); } static inline void intel_ring_emit_reg(struct intel_engine_cs *ring, i915_reg_t reg) @@ -482,8 +490,7 @@ static inline void intel_ring_emit_reg(struct intel_engine_cs *r
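Because the new emit/advance pair is plain C with no mmio accessors, it can be modelled standalone. The program below is an illustrative model of the patch's two helpers, demonstrating why the tail mask requires a power-of-two ring size:

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    struct ring {
            char    *vaddr; /* CPU mapping, no longer __iomem */
            uint32_t tail;  /* write offset in bytes */
            uint32_t size;  /* always a power of two */
    };

    static inline void ring_emit(struct ring *rb, uint32_t data)
    {
            /* Plain store: no iowrite32() barrier, relying on the later
             * (uncached, serialising) TAIL register write for ordering. */
            *(uint32_t *)(rb->vaddr + rb->tail) = data;
            rb->tail += 4;
    }

    static inline void ring_advance(struct ring *rb)
    {
            rb->tail &= rb->size - 1; /* wrap: valid only for 2^n sizes */
    }

    int main(void)
    {
            struct ring rb = { .tail = 4092, .size = 4096 };

            rb.vaddr = calloc(1, rb.size);
            assert(rb.vaddr);

            ring_emit(&rb, 0xdeadbeef); /* lands in the last dword */
            ring_advance(&rb);
            assert(rb.tail == 0);       /* wrapped back to the start */

            free(rb.vaddr);
            return 0;
    }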
[Intel-gfx] [PATCH 085/190] drm/i915: Release vma when the handle is closed
In order to prevent a leak of the vma on shared objects, we need to hook into the object_close callback to destroy the vma on the object for this file. However, if we destroyed that vma immediately we may cause unexpected application stalls as we try to unbind a busy vma - hence we defer the unbind to when we retire the vma. v2: Keep vma allocated until closed. This is useful for a later optimisation, but it is required now in order to handle potential recursion of i915_vma_unbind() by retiring itself. v3: Comments are important. Testcase: igt/gem_ppggtt/flink-and-close-vma-leak Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio --- +static void i915_vma_close(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->closed); + vma->closed = true; + + list_del_init(&vma->obj_link); + if (!vma->active) + WARN_ON(i915_vma_unbind(vma)); +} + +void i915_gem_close_object(struct drm_gem_object *gem, + struct drm_file *file) +{ + struct drm_i915_gem_object *obj = to_intel_bo(gem); + struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_vma *vma, *vn; + + mutex_lock(&obj->base.dev->struct_mutex); + list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) + if (vma->vm->file == fpriv) + i915_vma_close(vma); + mutex_unlock(&obj->base.dev->struct_mutex); +} + /** * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT * @DRM_IOCTL_ARGS: standard ioctl arguments @@ -2571,31 +2595,56 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) old_write_domain); } +static void i915_vma_destroy(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->node.allocated); + GEM_BUG_ON(vma->active); + GEM_BUG_ON(!vma->closed); + + list_del(&vma->vm_link); + if (!vma->is_ggtt) + i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); + + kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); +} + static int __i915_vma_unbind(struct i915_vma *vma, bool wait) { struct drm_i915_gem_object *obj = vma->obj; - int ret; + int ret, i; - if (list_empty(&vma->obj_link)) - return 0; + /* First wait upon any activity as retiring the request may +* have side-effects such as unpinning or even unbinding this vma. +*/ + if (vma->active && wait) { + bool was_closed; - if (!drm_mm_node_allocated(&vma->node)) { - i915_gem_vma_destroy(vma); - return 0; + /* When a closed VMA is retired, it is unbound - eek. */ + was_closed = vma->closed; + vma->closed = false; + + for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) { + ret = i915_wait_request(vma->last_read[i].request); + if (ret) + break; + } + + vma->closed = was_closed; + if (ret) + return ret; + + GEM_BUG_ON(vma->active); } if (vma->pin_count) return -EBUSY; + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; + GEM_BUG_ON(obj->bind_count == 0); GEM_BUG_ON(obj->pages == NULL); - if (wait) { - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; - } - if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { i915_gem_object_finish_gtt(obj); @@ -2622,7 +2671,6 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) } drm_mm_remove_node(&vma->node); - i915_gem_vma_destroy(vma); /* Since the unbound list is global, only move to that list if * no more VMAs exist. 
*/ @@ -2636,6 +2684,10 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) */ i915_gem_object_unpin_pages(obj); +destroy: + if (unlikely(vma->closed)) + i915_vma_destroy(vma); + return 0; } @@ -2814,7 +2866,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, if (offset & (alignment - 1) || offset + size > end) { ret = -EINVAL; - goto err_free_vma; + goto err_vma; } vma->node.start = offset; vma->node.size = size; @@ -2826,7 +2878,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, ret = drm_mm_reserve_node(&vm->mm, &vma->node); } if (ret) - goto err_free_vma; + goto err_vma; } else { if (flags & PIN_HIGH) { search_flag = DRM_MM_SEARCH_BELOW; @@ -2851,7 +2903,7 @@ search_free: if (ret == 0)
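For reference, the close-versus-retire split condenses to the function below, taken from the patch's own i915_vma_close() with the retire-side behaviour summarised in comments:

    void i915_vma_close(struct i915_vma *vma)
    {
            GEM_BUG_ON(vma->closed);
            vma->closed = true;

            /* Unlink from the object: the handle is gone, so no new user
             * can find this vma. */
            list_del_init(&vma->obj_link);

            /* Idle vma: unbind on the spot. A busy vma stays bound to
             * avoid stalling the application; the retire path later sees
             * vma->closed, unbinds it and calls i915_vma_destroy(). */
            if (!vma->active)
                    WARN_ON(i915_vma_unbind(vma));
    }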